From 96ac2606ea162fa24c390917a99e5a506f4e195b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Sun, 25 Jan 2026 13:26:24 +0100 Subject: [PATCH 01/46] fmt --- benches/mdf_benchmark.rs | 2 +- examples/inspect_test_files.rs | 20 +- src/c_api.rs | 322 +++++++++++++++++--------------- src/data_holder/tensor_arrow.rs | 4 +- src/export/hdf5.rs | 4 +- src/export/numpy.rs | 4 +- src/mdfinfo.rs | 10 +- src/mdfinfo/mdfinfo4.rs | 8 +- src/mdfr.rs | 9 +- src/mdfreader/conversions3.rs | 230 ++++++++++++++--------- src/mdfreader/data_read3.rs | 2 +- src/mdfreader/data_read4.rs | 3 +- src/mdfreader/mdfreader3.rs | 2 +- src/mdfreader/mdfreader4.rs | 12 +- src/mdfwriter/mdfwriter4.rs | 4 +- tests/basic.rs | 8 +- tests/channel_types.rs | 18 +- tests/compressed_data.rs | 3 +- tests/conversion.rs | 3 +- tests/data_list.rs | 3 +- tests/data_types.rs | 9 +- tests/export.rs | 5 +- tests/writing.rs | 5 +- 23 files changed, 390 insertions(+), 300 deletions(-) diff --git a/benches/mdf_benchmark.rs b/benches/mdf_benchmark.rs index 0cedfa9..2418fe9 100644 --- a/benches/mdf_benchmark.rs +++ b/benches/mdf_benchmark.rs @@ -1,6 +1,6 @@ use anyhow::Result; use core::time::Duration; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{Criterion, criterion_group, criterion_main}; use mdfr::mdfreader::Mdf; use std::process::Command; use std::sync::LazyLock; diff --git a/examples/inspect_test_files.rs b/examples/inspect_test_files.rs index aacb45d..7e92a63 100644 --- a/examples/inspect_test_files.rs +++ b/examples/inspect_test_files.rs @@ -4,46 +4,46 @@ use mdfr::mdfreader::Mdf; fn main() -> Result<()> { println!("=== MDF 4.3 Test File Inspector ===\n"); - + // 1. Channel List (CL) + Data Stream (DS) test file println!("1. 
ChannelList test (simple_list.mf4):"); println!("====================================="); let file = "/home/ratal/workspace/mdfreader/mdfreader/tests/MDF4/MDF4.3/Base_Standard/Examples/DynamicData/ChannelList/simple_list.mf4"; inspect_file(file)?; - + // 2. Channel Variant (CV) test file println!("\n2. Channel Variant test (Etas_cv_storage_with_fixed_length.mf4):"); println!("=================================================================="); let file = "/home/ratal/workspace/mdfreader/mdfreader/tests/MDF4/MDF4.3/Base_Standard/Examples/Variant/Etas_cv_storage_with_fixed_length.mf4"; inspect_file(file)?; - + // 3. Channel Union (CU) test file println!("\n3. Channel Union test (Etas_cu_storage_with_fixed_length.mf4):"); println!("============================================================"); let file = "/home/ratal/workspace/mdfreader/mdfreader/tests/MDF4/MDF4.3/Base_Standard/Examples/Union/Etas_cu_storage_with_fixed_length.mf4"; inspect_file(file)?; - + Ok(()) } fn inspect_file(file_path: &str) -> Result<()> { let mut mdf = Mdf::new(file_path)?; mdf.load_all_channels_data_in_memory()?; - + // Get all channels let channels = mdf.get_channel_names_set(); println!("Total channels: {}", channels.len()); println!("Channel names: {:?}", channels.iter().collect::>()); - + // Inspect each channel for channel_name in channels.iter() { println!("\nChannel: {}", channel_name); - + // Get channel data if let Some(data) = mdf.get_channel_data(channel_name) { println!(" Length: {}", data.len()); println!(" Type: {:?}", data.data_type(false)); - + // Show first few values let formatted = format!("{:?}", data); let preview = if formatted.len() > 200 { @@ -53,7 +53,7 @@ fn inspect_file(file_path: &str) -> Result<()> { }; println!(" Preview: {}", preview); } - + // Get metadata if let Ok(Some(unit)) = mdf.get_channel_unit(channel_name) { println!(" Unit: {}", unit); @@ -69,6 +69,6 @@ fn inspect_file(file_path: &str) -> Result<()> { println!(" Master channel: {}", master); } } - + 
Ok(()) } diff --git a/src/c_api.rs b/src/c_api.rs index b71a625..f1adab9 100644 --- a/src/c_api.rs +++ b/src/c_api.rs @@ -1,42 +1,46 @@ //! C API use crate::mdfreader::Mdf; -use arrow::ffi::{to_ffi, FFI_ArrowArray}; +use arrow::ffi::{FFI_ArrowArray, to_ffi}; use libc::c_char; -use std::ffi::{c_uchar, c_ushort, CStr, CString}; +use std::ffi::{CStr, CString, c_uchar, c_ushort}; /// create a new mdf from a file and its metadata #[unsafe(no_mangle)] -pub unsafe extern "C" fn new_mdf(file_name: *const c_char) -> *mut Mdf { unsafe { - // # Safety - // - // It is the caller's guarantee to ensure `file_name`: - // - // - is not a null pointer - // - points to valid, initialized data - // - points to memory ending in a null byte - // - won't be mutated for the duration of this function call - let f = CStr::from_ptr(file_name) - .to_str() - .expect("Could not convert into utf8 the file name string"); - match Mdf::new(f) { - Ok(mut mdf) => { - let p: *mut Mdf = &mut mdf; - std::mem::forget(mdf); - p +pub unsafe extern "C" fn new_mdf(file_name: *const c_char) -> *mut Mdf { + unsafe { + // # Safety + // + // It is the caller's guarantee to ensure `file_name`: + // + // - is not a null pointer + // - points to valid, initialized data + // - points to memory ending in a null byte + // - won't be mutated for the duration of this function call + let f = CStr::from_ptr(file_name) + .to_str() + .expect("Could not convert into utf8 the file name string"); + match Mdf::new(f) { + Ok(mut mdf) => { + let p: *mut Mdf = &mut mdf; + std::mem::forget(mdf); + p + } + Err(e) => panic!("{e:?}"), } - Err(e) => panic!("{e:?}"), } -}} +} /// returns mdf file version #[unsafe(no_mangle)] -pub unsafe extern "C" fn get_version(mdf: *const Mdf) -> c_ushort { unsafe { - if let Some(mdf) = mdf.as_ref() { - mdf.get_version() - } else { - panic!("Null pointer given for Mdf Rust object") +pub unsafe extern "C" fn get_version(mdf: *const Mdf) -> c_ushort { + unsafe { + if let Some(mdf) = mdf.as_ref() { + 
mdf.get_version() + } else { + panic!("Null pointer given for Mdf Rust object") + } } -}} +} /// returns channel's unit string /// if no unit is existing for this channel, returns a null pointer @@ -44,24 +48,26 @@ pub unsafe extern "C" fn get_version(mdf: *const Mdf) -> c_ushort { unsafe { pub unsafe extern "C" fn get_channel_unit( mdf: *const Mdf, channel_name: *const c_char, -) -> *const c_char { unsafe { - let name = CStr::from_ptr(channel_name) - .to_str() - .expect("Could not convert into utf8 the file name string"); - if let Some(mdf) = mdf.as_ref() { - match mdf.get_channel_unit(name) { - Ok(unit) => match unit { - Some(unit) => CString::new(unit) - .expect("CString::new failed because of internal 0 byte") - .into_raw(), - None => std::ptr::null::(), // null pointer - }, - Err(e) => panic!("{}", e), +) -> *const c_char { + unsafe { + let name = CStr::from_ptr(channel_name) + .to_str() + .expect("Could not convert into utf8 the file name string"); + if let Some(mdf) = mdf.as_ref() { + match mdf.get_channel_unit(name) { + Ok(unit) => match unit { + Some(unit) => CString::new(unit) + .expect("CString::new failed because of internal 0 byte") + .into_raw(), + None => std::ptr::null::(), // null pointer + }, + Err(e) => panic!("{}", e), + } + } else { + panic!("Null pointer given for Mdf Rust object") } - } else { - panic!("Null pointer given for Mdf Rust object") } -}} +} /// returns channel's description string /// if no description is existing for this channel, returns null pointer @@ -69,26 +75,28 @@ pub unsafe extern "C" fn get_channel_unit( pub unsafe extern "C" fn get_channel_desc( mdf: *const Mdf, channel_name: *const libc::c_char, -) -> *const c_char { unsafe { - let name = CStr::from_ptr(channel_name) - .to_str() - .expect("Could not convert into utf8 the file name string"); - if let Some(mdf) = mdf.as_ref() { - match mdf.get_channel_desc(name) { - Ok(desc) => { - match desc { - Some(desc) => CString::new(desc) - .expect("CString::new failed because of 
internal 0 byte") - .into_raw(), - None => std::ptr::null::(), // null pointer +) -> *const c_char { + unsafe { + let name = CStr::from_ptr(channel_name) + .to_str() + .expect("Could not convert into utf8 the file name string"); + if let Some(mdf) = mdf.as_ref() { + match mdf.get_channel_desc(name) { + Ok(desc) => { + match desc { + Some(desc) => CString::new(desc) + .expect("CString::new failed because of internal 0 byte") + .into_raw(), + None => std::ptr::null::(), // null pointer + } } + Err(e) => panic!("{}", e), } - Err(e) => panic!("{}", e), + } else { + panic!("Null pointer given for Mdf Rust object") } - } else { - panic!("Null pointer given for Mdf Rust object") } -}} +} /// returns channel's associated master channel name string /// if no master channel existing, returns null pointer @@ -96,21 +104,23 @@ pub unsafe extern "C" fn get_channel_desc( pub unsafe extern "C" fn get_channel_master( mdf: *const Mdf, channel_name: *const libc::c_char, -) -> *const c_char { unsafe { - let name = CStr::from_ptr(channel_name) - .to_str() - .expect("Could not convert into utf8 the file name string"); - if let Some(mdf) = mdf.as_ref() { - match mdf.get_channel_master(name) { - Some(st) => CString::new(st) - .expect("CString::new failed because of internal 0 byte") - .into_raw(), - None => std::ptr::null::(), // null pointer +) -> *const c_char { + unsafe { + let name = CStr::from_ptr(channel_name) + .to_str() + .expect("Could not convert into utf8 the file name string"); + if let Some(mdf) = mdf.as_ref() { + match mdf.get_channel_master(name) { + Some(st) => CString::new(st) + .expect("CString::new failed because of internal 0 byte") + .into_raw(), + None => std::ptr::null::(), // null pointer + } + } else { + panic!("Null pointer given for Mdf Rust object") } - } else { - panic!("Null pointer given for Mdf Rust object") } -}} +} /// returns channel's associated master channel type string /// 0 = None (normal data channels), 1 = Time (seconds), 2 = Angle (radians), @@ 
-119,52 +129,58 @@ pub unsafe extern "C" fn get_channel_master( pub unsafe extern "C" fn get_channel_master_type( mdf: *const Mdf, channel_name: *const libc::c_char, -) -> c_uchar { unsafe { - let name = CStr::from_ptr(channel_name) - .to_str() - .expect("Could not convert into utf8 the file name string"); - if let Some(mdf) = mdf.as_ref() { - mdf.get_channel_master_type(name) - } else { - panic!("Null pointer given for Mdf Rust object") +) -> c_uchar { + unsafe { + let name = CStr::from_ptr(channel_name) + .to_str() + .expect("Could not convert into utf8 the file name string"); + if let Some(mdf) = mdf.as_ref() { + mdf.get_channel_master_type(name) + } else { + panic!("Null pointer given for Mdf Rust object") + } } -}} +} /// returns a sorted array of strings of all channel names contained in file #[unsafe(no_mangle)] -pub unsafe extern "C" fn get_channel_names_set(mdf: *const Mdf) -> *const *mut c_char { unsafe { - if let Some(mdf) = mdf.as_ref() { - let set = mdf.get_channel_names_set(); - let mut s = set.into_iter().collect::>(); - s.sort(); - let cstring_vec = s - .iter() - .map(|e| { - CString::new(e.to_string()) - .expect("CString::new failed because of internal 0 byte") - .into_raw() - }) - .collect::>(); - let p = cstring_vec.as_ptr(); - std::mem::forget(cstring_vec); - p - } else { - panic!("Null pointer given for Mdf Rust object") +pub unsafe extern "C" fn get_channel_names_set(mdf: *const Mdf) -> *const *mut c_char { + unsafe { + if let Some(mdf) = mdf.as_ref() { + let set = mdf.get_channel_names_set(); + let mut s = set.into_iter().collect::>(); + s.sort(); + let cstring_vec = s + .iter() + .map(|e| { + CString::new(e.to_string()) + .expect("CString::new failed because of internal 0 byte") + .into_raw() + }) + .collect::>(); + let p = cstring_vec.as_ptr(); + std::mem::forget(cstring_vec); + p + } else { + panic!("Null pointer given for Mdf Rust object") + } } -}} +} /// load all channels data in memory #[unsafe(no_mangle)] -pub unsafe extern "C" fn 
load_all_channels_data_in_memory(mdf: *mut Mdf) { unsafe { - if let Some(mdf) = mdf.as_mut() { - match mdf.load_all_channels_data_in_memory() { - Ok(_) => {} - Err(e) => panic!("{}", e), +pub unsafe extern "C" fn load_all_channels_data_in_memory(mdf: *mut Mdf) { + unsafe { + if let Some(mdf) = mdf.as_mut() { + match mdf.load_all_channels_data_in_memory() { + Ok(_) => {} + Err(e) => panic!("{}", e), + } + } else { + panic!("Null pointer given for Mdf Rust object") } - } else { - panic!("Null pointer given for Mdf Rust object") } -}} +} /// returns channel's arrow Array. /// null pointer returned if not found @@ -172,24 +188,26 @@ pub unsafe extern "C" fn load_all_channels_data_in_memory(mdf: *mut Mdf) { unsaf pub unsafe extern "C" fn get_channel_array( mdf: *const Mdf, channel_name: *const libc::c_char, -) -> *const FFI_ArrowArray { unsafe { - let name = CStr::from_ptr(channel_name) - .to_str() - .expect("Could not convert into utf8 the file name string"); - if let Some(mdf) = mdf.as_ref() { - match mdf.get_channel_data(name) { - Some(data) => { - let (array, _) = - to_ffi(&data.to_data()).expect("ffi failed converting arrow array into C"); - let array_ptr: *const FFI_ArrowArray = &array; - array_ptr +) -> *const FFI_ArrowArray { + unsafe { + let name = CStr::from_ptr(channel_name) + .to_str() + .expect("Could not convert into utf8 the file name string"); + if let Some(mdf) = mdf.as_ref() { + match mdf.get_channel_data(name) { + Some(data) => { + let (array, _) = + to_ffi(&data.to_data()).expect("ffi failed converting arrow array into C"); + let array_ptr: *const FFI_ArrowArray = &array; + array_ptr + } + None => std::ptr::null::(), // null pointers } - None => std::ptr::null::(), // null pointers + } else { + panic!("Null pointer given for Mdf Rust object") } - } else { - panic!("Null pointer given for Mdf Rust object") } -}} +} // export to Parquet file // Compression can be one of the following strings @@ -201,36 +219,38 @@ pub unsafe extern "C" fn 
export_to_parquet( mdf: *const Mdf, file_name: *const c_char, compression: *const c_char, -) { unsafe { - // # Safety - // - // It is the caller's guarantee to ensure `file_name`: - // - // - is not a null pointer - // - points to valid, initialized data - // - points to memory ending in a null byte - // - won't be mutated for the duration of this function call - let name = CStr::from_ptr(file_name) - .to_str() - .expect("Could not convert into utf8 the file name string"); - let comp = if compression.is_null() { - None - } else { - Some( - CStr::from_ptr(compression) - .to_str() - .expect("Could not convert into utf8 the compression string"), - ) - }; - if let Some(mdf) = mdf.as_ref() { - match mdf.export_to_parquet(name, comp) { - Ok(_) => {} - Err(e) => panic!("{}", e), +) { + unsafe { + // # Safety + // + // It is the caller's guarantee to ensure `file_name`: + // + // - is not a null pointer + // - points to valid, initialized data + // - points to memory ending in a null byte + // - won't be mutated for the duration of this function call + let name = CStr::from_ptr(file_name) + .to_str() + .expect("Could not convert into utf8 the file name string"); + let comp = if compression.is_null() { + None + } else { + Some( + CStr::from_ptr(compression) + .to_str() + .expect("Could not convert into utf8 the compression string"), + ) + }; + if let Some(mdf) = mdf.as_ref() { + match mdf.export_to_parquet(name, comp) { + Ok(_) => {} + Err(e) => panic!("{}", e), + } + } else { + panic!("Null pointer given for Mdf Rust object") } - } else { - panic!("Null pointer given for Mdf Rust object") } -}} +} // export to hdf5 file // Compression can be one of the following strings diff --git a/src/data_holder/tensor_arrow.rs b/src/data_holder/tensor_arrow.rs index b8ec42a..3d2eb0e 100644 --- a/src/data_holder/tensor_arrow.rs +++ b/src/data_holder/tensor_arrow.rs @@ -5,8 +5,8 @@ use arrow::{ array::{ArrayBuilder, BooleanBufferBuilder, PrimitiveArray, PrimitiveBuilder}, 
buffer::{BooleanBuffer, MutableBuffer}, datatypes::{ - ArrowPrimitiveType, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, - UInt16Type, UInt32Type, UInt64Type, UInt8Type, + ArrowPrimitiveType, Float32Type, Float64Type, Int8Type, Int16Type, Int32Type, Int64Type, + UInt8Type, UInt16Type, UInt32Type, UInt64Type, }, }; #[cfg(feature = "ndarray")] diff --git a/src/export/hdf5.rs b/src/export/hdf5.rs index 627e695..f73f9f5 100644 --- a/src/export/hdf5.rs +++ b/src/export/hdf5.rs @@ -2,9 +2,9 @@ use anyhow::{Context, Error, Result}; use arrow::array::Array; use hdf5::{ + Dataset, DatasetBuilder, Group, H5Type, file::File, types::{VarLenArray, VarLenUnicode}, - Dataset, DatasetBuilder, Group, H5Type, }; use log::info; use ndarray::{Array as NdArray, IxDyn}; @@ -13,9 +13,9 @@ use crate::mdfreader::Mdf; use crate::{ data_holder::channel_data::ChannelData, mdfinfo::{ + MdfInfo, mdfinfo3::{Cg3, Cn3, MdfInfo3}, mdfinfo4::{Cg4, Cn4, Dg4, MdfInfo4}, - MdfInfo, }, }; #[cfg(feature = "hdf5-mpio")] diff --git a/src/export/numpy.rs b/src/export/numpy.rs index 8a9bfeb..86d820a 100644 --- a/src/export/numpy.rs +++ b/src/export/numpy.rs @@ -1,11 +1,11 @@ //! this module provides methods to get directly channelData into python -use arrow::array::{make_array, Array, ArrayData}; +use arrow::array::{Array, ArrayData, make_array}; use arrow::pyarrow::PyArrowType; use numpy::npyffi::types::NPY_ORDER; use numpy::{PyArrayMethods, ToPyArray}; -use pyo3::{prelude::*, Bound}; +use pyo3::{Bound, prelude::*}; use crate::data_holder::channel_data::ChannelData; use crate::data_holder::tensor_arrow::Order; diff --git a/src/mdfinfo.rs b/src/mdfinfo.rs index 85542eb..6c615b3 100644 --- a/src/mdfinfo.rs +++ b/src/mdfinfo.rs @@ -2,9 +2,9 @@ //! 
mdfinfo module use anyhow::Error; -use anyhow::{bail, Context, Result}; +use anyhow::{Context, Result, bail}; use arrow::array::Array; -use binrw::{binrw, BinReaderExt}; +use binrw::{BinReaderExt, binrw}; use codepage::to_encoding; use encoding_rs::Encoding; use log::info; @@ -22,10 +22,10 @@ pub mod mdfinfo4; pub mod sym_buf_reader; use binrw::io::Cursor; -use mdfinfo3::{hd3_comment_parser, hd3_parser, parse_dg3, MdfInfo3, SharableBlocks3}; +use mdfinfo3::{MdfInfo3, SharableBlocks3, hd3_comment_parser, hd3_parser, parse_dg3}; use mdfinfo4::{ - build_channel_db, hd4_parser, parse_at4, parse_ch4, parse_dg4, parse_ev4, parse_fh, MdfInfo4, - SharableBlocks, + MdfInfo4, SharableBlocks, build_channel_db, hd4_parser, parse_at4, parse_ch4, parse_dg4, + parse_ev4, parse_fh, }; use crate::data_holder::channel_data::ChannelData; diff --git a/src/mdfinfo/mdfinfo4.rs b/src/mdfinfo/mdfinfo4.rs index b2df0f5..c67da94 100644 --- a/src/mdfinfo/mdfinfo4.rs +++ b/src/mdfinfo/mdfinfo4.rs @@ -1577,7 +1577,6 @@ pub fn parse_ev4( Ok((ev, position)) } - /// Ch4Block struct #[derive(Debug, PartialEq, Eq, Default, Clone)] #[binrw] @@ -3928,12 +3927,7 @@ pub fn decompress_data( let m = org_data_length / zip_parameter as u64; let tail: Vec = data.split_off((m * zip_parameter as u64) as usize); let mut output = vec![0u8; (m * zip_parameter as u64) as usize]; - transpose::transpose( - &data, - &mut output, - m as usize, - zip_parameter as usize, - ); + transpose::transpose(&data, &mut output, m as usize, zip_parameter as usize); data = output; if !tail.is_empty() { data.extend(tail); diff --git a/src/mdfr.rs b/src/mdfr.rs index 4f9aca5..dc793db 100644 --- a/src/mdfr.rs +++ b/src/mdfr.rs @@ -523,7 +523,8 @@ df=polars.DataFrame(series) if let Ok(res) = mdf.mdf_info.get_tx(chb.ch_tx_name) { let _ = chdict.set_item("name", res); } - let _ = chdict.set_item("comment", mdf.mdf_info.get_comments(chb.ch_md_comment)); + let _ = + chdict.set_item("comment", mdf.mdf_info.get_comments(chb.ch_md_comment)); 
let type_name = match chb.ch_type { 0 => "Group", 1 => "Function", @@ -731,8 +732,7 @@ pyplot.show() // Events let ev_info = mdfinfo4.list_events(); if !ev_info.is_empty() { - writeln!(output, "\n--- Events ---") - .context("cannot print events header")?; + writeln!(output, "\n--- Events ---").context("cannot print events header")?; write!(output, "{}", ev_info).context("cannot print events")?; } // Channel Hierarchy @@ -743,8 +743,7 @@ pyplot.show() write!(output, "{}", ch_info).context("cannot print channel hierarchy")?; } // Channels - writeln!(output, "\n--- Channels ---") - .context("cannot print channels header")?; + writeln!(output, "\n--- Channels ---").context("cannot print channels header")?; for (master, list) in mdfinfo4.get_master_channel_names_set().iter() { if let Some(master_name) = master { writeln!(output, "\nMaster: {master_name}") diff --git a/src/mdfreader/conversions3.rs b/src/mdfreader/conversions3.rs index 3b4ea19..0f35558 100644 --- a/src/mdfreader/conversions3.rs +++ b/src/mdfreader/conversions3.rs @@ -4,9 +4,9 @@ use arrow::array::{Float64Array, Float64Builder, LargeStringBuilder, PrimitiveBu use arrow::datatypes::{ArrowPrimitiveType, Float64Type}; use arrow::error::ArrowError; use itertools::Itertools; +use num::NumCast; use num::abs; use num::cast::AsPrimitive; -use num::NumCast; use std::collections::BTreeMap; use crate::data_holder::channel_data::ChannelData; @@ -254,9 +254,8 @@ where let array_f64: Float64Array = array .finish() .try_unary(|value| { - num::cast::cast::(value).ok_or_else(|| { - ArrowError::CastError(format!("Can't cast value {value:?} to f64")) - }) + num::cast::cast::(value) + .ok_or_else(|| ArrowError::CastError(format!("Can't cast value {value:?} to f64"))) }) .context("failed converting array to f64")?; let mut out = Float64Builder::with_capacity(array.capacity()); @@ -360,9 +359,8 @@ where let array_f64: Float64Array = array .finish() .try_unary(|value| { - num::cast::cast::(value).ok_or_else(|| { - 
ArrowError::CastError(format!("Can't cast value {value:?} to f64")) - }) + num::cast::cast::(value) + .ok_or_else(|| ArrowError::CastError(format!("Can't cast value {value:?} to f64"))) }) .context("failed converting array to f64")?; let mut out = Float64Builder::with_capacity(array.capacity()); @@ -481,9 +479,8 @@ where let array_f64: Float64Array = array .finish() .try_unary(|value| { - num::cast::cast::(value).ok_or_else(|| { - ArrowError::CastError(format!("Can't cast value {value:?} to f64")) - }) + num::cast::cast::(value) + .ok_or_else(|| ArrowError::CastError(format!("Can't cast value {value:?} to f64"))) }) .context("failed converting array to f64")?; let mut out = Float64Builder::with_capacity(array.capacity()); @@ -606,9 +603,8 @@ where let array_f64: Float64Array = array .finish() .try_unary(|value| { - num::cast::cast::(value).ok_or_else(|| { - ArrowError::CastError(format!("Can't cast value {value:?} to f64")) - }) + num::cast::cast::(value) + .ok_or_else(|| ArrowError::CastError(format!("Can't cast value {value:?} to f64"))) }) .context("failed converting array to f64")?; let mut new_array = vec![0f64; *cycle_count]; @@ -801,9 +797,8 @@ where let array_f64: Float64Array = array .finish() .try_unary(|value| { - num::cast::cast::(value).ok_or_else(|| { - ArrowError::CastError(format!("Can't cast value {value:?} to f64")) - }) + num::cast::cast::(value) + .ok_or_else(|| ArrowError::CastError(format!("Can't cast value {value:?} to f64"))) }) .context("failed converting array to f64")?; let mut new_array = vec![0f64; cycle_count]; @@ -835,56 +830,82 @@ fn value_to_value_with_interpolation( ) -> Result<(), Error> { match &mut cn.data { ChannelData::Int8(a) => { - cn.data = ChannelData::Float64(value_to_value_with_interpolation_calculation(a, - cc_val, - *cycle_count as usize).context("failed value to value with interpolation conversion of i8 channel")?); + cn.data = ChannelData::Float64( + value_to_value_with_interpolation_calculation(a, cc_val, 
*cycle_count as usize) + .context("failed value to value with interpolation conversion of i8 channel")?, + ); } ChannelData::UInt8(a) => { - cn.data = ChannelData::Float64(value_to_value_with_interpolation_calculation(a, - cc_val, - *cycle_count as usize).context("failed value to value with interpolation conversion of u8 channel")?); + cn.data = ChannelData::Float64( + value_to_value_with_interpolation_calculation(a, cc_val, *cycle_count as usize) + .context("failed value to value with interpolation conversion of u8 channel")?, + ); } ChannelData::Int16(a) => { - cn.data = ChannelData::Float64(value_to_value_with_interpolation_calculation(a, - cc_val, - *cycle_count as usize).context("failed value to value with interpolation conversion of i16 channel")?); + cn.data = ChannelData::Float64( + value_to_value_with_interpolation_calculation(a, cc_val, *cycle_count as usize) + .context( + "failed value to value with interpolation conversion of i16 channel", + )?, + ); } ChannelData::UInt16(a) => { - cn.data = ChannelData::Float64(value_to_value_with_interpolation_calculation(a, - cc_val, - *cycle_count as usize).context("failed value to value with interpolation conversion of u16 channel")?); + cn.data = ChannelData::Float64( + value_to_value_with_interpolation_calculation(a, cc_val, *cycle_count as usize) + .context( + "failed value to value with interpolation conversion of u16 channel", + )?, + ); } ChannelData::Int32(a) => { - cn.data = ChannelData::Float64(value_to_value_with_interpolation_calculation(a, - cc_val, - *cycle_count as usize).context("failed value to value with interpolation conversion of i32 channel")?); + cn.data = ChannelData::Float64( + value_to_value_with_interpolation_calculation(a, cc_val, *cycle_count as usize) + .context( + "failed value to value with interpolation conversion of i32 channel", + )?, + ); } ChannelData::UInt32(a) => { - cn.data = ChannelData::Float64(value_to_value_with_interpolation_calculation(a, - cc_val, - *cycle_count as 
usize).context("failed value to value with interpolation conversion of u32 channel")?); + cn.data = ChannelData::Float64( + value_to_value_with_interpolation_calculation(a, cc_val, *cycle_count as usize) + .context( + "failed value to value with interpolation conversion of u32 channel", + )?, + ); } ChannelData::Float32(a) => { - cn.data = ChannelData::Float64(value_to_value_with_interpolation_calculation(a, - cc_val, - *cycle_count as usize).context("failed value to value with interpolation conversion of f32 channel")?); + cn.data = ChannelData::Float64( + value_to_value_with_interpolation_calculation(a, cc_val, *cycle_count as usize) + .context( + "failed value to value with interpolation conversion of f32 channel", + )?, + ); } ChannelData::Int64(a) => { - cn.data = ChannelData::Float64(value_to_value_with_interpolation_calculation(a, - cc_val, - *cycle_count as usize).context("failed value to value with interpolation conversion of i64 channel")?); + cn.data = ChannelData::Float64( + value_to_value_with_interpolation_calculation(a, cc_val, *cycle_count as usize) + .context( + "failed value to value with interpolation conversion of i64 channel", + )?, + ); } ChannelData::UInt64(a) => { - cn.data = ChannelData::Float64(value_to_value_with_interpolation_calculation(a, - cc_val, - *cycle_count as usize).context("failed value to value with interpolation conversion of u64 channel")?); + cn.data = ChannelData::Float64( + value_to_value_with_interpolation_calculation(a, cc_val, *cycle_count as usize) + .context( + "failed value to value with interpolation conversion of u64 channel", + )?, + ); } ChannelData::Float64(a) => { - cn.data = ChannelData::Float64(value_to_value_with_interpolation_calculation(a, - cc_val, - *cycle_count as usize).context("failed value to value with interpolation conversion of f64 channel")?); + cn.data = ChannelData::Float64( + value_to_value_with_interpolation_calculation(a, cc_val, *cycle_count as usize) + .context( + "failed value to value 
with interpolation conversion of f64 channel", + )?, + ); } - _=> warn!( + _ => warn!( "not possible to apply value to value with interpolation conversion to the data type of channel {}", cn.unique_name, ), @@ -907,9 +928,8 @@ where let array_f64: Float64Array = array .finish() .try_unary(|value| { - num::cast::cast::(value).ok_or_else(|| { - ArrowError::CastError(format!("Can't cast value {value:?} to f64")) - }) + num::cast::cast::(value) + .ok_or_else(|| ArrowError::CastError(format!("Can't cast value {value:?} to f64"))) }) .context("failed converting array to f64")?; let mut new_array = vec![0f64; cycle_count]; @@ -926,11 +946,7 @@ where Err(idx) => { let (x0, y0) = val[idx - 1]; let (x1, y1) = val[idx]; - if (a - x0) > (x1 - a) { - *y1 - } else { - *y0 - } + if (a - x0) > (x1 - a) { *y1 } else { *y0 } } }; }); @@ -945,54 +961,84 @@ fn value_to_value_without_interpolation( ) -> Result<(), Error> { match &mut cn.data { ChannelData::Int8(a) => { - cn.data = ChannelData::Float64(value_to_value_without_interpolation_calculation(a, - cc_val, - *cycle_count as usize).context("failed value to value without interpolation conversion of i8 channel")?); + cn.data = ChannelData::Float64( + value_to_value_without_interpolation_calculation(a, cc_val, *cycle_count as usize) + .context( + "failed value to value without interpolation conversion of i8 channel", + )?, + ); } ChannelData::UInt8(a) => { - cn.data = ChannelData::Float64(value_to_value_without_interpolation_calculation(a, - cc_val, - *cycle_count as usize).context("failed value to value without interpolation conversion of u8 channel")?); + cn.data = ChannelData::Float64( + value_to_value_without_interpolation_calculation(a, cc_val, *cycle_count as usize) + .context( + "failed value to value without interpolation conversion of u8 channel", + )?, + ); } ChannelData::Int16(a) => { - cn.data = ChannelData::Float64(value_to_value_without_interpolation_calculation(a, - cc_val, - *cycle_count as usize).context("failed 
value to value without interpolation conversion of i16 channel")?); + cn.data = ChannelData::Float64( + value_to_value_without_interpolation_calculation(a, cc_val, *cycle_count as usize) + .context( + "failed value to value without interpolation conversion of i16 channel", + )?, + ); } ChannelData::UInt16(a) => { - cn.data = ChannelData::Float64(value_to_value_without_interpolation_calculation(a, - cc_val, - *cycle_count as usize).context("failed value to value without interpolation conversion of u16 channel")?); + cn.data = ChannelData::Float64( + value_to_value_without_interpolation_calculation(a, cc_val, *cycle_count as usize) + .context( + "failed value to value without interpolation conversion of u16 channel", + )?, + ); } ChannelData::Int32(a) => { - cn.data = ChannelData::Float64(value_to_value_without_interpolation_calculation(a, - cc_val, - *cycle_count as usize).context("failed value to value without interpolation conversion of i32 channel")?); + cn.data = ChannelData::Float64( + value_to_value_without_interpolation_calculation(a, cc_val, *cycle_count as usize) + .context( + "failed value to value without interpolation conversion of i32 channel", + )?, + ); } ChannelData::UInt32(a) => { - cn.data = ChannelData::Float64(value_to_value_without_interpolation_calculation(a, - cc_val, - *cycle_count as usize).context("failed value to value without interpolation conversion of u32 channel")?); + cn.data = ChannelData::Float64( + value_to_value_without_interpolation_calculation(a, cc_val, *cycle_count as usize) + .context( + "failed value to value without interpolation conversion of u32 channel", + )?, + ); } ChannelData::Float32(a) => { - cn.data = ChannelData::Float64(value_to_value_without_interpolation_calculation(a, - cc_val, - *cycle_count as usize).context("failed value to value without interpolation conversion of f32 channel")?); + cn.data = ChannelData::Float64( + value_to_value_without_interpolation_calculation(a, cc_val, *cycle_count as usize) + 
.context( + "failed value to value without interpolation conversion of f32 channel", + )?, + ); } ChannelData::Int64(a) => { - cn.data = ChannelData::Float64(value_to_value_without_interpolation_calculation(a, - cc_val, - *cycle_count as usize).context("failed value to value without interpolation conversion of i64 channel")?); + cn.data = ChannelData::Float64( + value_to_value_without_interpolation_calculation(a, cc_val, *cycle_count as usize) + .context( + "failed value to value without interpolation conversion of i64 channel", + )?, + ); } ChannelData::UInt64(a) => { - cn.data = ChannelData::Float64(value_to_value_without_interpolation_calculation(a, - cc_val, - *cycle_count as usize).context("failed value to value without interpolation conversion of u64 channel")?); + cn.data = ChannelData::Float64( + value_to_value_without_interpolation_calculation(a, cc_val, *cycle_count as usize) + .context( + "failed value to value without interpolation conversion of u64 channel", + )?, + ); } ChannelData::Float64(a) => { - cn.data = ChannelData::Float64(value_to_value_without_interpolation_calculation(a, - cc_val, - *cycle_count as usize).context("failed value to value without interpolation conversion of f64 channel")?); + cn.data = ChannelData::Float64( + value_to_value_without_interpolation_calculation(a, cc_val, *cycle_count as usize) + .context( + "failed value to value without interpolation conversion of f64 channel", + )?, + ); } _ => warn!( "not possible to apply value to value without interpolation conversion to the data type of channel {}", @@ -1017,9 +1063,8 @@ where let array_f64: Float64Array = array .finish() .try_unary(|value| { - num::cast::cast::(value).ok_or_else(|| { - ArrowError::CastError(format!("Can't cast value {value:?} to f64")) - }) + num::cast::cast::(value) + .ok_or_else(|| ArrowError::CastError(format!("Can't cast value {value:?} to f64"))) }) .context("failed converting array to f64")?; array_f64.values().iter().for_each(|val| { @@ -1124,9 
+1169,8 @@ where let array_f64: Float64Array = array .finish() .try_unary(|value| { - num::cast::cast::(value).ok_or_else(|| { - ArrowError::CastError(format!("Can't cast value {value:?} to f64")) - }) + num::cast::cast::(value) + .ok_or_else(|| ArrowError::CastError(format!("Can't cast value {value:?} to f64"))) }) .context("failed converting array to f64")?; array_f64.values().iter().for_each(|a| { @@ -1134,9 +1178,9 @@ where .0 .iter() .enumerate() - .find(|&x| (x.1 .0 <= *a) && (*a < x.1 .1)); + .find(|&x| (x.1.0 <= *a) && (*a < x.1.1)); if let Some(key) = matched_key { - new_array.append_value(key.1 .2.clone()); + new_array.append_value(key.1.2.clone()); } else { new_array.append_value(cc_val_ref.1.clone()); } diff --git a/src/mdfreader/data_read3.rs b/src/mdfreader/data_read3.rs index f054669..5b3b902 100644 --- a/src/mdfreader/data_read3.rs +++ b/src/mdfreader/data_read3.rs @@ -1,6 +1,6 @@ //! this module implements low level data reading for mdf3 files. use crate::mdfinfo::mdfinfo3::Cn3; -use anyhow::{bail, Context, Error, Result}; +use anyhow::{Context, Error, Result, bail}; use byteorder::{BigEndian, LittleEndian, ReadBytesExt}; use encoding_rs::WINDOWS_1252; use half::f16; diff --git a/src/mdfreader/data_read4.rs b/src/mdfreader/data_read4.rs index 53e15e6..bcffe88 100644 --- a/src/mdfreader/data_read4.rs +++ b/src/mdfreader/data_read4.rs @@ -1640,7 +1640,8 @@ pub fn read_channels_from_bytes( vlsd_channel.push((cn.block.cn_type, *rec_pos)); } } else { - let mut is_dynamic = cn.block.cn_type == 1 || (cn.block.cn_flags & CN_F_DATA_STREAM_MODE != 0); + let mut is_dynamic = + cn.block.cn_type == 1 || (cn.block.cn_flags & CN_F_DATA_STREAM_MODE != 0); if !is_dynamic && let Some(composition) = &cn.composition { match &composition.block { Compo::CA(ca) if ca.ca_storage == 5 => is_dynamic = true, diff --git a/src/mdfreader/mdfreader3.rs b/src/mdfreader/mdfreader3.rs index 9f2c552..6e87962 100644 --- a/src/mdfreader/mdfreader3.rs +++ b/src/mdfreader/mdfreader3.rs 
@@ -1,8 +1,8 @@ //! data read and load in memory based in MdfInfo3's metadata use rayon::prelude::*; -use crate::mdfinfo::mdfinfo3::{Cg3, Cn3, Dg3}; use crate::mdfinfo::MdfInfo; +use crate::mdfinfo::mdfinfo3::{Cg3, Cn3, Dg3}; use anyhow::{Context, Error, Result}; use std::collections::{HashMap, HashSet}; use std::fs::File; diff --git a/src/mdfreader/mdfreader4.rs b/src/mdfreader/mdfreader4.rs index 5a39f25..1dac5f1 100644 --- a/src/mdfreader/mdfreader4.rs +++ b/src/mdfreader/mdfreader4.rs @@ -852,7 +852,8 @@ fn read_vlsc_from_bytes( { // UTF-8 BOM let record = &record[3..]; - let dst = str::from_utf8(record).context("Found invalid UTF-8 with BOM")?; + let dst = + str::from_utf8(record).context("Found invalid UTF-8 with BOM")?; array.append_value(dst); } else if record.len() >= 2 && record[0] == 0xFF && record[1] == 0xFE { // UTF-16 LE BOM @@ -870,7 +871,8 @@ fn read_vlsc_from_bytes( array.append_value(dst.trim_end_matches('\0')); } else { // No recognized BOM, try UTF-8 - let dst = str::from_utf8(record).context("Found invalid UTF-8 (no BOM)")?; + let dst = + str::from_utf8(record).context("Found invalid UTF-8 (no BOM)")?; array.append_value(dst); } max_position = max_position.max(start + length); @@ -1209,7 +1211,11 @@ fn parser_dl4_sorted( previous_index = read_vlsd_from_bytes(&mut data, cn, previous_index, decoder)?; } } else { - let n_record_chunk = if record_length > 0 { block_length / record_length } else { 0 }; + let n_record_chunk = if record_length > 0 { + block_length / record_length + } else { + 0 + }; if previous_index >= cg_cycle_count || n_record_chunk == 0 { continue; } diff --git a/src/mdfwriter/mdfwriter4.rs b/src/mdfwriter/mdfwriter4.rs index 7989273..088d764 100644 --- a/src/mdfwriter/mdfwriter4.rs +++ b/src/mdfwriter/mdfwriter4.rs @@ -26,11 +26,11 @@ use anyhow::{Context, Error, Result, bail}; use arrow::buffer::NullBuffer; use binrw::BinWriterExt; use crossbeam_channel::bounded; +use flate2::Compression; +use flate2::write::ZlibEncoder; use 
parking_lot::Mutex; use rayon::iter::{IntoParallelRefMutIterator, ParallelIterator}; use std::fs::File; -use flate2::write::ZlibEncoder; -use flate2::Compression; /// writes mdf4.2 file pub fn mdfwriter4(mdf: &Mdf, file_name: &str, compression: bool) -> Result { diff --git a/tests/basic.rs b/tests/basic.rs index 709718e..35a8002 100644 --- a/tests/basic.rs +++ b/tests/basic.rs @@ -9,8 +9,12 @@ use std::sync::LazyLock; static MDFREADER_TESTS_PATH: &str = "/home/ratal/workspace/mdfreader/mdfreader/tests/"; -static BASE_PATH_MDF4: LazyLock = - LazyLock::new(|| format!("{}MDF4/MDF4.3/Base_Standard/Examples/", MDFREADER_TESTS_PATH)); +static BASE_PATH_MDF4: LazyLock = LazyLock::new(|| { + format!( + "{}MDF4/MDF4.3/Base_Standard/Examples/", + MDFREADER_TESTS_PATH + ) +}); fn parse_info_folder(folder: &String) -> Result<()> { let path = Path::new(folder); diff --git a/tests/channel_types.rs b/tests/channel_types.rs index 421afb2..7780cd2 100644 --- a/tests/channel_types.rs +++ b/tests/channel_types.rs @@ -1,5 +1,7 @@ use anyhow::Result; -use arrow::array::{AsArray, Float64Builder, Int32Builder, LargeStringBuilder, UInt16Builder, UInt64Builder}; +use arrow::array::{ + AsArray, Float64Builder, Int32Builder, LargeStringBuilder, UInt16Builder, UInt64Builder, +}; use arrow::datatypes::Float64Type; use mdfr::data_holder::channel_data::ChannelData; use mdfr::mdfreader::Mdf; @@ -7,7 +9,8 @@ use std::path::Path; use std::sync::LazyLock; static BASE_PATH_MDF4: LazyLock = LazyLock::new(|| { - "/home/ratal/workspace/mdfreader/mdfreader/tests/MDF4/MDF4.3/Base_Standard/Examples/".to_string() + "/home/ratal/workspace/mdfreader/mdfreader/tests/MDF4/MDF4.3/Base_Standard/Examples/" + .to_string() }); #[test] @@ -184,7 +187,10 @@ fn virtual_data_channels() -> Result<()> { mdf.load_all_channels_data_in_memory()?; if let Some(data) = mdf.get_channel_data("Data channel") { assert_eq!( - ChannelData::Float64(Float64Builder::new_from_buffer(vec![42f64; 200].into(), None)), + 
ChannelData::Float64(Float64Builder::new_from_buffer( + vec![42f64; 200].into(), + None + )), *data ); } @@ -370,8 +376,10 @@ fn channel_list() -> Result<()> { if let Some(size_data) = mdf.get_channel_data("size") { assert_eq!(size_data.len(), 2, "size should have 2 samples"); // Size values: [0, 202] - let expected_size = - ChannelData::UInt16(UInt16Builder::new_from_buffer(vec![0u16, 202u16].into(), None)); + let expected_size = ChannelData::UInt16(UInt16Builder::new_from_buffer( + vec![0u16, 202u16].into(), + None, + )); assert_eq!( &expected_size, size_data, "size channel values should match" diff --git a/tests/compressed_data.rs b/tests/compressed_data.rs index dd9c627..9c4bfed 100644 --- a/tests/compressed_data.rs +++ b/tests/compressed_data.rs @@ -7,7 +7,8 @@ use std::path::Path; use std::sync::LazyLock; static BASE_PATH_MDF4: LazyLock = LazyLock::new(|| { - "/home/ratal/workspace/mdfreader/mdfreader/tests/MDF4/MDF4.3/Base_Standard/Examples/".to_string() + "/home/ratal/workspace/mdfreader/mdfreader/tests/MDF4/MDF4.3/Base_Standard/Examples/" + .to_string() }); fn parse_info_folder(folder: &String) -> Result<()> { diff --git a/tests/conversion.rs b/tests/conversion.rs index e126c9d..5462b69 100644 --- a/tests/conversion.rs +++ b/tests/conversion.rs @@ -5,7 +5,8 @@ use mdfr::mdfreader::Mdf; use std::sync::LazyLock; static BASE_PATH_MDF4: LazyLock = LazyLock::new(|| { - "/home/ratal/workspace/mdfreader/mdfreader/tests/MDF4/MDF4.3/Base_Standard/Examples/".to_string() + "/home/ratal/workspace/mdfreader/mdfreader/tests/MDF4/MDF4.3/Base_Standard/Examples/" + .to_string() }); #[test] diff --git a/tests/data_list.rs b/tests/data_list.rs index 038afb8..777055c 100644 --- a/tests/data_list.rs +++ b/tests/data_list.rs @@ -3,7 +3,8 @@ use mdfr::mdfreader::Mdf; use std::sync::LazyLock; static BASE_PATH_MDF4: LazyLock = LazyLock::new(|| { - "/home/ratal/workspace/mdfreader/mdfreader/tests/MDF4/MDF4.3/Base_Standard/Examples/".to_string() + 
"/home/ratal/workspace/mdfreader/mdfreader/tests/MDF4/MDF4.3/Base_Standard/Examples/" + .to_string() }); #[test] diff --git a/tests/data_types.rs b/tests/data_types.rs index 2007a63..84fbbf4 100644 --- a/tests/data_types.rs +++ b/tests/data_types.rs @@ -9,7 +9,8 @@ use std::fs; use std::sync::LazyLock; static BASE_PATH_MDF4: LazyLock = LazyLock::new(|| { - "/home/ratal/workspace/mdfreader/mdfreader/tests/MDF4/MDF4.3/Base_Standard/Examples/".to_string() + "/home/ratal/workspace/mdfreader/mdfreader/tests/MDF4/MDF4.3/Base_Standard/Examples/" + .to_string() }); static BASE_TEST_PATH: LazyLock = @@ -124,7 +125,11 @@ fn real_types() -> Result<()> { let mut mdf = Mdf::new(&file_name)?; mdf.load_all_channels_data_in_memory()?; - let file_name = format!("{}{}", BASE_PATH_MDF4.as_str(), "Halffloat/halffloat_sinus.mf4"); + let file_name = format!( + "{}{}", + BASE_PATH_MDF4.as_str(), + "Halffloat/halffloat_sinus.mf4" + ); let mut mdf = Mdf::new(&file_name)?; mdf.load_all_channels_data_in_memory()?; diff --git a/tests/export.rs b/tests/export.rs index a79e22f..f1a2859 100644 --- a/tests/export.rs +++ b/tests/export.rs @@ -8,7 +8,10 @@ static MDFREADER_TESTS_PATH: &str = "/home/ratal/workspace/mdfreader/mdfreader/t static MDFR_PATH: &str = "/home/ratal/workspace/mdfr/"; static BASE_PATH_MDF4: LazyLock = LazyLock::new(|| { - format!("{}MDF4/MDF4.3/Base_Standard/Examples/", MDFREADER_TESTS_PATH) + format!( + "{}MDF4/MDF4.3/Base_Standard/Examples/", + MDFREADER_TESTS_PATH + ) }); static BASE_PATH_MDF3: LazyLock = diff --git a/tests/writing.rs b/tests/writing.rs index 72deb78..80e1422 100644 --- a/tests/writing.rs +++ b/tests/writing.rs @@ -10,7 +10,10 @@ static MDFREADER_TESTS_PATH: &str = "/home/ratal/workspace/mdfreader/mdfreader/t static MDFR_PATH: &str = "/home/ratal/workspace/mdfr/"; static BASE_PATH_MDF4: LazyLock = LazyLock::new(|| { - format!("{}MDF4/MDF4.3/Base_Standard/Examples/", MDFREADER_TESTS_PATH) + format!( + "{}MDF4/MDF4.3/Base_Standard/Examples/", + 
MDFREADER_TESTS_PATH + ) }); static BASE_PATH_MDF3: LazyLock = From 13d9f7ce005964f3883bedf9d7a7b1fa40719bee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Sun, 25 Jan 2026 13:41:40 +0100 Subject: [PATCH 02/46] added more python versions for package generation, updated to more recent hdf5 version --- .github/workflows/py-mac-aarch64-apple-release.yml | 4 ++-- .github/workflows/py-release-manylinux.yml | 2 +- .github/workflows/py-release-windows-macos.yml | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/py-mac-aarch64-apple-release.yml b/.github/workflows/py-mac-aarch64-apple-release.yml index 65167fd..057adfb 100644 --- a/.github/workflows/py-mac-aarch64-apple-release.yml +++ b/.github/workflows/py-mac-aarch64-apple-release.yml @@ -13,7 +13,7 @@ jobs: strategy: matrix: os: ["macos-latest"] - python-version: [ "3.9", "3.10", "3.11", "3.12"] + python-version: [ "3.9", "3.10", "3.11", "3.12", "3.13", "3.14" ] steps: - uses: actions/checkout@v3 - name: Install latest Rust nightly @@ -33,7 +33,7 @@ jobs: shell: bash run: | rustup override set nightly - brew install hdf5@1.10 + brew install hdf5@2.0 - name: maturin publish uses: messense/maturin-action@v1 env: diff --git a/.github/workflows/py-release-manylinux.yml b/.github/workflows/py-release-manylinux.yml index da0e020..470fc52 100644 --- a/.github/workflows/py-release-manylinux.yml +++ b/.github/workflows/py-release-manylinux.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [ '3.9', '3.10' , "3.11", "3.12"] + python-version: [ '3.9', '3.10' , "3.11", "3.12", "3.13", "3.14"] steps: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 diff --git a/.github/workflows/py-release-windows-macos.yml b/.github/workflows/py-release-windows-macos.yml index a5f031c..ed29476 100644 --- a/.github/workflows/py-release-windows-macos.yml +++ b/.github/workflows/py-release-windows-macos.yml @@ -16,7 +16,7 @@ 
jobs: strategy: matrix: os: ["windows-latest"] - python-version: ["3.9", "3.10", "3.11", "3.12"] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] steps: - uses: actions/checkout@v3 - name: Install latest Rust nightly @@ -34,9 +34,9 @@ jobs: run: | python -m pip install --upgrade pip pip install maturin - C:\msys64\usr\bin\wget.exe -q -O hdf5.zip https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-1.10/hdf5-1.10.11/bin/windows/hdf5-1.10.11-Std-win10_64-vs16.zip + C:\msys64\usr\bin\wget.exe -q -O hdf5.zip https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-1.13/hdf5-1.13.0/bin/windows/hdf5-1.13.0-Std-win10_64-vs15.zip 7z x hdf5.zip -y - msiexec /i hdf\\HDF5-1.10.11-win64.msi /quiet /qn /norestart + msiexec /i hdf\\HDF5-1.13.0-win64.msi /quiet /qn /norestart - name: Maturin publish shell: bash env: From 2a6d15d9341815b654736c4c5738d5171fc2a8e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Tue, 27 Jan 2026 00:45:11 +0100 Subject: [PATCH 03/46] added data stream mode --- src/mdfinfo/mdfinfo4.rs | 6 +- src/mdfreader.rs | 1 + src/mdfreader/mdfreader4.rs | 214 +++++++++++++++++++++++++++++++++++- 3 files changed, 215 insertions(+), 6 deletions(-) diff --git a/src/mdfinfo/mdfinfo4.rs b/src/mdfinfo/mdfinfo4.rs index c67da94..1d77ecf 100644 --- a/src/mdfinfo/mdfinfo4.rs +++ b/src/mdfinfo/mdfinfo4.rs @@ -2423,17 +2423,17 @@ pub struct Cn4Block { /// Bit offset (0-7): first bit (=LSB) of signal value after Byte offset has been applied (see 4.21.4.2 Reading the Signal Value). If zero, the signal value is 1-Byte aligned. A value different to zero is only allowed for Integer data types (cn_data_type ≤ 3) and if the Integer signal value fits into 8 contiguous Bytes (cn_bit_count + cn_bit_offset ≤ 64). For all other cases, cn_bit_offset must be zero. pub cn_bit_offset: u8, /// Offset to first Byte in the data record that contains bits of the signal value. The offset is applied to the plain record data, i.e. 
skipping the record ID. - cn_byte_offset: u32, + pub cn_byte_offset: u32, /// Number of bits for signal value in record pub cn_bit_count: u32, /// Flags (see CN_F_xxx) pub cn_flags: u32, /// Position of invalidation bit. - cn_inval_bit_pos: u32, + pub cn_inval_bit_pos: u32, /// Precision for display of floating point values. 0xFF means unrestricted precision (infinite). Any other value specifies the number of decimal places to use for display of floating point values. Only valid if "precision valid" flag (bit 2) is set cn_precision: u8, /// Byte alignment with previous channel in data stream - cn_alignment: u8, + pub cn_alignment: u8, /// Number of attachment for this channel cn_attachment_count: u16, /// Minimum signal value that occurred for this signal (raw value) Only valid if "value range valid" flag (bit 3) is set. diff --git a/src/mdfreader.rs b/src/mdfreader.rs index f90a9be..94faa2d 100644 --- a/src/mdfreader.rs +++ b/src/mdfreader.rs @@ -3,6 +3,7 @@ pub mod conversions3; pub mod conversions4; pub mod data_read3; pub mod data_read4; +pub mod datastream_decoder; pub mod mdfreader3; pub mod mdfreader4; use std::collections::{HashMap, HashSet}; diff --git a/src/mdfreader/mdfreader4.rs b/src/mdfreader/mdfreader4.rs index 1dac5f1..630d126 100644 --- a/src/mdfreader/mdfreader4.rs +++ b/src/mdfreader/mdfreader4.rs @@ -1,7 +1,7 @@ //! 
data read and load in memory based in MdfInfo4's metadata use crate::data_holder::channel_data::ChannelData; use crate::mdfinfo::MdfInfo; -use crate::mdfinfo::mdfinfo4::{Blockheader4, Cg4, Cn4, Dg4}; +use crate::mdfinfo::mdfinfo4::{Blockheader4, Cg4, Cn4, Composition, Dg4, Ds4Block}; use crate::mdfinfo::mdfinfo4::{ CG_F_VLSC, CG_F_VLSD, Dl4Block, Dt4Block, Gd4Block, Hl4Block, Ld4Block, parse_dz, parser_dl4_block, parser_ld4_block, @@ -9,6 +9,7 @@ use crate::mdfinfo::mdfinfo4::{ use crate::mdfreader::conversions4::convert_all_channels; use crate::mdfreader::data_read4::read_channels_from_bytes; use crate::mdfreader::data_read4::read_one_channel_array; +use crate::mdfreader::datastream_decoder; use anyhow::{Context, Error, Result, bail}; use binrw::BinReaderExt; use encoding_rs::{Decoder, GB18030, UTF_8, UTF_16BE, UTF_16LE, WINDOWS_1252}; @@ -1985,10 +1986,217 @@ fn read_ds( continue; }; - // Read data into channel. We assume length-prefixed samples for dynamic data. - read_vlsd_from_bytes(&mut data, cn, 0, decoder)?; + // Check if this is data stream mode (ds_mode == 0) with composition + // We need to extract what we need from cn before dropping the borrow + let ds_decode_info: Option<(Ds4Block, Box)> = + if let Some(composition) = &cn.composition { + if let Compo::DS(ds_block) = &composition.block { + if ds_block.ds_mode == 0 { + // Clone the ds_block (dereferencing the Box) and the composition + composition + .compo + .as_ref() + .map(|c| ((**ds_block).clone(), c.clone())) + } else { + None + } + } else { + None + } + } else { + None + }; + + if let Some((ds_block, child_composition)) = ds_decode_info { + // Parse record offsets and sizes from length-prefixed data + let (record_offsets, record_sizes) = parse_vlsd_record_offsets(&data)?; + + // Decode using composition + let decoded = datastream_decoder::decode_datastream_blob( + &data, + &ds_block, + &child_composition, + &channel_group.cn, + &record_offsets, + &record_sizes, + )?; + + // Store decoded child 
channel data + for (rec_pos, values) in decoded { + if let Some(child_cn) = channel_group.cn.get_mut(&rec_pos) { + store_decoded_values_in_channel(child_cn, values, decoder)?; + } + } + } else { + // Fallback: read as length-prefixed samples for dynamic data + if let Some(cn) = channel_group.cn.get_mut(rec_pos) { + read_vlsd_from_bytes(&mut data, cn, 0, decoder)?; + } + } } } } Ok(position) } + +/// Parses VLSD record offsets and sizes from a length-prefixed data blob. +/// Returns (offsets, sizes) vectors where each element corresponds to one record. +fn parse_vlsd_record_offsets(data: &[u8]) -> Result<(Vec, Vec)> { + let mut offsets = Vec::new(); + let mut sizes = Vec::new(); + let mut position: usize = 0; + + while position + 4 <= data.len() { + let length = u32::from_le_bytes( + data[position..position + 4] + .try_into() + .context("Could not read VLSD length prefix")?, + ) as usize; + + // Record starts after the 4-byte length prefix + offsets.push((position + 4) as u64); + sizes.push(length as u64); + + // Move to next record + position += 4 + length; + + if position > data.len() { + break; + } + } + + Ok((offsets, sizes)) +} + +/// Stores decoded byte values into a channel's data structure +fn store_decoded_values_in_channel( + cn: &mut Cn4, + values: Vec>, + decoder: &mut Dec, +) -> Result<()> { + for value_bytes in values { + match &mut cn.data { + ChannelData::Int8(builder) => { + if !value_bytes.is_empty() { + builder.append_value(value_bytes[0] as i8); + } + } + ChannelData::UInt8(builder) => { + if !value_bytes.is_empty() { + builder.append_value(value_bytes[0]); + } + } + ChannelData::Int16(builder) => { + if value_bytes.len() >= 2 { + let val = if cn.endian { + i16::from_be_bytes(value_bytes[..2].try_into()?) + } else { + i16::from_le_bytes(value_bytes[..2].try_into()?) 
+ }; + builder.append_value(val); + } + } + ChannelData::UInt16(builder) => { + if value_bytes.len() >= 2 { + let val = if cn.endian { + u16::from_be_bytes(value_bytes[..2].try_into()?) + } else { + u16::from_le_bytes(value_bytes[..2].try_into()?) + }; + builder.append_value(val); + } + } + ChannelData::Int32(builder) => { + if value_bytes.len() >= 4 { + let val = if cn.endian { + i32::from_be_bytes(value_bytes[..4].try_into()?) + } else { + i32::from_le_bytes(value_bytes[..4].try_into()?) + }; + builder.append_value(val); + } + } + ChannelData::UInt32(builder) => { + if value_bytes.len() >= 4 { + let val = if cn.endian { + u32::from_be_bytes(value_bytes[..4].try_into()?) + } else { + u32::from_le_bytes(value_bytes[..4].try_into()?) + }; + builder.append_value(val); + } + } + ChannelData::Float32(builder) => { + if value_bytes.len() >= 4 { + let val = if cn.endian { + f32::from_be_bytes(value_bytes[..4].try_into()?) + } else { + f32::from_le_bytes(value_bytes[..4].try_into()?) + }; + builder.append_value(val); + } + } + ChannelData::Int64(builder) => { + if value_bytes.len() >= 8 { + let val = if cn.endian { + i64::from_be_bytes(value_bytes[..8].try_into()?) + } else { + i64::from_le_bytes(value_bytes[..8].try_into()?) + }; + builder.append_value(val); + } + } + ChannelData::UInt64(builder) => { + if value_bytes.len() >= 8 { + let val = if cn.endian { + u64::from_be_bytes(value_bytes[..8].try_into()?) + } else { + u64::from_le_bytes(value_bytes[..8].try_into()?) + }; + builder.append_value(val); + } + } + ChannelData::Float64(builder) => { + if value_bytes.len() >= 8 { + let val = if cn.endian { + f64::from_be_bytes(value_bytes[..8].try_into()?) + } else { + f64::from_le_bytes(value_bytes[..8].try_into()?) 
+ }; + builder.append_value(val); + } + } + ChannelData::Utf8(builder) => { + // Decode string based on data type + let s = if cn.block.cn_data_type == 6 { + // SBC (Windows-1252) + let mut dst = String::with_capacity(value_bytes.len()); + let _ = decoder + .windows_1252 + .decode_to_string(&value_bytes, &mut dst, false); + dst + } else if cn.block.cn_data_type == 7 || cn.block.cn_data_type == 9 { + // UTF-8 or ISO-8859-1 (treat as UTF-8) + String::from_utf8_lossy(&value_bytes).into_owned() + } else if cn.block.cn_data_type == 8 { + // UTF-16 LE + let mut dst = String::with_capacity(value_bytes.len()); + let _ = decoder + .utf_16_le + .decode_to_string(&value_bytes, &mut dst, false); + dst + } else { + String::from_utf8_lossy(&value_bytes).into_owned() + }; + builder.append_value(s); + } + ChannelData::VariableSizeByteArray(builder) => { + builder.append_value(&value_bytes); + } + _ => { + // For other types (complex, tensor, etc.), skip for now + } + } + } + Ok(()) +} From 5b4418fd55117a1e00395759a5952e341d84a858 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Tue, 27 Jan 2026 23:31:15 +0100 Subject: [PATCH 04/46] fixed missing consideration for DL/HL/DZ blocks for VLSC + UTF16 wrong endianess --- src/mdfinfo/mdfinfo4.rs | 5 ++ src/mdfreader/mdfreader4.rs | 129 +++++++++++++++++++----------------- tests/channel_types.rs | 46 +++++++++++++ 3 files changed, 119 insertions(+), 61 deletions(-) diff --git a/src/mdfinfo/mdfinfo4.rs b/src/mdfinfo/mdfinfo4.rs index 1d77ecf..017f11d 100644 --- a/src/mdfinfo/mdfinfo4.rs +++ b/src/mdfinfo/mdfinfo4.rs @@ -3006,6 +3006,11 @@ fn parse_cn4_block( { endian = true; // big endian } + // For VLSC/VLSD channels, cn_data_type describes the signal data block encoding + // (e.g. UTF-16 BE), not the byte order of the integer offsets stored in the DT block. 
+ if block.cn_type == 1 || block.cn_type == 7 { + endian = false; + } let data_type = block.cn_data_type; let cn_type = block.cn_type; diff --git a/src/mdfreader/mdfreader4.rs b/src/mdfreader/mdfreader4.rs index 630d126..843a58e 100644 --- a/src/mdfreader/mdfreader4.rs +++ b/src/mdfreader/mdfreader4.rs @@ -351,6 +351,62 @@ fn read_data( Ok(position) } +/// Reads and concatenates data from any data block type (##DT, ##SD, ##VD, ##DZ, ##DL, ##HL). +/// The block id must already have been read. Returns concatenated raw bytes and updated position. +fn read_all_blocks_to_bytes( + rdr: &mut BufReader<&File>, + id: [u8; 4], + mut position: i64, +) -> Result, i64)>> { + if id == *b"##DT" || id == *b"##SD" || id == *b"##VD" { + let block_header: Dt4Block = rdr.read_le().context("Could not read data block header")?; + let mut buf = vec![0u8; block_header.len as usize - 24]; + rdr.read_exact(&mut buf) + .context("could not read data block buffer")?; + position += block_header.len as i64; + Ok(Some((buf, position))) + } else if id == *b"##DZ" { + let (buf, block_header) = parse_dz(rdr)?; + position += block_header.len as i64; + Ok(Some((buf, position))) + } else if id == *b"##HL" || id == *b"##DL" { + let current_pos = if id == *b"##HL" { + let (pos, _id) = read_hl(rdr, position)?; + pos + } else { + position + }; + let (dl_blocks, mut pos) = parser_dl4(rdr, current_pos)?; + let mut combined_data = Vec::new(); + for dl in dl_blocks { + for data_ptr in dl.dl_data { + if data_ptr == 0 { + continue; + } + rdr.seek_relative(data_ptr - pos)?; + pos = data_ptr; + let mut inner_id = [0u8; 4]; + rdr.read_exact(&mut inner_id)?; + if inner_id == *b"##DZ" { + let (buf, header) = parse_dz(rdr)?; + pos += header.len as i64; + combined_data.extend(buf); + } else { + // ##DT, ##SD, ##VD or any other raw data block + let header: Dt4Block = rdr.read_le()?; + let mut buf = vec![0u8; header.len as usize - 24]; + rdr.read_exact(&mut buf)?; + pos += header.len as i64; + combined_data.extend(buf); 
+ } + } + } + Ok(Some((combined_data, pos))) + } else { + Ok(None) + } +} + /// Header List block reader /// This HL Block references Data List Blocks that are listing DZ Blocks /// It is existing to add complementary information about compression in DZ @@ -544,21 +600,13 @@ fn read_vd( rdr.read_exact(&mut id) .context("could not read VD block id")?; - let data: Vec; - if "##VD".as_bytes() == id { - let block_header: Dt4Block = rdr.read_le().context("Could not read VD block struct")?; - let mut buf = vec![0u8; block_header.len as usize - 24]; - rdr.read_exact(&mut buf) - .context("could not read VD data buffer")?; - position += block_header.len as i64; - data = buf; - } else if "##DZ".as_bytes() == id { - let (buf, block_header) = parse_dz(rdr)?; - position += block_header.len as i64; - data = buf; - } else { - continue; - } + let data: Vec = match read_all_blocks_to_bytes(rdr, id, position)? { + Some((buf, pos)) => { + position = pos; + buf + } + None => continue, + }; // Now update cn.data with the actual variable length data if let Some(cn) = channel_group.cn.get_mut(rec_pos) { @@ -1937,53 +1985,12 @@ fn read_ds( rdr.read_exact(&mut id) .context("could not read DS data block id")?; - let mut data: Vec = if "##DT".as_bytes() == id || "##SD".as_bytes() == id { - let block_header: Dt4Block = - rdr.read_le().context("Could not read DT/SD header in DS")?; - let mut buf = vec![0u8; block_header.len as usize - 24]; - rdr.read_exact(&mut buf) - .context("could not read DT/SD data buffer in DS")?; - position += block_header.len as i64; - buf - } else if "##DZ".as_bytes() == id { - let (buf, block_header) = parse_dz(rdr)?; - position += block_header.len as i64; - buf - } else if "##DL".as_bytes() == id || "##HL".as_bytes() == id { - let current_pos = if id == "##HL".as_bytes() { - let (pos, _id) = read_hl(rdr, position)?; - pos - } else { - position - }; - let (dl_blocks, mut pos) = parser_dl4(rdr, current_pos)?; - let mut combined_data = Vec::new(); - for dl in dl_blocks 
{ - for data_ptr in dl.dl_data { - if data_ptr == 0 { - continue; - } - rdr.seek_relative(data_ptr - pos)?; - pos = data_ptr; - let mut inner_id = [0u8; 4]; - rdr.read_exact(&mut inner_id)?; - if "##DT".as_bytes() == inner_id || "##SD".as_bytes() == inner_id { - let header: Dt4Block = rdr.read_le()?; - let mut buf = vec![0u8; header.len as usize - 24]; - rdr.read_exact(&mut buf)?; - pos += header.len as i64; - combined_data.extend(buf); - } else if "##DZ".as_bytes() == inner_id { - let (buf, header) = parse_dz(rdr)?; - pos += header.len as i64; - combined_data.extend(buf); - } - } + let mut data: Vec = match read_all_blocks_to_bytes(rdr, id, position)? { + Some((buf, pos)) => { + position = pos; + buf } - position = pos; - combined_data - } else { - continue; + None => continue, }; // Check if this is data stream mode (ds_mode == 0) with composition diff --git a/tests/channel_types.rs b/tests/channel_types.rs index 7780cd2..bd57e6f 100644 --- a/tests/channel_types.rs +++ b/tests/channel_types.rs @@ -287,6 +287,52 @@ fn vlsc_channels() -> Result<()> { panic!("VLSC Data channel not found"); } + // VLSC with different string encodings + for file in [ + "Vector_VLSC_String_SBC.mf4", + "Vector_VLSC_String_UTF16_LE.mf4", + "Vector_VLSC_String_UTF16_BE.mf4", + ] { + let file_name = format!("{}{}{}", BASE_PATH_MDF4.as_str(), list_of_paths[0], file); + let mut mdf = Mdf::new(&file_name)?; + mdf.load_all_channels_data_in_memory()?; + if let Some(data) = mdf.get_channel_data("Data channel") { + assert_eq!(expected_string_result, data.clone(), "Failed for {}", file); + } else { + panic!("VLSC Data channel not found in {}", file); + } + } + + // VLSC with single VD block (uncompressed and compressed) + for file in [ + "Vector_VLSC_Single_VD.mf4", + "Vector_VLSC_Single_VD_Compressed.mf4", + ] { + let file_name = format!("{}{}{}", BASE_PATH_MDF4.as_str(), list_of_paths[0], file); + let mut mdf = Mdf::new(&file_name)?; + mdf.load_all_channels_data_in_memory()?; + if let Some(data) 
= mdf.get_channel_data("data") { + assert!(!data.is_empty(), "VLSC data should not be empty in {}", file); + } else { + panic!("VLSC data channel not found in {}", file); + } + } + + // VLSC with Data List (DL -> VD blocks), uncompressed and compressed (DL -> DZ) + for file in [ + "Vector_VLSC_DataList_VD.mf4", + "Vector_VLSC_DataList_VD_Compressed.mf4", + ] { + let file_name = format!("{}{}{}", BASE_PATH_MDF4.as_str(), list_of_paths[0], file); + let mut mdf = Mdf::new(&file_name)?; + mdf.load_all_channels_data_in_memory()?; + if let Some(data) = mdf.get_channel_data("data") { + assert!(!data.is_empty(), "VLSC data should not be empty in {}", file); + } else { + panic!("VLSC data channel not found in {}", file); + } + } + // VLSC Etas with BOM - this file has channels: time, size, comment (VLSC) // Note: This file has mixed BOM encodings (UTF-8 and UTF-16 LE) let file_name = format!( From 1d276f1d8d42e4cc196cd0a9688f2d6fcb1326b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Wed, 28 Jan 2026 23:52:09 +0100 Subject: [PATCH 05/46] added channel Variants and Union parsing --- src/data_holder/channel_data.rs | 168 ++++++++++++++++++++- src/export/hdf5.rs | 4 + src/export/numpy.rs | 9 +- src/mdfinfo/mdfinfo4.rs | 259 +++++++++++++++++++++++++++++--- src/mdfreader/data_read4.rs | 2 + src/mdfreader/mdfreader4.rs | 6 + tests/channel_types.rs | 49 +++++- 7 files changed, 471 insertions(+), 26 deletions(-) diff --git a/src/data_holder/channel_data.rs b/src/data_holder/channel_data.rs index 7499746..18e1752 100644 --- a/src/data_holder/channel_data.rs +++ b/src/data_holder/channel_data.rs @@ -6,7 +6,7 @@ use arrow::array::{ Array, ArrayBuilder, ArrayData, ArrayRef, BinaryArray, BooleanBufferBuilder, FixedSizeBinaryArray, FixedSizeBinaryBuilder, FixedSizeListArray, Int8Builder, LargeBinaryArray, LargeBinaryBuilder, LargeStringArray, LargeStringBuilder, PrimitiveBuilder, - StringArray, as_primitive_array, + StringArray, UnionArray, 
as_primitive_array, }; use arrow::buffer::{MutableBuffer, NullBuffer}; use arrow::datatypes::{ @@ -57,6 +57,7 @@ pub enum ChannelData { ArrayDInt64(TensorArrow), ArrayDUInt64(TensorArrow), ArrayDFloat64(TensorArrow), + Union(UnionArray), } impl PartialEq for ChannelData { @@ -91,6 +92,7 @@ impl PartialEq for ChannelData { (Self::ArrayDInt64(l0), Self::ArrayDInt64(r0)) => l0 == r0, (Self::ArrayDUInt64(l0), Self::ArrayDUInt64(r0)) => l0 == r0, (Self::ArrayDFloat64(l0), Self::ArrayDFloat64(r0)) => l0 == r0, + (Self::Union(l0), Self::Union(r0)) => l0.to_data() == r0.to_data(), _ => false, } } @@ -205,6 +207,9 @@ impl Clone for ChannelData { Self::ArrayDInt64(arg0) => Self::ArrayDInt64(arg0.clone()), Self::ArrayDUInt64(arg0) => Self::ArrayDUInt64(arg0.clone()), Self::ArrayDFloat64(arg0) => Self::ArrayDFloat64(arg0.clone()), + Self::Union(arg0) => { + Self::Union(UnionArray::from(arg0.to_data())) + } } } } @@ -370,6 +375,9 @@ impl ChannelData { shape.1, ))) } + ChannelData::Union(_) => { + bail!("Union channels cannot be zero-initialized") + } } } } @@ -401,6 +409,7 @@ impl ChannelData { ChannelData::ArrayDInt64(data) => data.is_empty(), ChannelData::ArrayDUInt64(data) => data.is_empty(), ChannelData::ArrayDFloat64(data) => data.is_empty(), + ChannelData::Union(data) => data.is_empty(), } } /// flatten length of tensor @@ -431,6 +440,7 @@ impl ChannelData { ChannelData::ArrayDInt64(data) => data.len(), ChannelData::ArrayDUInt64(data) => data.len(), ChannelData::ArrayDFloat64(data) => data.len(), + ChannelData::Union(data) => data.len(), } } /// returns the max bit count of each values in array @@ -481,6 +491,20 @@ impl ChannelData { ChannelData::ArrayDInt64(_) => 64, ChannelData::ArrayDUInt64(_) => 64, ChannelData::ArrayDFloat64(_) => 64, + ChannelData::Union(a) => { + // Union bit size is the max of all member sizes (in bits) + if let DataType::Union(fields, _) = a.data_type() { + fields + .iter() + .filter_map(|(_, field)| { + 
field.data_type().primitive_width().map(|w| (w * 8) as u32) + }) + .max() + .unwrap_or(0) + } else { + 0 + } + } } } /// returns the max byte count of each values in array @@ -527,6 +551,20 @@ impl ChannelData { ChannelData::ArrayDInt64(_) => 8, ChannelData::ArrayDUInt64(_) => 8, ChannelData::ArrayDFloat64(_) => 8, + ChannelData::Union(a) => { + // Union byte size is the max of all member sizes + if let DataType::Union(fields, _) = a.data_type() { + fields + .iter() + .filter_map(|(_, field)| { + field.data_type().primitive_width().map(|w| w as u32) + }) + .max() + .unwrap_or(0) + } else { + 0 + } + } } } /// returns mdf4 data type @@ -559,6 +597,7 @@ impl ChannelData { ChannelData::ArrayDUInt64(_) => 1, ChannelData::ArrayDFloat64(_) => 5, ChannelData::Utf8(_) => 7, + ChannelData::Union(_) => 10, } } else { // LE @@ -588,6 +627,7 @@ impl ChannelData { ChannelData::ArrayDUInt64(_) => 0, ChannelData::ArrayDFloat64(_) => 4, ChannelData::Utf8(_) => 7, + ChannelData::Union(_) => 10, } } } @@ -621,6 +661,7 @@ impl ChannelData { ChannelData::ArrayDUInt64(_a) => DataType::UInt64, ChannelData::ArrayDFloat64(_a) => DataType::Float64, ChannelData::Utf8(_) => DataType::LargeUtf8, + ChannelData::Union(a) => a.data_type().clone(), } } /// returns raw bytes vectors from ndarray @@ -753,6 +794,45 @@ impl ChannelData { .iter() .flat_map(|x| x.to_ne_bytes()) .collect()), + ChannelData::Union(a) => { + // Union has fixed-length records equal to the max member size. + // For each row, extract the active child's raw bytes, zero-padded + // to the union byte count. 
+ let union_byte_count = self.byte_count() as usize; + if union_byte_count == 0 { + return Ok(Vec::new()); + } + let n = a.len(); + let mut bytes = vec![0u8; n * union_byte_count]; + let is_dense = matches!( + a.data_type(), + DataType::Union(_, arrow::datatypes::UnionMode::Dense) + ); + for i in 0..n { + let type_id = a.type_id(i); + let child = a.child(type_id); + let child_offset = if is_dense { + a.value_offset(i) + } else { + i + }; + let child_data = child.to_data(); + if let Some(elem_size) = child.data_type().primitive_width() { + // Primitive type: extract raw bytes from values buffer + if let Some(buffer) = child_data.buffers().first() { + let start = (child_data.offset() + child_offset) * elem_size; + let end = start + elem_size; + let copy_len = elem_size.min(union_byte_count); + if end <= buffer.len() { + bytes[i * union_byte_count..i * union_byte_count + copy_len] + .copy_from_slice(&buffer.as_slice()[start..start + copy_len]); + } + } + } + // Non-primitive children (strings, etc.) 
are left as zero bytes + } + Ok(bytes) + } } } /// returns the number of dimensions of the channel @@ -783,6 +863,7 @@ impl ChannelData { ChannelData::ArrayDUInt64(a) => a.ndim(), ChannelData::ArrayDFloat64(a) => a.ndim(), ChannelData::Utf8(_) => 1, + ChannelData::Union(_) => 1, } } /// returns the shape of channel @@ -813,6 +894,7 @@ impl ChannelData { ChannelData::ArrayDInt64(a) => (a.shape().clone(), a.order().clone()), ChannelData::ArrayDUInt64(a) => (a.shape().clone(), a.order().clone()), ChannelData::ArrayDFloat64(a) => (a.shape().clone(), a.order().clone()), + ChannelData::Union(a) => (vec![a.len(); 1], Order::RowMajor), } } /// returns optional tuple of minimum and maximum values contained in the channel @@ -955,6 +1037,7 @@ impl ChannelData { (min, max) } ChannelData::Utf8(_) => (None, None), + ChannelData::Union(_) => (None, None), } } /// convert channel arrow data into dyn Array @@ -985,6 +1068,7 @@ impl ChannelData { ChannelData::ArrayDInt64(a) => Arc::new(a.finish_cloned()) as ArrayRef, ChannelData::ArrayDUInt64(a) => Arc::new(a.finish_cloned()) as ArrayRef, ChannelData::ArrayDFloat64(a) => Arc::new(a.finish_cloned()) as ArrayRef, + ChannelData::Union(a) => Arc::new(UnionArray::from(a.to_data())) as ArrayRef, } } /// convert channel arrow data into dyn Array @@ -1015,6 +1099,7 @@ impl ChannelData { ChannelData::ArrayDInt64(a) => Arc::new(a.finish()) as ArrayRef, ChannelData::ArrayDUInt64(a) => Arc::new(a.finish()) as ArrayRef, ChannelData::ArrayDFloat64(a) => Arc::new(a.finish()) as ArrayRef, + ChannelData::Union(a) => Arc::new(UnionArray::from(a.to_data())) as ArrayRef, } } /// Convert ChannelData into ArrayData @@ -1045,6 +1130,7 @@ impl ChannelData { ChannelData::ArrayDInt64(a) => a.finish_cloned().to_data(), ChannelData::ArrayDUInt64(a) => a.finish_cloned().to_data(), ChannelData::ArrayDFloat64(a) => a.finish_cloned().to_data(), + ChannelData::Union(a) => a.to_data(), } } /// Change the validity mask of the channel @@ -1141,6 +1227,78 @@ impl 
ChannelData { ChannelData::ArrayDFloat64(a) => { a.set_validity(mask); } + ChannelData::Union(a) => { + // Apply the validity mask to each child array in the union. + // In MDF, the invalidation bit applies to the whole union per record. + // For sparse unions, every child gets the same mask. + // For dense unions, the mask is mapped per-child via type_ids/offsets. + let validity_mask = mask.finish(); + let data = a.to_data(); + let n_children = data.child_data().len(); + let is_dense = data.buffers().len() > 1; // dense has type_ids + offsets buffers + + if is_dense { + // Dense union: map validity per-child using type_ids and offsets + let type_ids = a.type_ids(); + let mut child_nulls: Vec> = (0..n_children) + .map(|_| Vec::new()) + .collect(); + for (i, &tid) in type_ids.iter().enumerate() { + let valid = validity_mask.value(i); + if let Some(child_vec) = child_nulls.get_mut(tid as usize) { + child_vec.push(valid); + } + } + let new_children: Vec = data + .child_data() + .iter() + .enumerate() + .map(|(idx, child)| { + let child_mask = &child_nulls[idx]; + let mut null_builder = + BooleanBufferBuilder::new(child_mask.len()); + for &v in child_mask { + null_builder.append(v); + } + let null_buffer = NullBuffer::new(null_builder.finish()); + child + .clone() + .into_builder() + .null_bit_buffer(Some(null_buffer.into_inner().into_inner())) + .build() + .unwrap_or_else(|_| child.clone()) + }) + .collect(); + let new_data = data + .into_builder() + .child_data(new_children) + .build() + .unwrap_or_else(|_| a.to_data()); + *a = UnionArray::from(new_data); + } else { + // Sparse union: apply same mask to all children + let null_buffer = NullBuffer::new(validity_mask); + let null_bit_buffer = null_buffer.into_inner().into_inner(); + let new_children: Vec = data + .child_data() + .iter() + .map(|child| { + child + .clone() + .into_builder() + .null_bit_buffer(Some(null_bit_buffer.clone())) + .build() + .unwrap_or_else(|_| child.clone()) + }) + .collect(); + let new_data 
= data + .into_builder() + .child_data(new_children) + .build() + .unwrap_or_else(|_| a.to_data()); + *a = UnionArray::from(new_data); + } + } } Ok(()) } @@ -1172,6 +1330,7 @@ impl ChannelData { ChannelData::ArrayDInt64(a) => a.finish_cloned().nulls().cloned(), ChannelData::ArrayDUInt64(a) => a.finish_cloned().nulls().cloned(), ChannelData::ArrayDFloat64(a) => a.finish_cloned().nulls().cloned(), + ChannelData::Union(a) => a.logical_nulls(), } } /// Returns the channel's validity mask as a slice @@ -1202,6 +1361,7 @@ impl ChannelData { ChannelData::ArrayDInt64(a) => a.validity_slice(), ChannelData::ArrayDUInt64(a) => a.validity_slice(), ChannelData::ArrayDFloat64(a) => a.validity_slice(), + ChannelData::Union(_) => None, } } /// returns True if a validity mask is existing for the channel @@ -1232,6 +1392,7 @@ impl ChannelData { ChannelData::ArrayDInt64(a) => a.nulls().is_some(), ChannelData::ArrayDUInt64(a) => a.nulls().is_some(), ChannelData::ArrayDFloat64(a) => a.nulls().is_some(), + ChannelData::Union(a) => a.logical_nulls().is_some(), } } /// converts the ChannelData into a ArrayRef @@ -1262,6 +1423,7 @@ impl ChannelData { ChannelData::ArrayDInt64(a) => Arc::new(a.finish_cloned()) as ArrayRef, ChannelData::ArrayDUInt64(a) => Arc::new(a.finish_cloned()) as ArrayRef, ChannelData::ArrayDFloat64(a) => Arc::new(a.finish_cloned()) as ArrayRef, + ChannelData::Union(a) => Arc::new(UnionArray::from(a.to_data())) as ArrayRef, } } #[cfg(feature = "numpy")] @@ -1369,6 +1531,10 @@ impl ChannelData { shape: a.shape().to_vec(), kind: "f8".to_string(), }, + ChannelData::Union(a) => NumpyDType { + shape: vec![a.len()], + kind: "O".to_string(), + }, } } } diff --git a/src/export/hdf5.rs b/src/export/hdf5.rs index f73f9f5..83a4d14 100644 --- a/src/export/hdf5.rs +++ b/src/export/hdf5.rs @@ -526,6 +526,10 @@ fn convert_channel_data_into_ndarray( .context("Failed converting channelData nd f64 into ndarray")?, ) .create(name)?), + ChannelData::Union(_) => { + info!("Union channel {} 
skipped for hdf5 export", name); + Ok(builder.with_data(&[0u8; 0]).create(name)?) + } } } diff --git a/src/export/numpy.rs b/src/export/numpy.rs index 86d820a..986ad10 100644 --- a/src/export/numpy.rs +++ b/src/export/numpy.rs @@ -1,6 +1,6 @@ //! this module provides methods to get directly channelData into python -use arrow::array::{Array, ArrayData, make_array}; +use arrow::array::{Array, ArrayData, UnionArray, make_array}; use arrow::pyarrow::PyArrowType; use numpy::npyffi::types::NPY_ORDER; @@ -134,6 +134,13 @@ impl<'py> IntoPyObject<'py> for ChannelData { .into_pyobject(py) .expect("error converting Utf8 array into python object")) } + ChannelData::Union(array) => { + let arrow_data = to_py_array(py, Arc::new(UnionArray::from(array.to_data()))) + .expect("error converting Union array into python object"); + Ok(arrow_data + .into_pyobject(py) + .expect("error converting Union PyArrow into python object")) + } } } } diff --git a/src/mdfinfo/mdfinfo4.rs b/src/mdfinfo/mdfinfo4.rs index 017f11d..1ad0d57 100644 --- a/src/mdfinfo/mdfinfo4.rs +++ b/src/mdfinfo/mdfinfo4.rs @@ -1,7 +1,13 @@ //! 
Parsing of file metadata into MdfInfo4 struct use crate::mdfreader::{DataSignature, MasterSignature}; use anyhow::{Context, Error, Result, bail}; -use arrow::array::{Array, BooleanBufferBuilder, UInt8Builder, UInt16Builder, UInt32Builder}; +use arrow::array::{ + Array, ArrayRef, BooleanBufferBuilder, UInt8Builder, UInt16Builder, UInt32Builder, UInt32Array, + UnionArray, +}; +use arrow::buffer::ScalarBuffer; +use arrow::compute::take; +use arrow::datatypes::{Field, UnionFields}; use binrw::{BinReaderExt, BinWriterExt, binrw}; use byteorder::{LittleEndian, ReadBytesExt}; use chrono::{DateTime, Local}; @@ -2205,6 +2211,7 @@ impl Cg4 { // First pass: collect all needed data (immutable borrows complete before mutable) let discriminator_values: Vec; let option_data: Vec>; + let option_names: Vec; { // Find the discriminator channel by block_position @@ -2241,7 +2248,7 @@ impl Cg4 { continue; } - // Collect option channel data (clone to own data) + // Collect option channel data and names (clone to own data) option_data = option_ptrs .iter() .map(|ptr| { @@ -2251,6 +2258,17 @@ impl Cg4 { .map(|cn| cn.data.clone()) }) .collect(); + + option_names = option_ptrs + .iter() + .map(|ptr| { + self.cn + .values() + .find(|cn| cn.block_position == *ptr) + .map(|cn| cn.unique_name.clone()) + .unwrap_or_default() + }) + .collect(); } // Immutable borrows end here @@ -2261,29 +2279,220 @@ impl Cg4 { .map(|(idx, val)| (*val, idx)) .collect(); - // Get template from first valid option - let template = option_data.iter().find_map(|o| o.clone()); + // Check if all option channels have the same data type + let all_same_type = { + let mut discriminants: Vec> = Vec::new(); + for data in option_data.iter().flatten() { + discriminants.push(std::mem::discriminant(data)); + } + discriminants.windows(2).all(|w| w[0] == w[1]) + }; - // Second pass: update parent channel (mutable borrow) - if let Some(parent_cn) = self.cn.get_mut(&parent_rec_pos) - && let Some(tmpl) = template - { - // Create new 
merged data with same type as first option - let merged_data = merge_variant_data_owned( - &discriminator_values, - &option_data, - &val_to_option, - &tmpl, - ); + if all_same_type { + // All options have the same type: use existing merge path + let template = option_data.iter().find_map(|o| o.clone()); + + // Second pass: update parent channel (mutable borrow) + if let Some(parent_cn) = self.cn.get_mut(&parent_rec_pos) + && let Some(tmpl) = template + { + let merged_data = merge_variant_data_owned( + &discriminator_values, + &option_data, + &val_to_option, + &tmpl, + ); + + if let Some(data) = merged_data { + parent_cn.data = data; + } + } + } else { + // Mixed types: build a dense UnionArray + // Effective sample count is the minimum of discriminator and all option lengths + let n_samples = { + let mut min_len = discriminator_values.len(); + for data in option_data.iter().flatten() { + min_len = min_len.min(data.len()); + } + min_len + }; + + // Build type_ids and offsets for dense union + let mut type_ids = Vec::with_capacity(n_samples); + let mut offsets = Vec::with_capacity(n_samples); + let mut child_counts = vec![0i32; option_data.len()]; + + for disc_val in &discriminator_values[..n_samples] { + let opt_idx = val_to_option.get(disc_val).copied().unwrap_or(0); + type_ids.push(opt_idx as i8); + offsets.push(child_counts[opt_idx]); + child_counts[opt_idx] += 1; + } + + // Build child arrays: for dense union, each child contains only the + // rows where the discriminator selects that option + let children: Vec = option_data + .iter() + .enumerate() + .map(|(opt_idx, opt)| { + if let Some(data) = opt { + let full_array = data.finish_cloned(); + // Find indices where this option is selected (within n_samples) + let indices: Vec = discriminator_values[..n_samples] + .iter() + .enumerate() + .filter_map(|(i, disc_val)| { + if val_to_option.get(disc_val) == Some(&opt_idx) { + Some(i as u32) + } else { + None + } + }) + .collect(); + let indices_array = 
UInt32Array::from(indices); + take(&*full_array, &indices_array, None) + .unwrap_or(full_array) + } else { + Arc::new(arrow::array::NullArray::new(0)) as ArrayRef + } + }) + .collect(); - if let Some(data) = merged_data { - parent_cn.data = data; + // Build UnionFields + let fields: Vec<(i8, Arc)> = children + .iter() + .enumerate() + .map(|(idx, array)| { + let name = option_names.get(idx).cloned().unwrap_or_default(); + let field = Field::new(name, array.data_type().clone(), true); + (idx as i8, Arc::new(field)) + }) + .collect(); + + let union_fields = UnionFields::from_iter(fields); + let type_ids_buffer = ScalarBuffer::from(type_ids); + let offsets_buffer = ScalarBuffer::from(offsets); + + match UnionArray::try_new( + union_fields, + type_ids_buffer, + Some(offsets_buffer), + children, + ) { + Ok(union_array) => { + if let Some(parent_cn) = self.cn.get_mut(&parent_rec_pos) { + parent_cn.data = ChannelData::Union(union_array); + } + } + Err(e) => { + log::warn!( + "Failed to create dense UnionArray for CV variant: {}", + e + ); + } } } } Ok(()) } + + /// Process Channel Union (CU) compositions after data is loaded. + /// For each channel with a CU composition, this method: + /// 1. Collects member channel data (already read by pipeline) + /// 2. Builds UnionFields from member names and data types + /// 3. Creates a sparse UnionArray where all members are valid at every row + /// 4. Replaces parent channel data with ChannelData::Union + /// + /// CU blocks represent C-style unions: all members share the same bytes and are + /// simultaneously valid, just interpreted differently. 
+ pub fn process_channel_unions(&mut self) -> Result<(), Error> { + // Find channels with CU composition and collect info + let cu_channels: Vec<(i32, Vec)> = self + .cn + .iter() + .filter_map(|(rec_pos, cn)| { + if let Some(composition) = &cn.composition + && let Compo::CU(cu_block) = &composition.block + { + return Some((*rec_pos, cu_block.cu_cn_member.clone())); + } + None + }) + .collect(); + + for (parent_rec_pos, member_ptrs) in cu_channels { + if member_ptrs.is_empty() { + continue; + } + + // Collect member channel info: (name, data as ArrayRef) + let member_info: Vec<(String, ArrayRef)> = member_ptrs + .iter() + .filter_map(|ptr| { + self.cn.values().find(|cn| cn.block_position == *ptr).map( + |cn| { + let name = cn.unique_name.clone(); + let array = cn.data.finish_cloned(); + (name, array) + }, + ) + }) + .collect(); + + if member_info.is_empty() { + log::warn!("CU member channels not found for parent at rec_pos {}", parent_rec_pos); + continue; + } + + // All members should have the same length (same number of samples) + let n_samples = member_info.first().map(|(_, arr)| arr.len()).unwrap_or(0); + if n_samples == 0 { + continue; + } + + // Build UnionFields: (type_id, Field) + let fields: Vec<(i8, Arc)> = member_info + .iter() + .enumerate() + .map(|(idx, (name, array))| { + let field = Field::new(name.clone(), array.data_type().clone(), true); + (idx as i8, Arc::new(field)) + }) + .collect(); + + let union_fields = UnionFields::from_iter(fields); + + // Collect child arrays + let children: Vec = member_info.iter().map(|(_, array)| array.clone()).collect(); + + // For sparse union: type_ids all set to 0 (first member as primary interpretation) + // In reality for CU blocks, all members are equally valid - we just pick the first + let type_ids: ScalarBuffer = ScalarBuffer::from(vec![0i8; n_samples]); + + // Create sparse UnionArray (offsets = None) + let union_array = match UnionArray::try_new( + union_fields, + type_ids, + None, // sparse union: no 
offsets + children, + ) { + Ok(arr) => arr, + Err(e) => { + log::warn!("Failed to create UnionArray for CU channel: {}", e); + continue; + } + }; + + // Update parent channel data + if let Some(parent_cn) = self.cn.get_mut(&parent_rec_pos) { + parent_cn.data = ChannelData::Union(union_array); + } + } + + Ok(()) + } } /// Merge variant option data based on discriminator values (using owned ChannelData) @@ -3606,6 +3815,8 @@ fn parse_composition( // Note: cv_cn_discriminator points to the discriminator channel (parsed elsewhere in the CG) // cv_option_val contains the discriminator values for each option // reads all the listed option Channel Blocks + // For CV options, re-key using negative block_position to avoid HashMap + // collisions since all options share the same byte offset in the record. for target in cv_block.cv_cn_option.iter() { let (cnss, pos, n_cns, _first_rec_pos) = parse_cn4( rdr, @@ -3617,7 +3828,11 @@ fn parse_composition( )?; position = pos; n_cn += n_cns; - cns.extend(cnss); + // Re-key option channels using negative block_position + for (_rec_pos, cn_struct) in cnss { + let unique_key = -(cn_struct.block_position as i32); + cns.insert(unique_key, cn_struct); + } } Ok(( Composition { @@ -3637,6 +3852,8 @@ fn parse_composition( let shape = (Vec::::new(), Order::RowMajor); array_size = 0; // reads all the listed Channel Blocks + // For CU members, re-key using negative block_position to avoid HashMap + // collisions since all members share the same byte offset in the record. 
for target in cu_block.cu_cn_member.iter() { let (cnss, pos, n_cns, _first_rec_pos) = parse_cn4( rdr, @@ -3648,7 +3865,11 @@ fn parse_composition( )?; position = pos; n_cn += n_cns; - cns.extend(cnss); + // Re-key member channels using negative block_position + for (_rec_pos, cn_struct) in cnss { + let unique_key = -(cn_struct.block_position as i32); + cns.insert(unique_key, cn_struct); + } } Ok(( Composition { diff --git a/src/mdfreader/data_read4.rs b/src/mdfreader/data_read4.rs index bcffe88..7f3c737 100644 --- a/src/mdfreader/data_read4.rs +++ b/src/mdfreader/data_read4.rs @@ -662,6 +662,7 @@ pub fn read_one_channel_array( cn.shape.1.clone(), ); } + ChannelData::Union(_) => {} // Union channels are constructed post-read } } // Other channel types : virtual channels cn_type 3 & 6 are handled at initialisation @@ -1630,6 +1631,7 @@ pub fn read_channels_from_bytes( } } } + ChannelData::Union(_) => {} // Union channels are constructed post-read } // VLSC channels: offsets were read above, now mark for VD block processing if cn.block.cn_type == 7 { diff --git a/src/mdfreader/mdfreader4.rs b/src/mdfreader/mdfreader4.rs index 843a58e..0c9d54f 100644 --- a/src/mdfreader/mdfreader4.rs +++ b/src/mdfreader/mdfreader4.rs @@ -88,6 +88,10 @@ pub fn mdfreader4<'a>( channel_group .process_channel_variants() .context("failed processing channel variants")?; + // Process Channel Unions (CU) - create UnionArray from member channels + channel_group + .process_channel_unions() + .context("failed processing channel unions")?; } // conversion of all channels to physical values convert_all_channels(dg, &info.sharable) @@ -792,6 +796,7 @@ fn read_vlsd_from_bytes( ChannelData::ArrayDInt64(_) => {} ChannelData::ArrayDUInt64(_) => {} ChannelData::ArrayDFloat64(_) => {} + ChannelData::Union(_) => {} } Ok(nrecord + previous_index) } @@ -1889,6 +1894,7 @@ fn apply_bit_mask_offset( }; } ChannelData::ArrayDFloat64(_) => (), + ChannelData::Union(_) => (), } } } diff --git 
a/tests/channel_types.rs b/tests/channel_types.rs index bd57e6f..66ef7d8 100644 --- a/tests/channel_types.rs +++ b/tests/channel_types.rs @@ -1,6 +1,6 @@ use anyhow::Result; use arrow::array::{ - AsArray, Float64Builder, Int32Builder, LargeStringBuilder, UInt16Builder, UInt64Builder, + Array, AsArray, Float64Builder, Int32Builder, LargeStringBuilder, UInt16Builder, UInt64Builder, }; use arrow::datatypes::Float64Type; use mdfr::data_holder::channel_data::ChannelData; @@ -509,10 +509,32 @@ fn channel_variant() -> Result<()> { ); } - // Verify variant channel exists and has correct length + // Verify variant channel is a dense UnionArray with 3 mixed-type options if let Some(variant_data) = mdf.get_channel_data("variant") { assert_eq!(variant_data.len(), 3, "variant should have 3 samples"); - // Variant data is FixedSizeByteArray containing merged data from different options + if let ChannelData::Union(arr) = variant_data { + assert_eq!(arr.len(), 3, "UnionArray should have 3 samples"); + let data_type = arr.data_type(); + if let arrow::datatypes::DataType::Union(fields, arrow::datatypes::UnionMode::Dense) = + data_type + { + assert_eq!(fields.len(), 3, "Union should have 3 option fields"); + } else { + panic!( + "variant channel should be a Dense Union, got {:?}", + data_type + ); + } + // Verify each sample selects the correct option via type_ids [0, 1, 2] + assert_eq!(arr.type_id(0), 0, "sample 0 should select option 0"); + assert_eq!(arr.type_id(1), 1, "sample 1 should select option 1"); + assert_eq!(arr.type_id(2), 2, "sample 2 should select option 2"); + } else { + panic!( + "variant channel should be ChannelData::Union for mixed types, got {:?}", + std::mem::discriminant(variant_data) + ); + } } Ok(()) } @@ -567,10 +589,27 @@ fn channel_union() -> Result<()> { ); } - // Verify union channel exists and has correct length + // Verify union channel exists, has correct length, and is Union type if let Some(union_data) = mdf.get_channel_data("union") { 
assert_eq!(union_data.len(), 3, "union should have 3 samples"); - // Union data is FixedSizeByteArray containing overlapping member data + // Verify it's now a Union type (not FixedSizeByteArray) + if let ChannelData::Union(arr) = union_data { + // UnionArray should have the same length + assert_eq!(arr.len(), 3, "UnionArray should have 3 samples"); + // Check that we have member fields + let data_type = arr.data_type(); + if let arrow::datatypes::DataType::Union(fields, _mode) = data_type { + assert!( + !fields.is_empty(), + "Union should have at least one member field" + ); + } + } else { + panic!( + "union channel should be ChannelData::Union type, got {:?}", + std::mem::discriminant(union_data) + ); + } } } Ok(()) From 46cbeff80430fcef610dbdcea18cb4a7b671d183 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Thu, 29 Jan 2026 07:57:13 +0100 Subject: [PATCH 06/46] simplifications and optimisations for CV and CU --- src/data_holder/channel_data.rs | 59 +++++---- src/mdfinfo/mdfinfo4.rs | 217 ++++++++++++++++---------------- 2 files changed, 137 insertions(+), 139 deletions(-) diff --git a/src/data_holder/channel_data.rs b/src/data_holder/channel_data.rs index 18e1752..cb253b8 100644 --- a/src/data_holder/channel_data.rs +++ b/src/data_holder/channel_data.rs @@ -491,20 +491,7 @@ impl ChannelData { ChannelData::ArrayDInt64(_) => 64, ChannelData::ArrayDUInt64(_) => 64, ChannelData::ArrayDFloat64(_) => 64, - ChannelData::Union(a) => { - // Union bit size is the max of all member sizes (in bits) - if let DataType::Union(fields, _) = a.data_type() { - fields - .iter() - .filter_map(|(_, field)| { - field.data_type().primitive_width().map(|w| (w * 8) as u32) - }) - .max() - .unwrap_or(0) - } else { - 0 - } - } + ChannelData::Union(_) => self.byte_count() * 8, } } /// returns the max byte count of each values in array @@ -808,24 +795,34 @@ impl ChannelData { a.data_type(), DataType::Union(_, arrow::datatypes::UnionMode::Dense) ); + // 
Pre-compute child metadata per type_id to avoid per-row to_data() calls + let mut children_meta: std::collections::HashMap = + std::collections::HashMap::new(); + if let DataType::Union(fields, _) = a.data_type() { + for (type_id, _) in fields.iter() { + let child = a.child(type_id); + if let Some(elem_size) = child.data_type().primitive_width() { + children_meta.insert(type_id, (child.to_data(), elem_size)); + } + } + } for i in 0..n { let type_id = a.type_id(i); - let child = a.child(type_id); - let child_offset = if is_dense { - a.value_offset(i) - } else { - i - }; - let child_data = child.to_data(); - if let Some(elem_size) = child.data_type().primitive_width() { - // Primitive type: extract raw bytes from values buffer + if let Some((child_data, elem_size)) = children_meta.get(&type_id) { + let child_offset = if is_dense { + a.value_offset(i) + } else { + i + }; if let Some(buffer) = child_data.buffers().first() { let start = (child_data.offset() + child_offset) * elem_size; let end = start + elem_size; - let copy_len = elem_size.min(union_byte_count); + let copy_len = (*elem_size).min(union_byte_count); if end <= buffer.len() { bytes[i * union_byte_count..i * union_byte_count + copy_len] - .copy_from_slice(&buffer.as_slice()[start..start + copy_len]); + .copy_from_slice( + &buffer.as_slice()[start..start + copy_len], + ); } } } @@ -1240,8 +1237,16 @@ impl ChannelData { if is_dense { // Dense union: map validity per-child using type_ids and offsets let type_ids = a.type_ids(); - let mut child_nulls: Vec> = (0..n_children) - .map(|_| Vec::new()) + // Pre-count child sizes for capacity allocation + let mut child_sizes = vec![0usize; n_children]; + for &tid in type_ids.iter() { + if let Some(count) = child_sizes.get_mut(tid as usize) { + *count += 1; + } + } + let mut child_nulls: Vec> = child_sizes + .iter() + .map(|&sz| Vec::with_capacity(sz)) .collect(); for (i, &tid) in type_ids.iter().enumerate() { let valid = validity_mask.value(i); diff --git 
a/src/mdfinfo/mdfinfo4.rs b/src/mdfinfo/mdfinfo4.rs index 1ad0d57..b713685 100644 --- a/src/mdfinfo/mdfinfo4.rs +++ b/src/mdfinfo/mdfinfo4.rs @@ -2248,27 +2248,25 @@ impl Cg4 { continue; } - // Collect option channel data and names (clone to own data) - option_data = option_ptrs - .iter() - .map(|ptr| { - self.cn - .values() - .find(|cn| cn.block_position == *ptr) - .map(|cn| cn.data.clone()) - }) - .collect(); - - option_names = option_ptrs - .iter() - .map(|ptr| { - self.cn - .values() - .find(|cn| cn.block_position == *ptr) - .map(|cn| cn.unique_name.clone()) - .unwrap_or_default() - }) - .collect(); + // Collect option channel data and names in a single pass + let (data_vec, names_vec): (Vec>, Vec) = + option_ptrs + .iter() + .map(|ptr| { + match self + .cn + .values() + .find(|cn| cn.block_position == *ptr) + { + Some(cn) => { + (Some(cn.data.clone()), cn.unique_name.clone()) + } + None => (None, String::new()), + } + }) + .unzip(); + option_data = data_vec; + option_names = names_vec; } // Immutable borrows end here @@ -2318,39 +2316,29 @@ impl Cg4 { min_len }; - // Build type_ids and offsets for dense union + // Single pass: build type_ids, offsets, and per-child indices together let mut type_ids = Vec::with_capacity(n_samples); let mut offsets = Vec::with_capacity(n_samples); - let mut child_counts = vec![0i32; option_data.len()]; + let mut child_indices: Vec> = + vec![Vec::new(); option_data.len()]; - for disc_val in &discriminator_values[..n_samples] { + for (i, disc_val) in discriminator_values[..n_samples].iter().enumerate() + { let opt_idx = val_to_option.get(disc_val).copied().unwrap_or(0); type_ids.push(opt_idx as i8); - offsets.push(child_counts[opt_idx]); - child_counts[opt_idx] += 1; + offsets.push(child_indices[opt_idx].len() as i32); + child_indices[opt_idx].push(i as u32); } - // Build child arrays: for dense union, each child contains only the - // rows where the discriminator selects that option + // Build child arrays using pre-collected 
indices let children: Vec = option_data .iter() .enumerate() .map(|(opt_idx, opt)| { if let Some(data) = opt { let full_array = data.finish_cloned(); - // Find indices where this option is selected (within n_samples) - let indices: Vec = discriminator_values[..n_samples] - .iter() - .enumerate() - .filter_map(|(i, disc_val)| { - if val_to_option.get(disc_val) == Some(&opt_idx) { - Some(i as u32) - } else { - None - } - }) - .collect(); - let indices_array = UInt32Array::from(indices); + let indices_array = + UInt32Array::from(child_indices[opt_idx].clone()); take(&*full_array, &indices_array, None) .unwrap_or(full_array) } else { @@ -2359,18 +2347,7 @@ impl Cg4 { }) .collect(); - // Build UnionFields - let fields: Vec<(i8, Arc)> = children - .iter() - .enumerate() - .map(|(idx, array)| { - let name = option_names.get(idx).cloned().unwrap_or_default(); - let field = Field::new(name, array.data_type().clone(), true); - (idx as i8, Arc::new(field)) - }) - .collect(); - - let union_fields = UnionFields::from_iter(fields); + let union_fields = build_union_fields(&option_names, &children); let type_ids_buffer = ScalarBuffer::from(type_ids); let offsets_buffer = ScalarBuffer::from(offsets); @@ -2452,20 +2429,10 @@ impl Cg4 { continue; } - // Build UnionFields: (type_id, Field) - let fields: Vec<(i8, Arc)> = member_info - .iter() - .enumerate() - .map(|(idx, (name, array))| { - let field = Field::new(name.clone(), array.data_type().clone(), true); - (idx as i8, Arc::new(field)) - }) - .collect(); - - let union_fields = UnionFields::from_iter(fields); - - // Collect child arrays - let children: Vec = member_info.iter().map(|(_, array)| array.clone()).collect(); + // Split member_info into names and children, then build UnionFields + let (member_names, children): (Vec, Vec) = + member_info.into_iter().unzip(); + let union_fields = build_union_fields(&member_names, &children); // For sparse union: type_ids all set to 0 (first member as primary interpretation) // In reality 
for CU blocks, all members are equally valid - we just pick the first @@ -2495,6 +2462,22 @@ impl Cg4 { } } +/// Build UnionFields from parallel name and child arrays slices. +fn build_union_fields(names: &[String], children: &[ArrayRef]) -> UnionFields { + let fields: Vec<(i8, Arc)> = children + .iter() + .enumerate() + .map(|(idx, array)| { + let name = names.get(idx).cloned().unwrap_or_default(); + ( + idx as i8, + Arc::new(Field::new(name, array.data_type().clone(), true)), + ) + }) + .collect(); + UnionFields::from_iter(fields) +} + /// Merge variant option data based on discriminator values (using owned ChannelData) fn merge_variant_data_owned( discriminator_values: &[u64], @@ -3677,6 +3660,38 @@ pub enum Compo { DS(Box), } +/// Parse and re-key channel blocks for CU/CV compositions. +/// All member/option channels share the same byte offset in the record, +/// so they are re-keyed using negative block_position to avoid HashMap collisions. +fn parse_and_rekey_channels( + rdr: &mut SymBufReader<&File>, + targets: &[i64], + position: &mut i64, + sharable: &mut SharableBlocks, + record_layout: RecordLayout, + cg_cycle_count: u64, +) -> Result<(CnType, usize)> { + let mut cns: CnType = HashMap::new(); + let mut n_cn: usize = 0; + for target in targets { + let (cnss, pos, n_cns, _first_rec_pos) = parse_cn4( + rdr, + *target, + *position, + sharable, + record_layout, + cg_cycle_count, + )?; + *position = pos; + n_cn += n_cns; + for (_rec_pos, cn_struct) in cnss { + let unique_key = -(cn_struct.block_position as i32); + cns.insert(unique_key, cn_struct); + } + } + Ok((cns, n_cn)) +} + /// parses composition linked blocks /// CN (structures of composed channels )and CA (array of arrays) blocks can be nested or even CA and CN nested and mixed: this is not supported, very complicated fn parse_composition( @@ -3809,31 +3824,19 @@ fn parse_composition( } else if block_header_short.hdr_id == "##CV".as_bytes() { // Channel Variant let cv_block: Cv4Block = 
block.read_le().context("Failed parsing CV block")?; - let cv_composition: Option> = None; // no composition possible after CV block + let cv_composition: Option> = None; let shape = (Vec::::new(), Order::RowMajor); array_size = 0; - // Note: cv_cn_discriminator points to the discriminator channel (parsed elsewhere in the CG) - // cv_option_val contains the discriminator values for each option - // reads all the listed option Channel Blocks - // For CV options, re-key using negative block_position to avoid HashMap - // collisions since all options share the same byte offset in the record. - for target in cv_block.cv_cn_option.iter() { - let (cnss, pos, n_cns, _first_rec_pos) = parse_cn4( - rdr, - *target, - position, - sharable, - record_layout, - cg_cycle_count, - )?; - position = pos; - n_cn += n_cns; - // Re-key option channels using negative block_position - for (_rec_pos, cn_struct) in cnss { - let unique_key = -(cn_struct.block_position as i32); - cns.insert(unique_key, cn_struct); - } - } + let (rekeyed_cns, rekeyed_n_cn) = parse_and_rekey_channels( + rdr, + &cv_block.cv_cn_option, + &mut position, + sharable, + record_layout, + cg_cycle_count, + )?; + n_cn += rekeyed_n_cn; + cns.extend(rekeyed_cns); Ok(( Composition { block: Compo::CV(Box::new(cv_block)), @@ -3848,33 +3851,23 @@ fn parse_composition( } else if block_header_short.hdr_id == "##CU".as_bytes() { // Channel Union let cu_block: Cu4Block = block.read_le().context("Failed parsing CU block")?; - let cv_composition: Option> = None; // no composition possible after CV block + let cu_composition: Option> = None; let shape = (Vec::::new(), Order::RowMajor); array_size = 0; - // reads all the listed Channel Blocks - // For CU members, re-key using negative block_position to avoid HashMap - // collisions since all members share the same byte offset in the record. 
- for target in cu_block.cu_cn_member.iter() { - let (cnss, pos, n_cns, _first_rec_pos) = parse_cn4( - rdr, - *target, - position, - sharable, - record_layout, - cg_cycle_count, - )?; - position = pos; - n_cn += n_cns; - // Re-key member channels using negative block_position - for (_rec_pos, cn_struct) in cnss { - let unique_key = -(cn_struct.block_position as i32); - cns.insert(unique_key, cn_struct); - } - } + let (rekeyed_cns, rekeyed_n_cn) = parse_and_rekey_channels( + rdr, + &cu_block.cu_cn_member, + &mut position, + sharable, + record_layout, + cg_cycle_count, + )?; + n_cn += rekeyed_n_cn; + cns.extend(rekeyed_cns); Ok(( Composition { block: Compo::CU(Box::new(cu_block)), - compo: cv_composition, + compo: cu_composition, }, position, array_size, From 27ea3b401ceed756780be1a85d007e9094730c89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Sat, 31 Jan 2026 10:39:49 +0100 Subject: [PATCH 07/46] simplified and optimised the data stream and VLSC parsing --- src/data_holder/channel_data.rs | 15 ++ src/mdfinfo/mdfinfo4.rs | 13 +- src/mdfreader/mdfreader4.rs | 433 +++++++++----------------------- 3 files changed, 132 insertions(+), 329 deletions(-) diff --git a/src/data_holder/channel_data.rs b/src/data_holder/channel_data.rs index cb253b8..000ee40 100644 --- a/src/data_holder/channel_data.rs +++ b/src/data_holder/channel_data.rs @@ -554,6 +554,21 @@ impl ChannelData { } } } + /// Extracts integer channel values as a Vec. + /// Returns None for non-integer types. 
+ pub fn to_u64_vec(&self) -> Option> { + match self { + ChannelData::UInt8(a) => Some(a.values_slice().iter().map(|&v| v as u64).collect()), + ChannelData::UInt16(a) => Some(a.values_slice().iter().map(|&v| v as u64).collect()), + ChannelData::UInt32(a) => Some(a.values_slice().iter().map(|&v| v as u64).collect()), + ChannelData::UInt64(a) => Some(a.values_slice().to_vec()), + ChannelData::Int8(a) => Some(a.values_slice().iter().map(|&v| v as u64).collect()), + ChannelData::Int16(a) => Some(a.values_slice().iter().map(|&v| v as u64).collect()), + ChannelData::Int32(a) => Some(a.values_slice().iter().map(|&v| v as u64).collect()), + ChannelData::Int64(a) => Some(a.values_slice().iter().map(|&v| v as u64).collect()), + _ => None, + } + } /// returns mdf4 data type pub fn data_type(&self, endian: bool) -> u8 { if endian { diff --git a/src/mdfinfo/mdfinfo4.rs b/src/mdfinfo/mdfinfo4.rs index b713685..9ab46a1 100644 --- a/src/mdfinfo/mdfinfo4.rs +++ b/src/mdfinfo/mdfinfo4.rs @@ -2229,16 +2229,9 @@ impl Cg4 { }; // Get discriminator values as u64 - discriminator_values = match &disc_cn.data { - ChannelData::UInt8(b) => b.values_slice().iter().map(|v| *v as u64).collect(), - ChannelData::UInt16(b) => b.values_slice().iter().map(|v| *v as u64).collect(), - ChannelData::UInt32(b) => b.values_slice().iter().map(|v| *v as u64).collect(), - ChannelData::UInt64(b) => b.values_slice().to_vec(), - ChannelData::Int8(b) => b.values_slice().iter().map(|v| *v as u64).collect(), - ChannelData::Int16(b) => b.values_slice().iter().map(|v| *v as u64).collect(), - ChannelData::Int32(b) => b.values_slice().iter().map(|v| *v as u64).collect(), - ChannelData::Int64(b) => b.values_slice().iter().map(|v| *v as u64).collect(), - _ => { + discriminator_values = match disc_cn.data.to_u64_vec() { + Some(v) => v, + None => { log::warn!("CV discriminator channel has unsupported data type"); continue; } diff --git a/src/mdfreader/mdfreader4.rs b/src/mdfreader/mdfreader4.rs index 0c9d54f..1b423c5 
100644 --- a/src/mdfreader/mdfreader4.rs +++ b/src/mdfreader/mdfreader4.rs @@ -526,67 +526,24 @@ fn read_vd( } // Get offsets from the VLSC channel data - let offsets: Vec = match &cn.data { - ChannelData::UInt8(a) => a.values_slice().iter().map(|&v| v as u64).collect(), - ChannelData::UInt16(a) => a.values_slice().iter().map(|&v| v as u64).collect(), - ChannelData::UInt32(a) => a.values_slice().iter().map(|&v| v as u64).collect(), - ChannelData::UInt64(a) => a.values_slice().to_vec(), - _ => Vec::new(), - }; + let offsets: Vec = cn.data.to_u64_vec().unwrap_or_default(); if offsets.is_empty() { continue; } // Get sizes from cn_cn_size channel - let cn_cn_size_pos = cn.block.cn_cn_size(); - let sizes: Vec = if let Some(size_pos) = cn_cn_size_pos { - let mut found_sizes: Option> = None; - for (_pos, size_cn) in channel_group.cn.iter() { - if size_cn.block_position == size_pos { - match &size_cn.data { - // Unsigned int types - ChannelData::UInt8(a) => { - found_sizes = - Some(a.values_slice().iter().map(|&v| v as u64).collect()); - } - ChannelData::UInt16(a) => { - found_sizes = - Some(a.values_slice().iter().map(|&v| v as u64).collect()); - } - ChannelData::UInt32(a) => { - found_sizes = - Some(a.values_slice().iter().map(|&v| v as u64).collect()); - } - ChannelData::UInt64(a) => { - found_sizes = Some(a.values_slice().to_vec()); - } - // Signed int types (some files use signed int for sizes) - ChannelData::Int8(a) => { - found_sizes = - Some(a.values_slice().iter().map(|&v| v as u64).collect()); - } - ChannelData::Int16(a) => { - found_sizes = - Some(a.values_slice().iter().map(|&v| v as u64).collect()); - } - ChannelData::Int32(a) => { - found_sizes = - Some(a.values_slice().iter().map(|&v| v as u64).collect()); - } - ChannelData::Int64(a) => { - found_sizes = - Some(a.values_slice().iter().map(|&v| v as u64).collect()); - } - _ => {} - } - break; - } - } - found_sizes.unwrap_or_default() - } else { - Vec::new() - }; + let sizes: Vec = cn + .block + .cn_cn_size() 
+ .and_then(|size_pos| { + channel_group + .cn + .values() + .find(|cn| cn.block_position == size_pos) + .and_then(|size_cn| size_cn.data.to_u64_vec()) + }) + .unwrap_or_default(); if sizes.is_empty() { continue; @@ -642,120 +599,33 @@ fn read_vlsd_from_bytes( let mut remaining: usize = data_length - position; let mut nrecord: usize = 0; match &mut cn.data { - ChannelData::Int8(_) => {} - ChannelData::UInt8(_) => {} - ChannelData::Int16(_) => {} - ChannelData::UInt16(_) => {} - ChannelData::Int32(_) => {} - ChannelData::UInt32(_) => {} - ChannelData::Float32(_) => {} - ChannelData::Int64(_) => {} - ChannelData::UInt64(_) => {} - ChannelData::Float64(_) => {} - ChannelData::Complex32(_) => {} - ChannelData::Complex64(_) => {} ChannelData::Utf8(array) => { - if cn.block.cn_data_type == 6 { - while remaining > 0 { - let len = &data[position..position + std::mem::size_of::()]; - let length: usize = - u32::from_le_bytes(len.try_into().context("Could not read length")?) - as usize; - if (position + length + 4) <= data_length { - position += std::mem::size_of::(); - let record_len = if length > 0 { length - 1 } else { 0 }; - let record = &data[position..position + record_len]; // do not take null terminated character - let mut dst = String::with_capacity(record.len()); - let (_result, _size, _replacement) = decoder - .windows_1252 - .decode_to_string(record, &mut dst, false); - array.append_value(dst); - position += length; - remaining = data_length - position; - nrecord += 1; - } else { - remaining = data_length - position; - // copies tail part at beginnning of vect - data.copy_within(position.., 0); - // clears the last part - data.truncate(remaining); - break; - } - } - } else if cn.block.cn_data_type == 7 { - while remaining > 0 { - let len = &data[position..position + std::mem::size_of::()]; - let length: usize = - u32::from_le_bytes(len.try_into().context("Could not read length")?) 
- as usize; - if (position + length + 4) <= data_length { - position += std::mem::size_of::(); - let record = &data[position..position + length - 1]; // do not take null terminated character - let dst = str::from_utf8(record).context("Found invalid UTF-8")?; - array.append_value(dst); - position += length; - remaining = data_length - position; - nrecord += 1; - } else { - remaining = data_length - position; - // copies tail part at beginnning of vect - data.copy_within(position.., 0); - // clears the last part - data.truncate(remaining); - break; - } - } - } else if cn.block.cn_data_type == 8 { - while remaining > 0 { - let len = &data[position..position + std::mem::size_of::()]; - let length: usize = - u32::from_le_bytes(len.try_into().context("Could not read length")?) - as usize; - if (position + length + 4) <= data_length { - position += std::mem::size_of::(); - let record = &data[position..position + length]; - let mut dst = String::with_capacity(record.len()); - let (_result, _size, _replacement) = - decoder.utf_16_le.decode_to_string(record, &mut dst, false); - array.append_value(dst.trim_end_matches('\0')); - position += length; - remaining = data_length - position; - nrecord += 1; - } else { - remaining = data_length - position; - // copies tail part at beginnning of vect - data.copy_within(position.., 0); - // clears the last part - data.truncate(remaining); - break; - } - } - } else if cn.block.cn_data_type == 9 { - while remaining > 0 { - let len = &data[position..position + std::mem::size_of::()]; - let length: usize = - u32::from_le_bytes(len.try_into().context("Could not read length")?) 
- as usize; - if (position + length + 4) <= data_length { - position += std::mem::size_of::(); - let record = &data[position..position + length]; - let mut dst = String::with_capacity(record.len()); - let (_result, _size, _replacement) = - decoder.utf_16_be.decode_to_string(record, &mut dst, false); - array.append_value(dst.trim_end_matches('\0')); - position += length; - remaining = data_length - position; - nrecord += 1; - } else { - remaining = data_length - position; - // copies tail part at beginnning of vect - data.copy_within(position.., 0); - // clears the last part - data.truncate(remaining); - break; - } + let cn_data_type = cn.block.cn_data_type; + while remaining > 0 { + let len = &data[position..position + std::mem::size_of::()]; + let length: usize = + u32::from_le_bytes(len.try_into().context("Could not read length")?) as usize; + if (position + length + 4) <= data_length { + position += std::mem::size_of::(); + // Types 6 (SBC) and 7 (UTF-8) have null terminator to strip + let record_len = match cn_data_type { + 6 | 7 => if length > 0 { length - 1 } else { 0 }, + _ => length, + }; + let record = &data[position..position + record_len]; + array.append_value(decode_string_bytes(record, cn_data_type, decoder)?); + position += length; + remaining = data_length - position; + nrecord += 1; + } else { + remaining = data_length - position; + // copies tail part at beginnning of vect + data.copy_within(position.., 0); + // clears the last part + data.truncate(remaining); + break; } - }; + } if remaining == 0 { data.clear() } @@ -785,18 +655,7 @@ fn read_vlsd_from_bytes( data.clear() } } - ChannelData::FixedSizeByteArray(_) => {} - ChannelData::ArrayDInt8(_) => {} - ChannelData::ArrayDUInt8(_) => {} - ChannelData::ArrayDInt16(_) => {} - ChannelData::ArrayDUInt16(_) => {} - ChannelData::ArrayDInt32(_) => {} - ChannelData::ArrayDUInt32(_) => {} - ChannelData::ArrayDFloat32(_) => {} - ChannelData::ArrayDInt64(_) => {} - ChannelData::ArrayDUInt64(_) => {} - 
ChannelData::ArrayDFloat64(_) => {} - ChannelData::Union(_) => {} + _ => {} } Ok(nrecord + previous_index) } @@ -817,124 +676,18 @@ fn read_vlsc_from_bytes( let mut max_position: usize = 0; match &mut cn.data { ChannelData::Utf8(array) => { - if cn.block.cn_data_type == 6 { - // SBC ISO-8859-1 string - // Note: VLSC size channel gives actual data length (no null terminator) - for (offset, size) in offsets.iter().zip(sizes.iter()) { - let start = *offset as usize; - let length = *size as usize; - if start + length <= data_length && length > 0 { - let record = &data[start..start + length]; - let mut dst = String::with_capacity(record.len()); - let (_result, _size, _replacement) = decoder - .windows_1252 - .decode_to_string(record, &mut dst, false); - array.append_value(dst); - max_position = max_position.max(start + length); - } else if length == 0 { - array.append_value(""); - } else { - array.append_null(); - } - } - } else if cn.block.cn_data_type == 7 { - // UTF-8 string - // Note: VLSC size channel gives actual data length (no null terminator) - for (offset, size) in offsets.iter().zip(sizes.iter()) { - let start = *offset as usize; - let length = *size as usize; - if start + length <= data_length && length > 0 { - let record = &data[start..start + length]; - let dst = str::from_utf8(record).context("Found invalid UTF-8")?; - array.append_value(dst); - max_position = max_position.max(start + length); - } else if length == 0 { - array.append_value(""); - } else { - array.append_null(); - } - } - } else if cn.block.cn_data_type == 8 { - // UTF-16 LE string - for (offset, size) in offsets.iter().zip(sizes.iter()) { - let start = *offset as usize; - let length = *size as usize; - if start + length <= data_length && length > 0 { - let record = &data[start..start + length]; - let mut dst = String::with_capacity(record.len()); - let (_result, _size, _replacement) = - decoder.utf_16_le.decode_to_string(record, &mut dst, false); - 
array.append_value(dst.trim_end_matches('\0')); - max_position = max_position.max(start + length); - } else if length == 0 { - array.append_value(""); - } else { - array.append_null(); - } - } - } else if cn.block.cn_data_type == 9 { - // UTF-16 BE string - for (offset, size) in offsets.iter().zip(sizes.iter()) { - let start = *offset as usize; - let length = *size as usize; - if start + length <= data_length && length > 0 { - let record = &data[start..start + length]; - let mut dst = String::with_capacity(record.len()); - let (_result, _size, _replacement) = - decoder.utf_16_be.decode_to_string(record, &mut dst, false); - array.append_value(dst.trim_end_matches('\0')); - max_position = max_position.max(start + length); - } else if length == 0 { - array.append_value(""); - } else { - array.append_null(); - } - } - } else if cn.block.cn_data_type == 17 { - // String with BOM - the BOM indicates the actual encoding - // BOM types: UTF-8 (0xEF 0xBB 0xBF), UTF-16 LE (0xFF 0xFE), UTF-16 BE (0xFE 0xFF) - for (offset, size) in offsets.iter().zip(sizes.iter()) { - let start = *offset as usize; - let length = *size as usize; - if start + length <= data_length && length > 0 { - let record = &data[start..start + length]; - // Detect BOM and decode accordingly - if record.len() >= 3 - && record[0] == 0xEF - && record[1] == 0xBB - && record[2] == 0xBF - { - // UTF-8 BOM - let record = &record[3..]; - let dst = - str::from_utf8(record).context("Found invalid UTF-8 with BOM")?; - array.append_value(dst); - } else if record.len() >= 2 && record[0] == 0xFF && record[1] == 0xFE { - // UTF-16 LE BOM - let record = &record[2..]; - let mut dst = String::with_capacity(record.len()); - let (_result, _size, _replacement) = - decoder.utf_16_le.decode_to_string(record, &mut dst, false); - array.append_value(dst.trim_end_matches('\0')); - } else if record.len() >= 2 && record[0] == 0xFE && record[1] == 0xFF { - // UTF-16 BE BOM - let record = &record[2..]; - let mut dst = 
String::with_capacity(record.len()); - let (_result, _size, _replacement) = - decoder.utf_16_be.decode_to_string(record, &mut dst, false); - array.append_value(dst.trim_end_matches('\0')); - } else { - // No recognized BOM, try UTF-8 - let dst = - str::from_utf8(record).context("Found invalid UTF-8 (no BOM)")?; - array.append_value(dst); - } - max_position = max_position.max(start + length); - } else if length == 0 { - array.append_value(""); - } else { - array.append_null(); - } + let cn_data_type = cn.block.cn_data_type; + for (offset, size) in offsets.iter().zip(sizes.iter()) { + let start = *offset as usize; + let length = *size as usize; + if start + length <= data_length && length > 0 { + let record = &data[start..start + length]; + array.append_value(decode_string_bytes(record, cn_data_type, decoder)?); + max_position = max_position.max(start + length); + } else if length == 0 { + array.append_value(""); + } else { + array.append_null(); } } } @@ -1673,6 +1426,65 @@ struct Dec { utf_16_le: Decoder, } +/// Decodes a byte slice to a String based on MDF4 cn_data_type. +/// cn_data_type: 6=SBC/Windows-1252, 7=UTF-8, 8=UTF-16 LE, 9=UTF-16 BE, 17=BOM-prefixed +fn decode_string_bytes(record: &[u8], cn_data_type: u8, decoder: &mut Dec) -> Result { + match cn_data_type { + 6 => { + let mut dst = String::with_capacity(record.len()); + let _ = decoder + .windows_1252 + .decode_to_string(record, &mut dst, false); + Ok(dst) + } + 7 => Ok(str::from_utf8(record) + .context("Found invalid UTF-8")? 
+ .to_string()), + 8 => { + let mut dst = String::with_capacity(record.len()); + let _ = decoder + .utf_16_le + .decode_to_string(record, &mut dst, false); + Ok(dst.trim_end_matches('\0').to_string()) + } + 9 => { + let mut dst = String::with_capacity(record.len()); + let _ = decoder + .utf_16_be + .decode_to_string(record, &mut dst, false); + Ok(dst.trim_end_matches('\0').to_string()) + } + 17 => { + if record.len() >= 3 + && record[0] == 0xEF + && record[1] == 0xBB + && record[2] == 0xBF + { + Ok(str::from_utf8(&record[3..]) + .context("Found invalid UTF-8 with BOM")? + .to_string()) + } else if record.len() >= 2 && record[0] == 0xFF && record[1] == 0xFE { + let mut dst = String::with_capacity(record.len()); + let _ = decoder + .utf_16_le + .decode_to_string(&record[2..], &mut dst, false); + Ok(dst.trim_end_matches('\0').to_string()) + } else if record.len() >= 2 && record[0] == 0xFE && record[1] == 0xFF { + let mut dst = String::with_capacity(record.len()); + let _ = decoder + .utf_16_be + .decode_to_string(&record[2..], &mut dst, false); + Ok(dst.trim_end_matches('\0').to_string()) + } else { + Ok(str::from_utf8(record) + .context("Found invalid UTF-8 (no BOM)")? 
+ .to_string()) + } + } + _ => Ok(String::from_utf8_lossy(record).into_owned()), + } +} + /// initialise ndarrays for the data group/block fn initialise_arrays( channel_group: &mut Cg4, @@ -2180,28 +1992,11 @@ fn store_decoded_values_in_channel( } } ChannelData::Utf8(builder) => { - // Decode string based on data type - let s = if cn.block.cn_data_type == 6 { - // SBC (Windows-1252) - let mut dst = String::with_capacity(value_bytes.len()); - let _ = decoder - .windows_1252 - .decode_to_string(&value_bytes, &mut dst, false); - dst - } else if cn.block.cn_data_type == 7 || cn.block.cn_data_type == 9 { - // UTF-8 or ISO-8859-1 (treat as UTF-8) - String::from_utf8_lossy(&value_bytes).into_owned() - } else if cn.block.cn_data_type == 8 { - // UTF-16 LE - let mut dst = String::with_capacity(value_bytes.len()); - let _ = decoder - .utf_16_le - .decode_to_string(&value_bytes, &mut dst, false); - dst - } else { - String::from_utf8_lossy(&value_bytes).into_owned() - }; - builder.append_value(s); + builder.append_value(decode_string_bytes( + &value_bytes, + cn.block.cn_data_type, + decoder, + )?); } ChannelData::VariableSizeByteArray(builder) => { builder.append_value(&value_bytes); From 8528fe63a03fea0d50295c149be4cc26937b8234 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Sat, 31 Jan 2026 11:20:10 +0100 Subject: [PATCH 08/46] ticked up version --- Cargo.lock | 22 +++++++++++----------- Cargo.toml | 6 +++--- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 698ae34..485866d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -749,7 +749,7 @@ version = "0.29.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "befbfd072a8e81c02f8c507aefce431fe5e7d051f83d48a23ffc9b9fe5a11799" dependencies = [ - "clap 4.5.54", + "clap 4.5.56", "heck", "indexmap", "log 0.4.29", @@ -880,18 +880,18 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.54" +version = "4.5.56" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6e6ff9dcd79cff5cd969a17a545d79e84ab086e444102a591e288a8aa3ce394" +checksum = "a75ca66430e33a14957acc24c5077b503e7d374151b2b4b3a10c83b4ceb4be0e" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.54" +version = "4.5.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa42cf4d2b7a41bc8f663a7cab4031ebafa1bf3875705bfaf8466dc60ab52c00" +checksum = "793207c7fa6300a0608d1080b858e5fdbe713cdc1c8db9fb17777d8a13e63df0" dependencies = [ "anstream", "anstyle", @@ -1025,7 +1025,7 @@ dependencies = [ "anes", "cast", "ciborium", - "clap 4.5.54", + "clap 4.5.56", "criterion-plot", "itertools 0.13.0", "num-traits", @@ -2308,7 +2308,7 @@ dependencies = [ [[package]] name = "mdfr" -version = "0.6.4" +version = "0.6.5" dependencies = [ "anyhow", "arrow", @@ -2316,7 +2316,7 @@ dependencies = [ "byteorder", "cbindgen", "chrono", - "clap 4.5.54", + "clap 4.5.56", "codepage", "criterion", "crossbeam-channel", @@ -2324,7 +2324,7 @@ dependencies = [ "env_logger 0.11.8", "fasteval", "flate2", - "glob 0.3.3", + "glob 0.2.11", "half", "hdf5", "itertools 0.14.0", @@ -4988,9 +4988,9 @@ dependencies = [ [[package]] name = "whoami" -version = "2.0.2" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ace4d5c7b5ab3d99629156d4e0997edbe98a4beb6d5ba99e2cae830207a81983" +checksum = "8fae98cf96deed1b7572272dfc777713c249ae40aa1cf8862e091e8b745f5361" dependencies = [ "libredox", "wasite", diff --git a/Cargo.toml b/Cargo.toml index 17c8bc6..65ffc00 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "mdfr" -version = "0.6.4" +version = "0.6.5" description = "A package for reading and writing MDF files" authors = ["ratal "] edition = "2024" @@ -21,7 +21,7 @@ ndarray = ["dep:ndarray"] hdf5-mpio = ["hdf5/mpio"] [dependencies] -clap = "4.5.54" # for input arguments +clap = "4.5.56" # for input arguments anyhow = 
{ version = "1.0", features = ["backtrace"] } # error handling log = "0.4" # to log events byteorder = "1.4" # for bytes conversions @@ -44,7 +44,7 @@ transpose = "0.2" # for DZBlock transpose fasteval = "0.2" # for algebraic conversion itertools = "0.14" serde = { version = "1.0", features = ["derive"] } # for serialization -whoami = "2.0.2" # to get user name for writing file +whoami = "2.1.0" # to get user name for writing file rand = "0.9" # for random numbers arrow = { version = "57.1.0", features = [ "pyarrow", From 82fc477dd3b18d8f163cfe0fea51165d5e074032 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Sat, 31 Jan 2026 11:25:28 +0100 Subject: [PATCH 09/46] fixed .gitignore --- .gitignore | 9 +- src/mdfreader/datastream_decoder.rs | 756 ++++++++++++++++++++++++++++ 2 files changed, 764 insertions(+), 1 deletion(-) create mode 100644 src/mdfreader/datastream_decoder.rs diff --git a/.gitignore b/.gitignore index 7077499..8736572 100644 --- a/.gitignore +++ b/.gitignore @@ -7,10 +7,17 @@ callgrind.out.* cachegrind.out.* massif.* /.env -/venv +/.venv test_mdf* test.mf4 dhat.out.* test_files/test.mf4_test test_files/test.parquet /standard +/lib +/.vscode +/lib64 +/share +/bin +pyvenv.cfg +/.continue \ No newline at end of file diff --git a/src/mdfreader/datastream_decoder.rs b/src/mdfreader/datastream_decoder.rs new file mode 100644 index 0000000..e1a1ca5 --- /dev/null +++ b/src/mdfreader/datastream_decoder.rs @@ -0,0 +1,756 @@ +//! Data Stream Mode Composition Decoder +//! +//! This module decodes VLSD/VLSC data blobs that are linked to a DSBLOCK with `ds_mode = 0` +//! (data stream mode). It parses the composition hierarchy (CN, CL, CV, CU blocks) to extract +//! individual channel values from the raw data blob. +//! +//! Per MDF4.3 Section 6.25, data stream mode uses relative positioning where each channel's +//! offset depends on the previous channel's end position plus alignment rules. 
+ +use anyhow::{Result, bail}; +use std::collections::HashMap; + +use crate::mdfinfo::mdfinfo4::{ + CN_F_ALIGNMENT_RESET, Cl4Block, Cn4, CnType, Compo, Composition, Cu4Block, Cv4Block, Ds4Block, +}; + +/// Groups channel metadata and reverse lookup maps for O(1) channel resolution. +struct ChannelIndex<'a> { + channels: &'a CnType, + pos_to_rec: HashMap, + name_to_rec: HashMap<&'a str, i32>, +} + +impl<'a> ChannelIndex<'a> { + fn new(cg_channels: &'a CnType) -> Self { + let pos_to_rec = cg_channels + .iter() + .map(|(rec_pos, cn)| (cn.block_position, *rec_pos)) + .collect(); + let name_to_rec = cg_channels + .iter() + .map(|(rec_pos, cn)| (cn.unique_name.as_str(), *rec_pos)) + .collect(); + Self { + channels: cg_channels, + pos_to_rec, + name_to_rec, + } + } +} + +/// Tracks position and alignment state within a data stream +#[derive(Debug, Clone)] +pub struct StreamState { + /// Current bit position in the stream (P) + pub bit_position: usize, + /// Current alignment offset in bytes (A) - used for alignment calculations + pub alignment_offset: usize, +} + +impl StreamState { + /// Creates a new StreamState starting at position 0 + pub fn new() -> Self { + Self { + bit_position: 0, + alignment_offset: 0, + } + } + + /// Creates a new StreamState with a specified alignment start + pub fn with_alignment_start(alignment_start: usize) -> Self { + Self { + bit_position: 0, + alignment_offset: alignment_start, + } + } + + /// Returns the current byte position (ceiling of bit_position / 8) + pub fn byte_position(&self) -> usize { + self.bit_position.div_ceil(8) + } + + /// Resets the alignment offset to the current byte position + /// Called when CN_F_ALIGNMENT_RESET flag is set + pub fn reset_alignment(&mut self) { + self.alignment_offset = self.byte_position(); + } +} + +impl Default for StreamState { + fn default() -> Self { + Self::new() + } +} + +/// Calculates the aligned byte offset for a channel in data stream mode. 
+/// +/// Per MDF4.3 Section 6.25.3, the alignment formula is: +/// - Find smallest `offset >= P` where `(offset - A) mod AV == 0` +/// - `AV = 2^cn_alignment` (alignment value in bytes) +/// - `A` = alignment_offset (from ds_cn_alignment_start or alignment reset) +/// - `P` = current_bit_pos converted to bytes (rounded up) +/// +/// Special case: if `cn_alignment == 255`, it means bit-packed (no byte alignment), +/// return the current bit position as-is. +/// +/// # Arguments +/// * `current_bit_pos` - Current position in the stream (in bits) +/// * `alignment_offset` - Alignment reference position (in bytes) +/// * `cn_alignment` - Channel alignment value (0-7 = 2^n bytes, 255 = bit-packed) +/// +/// # Returns +/// The byte offset where the channel data starts (or bit offset if bit-packed) +pub fn calculate_aligned_offset( + current_bit_pos: usize, + alignment_offset: usize, + cn_alignment: u8, +) -> usize { + // Bit-packed mode (cn_alignment == 255): no byte alignment + if cn_alignment == 255 { + return current_bit_pos; + } + + // Calculate current byte position (round up) + let current_byte_pos = current_bit_pos.div_ceil(8); + + // cn_alignment of 0 means 1-byte aligned (2^0 = 1) + if cn_alignment == 0 { + return current_byte_pos; + } + + // Calculate alignment value: AV = 2^cn_alignment + let alignment_value = 1usize << cn_alignment; + + // Find smallest offset >= current_byte_pos where (offset - A) mod AV == 0 + // This means offset = A + k*AV for some k, and offset >= current_byte_pos + + // Calculate relative position from alignment offset + if current_byte_pos <= alignment_offset { + // Already at or before alignment offset, align to alignment_offset + return alignment_offset; + } + + let relative_pos = current_byte_pos - alignment_offset; + let remainder = relative_pos % alignment_value; + + if remainder == 0 { + // Already aligned + current_byte_pos + } else { + // Need to advance to next aligned position + current_byte_pos + (alignment_value - 
remainder) + } +} + +/// Calculates the final byte offset after applying byte/bit offsets to an aligned position. +/// +/// When alignment is 255 (bit-packed), the aligned_offset is in bits and we combine +/// byte_offset * 8 + bit_offset in bits, then divide by 8 to get the final byte offset. +/// Otherwise, we simply add byte_offset to the aligned byte offset. +fn calculate_final_byte_offset( + aligned_offset: usize, + alignment: u8, + byte_offset: u32, + bit_offset: u8, +) -> usize { + if alignment == 255 { + (aligned_offset + byte_offset as usize * 8 + bit_offset as usize) / 8 + } else { + aligned_offset + byte_offset as usize + } +} + +/// Extracts u64 values from a channel's already-read data. +/// +/// This is used to get size values (for CLBLOCK) or discriminator values (for CVBLOCK) +/// from channels that have already been read during the fixed-length record processing. +/// +/// # Arguments +/// * `cn` - The channel containing the data +/// +/// # Returns +/// Vector of u64 values extracted from the channel data +pub fn extract_channel_values_as_u64(cn: &Cn4) -> Result> { + cn.data.to_u64_vec().ok_or_else(|| { + anyhow::anyhow!( + "Cannot extract u64 values from channel data type: {:?}", + std::mem::discriminant(&cn.data) + ) + }) +} + +/// Reads a single primitive value from the data buffer at the given offset. 
+/// +/// # Arguments +/// * `data` - The data buffer +/// * `offset` - Byte offset in the buffer +/// * `bit_count` - Number of bits to read +/// * `data_type` - MDF data type (0-5 for numeric types) +/// * `endian` - true for big endian, false for little endian +/// +/// # Returns +/// The value as bytes (for storage in ChannelData) +fn read_primitive_value( + data: &[u8], + offset: usize, + bit_count: u32, + _data_type: u8, + _endian: bool, +) -> Result> { + let byte_count = bit_count.div_ceil(8) as usize; + + if offset + byte_count > data.len() { + bail!( + "Buffer underrun: need {} bytes at offset {}, but only {} bytes available", + byte_count, + offset, + data.len() + ); + } + + let bytes = &data[offset..offset + byte_count]; + Ok(bytes.to_vec()) +} + +/// Decodes a single channel value from the data stream. +/// +/// This handles the basic data types (integers, floats, strings, byte arrays) +/// that can appear as leaf channels in a composition. +/// +/// # Arguments +/// * `data` - The data buffer for this record +/// * `cn` - The channel block metadata +/// * `stream_state` - Current stream position tracking +/// +/// # Returns +/// Tuple of (decoded bytes, bytes consumed) +pub fn decode_single_channel_value( + data: &[u8], + cn: &Cn4, + stream_state: &mut StreamState, +) -> Result<(Vec, usize)> { + // Handle alignment reset flag + if (cn.block.cn_flags & CN_F_ALIGNMENT_RESET) != 0 { + stream_state.reset_alignment(); + } + + // Calculate aligned offset + let aligned_offset = calculate_aligned_offset( + stream_state.bit_position, + stream_state.alignment_offset, + cn.block.cn_alignment, + ); + + // Apply cn_byte_offset and cn_bit_offset + let final_byte_offset = calculate_final_byte_offset( + aligned_offset, + cn.block.cn_alignment, + cn.block.cn_byte_offset, + cn.block.cn_bit_offset, + ); + + let bit_count = cn.block.cn_bit_count; + let byte_count = bit_count.div_ceil(8) as usize; + + // Handle variable-length types with cn_bit_count == 0 + if bit_count == 
0 { + // For VLSD (cn_type == 1) with flat storage: 4-byte length prefix + data + if cn.block.cn_type == 1 { + if final_byte_offset + 4 > data.len() { + bail!("Buffer underrun reading VLSD length prefix"); + } + let length = + u32::from_le_bytes(data[final_byte_offset..final_byte_offset + 4].try_into()?) + as usize; + let total_bytes = 4 + length; + if final_byte_offset + total_bytes > data.len() { + bail!("Buffer underrun reading VLSD data"); + } + let value_bytes = data[final_byte_offset + 4..final_byte_offset + total_bytes].to_vec(); + stream_state.bit_position = (final_byte_offset + total_bytes) * 8; + return Ok((value_bytes, total_bytes)); + } + + // For VLSC (cn_type == 7) with flat storage: size from size channel + // This case should be handled by the caller who knows the size + bail!("VLSC with cn_bit_count == 0 requires size channel value"); + } + + // Read fixed-length value + let value_bytes = read_primitive_value( + data, + final_byte_offset, + bit_count, + cn.block.cn_data_type, + cn.endian, + )?; + + // Update stream position + stream_state.bit_position = (final_byte_offset + byte_count) * 8; + + Ok((value_bytes, byte_count)) +} + +/// Decodes a channel union where all members share the same bytes. 
+/// +/// # Arguments +/// * `data` - The data buffer for this record +/// * `cu_block` - The CUBLOCK metadata +/// * `cg_channels` - All channels in the channel group (to look up members) +/// * `stream_state` - Current stream position tracking +/// +/// # Returns +/// HashMap of member channel name to decoded value bytes +pub fn decode_channel_union( + data: &[u8], + _cu_block: &Cu4Block, + member_channels: &[&Cn4], + stream_state: &mut StreamState, +) -> Result>> { + let mut result = HashMap::new(); + + // All members start at the same position + let start_position = stream_state.bit_position; + let mut max_end_position = start_position; + + for cn in member_channels { + // Reset to union start for each member + stream_state.bit_position = start_position; + + let (value, _) = decode_single_channel_value(data, cn, stream_state)?; + result.insert(cn.unique_name.clone(), value); + + // Track the furthest position reached + if stream_state.bit_position > max_end_position { + max_end_position = stream_state.bit_position; + } + } + + // Set stream position to end of largest member + stream_state.bit_position = max_end_position; + + Ok(result) +} + +/// Decodes a channel variant based on discriminator value. 
+/// +/// # Arguments +/// * `data` - The data buffer for this record +/// * `cv_block` - The CVBLOCK metadata +/// * `option_channels` - The parsed option channels +/// * `discriminator_value` - The discriminator value for this record +/// * `stream_state` - Current stream position tracking +/// +/// # Returns +/// Tuple of (option index, option channel name, decoded value bytes) +pub fn decode_channel_variant( + data: &[u8], + cv_block: &Cv4Block, + option_channels: &[&Cn4], + discriminator_value: u64, + stream_state: &mut StreamState, +) -> Result<(usize, String, Vec)> { + // Find matching option by discriminator value + let option_index = cv_block + .cv_option_val + .iter() + .position(|&val| val == discriminator_value); + + match option_index { + Some(idx) if idx < option_channels.len() => { + let cn = option_channels[idx]; + let (value, _) = decode_single_channel_value(data, cn, stream_state)?; + Ok((idx, cn.unique_name.clone(), value)) + } + _ => { + // No matching option - this is a valid case per spec (data might be invalid) + bail!( + "No matching variant option for discriminator value {}. Available: {:?}", + discriminator_value, + cv_block.cv_option_val + ); + } + } +} + +/// Decodes a channel list (variable-length array). 
+/// +/// # Arguments +/// * `data` - The data buffer for this record +/// * `cl_block` - The CLBLOCK metadata +/// * `element_composition` - The composition defining each element (if any) +/// * `element_cn` - The template channel for list elements +/// * `size_value` - The size (element count or byte count) for this record +/// * `stream_state` - Current stream position tracking +/// +/// # Returns +/// Vector of decoded element values +pub fn decode_channel_list( + data: &[u8], + cl_block: &Cl4Block, + element_cn: &Cn4, + size_value: u64, + stream_state: &mut StreamState, +) -> Result>> { + let mut elements = Vec::new(); + + // Determine number of elements + let element_count = if (cl_block.cl_flags & 0x01) != 0 { + // Size is number of elements + size_value as usize + } else { + // Size is number of bytes - need to calculate element count + // For simple types, divide by element size + let element_size = element_cn.block.cn_bit_count.div_ceil(8) as usize; + if element_size > 0 { + (size_value as usize) / element_size + } else { + 0 + } + }; + + for i in 0..element_count { + if i == 0 { + // First element uses template channel's alignment + let (value, _) = decode_single_channel_value(data, element_cn, stream_state)?; + elements.push(value); + } else { + // Subsequent elements use CLBLOCK's alignment settings + // Handle alignment reset flag from CLBLOCK + if (cl_block.cl_flags & 0x02) != 0 { + // Bit 1: Alignment reset flag + stream_state.reset_alignment(); + } + + // Calculate aligned offset using CLBLOCK settings + let aligned_offset = calculate_aligned_offset( + stream_state.bit_position, + stream_state.alignment_offset, + cl_block.cl_alignment, + ); + + // Apply cl_byte_offset and cl_bit_offset + let final_byte_offset = calculate_final_byte_offset( + aligned_offset, + cl_block.cl_alignment, + cl_block.cl_byte_offset, + cl_block.cl_bit_offset, + ); + + let bit_count = element_cn.block.cn_bit_count; + let byte_count = bit_count.div_ceil(8) as usize; + + if 
final_byte_offset + byte_count > data.len() { + bail!( + "Buffer underrun reading list element {}: need {} bytes at offset {}, have {}", + i, + byte_count, + final_byte_offset, + data.len() + ); + } + + let value = data[final_byte_offset..final_byte_offset + byte_count].to_vec(); + elements.push(value); + + stream_state.bit_position = (final_byte_offset + byte_count) * 8; + } + } + + Ok(elements) +} + +/// Main entry point: Decodes a data stream blob using its composition definition. +/// +/// This function iterates through all records in the blob and decodes each one +/// according to the composition hierarchy. +/// +/// # Arguments +/// * `data` - The raw data blob bytes (may contain multiple records) +/// * `ds_block` - The DSBLOCK metadata +/// * `composition` - The composition tree under the DSBLOCK +/// * `cg_channels` - Reference to all channels in the channel group +/// * `record_offsets` - Byte offsets to each record in the blob (from VLSD/VLSC) +/// * `record_sizes` - Sizes of each record (for VLSC, or calculated for VLSD) +/// +/// # Returns +/// HashMap mapping child channel record positions to their decoded ChannelData +pub fn decode_datastream_blob( + data: &[u8], + ds_block: &Ds4Block, + composition: &Composition, + cg_channels: &CnType, + record_offsets: &[u64], + record_sizes: &[u64], +) -> Result>>> { + let mut result: HashMap>> = HashMap::new(); + let record_count = record_offsets.len(); + + let index = ChannelIndex::new(cg_channels); + + // Get initial alignment start from ds_cn_alignment_start (if specified) + let alignment_start = if ds_block.ds_cn_alignment_start() != 0 { + if let Some(&rec) = index.pos_to_rec.get(&ds_block.ds_cn_alignment_start()) { + if let Some(align_cn) = index.channels.get(&rec) { + extract_channel_values_as_u64(align_cn) + .ok() + .and_then(|v| v.first().copied()) + .unwrap_or(0) as usize + } else { + 0 + } + } else { + 0 + } + } else { + 0 + }; + + // Process each record + for record_idx in 0..record_count { + let 
record_offset = record_offsets[record_idx] as usize; + let record_size = record_sizes[record_idx] as usize; + + if record_offset + record_size > data.len() { + bail!( + "Record {} extends beyond data buffer: offset={}, size={}, data_len={}", + record_idx, + record_offset, + record_size, + data.len() + ); + } + + let record_data = &data[record_offset..record_offset + record_size]; + let mut stream_state = StreamState::with_alignment_start(alignment_start); + + // Decode the composition for this record + decode_composition_record( + record_data, + composition, + &index, + &mut stream_state, + record_idx, + &mut result, + )?; + } + + Ok(result) +} + +/// Recursively decodes a composition for a single record. +fn decode_composition_record( + data: &[u8], + composition: &Composition, + index: &ChannelIndex, + stream_state: &mut StreamState, + record_idx: usize, + result: &mut HashMap>>, +) -> Result<()> { + match &composition.block { + Compo::CN(cn) => { + // Decode this channel + let (value, _) = decode_single_channel_value(data, cn, stream_state)?; + + // O(1) lookup via reverse map + if let Some(&rec_pos) = index.pos_to_rec.get(&cn.block_position) { + result.entry(rec_pos).or_default().push(value); + } + + // Recursively decode nested composition + if let Some(nested) = &composition.compo { + decode_composition_record(data, nested, index, stream_state, record_idx, result)?; + } + } + + Compo::CL(cl_block) => { + // Get size channel values via O(1) lookup + let size_values = if let Some(&rec) = index.pos_to_rec.get(&cl_block.cl_cn_size) { + if let Some(size_cn) = index.channels.get(&rec) { + extract_channel_values_as_u64(size_cn)? 
+ } else { + bail!( + "Size channel not found for CLBLOCK at position {}", + cl_block.cl_cn_size + ); + } + } else { + bail!( + "Size channel not found for CLBLOCK at position {}", + cl_block.cl_cn_size + ); + }; + + let size_value = size_values.get(record_idx).copied().unwrap_or(0); + + // Get element template from nested composition + if let Some(nested) = &composition.compo + && let Compo::CN(element_cn) = &nested.block + { + let elements = + decode_channel_list(data, cl_block, element_cn, size_value, stream_state)?; + + if let Some(&rec_pos) = index.pos_to_rec.get(&element_cn.block_position) { + for elem in elements { + result.entry(rec_pos).or_default().push(elem); + } + } + } + } + + Compo::CV(cv_block) => { + // Get discriminator channel values via O(1) lookup + let discriminator_values = + if let Some(&rec) = index.pos_to_rec.get(&cv_block.cv_cn_discriminator) { + if let Some(disc_cn) = index.channels.get(&rec) { + extract_channel_values_as_u64(disc_cn)? + } else { + bail!("Discriminator channel not found for CVBLOCK"); + } + } else { + bail!("Discriminator channel not found for CVBLOCK"); + }; + + let disc_value = discriminator_values.get(record_idx).copied().unwrap_or(0); + + // Collect option channels via O(1) lookups + let option_channels: Vec<&Cn4> = cv_block + .cv_cn_option + .iter() + .filter_map(|pos| { + index + .pos_to_rec + .get(pos) + .and_then(|rec| index.channels.get(rec)) + }) + .collect(); + + if let Ok((idx, _name, value)) = + decode_channel_variant(data, cv_block, &option_channels, disc_value, stream_state) + && idx < option_channels.len() + && let Some(&rec_pos) = + index.pos_to_rec.get(&option_channels[idx].block_position) + { + result.entry(rec_pos).or_default().push(value); + } + } + + Compo::CU(cu_block) => { + // Collect member channels via O(1) lookups + let member_channels: Vec<&Cn4> = cu_block + .cu_cn_member + .iter() + .filter_map(|pos| { + index + .pos_to_rec + .get(pos) + .and_then(|rec| index.channels.get(rec)) + }) + 
.collect(); + + let decoded = decode_channel_union(data, cu_block, &member_channels, stream_state)?; + + // Store decoded values for all members via O(1) name lookup + for (name, value) in decoded { + if let Some(&rec_pos) = index.name_to_rec.get(name.as_str()) { + result.entry(rec_pos).or_default().push(value); + } + } + } + + Compo::DS(_ds_block) => { + // Nested DS block - recursively decode its composition + if let Some(nested) = &composition.compo { + decode_composition_record(data, nested, index, stream_state, record_idx, result)?; + } + } + + Compo::CA(_ca_block) => { + // Channel array - not typically used in data stream mode + if let Some(nested) = &composition.compo { + decode_composition_record(data, nested, index, stream_state, record_idx, result)?; + } + } + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_calculate_aligned_offset_no_alignment() { + // cn_alignment = 0 means 1-byte aligned (2^0 = 1) + assert_eq!(calculate_aligned_offset(0, 0, 0), 0); + assert_eq!(calculate_aligned_offset(8, 0, 0), 1); // 8 bits = 1 byte + assert_eq!(calculate_aligned_offset(9, 0, 0), 2); // 9 bits -> 2 bytes (rounded up) + assert_eq!(calculate_aligned_offset(16, 0, 0), 2); + } + + #[test] + fn test_calculate_aligned_offset_word_alignment() { + // cn_alignment = 1 means 2-byte (WORD) aligned (2^1 = 2) + assert_eq!(calculate_aligned_offset(0, 0, 1), 0); + assert_eq!(calculate_aligned_offset(8, 0, 1), 2); // 1 byte -> align to 2 + assert_eq!(calculate_aligned_offset(16, 0, 1), 2); + assert_eq!(calculate_aligned_offset(24, 0, 1), 4); // 3 bytes -> align to 4 + } + + #[test] + fn test_calculate_aligned_offset_dword_alignment() { + // cn_alignment = 2 means 4-byte (DWORD) aligned (2^2 = 4) + assert_eq!(calculate_aligned_offset(0, 0, 2), 0); + assert_eq!(calculate_aligned_offset(8, 0, 2), 4); // 1 byte -> align to 4 + assert_eq!(calculate_aligned_offset(16, 0, 2), 4); // 2 bytes -> align to 4 + assert_eq!(calculate_aligned_offset(24, 0, 2), 
4); // 3 bytes -> align to 4 + assert_eq!(calculate_aligned_offset(32, 0, 2), 4); // 4 bytes -> already aligned + assert_eq!(calculate_aligned_offset(40, 0, 2), 8); // 5 bytes -> align to 8 + } + + #[test] + fn test_calculate_aligned_offset_with_alignment_offset() { + // With alignment offset = 2, cn_alignment = 2 (4-byte) + // Valid positions: 2, 6, 10, 14, ... + assert_eq!(calculate_aligned_offset(0, 2, 2), 2); + assert_eq!(calculate_aligned_offset(8, 2, 2), 2); // 1 byte at pos 2 + assert_eq!(calculate_aligned_offset(24, 2, 2), 6); // 3 bytes -> align to 6 + assert_eq!(calculate_aligned_offset(48, 2, 2), 6); // 6 bytes -> already at 6 + assert_eq!(calculate_aligned_offset(56, 2, 2), 10); // 7 bytes -> align to 10 + } + + #[test] + fn test_calculate_aligned_offset_bit_packed() { + // cn_alignment = 255 means bit-packed + assert_eq!(calculate_aligned_offset(0, 0, 255), 0); + assert_eq!(calculate_aligned_offset(5, 0, 255), 5); // returns bit position as-is + assert_eq!(calculate_aligned_offset(13, 0, 255), 13); + } + + #[test] + fn test_stream_state_basic() { + let mut state = StreamState::new(); + assert_eq!(state.bit_position, 0); + assert_eq!(state.byte_position(), 0); + + state.bit_position += 3 * 8; + assert_eq!(state.bit_position, 24); + assert_eq!(state.byte_position(), 3); + + state.bit_position += 5; + assert_eq!(state.bit_position, 29); + assert_eq!(state.byte_position(), 4); // ceil(29/8) = 4 + } + + #[test] + fn test_stream_state_alignment_reset() { + let mut state = StreamState::new(); + state.bit_position += 7 * 8; + assert_eq!(state.alignment_offset, 0); + + state.reset_alignment(); + assert_eq!(state.alignment_offset, 7); + } +} From ba5460a9157aff2c30ffd6027afd837b3f966c94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Sun, 1 Feb 2026 18:11:14 +0100 Subject: [PATCH 10/46] optimisations -28% on simple files --- src/mdfreader/conversions4.rs | 476 +++++++++++++++------------------- src/mdfreader/mdfreader4.rs | 
33 +-- 2 files changed, 231 insertions(+), 278 deletions(-) diff --git a/src/mdfreader/conversions4.rs b/src/mdfreader/conversions4.rs index 1341974..93e1a37 100644 --- a/src/mdfreader/conversions4.rs +++ b/src/mdfreader/conversions4.rs @@ -1,13 +1,7 @@ //! this modules implements functions to convert arrays into physical arrays using CCBlock use anyhow::{Context, Error, Result, bail}; -use arrow::array::{ - Array, ArrayBuilder, AsArray, Float64Array, Float64Builder, LargeStringBuilder, - PrimitiveBuilder, -}; -use arrow::compute::cast; +use arrow::array::{ArrayBuilder, Float64Builder, LargeStringBuilder, PrimitiveBuilder}; use arrow::datatypes::{ArrowPrimitiveType, Float32Type, Float64Type}; -use arrow::datatypes::{DataType, Int64Type}; -use arrow::error::ArrowError; use itertools::Itertools; use log::warn; use num::abs; @@ -173,19 +167,9 @@ where ::Native: AsPrimitive, T::Native: NumCast, { - let mut array_f64: Float64Builder = array - .finish() - .try_unary(|value| { - num::cast::cast::(value) - .ok_or_else(|| ArrowError::CastError(format!("Can't cast value {value:?} to f64"))) - }) - .context("failed converting array to f64")? 
- .into_builder() - .expect("Failed getting mutable F64"); - array_f64.values_slice_mut().iter_mut().for_each(|x| { - *x = *x * p2 + p1; - }); - Ok(array_f64) + let values = array.values_slice(); + let converted: Vec = values.iter().map(|v| (*v).as_() * p2 + p1).collect(); + Ok(Float64Builder::new_from_buffer(converted.into(), None)) } /// Apply linear conversion to get physical data @@ -249,10 +233,10 @@ fn linear_conversion(cn: &mut Cn4, cc_val: &[f64]) -> Result<(), Error> { ); } ChannelData::Float64(a) => { - cn.data = ChannelData::Float64( - linear_calculation(a, p1, p2) - .context("failed linear conversion of f64 channel")?, - ); + // In-place conversion for f64 - avoids all allocations + a.values_slice_mut().iter_mut().for_each(|x| { + *x = *x * p2 + p1; + }); } ChannelData::Complex32(a) => { cn.data = ChannelData::Complex64(ComplexArrow::new_from_primitive( @@ -383,16 +367,15 @@ where let p4 = cc_val[3]; let p5 = cc_val[4]; let p6 = cc_val[5]; - let array_f64: Float64Array = cast(&(array.finish_cloned()), &DataType::Float64) - .context("failed converting Array to f64 Array")? 
- .as_primitive::() - .clone(); - let array_f64 = array_f64 - .unary_mut(|x| (x * x * p1 + x * p2 + p3) / (x * x * p4 + x * p5 + p6)) - .expect("error applying rational conversion"); - Ok(array_f64 - .into_builder() - .expect("failed converting to builder")) + let values = array.values_slice(); + let converted: Vec = values + .iter() + .map(|v| { + let x: f64 = (*v).as_(); + (x * x * p1 + x * p2 + p3) / (x * x * p4 + x * p5 + p6) + }) + .collect(); + Ok(Float64Builder::new_from_buffer(converted.into(), None)) } /// Apply rational conversion to get physical data @@ -453,10 +436,16 @@ fn rational_conversion(cn: &mut Cn4, cc_val: &[f64]) -> Result<(), Error> { ); } ChannelData::Float64(a) => { - cn.data = ChannelData::Float64( - rational_calculation(a, cc_val) - .context("failed rational conversion of f64 channel")?, - ); + let p1 = cc_val[0]; + let p2 = cc_val[1]; + let p3 = cc_val[2]; + let p4 = cc_val[3]; + let p5 = cc_val[4]; + let p6 = cc_val[5]; + a.values_slice_mut().iter_mut().for_each(|x| { + let v = *x; + *x = (v * v * p1 + v * p2 + p3) / (v * v * p4 + v * p5 + p6); + }); } ChannelData::Complex32(a) => { cn.data = ChannelData::Complex64(ComplexArrow::new_from_primitive( @@ -581,41 +570,25 @@ where ::Native: AsPrimitive, T::Native: NumCast, { - let array_f64: Float64Array = array - .finish_cloned() - .try_unary(|value| { - num::cast::cast::(value) - .ok_or_else(|| ArrowError::CastError(format!("Can't cast value {value:?} to f64"))) - }) - .context("failed converting array to f64")?; - let mut new_array = vec![0f64; array_f64.len()]; - new_array - .iter_mut() - .zip(array_f64.values()) - .for_each(|(new_a, a)| { - let mut map = BTreeMap::new(); - map.insert("X".to_string(), *a); - let val = compiled.eval(slab, &mut map); - *new_a = match val { - Ok(val) => val, - Err(err) => { - warn!( - "could not compute the value {a:?} with expression {compiled:?}, error {err}" - ); - *a - } + let values = array.values_slice(); + let mut new_array = vec![0f64; 
values.len()]; + let mut map = BTreeMap::new(); + new_array.iter_mut().zip(values).for_each(|(new_a, v)| { + let a: f64 = (*v).as_(); + map.clear(); + map.insert("X".to_string(), a); + let val = compiled.eval(slab, &mut map); + *new_a = match val { + Ok(val) => val, + Err(err) => { + warn!( + "could not compute the value {a:?} with expression {compiled:?}, error {err}" + ); + a } - }); - Ok(PrimitiveBuilder::new_from_buffer( - new_array.into(), - array_f64.nulls().map(|null_buffer| { - null_buffer - .inner() - .sliced() - .into_mutable() - .expect("failed converting null_buffer into mutable") - }), - )) + } + }); + Ok(PrimitiveBuilder::new_from_buffer(new_array.into(), None)) } /// Apply algebraic conversion to get physical data @@ -817,20 +790,15 @@ where ::Native: AsPrimitive, T::Native: NumCast, { - let array_f64: Float64Array = array - .finish_cloned() - .try_unary(|value| { - num::cast::cast::(value) - .ok_or_else(|| ArrowError::CastError(format!("Can't cast value {value:?} to f64"))) - }) - .context("failed converting array to f64")?; - let mut new_array = vec![0f64; array_f64.len()]; + let values = array.values_slice(); + let mut new_array = vec![0f64; values.len()]; new_array .iter_mut() - .zip(array_f64.values()) - .for_each(|(new_array, a)| { - *new_array = match val - .binary_search_by(|&(xi, _)| xi.partial_cmp(a).unwrap_or(Ordering::Equal)) + .zip(values) + .for_each(|(new_a, v)| { + let a: f64 = (*v).as_(); + *new_a = match val + .binary_search_by(|&(xi, _)| xi.partial_cmp(&a).unwrap_or(Ordering::Equal)) { Ok(idx) => *val[idx].1, Err(0) => *val[0].1, @@ -1017,17 +985,15 @@ where ::Native: AsPrimitive, T::Native: NumCast, { - let array_f64: Float64Array = cast(&array.finish(), &DataType::Float64) - .context("failed converting Array to f64 Array")? 
- .as_primitive::() - .clone(); - let mut new_array = vec![0f64; array_f64.len()]; + let values = array.values_slice(); + let mut new_array = vec![0f64; values.len()]; new_array .iter_mut() - .zip(array_f64.values()) - .for_each(|(new_array, a)| { - *new_array = match val - .binary_search_by(|&(xi, _)| xi.partial_cmp(a).unwrap_or(Ordering::Equal)) + .zip(values) + .for_each(|(new_a, v)| { + let a: f64 = (*v).as_(); + *new_a = match val + .binary_search_by(|&(xi, _)| xi.partial_cmp(&a).unwrap_or(Ordering::Equal)) { Ok(idx) => *val[idx].1, Err(0) => *val[0].1, @@ -1240,23 +1206,21 @@ where ::Native: AsPrimitive, T::Native: NumCast, { - let array_f64: Float64Array = cast(&array.finish_cloned(), &DataType::Float64) - .context("failed converting Array to f64 Array")? - .as_primitive::() - .clone(); - let mut new_array = vec![0f64; array_f64.len()]; + let values = array.values_slice(); + let mut new_array = vec![0f64; values.len()]; new_array .iter_mut() - .zip(array_f64.values()) - .for_each(|(new_array, a)| { - *new_array = match val - .binary_search_by(|&(xi, _, _)| xi.partial_cmp(a).unwrap_or(Ordering::Equal)) + .zip(values) + .for_each(|(new_a, v)| { + let a: f64 = (*v).as_(); + *new_a = match val + .binary_search_by(|&(xi, _, _)| xi.partial_cmp(&a).unwrap_or(Ordering::Equal)) { Ok(idx) => val[idx].2, Err(0) => *default_value, - Err(idx) if (idx >= val.len() && *a <= val[idx - 1].1) => val[idx - 1].2, + Err(idx) if (idx >= val.len() && a <= val[idx - 1].1) => val[idx - 1].2, Err(idx) => { - if *a <= val[idx].1 { + if a <= val[idx].1 { val[idx].2 } else { *default_value @@ -1374,6 +1338,7 @@ fn value_to_text_calculation_int( sharable: &SharableBlocks, ) -> Result where + ::Native: AsPrimitive + AsPrimitive, { // table applicable only to integers, no canonization let mut table_int: HashMap = HashMap::with_capacity(cc_val.len()); @@ -1393,45 +1358,37 @@ where } } } - let array_f64: Float64Array = cast(&array.finish_cloned(), &DataType::Float64) - .context("failed 
converting Array to f64 Array")? - .as_primitive::() - .clone(); - let array_i64 = cast(&array.finish_cloned(), &DataType::Int64) - .context("failed converting Array to Int64 Array")? - .as_primitive::() - .clone(); - let mut new_array = LargeStringBuilder::with_capacity(array_f64.len(), 32); - array_f64 - .iter() - .zip(array_i64.iter()) - .for_each(|(a_f64, a_i64)| { - if let Some(tosc) = table_int.get(&a_i64.unwrap_or_default()) { - match tosc { - TextOrScaleConversion::Txt(txt) => { - new_array.append_value(txt.clone()); - } - TextOrScaleConversion::Scale(conv) => { - new_array.append_value(conv.eval_to_txt(a_f64.unwrap_or(0f64))); - } - _ => { - new_array.append_value(a_f64.unwrap_or(0f64).to_string()); - } + let values = array.values_slice(); + let mut new_array = LargeStringBuilder::with_capacity(values.len(), 32); + values.iter().for_each(|v| { + let a_f64: f64 = (*v).as_(); + let a_i64: i64 = (*v).as_(); + if let Some(tosc) = table_int.get(&a_i64) { + match tosc { + TextOrScaleConversion::Txt(txt) => { + new_array.append_value(txt.clone()); } - } else { - match &def { - DefaultTextOrScaleConversion::DefaultTxt(txt) => { - new_array.append_value(txt.clone()); - } - DefaultTextOrScaleConversion::DefaultScale(conv) => { - new_array.append_value(conv.eval_to_txt(a_f64.unwrap_or(0f64))); - } - _ => { - new_array.append_value(a_f64.unwrap_or(0f64).to_string()); - } + TextOrScaleConversion::Scale(conv) => { + new_array.append_value(conv.eval_to_txt(a_f64)); + } + _ => { + new_array.append_value(a_f64.to_string()); } } - }); + } else { + match &def { + DefaultTextOrScaleConversion::DefaultTxt(txt) => { + new_array.append_value(txt.clone()); + } + DefaultTextOrScaleConversion::DefaultScale(conv) => { + new_array.append_value(conv.eval_to_txt(a_f64)); + } + _ => { + new_array.append_value(a_f64.to_string()); + } + } + } + }); if let Some(validity) = array.validity_slice_mut() { let _ = new_array.validity_slice_mut().insert(validity); } @@ -1729,7 +1686,10 @@ fn 
value_range_to_text_calculation( cc_val: &[f64], cc_ref: &[i64], sharable: &SharableBlocks, -) -> LargeStringBuilder { +) -> LargeStringBuilder +where + ::Native: AsPrimitive, +{ let n_keys = cc_val.len() / 2; let mut keys: Vec = Vec::with_capacity(n_keys); for (key_min, key_max) in cc_val.iter().tuples() { @@ -1769,12 +1729,10 @@ fn value_range_to_text_calculation( } } } - let mut new_array = LargeStringBuilder::with_capacity(array.len(), 32); - let array_f64: Float64Array = cast(&array.finish_cloned(), &DataType::Float64) - .expect("failed converting Array to f64 Array") - .as_primitive::() - .clone(); - array_f64.values().iter().for_each(|a| { + let values = array.values_slice(); + let mut new_array = LargeStringBuilder::with_capacity(values.len(), 32); + values.iter().for_each(|v| { + let a: &f64 = &(*v).as_(); let matched_key = keys .iter() .enumerate() @@ -1992,7 +1950,10 @@ fn bitfield_text_table_calculation( cc_val: &[u64], cc_ref: &[i64], sharable: &SharableBlocks, -) -> Result { +) -> Result +where + ::Native: AsPrimitive + AsPrimitive, +{ let mut table: Vec<(ValueOrValueRangeToText, Option)> = Vec::with_capacity(cc_ref.len()); for pointer in cc_ref.iter() { @@ -2105,136 +2066,127 @@ fn bitfield_text_table_calculation( } } - let array_f64 = cast(&array.finish_cloned(), &DataType::Float64) - .context("failed converting Array to f64 Array")? - .as_primitive::() - .clone(); - let array_i64 = cast(&array.finish_cloned(), &DataType::Int64) - .context("failed converting Array to Int64 Array")? 
- .as_primitive::() - .clone(); - let mut new_array = LargeStringBuilder::with_capacity(array.len(), 32); - array_f64 - .iter() - .zip(array_i64.iter()) - .for_each(|(a, a_i64)| { - let mut new_a = String::new(); - for (ind, val) in cc_val.iter().enumerate() { - match &table[ind] { - (ValueOrValueRangeToText::ValueToText(table_int, def), name) => { - let ref_val = - a_i64.unwrap_or_default() & (val.to_i64().unwrap_or_default()); - if let Some(tosc) = table_int.get(&ref_val) { - match tosc { - TextOrScaleConversion::Txt(txt) => { - if let Some(n) = name { - new_a = format!("{} | {} = {}", new_a, n, txt.clone()); - } else { - new_a = format!("{} | {}", new_a, txt.clone()); - } - } - TextOrScaleConversion::Scale(conv) => { - if let Some(n) = name { - new_a = format!( - "{} | {} = {}", - new_a, - n, - conv.eval_to_txt(a.unwrap_or_default()) - ); - } else { - new_a = format!( - "{} | {}", - new_a, - conv.eval_to_txt(a.unwrap_or_default()) - ); - } - } - _ => { - new_a = format!("{} | {}", new_a, "nothing"); + let values = array.values_slice(); + let mut new_array = LargeStringBuilder::with_capacity(values.len(), 32); + values.iter().for_each(|v| { + let a_f64: f64 = (*v).as_(); + let a_i64: i64 = (*v).as_(); + let mut new_a = String::new(); + for (ind, val) in cc_val.iter().enumerate() { + match &table[ind] { + (ValueOrValueRangeToText::ValueToText(table_int, def), name) => { + let ref_val = a_i64 & (val.to_i64().unwrap_or_default()); + if let Some(tosc) = table_int.get(&ref_val) { + match tosc { + TextOrScaleConversion::Txt(txt) => { + if let Some(n) = name { + new_a = format!("{} | {} = {}", new_a, n, txt.clone()); + } else { + new_a = format!("{} | {}", new_a, txt.clone()); } } - } else { - match &def { - DefaultTextOrScaleConversion::DefaultTxt(txt) => { - new_a.clone_from(txt); - } - DefaultTextOrScaleConversion::DefaultScale(conv) => { - new_a = conv.eval_to_txt(a.unwrap_or(0f64)); - } - _ => { - new_a = format!("{} | {}", new_a, "nothing"); + 
TextOrScaleConversion::Scale(conv) => { + if let Some(n) = name { + new_a = format!( + "{} | {} = {}", + new_a, + n, + conv.eval_to_txt(a_f64) + ); + } else { + new_a = format!( + "{} | {}", + new_a, + conv.eval_to_txt(a_f64) + ); } } + _ => { + new_a = format!("{} | {}", new_a, "nothing"); + } + } + } else { + match &def { + DefaultTextOrScaleConversion::DefaultTxt(txt) => { + new_a.clone_from(txt); + } + DefaultTextOrScaleConversion::DefaultScale(conv) => { + new_a = conv.eval_to_txt(a_f64); + } + _ => { + new_a = format!("{} | {}", new_a, "nothing"); + } } } - (ValueOrValueRangeToText::ValueRangeToText(txt, def, keys), name) => { - let matched_key = keys.iter().enumerate().find(|&x| { - (x.1.min <= a.unwrap_or_default()) && (a.unwrap_or_default() <= x.1.max) - }); - if let Some(key) = matched_key { - match &txt[key.0] { - TextOrScaleConversion::Txt(txt) => { - if let Some(n) = name { - new_a = format!("{} | {} = {}", new_a, n, txt.clone()); - } else { - new_a = format!("{} | {}", new_a, txt.clone()); - } - } - TextOrScaleConversion::Scale(conv) => { - if let Some(n) = name { - new_a = format!( - "{} | {} = {}", - new_a, - n, - conv.eval_to_txt(a.unwrap_or_default()) - ); - } else { - new_a = format!( - "{} | {}", - new_a, - conv.eval_to_txt(a.unwrap_or_default()) - ); - } - } - _ => { - new_array.append_value(format!("{} | {}", new_a, "nothing")); + } + (ValueOrValueRangeToText::ValueRangeToText(txt, def, keys), name) => { + let matched_key = keys.iter().enumerate().find(|&x| { + (x.1.min <= a_f64) && (a_f64 <= x.1.max) + }); + if let Some(key) = matched_key { + match &txt[key.0] { + TextOrScaleConversion::Txt(txt) => { + if let Some(n) = name { + new_a = format!("{} | {} = {}", new_a, n, txt.clone()); + } else { + new_a = format!("{} | {}", new_a, txt.clone()); } } - } else { - match &def { - DefaultTextOrScaleConversion::DefaultTxt(txt) => { - if let Some(n) = name { - new_a = format!("{} | {} = {}", new_a, n, txt.clone()); - } else { - new_a = format!("{} | 
{}", new_a, txt.clone()); - } + TextOrScaleConversion::Scale(conv) => { + if let Some(n) = name { + new_a = format!( + "{} | {} = {}", + new_a, + n, + conv.eval_to_txt(a_f64) + ); + } else { + new_a = format!( + "{} | {}", + new_a, + conv.eval_to_txt(a_f64) + ); } - DefaultTextOrScaleConversion::DefaultScale(conv) => { - if let Some(n) = name { - new_a = format!( - "{} | {} = {}", - new_a, - n, - conv.eval_to_txt(a.unwrap_or_default()) - ); - } else { - new_a = format!( - "{} | {}", - new_a, - conv.eval_to_txt(a.unwrap_or_default()) - ); - } + } + _ => { + new_array.append_value(format!("{} | {}", new_a, "nothing")); + } + } + } else { + match &def { + DefaultTextOrScaleConversion::DefaultTxt(txt) => { + if let Some(n) = name { + new_a = format!("{} | {} = {}", new_a, n, txt.clone()); + } else { + new_a = format!("{} | {}", new_a, txt.clone()); } - _ => { - new_a = format!("{} | {}", new_a, "nothing"); + } + DefaultTextOrScaleConversion::DefaultScale(conv) => { + if let Some(n) = name { + new_a = format!( + "{} | {} = {}", + new_a, + n, + conv.eval_to_txt(a_f64) + ); + } else { + new_a = format!( + "{} | {}", + new_a, + conv.eval_to_txt(a_f64) + ); } } + _ => { + new_a = format!("{} | {}", new_a, "nothing"); + } } } } } - new_array.append_value(new_a); - }); + } + new_array.append_value(new_a); + }); Ok(new_array) } diff --git a/src/mdfreader/mdfreader4.rs b/src/mdfreader/mdfreader4.rs index 1b423c5..dcb0005 100644 --- a/src/mdfreader/mdfreader4.rs +++ b/src/mdfreader/mdfreader4.rs @@ -50,11 +50,11 @@ pub fn mdfreader4<'a>( }; // read file data for (_dg_position, dg) in info.dg.iter_mut() { - // Let's find channel names + // Let's find channel names to read in this data group channel_names_present_in_dg = HashSet::new(); for channel_group in dg.cg.values() { - let cn = channel_group.channel_names.clone(); - channel_names_present_in_dg.par_extend(cn); + channel_names_present_in_dg + .extend(channel_group.channel_names.iter().cloned()); } let 
channel_names_to_read_in_dg: HashSet<_> = channel_names_present_in_dg .into_par_iter() @@ -1150,12 +1150,14 @@ fn read_all_channels_sorted( // read by chunks and store in channel array let mut previous_index: usize = 0; let mut vlsd_channels: Vec<(u8, i32)> = Vec::new(); + // Allocate buffer once and reuse across chunks + let max_chunk_size = chunks.iter().map(|c| c.1).max().unwrap_or(0); + let mut data_chunk = vec![0u8; max_chunk_size]; for (n_record_chunk, chunk_size) in chunks { - let mut data_chunk = vec![0u8; chunk_size]; - rdr.read_exact(&mut data_chunk) + rdr.read_exact(&mut data_chunk[..chunk_size]) .context("Could not read data chunk")?; vlsd_channels = read_channels_from_bytes( - &data_chunk, + &data_chunk[..chunk_size], &mut channel_group.cn, channel_group.record_length as usize, previous_index, @@ -1215,20 +1217,19 @@ fn read_all_channels_unsorted( // reads the sorted data block into chunks let mut data: Vec = Vec::new(); - let mut data_chunk: Vec; + let mut data_chunk = vec![0u8; CHUNK_SIZE_READING_4]; while position < data_block_length { - if (data_block_length - position) > CHUNK_SIZE_READING_4 { - // not last chunk of data - data_chunk = vec![0u8; CHUNK_SIZE_READING_4]; + let chunk_size = if (data_block_length - position) > CHUNK_SIZE_READING_4 { position += CHUNK_SIZE_READING_4; + CHUNK_SIZE_READING_4 } else { - // last chunk of data - data_chunk = vec![0u8; data_block_length - position]; - position += data_block_length - position; - } - rdr.read_exact(&mut data_chunk) + let remaining = data_block_length - position; + position += remaining; + remaining + }; + rdr.read_exact(&mut data_chunk[..chunk_size]) .context("Could not read data chunk")?; - data.extend(data_chunk); + data.extend_from_slice(&data_chunk[..chunk_size]); read_all_channels_unsorted_from_bytes( &mut data, dg, From 8413f78e741c47833a30ab35cdad764d74c7e8ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Mon, 2 Feb 2026 00:19:34 +0100 Subject: 
[PATCH 11/46] added unfinalised handling, fixed halffloat, introduced SR and included Data Stream in Write --- src/data_holder/channel_data.rs | 13 ++- src/mdfinfo.rs | 96 +++++++++++++++++++++- src/mdfinfo/mdfinfo4.rs | 130 ++++++++++++++++++++++++++++-- src/mdfr.rs | 40 ++++++++++ src/mdfreader.rs | 12 +++ src/mdfwriter/mdfwriter4.rs | 137 ++++++++++++++++++++++---------- 6 files changed, 379 insertions(+), 49 deletions(-) diff --git a/src/data_holder/channel_data.rs b/src/data_holder/channel_data.rs index 000ee40..2bd291e 100644 --- a/src/data_holder/channel_data.rs +++ b/src/data_holder/channel_data.rs @@ -10,8 +10,8 @@ use arrow::array::{ }; use arrow::buffer::{MutableBuffer, NullBuffer}; use arrow::datatypes::{ - DataType, Float32Type, Float64Type, Int8Type, Int16Type, Int32Type, Int64Type, UInt8Type, - UInt16Type, UInt32Type, UInt64Type, + DataType, Float16Type, Float32Type, Float64Type, Int8Type, Int16Type, Int32Type, Int64Type, + UInt8Type, UInt16Type, UInt32Type, UInt64Type, }; use arrow::util::display::{ArrayFormatter, FormatOptions}; use itertools::Itertools; @@ -1806,7 +1806,14 @@ pub fn try_from(value: &dyn Array) -> Result { data.iter().for_each(|v| new_data.append_option(v)); Ok(ChannelData::UInt64(new_data)) } - DataType::Float16 => todo!(), + DataType::Float16 => { + let data = as_primitive_array::(value); + let mut new_data = PrimitiveBuilder::::with_capacity(data.len()); + data.iter().for_each(|v| { + new_data.append_option(v.map(|f| f.to_f32())) + }); + Ok(ChannelData::Float32(new_data)) + } DataType::Float32 => { let data = as_primitive_array::(value); let mut new_data = PrimitiveBuilder::with_capacity(data.len()); diff --git a/src/mdfinfo.rs b/src/mdfinfo.rs index 6c615b3..b8cb0ed 100644 --- a/src/mdfinfo.rs +++ b/src/mdfinfo.rs @@ -7,7 +7,7 @@ use arrow::array::Array; use binrw::{BinReaderExt, binrw}; use codepage::to_encoding; use encoding_rs::Encoding; -use log::info; +use log::{info, warn}; use std::collections::HashMap; use 
std::collections::HashSet; use std::fmt; @@ -36,6 +36,29 @@ use self::mdfinfo4::{At4Block, Ch4Block, Ev4Block, FhBlock}; use self::sym_buf_reader::SymBufReader; use crate::mdfreader::{DataSignature, MasterSignature}; +/// "UnFinMF " identifier bytes for unfinalized MDF files +const UNFINALIZED_ID: [u8; 8] = [85, 110, 70, 105, 110, 77, 70, 32]; + +/// Standard unfinalization flag bits (id_unfin_flags) +/// Bit 0: Update of cycle counters for CG-/CABLOCK required +const UNFIN_CG_CYCLE_COUNTERS: u16 = 1 << 0; +/// Bit 1: Update of cycle counters for SRBLOCKs required +const UNFIN_SR_CYCLE_COUNTERS: u16 = 1 << 1; +/// Bit 2: Update of length for last DTBLOCK required +const UNFIN_LAST_DT_LENGTH: u16 = 1 << 2; +/// Bit 3: Update of length for last RDBLOCK required +const UNFIN_LAST_RD_LENGTH: u16 = 1 << 3; +/// Bit 4: Update of last DLBLOCK in each chained list required +const UNFIN_LAST_DL_BLOCK: u16 = 1 << 4; +/// Bit 5: Update of cg_data_bytes and cg_inval_bytes in VLSD CGBLOCK required +const UNFIN_VLSD_CG_BYTES: u16 = 1 << 5; +/// Bit 6: Update of offset values for VLSD channel required +const UNFIN_VLSD_OFFSET: u16 = 1 << 6; +/// Bit 7: Update of cg_data_bytes and cg_inval_bytes in VLSC CGBLOCK required +const UNFIN_VLSC_CG_BYTES: u16 = 1 << 7; +/// Bit 8: Update of offset values for VLSC channel required +const UNFIN_VLSC_OFFSET: u16 = 1 << 8; + /// joins mdf versions 3.x and 4.x #[derive(Debug)] #[repr(C)] @@ -114,6 +137,52 @@ impl MdfInfo { .context("Could not parse buffer into IdBlock structure")?; info!("Read IdBlock"); + // Check for unfinalized MDF file (MDF 4.x feature, version-independent) + let id_unfin_flags = id.id_unfin_flags; + let id_custom_unfin_flags = id.id_custom_unfin_flags; + let is_unfinalized = id.id_file_id == UNFINALIZED_ID || id_unfin_flags != 0; + if is_unfinalized { + warn!( + "Unfinalized MDF file detected (id_unfin_flags=0x{:04X}, id_custom_unfin_flags=0x{:04X}). 
\ + Data may be incomplete or metadata may be inaccurate.", + id_unfin_flags, + id_custom_unfin_flags, + ); + if id_unfin_flags & UNFIN_CG_CYCLE_COUNTERS != 0 { + warn!(" Bit 0: CG/CA cycle counters may be incorrect"); + } + if id_unfin_flags & UNFIN_SR_CYCLE_COUNTERS != 0 { + warn!(" Bit 1: SR cycle counters may be incorrect"); + } + if id_unfin_flags & UNFIN_LAST_DT_LENGTH != 0 { + warn!(" Bit 2: Last DT block length may be incorrect"); + } + if id_unfin_flags & UNFIN_LAST_RD_LENGTH != 0 { + warn!(" Bit 3: Last RD block length may be incorrect"); + } + if id_unfin_flags & UNFIN_LAST_DL_BLOCK != 0 { + warn!(" Bit 4: Last DL block may have incorrect dl_count or NIL links"); + } + if id_unfin_flags & UNFIN_VLSD_CG_BYTES != 0 { + warn!(" Bit 5: VLSD CG data_bytes/inval_bytes may be incorrect"); + } + if id_unfin_flags & UNFIN_VLSD_OFFSET != 0 { + warn!(" Bit 6: VLSD channel offsets may be incorrect"); + } + if id_unfin_flags & UNFIN_VLSC_CG_BYTES != 0 { + warn!(" Bit 7: VLSC CG data_bytes/inval_bytes may be incorrect"); + } + if id_unfin_flags & UNFIN_VLSC_OFFSET != 0 { + warn!(" Bit 8: VLSC channel offsets may be incorrect"); + } + if id_custom_unfin_flags != 0 { + warn!( + " Custom finalization flags set (tool-specific): 0x{:04X}", + id_custom_unfin_flags + ); + } + } + // Depending of version different blocks let mdf_info: MdfInfo = if id.id_ver < 400 { let mut sharable: SharableBlocks3 = SharableBlocks3 { @@ -202,6 +271,7 @@ impl MdfInfo { sharable, channel_names_set, ch, + is_unfinalized, })) }; info!("Finished reading metadata"); @@ -214,6 +284,23 @@ impl MdfInfo { MdfInfo::V4(mdfinfo4) => mdfinfo4.id_block.id_ver, } } + /// returns true if the file was marked as unfinalized + pub fn is_unfinalized(&self) -> bool { + match self { + MdfInfo::V3(_) => false, + MdfInfo::V4(mdfinfo4) => mdfinfo4.is_unfinalized, + } + } + /// returns the standard and custom unfinalization flags (0, 0) if finalized or MDF3 + pub fn get_unfin_flags(&self) -> (u16, u16) { + match self 
{ + MdfInfo::V3(_) => (0, 0), + MdfInfo::V4(mdfinfo4) => ( + mdfinfo4.id_block.id_unfin_flags, + mdfinfo4.id_block.id_custom_unfin_flags, + ), + } + } /// returns channel's unit string pub fn get_channel_unit(&self, channel_name: &str) -> Result> { let unit: Option = match self { @@ -522,6 +609,13 @@ impl MdfInfo { MdfInfo::V4(mdfinfo4) => mdfinfo4.list_source_information(), } } + /// List sample reduction blocks for all channel groups (MDF 4.x only) + pub fn list_sample_reductions(&self) -> String { + match self { + MdfInfo::V3(_) => String::new(), + MdfInfo::V4(mdfinfo4) => mdfinfo4.list_sample_reductions(), + } + } } impl fmt::Display for MdfInfo { diff --git a/src/mdfinfo/mdfinfo4.rs b/src/mdfinfo/mdfinfo4.rs index 9ab46a1..ef184e8 100644 --- a/src/mdfinfo/mdfinfo4.rs +++ b/src/mdfinfo/mdfinfo4.rs @@ -103,6 +103,8 @@ pub struct MdfInfo4 { pub channel_names_set: ChannelNamesSet, // set of channel names /// channel hierarchy blocks pub ch: HashMap, + /// whether the file was marked as unfinalized + pub is_unfinalized: bool, } /// MdfInfo4's implementation @@ -246,6 +248,7 @@ impl MdfInfo4 { ev: HashMap::new(), hd_block: Hd4::default(), ch: HashMap::new(), + is_unfinalized: false, } } /// Adds a new channel in memory (no file modification) @@ -373,6 +376,7 @@ impl MdfInfo4 { record_length: n_bytes, vlsd_cg: None, invalid_bytes: None, + sr: Vec::new(), }; cg.cn.insert(0, cn); cg.channel_names.insert(channel_name.to_string()); @@ -545,6 +549,40 @@ impl MdfInfo4 { } output } + /// list sample reduction blocks for all channel groups + pub fn list_sample_reductions(&self) -> String { + let mut output = String::new(); + let sync_type_name = |st: u8| match st { + 1 => "time (s)", + 2 => "angle (rad)", + 3 => "distance (m)", + 4 => "index", + _ => "unknown", + }; + for (_dg_pos, dg) in self.dg.iter() { + for (rec_id, cg) in dg.cg.iter() { + if !cg.sr.is_empty() { + output.push_str(&format!( + "Channel group (rec_id={}): {} sample reduction(s)\n", + rec_id, + cg.sr.len() + 
)); + for (i, sr) in cg.sr.iter().enumerate() { + output.push_str(&format!( + " SR[{}]: cycle_count={}, interval={}, sync_type={} ({}), flags=0x{:02X}\n", + i, + sr.sr_cycle_count, + sr.sr_interval, + sr.sr_sync_type, + sync_type_name(sr.sr_sync_type), + sr.sr_flags, + )); + } + } + } + } + output + } /// list source information blocks pub fn list_source_information(&self) -> String { let mut output = String::new(); @@ -2053,6 +2091,10 @@ fn parse_cg4_block( let record_length = cg.cg_data_bytes; + // Parse Sample Reduction blocks if present + let (sr_blocks, pos) = parse_sr4(rdr, cg.cg_sr_first, position)?; + position = pos; + let cg_struct = Cg4 { header, block: cg, @@ -2063,11 +2105,51 @@ fn parse_cg4_block( block_position: target, vlsd_cg: None, invalid_bytes: None, + sr: sr_blocks, }; Ok((cg_struct, position, n_cn)) } +/// Parses the linked list of Sample Reduction blocks (SRBLOCK) starting from target +fn parse_sr4( + rdr: &mut SymBufReader<&File>, + target: i64, + mut position: i64, +) -> Result<(Vec, i64)> { + let mut sr_blocks: Vec = Vec::new(); + if target <= 0 { + return Ok((sr_blocks, position)); + } + + let mut next = target; + while next > 0 { + // Read just the 16-byte header first to validate before allocating + rdr.seek_relative(next - position) + .context("Could not reach SR block header position")?; + let header: Blockheader4Short = + parse_block_header_short(rdr).context("Could not read SR block header")?; + // Validate block ID is ##SR + if &header.hdr_id != b"##SR" { + position = next + 16; + break; + } + // Now read the rest of the block + let mut buf = vec![0u8; (header.hdr_len - 16) as usize]; + rdr.read_exact(&mut buf) + .context("Could not read SR block body")?; + position = next + header.hdr_len as i64; + let mut block = Cursor::new(buf); + let sr: Sr4Block = block + .read_le() + .context("Could not read buffer into Sr4Block struct")?; + next = sr.sr_sr_next; + sr_blocks.push(sr); + } + + Ok((sr_blocks, position)) +} + /// Channel Group 
struct /// it contains the related channels structure, a set of channel names, the dedicated master channel name and other helper data. #[derive(Debug, Clone)] @@ -2091,6 +2173,8 @@ pub struct Cg4 { pub vlsd_cg: Option<(u64, i32)>, /// invalid byte array, optional pub invalid_bytes: Option>, + /// Sample reduction blocks linked from cg_sr_first + pub sr: Vec, } /// Cg4 implementations for extracting acquisition and source name and path @@ -4427,7 +4511,7 @@ pub struct Cl4Block { /// Length of block in bytes // pub cl_len: u64, /// # of links - cl_links: u64, + pub cl_links: u64, /// links /// link to CNBlock describing dynamic data pub cl_composition: i64, @@ -4458,7 +4542,7 @@ pub struct Cv4Block { /// Length of block in bytes // pub cv_len: u64, /// # of links - cv_n_links: u64, + pub cv_n_links: u64, /// links /// link to CNBlock for discriminator channel pub cv_cn_discriminator: i64, @@ -4469,7 +4553,7 @@ pub struct Cv4Block { /// number of option channels pub cv_option_count: u32, /// reserved - cv_reserved: [u8; 4], + pub cv_reserved: [u8; 4], /// list of discriminator values for the options #[br(if(cv_option_count > 1), little, count = cv_option_count )] pub cv_option_val: Vec, @@ -4489,7 +4573,7 @@ pub struct Cu4Block { /// Length of block in bytes // pub cu_len: u64, /// # of links - cu_n_links: u64, + pub cu_n_links: u64, /// links /// list of member channel #[br(if(cu_n_links > 1), little, count = cu_n_links)] @@ -4498,5 +4582,41 @@ pub struct Cu4Block { /// number of member channels pub cu_member_count: u32, /// reserved - cu_reserved: [u8; 4], + pub cu_reserved: [u8; 4], +} + +/// SR4 Sample Reduction block struct (Section 6.29 of MDF 4.3 spec) +#[derive(Debug, PartialEq, Clone)] +#[binrw] +#[br(little)] +#[repr(C)] +pub struct Sr4Block { + /// Pointer to next sample reduction block (SRBLOCK) (can be NIL) + pub sr_sr_next: i64, + /// Pointer to reduction data block (RD-/RV-/DZBLOCK or DL-/LD-/HLBLOCK) + pub sr_data: i64, + /// Number of cycles, i.e. 
number of sample reduction records + pub sr_cycle_count: u64, + /// Length of sample interval used to calculate the reduction records (unit depends on sr_sync_type) + pub sr_interval: f64, + /// Sync type: 1=time(s), 2=angle(rad), 3=distance(m), 4=index + pub sr_sync_type: u8, + /// Flags: bit 0 = invalidation bytes present, bit 1 = dominant invalidation bit + pub sr_flags: u8, + /// Reserved + sr_reserved: [u8; 6], +} + +impl Default for Sr4Block { + fn default() -> Self { + Sr4Block { + sr_sr_next: 0, + sr_data: 0, + sr_cycle_count: 0, + sr_interval: 0.0, + sr_sync_type: 1, + sr_flags: 0, + sr_reserved: [0; 6], + } + } } diff --git a/src/mdfr.rs b/src/mdfr.rs index dc793db..26b7057 100644 --- a/src/mdfr.rs +++ b/src/mdfr.rs @@ -48,6 +48,16 @@ impl Mdfr { let Mdfr(mdf) = self; mdf.get_version() } + /// returns true if the file was marked as unfinalized + pub fn is_unfinalized(&self) -> bool { + let Mdfr(mdf) = self; + mdf.is_unfinalized() + } + /// returns the standard and custom unfinalization flags (0, 0) if finalized or MDF3 + pub fn get_unfin_flags(&self) -> (u16, u16) { + let Mdfr(mdf) = self; + mdf.get_unfin_flags() + } /// returns channel's data, numpy array or list, depending if data type is numeric or string|bytes fn get_channel_data(&self, channel_name: String) -> PyResult> { let Mdfr(mdf) = self; @@ -263,6 +273,19 @@ df=polars.DataFrame(series) Ok(channel_list) }) } + /// returns the set of channel names that are in the same channel group as input channel name + pub fn get_channel_names_cg_set(&self, channel_name: String) -> PyResult> { + let Mdfr(mdf) = self; + pyo3::Python::attach(|py| { + let channel_list: Py = mdf + .mdf_info + .get_channel_names_cg_set(&channel_name) + .into_pyobject(py) + .context("error converting channel group names set into python object")? 
+ .into(); + Ok(channel_list) + }) + } /// returns a dict of master names keys for which values are a set of associated channel names pub fn get_master_channel_names_set(&self) -> PyResult> { let Mdfr(mdf) = self; @@ -302,6 +325,13 @@ df=polars.DataFrame(series) let Mdfr(mdf) = self; Ok(Mdfr(mdf.write(file_name, compression)?)) } + /// converts MDF version 3.x to 4.2 in memory + pub fn convert3to4(&mut self, file_name: &str) -> PyResult<()> { + let Mdfr(mdf) = self; + let converted = mdf.mdf_info.convert3to4(file_name)?; + mdf.mdf_info = converted; + Ok(()) + } /// Adds a new channel in memory (no file modification) /// Master must be a dict with keys name, type and flag /// Data has to be a PyArrow @@ -510,6 +540,16 @@ df=polars.DataFrame(series) let Mdfr(mdf) = self; Ok(mdf.mdf_info.list_channel_hierarchy()) } + /// list source information blocks (MDF 4.x only) + pub fn list_source_information(&self) -> PyResult { + let Mdfr(mdf) = self; + Ok(mdf.mdf_info.list_source_information()) + } + /// list sample reduction blocks for all channel groups (MDF 4.x only) + pub fn list_sample_reductions(&self) -> PyResult { + let Mdfr(mdf) = self; + Ok(mdf.mdf_info.list_sample_reductions()) + } /// get channel hierarchy blocks (MDF 4.x only) pub fn get_channel_hierarchy_blocks(&mut self) -> Py { let Mdfr(mdf) = self; diff --git a/src/mdfreader.rs b/src/mdfreader.rs index 94faa2d..c4dff7a 100644 --- a/src/mdfreader.rs +++ b/src/mdfreader.rs @@ -94,6 +94,18 @@ impl Mdf { pub fn get_version(&self) -> u16 { self.mdf_info.get_version() } + /// returns true if the file was marked as unfinalized + pub fn is_unfinalized(&self) -> bool { + self.mdf_info.is_unfinalized() + } + /// returns the standard and custom unfinalization flags (0, 0) if finalized or MDF3 + pub fn get_unfin_flags(&self) -> (u16, u16) { + self.mdf_info.get_unfin_flags() + } + /// List sample reduction blocks for all channel groups (MDF 4.x only) + pub fn list_sample_reductions(&self) -> String { + 
self.mdf_info.list_sample_reductions() + } /// returns channel's unit string pub fn get_channel_unit(&self, channel_name: &str) -> Result> { self.mdf_info.get_channel_unit(channel_name) diff --git a/src/mdfwriter/mdfwriter4.rs b/src/mdfwriter/mdfwriter4.rs index 088d764..23f28c4 100644 --- a/src/mdfwriter/mdfwriter4.rs +++ b/src/mdfwriter/mdfwriter4.rs @@ -264,46 +264,7 @@ pub fn mdfwriter4(mdf: &Mdf, file_name: &str, compression: bool) -> Result } // channel composition if let Some(compo) = &cn.composition { - match &compo.block { - Compo::CA(c) => { - let header = Blockheader4Short { - hdr_id: [35, 35, 67, 65], // ##CA - hdr_len: c.ca_len, - ..Default::default() - }; - buffer - .write_le(&header) - .context("Could not write CABlock header")?; - buffer - .write_le(&1u64) - .context("error writing number of links in CA Block")?; - let ca_composition: u64 = 0; - buffer - .write_le(&ca_composition) - .context("Could not write CABlock ca_composition")?; - let mut ca_block = Ca4BlockMembers::default(); - ca_block.ca_ndim = c.ca_ndim; - ca_block.ca_dim_size.clone_from(&c.ca_dim_size); - buffer - .write_le(&ca_composition) - .context("Could not write CABlock members")?; - } - Compo::DS(_) => { - todo!() - } - Compo::CL(_) => { - todo!() - } - Compo::CU(_) => { - todo!() - } - Compo::CV(_) => { - todo!() - } - Compo::CN(_) => { - todo!() - } - } + write_composition(&mut buffer, compo)?; } } } @@ -317,6 +278,101 @@ pub fn mdfwriter4(mdf: &Mdf, file_name: &str, compression: bool) -> Result }) } +/// Writes a composition block (CA, DS, CL, CU, CV, CN) and any nested composition recursively +fn write_composition(buffer: &mut Cursor>, compo: &Composition) -> Result<()> { + match &compo.block { + Compo::CA(c) => { + let header = Blockheader4Short { + hdr_id: [35, 35, 67, 65], // ##CA + hdr_len: c.ca_len, + ..Default::default() + }; + buffer + .write_le(&header) + .context("Could not write CABlock header")?; + buffer + .write_le(&1u64) + .context("error writing number of links 
in CA Block")?; + let ca_composition: u64 = 0; + buffer + .write_le(&ca_composition) + .context("Could not write CABlock ca_composition")?; + let mut ca_block = Ca4BlockMembers::default(); + ca_block.ca_ndim = c.ca_ndim; + ca_block.ca_dim_size.clone_from(&c.ca_dim_size); + buffer + .write_le(&ca_block) + .context("Could not write CABlock members")?; + } + Compo::DS(ds) => { + let header = Blockheader4Short { + hdr_id: [35, 35, 68, 83], // ##DS + hdr_len: 16 + 8 + ds.ds_links * 8 + 8, + ..Default::default() + }; + buffer + .write_le(&header) + .context("Could not write DSBlock header")?; + buffer + .write_le(ds) + .context("Could not write DSBlock")?; + } + Compo::CL(cl) => { + let header = Blockheader4Short { + hdr_id: [35, 35, 67, 76], // ##CL + hdr_len: 48, + ..Default::default() + }; + buffer + .write_le(&header) + .context("Could not write CLBlock header")?; + buffer + .write_le(cl) + .context("Could not write CLBlock")?; + } + Compo::CU(cu) => { + let header = Blockheader4Short { + hdr_id: [35, 35, 67, 85], // ##CU + hdr_len: 16 + 8 + cu.cu_n_links * 8 + 4 + 4, + ..Default::default() + }; + buffer + .write_le(&header) + .context("Could not write CUBlock header")?; + buffer + .write_le(cu) + .context("Could not write CUBlock")?; + } + Compo::CV(cv) => { + let header = Blockheader4Short { + hdr_id: [35, 35, 67, 86], // ##CV + hdr_len: 16 + 8 + cv.cv_n_links * 8 + 4 + 4 + cv.cv_option_count as u64 * 8, + ..Default::default() + }; + buffer + .write_le(&header) + .context("Could not write CVBlock header")?; + buffer + .write_le(cv) + .context("Could not write CVBlock")?; + } + Compo::CN(cn) => { + // Nested CN composition: write the CN block header + block data + buffer + .write_le(&cn.header) + .context("Could not write composition CN header")?; + buffer + .write_le(&cn.block) + .context("Could not write composition CN block")?; + } + } + // Handle recursive nested compositions + if let Some(nested) = &compo.compo { + write_composition(buffer, nested)?; + } + 
Ok(()) +} + /// Writes the data blocks fn write_data_blocks( position: i64, @@ -662,6 +718,7 @@ fn create_blocks( record_length: cg_block.cg_data_bytes, vlsd_cg: None, invalid_bytes: None, + sr: Vec::new(), }; new_cg.cn.insert(0, new_cn); new_cg.channel_names.insert(cn.unique_name.clone()); From 544404d367680b2b8e22840f91935b2d870b2708 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Mon, 2 Feb 2026 00:25:48 +0100 Subject: [PATCH 12/46] added unionarray import from arrow --- src/data_holder/channel_data.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/data_holder/channel_data.rs b/src/data_holder/channel_data.rs index 2bd291e..4d1336c 100644 --- a/src/data_holder/channel_data.rs +++ b/src/data_holder/channel_data.rs @@ -1933,7 +1933,14 @@ pub fn try_from(value: &dyn Array) -> Result { bail!("FixedSizeList is not of size 2, to be used for complex") } } - _ => todo!(), + DataType::Union(_, _) => { + let array = value + .as_any() + .downcast_ref::() + .context("could not downcast to UnionArray")?; + Ok(ChannelData::Union(UnionArray::from(array.to_data()))) + } + dt => bail!("Arrow data type {dt} is not supported for conversion to ChannelData"), } } From cf7767fc7fe10d42917247304e7e1121a857ecd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Mon, 2 Feb 2026 21:13:30 +0100 Subject: [PATCH 13/46] optimisations for big file with many channels --- src/mdfreader/data_read4.rs | 70 ++++++++++++++++++------------------- src/mdfreader/mdfreader4.rs | 6 ++-- 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/src/mdfreader/data_read4.rs b/src/mdfreader/data_read4.rs index 7f3c737..25e9815 100644 --- a/src/mdfreader/data_read4.rs +++ b/src/mdfreader/data_read4.rs @@ -156,7 +156,7 @@ pub fn read_one_channel_array( for (i, value) in data_bytes.chunks(std::mem::size_of::()).enumerate() { data[i] = f16::from_be_bytes( - value.try_into().context("Could not read be 
f16")?, + value.try_into().unwrap(), ) .to_f32(); } @@ -164,7 +164,7 @@ pub fn read_one_channel_array( for (i, value) in data_bytes.chunks(std::mem::size_of::()).enumerate() { data[i] = f16::from_le_bytes( - value.try_into().context("Could not read le f16")?, + value.try_into().unwrap(), ) .to_f32(); } @@ -285,7 +285,7 @@ pub fn read_one_channel_array( for (i, value) in data_bytes.chunks(std::mem::size_of::()).enumerate() { data[i] = f16::from_be_bytes( - value.try_into().context("Could not read be f16 complex")?, + value.try_into().unwrap(), ) .to_f32(); } @@ -293,7 +293,7 @@ pub fn read_one_channel_array( for (i, value) in data_bytes.chunks(std::mem::size_of::()).enumerate() { data[i] = f16::from_le_bytes( - value.try_into().context("Could not read le f16 complex")?, + value.try_into().unwrap(), ) .to_f32(); } @@ -304,14 +304,14 @@ pub fn read_one_channel_array( for (i, value) in data_bytes.chunks(std::mem::size_of::()).enumerate() { data[i] = f32::from_be_bytes( - value.try_into().context("Could not read be f32 complex")?, + value.try_into().unwrap(), ); } } else { for (i, value) in data_bytes.chunks(std::mem::size_of::()).enumerate() { data[i] = f32::from_le_bytes( - value.try_into().context("Could not read le f32 complex")?, + value.try_into().unwrap(), ); } } @@ -322,13 +322,13 @@ pub fn read_one_channel_array( if cn.endian { for (i, value) in data_bytes.chunks(std::mem::size_of::()).enumerate() { data[i] = f64::from_be_bytes( - value.try_into().context("Could not read be f64 complex")?, + value.try_into().unwrap(), ); } } else { for (i, value) in data_bytes.chunks(std::mem::size_of::()).enumerate() { data[i] = f64::from_le_bytes( - value.try_into().context("Could not read le f64 complex")?, + value.try_into().unwrap(), ); } } @@ -525,7 +525,7 @@ pub fn read_one_channel_array( for (i, value) in data_bytes.chunks(std::mem::size_of::()).enumerate() { buf[i] = f16::from_be_bytes( - value.try_into().context("Could not read be f16")?, + value.try_into().unwrap(), 
) .to_f32(); } @@ -537,7 +537,7 @@ pub fn read_one_channel_array( } else if n_bytes == 2 { for (i, value) in data_bytes.chunks(std::mem::size_of::()).enumerate() { buf[i] = - f16::from_le_bytes(value.try_into().context("Could not read le f16")?) + f16::from_le_bytes(value.try_into().unwrap()) .to_f32(); } } else { @@ -705,7 +705,7 @@ pub fn read_channels_from_bytes( for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record[pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; data[i + previous_index] = - i8::from_le_bytes(value.try_into().context("Could not read i8")?); + i8::from_le_bytes(value.try_into().unwrap()); } } ChannelData::UInt8(a) => { @@ -713,7 +713,7 @@ pub fn read_channels_from_bytes( for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record[pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; data[i + previous_index] = - u8::from_le_bytes(value.try_into().context("Could not read u8")?); + u8::from_le_bytes(value.try_into().unwrap()); } } ChannelData::Int16(a) => { @@ -723,7 +723,7 @@ pub fn read_channels_from_bytes( value = &record [pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; data[i + previous_index] = i16::from_be_bytes( - value.try_into().context("Could not read be i16")?, + value.try_into().unwrap(), ); } } else { @@ -731,7 +731,7 @@ pub fn read_channels_from_bytes( value = &record [pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; data[i + previous_index] = i16::from_le_bytes( - value.try_into().context("Could not read le i16")?, + value.try_into().unwrap(), ); } } @@ -743,7 +743,7 @@ pub fn read_channels_from_bytes( value = &record [pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; data[i + previous_index] = u16::from_be_bytes( - value.try_into().context("Could not read be u16")?, + value.try_into().unwrap(), ); } } else { @@ -751,7 +751,7 @@ pub fn read_channels_from_bytes( value = &record [pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; data[i + previous_index] = 
u16::from_le_bytes( - value.try_into().context("Could not read le u16")?, + value.try_into().unwrap(), ); } } @@ -779,7 +779,7 @@ pub fn read_channels_from_bytes( value = &record [pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; data[i + previous_index] = i32::from_be_bytes( - value.try_into().context("Could not read be i32")?, + value.try_into().unwrap(), ); } } else { @@ -787,7 +787,7 @@ pub fn read_channels_from_bytes( value = &record [pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; data[i + previous_index] = i32::from_le_bytes( - value.try_into().context("Could not read le i32")?, + value.try_into().unwrap(), ); } } @@ -815,7 +815,7 @@ pub fn read_channels_from_bytes( value = &record [pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; data[i + previous_index] = u32::from_be_bytes( - value.try_into().context("Could not read be u32")?, + value.try_into().unwrap(), ); } } else { @@ -823,7 +823,7 @@ pub fn read_channels_from_bytes( value = &record [pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; data[i + previous_index] = u32::from_le_bytes( - value.try_into().context("Could not read le u32")?, + value.try_into().unwrap(), ); } } @@ -836,7 +836,7 @@ pub fn read_channels_from_bytes( value = &record [pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; data[i + previous_index] = f16::from_be_bytes( - value.try_into().context("Could not read be f16")?, + value.try_into().unwrap(), ) .to_f32(); } @@ -845,7 +845,7 @@ pub fn read_channels_from_bytes( value = &record [pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; data[i + previous_index] = f32::from_be_bytes( - value.try_into().context("Could not read be f32")?, + value.try_into().unwrap(), ); } } @@ -854,7 +854,7 @@ pub fn read_channels_from_bytes( value = &record [pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; data[i + previous_index] = f16::from_le_bytes( - value.try_into().context("Could not read le f16")?, + value.try_into().unwrap(), ) .to_f32(); } @@ -863,7 +863,7 @@ pub fn 
read_channels_from_bytes( value = &record [pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; data[i + previous_index] = f32::from_le_bytes( - value.try_into().context("Could not read le f32")?, + value.try_into().unwrap(), ); } } @@ -875,7 +875,7 @@ pub fn read_channels_from_bytes( for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record[pos_byte_beg..pos_byte_beg + n_bytes]; data[i + previous_index] = i64::from_be_bytes( - value.try_into().context("Could not read be i64")?, + value.try_into().unwrap(), ); } } else if n_bytes == 6 { @@ -890,7 +890,7 @@ pub fn read_channels_from_bytes( for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record[pos_byte_beg..pos_byte_beg + n_bytes]; data[i + previous_index] = i64::from_le_bytes( - value.try_into().context("Could not read le i64")?, + value.try_into().unwrap(), ); } } else if n_bytes == 6 { @@ -909,7 +909,7 @@ pub fn read_channels_from_bytes( for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record[pos_byte_beg..pos_byte_beg + n_bytes]; data[i + previous_index] = u64::from_be_bytes( - value.try_into().context("Could not read be u64")?, + value.try_into().unwrap(), ); } } else if n_bytes == 7 { @@ -941,7 +941,7 @@ pub fn read_channels_from_bytes( for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record[pos_byte_beg..pos_byte_beg + n_bytes]; data[i + previous_index] = u64::from_le_bytes( - value.try_into().context("Could not read le u64")?, + value.try_into().unwrap(), ); } } else if n_bytes == 7 { @@ -976,7 +976,7 @@ pub fn read_channels_from_bytes( value = &record [pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; data[i + previous_index] = f64::from_be_bytes( - value.try_into().context("Could not read be f64")?, + value.try_into().unwrap(), ); } } else { @@ -984,7 +984,7 @@ pub fn read_channels_from_bytes( value = &record [pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; data[i + previous_index] = 
f64::from_le_bytes( - value.try_into().context("Could not read le f64")?, + value.try_into().unwrap(), ); } } @@ -1205,7 +1205,7 @@ pub fn read_channels_from_bytes( value = &record[pos_byte_beg + j * std::mem::size_of::() ..pos_byte_beg + (j + 1) * std::mem::size_of::()]; data[(i + previous_index) * cn.list_size + j] = i8::from_le_bytes( - value.try_into().context("Could not read i8 array")?, + value.try_into().unwrap(), ); } } @@ -1217,7 +1217,7 @@ pub fn read_channels_from_bytes( value = &record[pos_byte_beg + j * std::mem::size_of::() ..pos_byte_beg + (j + 1) * std::mem::size_of::()]; data[(i + previous_index) * cn.list_size + j] = u8::from_le_bytes( - value.try_into().context("Could not read u8 array")?, + value.try_into().unwrap(), ); } } @@ -1562,7 +1562,7 @@ pub fn read_channels_from_bytes( ..pos_byte_beg + (j + 1) * n_bytes]; data[(i + previous_index) * cn.list_size + j] = u64::from_le_bytes( - value.try_into().context("Could not read le u64")?, + value.try_into().unwrap(), ); } } @@ -1614,7 +1614,7 @@ pub fn read_channels_from_bytes( ..pos_byte_beg + (j + 1) * std::mem::size_of::()]; data[(i + previous_index) * cn.list_size + j] = f64::from_be_bytes( - value.try_into().context("Could not read be f64")?, + value.try_into().unwrap(), ); } } @@ -1625,7 +1625,7 @@ pub fn read_channels_from_bytes( ..pos_byte_beg + (j + 1) * std::mem::size_of::()]; data[(i + previous_index) * cn.list_size + j] = f64::from_le_bytes( - value.try_into().context("Could not read le f64")?, + value.try_into().unwrap(), ); } } diff --git a/src/mdfreader/mdfreader4.rs b/src/mdfreader/mdfreader4.rs index dcb0005..a68db72 100644 --- a/src/mdfreader/mdfreader4.rs +++ b/src/mdfreader/mdfreader4.rs @@ -29,7 +29,7 @@ use super::Mdf; /// The following constant represents the size of data chunk to be read and processed. 
/// a big chunk will improve performance but consume more memory /// a small chunk will not consume too much memory but will cause many read calls, penalising performance -pub const CHUNK_SIZE_READING_4: usize = 524288; // can be tuned according to architecture +pub const CHUNK_SIZE_READING_4: usize = 4_194_304; // can be tuned according to architecture /// Reads the file data based on headers information contained in info parameter /// Hashset of channel names parameter allows to filter which channels to read @@ -57,8 +57,8 @@ pub fn mdfreader4<'a>( .extend(channel_group.channel_names.iter().cloned()); } let channel_names_to_read_in_dg: HashSet<_> = channel_names_present_in_dg - .into_par_iter() - .filter(|v| channel_names.contains(v)) + .intersection(channel_names) + .cloned() .collect(); if dg.block.dg_data != 0 && !channel_names_to_read_in_dg.is_empty() { // header block From f626de829195778f529e90136aad656e825d5433 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Mon, 2 Feb 2026 22:58:42 +0100 Subject: [PATCH 14/46] further optimised for VLSC sorted and unsorted data --- src/data_holder/channel_data.rs | 8 ++ src/mdfreader/mdfreader4.rs | 190 ++++++++++++++++++-------------- 2 files changed, 113 insertions(+), 85 deletions(-) diff --git a/src/data_holder/channel_data.rs b/src/data_holder/channel_data.rs index 4d1336c..0cac5fd 100644 --- a/src/data_holder/channel_data.rs +++ b/src/data_holder/channel_data.rs @@ -569,6 +569,14 @@ impl ChannelData { _ => None, } } + /// Returns a borrowed slice of u64 values if the data is already UInt64. + /// Zero-copy alternative to to_u64_vec() for the common case. 
+ pub fn as_u64_slice(&self) -> Option<&[u64]> { + match self { + ChannelData::UInt64(a) => Some(a.values_slice()), + _ => None, + } + } /// returns mdf4 data type pub fn data_type(&self, endian: bool) -> u8 { if endian { diff --git a/src/mdfreader/mdfreader4.rs b/src/mdfreader/mdfreader4.rs index a68db72..0186965 100644 --- a/src/mdfreader/mdfreader4.rs +++ b/src/mdfreader/mdfreader4.rs @@ -381,7 +381,7 @@ fn read_all_blocks_to_bytes( position }; let (dl_blocks, mut pos) = parser_dl4(rdr, current_pos)?; - let mut combined_data = Vec::new(); + let mut combined_data = Vec::with_capacity(CHUNK_SIZE_READING_4 * 2); for dl in dl_blocks { for data_ptr in dl.dl_data { if data_ptr == 0 { @@ -598,6 +598,7 @@ fn read_vlsd_from_bytes( let data_length = data.len(); let mut remaining: usize = data_length - position; let mut nrecord: usize = 0; + let mut str_buf = String::new(); match &mut cn.data { ChannelData::Utf8(array) => { let cn_data_type = cn.block.cn_data_type; @@ -613,7 +614,7 @@ fn read_vlsd_from_bytes( _ => length, }; let record = &data[position..position + record_len]; - array.append_value(decode_string_bytes(record, cn_data_type, decoder)?); + array.append_value(decode_string_bytes(record, cn_data_type, decoder, &mut str_buf)?); position += length; remaining = data_length - position; nrecord += 1; @@ -674,6 +675,7 @@ fn read_vlsc_from_bytes( ) -> Result { let data_length = data.len(); let mut max_position: usize = 0; + let mut str_buf = String::new(); match &mut cn.data { ChannelData::Utf8(array) => { let cn_data_type = cn.block.cn_data_type; @@ -682,7 +684,7 @@ fn read_vlsc_from_bytes( let length = *size as usize; if start + length <= data_length && length > 0 { let record = &data[start..start + length]; - array.append_value(decode_string_bytes(record, cn_data_type, decoder)?); + array.append_value(decode_string_bytes(record, cn_data_type, decoder, &mut str_buf)?); max_position = max_position.max(start + length); } else if length == 0 { array.append_value(""); @@ 
-1074,7 +1076,7 @@ fn parser_dl4_unsorted( channel_names_to_read_in_dg: &HashSet, ) -> Result { // Read all data blocks - let mut data: Vec = Vec::new(); + let mut data: Vec = Vec::with_capacity(CHUNK_SIZE_READING_4 * 2); let mut decoder: Dec = Dec { windows_1252: WINDOWS_1252.new_decoder(), utf_16_be: UTF_16BE.new_decoder(), @@ -1083,7 +1085,12 @@ fn parser_dl4_unsorted( // initialise record counter let mut record_counter: HashMap)> = HashMap::new(); for cg in dg.cg.values_mut() { - record_counter.insert(cg.block.cg_record_id, (0, Vec::new())); + let capacity = if (cg.block.cg_flags & (CG_F_VLSD | CG_F_VLSC)) != 0 { + 0 // VLSD/VLSC data is not accumulated in record_counter + } else { + cg.block.cg_cycle_count as usize * cg.record_length as usize + }; + record_counter.insert(cg.block.cg_record_id, (0, Vec::with_capacity(capacity))); } for dl in dl_blocks { for data_pointer in dl.dl_data { @@ -1212,11 +1219,16 @@ fn read_all_channels_unsorted( }; // initialise record counter that will contain sorted data blocks for each channel group for cg in dg.cg.values_mut() { - record_counter.insert(cg.block.cg_record_id, (0, Vec::new())); + let capacity = if (cg.block.cg_flags & (CG_F_VLSD | CG_F_VLSC)) != 0 { + 0 // VLSD/VLSC data is not accumulated in record_counter + } else { + cg.block.cg_cycle_count as usize * cg.record_length as usize + }; + record_counter.insert(cg.block.cg_record_id, (0, Vec::with_capacity(capacity))); } // reads the sorted data block into chunks - let mut data: Vec = Vec::new(); + let mut data: Vec = Vec::with_capacity(CHUNK_SIZE_READING_4 * 2); let mut data_chunk = vec![0u8; CHUNK_SIZE_READING_4]; while position < data_block_length { let chunk_size = if (data_block_length - position) > CHUNK_SIZE_READING_4 { @@ -1253,6 +1265,8 @@ fn read_all_channels_unsorted_from_bytes( let data_length = data.len(); let dg_rec_id_size = dg.block.dg_rec_id_size as usize; let vlsd_data_start_offset = dg_rec_id_size + std::mem::size_of::(); + // reusable string buffer 
for VLSC string decoding + let mut dst = String::new(); // unsorted data into sorted data blocks, except for VLSD CG. let mut remaining: usize = data_length - position; while remaining > 0 { @@ -1262,15 +1276,13 @@ fn read_all_channels_unsorted_from_bytes( rec_id = data[position].into(); } else if dg_rec_id_size == 2 && remaining >= 2 { let rec = &data[position..position + std::mem::size_of::()]; - rec_id = u16::from_le_bytes(rec.try_into().context("Could not convert record id u16")?) - as u64; + rec_id = u16::from_le_bytes(rec.try_into().unwrap()) as u64; } else if dg_rec_id_size == 4 && remaining >= 4 { let rec = &data[position..position + std::mem::size_of::()]; - rec_id = u32::from_le_bytes(rec.try_into().context("Could not convert record id u32")?) - as u64; + rec_id = u32::from_le_bytes(rec.try_into().unwrap()) as u64; } else if dg_rec_id_size == 8 && remaining >= 8 { let rec = &data[position..position + std::mem::size_of::()]; - rec_id = u64::from_le_bytes(rec.try_into().context("Could not convert record id u64")?); + rec_id = u64::from_le_bytes(rec.try_into().unwrap()); } else { break; // not enough data remaining } @@ -1282,8 +1294,7 @@ fn read_all_channels_unsorted_from_bytes( if remaining >= 4 + dg_rec_id_size { let len = &data[position + dg_rec_id_size..position + vlsd_data_start_offset]; let length: usize = - u32::from_le_bytes(len.try_into().context("Could not read length")?) 
- as usize; + u32::from_le_bytes(len.try_into().unwrap()) as usize; remaining = data_length - position - vlsd_data_start_offset; if remaining >= length { position += vlsd_data_start_offset; @@ -1312,45 +1323,48 @@ fn read_all_channels_unsorted_from_bytes( } match &mut target_cn.data { ChannelData::Utf8(array) => { - let mut dst = String::with_capacity(record.len()); - if target_cn.block.cn_data_type == 6 { - let (_result, _size, _replacement) = decoder - .windows_1252 - .decode_to_string(record, &mut dst, false); - } else if target_cn.block.cn_data_type == 7 { - dst = str::from_utf8(record) - .context( - "Found invalid UTF-8 from VLSD record", - )? - .to_string(); - } else if target_cn.block.cn_data_type == 8 { - let (_result, _size, _replacement) = decoder - .utf_16_le - .decode_to_string(record, &mut dst, false); - } else if target_cn.block.cn_data_type == 9 { - let (_result, _size, _replacement) = decoder - .utf_16_be - .decode_to_string(record, &mut dst, false); - } else if target_cn.block.cn_data_type == 17 { - // Unicode with BOM - let bom = Bom::from(record); - let mut decoder = match bom { - Bom::Utf8 => UTF_8.new_decoder(), - Bom::Utf16Be => UTF_16BE.new_decoder(), - Bom::Utf16Le => UTF_16LE.new_decoder(), - Bom::Gb18030 => GB18030.new_decoder(), - _ => { - bail!("not implemented BOM type"); - } - }; - let (_result, _size, _replacement) = decoder - .decode_to_string(record, &mut dst, false); + if target_cn.block.cn_data_type == 7 { + // UTF-8: no decoding needed, use &str directly + array.append_value( + str::from_utf8(record) + .context("Found invalid UTF-8 from VLSD record")? 
+ .trim_end_matches('\0'), + ); } else { - bail!( - "channel data type is not correct for a text" - ) - }; - array.append_value(dst.trim_end_matches('\0')); + dst.clear(); + if target_cn.block.cn_data_type == 6 { + let (_result, _size, _replacement) = decoder + .windows_1252 + .decode_to_string(record, &mut dst, false); + } else if target_cn.block.cn_data_type == 8 { + let (_result, _size, _replacement) = decoder + .utf_16_le + .decode_to_string(record, &mut dst, false); + } else if target_cn.block.cn_data_type == 9 { + let (_result, _size, _replacement) = decoder + .utf_16_be + .decode_to_string(record, &mut dst, false); + } else if target_cn.block.cn_data_type == 17 { + // Unicode with BOM + let bom = Bom::from(record); + let mut bom_decoder = match bom { + Bom::Utf8 => UTF_8.new_decoder(), + Bom::Utf16Be => UTF_16BE.new_decoder(), + Bom::Utf16Le => UTF_16LE.new_decoder(), + Bom::Gb18030 => GB18030.new_decoder(), + _ => { + bail!("not implemented BOM type"); + } + }; + let (_result, _size, _replacement) = bom_decoder + .decode_to_string(record, &mut dst, false); + } else { + bail!( + "channel data type is not correct for a text" + ) + }; + array.append_value(dst.trim_end_matches('\0')); + } } ChannelData::VariableSizeByteArray(array) => { array.append_value(record); @@ -1398,9 +1412,9 @@ fn read_all_channels_unsorted_from_bytes( } // removes consumed records from data and leaves remaining that could not be processed. - let remaining_vect = data[position..].to_owned(); - data.clear(); // removes data but keeps capacity - data.extend(remaining_vect); + let remaining_len = data.len() - position; + data.copy_within(position.., 0); + data.truncate(remaining_len); // From sorted data block, copies data in channels arrays for (rec_id, (index, record_data)) in record_counter.iter_mut() { @@ -1429,31 +1443,33 @@ struct Dec { /// Decodes a byte slice to a String based on MDF4 cn_data_type. 
/// cn_data_type: 6=SBC/Windows-1252, 7=UTF-8, 8=UTF-16 LE, 9=UTF-16 BE, 17=BOM-prefixed -fn decode_string_bytes(record: &[u8], cn_data_type: u8, decoder: &mut Dec) -> Result { +fn decode_string_bytes<'a>( + record: &'a [u8], + cn_data_type: u8, + decoder: &mut Dec, + buf: &'a mut String, +) -> Result<&'a str> { match cn_data_type { 6 => { - let mut dst = String::with_capacity(record.len()); + buf.clear(); + buf.reserve(record.len()); let _ = decoder .windows_1252 - .decode_to_string(record, &mut dst, false); - Ok(dst) + .decode_to_string(record, buf, false); + Ok(buf.as_str()) } - 7 => Ok(str::from_utf8(record) - .context("Found invalid UTF-8")? - .to_string()), + 7 => Ok(str::from_utf8(record).context("Found invalid UTF-8")?), 8 => { - let mut dst = String::with_capacity(record.len()); - let _ = decoder - .utf_16_le - .decode_to_string(record, &mut dst, false); - Ok(dst.trim_end_matches('\0').to_string()) + buf.clear(); + buf.reserve(record.len()); + let _ = decoder.utf_16_le.decode_to_string(record, buf, false); + Ok(buf.trim_end_matches('\0')) } 9 => { - let mut dst = String::with_capacity(record.len()); - let _ = decoder - .utf_16_be - .decode_to_string(record, &mut dst, false); - Ok(dst.trim_end_matches('\0').to_string()) + buf.clear(); + buf.reserve(record.len()); + let _ = decoder.utf_16_be.decode_to_string(record, buf, false); + Ok(buf.trim_end_matches('\0')) } 17 => { if record.len() >= 3 @@ -1461,28 +1477,30 @@ fn decode_string_bytes(record: &[u8], cn_data_type: u8, decoder: &mut Dec) -> Re && record[1] == 0xBB && record[2] == 0xBF { - Ok(str::from_utf8(&record[3..]) - .context("Found invalid UTF-8 with BOM")? - .to_string()) + Ok(str::from_utf8(&record[3..]).context("Found invalid UTF-8 with BOM")?) 
} else if record.len() >= 2 && record[0] == 0xFF && record[1] == 0xFE { - let mut dst = String::with_capacity(record.len()); + buf.clear(); + buf.reserve(record.len()); let _ = decoder .utf_16_le - .decode_to_string(&record[2..], &mut dst, false); - Ok(dst.trim_end_matches('\0').to_string()) + .decode_to_string(&record[2..], buf, false); + Ok(buf.trim_end_matches('\0')) } else if record.len() >= 2 && record[0] == 0xFE && record[1] == 0xFF { - let mut dst = String::with_capacity(record.len()); + buf.clear(); + buf.reserve(record.len()); let _ = decoder .utf_16_be - .decode_to_string(&record[2..], &mut dst, false); - Ok(dst.trim_end_matches('\0').to_string()) + .decode_to_string(&record[2..], buf, false); + Ok(buf.trim_end_matches('\0')) } else { - Ok(str::from_utf8(record) - .context("Found invalid UTF-8 (no BOM)")? - .to_string()) + Ok(str::from_utf8(record).context("Found invalid UTF-8 (no BOM)")?) } } - _ => Ok(String::from_utf8_lossy(record).into_owned()), + _ => { + buf.clear(); + buf.push_str(&String::from_utf8_lossy(record)); + Ok(buf.as_str()) + } } } @@ -1900,6 +1918,7 @@ fn store_decoded_values_in_channel( values: Vec>, decoder: &mut Dec, ) -> Result<()> { + let mut str_buf = String::new(); for value_bytes in values { match &mut cn.data { ChannelData::Int8(builder) => { @@ -1997,6 +2016,7 @@ fn store_decoded_values_in_channel( &value_bytes, cn.block.cn_data_type, decoder, + &mut str_buf, )?); } ChannelData::VariableSizeByteArray(builder) => { From bcde0a94f0a671accd3cd3b5dbcadd81f81d8f97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Sun, 8 Feb 2026 15:49:14 +0100 Subject: [PATCH 15/46] refactored mdfinfo4 being too long into a modules with smaller files per block type. 
Implemented further writing --- src/mdfinfo.rs | 57 +- src/mdfinfo/mdfinfo3.rs | 60 +- src/mdfinfo/mdfinfo4.rs | 4622 -------------------------- src/mdfinfo/mdfinfo4/at_block.rs | 192 ++ src/mdfinfo/mdfinfo4/block_header.rs | 304 ++ src/mdfinfo/mdfinfo4/ca_block.rs | 336 ++ src/mdfinfo/mdfinfo4/cc_block.rs | 134 + src/mdfinfo/mdfinfo4/cg_block.rs | 807 +++++ src/mdfinfo/mdfinfo4/ch_block.rs | 123 + src/mdfinfo/mdfinfo4/cn_block.rs | 807 +++++ src/mdfinfo/mdfinfo4/composition.rs | 523 +++ src/mdfinfo/mdfinfo4/data_block.rs | 462 +++ src/mdfinfo/mdfinfo4/dg_block.rs | 188 ++ src/mdfinfo/mdfinfo4/ev_block.rs | 252 ++ src/mdfinfo/mdfinfo4/fh_block.rs | 126 + src/mdfinfo/mdfinfo4/hd_block.rs | 114 + src/mdfinfo/mdfinfo4/metadata.rs | 295 ++ src/mdfinfo/mdfinfo4/mod.rs | 868 +++++ src/mdfinfo/mdfinfo4/si_block.rs | 95 + src/mdfinfo/mdfinfo4/sr_block.rs | 91 + src/mdfr.rs | 248 +- src/mdfreader/mdfreader4.rs | 27 +- src/mdfwriter/mdfwriter4.rs | 786 ++++- 23 files changed, 6646 insertions(+), 4871 deletions(-) delete mode 100644 src/mdfinfo/mdfinfo4.rs create mode 100644 src/mdfinfo/mdfinfo4/at_block.rs create mode 100644 src/mdfinfo/mdfinfo4/block_header.rs create mode 100644 src/mdfinfo/mdfinfo4/ca_block.rs create mode 100644 src/mdfinfo/mdfinfo4/cc_block.rs create mode 100644 src/mdfinfo/mdfinfo4/cg_block.rs create mode 100644 src/mdfinfo/mdfinfo4/ch_block.rs create mode 100644 src/mdfinfo/mdfinfo4/cn_block.rs create mode 100644 src/mdfinfo/mdfinfo4/composition.rs create mode 100644 src/mdfinfo/mdfinfo4/data_block.rs create mode 100644 src/mdfinfo/mdfinfo4/dg_block.rs create mode 100644 src/mdfinfo/mdfinfo4/ev_block.rs create mode 100644 src/mdfinfo/mdfinfo4/fh_block.rs create mode 100644 src/mdfinfo/mdfinfo4/hd_block.rs create mode 100644 src/mdfinfo/mdfinfo4/metadata.rs create mode 100644 src/mdfinfo/mdfinfo4/mod.rs create mode 100644 src/mdfinfo/mdfinfo4/si_block.rs create mode 100644 src/mdfinfo/mdfinfo4/sr_block.rs diff --git a/src/mdfinfo.rs b/src/mdfinfo.rs 
index b8cb0ed..33c18fb 100644 --- a/src/mdfinfo.rs +++ b/src/mdfinfo.rs @@ -32,7 +32,7 @@ use crate::data_holder::channel_data::ChannelData; use crate::mdfwriter::mdfwriter3::convert3to4; use self::mdfinfo3::build_channel_db3; -use self::mdfinfo4::{At4Block, Ch4Block, Ev4Block, FhBlock}; +use self::mdfinfo4::{At4Block, Ch4Block, Ev4Block, FhBlock, Si4Block, Sr4Block}; use self::sym_buf_reader::SymBufReader; use crate::mdfreader::{DataSignature, MasterSignature}; @@ -574,6 +574,13 @@ impl MdfInfo { MdfInfo::V4(mdfinfo4) => Some(mdfinfo4.get_event_blocks()), } } + /// list file history entries + pub fn list_file_history(&mut self) -> String { + match self { + MdfInfo::V3(_) => String::new(), + MdfInfo::V4(mdfinfo4) => mdfinfo4.list_file_history(), + } + } /// get file history blocks pub fn get_file_history_blocks(&self) -> Option> { match self { @@ -616,42 +623,28 @@ impl MdfInfo { MdfInfo::V4(mdfinfo4) => mdfinfo4.list_sample_reductions(), } } + /// Get all source information blocks (MDF 4.x only) + pub fn get_source_information_blocks(&self) -> Option> { + match self { + MdfInfo::V3(_) => None, + MdfInfo::V4(mdfinfo4) => Some(mdfinfo4.get_source_information_blocks()), + } + } + /// Get all sample reduction blocks across all channel groups (MDF 4.x only) + /// Returns a vector of (dg_position, rec_id, sr_blocks) tuples + pub fn get_sample_reduction_blocks(&self) -> Option)>> { + match self { + MdfInfo::V3(_) => None, + MdfInfo::V4(mdfinfo4) => Some(mdfinfo4.get_sample_reduction_blocks()), + } + } } impl fmt::Display for MdfInfo { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - MdfInfo::V3(mdfinfo3) => { - writeln!(f, "Version : {}\n", mdfinfo3.id_block.id_ver)?; - writeln!( - f, - "Header :\n Author: {} Organisation:{}\n", - mdfinfo3.hd_block.hd_author, mdfinfo3.hd_block.hd_organization - )?; - writeln!( - f, - "Project: {} Subject:{}\n", - mdfinfo3.hd_block.hd_project, mdfinfo3.hd_block.hd_subject - )?; - writeln!( - f, - "Date: {:?} 
Time:{:?}\n", - mdfinfo3.hd_block.hd_date, mdfinfo3.hd_block.hd_time - )?; - writeln!(f, "Comments: {}", mdfinfo3.hd_comment)?; - writeln!(f, "\n") - } - MdfInfo::V4(mdfinfo4) => { - writeln!(f, "Version : {}", mdfinfo4.id_block.id_ver)?; - writeln!(f, "{}\n", mdfinfo4.hd_block)?; - let comments = &mdfinfo4 - .sharable - .get_hd_comments(mdfinfo4.hd_block.hd_md_comment); - for c in comments.iter() { - writeln!(f, "{} {}", c.0, c.1)?; - } - writeln!(f, "\n") - } + MdfInfo::V3(mdfinfo3) => write!(f, "{}", mdfinfo3), + MdfInfo::V4(mdfinfo4) => write!(f, "{}", mdfinfo4), } } } diff --git a/src/mdfinfo/mdfinfo3.rs b/src/mdfinfo/mdfinfo3.rs index 8c6343e..f81b0ca 100644 --- a/src/mdfinfo/mdfinfo3.rs +++ b/src/mdfinfo/mdfinfo3.rs @@ -283,27 +283,63 @@ impl MdfInfo3 { cn.description = desc.to_string(); } } -} - -/// MdfInfo3 display implementation -impl fmt::Display for MdfInfo3 { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - writeln!(f, "MdfInfo3: {}", self.file_name)?; - writeln!(f, "Version : {}\n", self.id_block.id_ver)?; - writeln!(f, "{}\n", self.hd_block)?; + /// Returns a concise one-line summary of the MDF3 file + pub fn summary(&self) -> String { + let total_channels = self.channel_names_set.len(); + let total_dgs = self.dg.len(); + format!( + "MDF3 v{}: {} DGs, {} channels", + self.id_block.id_ver, total_dgs, total_channels + ) + } + /// Formats the channel list with optional data preview + pub fn format_channels(&self, _show_data: bool) -> String { + let mut output = String::new(); for (master, list) in self.get_master_channel_names_set().iter() { if let Some(master_name) = master { - writeln!(f, "\nMaster: {master_name}\n")?; + output.push_str(&format!("\nMaster: {}\n", master_name)); } else { - writeln!(f, "\nWithout Master channel\n")?; + output.push_str("\nWithout Master channel\n"); } for channel in list.iter() { let unit = self.get_channel_unit(channel); let desc = self.get_channel_desc(channel); - writeln!(f, " {channel} {unit:?} 
{desc:?} \n")?; + output.push_str(&format!(" {} ", channel)); + if let Some(u) = unit { + output.push_str(&format!("\"{}\" ", u)); + } + if let Some(d) = desc + && !d.is_empty() + { + output.push_str(&format!("// {}", d)); + } + output.push('\n'); } } - writeln!(f, "\n") + output + } + /// Formats header info + pub fn format_header(&self) -> String { + format!( + "Author: {} Organisation: {}\nProject: {} Subject: {}\nDate: {:?} Time: {:?}\nComments: {}", + self.hd_block.hd_author, + self.hd_block.hd_organization, + self.hd_block.hd_project, + self.hd_block.hd_subject, + self.hd_block.hd_date, + self.hd_block.hd_time, + self.hd_comment + ) + } +} + +/// MdfInfo3 display implementation +impl fmt::Display for MdfInfo3 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!(f, "{}", self.summary())?; + writeln!(f, "File: {}", self.file_name)?; + writeln!(f, "{}", self.format_header())?; + write!(f, "{}", self.format_channels(false)) } } diff --git a/src/mdfinfo/mdfinfo4.rs b/src/mdfinfo/mdfinfo4.rs deleted file mode 100644 index ef184e8..0000000 --- a/src/mdfinfo/mdfinfo4.rs +++ /dev/null @@ -1,4622 +0,0 @@ -//! 
Parsing of file metadata into MdfInfo4 struct -use crate::mdfreader::{DataSignature, MasterSignature}; -use anyhow::{Context, Error, Result, bail}; -use arrow::array::{ - Array, ArrayRef, BooleanBufferBuilder, UInt8Builder, UInt16Builder, UInt32Builder, UInt32Array, - UnionArray, -}; -use arrow::buffer::ScalarBuffer; -use arrow::compute::take; -use arrow::datatypes::{Field, UnionFields}; -use binrw::{BinReaderExt, BinWriterExt, binrw}; -use byteorder::{LittleEndian, ReadBytesExt}; -use chrono::{DateTime, Local}; -use flate2::read::ZlibDecoder; -use log::warn; -use lz4::Decoder as Lz4Decoder; -use md5::{Digest, Md5}; -use rayon::prelude::*; -use std::collections::{BTreeMap, HashMap, HashSet, VecDeque}; -use std::default::Default; -use std::fmt::Debug; -use std::fs::File; -use std::io::{BufReader, Cursor, Read, Seek, Write}; -use std::sync::Arc; -use std::{fmt, str}; -use zstd::Decoder as ZstdDecoder; - -use crate::data_holder::channel_data::{ChannelData, data_type_init, try_from}; -use crate::data_holder::tensor_arrow::Order; -use crate::mdfinfo::IdBlock; - -use super::sym_buf_reader::SymBufReader; - -// ============================================================================= -// MDF 4.3 Flag Constants -// ============================================================================= - -// Channel Group (CG) flags - cg_flags field (u16) -/// Bit 0: VLSD channel group (Variable Length Signal Data) -pub const CG_F_VLSD: u16 = 1 << 0; -/// Bit 5: VLSC channel group (contains VLSC channels, MDF 4.3) -pub const CG_F_VLSC: u16 = 1 << 5; -/// Bit 6: Raw sensor event channel group -#[allow(dead_code)] -pub const CG_F_RAW_SENSOR_EVENT: u16 = 1 << 6; -/// Bit 7: Protocol event channel group -#[allow(dead_code)] -pub const CG_F_PROTOCOL_EVENT: u16 = 1 << 7; - -// Channel (CN) flags - cn_flags field (u32) -// Bits 0-14 are from MDF 4.2 -/// Bit 15: Raw sensor event channel -#[allow(dead_code)] -pub const CN_F_RAW_SENSOR_EVENT: u32 = 1 << 15; -/// Bit 16: Auxiliary channel 
-#[allow(dead_code)] -pub const CN_F_AUXILIARY: u32 = 1 << 16; -/// Bit 17: Data stream mode - channel uses data stream alignment -pub const CN_F_DATA_STREAM_MODE: u32 = 1 << 17; -/// Bit 18: Alignment reset - reset alignment to start of data stream -#[allow(dead_code)] -pub const CN_F_ALIGNMENT_RESET: u32 = 1 << 18; -/// Bit 19: Protocol event channel -#[allow(dead_code)] -pub const CN_F_PROTOCOL_EVENT: u32 = 1 << 19; -/// Bit 20: Data description mode - channel describes data structure -#[allow(dead_code)] -pub const CN_F_DATA_DESCRIPTION_MODE: u32 = 1 << 20; - -/// ChannelId : (Option, dg_pos, (cg_pos, rec_id), (cn_pos, rec_pos)) -pub(crate) type ChannelId = (Option, i64, (i64, u64), (i64, i32)); -pub(crate) type ChannelNamesSet = HashMap; - -/// MdfInfo4 is the struct holding whole metadata of mdf4.x files -/// * blocks with unique links are at top level like attachment, events and file history -/// * sharable blocks (most likely referenced multiple times and shared by several blocks) -/// that are in sharable fields and holds CC, SI, TX and MD blocks -/// * the dg fields nests cg itself nesting cn blocks and eventually compositions -/// (other cn or ca blocks) and conversion -/// * channel_names_set is the complete set of channel names contained in file -/// * in general the blocks are contained in HashMaps with key corresponding -/// to their position in the file -#[derive(Debug, Default, Clone)] -#[repr(C)] -pub struct MdfInfo4 { - /// file name string - pub file_name: String, - /// Identifier block - pub id_block: IdBlock, - /// header block - pub hd_block: Hd4, - /// file history blocks - pub fh: Fh, - /// attachment blocks - pub at: At, // attachments - /// event blocks - pub ev: HashMap, // events - /// data group block linking channel group/channel/conversion/compostion/..etc. 
and data block - pub dg: BTreeMap, // contains most of the file structure - /// cc, md, tx and si blocks that can be referenced by several blocks - pub sharable: SharableBlocks, - /// set of all channel names - pub channel_names_set: ChannelNamesSet, // set of channel names - /// channel hierarchy blocks - pub ch: HashMap, - /// whether the file was marked as unfinalized - pub is_unfinalized: bool, -} - -/// MdfInfo4's implementation -impl MdfInfo4 { - /// returns the hashmap with : - /// key = channel_name, - /// value = (master_name, - /// dg_position, - /// (cg.block_position, record_id), - /// (cn.block_position, cn_record_position)) - pub fn get_channel_id(&self, channel_name: &str) -> Option<&ChannelId> { - self.channel_names_set.get(channel_name) - } - /// Returns the channel's vector data if present in memory, otherwise None. - pub fn get_channel_data(&self, channel_name: &str) -> Option<&ChannelData> { - let mut data: Option<&ChannelData> = None; - if let Some((_master, dg_pos, (_cg_pos, rec_id), (_cn_pos, rec_pos))) = - self.get_channel_id(channel_name) - && let Some(dg) = self.dg.get(dg_pos) - && let Some(cg) = dg.cg.get(rec_id) - && let Some(cn) = cg.cn.get(rec_pos) - && !cn.data.is_empty() - { - data = Some(&cn.data); - } - data - } - /// Returns the channel's unit string. If it does not exist, it is an empty string. - pub fn get_channel_unit(&self, channel_name: &str) -> Result> { - let mut unit: Option = None; - if let Some((_master, dg_pos, (_cg_pos, rec_id), (_cn_pos, rec_pos))) = - self.get_channel_id(channel_name) - && let Some(dg) = self.dg.get(dg_pos) - && let Some(cg) = dg.cg.get(rec_id) - && let Some(cn) = cg.cn.get(rec_pos) - { - unit = self.sharable.get_tx(cn.block.cn_md_unit)?; - } - Ok(unit) - } - /// Returns the channel's description. 
If it does not exist, it is an empty string - pub fn get_channel_desc(&self, channel_name: &str) -> Result> { - let mut desc: Option = None; - if let Some((_master, dg_pos, (_cg_pos, rec_id), (_cn_pos, rec_pos))) = - self.get_channel_id(channel_name) - && let Some(dg) = self.dg.get(dg_pos) - && let Some(cg) = dg.cg.get(rec_id) - && let Some(cn) = cg.cn.get(rec_pos) - { - desc = self.sharable.get_tx(cn.block.cn_md_comment)?; - } - Ok(desc) - } - /// returns the master channel associated to the input channel name - pub fn get_channel_master(&self, channel_name: &str) -> Option { - let mut master: Option = None; - if let Some((m, _dg_pos, (_cg_pos, _rec_idd), (_cn_pos, _rec_pos))) = - self.get_channel_id(channel_name) - { - master.clone_from(m); - } - master - } - /// returns type of master channel link to channel input in parameter: - /// 0 = None (normal data channels), 1 = Time (seconds), 2 = Angle (radians), - /// 3 = Distance (meters), 4 = Index (zero-based index values) - pub fn get_channel_master_type(&self, channel_name: &str) -> u8 { - let mut master_type: u8 = 0; // default to normal data channel - if let Some((_master, dg_pos, (_cg_pos, rec_id), (_cn_pos, rec_pos))) = - self.get_channel_id(channel_name) - && let Some(dg) = self.dg.get(dg_pos) - && let Some(cg) = dg.cg.get(rec_id) - && let Some(cn) = cg.cn.get(rec_pos) - { - master_type = cn.block.cn_sync_type; - } - master_type - } - /// returns the set of channel names - pub fn get_channel_names_set(&self) -> HashSet { - self.channel_names_set.keys().cloned().collect() - } - /// returns the set of channel names that are in same channel group as input channel name - pub fn get_channel_names_cg_set(&self, channel_name: &str) -> HashSet { - if let Some((_master, dg_pos, (_cg_pos, rec_id), (_cn_pos, _rec_pos))) = - self.get_channel_id(channel_name) - { - let mut channel_list = HashSet::new(); - if let Some(dg) = self.dg.get(dg_pos) - && let Some(cg) = dg.cg.get(rec_id) - { - 
channel_list.clone_from(&cg.channel_names); - } - channel_list - } else { - HashSet::new() - } - } - /// returns a hashmap for which master channel names are keys and values its corresponding set of channel names - pub fn get_master_channel_names_set(&self) -> HashMap, HashSet> { - let mut channel_master_list: HashMap, HashSet> = HashMap::new(); - for (_dg_position, dg) in self.dg.iter() { - for (_record_id, cg) in dg.cg.iter() { - if let Some(list) = channel_master_list.get_mut(&cg.master_channel_name) { - list.extend(cg.channel_names.clone()); - } else { - channel_master_list - .insert(cg.master_channel_name.clone(), cg.channel_names.clone()); - } - } - } - channel_master_list - } - /// empty the channels' ndarray - pub fn clear_channel_data_from_memory(&mut self, channel_names: HashSet) -> Result<()> { - for channel_name in channel_names { - if let Some((_master, dg_pos, (_cg_pos, rec_id), (_cn_pos, rec_pos))) = - self.channel_names_set.get_mut(&channel_name) - && let Some(dg) = self.dg.get_mut(dg_pos) - && let Some(cg) = dg.cg.get_mut(rec_id) - && let Some(cn) = cg.cn.get_mut(rec_pos) - && !cn.data.is_empty() - { - cn.data = - cn.data - .zeros(cn.block.cn_data_type, 0, 0, (Vec::new(), Order::RowMajor))?; - } - } - Ok(()) - } - /// returns a new empty MdfInfo4 struct - pub fn new(file_name: &str, n_channels: usize) -> MdfInfo4 { - MdfInfo4 { - file_name: file_name.to_string(), - dg: BTreeMap::new(), - sharable: SharableBlocks::new(n_channels), - channel_names_set: HashMap::with_capacity(n_channels), - id_block: IdBlock::default(), - fh: Vec::new(), - at: HashMap::new(), - ev: HashMap::new(), - hd_block: Hd4::default(), - ch: HashMap::new(), - is_unfinalized: false, - } - } - /// Adds a new channel in memory (no file modification) - pub fn add_channel( - &mut self, - channel_name: String, - data: ChannelData, - data_signature: DataSignature, - mut master: MasterSignature, - unit: Option, - description: Option, - ) -> Result<(), Error> { - let mut cg_block = 
Cg4Block { - cg_cycle_count: data_signature.len as u64, - ..Default::default() - }; - // Basic channel block - let mut cn_block = Cn4Block::default(); - let machine_endian: bool = cfg!(target_endian = "big"); - cn_block.cn_data_type = data_signature.data_type; - cn_block.cn_bit_count = data_signature.bit_count; - let cn_pos = position_generator(); - cn_block.cn_sync_type = master.master_type.unwrap_or(0); - - // channel name - let channel_name_position = position_generator(); - cn_block.cn_tx_name = channel_name_position; - self.sharable - .create_tx(channel_name_position, channel_name.to_string()); - - // Channel array - let mut list_size = data_signature.shape.0.iter().product(); // primitive list size is 1 - if data_signature.data_type == 15 | 16 { - //complex - list_size *= 2; - } - let data_ndim = data_signature.ndim - 1; - let mut composition: Option = None; - if data_ndim > 0 { - let data_dim_size = data - .shape() - .0 - .iter() - .skip(1) - .map(|x| *x as u64) - .collect::>(); - // data_dim_size.remove(0); - let mut ca_block = Ca4Block::default(); - cg_block.cg_data_bytes = list_size as u32 * data_signature.byte_count; - - let composition_position = position_generator(); - cn_block.cn_composition = composition_position; - ca_block.ca_ndim = data_ndim as u16; - ca_block.ca_dim_size.clone_from(&data_dim_size); - ca_block.ca_len = 48 + 8 * data_ndim as u64; - composition = Some(Composition { - block: Compo::CA(Box::new(ca_block)), - compo: None, - }); - } - - // master channel - if master.master_flag { - cn_block.cn_type = 2; // master channel - } else { - cn_block.cn_type = 0; // data channel - if let Some(master_channel_name) = master.master_channel.clone() { - // looking for the master channel's cg position - if let Some((m, _dg_pos, (cg_pos, _rec_id), (_cn_pos, _rec_pos))) = - self.channel_names_set.get(&master_channel_name) - { - cg_block.cg_cg_master = Some(*cg_pos); - cg_block.cg_flags = 0b1000; - cg_block.cg_links = 7; // with cg_cg_master - // 
cg_block.cg_len = 112; - master.master_channel.clone_from(m); - } - } - } - if let Some(sync_type) = master.master_type { - cn_block.cn_sync_type = sync_type; - } - - // unit - if let Some(u) = unit { - let unit_position = position_generator(); - cn_block.cn_md_unit = unit_position; - self.sharable.create_tx(unit_position, u); - } - - // description - if let Some(d) = description { - let md_comment = position_generator(); - cn_block.cn_md_comment = md_comment; - self.sharable.create_tx(md_comment, d); - } - - // CN - let n_bytes = data_signature.byte_count; - let cn = Cn4 { - header: default_short_header(BlockType::CN), - unique_name: channel_name.to_string(), - data, - block: cn_block, - endian: machine_endian, - block_position: cn_pos, - pos_byte_beg: 0, - n_bytes, - composition, - list_size, - shape: data_signature.shape, - invalid_mask: None, - }; - - // CG - let cg_pos = position_generator(); - cg_block.cg_data_bytes = n_bytes; - let mut cg = Cg4 { - header: default_short_header(BlockType::CG), - block: cg_block, - master_channel_name: master.master_channel.clone(), - cn: HashMap::new(), - block_position: cg_pos, - channel_names: HashSet::new(), - record_length: n_bytes, - vlsd_cg: None, - invalid_bytes: None, - sr: Vec::new(), - }; - cg.cn.insert(0, cn); - cg.channel_names.insert(channel_name.to_string()); - - // DG - let dg_pos = position_generator(); - let dg_block = Dg4Block::default(); - let mut dg = Dg4 { - block: dg_block, - cg: HashMap::new(), - }; - dg.cg.insert(0, cg); - self.dg.insert(dg_pos, dg); - - self.channel_names_set.insert( - channel_name, - (master.master_channel, dg_pos, (cg_pos, 0), (cn_pos, 0)), - ); - Ok(()) - } - /// Removes a channel in memory (no file modification) - pub fn remove_channel(&mut self, channel_name: &str) { - if let Some((_master, dg_pos, (_cg_pos, rec_id), (_cn_pos, rec_pos))) = - self.channel_names_set.get(channel_name) - && let Some(dg) = self.dg.get_mut(dg_pos) - && let Some(cg) = dg.cg.get_mut(rec_id) - { - 
cg.cn.remove(rec_pos); - cg.channel_names.remove(channel_name); - self.channel_names_set.remove(channel_name); - } - } - /// Renames a channel's name in memory - pub fn rename_channel(&mut self, channel_name: &str, new_name: &str) { - if let Some((master, dg_pos, (cg_pos, rec_id), (cn_pos, rec_pos))) = - self.channel_names_set.remove(channel_name) - && let Some(dg) = self.dg.get_mut(&dg_pos) - && let Some(cg) = dg.cg.get_mut(&rec_id) - && let Some(cn) = cg.cn.get_mut(&rec_pos) - { - cn.unique_name = new_name.to_string(); - cg.channel_names.remove(channel_name); - cg.channel_names.insert(new_name.to_string()); - if let Some(master_name) = &master - && master_name == channel_name - { - cg.master_channel_name = Some(new_name.to_string()); - cg.channel_names.iter().for_each(|channel| { - if let Some(val) = self.channel_names_set.get_mut(channel) { - val.0 = Some(new_name.to_string()); - val.1 = dg_pos; - val.2 = (cg_pos, rec_id); - val.3 = (cn_pos, rec_pos); - } - }); - } - - self.channel_names_set.insert( - new_name.to_string(), - (master, dg_pos, (cg_pos, rec_id), (cn_pos, rec_pos)), - ); - } - } - /// defines channel's data in memory - pub fn set_channel_data( - &mut self, - channel_name: &str, - data: Arc, - ) -> Result<(), Error> { - if let Some((_master, dg_pos, (_cg_pos, rec_id), (_cn_pos, rec_pos))) = - self.channel_names_set.get(channel_name) - && let Some(dg) = self.dg.get_mut(dg_pos) - && let Some(cg) = dg.cg.get_mut(rec_id) - && let Some(cn) = cg.cn.get_mut(rec_pos) - { - cn.data = try_from(&data).context("failed converting dyn array to ChannelData")?; - } - - Ok(()) - } - /// Sets the channel unit in memory - pub fn set_channel_unit(&mut self, channel_name: &str, unit: &str) { - if let Some((_master, dg_pos, (_cg_pos, rec_id), (_cn_pos, rec_pos))) = - self.channel_names_set.get(channel_name) - && let Some(dg) = self.dg.get_mut(dg_pos) - && let Some(cg) = dg.cg.get_mut(rec_id) - && let Some(cn) = cg.cn.get_mut(rec_pos) - { - // hopefully never 2 times the 
same position - let position = position_generator(); - self.sharable.create_tx(position, unit.to_string()); - cn.block.cn_md_unit = position; - } - } - /// Sets the channel description in memory - pub fn set_channel_desc(&mut self, channel_name: &str, desc: &str) { - if let Some((_master, dg_pos, (_cg_pos, rec_id), (_cn_pos, rec_pos))) = - self.channel_names_set.get(channel_name) - && let Some(dg) = self.dg.get_mut(dg_pos) - && let Some(cg) = dg.cg.get_mut(rec_id) - && let Some(cn) = cg.cn.get_mut(rec_pos) - { - let position = position_generator(); - self.sharable.create_tx(position, desc.to_string()); - cn.block.cn_md_comment = position; - } - } - /// Sets the channel's related master channel type in memory - pub fn set_channel_master_type(&mut self, master_name: &str, master_type: u8) { - if let Some((_master, dg_pos, (_cg_pos, rec_id), (_cn_pos, rec_pos))) = - self.channel_names_set.get(master_name) - && let Some(dg) = self.dg.get_mut(dg_pos) - && let Some(cg) = dg.cg.get_mut(rec_id) - && let Some(cn) = cg.cn.get_mut(rec_pos) - { - cn.block.cn_sync_type = master_type; - } - } - /// list attachments - pub fn list_attachments(&mut self) -> String { - let mut output = String::new(); - for (key, (block, _embedded_data)) in self.at.iter() { - output.push_str(&format!( - "position: {}, filename: {:?}, mimetype: {:?}, comment: {:?}\n ", - key, - self.sharable.get_tx(block.at_tx_filename), - self.sharable.get_tx(block.at_tx_mimetype), - self.sharable.get_comments(block.at_md_comment) - )) - } - output - } - /// get embedded data in attachment for a block at position - pub fn get_attachment_embedded_data(&self, position: i64) -> Option> { - if let Some(at) = self.at.get(&position) { - at.1.clone() - } else { - None - } - } - /// get list attachment block - pub fn get_attachment_block(&self, position: i64) -> Option { - if let Some((block, _)) = self.at.get(&position) { - Some(*block) - } else { - None - } - } - /// get all attachment blocks - pub fn 
get_attachment_blocks(&self) -> HashMap { - let mut output: HashMap = HashMap::new(); - for (key, (block, _data)) in self.at.iter() { - output.insert(*key, *block); - } - output - } - /// list events - pub fn list_events(&mut self) -> String { - let mut output = String::new(); - for (key, block) in self.ev.iter() { - output.push_str(&format!( - "position: {}, name: {:?}, comment: {:?}, scope: {:?}, attachment references: {:?}, event type: {}\n", - key, - self.sharable.get_tx(block.ev_tx_name), - self.sharable.get_comments(block.ev_md_comment), - block.links[0..block.ev_scope_count as usize].to_vec(), - block.links[block.ev_scope_count as usize.. block.ev_attachment_count as usize].to_vec(), - block.ev_type, - )) - } - output - } - /// list sample reduction blocks for all channel groups - pub fn list_sample_reductions(&self) -> String { - let mut output = String::new(); - let sync_type_name = |st: u8| match st { - 1 => "time (s)", - 2 => "angle (rad)", - 3 => "distance (m)", - 4 => "index", - _ => "unknown", - }; - for (_dg_pos, dg) in self.dg.iter() { - for (rec_id, cg) in dg.cg.iter() { - if !cg.sr.is_empty() { - output.push_str(&format!( - "Channel group (rec_id={}): {} sample reduction(s)\n", - rec_id, - cg.sr.len() - )); - for (i, sr) in cg.sr.iter().enumerate() { - output.push_str(&format!( - " SR[{}]: cycle_count={}, interval={}, sync_type={} ({}), flags=0x{:02X}\n", - i, - sr.sr_cycle_count, - sr.sr_interval, - sr.sr_sync_type, - sync_type_name(sr.sr_sync_type), - sr.sr_flags, - )); - } - } - } - } - output - } - /// list source information blocks - pub fn list_source_information(&self) -> String { - let mut output = String::new(); - for (key, block) in self.sharable.si.iter() { - let si_type_name = match block.si_type { - 0 => "Other", - 1 => "ECU", - 2 => "Bus", - 3 => "I/O", - 4 => "Tool", - 5 => "User", - _ => "Unknown", - }; - let bus_type_name = match block.si_bus_type { - 0 => "None", - 1 => "Other", - 2 => "CAN", - 3 => "LIN", - 4 => "MOST", - 5 => 
"FlexRay", - 6 => "K-Line", - 7 => "Ethernet", - 8 => "USB", - _ => "Unknown", - }; - output.push_str(&format!( - "position: {}, name: {:?}, path: {:?}, type: {}, bus: {}\n", - key, - self.sharable.get_tx(block.si_tx_name), - self.sharable.get_tx(block.si_tx_path), - si_type_name, - bus_type_name, - )) - } - output - } - /// get event block from its position - pub fn get_event_block(&self, position: i64) -> Option { - self.ev.get(&position).cloned() - } - /// get all event blocks - pub fn get_event_blocks(&self) -> HashMap { - self.ev.clone() - } - /// Get a channel hierarchy block from its position - pub fn get_channel_hierarchy_block(&self, position: i64) -> Option { - self.ch.get(&position).cloned() - } - /// Get all channel hierarchy blocks - pub fn get_channel_hierarchy_blocks(&self) -> HashMap { - self.ch.clone() - } - /// List channel hierarchy in a human-readable format - pub fn list_channel_hierarchy(&self) -> String { - let mut output = String::new(); - // Find root blocks (blocks not referenced as children or siblings by any other block) - let mut non_root_positions: HashSet = HashSet::new(); - for block in self.ch.values() { - if block.ch_ch_first > 0 { - non_root_positions.insert(block.ch_ch_first); - } - if block.ch_ch_next > 0 { - non_root_positions.insert(block.ch_ch_next); - } - } - - let mut roots: Vec = self - .ch - .keys() - .filter(|pos| !non_root_positions.contains(pos)) - .copied() - .collect(); - roots.sort(); - - for root_pos in roots { - self.format_hierarchy_level(&mut output, root_pos, 0); - } - output - } - /// Helper to format a hierarchy level recursively - fn format_hierarchy_level(&self, output: &mut String, position: i64, depth: usize) { - if let Some(block) = self.ch.get(&position) { - let indent = " ".repeat(depth); - let type_name = match block.ch_type { - 0 => "Group", - 1 => "Function", - 2 => "Structure", - 3 => "Map list", - 4 => "Input variables", - 5 => "Output variables", - 6 => "Local variables", - 7 => "Defined 
calibration objects", - 8 => "Referenced calibration objects", - _ => "Unknown", - }; - let name = self - .sharable - .get_tx(block.ch_tx_name) - .ok() - .flatten() - .unwrap_or_else(|| "".to_string()); - - output.push_str(&format!( - "{}[{}] {} (elements={})\n", - indent, type_name, name, block.ch_element_count - )); - - // List elements (each element is a DG/CG/CN triplet) - for i in 0..block.ch_element_count as usize { - let base_idx = i * 3; - if base_idx + 2 < block.ch_element.len() { - let dg_pos = block.ch_element[base_idx]; - let cg_pos = block.ch_element[base_idx + 1]; - let cn_pos = block.ch_element[base_idx + 2]; - output.push_str(&format!( - "{} -> DG:{} CG:{} CN:{}\n", - indent, dg_pos, cg_pos, cn_pos - )); - } - } - - // Traverse children first - if block.ch_ch_first > 0 { - self.format_hierarchy_level(output, block.ch_ch_first, depth + 1); - } - - // Then traverse siblings at same level - if block.ch_ch_next > 0 { - self.format_hierarchy_level(output, block.ch_ch_next, depth); - } - } - } -} - -/// creates random negative position -pub fn position_generator() -> i64 { - // hopefully never 2 times the same position - let mut position = rand::random::(); - if position > 0 { - // make sure position is negative to avoid interference with existing positions in file - position = -position; - } - position -} - -/// MdfInfo4 display implementation -impl fmt::Display for MdfInfo4 { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - writeln!(f, "MdfInfo4: {}", self.file_name)?; - writeln!(f, "Version : {}\n", self.id_block.id_ver)?; - writeln!(f, "{}\n", self.hd_block)?; - let comments = &self.sharable.get_hd_comments(self.hd_block.hd_md_comment); - for c in comments.iter() { - writeln!(f, "{} {}\n", c.0, c.1)?; - } - for (master, list) in self.get_master_channel_names_set().iter() { - if let Some(master_name) = master { - writeln!(f, "\nMaster: {master_name}\n")?; - } else { - writeln!(f, "\nWithout Master channel\n")?; - } - for channel in 
list.iter() { - let unit = self.get_channel_unit(channel); - let desc = self.get_channel_desc(channel); - writeln!(f, " {channel} {unit:?} {desc:?} \n")?; - } - } - writeln!(f, "\n") - } -} - -/// MDF4 - common block Header -#[derive(Debug, Copy, Clone)] -#[binrw] -#[br(little)] -#[repr(C)] -pub struct Blockheader4 { - /// '##XX' - pub hdr_id: [u8; 4], - /// reserved, must be 0 - hdr_gap: [u8; 4], - /// Length of block in bytes - pub hdr_len: u64, - /// # of links - pub hdr_links: u64, -} - -impl Default for Blockheader4 { - fn default() -> Self { - Blockheader4 { - hdr_id: [35, 35, 84, 88], // ##TX - hdr_gap: [0x00, 0x00, 0x00, 0x00], - hdr_len: 24, - hdr_links: 0, - } - } -} - -/// parse the block header and its fields id, (reserved), length and number of links -#[inline] -pub fn parse_block_header(rdr: &mut SymBufReader<&File>) -> Result { - let mut buf = [0u8; 24]; - rdr.read_exact(&mut buf) - .context("could not read blockheader4 Id")?; - let mut block = Cursor::new(buf); - let header: Blockheader4 = block - .read_le() - .context("binread could not parse blockheader4")?; - Ok(header) -} - -/// MDF4 - common block Header without the number of links -#[derive(Debug, Copy, Clone)] -#[binrw] -#[br(little)] -#[allow(dead_code)] -#[repr(C)] -pub struct Blockheader4Short { - /// '##XX' - pub hdr_id: [u8; 4], - /// reserved, must be 0 - pub hdr_gap: [u8; 4], - /// Length of block in bytes - pub hdr_len: u64, -} - -impl Default for Blockheader4Short { - fn default() -> Self { - Blockheader4Short { - hdr_id: [35, 35, 67, 78], // ##CN - hdr_gap: [0u8; 4], - hdr_len: 160, - } - } -} - -pub fn default_short_header(variant: BlockType) -> Blockheader4Short { - match variant { - BlockType::CG => Blockheader4Short { - hdr_id: [35, 35, 67, 71], // ##CG - hdr_gap: [0u8; 4], - hdr_len: 104, // 112 with cg_cg_master, 104 without, - }, - BlockType::CN => Blockheader4Short { - hdr_id: [35, 35, 67, 78], // ##CN - hdr_gap: [0u8; 4], - hdr_len: 160, - }, - _ => Blockheader4Short { - 
hdr_id: [35, 35, 67, 78], // ##CN - hdr_gap: [0u8; 4], - hdr_len: 160, - }, - } -} - -/// parse the block header and its fields id, (reserved), length except the number of links -#[inline] -fn parse_block_header_short(rdr: &mut SymBufReader<&File>) -> Result { - let mut buf = [0u8; 16]; - rdr.read_exact(&mut buf) - .context("could not read short blockheader4 Id")?; - let mut block = Cursor::new(buf); - let header: Blockheader4Short = block - .read_le() - .context("could not parse short blockheader4")?; - Ok(header) -} - -/// reads generically a block header and return links and members section part into a Seek buffer for further processing -#[inline] -fn parse_block( - rdr: &mut SymBufReader<&File>, - target: i64, - mut position: i64, -) -> Result<(Cursor>, Blockheader4, i64)> { - // Reads block header - rdr.seek_relative(target - position) - .context("Could not reach block header position")?; // change buffer position - let block_header = parse_block_header(rdr).context(" could not read header block")?; // reads header - - // Reads in buffer rest of block - let mut buf = vec![0u8; (block_header.hdr_len - 24) as usize]; - rdr.read_exact(&mut buf) - .context("Could not read rest of block after header")?; - position = target + block_header.hdr_len as i64; - let block = Cursor::new(buf); - Ok((block, block_header, position)) -} - -/// reads generically a block header wihtout the number of links and returns links and members section part into a Seek buffer for further processing -#[inline] -fn parse_block_short( - rdr: &mut SymBufReader<&File>, - target: i64, - mut position: i64, -) -> Result<(Cursor>, Blockheader4Short, i64)> { - // Reads block header - rdr.seek_relative(target - position) - .context("Could not reach block short header position")?; // change buffer position - let block_header: Blockheader4Short = - parse_block_header_short(rdr).context(" could not read short header block")?; // reads header - - // Reads in buffer rest of block - let mut buf = 
vec![0u8; (block_header.hdr_len - 16) as usize]; - rdr.read_exact(&mut buf) - .context("Could not read rest of block after short header")?; - position = target + block_header.hdr_len as i64; - let block = Cursor::new(buf); - Ok((block, block_header, position)) -} - -/// metadata are either stored in TX (text) or MD (xml) blocks for mdf version 4 -#[derive(Debug, Clone, PartialEq, Eq)] -#[repr(C)] -#[derive(Default)] -pub enum MetaDataBlockType { - MdBlock, - MdParsed, - #[default] - TX, -} - -/// Blocks types that could link to MDBlock -#[derive(Debug, Clone)] -#[repr(C)] -#[derive(Default)] -pub enum BlockType { - HD, - FH, - AT, - EV, - DG, - CG, - #[default] - CN, - CC, - SI, - CH, -} - -/// struct linking MD or TX block with -#[derive(Debug, Default, Clone)] -#[repr(C)] -pub struct MetaData { - /// Header of the block - pub block: Blockheader4, - /// Raw bytes for the block's data - pub raw_data: Vec, - /// Block type, TX, MD or MD not yet parsed - pub block_type: MetaDataBlockType, - /// Metadata after parsing - pub comments: HashMap, - /// Parent block type - pub parent_block_type: BlockType, -} - -/// Parses the MD or TX block -fn read_meta_data( - rdr: &mut SymBufReader<&File>, - sharable: &mut SharableBlocks, - target: i64, - mut position: i64, - parent_block_type: BlockType, -) -> Result { - if target != 0 && !sharable.md_tx.contains_key(&target) { - let (raw_data, block, pos) = - parse_block(rdr, target, position).context("could not read metadata block")?; - position = pos; - let block_type = match block.hdr_id { - [35, 35, 77, 68] => MetaDataBlockType::MdBlock, - [35, 35, 84, 88] => MetaDataBlockType::TX, - _ => MetaDataBlockType::TX, - }; - let md = MetaData { - block, - raw_data: raw_data.into_inner(), - block_type, - comments: HashMap::new(), - parent_block_type, - }; - sharable.md_tx.insert(target, md); - Ok(position) - } else { - Ok(position) - } -} - -impl MetaData { - /// Returns a new MetaData struct - pub fn new(block_type: MetaDataBlockType, 
parent_block_type: BlockType) -> Self { - let header = match block_type { - MetaDataBlockType::MdBlock => Blockheader4 { - hdr_id: [35, 35, 77, 68], // '##MD' - hdr_gap: [0u8; 4], - hdr_len: 24, - hdr_links: 0, - }, - MetaDataBlockType::TX | MetaDataBlockType::MdParsed => Blockheader4 { - hdr_id: [35, 35, 84, 88], // '##TX' - hdr_gap: [0u8; 4], - hdr_len: 24, - hdr_links: 0, - }, - }; - MetaData { - block: header, - raw_data: Vec::new(), - block_type, - comments: HashMap::new(), - parent_block_type, - } - } - /// Converts the metadata handling the parent block type's specificities - pub fn parse_xml(&mut self) -> Result<()> { - if self.block_type == MetaDataBlockType::MdBlock { - match self.parent_block_type { - BlockType::HD => self.parse_hd_xml()?, - BlockType::FH => self.parse_fh_xml()?, - _ => self.parse_generic_xml()?, - }; - } - Ok(()) - } - /// Returns the text from TX Block or TX's tag text from MD Block - pub fn get_tx(&self) -> Result, Error> { - match self.block_type { - MetaDataBlockType::MdParsed => Ok(self.comments.get("TX").cloned()), - MetaDataBlockType::MdBlock => { - // extract TX tag from xml - let comment: String = self - .get_data_string() - .context("failed getting data string to extract TX tag")? 
- .trim_end_matches(['\n', '\r', ' ']) - .into(); // removes ending spaces - match roxmltree::Document::parse(&comment) { - Ok(md) => { - let mut tx: Option = None; - for node in md.root().descendants() { - let text = match node.text() { - Some(text) => text.to_string(), - None => String::new(), - }; - if node.is_element() - && !text.is_empty() - && node.tag_name().name() == r"TX" - { - tx = Some(text); - break; - } - } - Ok(tx) - } - Err(e) => { - warn!("Error parsing comment : \n{comment}\n{e}"); - Ok(None) - } - } - } - MetaDataBlockType::TX => { - let comment = str::from_utf8(&self.raw_data).with_context(|| { - format!("Invalid UTF-8 sequence in metadata: {:?}", self.raw_data) - })?; - let c: String = comment.trim_end_matches(char::from(0)).into(); - Ok(Some(c)) - } - } - } - /// Returns the bytes of the text from TX Block or TX's tag text from MD Block - pub fn get_tx_bytes(&self) -> Option<&[u8]> { - match self.block_type { - MetaDataBlockType::MdParsed => self.comments.get("TX").map(|s| s.as_bytes()), - _ => Some(&self.raw_data), - } - } - /// Decode string from raw_data field - pub fn get_data_string(&self) -> Result { - match self.block_type { - MetaDataBlockType::MdParsed => Ok(String::new()), - _ => { - let comment = str::from_utf8(&self.raw_data).with_context(|| { - format!("Invalid UTF-8 sequence in metadata: {:?}", self.raw_data) - })?; - let comment: String = comment.trim_end_matches(char::from(0)).into(); - Ok(comment) - } - } - } - /// allocate bytes to raw_data field, adjusting header length - pub fn set_data_buffer(&mut self, data: &[u8]) { - self.raw_data = [data, vec![0u8; 8 - data.len() % 8].as_slice()].concat(); - self.block.hdr_len = self.raw_data.len() as u64 + 24; - } - /// parses the xml bytes specifically for HD block contexted schema - fn parse_hd_xml(&mut self) -> Result<()> { - let mut comments: HashMap = HashMap::new(); - // MD Block from HD Block, reading xml - let comment: String = self - .get_data_string()? 
- .trim_end_matches(['\n', '\r', ' ']) - .into(); // removes ending spaces - match roxmltree::Document::parse(&comment) { - Ok(md) => { - for node in md.root().descendants().filter(|p| p.has_tag_name("e")) { - if let (Some(value), Some(text)) = (node.attribute("name"), node.text()) { - comments.insert(value.to_string(), text.to_string()); - } - } - } - Err(e) => { - warn!("Could not parse HD MD comment : \n{comment}\n{e}"); - } - }; - self.comments = comments; - self.block_type = MetaDataBlockType::MdParsed; - self.raw_data = vec![]; // empty the data from block as already parsed - Ok(()) - } - /// Creates File History MetaData - pub fn create_fh(&mut self) { - let user_name = whoami::username().unwrap_or_else(|_| "unknown".to_string()); - let comments = format!( - " -created -mdfr -ratalco -0.1 -{user_name} -" - ); - let raw_comments = format!( - "{:\0 Result<()> { - let mut comments: HashMap = HashMap::new(); - // MD Block from FH Block, reading xml - let comment: String = self - .get_data_string()? - .trim_end_matches(['\n', '\r', ' ']) - .into(); // removes ending spaces - match roxmltree::Document::parse(&comment) { - Ok(md) => { - for node in md.root().descendants() { - let text = match node.text() { - Some(text) => text.to_string(), - None => String::new(), - }; - comments.insert(node.tag_name().name().to_string(), text); - } - } - Err(e) => { - warn!("Could not parse FH comment : \n{comment}\n{e}"); - } - }; - self.comments = comments; - self.block_type = MetaDataBlockType::MdParsed; - self.raw_data = vec![]; // empty the data from block as already parsed - Ok(()) - } - /// Generic xml parser without schema consideration - fn parse_generic_xml(&mut self) -> Result<()> { - let mut comments: HashMap = HashMap::new(); - let comment: String = self - .get_data_string()? 
- .trim_end_matches(['\n', '\r', ' ']) - .into(); // removes ending spaces - match roxmltree::Document::parse(&comment) { - Ok(md) => { - for node in md.root().descendants() { - let text = match node.text() { - Some(text) => text.to_string(), - None => String::new(), - }; - if node.is_element() - && !text.is_empty() - && !node.tag_name().name().to_string().is_empty() - { - comments.insert(node.tag_name().name().to_string(), text); - } - } - } - Err(e) => { - warn!("Error parsing comment : \n{comment}\n{e}"); - } - }; - self.comments = comments; - self.block_type = MetaDataBlockType::MdParsed; - self.raw_data = vec![]; // empty the data from block as already parsed - Ok(()) - } - /// Writes the metadata to file - pub fn write(&self, writer: &mut W) -> Result<()> - where - W: Write + Seek, - { - writer - .write_le(&self.block) - .context("Could not write comment block header")?; - writer - .write_all(&self.raw_data) - .context("Could not write comment block data")?; - Ok(()) - } -} - -/// Hd4 (Header) block structure -#[derive(Debug, Copy, Clone)] -#[binrw] -#[br(little)] -#[allow(dead_code)] -#[repr(C)] -pub struct Hd4 { - /// ##HD - hd_id: [u8; 4], - /// reserved - hd_reserved: [u8; 4], - /// Length of block in bytes - hd_len: u64, - /// # of links - hd_link_counts: u64, - /// Pointer to the first data group block (DGBLOCK) (can be NIL) - pub hd_dg_first: i64, - /// Pointer to first file history block (FHBLOCK) - /// There must be at least one FHBLOCK with information about the application which created the MDF file. - pub hd_fh_first: i64, - /// Pointer to first channel hierarchy block (CHBLOCK) (can be NIL). - pub hd_ch_first: i64, - /// Pointer to first attachment block (ATBLOCK) (can be NIL) - pub hd_at_first: i64, - /// Pointer to first event block (EVBLOCK) (can be NIL) - pub hd_ev_first: i64, - /// Pointer to the measurement file comment (TXBLOCK or MDBLOCK) (can be NIL) For MDBLOCK contents, see Table 14. 
- pub hd_md_comment: i64, - /// Data members - /// Time stamp in nanoseconds elapsed since 00:00:00 01.01.1970 (UTC time or local time, depending on "local time" flag) - pub hd_start_time_ns: u64, - /// Time zone offset in minutes. The value must be in range [-720,720], i.e. it can be negative! For example a value of 60 (min) means UTC+1 time zone = Central European Time (CET). Only valid if "time offsets valid" flag is set in time flags. - pub hd_tz_offset_min: i16, - /// Daylight saving time (DST) offset in minutes for start time stamp. During the summer months, most regions observe a DST offset of 60 min (1 hour). Only valid if "time offsets valid" flag is set in time flags. - pub hd_dst_offset_min: i16, - /// Time flags The value contains the following bit flags (see HD_TF_xxx) - pub hd_time_flags: u8, - /// Time quality class (see HD_TC[35, 35, 72, 68]_xxx) - pub hd_time_class: u8, - /// Flags The value contains the following bit flags (see HD_FL_xxx): - pub hd_flags: u8, - /// reserved - pub hd_reserved2: u8, - /// Start angle in radians at start of measurement (only for angle synchronous measurements) Only valid if "start angle valid" flag is set. All angle values for angle synchronized master channels or events are relative to this start angle. - pub hd_start_angle_rad: f64, - /// Start distance in meters at start of measurement (only for distance synchronous measurements) Only valid if "start distance valid" flag is set. All distance values for distance synchronized master channels or events are relative to this start distance. 
- pub hd_start_distance_m: f64, -} - -impl Default for Hd4 { - fn default() -> Self { - Hd4 { - hd_id: [35, 35, 72, 68], // ##HD - hd_len: 104, - hd_link_counts: 6, - hd_reserved: [0u8; 4], - hd_dg_first: 0, - hd_fh_first: 0, - hd_ch_first: 0, - hd_at_first: 0, - hd_ev_first: 0, - hd_md_comment: 0, - hd_start_time_ns: Local::now() - .timestamp_nanos_opt() - .map(|t| t as u64) - .unwrap_or(0), - hd_tz_offset_min: 0, - hd_dst_offset_min: 0, - hd_time_flags: 0, - hd_time_class: 0, - hd_flags: 0, - hd_reserved2: 0, - hd_start_angle_rad: 0.0, - hd_start_distance_m: 0.0, - } - } -} - -/// Hd4 display implementation -impl fmt::Display for Hd4 { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let sec = self.hd_start_time_ns / 1000000000; - let nsec = (self.hd_start_time_ns - sec * 1000000000) as u32; - let naive = DateTime::from_timestamp(sec as i64, nsec).unwrap_or_default(); - writeln!(f, "Time : {} ", naive.to_rfc3339()) - } -} - -/// Hd4 block struct parser -pub fn hd4_parser( - rdr: &mut SymBufReader<&File>, - sharable: &mut SharableBlocks, -) -> Result<(Hd4, i64)> { - let mut buf = [0u8; 104]; - rdr.read_exact(&mut buf) - .context("could not read HD block buffer")?; - let mut block = Cursor::new(buf); - let hd: Hd4 = block - .read_le() - .context("Could not parse HD block buffer into Hd4 struct")?; - let position = read_meta_data(rdr, sharable, hd.hd_md_comment, 168, BlockType::HD)?; - Ok((hd, position)) -} - -/// Fh4 (File History) block struct, including the header -#[derive(Debug, Copy, Clone)] -#[binrw] -#[br(little)] -#[allow(dead_code)] -#[repr(C)] -pub struct FhBlock { - /// '##FH' - fh_id: [u8; 4], - /// reserved, must be 0 - fh_gap: [u8; 4], - /// Length of block in bytes - fh_len: u64, - /// # of links - fh_links: u64, - /// Link to next FHBLOCK (can be NIL if list finished) - pub fh_fh_next: i64, - /// Link to MDBLOCK containing comment about the creation or modification of the MDF file. 
- pub fh_md_comment: i64, - /// time stamp in nanosecs - pub fh_time_ns: u64, - /// time zone offset on minutes - pub fh_tz_offset_min: i16, - /// daylight saving time offset in minutes for start time stamp - pub fh_dst_offset_min: i16, - /// time flags, but 1 local, bit 2 time offsets - pub fh_time_flags: u8, - /// reserved - fh_reserved: [u8; 3], -} - -impl Default for FhBlock { - fn default() -> Self { - FhBlock { - fh_id: [35, 35, 70, 72], // '##FH' - fh_gap: [0u8; 4], - fh_len: 56, - fh_links: 2, - fh_fh_next: 0, - fh_md_comment: 0, - fh_time_ns: Local::now() - .timestamp_nanos_opt() - .map(|t| t as u64) - .unwrap_or(0), - fh_tz_offset_min: 0, - fh_dst_offset_min: 0, - fh_time_flags: 0, - fh_reserved: [0u8; 3], - } - } -} - -/// Fh4 (File History) block struct parser -fn parse_fh_block( - rdr: &mut SymBufReader<&File>, - target: i64, - position: i64, -) -> Result<(FhBlock, i64)> { - rdr.seek_relative(target - position) - .context("Could not reach FH Block position")?; // change buffer position - let mut buf = [0u8; 56]; - rdr.read_exact(&mut buf) - .context("Could not read FH block buffer")?; - let mut block = Cursor::new(buf); - let fh: FhBlock = block - .read_le() - .with_context(|| format!("Error parsing fh block into FhBlock struct \n{block:?}"))?; // reads the fh block - Ok((fh, target + 56)) -} - -type Fh = Vec; - -/// parses File History blocks along with its linked comments returns a vect of Fh4 block with comments -pub fn parse_fh( - rdr: &mut SymBufReader<&File>, - sharable: &mut SharableBlocks, - target: i64, - mut position: i64, -) -> Result<(Fh, i64)> { - let mut fh: Fh = Vec::new(); - let (block, pos) = parse_fh_block(rdr, target, position)?; - position = pos; - position = read_meta_data(rdr, sharable, block.fh_md_comment, position, BlockType::FH)?; - let mut next_pointer = block.fh_fh_next; - fh.push(block); - while next_pointer != 0 { - let (block, pos) = parse_fh_block(rdr, next_pointer, position)?; - position = pos; - next_pointer = 
block.fh_fh_next; - position = read_meta_data(rdr, sharable, block.fh_md_comment, position, BlockType::FH)?; - fh.push(block); - } - Ok((fh, position)) -} -/// At4 Attachment block struct -#[derive(Debug, Copy, Clone)] -#[binrw] -#[br(little)] -#[allow(dead_code)] -#[repr(C)] -pub struct At4Block { - /// ##DG - at_id: [u8; 4], - /// reserved - reserved: [u8; 4], - /// Length of block in bytes - at_len: u64, - /// # of links - at_links: u64, - /// Link to next ATBLOCK (linked list) (can be NIL) - at_at_next: i64, - /// Link to TXBLOCK with the path and file name of the embedded or referenced file (can only be NIL if data is embedded). The path of the file can be relative or absolute. If relative, it is relative to the directory of the MDF file. If no path is given, the file must be in the same directory as the MDF file. - pub at_tx_filename: i64, - /// Link to TXBLOCK with MIME content-type text that gives information about the attached data. Can be NIL if the content-type is unknown, but should be specified whenever possible. The MIME content-type string must be written in lowercase. - pub at_tx_mimetype: i64, - /// Link to MDBLOCK with comment and additional information about the attachment (can be NIL). - pub at_md_comment: i64, - /// Flags The value contains the following bit flags (see AT_FL_xxx): - pub at_flags: u16, - /// Creator index, i.e. zero-based index of FHBLOCK in global list of FHBLOCKs that specifies which application has created this attachment, or changed it most recently. - pub at_creator_index: u16, - /// Compression algorithm used for embedded data - pub at_zip_type: u8, - /// File path format - pub at_path_syntax: u8, - /// Reserved - at_reserved: [u8; 2], - /// 128-bit value for MD5 check sum (of the uncompressed data if data is embedded and compressed). Only valid if "MD5 check sum valid" flag (bit 2) is set. - pub at_md5_checksum: [u8; 16], - /// Original data size in Bytes, i.e. either for external file or for uncompressed data. 
- pub at_original_size: u64, - /// Embedded data size N, i.e. number of Bytes for binary embedded data following this element. Must be 0 if external file is referenced. - pub at_embedded_size: u64, - // followed by embedded data depending of flag -} - -/// At4 (Attachment) block struct parser -fn parser_at4_block( - rdr: &mut SymBufReader<&File>, - target: i64, - mut position: i64, -) -> Result<(At4Block, Option>, i64)> { - rdr.seek_relative(target - position) - .context("Could not reach At4 Block position")?; - let mut buf = [0u8; 96]; - rdr.read_exact(&mut buf) - .context("Could not read At4 Block buffer")?; - let mut block = Cursor::new(buf); - let block: At4Block = block - .read_le() - .context("Could not parse At4 Block buffer into At4Block struct")?; - position = target + 96; - - // reads embedded if exists - let data: Option> = if (block.at_flags & 0b1) > 0 { - let mut embedded_data = vec![0u8; block.at_embedded_size as usize]; - rdr.read_exact(&mut embedded_data) - .context("Could not parse At4Block embedded attachement")?; - - let zip_type = block.at_zip_type; - if (block.at_flags & 0b10) > 0 { - embedded_data = decompress_data(zip_type, 0, embedded_data, block.at_original_size)?; - } - - // MD5 Checksum verification - if (block.at_flags & 0b100) > 0 { - let mut hasher = Md5::new(); - hasher.update(&embedded_data); - let result = hasher.finalize(); - if result.as_slice() != block.at_md5_checksum { - warn!( - "MD5 checksum mismatch for attachment: expected {:?}, got {:?}", - block.at_md5_checksum, result - ); - } - } - - position += block.at_embedded_size as i64; - Some(embedded_data) - } else { - None - }; - Ok((block, data, position)) -} - -type At = HashMap>)>; - -/// parses Attachment blocks along with its linked comments, returns a hashmap of At4 block and attached data in a vect -pub fn parse_at4( - rdr: &mut SymBufReader<&File>, - sharable: &mut SharableBlocks, - target: i64, - mut position: i64, -) -> Result<(At, i64)> { - let mut at: At = 
HashMap::new(); - if target > 0 { - let (block, data, pos) = parser_at4_block(rdr, target, position)?; - position = pos; - // Reads MD - position = read_meta_data(rdr, sharable, block.at_md_comment, position, BlockType::AT)?; - // reads TX file_name - position = read_meta_data(rdr, sharable, block.at_tx_filename, position, BlockType::AT)?; - // Reads tx mime type - position = read_meta_data(rdr, sharable, block.at_tx_mimetype, position, BlockType::AT)?; - let mut next_pointer = block.at_at_next; - at.insert(target, (block, data)); - - while next_pointer > 0 { - let block_start = next_pointer; - let (block, data, pos) = parser_at4_block(rdr, next_pointer, position)?; - position = pos; - // Reads MD - position = read_meta_data(rdr, sharable, block.at_md_comment, position, BlockType::AT)?; - // reads TX file_name - position = - read_meta_data(rdr, sharable, block.at_tx_filename, position, BlockType::AT)?; - // Reads tx mime type - position = - read_meta_data(rdr, sharable, block.at_tx_mimetype, position, BlockType::AT)?; - next_pointer = block.at_at_next; - at.insert(block_start, (block, data)); - } - } - Ok((at, position)) -} - -/// Ev4 Event block struct -#[derive(Debug, Clone)] -#[binrw] -#[br(little)] -#[allow(dead_code)] -#[repr(C)] -pub struct Ev4Block { - //ev_id: [u8; 4], // DG - //reserved: [u8; 4], // reserved - //ev_len: u64, // Length of block in bytes - /// # of links - ev_links: u64, - /// Link to next EVBLOCK (linked list) (can be NIL) - ev_ev_next: i64, - /// Referencing link to EVBLOCK with parent event (can be NIL). - ev_ev_parent: i64, - /// Referencing link to EVBLOCK with event that defines the beginning of a range (can be NIL, must be NIL if ev_range_type ≠ 2). - ev_ev_range: i64, - /// Pointer to TXBLOCK with event name (can be NIL) Name must be according to naming rules stated in 4.4.2 Naming Rules. If available, the name of a named trigger condition should be used as event name. Other event types may have individual names or no names. 
- pub ev_tx_name: i64, - /// Pointer to TX/MDBLOCK with event comment and additional information, e.g. trigger condition or formatted user comment text (can be NIL) - pub ev_md_comment: i64, - #[br(if(ev_links > 5), little, count = ev_links - 5)] - /// links - links: Vec, - - /// Event type (see EV_T_xxx) - pub ev_type: u8, - /// Sync type (see EV_S_xxx) - pub ev_sync_type: u8, - /// Range Type (see EV_R_xxx) - pub ev_range_type: u8, - /// Cause of event (see EV_C_xxx) - pub ev_cause: u8, - /// flags (see EV_F_xxx) - pub ev_flags: u8, - /// Reserved - ev_reserved: [u8; 3], - /// Length M of ev_scope list. Can be zero. - pub ev_scope_count: u32, - /// Length N of ev_at_reference list, i.e. number of attachments for this event. Can be zero. - pub ev_attachment_count: u16, - /// Creator index, i.e. zero-based index of FHBLOCK in global list of FHBLOCKs that specifies which application has created or changed this event (e.g. when generating event offline). - pub ev_creator_index: u16, - /// Base value for synchronization value. - pub ev_sync_base_value: i64, - /// Factor for event synchronization value. 
- pub ev_sync_factor: f64, -} - -/// Ev4 (Event) block struct parser -fn parse_ev4_block( - rdr: &mut SymBufReader<&File>, - target: i64, - mut position: i64, -) -> Result<(Ev4Block, i64)> { - let (mut block, _header, pos) = parse_block_short(rdr, target, position)?; - position = pos; - let block: Ev4Block = block.read_le().context("Error parsing ev block")?; // reads the fh block - - Ok((block, position)) -} - -/// parses Event blocks along with its linked comments, returns a hashmap of Ev4 block with position as key -pub fn parse_ev4( - rdr: &mut SymBufReader<&File>, - sharable: &mut SharableBlocks, - target: i64, - mut position: i64, -) -> Result<(HashMap, i64)> { - let mut ev: HashMap = HashMap::new(); - if target > 0 { - let (block, pos) = parse_ev4_block(rdr, target, position)?; - position = pos; - // Reads MD - position = read_meta_data(rdr, sharable, block.ev_md_comment, position, BlockType::EV)?; - // reads TX event name - position = read_meta_data(rdr, sharable, block.ev_tx_name, position, BlockType::EV)?; - let mut next_pointer = block.ev_ev_next; - ev.insert(target, block); - - while next_pointer > 0 { - let block_start = next_pointer; - let (block, pos) = parse_ev4_block(rdr, next_pointer, position)?; - position = pos; - // Reads MD - position = read_meta_data(rdr, sharable, block.ev_md_comment, position, BlockType::EV)?; - // reads TX event name - position = read_meta_data(rdr, sharable, block.ev_tx_name, position, BlockType::EV)?; - next_pointer = block.ev_ev_next; - ev.insert(block_start, block); - } - } - Ok((ev, position)) -} - -/// Ch4Block struct -#[derive(Debug, PartialEq, Eq, Default, Clone)] -#[binrw] -#[br(little)] -#[repr(C)] -pub struct Ch4Block { - // header - // ##CH - // ch_id [u8;4] - /// reserved - // reserved: [u8; 4], - /// Length of block in bytes - // pub ch_len: u64, - /// # of links - pub ch_links: u64, - - // link section - /// link to next CHBLOCK at this hierarchy level - pub ch_ch_next: i64, - /// link to first CHBLOCK at 
the next hierarchy level (child) - pub ch_ch_first: i64, - /// link to TXBLOCK with the name of the hierarchy level - pub ch_tx_name: i64, - /// link to MDBLOCK with a comment/description - pub ch_md_comment: i64, - /// list of elements in this hierarchy level - #[br(count = ch_links - 4)] - pub ch_element: Vec, - - // data section - /// number of elements in this hierarchy level (Nx3) - pub ch_element_count: u32, - /// hierarchy level type - pub ch_type: u8, - /// reserved - pub ch_reserved: [u8; 3], -} - -/// parser Ch4Block -fn parser_ch4_block( - rdr: &mut SymBufReader<&File>, - target: i64, - mut position: i64, -) -> Result<(Ch4Block, i64)> { - let (mut block, _header, pos) = parse_block_short(rdr, target, position)?; - position = pos; - let block: Ch4Block = block.read_le().context("Error parsing ch block")?; - - Ok((block, position)) -} - -/// parses all CH blocks starting from target -pub fn parse_ch4( - rdr: &mut SymBufReader<&File>, - sharable: &mut SharableBlocks, - target: i64, - mut position: i64, -) -> Result<(HashMap, i64)> { - let mut ch = HashMap::new(); - let mut next_pointer = target; - while next_pointer > 0 { - let block_start = next_pointer; - let (block, pos) = parser_ch4_block(rdr, next_pointer, position)?; - position = pos; - - // Parse comments/names if exist - position = read_meta_data(rdr, sharable, block.ch_tx_name, position, BlockType::CH)?; - position = read_meta_data(rdr, sharable, block.ch_md_comment, position, BlockType::CH)?; - - // Traverse children - if block.ch_ch_first > 0 { - let (children, pos) = parse_ch4(rdr, sharable, block.ch_ch_first, position)?; - position = pos; - ch.extend(children); - } - - next_pointer = block.ch_ch_next; - ch.insert(block_start, block); - } - Ok((ch, position)) -} - -/// Dg4 Data Group block struct -#[derive(Debug, Copy, Clone)] -#[binrw] -#[br(little)] -#[allow(dead_code)] -#[repr(C)] -pub struct Dg4Block { - /// ##DG - dg_id: [u8; 4], - /// reserved - reserved: [u8; 4], - /// Length of block in 
bytes - pub dg_len: u64, - /// # of links - dg_links: u64, - /// Pointer to next data group block (DGBLOCK) (can be NIL) - pub dg_dg_next: i64, - /// Pointer to first channel group block (CGBLOCK) (can be NIL) - pub dg_cg_first: i64, - // Pointer to data block (DTBLOCK or DZBLOCK for this block type) or data list block (DLBLOCK of data blocks or its HLBLOCK) (can be NIL) - pub dg_data: i64, - /// comment - dg_md_comment: i64, - /// number of bytes used for record IDs. 0 no recordID - pub dg_rec_id_size: u8, - // reserved - reserved_2: [u8; 7], -} - -impl Default for Dg4Block { - fn default() -> Self { - Dg4Block { - dg_id: [35, 35, 68, 71], // ##DG - reserved: [0; 4], - dg_len: 64, - dg_links: 4, - dg_dg_next: 0, - dg_cg_first: 0, - dg_data: 0, - dg_md_comment: 0, - dg_rec_id_size: 0, - reserved_2: [0; 7], - } - } -} - -/// Dg4 (Data Group) block struct parser with comments -fn parse_dg4_block( - rdr: &mut SymBufReader<&File>, - sharable: &mut SharableBlocks, - target: i64, - mut position: i64, -) -> Result<(Dg4Block, i64)> { - rdr.seek_relative(target - position) - .context("Could not reach position of Dg4 block")?; - let mut buf = [0u8; 64]; - rdr.read_exact(&mut buf) - .context("Could not read Dg4Blcok buffer")?; - let mut block = Cursor::new(buf); - let dg: Dg4Block = block - .read_le() - .context("Could not parse Dg4Block buffer into Dg4Block struct")?; - position = target + 64; - - // Reads MD - position = read_meta_data(rdr, sharable, dg.dg_md_comment, position, BlockType::DG)?; - - Ok((dg, position)) -} - -/// Dg4 struct wrapping block, comments and linked CG -#[derive(Debug, Clone)] -#[allow(dead_code)] -#[repr(C)] -pub struct Dg4 { - /// DG Block - pub block: Dg4Block, - /// CG Block - pub cg: HashMap, -} - -/// Parser for Dg4 and all linked blocks (cg, cn, cc, ca, si) -pub fn parse_dg4( - rdr: &mut SymBufReader<&File>, - target: i64, - mut position: i64, - sharable: &mut SharableBlocks, -) -> Result<(BTreeMap, i64, usize, usize)> { - let mut dg: BTreeMap 
= BTreeMap::new(); - let mut n_cn: usize = 0; - let mut n_cg: usize = 0; - if target > 0 { - let (block, pos) = parse_dg4_block(rdr, sharable, target, position)?; - position = pos; - let mut next_pointer = block.dg_dg_next; - let (mut cg, pos, num_cg, num_cn) = parse_cg4( - rdr, - block.dg_cg_first, - position, - sharable, - block.dg_rec_id_size, - )?; - n_cg += num_cg; - n_cn += num_cn; - identify_vlsd_cg(&mut cg); - let dg_struct = Dg4 { block, cg }; - dg.insert(target, dg_struct); - position = pos; - while next_pointer > 0 { - let block_start = next_pointer; - let (block, pos) = parse_dg4_block(rdr, sharable, next_pointer, position)?; - next_pointer = block.dg_dg_next; - position = pos; - let (mut cg, pos, num_cg, num_cn) = parse_cg4( - rdr, - block.dg_cg_first, - position, - sharable, - block.dg_rec_id_size, - )?; - n_cg += num_cg; - n_cn += num_cn; - identify_vlsd_cg(&mut cg); - let dg_struct = Dg4 { block, cg }; - dg.insert(block_start, dg_struct); - position = pos; - } - } - Ok((dg, position, n_cg, n_cn)) -} - -/// Try to link VLSD/VLSC Channel Groups with matching channel in other groups -fn identify_vlsd_cg(cg: &mut HashMap) { - // First find all VLSD/VLSC Channel Groups - let mut vlsd: HashMap = HashMap::new(); - for (rec_id, channel_group) in cg.iter() { - if (channel_group.block.cg_flags & (CG_F_VLSD | CG_F_VLSC)) != 0 { - // VLSD or VLSC channel group found - vlsd.insert(channel_group.block_position, *rec_id); - } - } - if !vlsd.is_empty() { - // try to find corresponding channel in other channel group - let mut vlsd_matching: HashMap = HashMap::new(); - for (target_rec_id, channel_group) in cg.iter() { - for (target_rec_pos, cn) in channel_group.cn.iter() { - if let Some(vlsd_rec_id) = vlsd.get(&cn.block.cn_data) { - // Found matching channel with VLSD_CG - vlsd_matching.insert(*vlsd_rec_id, (*target_rec_id, *target_rec_pos)); - } - } - } - for (vlsd_rec_id, (target_rec_id, target_rec_pos)) in vlsd_matching { - if let Some(vlsd_cg) = 
cg.get_mut(&vlsd_rec_id) { - vlsd_cg.vlsd_cg = Some((target_rec_id, target_rec_pos)); - } - } - } -} - -/// sharable blocks (most likely referenced multiple times and shared by several blocks) -/// that are in sharable fields and holds CC, SI, TX and MD blocks -#[derive(Debug, Default, Clone)] -#[repr(C)] -pub struct SharableBlocks { - pub(crate) md_tx: HashMap, - pub(crate) cc: HashMap, - pub(crate) si: HashMap, -} - -/// SharableBlocks display implementation to facilitate debugging -impl fmt::Display for SharableBlocks { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - writeln!(f, "MD TX comments : \n")?; - for (_k, c) in self.md_tx.iter() { - match c.block_type { - MetaDataBlockType::MdParsed => { - for (tag, text) in c.comments.iter() { - writeln!(f, "Tag: {tag} Text: {text}")?; - } - } - MetaDataBlockType::TX => match c.get_data_string() { - Ok(s) => writeln!(f, "Text: {s}")?, - Err(e) => writeln!(f, "Text: {e:?}")?, - }, - _ => (), - } - } - writeln!(f, "CC : \n")?; - for (position, cc) in self.cc.iter() { - writeln!(f, "Position: {position} Text: {cc:?}")?; - } - writeln!(f, "SI : ")?; - for (position, si) in self.si.iter() { - writeln!(f, "Position: {position} Text: {si:?}")?; - } - writeln!(f, "finished") - } -} - -impl SharableBlocks { - /// Returns the text from TX Block or TX tag's text from MD block - pub fn get_tx(&self, position: i64) -> Result> { - let mut txt: Option = None; - if let Some(md) = self.md_tx.get(&position) { - txt = md.get_tx()?; - }; - Ok(txt) - } - /// Creates a new SharableBlocks of type TX (not MD) - pub fn create_tx(&mut self, position: i64, text: String) { - let md = self - .md_tx - .entry(position) - .or_insert_with(|| MetaData::new(MetaDataBlockType::TX, BlockType::CN)); - md.set_data_buffer(text.as_bytes()); - } - /// Returns metadata from MD Block - /// keys are tag and related value text of tag - pub fn get_comments(&mut self, position: i64) -> HashMap { - let mut comments: HashMap = HashMap::new(); - if let 
Some(md) = self.md_tx.get_mut(&position) { - match md.block_type { - MetaDataBlockType::MdParsed => { - comments.clone_from(&md.comments); - } - MetaDataBlockType::MdBlock => { - // not yet parsed, so let's parse it - let _ = md.parse_xml(); - comments.clone_from(&md.comments); - } - MetaDataBlockType::TX => { - // should not happen - } - } - }; - comments - } - /// Returns metadata from MD Block linked by HD Block - /// keys are tag and related value text of tag - pub fn get_hd_comments(&self, position: i64) -> HashMap { - // this method assumes the xml was already parsed - let mut comments: HashMap = HashMap::new(); - if let Some(md) = self.md_tx.get(&position) - && md.block_type == MetaDataBlockType::MdParsed - { - comments.clone_from(&md.comments); - }; - comments - } - /// parses the HD Block metadata comments - /// done right after reading HD block - pub fn parse_hd_comments(&mut self, position: i64) { - if let Some(md) = self.md_tx.get_mut(&position) { - let _ = md.parse_hd_xml(); - }; - } - /// Create new Shared Block - pub fn new(n_channels: usize) -> SharableBlocks { - let md_tx: HashMap = HashMap::with_capacity(n_channels); - let cc: HashMap = HashMap::new(); - let si: HashMap = HashMap::new(); - SharableBlocks { md_tx, cc, si } - } -} -/// Cg4 Channel Group block struct -#[derive(Debug, Copy, Clone)] -#[binrw] -#[br(little)] -#[allow(dead_code)] -#[repr(C)] -pub struct Cg4Block { - /// ##CG - // cg_id: [u8; 4], - /// reserved - // reserved: [u8; 4], - /// Length of block in bytes - // pub cg_len: u64, - /// # of links - pub cg_links: u64, - /// Pointer to next channel group block (CGBLOCK) (can be NIL) - pub cg_cg_next: i64, - /// Pointer to first channel block (CNBLOCK) (can be NIL, must be NIL for VLSD CGBLOCK, i.e. 
if "VLSD channel group" flag (bit 0) is set) - pub cg_cn_first: i64, - /// Pointer to acquisition name (TXBLOCK) (can be NIL, must be NIL for VLSD CGBLOCK) - cg_tx_acq_name: i64, - /// Pointer to acquisition source (SIBLOCK) (can be NIL, must be NIL for VLSD CGBLOCK) See also rules for uniqueness explained in 4.4.3 Identification of Channels. - cg_si_acq_source: i64, - /// Pointer to first sample reduction block (SRBLOCK) (can be NIL, must be NIL for VLSD CGBLOCK) - cg_sr_first: i64, - ///Pointer to comment and additional information (TXBLOCK or MDBLOCK) (can be NIL, must be NIL for VLSD CGBLOCK) - cg_md_comment: i64, - #[br(if(cg_links > 6))] - pub cg_cg_master: Option, - // Data Members - /// Record ID, value must be less than maximum unsigned integer value allowed by dg_rec_id_size in parent DGBLOCK. Record ID must be unique within linked list of CGBLOCKs. - pub cg_record_id: u64, - /// Number of cycles, i.e. number of samples for this channel group. This specifies the number of records of this type in the data block. - pub cg_cycle_count: u64, - /// Flags The value contains the following bit flags (see CG_F_xx): - pub cg_flags: u16, - cg_path_separator: u16, - /// Reserved. - cg_reserved: [u8; 4], - /// Normal CGBLOCK: Number of data Bytes (after record ID) used for signal values in record, i.e. size of plain data for each recorded sample of this channel group. VLSD CGBLOCK: Low part of a UINT64 value that specifies the total size in Bytes of all variable length signal values for the recorded samples of this channel group. See explanation for cg_inval_bytes. - pub cg_data_bytes: u32, - /// Normal CGBLOCK: Number of additional Bytes for record used for invalidation bits. Can be zero if no invalidation bits are used at all. Invalidation bits may only occur in the specified number of Bytes after the data Bytes, not within the data Bytes that contain the signal values. 
VLSD CGBLOCK: High part of UINT64 value that specifies the total size in Bytes of all variable length signal values for the recorded samples of this channel group, i.e. the total size in Bytes can be calculated by cg_data_bytes + (cg_inval_bytes << 32) Note: this value does not include the Bytes used to specify the length of each VLSD value! - pub cg_inval_bytes: u32, -} - -impl Default for Cg4Block { - fn default() -> Self { - Cg4Block { - // cg_id: [35, 35, 67, 71], // ##CG - // reserved: [0u8; 4], - // cg_len: 104, // 112 with cg_cg_master, 104 without - cg_links: 6, // 7 with cg_cg_master, 6 without - cg_cg_next: 0, - cg_cn_first: 0, - cg_tx_acq_name: 0, - cg_si_acq_source: 0, - cg_sr_first: 0, - cg_md_comment: 0, - cg_cg_master: None, - cg_record_id: 0, - cg_cycle_count: 0, - cg_flags: 0, // bit 3 set for remote master - cg_path_separator: 0, - cg_reserved: [0; 4], - cg_data_bytes: 0, - cg_inval_bytes: 0, - } - } -} - -/// Cg4 (Channel Group) block struct parser with linked comments Source Information in sharable blocks -fn parse_cg4_block( - rdr: &mut SymBufReader<&File>, - target: i64, - mut position: i64, - sharable: &mut SharableBlocks, - record_id_size: u8, -) -> Result<(Cg4, i64, usize)> { - let (mut block, header, pos) = parse_block_short(rdr, target, position)?; - position = pos; - let cg: Cg4Block = block - .read_le() - .context("Could not read buffer into Cg4Block struct")?; - - // Reads MD - position = read_meta_data(rdr, sharable, cg.cg_md_comment, position, BlockType::CG)?; - // For VLSD/VLSC, cg_inval_bytes is the high part of VL data size, not invalidation bytes - let inval_bytes_for_record = if (cg.cg_flags & (CG_F_VLSD | CG_F_VLSC)) != 0 { - 0 - } else { - cg.cg_inval_bytes - }; - let record_layout = (record_id_size, cg.cg_data_bytes, inval_bytes_for_record); - - // reads CN (and other linked block behind like CC, SI, CA, etc.) 
- let (cn, pos, n_cn, _first_rec_pos) = parse_cn4( - rdr, - cg.cg_cn_first, - position, - sharable, - record_layout, - cg.cg_cycle_count, - )?; - position = pos; - - // Reads Acq Name - position = read_meta_data(rdr, sharable, cg.cg_tx_acq_name, position, BlockType::CG)?; - - // Reads SI Acq name - let si_pointer = cg.cg_si_acq_source; - if (si_pointer != 0) && !sharable.si.contains_key(&si_pointer) { - let (mut si_block, _header, pos) = parse_block_short(rdr, si_pointer, position)?; - position = pos; - let si_block: Si4Block = si_block - .read_le() - .context("Could not read buffer into Si4block struct")?; - position = read_meta_data(rdr, sharable, si_block.si_tx_name, position, BlockType::SI)?; - position = read_meta_data(rdr, sharable, si_block.si_tx_path, position, BlockType::SI)?; - sharable.si.insert(si_pointer, si_block); - } - - let record_length = cg.cg_data_bytes; - - // Parse Sample Reduction blocks if present - let (sr_blocks, pos) = parse_sr4(rdr, cg.cg_sr_first, position)?; - position = pos; - - let cg_struct = Cg4 { - header, - block: cg, - cn, - master_channel_name: None, - channel_names: HashSet::new(), - record_length, - block_position: target, - vlsd_cg: None, - invalid_bytes: None, - sr: sr_blocks, - }; - - Ok((cg_struct, position, n_cn)) -} - -/// Parses the linked list of Sample Reduction blocks (SRBLOCK) starting from target -fn parse_sr4( - rdr: &mut SymBufReader<&File>, - target: i64, - mut position: i64, -) -> Result<(Vec, i64)> { - let mut sr_blocks: Vec = Vec::new(); - if target <= 0 { - return Ok((sr_blocks, position)); - } - - let mut next = target; - while next > 0 { - // Read just the 16-byte header first to validate before allocating - rdr.seek_relative(next - position) - .context("Could not reach SR block header position")?; - let header: Blockheader4Short = - parse_block_header_short(rdr).context("Could not read SR block header")?; - // Validate block ID is ##SR - if &header.hdr_id != b"##SR" { - position = next + 16; - break; - } 
- // Now read the rest of the block - let mut buf = vec![0u8; (header.hdr_len - 16) as usize]; - rdr.read_exact(&mut buf) - .context("Could not read SR block body")?; - position = next + header.hdr_len as i64; - let mut block = Cursor::new(buf); - let sr: Sr4Block = block - .read_le() - .context("Could not read buffer into Sr4Block struct")?; - next = sr.sr_sr_next; - sr_blocks.push(sr); - } - - Ok((sr_blocks, position)) -} - -/// Channel Group struct -/// it contains the related channels structure, a set of channel names, the dedicated master channel name and other helper data. -#[derive(Debug, Clone)] -#[repr(C)] -pub struct Cg4 { - /// short header - pub header: Blockheader4Short, - /// CG block without header - pub block: Cg4Block, - /// hashmap of channels - pub cn: CnType, - /// Master channel name - pub master_channel_name: Option, - /// Set of channel names belonging to this channel group - pub channel_names: HashSet, - /// as not stored in .block but can still be referenced by other blocks - pub block_position: i64, - /// record length including recordId and invalid bytes - pub record_length: u32, - /// pointing to another cg,cn - pub vlsd_cg: Option<(u64, i32)>, - /// invalid byte array, optional - pub invalid_bytes: Option>, - /// Sample reduction blocks linked from cg_sr_first - pub sr: Vec, -} - -/// Cg4 implementations for extracting acquisition and source name and path -impl Cg4 { - /// Channel group acquisition name - fn get_cg_name(&self, sharable: &SharableBlocks) -> Result> { - sharable.get_tx(self.block.cg_tx_acq_name) - } - /// Channel group source name - fn get_cg_source_name(&self, sharable: &SharableBlocks) -> Result> { - let si = sharable.si.get(&self.block.cg_si_acq_source); - match si { - Some(block) => Ok(block.get_si_source_name(sharable)?), - None => Ok(None), - } - } - /// Channel group source path - fn get_cg_source_path(&self, sharable: &SharableBlocks) -> Result> { - let si = sharable.si.get(&self.block.cg_si_acq_source); - match 
si { - Some(block) => Ok(block.get_si_path_name(sharable)?), - None => Ok(None), - } - } - /// Computes the validity mask for each channel in the group - /// clears out the common invalid bytes vector for the group at the end - pub fn process_all_channel_invalid_bits(&mut self) -> Result<(), Error> { - // For VLSD/VLSC, cg_inval_bytes is the high part of VL data size, not invalidation bytes - if (self.block.cg_flags & (CG_F_VLSD | CG_F_VLSC)) != 0 { - return Ok(()); - } - // get invalid bytes - let cg_inval_bytes = self.block.cg_inval_bytes as usize; - if let Some(invalid_bytes) = &self.invalid_bytes { - // To extract invalidity for each channel from invalid_bytes - self.cn - .par_iter_mut() - .filter(|(_rec_pos, cn)| !cn.data.is_empty()) - .try_for_each(|(_rec_pos, cn): (&i32, &mut Cn4)| -> Result<(), Error> { - if let Some((Some(mask), invalid_byte_position, invalid_byte_mask)) = - &mut cn.invalid_mask - { - // mask is already initialised to all valid values. - invalid_bytes.chunks(cg_inval_bytes).enumerate().for_each( - |(index, record)| { - // arrow considers bit set as valid while mdf spec considers bit set as invalid - mask.set_bit( - index, - (record[*invalid_byte_position] & *invalid_byte_mask) == 0, - ); - }, - ); - cn.data.set_validity(mask).with_context(|| { - format!( - "failed applying invalid bits for channel {}", - cn.unique_name - ) - })?; - } - Ok(()) - })?; - self.invalid_bytes = None; // Clears out invalid bytes channel - } else if cg_inval_bytes > 0 { - // invalidity already stored in mask for each channel by read_channels_from_bytes() - // to set validity in arrow array - self.cn - .par_iter_mut() - .filter(|(_rec_pos, cn)| !cn.data.is_empty()) - .try_for_each(|(_rec_pos, cn): (&i32, &mut Cn4)| -> Result<(), Error> { - if let Some((validity, _invalid_byte_position, _invalid_byte_mask)) = - &mut cn.invalid_mask - { - if let Some(mask) = validity { - cn.data.set_validity(mask).with_context(|| { - format!( - "failed applying invalid bits for 
channel {} from mask", - cn.unique_name - ) - })?; - } - *validity = None; // clean bitmask from Cn4 as present in arrow array - } - Ok(()) - })?; - } - Ok(()) - } - - /// Process Channel Variant (CV) compositions after data is loaded. - /// For each channel with a CV composition, this method: - /// 1. Reads the discriminator channel values - /// 2. Maps discriminator values to option indices using cv_option_val - /// 3. Merges option channel data based on the discriminator - /// - /// After processing, the parent channel (with CV composition) contains the merged data. - pub fn process_channel_variants(&mut self) -> Result<(), Error> { - // Find channels with CV composition and collect info - let cv_channels: Vec<(i32, i64, Vec, Vec)> = self - .cn - .iter() - .filter_map(|(rec_pos, cn)| { - #[allow(clippy::collapsible_if)] - if let Some(composition) = &cn.composition { - if let Compo::CV(cv_block) = &composition.block { - return Some(( - *rec_pos, - cv_block.cv_cn_discriminator, - cv_block.cv_cn_option.clone(), - cv_block.cv_option_val.clone(), - )); - } - } - None - }) - .collect(); - - for (parent_rec_pos, discriminator_ptr, option_ptrs, option_vals) in cv_channels { - // First pass: collect all needed data (immutable borrows complete before mutable) - let discriminator_values: Vec; - let option_data: Vec>; - let option_names: Vec; - - { - // Find the discriminator channel by block_position - let discriminator_cn = self - .cn - .values() - .find(|cn| cn.block_position == discriminator_ptr); - - let Some(disc_cn) = discriminator_cn else { - log::warn!( - "CV discriminator channel not found for block_position {}", - discriminator_ptr - ); - continue; - }; - - // Get discriminator values as u64 - discriminator_values = match disc_cn.data.to_u64_vec() { - Some(v) => v, - None => { - log::warn!("CV discriminator channel has unsupported data type"); - continue; - } - }; - - if discriminator_values.is_empty() { - continue; - } - - // Collect option channel data and 
names in a single pass - let (data_vec, names_vec): (Vec>, Vec) = - option_ptrs - .iter() - .map(|ptr| { - match self - .cn - .values() - .find(|cn| cn.block_position == *ptr) - { - Some(cn) => { - (Some(cn.data.clone()), cn.unique_name.clone()) - } - None => (None, String::new()), - } - }) - .unzip(); - option_data = data_vec; - option_names = names_vec; - } - // Immutable borrows end here - - // Build index mapping: discriminator value -> option index - let val_to_option: std::collections::HashMap = option_vals - .iter() - .enumerate() - .map(|(idx, val)| (*val, idx)) - .collect(); - - // Check if all option channels have the same data type - let all_same_type = { - let mut discriminants: Vec> = Vec::new(); - for data in option_data.iter().flatten() { - discriminants.push(std::mem::discriminant(data)); - } - discriminants.windows(2).all(|w| w[0] == w[1]) - }; - - if all_same_type { - // All options have the same type: use existing merge path - let template = option_data.iter().find_map(|o| o.clone()); - - // Second pass: update parent channel (mutable borrow) - if let Some(parent_cn) = self.cn.get_mut(&parent_rec_pos) - && let Some(tmpl) = template - { - let merged_data = merge_variant_data_owned( - &discriminator_values, - &option_data, - &val_to_option, - &tmpl, - ); - - if let Some(data) = merged_data { - parent_cn.data = data; - } - } - } else { - // Mixed types: build a dense UnionArray - // Effective sample count is the minimum of discriminator and all option lengths - let n_samples = { - let mut min_len = discriminator_values.len(); - for data in option_data.iter().flatten() { - min_len = min_len.min(data.len()); - } - min_len - }; - - // Single pass: build type_ids, offsets, and per-child indices together - let mut type_ids = Vec::with_capacity(n_samples); - let mut offsets = Vec::with_capacity(n_samples); - let mut child_indices: Vec> = - vec![Vec::new(); option_data.len()]; - - for (i, disc_val) in discriminator_values[..n_samples].iter().enumerate() - 
{ - let opt_idx = val_to_option.get(disc_val).copied().unwrap_or(0); - type_ids.push(opt_idx as i8); - offsets.push(child_indices[opt_idx].len() as i32); - child_indices[opt_idx].push(i as u32); - } - - // Build child arrays using pre-collected indices - let children: Vec = option_data - .iter() - .enumerate() - .map(|(opt_idx, opt)| { - if let Some(data) = opt { - let full_array = data.finish_cloned(); - let indices_array = - UInt32Array::from(child_indices[opt_idx].clone()); - take(&*full_array, &indices_array, None) - .unwrap_or(full_array) - } else { - Arc::new(arrow::array::NullArray::new(0)) as ArrayRef - } - }) - .collect(); - - let union_fields = build_union_fields(&option_names, &children); - let type_ids_buffer = ScalarBuffer::from(type_ids); - let offsets_buffer = ScalarBuffer::from(offsets); - - match UnionArray::try_new( - union_fields, - type_ids_buffer, - Some(offsets_buffer), - children, - ) { - Ok(union_array) => { - if let Some(parent_cn) = self.cn.get_mut(&parent_rec_pos) { - parent_cn.data = ChannelData::Union(union_array); - } - } - Err(e) => { - log::warn!( - "Failed to create dense UnionArray for CV variant: {}", - e - ); - } - } - } - } - - Ok(()) - } - - /// Process Channel Union (CU) compositions after data is loaded. - /// For each channel with a CU composition, this method: - /// 1. Collects member channel data (already read by pipeline) - /// 2. Builds UnionFields from member names and data types - /// 3. Creates a sparse UnionArray where all members are valid at every row - /// 4. Replaces parent channel data with ChannelData::Union - /// - /// CU blocks represent C-style unions: all members share the same bytes and are - /// simultaneously valid, just interpreted differently. 
- pub fn process_channel_unions(&mut self) -> Result<(), Error> { - // Find channels with CU composition and collect info - let cu_channels: Vec<(i32, Vec)> = self - .cn - .iter() - .filter_map(|(rec_pos, cn)| { - if let Some(composition) = &cn.composition - && let Compo::CU(cu_block) = &composition.block - { - return Some((*rec_pos, cu_block.cu_cn_member.clone())); - } - None - }) - .collect(); - - for (parent_rec_pos, member_ptrs) in cu_channels { - if member_ptrs.is_empty() { - continue; - } - - // Collect member channel info: (name, data as ArrayRef) - let member_info: Vec<(String, ArrayRef)> = member_ptrs - .iter() - .filter_map(|ptr| { - self.cn.values().find(|cn| cn.block_position == *ptr).map( - |cn| { - let name = cn.unique_name.clone(); - let array = cn.data.finish_cloned(); - (name, array) - }, - ) - }) - .collect(); - - if member_info.is_empty() { - log::warn!("CU member channels not found for parent at rec_pos {}", parent_rec_pos); - continue; - } - - // All members should have the same length (same number of samples) - let n_samples = member_info.first().map(|(_, arr)| arr.len()).unwrap_or(0); - if n_samples == 0 { - continue; - } - - // Split member_info into names and children, then build UnionFields - let (member_names, children): (Vec, Vec) = - member_info.into_iter().unzip(); - let union_fields = build_union_fields(&member_names, &children); - - // For sparse union: type_ids all set to 0 (first member as primary interpretation) - // In reality for CU blocks, all members are equally valid - we just pick the first - let type_ids: ScalarBuffer = ScalarBuffer::from(vec![0i8; n_samples]); - - // Create sparse UnionArray (offsets = None) - let union_array = match UnionArray::try_new( - union_fields, - type_ids, - None, // sparse union: no offsets - children, - ) { - Ok(arr) => arr, - Err(e) => { - log::warn!("Failed to create UnionArray for CU channel: {}", e); - continue; - } - }; - - // Update parent channel data - if let Some(parent_cn) = 
self.cn.get_mut(&parent_rec_pos) { - parent_cn.data = ChannelData::Union(union_array); - } - } - - Ok(()) - } -} - -/// Build UnionFields from parallel name and child arrays slices. -fn build_union_fields(names: &[String], children: &[ArrayRef]) -> UnionFields { - let fields: Vec<(i8, Arc)> = children - .iter() - .enumerate() - .map(|(idx, array)| { - let name = names.get(idx).cloned().unwrap_or_default(); - ( - idx as i8, - Arc::new(Field::new(name, array.data_type().clone(), true)), - ) - }) - .collect(); - UnionFields::from_iter(fields) -} - -/// Merge variant option data based on discriminator values (using owned ChannelData) -fn merge_variant_data_owned( - discriminator_values: &[u64], - option_data: &[Option], - val_to_option: &std::collections::HashMap, - template: &ChannelData, -) -> Option { - use crate::data_holder::channel_data::ChannelData; - - let n_samples = discriminator_values.len(); - - macro_rules! merge_typed { - ($builder_type:ty, $variant:ident) => {{ - let mut builder = <$builder_type>::with_capacity(n_samples); - for (i, disc_val) in discriminator_values.iter().enumerate() { - if let Some(&opt_idx) = val_to_option.get(disc_val) - && let Some(Some(ChannelData::$variant(b))) = option_data.get(opt_idx) - && i < b.values_slice().len() - { - builder.append_value(b.values_slice()[i]); - continue; - } - // Default value if option not found - builder.append_value(Default::default()); - } - Some(ChannelData::$variant(builder)) - }}; - } - - match template { - ChannelData::UInt8(_) => merge_typed!(arrow::array::UInt8Builder, UInt8), - ChannelData::UInt16(_) => merge_typed!(arrow::array::UInt16Builder, UInt16), - ChannelData::UInt32(_) => merge_typed!(arrow::array::UInt32Builder, UInt32), - ChannelData::UInt64(_) => merge_typed!(arrow::array::UInt64Builder, UInt64), - ChannelData::Int8(_) => merge_typed!(arrow::array::Int8Builder, Int8), - ChannelData::Int16(_) => merge_typed!(arrow::array::Int16Builder, Int16), - ChannelData::Int32(_) => 
merge_typed!(arrow::array::Int32Builder, Int32), - ChannelData::Int64(_) => merge_typed!(arrow::array::Int64Builder, Int64), - ChannelData::Float32(_) => merge_typed!(arrow::array::Float32Builder, Float32), - ChannelData::Float64(_) => merge_typed!(arrow::array::Float64Builder, Float64), - _ => { - log::warn!("CV variant merge not implemented for this data type"); - None - } - } -} - -/// Cg4 blocks and linked blocks parsing -pub fn parse_cg4( - rdr: &mut SymBufReader<&File>, - target: i64, - mut position: i64, - sharable: &mut SharableBlocks, - record_id_size: u8, -) -> Result<(HashMap, i64, usize, usize)> { - let mut cg: HashMap = HashMap::new(); - let mut n_cg: usize = 0; - let mut n_cn: usize = 0; - if target != 0 { - let (mut cg_struct, pos, num_cn) = - parse_cg4_block(rdr, target, position, sharable, record_id_size)?; - position = pos; - let mut next_pointer = cg_struct.block.cg_cg_next; - // For VLSD/VLSC, cg_inval_bytes is the high part of total VL data size, not invalidation bytes - let inval_bytes_size = if (cg_struct.block.cg_flags & (CG_F_VLSD | CG_F_VLSC)) != 0 { - 0 - } else { - cg_struct.block.cg_inval_bytes - }; - cg_struct.record_length += record_id_size as u32 + inval_bytes_size; - cg.insert(cg_struct.block.cg_record_id, cg_struct); - n_cg += 1; - n_cn += num_cn; - - while next_pointer != 0 { - let (mut cg_struct, pos, num_cn) = - parse_cg4_block(rdr, next_pointer, position, sharable, record_id_size)?; - position = pos; - // For VLSD/VLSC, cg_inval_bytes is the high part of total VL data size, not invalidation bytes - let inval_bytes_size = if (cg_struct.block.cg_flags & (CG_F_VLSD | CG_F_VLSC)) != 0 { - 0 - } else { - cg_struct.block.cg_inval_bytes - }; - cg_struct.record_length += record_id_size as u32 + inval_bytes_size; - next_pointer = cg_struct.block.cg_cg_next; - cg.insert(cg_struct.block.cg_record_id, cg_struct); - n_cg += 1; - n_cn += num_cn; - } - } - Ok((cg, position, n_cg, n_cn)) -} - -/// Cn4 Channel block struct -#[derive(Debug, 
PartialEq, Clone)] -#[binrw] -#[br(little)] -#[repr(C)] -pub struct Cn4Block { - /// ##CN - // cn_id: [u8; 4], - /// reserved - // reserved: [u8; 4], - /// Length of block in bytes - // pub cn_len: u64, - /// # of links - cn_links: u64, - /// Pointer to next channel block (CNBLOCK) (can be NIL) - cn_cn_next: i64, - /// Composition of channels: Pointer to channel array block (CABLOCK) or channel block (CNBLOCK) (can be NIL). Details see 4.18 Composition of Channels - pub cn_composition: i64, - /// Pointer to TXBLOCK with name (identification) of channel. Name must be according to naming rules stated in 4.4.2 Naming Rules. - pub cn_tx_name: i64, - /// Pointer to channel source (SIBLOCK) (can be NIL) Must be NIL for component channels (members of a structure or array elements) because they all must have the same source and thus simply use the SIBLOCK of their parent CNBLOCK (direct child of CGBLOCK). - cn_si_source: i64, - /// Pointer to the conversion formula (CCBLOCK) (can be NIL, must be NIL for complex channel data types, i.e. for cn_data_type ≥ 10). If the pointer is NIL, this means that a 1:1 conversion is used (phys = int). }; - pub cn_cc_conversion: i64, - /// Pointer to channel type specific signal data For variable length data channel (cn_type = 1): unique link to signal data block (SDBLOCK) or data list block (DLBLOCK) or, only for unsorted data groups, referencing link to a VLSD channel group block (CGBLOCK). Can only be NIL if SDBLOCK would be empty. For synchronization channel (cn_type = 4): referencing link to attachment block (ATBLOCK) in global linked list of ATBLOCKs starting at hd_at_first. Cannot be NIL. - pub cn_data: i64, - /// Pointer to TXBLOCK/MDBLOCK with designation for physical unit of signal data (after conversion) or (only for channel data types "MIME sample" and "MIME stream") to MIME context-type text. (can be NIL). The unit can be used if no conversion rule is specified or to overwrite the unit specified for the conversion rule (e.g. 
if a conversion rule is shared between channels). If the link is NIL, then the unit from the conversion rule must be used. If the content is an empty string, no unit should be displayed. If an MDBLOCK is used, in addition the A-HDO unit definition can be stored, see Table 38. Note: for (virtual) master and synchronization channels the A-HDO definition should be omitted to avoid redundancy. Here the unit is already specified by cn_sync_type of the channel. In case of channel data types "MIME sample" and "MIME stream", the text of the unit must be the content-type text of a MIME type which specifies the content of the values of the channel (either fixed length in record or variable length in SDBLOCK). The MIME content-type string must be written in lowercase, and it must apply to the same rules as defined for at_tx_mimetype in 4.11 The Attachment Block ATBLOCK. - pub cn_md_unit: i64, - /// Pointer to TXBLOCK/MDBLOCK with designation for physical unit of signal data (after conversion) or (only for channel data types "MIME sample" and "MIME stream") to MIME context-type text. (can be NIL). The unit can be used if no conversion rule is specified or to overwrite the unit specified for the conversion rule (e.g. if a conversion rule is shared between channels). If the link is NIL, then the unit from the conversion rule must be used. If the content is an empty string, no unit should be displayed. If an MDBLOCK is used, in addition the A-HDO unit definition can be stored, see Table 38. Note: for (virtual) master and synchronization channels the A-HDO definition should be omitted to avoid redundancy. Here the unit is already specified by cn_sync_type of the channel. In case of channel data types "MIME sample" and "MIME stream", the text of the unit must be the content-type text of a MIME type which specifies the content of the values of the channel (either fixed length in record or variable length in SDBLOCK). 
The MIME content-type string must be written in lowercase, and it must apply to the same rules as defined for at_tx_mimetype in 4.11 The Attachment Block ATBLOCK. - pub cn_md_comment: i64, - #[br(if(cn_links > 8), little, count = cn_links - 8)] - links: Vec, - - // Data Members - /// Channel type (see CN_T_xxx) - pub cn_type: u8, - /// Sync type: (see CN_S_xxx) - pub cn_sync_type: u8, - /// Channel data type of raw signal value (see CN_DT_xxx) - pub cn_data_type: u8, - /// Bit offset (0-7): first bit (=LSB) of signal value after Byte offset has been applied (see 4.21.4.2 Reading the Signal Value). If zero, the signal value is 1-Byte aligned. A value different to zero is only allowed for Integer data types (cn_data_type ≤ 3) and if the Integer signal value fits into 8 contiguous Bytes (cn_bit_count + cn_bit_offset ≤ 64). For all other cases, cn_bit_offset must be zero. - pub cn_bit_offset: u8, - /// Offset to first Byte in the data record that contains bits of the signal value. The offset is applied to the plain record data, i.e. skipping the record ID. - pub cn_byte_offset: u32, - /// Number of bits for signal value in record - pub cn_bit_count: u32, - /// Flags (see CN_F_xxx) - pub cn_flags: u32, - /// Position of invalidation bit. - pub cn_inval_bit_pos: u32, - /// Precision for display of floating point values. 0xFF means unrestricted precision (infinite). Any other value specifies the number of decimal places to use for display of floating point values. Only valid if "precision valid" flag (bit 2) is set - cn_precision: u8, - /// Byte alignment with previous channel in data stream - pub cn_alignment: u8, - /// Number of attachment for this channel - cn_attachment_count: u16, - /// Minimum signal value that occurred for this signal (raw value) Only valid if "value range valid" flag (bit 3) is set. - cn_val_range_min: f64, - /// Maximum signal value that occurred for this signal (raw value) Only valid if "value range valid" flag (bit 3) is set. 
- cn_val_range_max: f64, - /// Lower limit for this signal (physical value for numeric conversion rule, otherwise raw value) Only valid if "limit range valid" flag (bit 4) is set. - cn_limit_min: f64, - /// Upper limit for this signal (physical value for numeric conversion rule, otherwise raw value) Only valid if "limit range valid" flag (bit 4) is set. - cn_limit_max: f64, - /// Lower extended limit for this signal (physical value for numeric conversion rule, otherwise raw value) Only valid if "extended limit range valid" flag (bit 5) is set. - cn_limit_ext_min: f64, - /// Upper extended limit for this signal (physical value for numeric conversion rule, otherwise raw value) Only valid if "extended limit range valid" flag (bit 5) is set. - cn_limit_ext_max: f64, -} - -impl Default for Cn4Block { - fn default() -> Self { - Cn4Block { - // cn_id: [35, 35, 67, 78], // ##CN - // reserved: [0; 4], - // cn_len: 160, - cn_links: 8, - cn_cn_next: 0, - cn_composition: 0, - cn_tx_name: 0, - cn_si_source: 0, - cn_cc_conversion: 0, - cn_data: 0, - cn_md_unit: 0, - cn_md_comment: 0, - links: vec![], - cn_type: 0, - cn_sync_type: 0, - cn_data_type: 0, - cn_bit_offset: 0, - cn_byte_offset: 0, - cn_bit_count: 0, - cn_flags: 0, - cn_inval_bit_pos: 0, - cn_precision: 0, - cn_alignment: 0, - cn_attachment_count: 0, - cn_val_range_min: 0.0, - cn_val_range_max: 0.0, - cn_limit_min: 0.0, - cn_limit_max: 0.0, - cn_limit_ext_min: 0.0, - cn_limit_ext_max: 0.0, - } - } -} - -impl Cn4Block { - /// Returns the cn_cn_size link for VLSC channels (cn_type = 7). - /// This link points to a channel containing the size information for variable length signal data. - /// Only valid for MDF 4.3+ VLSC channels. 
- pub fn cn_cn_size(&self) -> Option { - if self.cn_type == 7 && !self.links.is_empty() { - Some(self.links[0]) // First additional link (9th link) is cn_cn_size - } else { - None - } - } -} - -/// Cn4 structure containing block but also unique_name, ndarray data, composition -/// and other attributes frequently needed and computed -#[derive(Debug, Default)] -#[repr(C)] -pub struct Cn4 { - /// short header - pub header: Blockheader4Short, - /// CN Block without short header - pub block: Cn4Block, - /// unique channel name string - pub unique_name: String, - pub block_position: i64, - /// beginning position of channel in record - pub pos_byte_beg: u32, - /// number of bytes taken by channel in record - pub n_bytes: u32, - pub composition: Option, - /// channel data - pub data: ChannelData, - /// false = little endian - pub endian: bool, - /// List size: 1 for normal primitive, 2 for complex, pnd for arrays - pub list_size: usize, - // Shape of array - pub shape: (Vec, Order), - /// optional invalid mask array, invalid byte position in record, invalid byte mask - pub invalid_mask: Option<(Option, usize, u8)>, -} - -impl Clone for Cn4 { - fn clone(&self) -> Self { - let mut invalid_mask: Option<(Option, usize, u8)> = None; - if let Some((boolean_buffer, byte_position, byte_mask)) = &self.invalid_mask { - let mut boolean_buffer_builder: Option = None; - if let Some(buffer) = boolean_buffer { - let mut new_boolean_buffer_builder = BooleanBufferBuilder::new(buffer.len()); - new_boolean_buffer_builder.append_buffer(&buffer.finish_cloned()); - boolean_buffer_builder = Some(new_boolean_buffer_builder); - } - invalid_mask = Some((boolean_buffer_builder, *byte_position, *byte_mask)); - } - Self { - header: self.header, - block: self.block.clone(), - unique_name: self.unique_name.clone(), - block_position: self.block_position, - pos_byte_beg: self.pos_byte_beg, - n_bytes: self.n_bytes, - composition: self.composition.clone(), - data: ChannelData::default(), - endian: 
self.endian, - list_size: self.list_size, - shape: self.shape.clone(), - invalid_mask, - } - } -} - -/// hashmap's key is bit position in record, value Cn4 -pub(crate) type CnType = HashMap; - -/// record layout type : record_id_size: u8, cg_data_bytes: u32, cg_inval_bytes: u32 -type RecordLayout = (u8, u32, u32); - -/// Channel Array block parse result type -type CaBlockParseResult = (Ca4Block, (Vec, Order), usize, usize); - -/// Composition parse result type -type CompositionParseResult = (Composition, i64, usize, (Vec, Order), usize, CnType); - -/// creates recursively in the channel group the CN blocks and all its other linked blocks (CC, MD, TX, CA, etc.) -pub fn parse_cn4( - rdr: &mut SymBufReader<&File>, - target: i64, - mut position: i64, - sharable: &mut SharableBlocks, - record_layout: RecordLayout, - cg_cycle_count: u64, -) -> Result<(CnType, i64, usize, i32)> { - let mut cn: CnType = HashMap::new(); - let mut n_cn: usize = 0; - let mut first_rec_pos: i32 = 0; - let (record_id_size, _cg_data_bytes, _cg_inval_bytes) = record_layout; - if target != 0 { - let (cn_struct, pos, n_cns, cns) = parse_cn4_block( - rdr, - target, - position, - sharable, - record_layout, - cg_cycle_count, - )?; - position = pos; - n_cn += n_cns; - cn.extend(cns); - first_rec_pos = (cn_struct.block.cn_byte_offset as i32 + record_id_size as i32) * 8 - + cn_struct.block.cn_bit_offset as i32; - let mut next_pointer = cn_struct.block.cn_cn_next; - if cn_struct.block.cn_data_type == 13 { - // CANopen date - let (date_ms, min, hour, day, month, year) = can_open_date( - cn_struct.block_position, - cn_struct.pos_byte_beg, - cn_struct.block.cn_byte_offset, - ); - cn.insert(first_rec_pos, date_ms); - cn.insert(first_rec_pos + 16, min); - cn.insert(first_rec_pos + 24, hour); - cn.insert(first_rec_pos + 32, day); - cn.insert(first_rec_pos + 40, month); - cn.insert(first_rec_pos + 48, year); - } else if cn_struct.block.cn_data_type == 14 { - // CANopen time - let (ms, days) = can_open_time( - 
cn_struct.block_position, - cn_struct.pos_byte_beg, - cn_struct.block.cn_byte_offset, - ); - cn.insert(first_rec_pos, ms); - cn.insert(first_rec_pos + 32, days); - } else { - if cn_struct.block.cn_type == 3 || cn_struct.block.cn_type == 6 { - // virtual channel, position in record negative - first_rec_pos = -1; - while cn.contains_key(&first_rec_pos) { - first_rec_pos -= 1; - } - } else if (cn_struct.block.cn_flags & CN_F_DATA_STREAM_MODE) != 0 { - // data stream mode channel: use negative block_position as key to avoid collisions - first_rec_pos = -(cn_struct.block_position as i32); - } - cn.insert(first_rec_pos, cn_struct); - } - - while next_pointer != 0 { - let (cn_struct, pos, n_cns, cns) = parse_cn4_block( - rdr, - next_pointer, - position, - sharable, - record_layout, - cg_cycle_count, - )?; - position = pos; - n_cn += n_cns; - cn.extend(cns); - let mut rec_pos = (cn_struct.block.cn_byte_offset as i32 + record_id_size as i32) * 8 - + cn_struct.block.cn_bit_offset as i32; - next_pointer = cn_struct.block.cn_cn_next; - if cn_struct.block.cn_data_type == 13 { - // CANopen date - let (date_ms, min, hour, day, month, year) = can_open_date( - cn_struct.block_position, - cn_struct.pos_byte_beg, - cn_struct.block.cn_byte_offset, - ); - cn.insert(rec_pos, date_ms); - cn.insert(rec_pos + 16, min); - cn.insert(rec_pos + 24, hour); - cn.insert(rec_pos + 32, day); - cn.insert(rec_pos + 40, month); - cn.insert(rec_pos + 48, year); - } else if cn_struct.block.cn_data_type == 14 { - // CANopen time - let (ms, days) = can_open_time( - cn_struct.block_position, - cn_struct.pos_byte_beg, - cn_struct.block.cn_byte_offset, - ); - cn.insert(rec_pos, ms); - cn.insert(rec_pos + 32, days); - } else { - if cn_struct.block.cn_type == 3 || cn_struct.block.cn_type == 6 { - // virtual channel, position in record negative - rec_pos = -1; - while cn.contains_key(&rec_pos) { - rec_pos -= 1; - } - } else if (cn_struct.block.cn_flags & CN_F_DATA_STREAM_MODE) != 0 { - // data stream mode 
channel: use negative block_position as key to avoid collisions - rec_pos = -(cn_struct.block_position as i32); - } - cn.insert(rec_pos, cn_struct); - } - } - } - Ok((cn, position, n_cn, first_rec_pos)) -} - -/// returns created CANopenDate channels -fn can_open_date( - block_position: i64, - pos_byte_beg: u32, - cn_byte_offset: u32, -) -> (Cn4, Cn4, Cn4, Cn4, Cn4, Cn4) { - let block = Cn4Block { - cn_links: 8, - cn_byte_offset, - cn_bit_count: 16, - ..Default::default() - }; - let date_ms = Cn4 { - header: default_short_header(BlockType::CN), - block, - unique_name: String::from("ms"), - block_position, - pos_byte_beg, - n_bytes: 2, - composition: None, - data: ChannelData::UInt16(UInt16Builder::new()), - endian: false, - list_size: 1, - shape: (vec![1], Order::RowMajor), - invalid_mask: None, - }; - let block = Cn4Block { - cn_links: 8, - cn_byte_offset: cn_byte_offset + 2, - cn_bit_count: 6, - ..Default::default() - }; - let min = Cn4 { - header: default_short_header(BlockType::CN), - block, - unique_name: String::from("min"), - block_position, - pos_byte_beg, - n_bytes: 1, - composition: None, - data: ChannelData::UInt8(UInt8Builder::new()), - endian: false, - list_size: 1, - shape: (vec![1], Order::RowMajor), - invalid_mask: None, - }; - let block = Cn4Block { - cn_links: 8, - cn_byte_offset: cn_byte_offset + 3, - cn_bit_count: 5, - ..Default::default() - }; - let hour = Cn4 { - header: default_short_header(BlockType::CN), - block, - unique_name: String::from("hour"), - block_position, - pos_byte_beg, - n_bytes: 1, - composition: None, - data: ChannelData::UInt8(UInt8Builder::new()), - endian: false, - list_size: 1, - shape: (vec![1], Order::RowMajor), - invalid_mask: None, - }; - let block = Cn4Block { - cn_links: 8, - cn_byte_offset: cn_byte_offset + 4, - cn_bit_count: 5, - ..Default::default() - }; - let day = Cn4 { - header: default_short_header(BlockType::CN), - block, - unique_name: String::from("day"), - block_position, - pos_byte_beg, - n_bytes: 1, - 
composition: None, - data: ChannelData::UInt8(UInt8Builder::new()), - endian: false, - list_size: 1, - shape: (vec![1], Order::RowMajor), - invalid_mask: None, - }; - let block = Cn4Block { - cn_links: 8, - cn_byte_offset: cn_byte_offset + 5, - cn_bit_count: 6, - ..Default::default() - }; - let month = Cn4 { - header: default_short_header(BlockType::CN), - block, - unique_name: String::from("month"), - block_position, - pos_byte_beg, - n_bytes: 1, - composition: None, - data: ChannelData::UInt8(UInt8Builder::new()), - endian: false, - list_size: 1, - shape: (vec![1], Order::RowMajor), - invalid_mask: None, - }; - let block = Cn4Block { - cn_links: 8, - cn_byte_offset: cn_byte_offset + 6, - cn_bit_count: 7, - ..Default::default() - }; - let year = Cn4 { - header: default_short_header(BlockType::CN), - block, - unique_name: String::from("year"), - block_position, - pos_byte_beg, - n_bytes: 1, - composition: None, - data: ChannelData::UInt8(UInt8Builder::new()), - endian: false, - list_size: 1, - shape: (vec![1], Order::RowMajor), - invalid_mask: None, - }; - (date_ms, min, hour, day, month, year) -} - -/// returns created CANopenTime channels -fn can_open_time(block_position: i64, pos_byte_beg: u32, cn_byte_offset: u32) -> (Cn4, Cn4) { - let block = Cn4Block { - cn_links: 8, - cn_byte_offset, - cn_bit_count: 28, - ..Default::default() - }; - let ms: Cn4 = Cn4 { - header: default_short_header(BlockType::CN), - block, - unique_name: String::from("ms"), - block_position, - pos_byte_beg, - n_bytes: 4, - composition: None, - data: ChannelData::UInt32(UInt32Builder::new()), - endian: false, - list_size: 1, - shape: (vec![1], Order::RowMajor), - invalid_mask: None, - }; - let block = Cn4Block { - cn_links: 8, - cn_byte_offset: cn_byte_offset + 4, - cn_bit_count: 16, - ..Default::default() - }; - let days: Cn4 = Cn4 { - header: default_short_header(BlockType::CN), - block, - unique_name: String::from("day"), - block_position, - pos_byte_beg, - n_bytes: 2, - composition: 
None, - data: ChannelData::UInt16(UInt16Builder::new()), - endian: false, - list_size: 1, - shape: (vec![1], Order::RowMajor), - invalid_mask: None, - }; - (ms, days) -} - -/// Simple calculation to convert bit count into equivalent bytes count -fn calc_n_bytes_not_aligned(bitcount: u32) -> u32 { - let mut n_bytes = bitcount / 8u32; - if !bitcount.is_multiple_of(8) { - n_bytes += 1; - } - n_bytes -} - -impl Cn4 { - /// Returns the channel source name - fn get_cn_source_name(&self, sharable: &SharableBlocks) -> Result> { - let si = sharable.si.get(&self.block.cn_si_source); - match si { - Some(block) => Ok(block.get_si_source_name(sharable)?), - None => Ok(None), - } - } - /// Returns the channel source path - fn get_cn_source_path(&self, sharable: &SharableBlocks) -> Result> { - let si = sharable.si.get(&self.block.cn_si_source); - match si { - Some(block) => Ok(block.get_si_path_name(sharable)?), - None => Ok(None), - } - } -} - -/// Channel block parser -fn parse_cn4_block( - rdr: &mut SymBufReader<&File>, - target: i64, - mut position: i64, - sharable: &mut SharableBlocks, - record_layout: RecordLayout, - cg_cycle_count: u64, -) -> Result<(Cn4, i64, usize, CnType)> { - let (record_id_size, _cg_data_bytes, cg_inval_bytes) = record_layout; - let mut n_cn: usize = 1; - let mut cns: HashMap = HashMap::new(); - let (mut block, cnheader, pos) = parse_block_short(rdr, target, position)?; - position = pos; - let block: Cn4Block = block - .read_le() - .context("Could not read buffer into Cn4Block struct")?; - - let pos_byte_beg = block.cn_byte_offset + record_id_size as u32; - let n_bytes = calc_n_bytes_not_aligned(block.cn_bit_count + (block.cn_bit_offset as u32)); - let invalid_mask: Option<(Option, usize, u8)> = if cg_inval_bytes != 0 { - let invalid_byte_position = (block.cn_inval_bit_pos >> 3) as usize; - let invalid_byte_mask = 1 << (block.cn_inval_bit_pos & 0x07); - let mut buffer = BooleanBufferBuilder::new(cg_cycle_count as usize); - 
buffer.advance(cg_cycle_count as usize); - Some((Some(buffer), invalid_byte_position, invalid_byte_mask)) - } else { - None - }; - - // Reads TX name - position = read_meta_data(rdr, sharable, block.cn_tx_name, position, BlockType::CN)?; - let name: String = sharable.get_tx(block.cn_tx_name)?.unwrap_or_default(); - - // Reads unit - position = read_meta_data(rdr, sharable, block.cn_md_unit, position, BlockType::CN)?; - - // Reads CC - let cc_pointer = block.cn_cc_conversion; - if (cc_pointer != 0) && !sharable.cc.contains_key(&cc_pointer) { - let (cc_block, _header, pos) = parse_block_short(rdr, cc_pointer, position)?; - position = pos; - position = read_cc(rdr, &cc_pointer, position, cc_block, sharable)?; - } - - // Reads MD - position = read_meta_data(rdr, sharable, block.cn_md_comment, position, BlockType::CN)?; - - //Reads SI - let si_pointer = block.cn_si_source; - if (si_pointer != 0) && !sharable.si.contains_key(&si_pointer) { - let (mut si_block, _header, pos) = parse_block_short(rdr, si_pointer, position)?; - position = pos; - let si_block: Si4Block = si_block - .read_le() - .context("Could into read buffer into Si4Block struct")?; - position = read_meta_data(rdr, sharable, si_block.si_tx_name, position, BlockType::SI)?; - position = read_meta_data(rdr, sharable, si_block.si_tx_path, position, BlockType::SI)?; - sharable.si.insert(si_pointer, si_block); - } - - //Reads CA or composition - let compo: Option; - let list_size: usize; - let shape: (Vec, Order); - if block.cn_composition != 0 { - let (co, pos, array_size, s, n_cns, cnss) = parse_composition( - rdr, - block.cn_composition, - position, - sharable, - record_layout, - cg_cycle_count, - ) - .context("Failed reading composition")?; - shape = s; - // list size calculation - if block.cn_data_type == 15 || block.cn_data_type == 16 { - //complex - list_size = 2 * array_size; - } else { - list_size = array_size; - } - compo = Some(co); - position = pos; - n_cn += n_cns; - cns = cnss; - } else { - compo = 
None; - shape = (vec![1], Order::RowMajor); - // list size calculation - if block.cn_data_type == 15 | 16 { - //complex - list_size = 2; - } else { - list_size = 1; - } - } - - let mut endian: bool = false; // Little endian by default - if block.cn_data_type == 0 - || block.cn_data_type == 2 - || block.cn_data_type == 4 - || block.cn_data_type == 8 - || block.cn_data_type == 15 - { - endian = false; // little endian - } else if block.cn_data_type == 1 - || block.cn_data_type == 3 - || block.cn_data_type == 5 - || block.cn_data_type == 9 - || block.cn_data_type == 16 - { - endian = true; // big endian - } - // For VLSC/VLSD channels, cn_data_type describes the signal data block encoding - // (e.g. UTF-16 BE), not the byte order of the integer offsets stored in the DT block. - if block.cn_type == 1 || block.cn_type == 7 { - endian = false; - } - let data_type = block.cn_data_type; - let cn_type = block.cn_type; - - let cn_struct = Cn4 { - header: cnheader, - unique_name: name, - block_position: target, - pos_byte_beg, - n_bytes, - composition: compo, - data: data_type_init(cn_type, data_type, n_bytes, list_size, block.cn_flags)?, - block, - endian, - list_size, - shape, - invalid_mask, - }; - - Ok((cn_struct, position, n_cn, cns)) -} - -/// reads pointed TX or CC Block(s) pointed by cc_ref in CCBlock -fn read_cc( - rdr: &mut SymBufReader<&File>, - target: &i64, - mut position: i64, - mut block: Cursor>, - sharable: &mut SharableBlocks, -) -> Result { - let cc_block: Cc4Block = block - .read_le() - .context("Could nto read buffer into Cc4Block struct")?; - position = read_meta_data(rdr, sharable, cc_block.cc_md_unit, position, BlockType::CC)?; - position = read_meta_data(rdr, sharable, cc_block.cc_tx_name, position, BlockType::CC)?; - - for pointer in &cc_block.cc_ref { - if !sharable.cc.contains_key(pointer) - && !sharable.md_tx.contains_key(pointer) - && *pointer != 0 - { - let (ref_block, header, _pos) = parse_block_short(rdr, *pointer, position)?; - position = 
pointer + header.hdr_len as i64; - if "##TX".as_bytes() == header.hdr_id { - // TX Block - position = read_meta_data(rdr, sharable, *pointer, position, BlockType::CC)? - } else { - // CC Block - position = read_cc(rdr, pointer, position, ref_block, sharable)?; - } - } - } - sharable.cc.insert(*target, cc_block); - Ok(position) -} - -/// Cc4 Channel Conversion block struct -#[derive(Debug, Clone)] -#[binrw] -#[br(little)] -#[allow(dead_code)] -#[repr(C)] -pub struct Cc4Block { - // cc_id: [u8; 4], // ##CC - // reserved: [u8; 4], // reserved - // cc_len: u64, // Length of block in bytes - /// # of links - cc_links: u64, - /// Link to TXBLOCK with name (identifier) of conversion (can be NIL). Name must be according to naming rules stated in 4.4.2 Naming Rules. - pub cc_tx_name: i64, - /// Link to TXBLOCK/MDBLOCK with physical unit of signal data (after conversion). (can be NIL) Unit only applies if no unit defined in CNBLOCK. Otherwise the unit of the channel overwrites the conversion unit. - cc_md_unit: i64, - // An MDBLOCK can be used to additionally reference the A-HDO unit definition. Note: for channels with cn_sync_type > 0, the unit is already defined, thus a reference to an A-HDO definition should be omitted to avoid redundancy. - /// Link to TXBLOCK/MDBLOCK with comment of conversion and additional information. (can be NIL) - pub cc_md_comment: i64, - /// Link to CCBLOCK for inverse formula (can be NIL, must be NIL for CCBLOCK of the inverse formula (no cyclic reference allowed). - cc_cc_inverse: i64, - #[br(if(cc_links > 4), little, count = cc_links - 4)] - /// List of additional links to TXBLOCKs with strings or to CCBLOCKs with partial conversion rules. Length of list is given by cc_ref_count. The list can be empty. Details are explained in formula-specific block supplement. - pub cc_ref: Vec, - - // Data Members - /// Conversion type (formula identifier) (see CC_T_xxx) - pub cc_type: u8, - /// Precision for display of floating point values. 
0xFF means unrestricted precision (infinite) Any other value specifies the number of decimal places to use for display of floating point values. Note: only valid if "precision valid" flag (bit 0) is set and if cn_precision of the parent CNBLOCK is invalid, otherwise cn_precision must be used. - cc_precision: u8, - /// Flags (see CC_F_xxx) - cc_flags: u16, - /// Length M of cc_ref list with additional links. See formula-specific block supplement for meaning of the links. - cc_ref_count: u16, - /// Length N of cc_val list with additional parameters. See formula-specific block supplement for meaning of the parameters. - cc_val_count: u16, - /// Minimum physical signal value that occurred for this signal. Only valid if "physical value range valid" flag (bit 1) is set. - cc_phy_range_min: f64, - /// Maximum physical signal value that occurred for this signal. Only valid if "physical value range valid" flag (bit 1) is set. - cc_phy_range_max: f64, - #[br(args(cc_val_count, cc_type))] - pub cc_val: CcVal, -} - -/// Cc Values can be either a float or Uint64 -#[derive(Debug, Clone)] -#[binrw] -#[br(little, import(count: u16, cc_type: u8))] -#[repr(C)] -pub enum CcVal { - #[br(pre_assert(cc_type < 11))] - Real(#[br(count = count)] Vec), - - #[br(pre_assert(cc_type == 11))] - Uint(#[br(count = count)] Vec), -} - -/// Si4 Source Information block struct -#[derive(Debug, PartialEq, Eq, Default, Copy, Clone)] -#[binrw] -#[br(little)] -#[repr(C)] -pub struct Si4Block { - // si_id: [u8; 4], // ##SI - // reserved: [u8; 4], // reserved - // si_len: u64, // Length of block in bytes - /// # of links - si_links: u64, - /// Pointer to TXBLOCK with name (identification) of source (must not be NIL). The source name must be according to naming rules stated in 4.4.2 Naming Rules. - si_tx_name: i64, - /// Pointer to TXBLOCK with (tool-specific) path of source (can be NIL). The path string must be according to naming rules stated in 4.4.2 Naming Rules. 
- si_tx_path: i64, - // Each tool may generate a different path string. The only purpose is to ensure uniqueness as explained in section 4.4.3 Identification of Channels. As a recommendation, the path should be a human readable string containing additional information about the source. However, the path string should not be used to store this information in order to retrieve it later by parsing the string. Instead, additional source information should be stored in generic or custom XML fields in the comment MDBLOCK si_md_comment. - /// Pointer to source comment and additional information (TXBLOCK or MDBLOCK) (can be NIL) - si_md_comment: i64, - - // Data Members - /// Source type additional classification of source (see SI_T_xxx) - si_type: u8, - /// Bus type additional classification of used bus (should be 0 for si_type ≥ 3) (see SI_BUS_xxx) - si_bus_type: u8, - /// Flags The value contains the following bit flags (see SI_F_xxx)): - si_flags: u8, - /// reserved - si_reserved: [u8; 5], -} - -impl Si4Block { - /// returns the source name - fn get_si_source_name(&self, sharable: &SharableBlocks) -> Result> { - sharable.get_tx(self.si_tx_name) - } - /// returns the source path - fn get_si_path_name(&self, sharable: &SharableBlocks) -> Result> { - sharable.get_tx(self.si_tx_path) - } -} - -/// Ca4 Channel Array block struct -#[derive(Debug, PartialEq, Clone)] -#[repr(C)] -pub struct Ca4Block { - // header - /// ##CA - pub ca_id: [u8; 4], - /// reserved - reserved: [u8; 4], - /// Length of block in bytes - pub ca_len: u64, - /// # of links - ca_links: u64, - // links - /// [] Array of composed elements: Pointer to a CNBLOCK for array of structures, or to a CABLOCK for array of arrays (can be NIL). If a CABLOCK is referenced, it must use the "CN template" storage type (ca_storage = 0). - pub ca_composition: i64, - /// [Π N(d) or empty] Only present for storage type "DG template". 
List of links to data blocks (DTBLOCK/DLBLOCK) for each element in case of "DG template" storage (ca_storage = 2). A link in this list may only be NIL if the cycle count of the respective element is 0: ca_data\[k\] = NIL => ca_cycle_count\[k\] = 0 The links are stored line-oriented, i.e. element k uses ca_data\[k\] (see explanation below). The size of the list must be equal to Π N(d), i.e. to the product of the number of elements per dimension N(d) over all dimensions D. Note: link ca_data\[0\] must be equal to dg_data link of the parent DGBLOCK. - pub ca_data: Option>, - /// [Dx3 or empty] Only present if "dynamic size" flag (bit 0) is set. References to channels for size signal of each dimension (can be NIL). Each reference is a link triple with pointer to parent DGBLOCK, parent CGBLOCK and CNBLOCK for the channel (either all three links are assigned or NIL). Thus the links have the following order: DGBLOCK for size signal of dimension 1 CGBLOCK for size signal of dimension 1 CNBLOCK for size signal of dimension 1 … DGBLOCK for size signal of dimension D CGBLOCK for size signal of dimension D CNBLOCK for size signal of dimension D The size signal can be used to model arrays whose number of elements per dimension can vary over time. If a size signal is specified for a dimension, the number of elements for this dimension at some point in time is equal to the value of the size signal at this time (i.e. for time-synchronized signals, the size signal value with highest time stamp less or equal to current time stamp). If the size signal has no recorded signal value for this time (yet), assume 0 as size. - ca_dynamic_size: Option>, - /// [Dx3 or empty] Only present if "input quantity" flag (bit 1) is set. Reference to channels for input quantity signal for each dimension (can be NIL). Each reference is a link triple with pointer to parent DGBLOCK, parent CGBLOCK and CNBLOCK for the channel (either all three links are assigned or NIL). 
Thus the links have the following order: DGBLOCK for input quantity of dimension 1 CGBLOCK for input quantity of dimension 1 CNBLOCK for input quantity of dimension 1 … DGBLOCK for input quantity of dimension D CGBLOCK for input quantity of dimension D CNBLOCK for input quantity of dimension D Since the input quantity signal and the array signal must be synchronized, their channel groups must contain at least one common master channel type. - ca_input_quantity: Option>, - /// [3 or empty] Only present if "output quantity" flag (bit 2) is set. Reference to channel for output quantity (can be NIL). The reference is a link triple with pointer to parent DGBLOCK, parent CGBLOCK and CNBLOCK for the channel (either all three links are assigned or NIL). Since the output quantity signal and the array signal must be synchronized, their channel groups must contain at least one common master channel type. For array type "look-up", the output quantity is the result of the complete look-up (see [MCD-2 MC] keyword RIP_ADDR_W). The output quantity should have the same physical unit as the array elements of the array that references it. - ca_output_quantity: Option>, - /// [3 or empty] Only present if "comparison quantity" flag (bit 3) is set. Reference to channel for comparison quantity (can be NIL). The reference is a link triple with pointer to parent DGBLOCK, parent CGBLOCK and CNBLOCK for the channel (either all three links are assigned or NIL). Since the comparison quantity signal and the array signal must be synchronized, their channel groups must contain at least one common master channel type. The comparison quantity should have the same physical unit as the array elements. - ca_comparison_quantity: Option>, - /// [D or empty] Only present if "axis" flag (bit 4) is set. Pointer to a conversion rule (CCBLOCK) for the scaling axis of each dimension. If a link NIL a 1:1 conversion must be used for this axis. 
If the "fixed axis" flag (Bit 5) is set, the conversion must be applied to the fixed axis values of the respective axis/dimension (ca_axis_value list stores the raw values as REAL). If the link to the CCBLOCK is NIL already the physical values are stored in the ca_axis_value list. If the "fixed axes" flag (Bit 5) is not set, the conversion must be applied to the raw values of the respective axis channel, i.e. it overrules the conversion specified for the axis channel, even if the ca_axis_conversion link is NIL! Note: ca_axis_conversion may reference the same CCBLOCK as referenced by the respective axis channel ("sharing" of CCBLOCK). - ca_cc_axis_conversion: Option>, - /// [Dx3 or empty] Only present if "axis" flag (bit 4) is set and "fixed axes flag" (bit 5) is not set. References to channels for scaling axis of respective dimension (can be NIL). Each reference is a link triple with pointer to parent DGBLOCK, parent CGBLOCK and CNBLOCK for the channel (either all three links are assigned or NIL). Thus the links have the following order: DGBLOCK for axis of dimension 1 CGBLOCK for axis of dimension 1 CNBLOCK for axis of dimension 1 … DGBLOCK for axis of dimension D CGBLOCK for axis of dimension D CNBLOCK for axis of dimension D Each referenced channel must be an array of type "axis". The maximum number of elements of each axis (ca_dim_size\[0\] in axis) must be equal to the maximum number of elements of respective dimension d in "look-up" array (ca_dim_size[d-1]). - ca_axis: Option>, - //members - /// Array type (defines semantic of the array) see CA_T_xxx - pub ca_type: u8, - /// Storage type (defines how the element values are stored) see CA_S_xxx - pub ca_storage: u8, - /// Number of dimensions D > 0 For array type "axis", D must be 1. - pub ca_ndim: u16, - /// Flags The value contains the following bit flags (Bit 0 = LSB): see CA_F_xxx - pub ca_flags: u32, - /// Base factor for calculation of Byte offsets for "CN template" storage type. 
ca_byte_offset_base should be larger than or equal to the size of Bytes required to store a component channel value in the record (all must have the same size). If it is equal to this value, then the component values are stored next to each other without gaps. Exact formula for calculation of Byte offset for each component channel see below. - pub ca_byte_offset_base: i32, - /// Base factor for calculation of invalidation bit positions for CN template storage type. - pub ca_inval_bit_pos_base: u32, - pub ca_dim_size: Vec, - pub ca_axis_value: Option>, - pub ca_cycle_count: Option>, -} - -impl Default for Ca4Block { - fn default() -> Self { - Self { - ca_id: [35, 35, 67, 65], // ##CA - reserved: [0u8; 4], - ca_len: 48, - ca_links: 1, - ca_composition: 0, - ca_data: None, - ca_dynamic_size: None, - ca_input_quantity: None, - ca_output_quantity: None, - ca_comparison_quantity: None, - ca_cc_axis_conversion: None, - ca_axis: None, - ca_type: 0, // Array - ca_storage: 0, // CN template - ca_ndim: 1, - ca_flags: 0, - ca_byte_offset_base: 0, // first - ca_inval_bit_pos_base: 0, // present in DIBlock - ca_dim_size: vec![], - ca_axis_value: None, - ca_cycle_count: None, - } - } -} - -/// Channel Array block structure, only members section, links section structure complex -#[derive(Debug, Clone)] -#[binrw] -#[br(little)] -#[repr(C)] -pub struct Ca4BlockMembers { - /// Array type (defines semantic of the array) see CA_T_xxx - ca_type: u8, - /// Storage type (defines how the element values are stored) see CA_S_xxx - ca_storage: u8, - /// Number of dimensions D > 0 For array type "axis", D must be 1. - pub ca_ndim: u16, - /// Flags The value contains the following bit flags (Bit 0 = LSB): see CA_F_xxx - ca_flags: u32, - /// Base factor for calculation of Byte offsets for "CN template" storage type. ca_byte_offset_base should be larger than or equal to the size of Bytes required to store a component channel value in the record (all must have the same size). 
If it is equal to this value, then the component values are stored next to each other without gaps. Exact formula for calculation of Byte offset for each component channel see below. - ca_byte_offset_base: i32, - /// Base factor for calculation of invalidation bit positions for CN template storage type. - ca_inval_bit_pos_base: u32, - #[br(if(ca_ndim > 0), little, count = ca_ndim)] - pub ca_dim_size: Vec, -} - -impl Default for Ca4BlockMembers { - fn default() -> Self { - Self { - ca_type: 0, - ca_storage: 0, - ca_ndim: 1, - ca_flags: 0, - ca_byte_offset_base: 0, - ca_inval_bit_pos_base: 0, - ca_dim_size: vec![], - } - } -} - -/// Channel Array block parser -fn parse_ca_block( - ca_block: &mut Cursor>, - block_header: Blockheader4Short, - cg_cycle_count: u64, -) -> Result { - // reads links count - let ca_links: u64 = ca_block - .read_le() - .context("Could not read links count in ca block")?; - //Reads members first - ca_block.set_position(8 + ca_links * 8); // change buffer position after links section - let ca_members: Ca4BlockMembers = ca_block - .read_le() - .context("Could not read buffer into CaBlockMembers struct")?; - let mut snd: usize; - let mut pnd: usize; - // converts ca_dim_size from u64 to usize - let shape_dim_usize: Vec = ca_members.ca_dim_size.iter().map(|&d| d as usize).collect(); - if shape_dim_usize.len() == 1 { - snd = shape_dim_usize[0]; - pnd = shape_dim_usize[0]; - } else { - snd = 0; - pnd = 1; - let sizes = shape_dim_usize.clone(); - for x in sizes.into_iter() { - snd += x; - pnd *= x; - } - } - let mut shape_dim: VecDeque = VecDeque::from(shape_dim_usize); - shape_dim.push_front(cg_cycle_count as usize); - - let shape: (Vec, Order) = if (ca_members.ca_flags >> 6 & 1) != 0 { - (shape_dim.into(), Order::ColumnMajor) - } else { - (shape_dim.into(), Order::RowMajor) - }; - - let mut val = vec![0.0f64; snd]; - let ca_axis_value: Option> = if (ca_members.ca_flags & 0b100000) > 0 { - ca_block - .read_f64_into::(&mut val) - .context("Could not 
read ca_axis_value")?; - Some(val) - } else { - None - }; - - let mut val = vec![0u64; pnd]; - let ca_cycle_count: Option> = if ca_members.ca_storage >= 1 { - ca_block - .read_u64_into::(&mut val) - .context("Could not read ca_cycle_count")?; - Some(val) - } else { - None - }; - - // Reads links - ca_block.set_position(8); // change buffer position to beginning of links section - - let ca_composition: i64 = ca_block - .read_i64::() - .context("Could not read ca_composition")?; - - let mut val = vec![0i64; pnd]; - let ca_data: Option> = if ca_members.ca_storage == 2 { - ca_block - .read_i64_into::(&mut val) - .context("Could not read ca_data")?; - Some(val) - } else { - None - }; - - let mut val = vec![0i64; (ca_members.ca_ndim * 3) as usize]; - let ca_dynamic_size: Option> = if (ca_members.ca_flags & 0b1) > 0 { - ca_block - .read_i64_into::(&mut val) - .context("Could not read ca_dynamic_size")?; - Some(val) - } else { - None - }; - - let mut val = vec![0i64; (ca_members.ca_ndim * 3) as usize]; - let ca_input_quantity: Option> = if (ca_members.ca_flags & 0b10) > 0 { - ca_block - .read_i64_into::(&mut val) - .context("Could not read ca_input_quantity")?; - Some(val) - } else { - None - }; - - let mut val = vec![0i64; 3]; - let ca_output_quantity: Option> = if (ca_members.ca_flags & 0b100) > 0 { - ca_block - .read_i64_into::(&mut val) - .context("Could not read ca_output_quantity")?; - Some(val) - } else { - None - }; - - let mut val = vec![0i64; 3]; - let ca_comparison_quantity: Option> = if (ca_members.ca_flags & 0b1000) > 0 { - ca_block - .read_i64_into::(&mut val) - .context("Could not read ca_comparison_quantity")?; - Some(val) - } else { - None - }; - - let mut val = vec![0i64; ca_members.ca_ndim as usize]; - let ca_cc_axis_conversion: Option> = if (ca_members.ca_flags & 0b10000) > 0 { - ca_block - .read_i64_into::(&mut val) - .context("Could not read ca_cc_axis_conversion")?; - Some(val) - } else { - None - }; - - let mut val = vec![0i64; (ca_members.ca_ndim * 
3) as usize]; - let ca_axis: Option> = - if ((ca_members.ca_flags & 0b10000) > 0) & ((ca_members.ca_flags & 0b100000) > 0) { - ca_block - .read_i64_into::(&mut val) - .context("Could not read ca_axis")?; - Some(val) - } else { - None - }; - - Ok(( - Ca4Block { - ca_id: block_header.hdr_id, - reserved: block_header.hdr_gap, - ca_len: block_header.hdr_len, - ca_links, - ca_composition, - ca_data, - ca_dynamic_size, - ca_input_quantity, - ca_output_quantity, - ca_comparison_quantity, - ca_cc_axis_conversion, - ca_axis, - ca_type: ca_members.ca_type, - ca_storage: ca_members.ca_storage, - ca_ndim: ca_members.ca_ndim, - ca_flags: ca_members.ca_flags, - ca_byte_offset_base: ca_members.ca_byte_offset_base, - ca_inval_bit_pos_base: ca_members.ca_inval_bit_pos_base, - ca_dim_size: ca_members.ca_dim_size, - ca_axis_value, - ca_cycle_count, - }, - shape, - snd, - pnd, - )) -} - -/// contains composition blocks (CN or CA) -/// can optionaly point to another composition -#[derive(Debug, Clone)] -#[repr(C)] -pub struct Composition { - pub block: Compo, - pub compo: Option>, -} - -/// enum allowing to nest CA or CN blocks for a composition -#[derive(Debug, Clone)] -#[repr(C)] -pub enum Compo { - CA(Box), - #[allow(dead_code)] - CN(Box), - #[allow(dead_code)] - CL(Box), - CV(Box), - #[allow(dead_code)] - CU(Box), - DS(Box), -} - -/// Parse and re-key channel blocks for CU/CV compositions. -/// All member/option channels share the same byte offset in the record, -/// so they are re-keyed using negative block_position to avoid HashMap collisions. 
-fn parse_and_rekey_channels( - rdr: &mut SymBufReader<&File>, - targets: &[i64], - position: &mut i64, - sharable: &mut SharableBlocks, - record_layout: RecordLayout, - cg_cycle_count: u64, -) -> Result<(CnType, usize)> { - let mut cns: CnType = HashMap::new(); - let mut n_cn: usize = 0; - for target in targets { - let (cnss, pos, n_cns, _first_rec_pos) = parse_cn4( - rdr, - *target, - *position, - sharable, - record_layout, - cg_cycle_count, - )?; - *position = pos; - n_cn += n_cns; - for (_rec_pos, cn_struct) in cnss { - let unique_key = -(cn_struct.block_position as i32); - cns.insert(unique_key, cn_struct); - } - } - Ok((cns, n_cn)) -} - -/// parses composition linked blocks -/// CN (structures of composed channels )and CA (array of arrays) blocks can be nested or even CA and CN nested and mixed: this is not supported, very complicated -fn parse_composition( - rdr: &mut SymBufReader<&File>, - target: i64, - mut position: i64, - sharable: &mut SharableBlocks, - record_layout: RecordLayout, - cg_cycle_count: u64, -) -> Result { - let (mut block, block_header_short, pos) = parse_block_short(rdr, target, position) - .context("Failed parsing composition header block")?; - position = pos; - let array_size: usize; - let mut cns: CnType = HashMap::new(); - let mut n_cn: usize = 0; - - if block_header_short.hdr_id == "##CA".as_bytes() { - // Channel Array - let (block, mut shape, _snd, array_size) = - parse_ca_block(&mut block, block_header_short, cg_cycle_count) - .context("Failed parsing CA block")?; - position = pos; - let ca_composition: Option>; - if block.ca_composition != 0 { - let (ca, pos, _array_size, s, n_cns, cnss) = parse_composition( - rdr, - block.ca_composition, - position, - sharable, - record_layout, - cg_cycle_count, - ) - .context("Failed parsing composition block from CA block")?; - shape = s; - position = pos; - cns = cnss; - n_cn += n_cns; - ca_composition = Some(Box::new(ca)); - } else { - ca_composition = None; - cns = HashMap::new(); - } - 
Ok(( - Composition { - block: Compo::CA(Box::new(block)), - compo: ca_composition, - }, - position, - array_size, - shape, - n_cn, - cns, - )) - } else if block_header_short.hdr_id == "##DS".as_bytes() { - // Data Stream - let ds_block: Ds4Block = block.read_le().context("Failed parsing DS block")?; - array_size = 1; - let ds_pointer = ds_block.ds_cn_composition(); - let ds_composition: Option>; - let mut shape = (Vec::::new(), Order::RowMajor); - if ds_pointer != 0 { - let (ds, pos, _array_size, s, n_cns, cnss) = parse_composition( - rdr, - ds_pointer, - position, - sharable, - record_layout, - cg_cycle_count, - ) - .context("Failed parsing composition block from DS Block")?; - shape = s; - position = pos; - cns = cnss; - n_cn += n_cns; - ds_composition = Some(Box::new(ds)); - } else { - ds_composition = None; - cns = HashMap::new(); - } - Ok(( - Composition { - block: Compo::DS(Box::new(ds_block)), - compo: ds_composition, - }, - position, - array_size, - shape, - n_cn, - cns, - )) - } else if block_header_short.hdr_id == "##CL".as_bytes() { - // Channel List - let cl_block: Cl4Block = block.read_le().context("Failed parsing CL block")?; - let cl_composition: Option>; - let mut shape = (Vec::::new(), Order::RowMajor); - array_size = 0; - // Note: cl_cn_size points to the size channel (parsed elsewhere in the CG) - // Parse the composition (element type) - if cl_block.cl_composition != 0 { - let (ds, pos, _array_size, s, n_cns, cnss) = parse_composition( - rdr, - cl_block.cl_composition, - position, - sharable, - record_layout, - cg_cycle_count, - ) - .context("Failed parsing composition block from CL Block")?; - shape = s; - position = pos; - cns = cnss; - n_cn += n_cns; - cl_composition = Some(Box::new(ds)); - } else { - cl_composition = None; - cns = HashMap::new(); - } - Ok(( - Composition { - block: Compo::CL(Box::new(cl_block)), - compo: cl_composition, - }, - position, - array_size, - shape, - n_cn, - cns, - )) - } else if block_header_short.hdr_id == 
"##CV".as_bytes() { - // Channel Variant - let cv_block: Cv4Block = block.read_le().context("Failed parsing CV block")?; - let cv_composition: Option> = None; - let shape = (Vec::::new(), Order::RowMajor); - array_size = 0; - let (rekeyed_cns, rekeyed_n_cn) = parse_and_rekey_channels( - rdr, - &cv_block.cv_cn_option, - &mut position, - sharable, - record_layout, - cg_cycle_count, - )?; - n_cn += rekeyed_n_cn; - cns.extend(rekeyed_cns); - Ok(( - Composition { - block: Compo::CV(Box::new(cv_block)), - compo: cv_composition, - }, - position, - array_size, - shape, - n_cn, - cns, - )) - } else if block_header_short.hdr_id == "##CU".as_bytes() { - // Channel Union - let cu_block: Cu4Block = block.read_le().context("Failed parsing CU block")?; - let cu_composition: Option> = None; - let shape = (Vec::::new(), Order::RowMajor); - array_size = 0; - let (rekeyed_cns, rekeyed_n_cn) = parse_and_rekey_channels( - rdr, - &cu_block.cu_cn_member, - &mut position, - sharable, - record_layout, - cg_cycle_count, - )?; - n_cn += rekeyed_n_cn; - cns.extend(rekeyed_cns); - Ok(( - Composition { - block: Compo::CU(Box::new(cu_block)), - compo: cu_composition, - }, - position, - array_size, - shape, - n_cn, - cns, - )) - } else if block_header_short.hdr_id == "##CN".as_bytes() { - // Channel structure - array_size = 1; - let (cnss, pos, n_cns, first_rec_pos) = parse_cn4( - rdr, - target, - position, - sharable, - record_layout, - cg_cycle_count, - )?; - position = pos; - n_cn += n_cns; - cns = cnss; - let cn_composition: Option>; - let cn_struct: Cn4 = if let Some(cn) = cns.get(&first_rec_pos) { - cn.clone() - } else { - Cn4::default() - }; - let shape: (Vec, Order); - if cn_struct.block.cn_composition != 0 { - let (cn, pos, _array_size, s, n_cns, cnss) = parse_composition( - rdr, - cn_struct.block.cn_composition, - position, - sharable, - record_layout, - cg_cycle_count, - )?; - shape = s; - position = pos; - n_cn += n_cns; - cns.extend(cnss); - cn_composition = Some(Box::new(cn)); - } 
else { - cn_composition = None; - shape = (vec![1], Order::RowMajor); - } - Ok(( - Composition { - block: Compo::CN(Box::new(cn_struct)), - compo: cn_composition, - }, - position, - array_size, - shape, - n_cn, - cns, - )) - } else { - bail!("Unknown composition block type") - } -} - -/// parses mdfinfo structure to make channel names unique -/// creates channel names set and links master channels to set of channels -pub fn build_channel_db( - dg: &mut BTreeMap, - sharable: &SharableBlocks, - n_cg: usize, - n_cn: usize, -) -> ChannelNamesSet { - let mut channel_list: ChannelNamesSet = HashMap::with_capacity(n_cn); - let mut master_channel_list: HashMap = HashMap::with_capacity(n_cg); - // creating channel list for whole file and making channel names unique - dg.iter_mut().for_each(|(dg_position, dg)| { - dg.cg.iter_mut().for_each(|(record_id, cg)| { - let gn = cg.get_cg_name(sharable); - let gs = cg.get_cg_source_name(sharable); - let gp = cg.get_cg_source_path(sharable); - cg.cn.iter_mut().for_each(|(cn_record_position, cn)| { - if channel_list.contains_key(&cn.unique_name) { - let mut changed: bool = false; - let space_char = String::from(" "); - // create unique channel name - if let Ok(Some(cs)) = cn.get_cn_source_name(sharable) { - cn.unique_name.push_str(&space_char); - cn.unique_name.push_str(&cs); - changed = true; - } - if let Ok(Some(cp)) = cn.get_cn_source_path(sharable) { - cn.unique_name.push_str(&space_char); - cn.unique_name.push_str(&cp); - changed = true; - } - if let Ok(Some(name)) = &gn { - cn.unique_name.push_str(&space_char); - cn.unique_name.push_str(name); - changed = true; - } - if let Ok(Some(source)) = &gs { - cn.unique_name.push_str(&space_char); - cn.unique_name.push_str(source); - changed = true; - } - if let Ok(Some(path)) = &gp { - cn.unique_name.push_str(&space_char); - cn.unique_name.push_str(path); - changed = true; - } - // No souce or path name to make channel unique - if !changed || channel_list.contains_key(&cn.unique_name) { - 
// extend name with channel block position, unique - cn.unique_name.push_str(&space_char); - cn.unique_name.push_str(&cn.block_position.to_string()); - } - }; - channel_list.insert( - cn.unique_name.clone(), - ( - None, // computes at second step master channel because of cg_cg_master - *dg_position, - (cg.block_position, *record_id), - (cn.block_position, *cn_record_position), - ), - ); - if cn.block.cn_type == 2 || cn.block.cn_type == 3 { - // Master channel - master_channel_list.insert(cg.block_position, cn.unique_name.clone()); - } - }); - }); - }); - // identifying master channels - let avg_ncn_per_cg = if n_cg > 0 { n_cn / n_cg } else { 0 }; - dg.iter_mut().for_each(|(_dg_position, dg)| { - dg.cg.iter_mut().for_each(|(_record_id, cg)| { - let mut cg_channel_list: HashSet = HashSet::with_capacity(avg_ncn_per_cg); - let mut master_channel_name: Option = None; - if let Some(name) = master_channel_list.get(&cg.block_position) { - master_channel_name = Some(name.to_string()); - } else if let Some(cg_cg_master) = cg.block.cg_cg_master { - // master is in another cg block, possible from 4.2 - if let Some(name) = master_channel_list.get(&cg_cg_master) { - master_channel_name = Some(name.to_string()); - } - } - cg.cn.iter_mut().for_each(|(_cn_record_position, cn)| { - cg_channel_list.insert(cn.unique_name.clone()); - // assigns master in channel_list - if let Some(id) = channel_list.get_mut(&cn.unique_name) { - id.0.clone_from(&master_channel_name); - } - }); - cg.channel_names = cg_channel_list; - cg.master_channel_name = master_channel_name; - }); - }); - channel_list -} - -/// Generic Data block struct, without the Id -#[derive(Debug, PartialEq, Eq, Default, Clone)] -#[binrw] -#[br(little)] -#[repr(C)] -pub struct Dt4Block { - //header - // dl_id: [u8; 4], // ##DL - /// reserved - reserved: [u8; 4], - /// Length of block in bytes - pub len: u64, - /// # of links - links: u64, -} - -/// DL4 Data List block struct -#[derive(Debug, PartialEq, Eq, Default, Clone)] 
-#[binrw] -#[br(little)] -#[repr(C)] -pub struct Dl4Block { - //header - // dl_id: [u8; 4], // ##DL - /// reserved - reserved: [u8; 4], - /// Length of block in bytes - dl_len: u64, - /// # of links - dl_links: u64, - // links - /// next DL - pub dl_dl_next: i64, - #[br(if(dl_links > 1), little, count = dl_links - 1)] - pub dl_data: Vec, - // members - /// Flags - dl_flags: u8, - dl_reserved: [u8; 3], - /// Number of data blocks - dl_count: u32, - #[br(if((dl_flags & 0b1)>0), little)] - dl_equal_length: Option, - #[br(if((dl_flags & 0b1)==0), little, count = dl_count)] - dl_offset: Vec, - #[br(if((dl_flags & 0b10)>0), little, count = dl_count)] - dl_time_values: Vec, - #[br(if((dl_flags & 0b100)>0), little, count = dl_count)] - dl_angle_values: Vec, - #[br(if((dl_flags & 0b1000)>0), little, count = dl_count)] - dl_distance_values: Vec, -} - -/// parses Data List block -/// pointing to DT, SD, RD or DZ blocks -pub fn parser_dl4_block( - rdr: &mut BufReader<&File>, - target: i64, - mut position: i64, -) -> Result<(Dl4Block, i64)> { - rdr.seek_relative(target - position) - .context("Could not reach position to read Dl4Block")?; - let block: Dl4Block = rdr - .read_le() - .context("Could not read into Dl4Block struct")?; - position = target + block.dl_len as i64; - Ok((block, position)) -} - -/// Helper function to decompress data using various algorithms -pub fn decompress_data( - zip_type: u8, - zip_parameter: u32, - buf: Vec, - org_data_length: u64, -) -> Result> { - let mut data = Vec::::new(); - match zip_type { - 0 | 1 => { - // deflate algorithm (zlib format) - let reader = Cursor::new(buf); - let mut decoder = ZlibDecoder::new(reader); - decoder - .read_to_end(&mut data) - .context("Error decompressing Deflate data")?; - } - 2 | 3 => { - // zstd algorithm - let reader = Cursor::new(buf); - let mut decoder = - ZstdDecoder::new(reader).context("Error creating Zstd decoder from read vector")?; - let _nbbytesread = decoder - .read_to_end(&mut data) - .context("error 
reading the compressed bytes")?; - } - 4 | 5 => { - // lz4 algorithm - let reader = Cursor::new(buf); - let mut decoder = - Lz4Decoder::new(reader).context("Error creating Lz4 decoder from read vector")?; - let _nbbytesread = decoder - .read_to_end(&mut data) - .context("error reading the compressed bytes")?; - } - 254 => { - // MDF 4.3 custom/vendor-specific compression - warn!("Custom compression (zip_type=254) not supported - data will be empty"); - return Ok(data); - } - _ => { - bail!("not implemented compression algorithm: {}", zip_type) - } - }; - // transpose data - if matches!(zip_type, 1 | 3 | 5) && zip_parameter > 0 { - // transpose - let m = org_data_length / zip_parameter as u64; - let tail: Vec = data.split_off((m * zip_parameter as u64) as usize); - let mut output = vec![0u8; (m * zip_parameter as u64) as usize]; - transpose::transpose(&data, &mut output, m as usize, zip_parameter as usize); - data = output; - if !tail.is_empty() { - data.extend(tail); - } - } - Ok(data) -} - -/// parses DZBlock -pub fn parse_dz(rdr: &mut BufReader<&File>) -> Result<(Vec, Dz4Block)> { - let mut block: Dz4Block = rdr - .read_le() - .context("Could not read into Dz4Block struct")?; - let mut buf = vec![0u8; block.dz_data_length as usize]; - rdr.read_exact(&mut buf).context("Could not read Dz data")?; - // decompress data - let data = decompress_data( - block.dz_zip_type, - block.dz_zip_parameter, - buf, - block.dz_org_data_length, - )?; - block.dz_org_data_length = data.len() as u64; - Ok((data, block)) -} - -/// DZ4 Data List block struct -#[derive(Debug, PartialEq, Eq, Clone)] -#[binrw] -#[br(little)] -#[repr(C)] -pub struct Dz4Block { - //header - // dz_id: [u8; 4], // ##DZ - reserved: [u8; 4], // reserved - /// Length of block in bytes - pub len: u64, - dz_links: u64, // # of links - // links - // members - /// "DT", "SD", "RD" or "DV", "DI", "RV", "RI" - pub dz_org_block_type: [u8; 2], - /// Zip algorithm, 0 deflate, 1 transpose + deflate - dz_zip_type: u8, - /// 
reserved - dz_reserved: u8, - /// Zip algorithm parameter - dz_zip_parameter: u32, // - /// length of uncompressed data - pub dz_org_data_length: u64, - /// length of compressed data - pub dz_data_length: u64, -} - -impl Default for Dz4Block { - fn default() -> Self { - Dz4Block { - reserved: [0; 4], - len: 0, - dz_links: 0, - dz_org_block_type: [68, 86], // DV - dz_zip_type: 0, // No transposition for a single channel - dz_reserved: 0, - dz_zip_parameter: 0, - dz_org_data_length: 0, - dz_data_length: 0, - } - } -} - -/// DL4 Data List block struct -#[derive(Debug, PartialEq, Eq, Clone)] -#[binrw] -#[br(little)] -#[repr(C)] -pub struct Ld4Block { - // header - // ld_id: [u8; 4], // ##LD - reserved: [u8; 4], // reserved - /// Length of block in bytes - pub ld_len: u64, - /// # of links - pub ld_n_links: u64, - // links - /// next ld block - pub ld_next: i64, - /// number of links - #[br(if(ld_n_links > 1), little, count = ld_n_links - 1)] - pub ld_links: Vec, - // members - /// Flags - pub ld_flags: u8, - /// Zip info in valid data - pub ld_zip_info: u8, - /// Zip info in ivalid data - pub ld_zip_info_inval: u8, - /// Extended flags - pub ld_flags_ext: u8, - /// Number of data blocks - pub ld_count: u32, - #[br(if((ld_flags & 0b1)!=0), little)] - pub ld_equal_sample_count: Option, - #[br(if((ld_flags & 0b1)==0), little, count = ld_count)] - pub ld_sample_offset: Vec, - #[br(if((ld_flags & 0b10)>0), little, count = ld_count)] - dl_time_values: Vec, - #[br(if((ld_flags & 0b100)>0), little, count = ld_count)] - dl_angle_values: Vec, - #[br(if((ld_flags & 0b1000)>0), little, count = ld_count)] - dl_distance_values: Vec, -} - -impl Default for Ld4Block { - fn default() -> Self { - Ld4Block { - reserved: [0; 4], - ld_len: 56, - ld_n_links: 2, - ld_next: 0, - ld_links: vec![], - ld_flags: 0, - ld_zip_info: 0, - ld_zip_info_inval: 0, - ld_flags_ext: 0, - ld_count: 1, - ld_equal_sample_count: None, - ld_sample_offset: vec![], - dl_time_values: vec![], - dl_angle_values: 
vec![], - dl_distance_values: vec![], - } - } -} - -impl Ld4Block { - pub fn ld_ld_next(&self) -> i64 { - self.ld_next - } - /// Data block positions - pub fn ld_data(&self) -> Vec { - // In MDF 4.3, bit 7 of ld_flags_ext indicates invalid data present. - // If present, links are interleaved: Data 1, Inval 1, Data 2, Inval 2, ... - // We can also check if the number of links matches 2 * ld_count. - if (1u8 << 7) & self.ld_flags_ext > 0 || self.ld_links.len() as u32 == self.ld_count * 2 { - self.ld_links.iter().step_by(2).copied().collect() - } else { - self.ld_links.clone() - } - } - /// Invalid data block positions - pub fn ld_invalid_data(&self) -> Vec { - if (1u8 << 7) & self.ld_flags_ext > 0 || self.ld_links.len() as u32 == self.ld_count * 2 { - self.ld_links.iter().skip(1).step_by(2).copied().collect() - } else { - Vec::::new() - } - } -} - -/// parse List Data block -/// equivalent ot DLBlock but unsorted data is not allowed -/// pointing to DV/DI and RV/RI blocks -pub fn parser_ld4_block( - rdr: &mut BufReader<&File>, - target: i64, - mut position: i64, -) -> Result<(Ld4Block, i64)> { - rdr.seek_relative(target - position) - .context("Could not reach Ld4Block position")?; - let block: Ld4Block = rdr - .read_le() - .context("Could not read buffer into Ld4Block struct")?; - position = target + block.ld_len as i64; - Ok((block, position)) -} - -/// HL4 Data List block struct -#[derive(Debug, PartialEq, Eq, Default, Clone)] -#[binrw] -#[br(little)] -#[repr(C)] -pub struct Hl4Block { - //header - // ##HL - // hl_id: [u8; 4], - /// reserved - reserved: [u8; 4], - /// Length of block in bytes - pub hl_len: u64, - /// # of links - hl_links: u64, - /// links - pub hl_dl_first: i64, // first LD block - // members - /// flags - hl_flags: u16, - /// Zip algorithn - hl_zip_type: u8, - /// reserved - hl_reserved: [u8; 5], -} - -/// GD4 Guard Block struct (MDF 4.3) -/// Used to safeguard newly introduced features against incompatible readers -/// Note: gd_reserved is not 
included as its size varies based on gd_len -#[derive(Debug, PartialEq, Eq, Default, Clone)] -#[binrw] -#[br(little)] -#[repr(C)] -pub struct Gd4Block { - // header - // ##GD - // gd_id: [u8; 4], - /// reserved - reserved: [u8; 4], - /// Length of block in bytes - pub gd_len: u64, - /// # of links (always 1) - gd_links: u64, - // link section - /// Pointer to the guarded block (shall not be NIL) - pub gd_link: i64, - // data section - /// Minimum version number of the MDF format the reader shall support - /// Same format as id_ver in IDBLOCK, i.e. 430 for MDF 4.3.0 - pub gd_version: u16, - // gd_reserved is not included - size varies, position handled manually -} - -/// DS4 Data Stream block struct -#[derive(Debug, PartialEq, Eq, Default, Clone)] -#[binrw] -#[br(little)] -#[repr(C)] -pub struct Ds4Block { - //header - // ##DS - // ds_id: [u8; 4], - /// reserved - // reserved: [u8; 4], - /// Length of block in bytes - // pub ds_len: u64, - /// # of links - pub ds_links: u64, - /// links - #[br(count = ds_links)] - pub links: Vec, - /// data - /// Minimum version of the reader to read the data - pub ds_version: u16, - /// DSBlock mode, 0 data stream, 1 data description - pub ds_mode: u8, - /// Reserved - pub ds_reserved: [u8; 5], -} - -impl Ds4Block { - pub fn ds_cn_composition(&self) -> i64 { - self.links.first().copied().unwrap_or(0) - } - #[allow(dead_code)] - pub fn ds_cn_alignment_start(&self) -> i64 { - self.links.get(1).copied().unwrap_or(0) - } - pub fn ds_data(&self) -> i64 { - self.links.get(2).copied().unwrap_or(0) - } - #[allow(dead_code)] - pub fn ds_md_comment(&self) -> i64 { - self.links.get(3).copied().unwrap_or(0) - } -} - -/// CL4 Channel List block struct -#[derive(Debug, PartialEq, Eq, Default, Clone)] -#[binrw] -#[br(little)] -#[repr(C)] -pub struct Cl4Block { - //header - // ##CL - // cl_id: [u8; 4], - /// reserved - // reserved: [u8; 4], - /// Length of block in bytes - // pub cl_len: u64, - /// # of links - pub cl_links: u64, - /// links - /// 
link to CNBlock describing dynamic data - pub cl_composition: i64, - /// link to CNBlock for the alignment start with data stream mode - pub cl_cn_size: i64, - /// data - /// Flags - pub cl_flags: u16, - /// Bytes alignment - pub cl_alignment: u8, - /// Bit Offset - pub cl_bit_offset: u8, - /// Byte Offset - pub cl_byte_offset: u32, -} - -/// CV4 Channel Variant block struct -#[derive(Debug, PartialEq, Eq, Default, Clone)] -#[binrw] -#[br(little)] -#[repr(C)] -pub struct Cv4Block { - //header - // ##CV - // cv_id: [u8; 4], - /// reserved - // reserved: [u8; 4], - /// Length of block in bytes - // pub cv_len: u64, - /// # of links - pub cv_n_links: u64, - /// links - /// link to CNBlock for discriminator channel - pub cv_cn_discriminator: i64, - /// list of option channel - #[br(if(cv_n_links > 1), little, count = cv_n_links - 1)] - pub cv_cn_option: Vec, - /// data - /// number of option channels - pub cv_option_count: u32, - /// reserved - pub cv_reserved: [u8; 4], - /// list of discriminator values for the options - #[br(if(cv_option_count > 1), little, count = cv_option_count )] - pub cv_option_val: Vec, -} - -/// CU4 Channel Union block struct -#[derive(Debug, PartialEq, Eq, Default, Clone)] -#[binrw] -#[br(little)] -#[repr(C)] -pub struct Cu4Block { - //header - // ##CU - // cu_id: [u8; 4], - /// reserved - // reserved: [u8; 4], - /// Length of block in bytes - // pub cu_len: u64, - /// # of links - pub cu_n_links: u64, - /// links - /// list of member channel - #[br(if(cu_n_links > 1), little, count = cu_n_links)] - pub cu_cn_member: Vec, - /// data - /// number of member channels - pub cu_member_count: u32, - /// reserved - pub cu_reserved: [u8; 4], -} - -/// SR4 Sample Reduction block struct (Section 6.29 of MDF 4.3 spec) -#[derive(Debug, PartialEq, Clone)] -#[binrw] -#[br(little)] -#[repr(C)] -pub struct Sr4Block { - /// Pointer to next sample reduction block (SRBLOCK) (can be NIL) - pub sr_sr_next: i64, - /// Pointer to reduction data block 
(RD-/RV-/DZBLOCK or DL-/LD-/HLBLOCK) - pub sr_data: i64, - /// Number of cycles, i.e. number of sample reduction records - pub sr_cycle_count: u64, - /// Length of sample interval used to calculate the reduction records (unit depends on sr_sync_type) - pub sr_interval: f64, - /// Sync type: 1=time(s), 2=angle(rad), 3=distance(m), 4=index - pub sr_sync_type: u8, - /// Flags: bit 0 = invalidation bytes present, bit 1 = dominant invalidation bit - pub sr_flags: u8, - /// Reserved - sr_reserved: [u8; 6], -} - -impl Default for Sr4Block { - fn default() -> Self { - Sr4Block { - sr_sr_next: 0, - sr_data: 0, - sr_cycle_count: 0, - sr_interval: 0.0, - sr_sync_type: 1, - sr_flags: 0, - sr_reserved: [0; 6], - } - } -} diff --git a/src/mdfinfo/mdfinfo4/at_block.rs b/src/mdfinfo/mdfinfo4/at_block.rs new file mode 100644 index 0000000..de5051b --- /dev/null +++ b/src/mdfinfo/mdfinfo4/at_block.rs @@ -0,0 +1,192 @@ +//! Attachment block (ATBLOCK) for MDF4 +use anyhow::{Context, Result}; +use binrw::{BinReaderExt, binrw}; +use log::warn; +use md5::{Digest, Md5}; +use std::collections::HashMap; +use std::fmt::{self, Display}; +use std::fs::File; +use std::io::{Cursor, Read}; + +use super::block_header::{read_meta_data, SharableBlocks}; +use super::data_block::decompress_data; +use super::metadata::BlockType; +use crate::mdfinfo::sym_buf_reader::SymBufReader; + +/// At4 Attachment block struct +#[derive(Debug, Copy, Clone)] +#[binrw] +#[br(little)] +#[repr(C)] +pub struct At4Block { + /// ##DG + at_id: [u8; 4], + /// reserved + reserved: [u8; 4], + /// Length of block in bytes + pub at_len: u64, + /// # of links + at_links: u64, + /// Link to next ATBLOCK (linked list) (can be NIL) + pub at_at_next: i64, + /// Link to TXBLOCK with the path and file name of the embedded or referenced file (can only be NIL if data is embedded). The path of the file can be relative or absolute. If relative, it is relative to the directory of the MDF file. 
If no path is given, the file must be in the same directory as the MDF file. + pub at_tx_filename: i64, + /// Link to TXBLOCK with MIME content-type text that gives information about the attached data. Can be NIL if the content-type is unknown, but should be specified whenever possible. The MIME content-type string must be written in lowercase. + pub at_tx_mimetype: i64, + /// Link to MDBLOCK with comment and additional information about the attachment (can be NIL). + pub at_md_comment: i64, + /// Flags The value contains the following bit flags (see AT_FL_xxx): + pub at_flags: u16, + /// Creator index, i.e. zero-based index of FHBLOCK in global list of FHBLOCKs that specifies which application has created this attachment, or changed it most recently. + pub at_creator_index: u16, + /// Compression algorithm used for embedded data + pub at_zip_type: u8, + /// File path format + pub at_path_syntax: u8, + /// Reserved + at_reserved: [u8; 2], + /// 128-bit value for MD5 check sum (of the uncompressed data if data is embedded and compressed). Only valid if "MD5 check sum valid" flag (bit 2) is set. + pub at_md5_checksum: [u8; 16], + /// Original data size in Bytes, i.e. either for external file or for uncompressed data. + pub at_original_size: u64, + /// Embedded data size N, i.e. number of Bytes for binary embedded data following this element. Must be 0 if external file is referenced. 
+ pub at_embedded_size: u64, + // followed by embedded data depending of flag +} + +impl At4Block { + /// Returns true if this attachment has embedded data + pub fn is_embedded(&self) -> bool { + (self.at_flags & 0b1) > 0 + } + + /// Returns true if the data is compressed + pub fn is_compressed(&self) -> bool { + (self.at_flags & 0b10) > 0 + } + + /// Returns true if the MD5 checksum is valid + #[allow(dead_code)] + pub fn has_md5_checksum(&self) -> bool { + (self.at_flags & 0b100) > 0 + } + + /// Returns the compression type as a string description + pub fn get_compression_str(&self) -> &'static str { + if !self.is_compressed() { + return "None"; + } + match self.at_zip_type { + 0 | 1 => "Deflate", + 2 | 3 => "Zstd", + 4 | 5 => "LZ4", + _ => "Unknown", + } + } +} + +impl Display for At4Block { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let storage = if self.is_embedded() { "embedded" } else { "external" }; + let compression = self.get_compression_str(); + write!( + f, + "AT: {} size={} original={} compression={} creator_index={}", + storage, + self.at_embedded_size, + self.at_original_size, + compression, + self.at_creator_index + ) + } +} + +/// At4 (Attachment) block struct parser +fn parser_at4_block( + rdr: &mut SymBufReader<&File>, + target: i64, + mut position: i64, +) -> Result<(At4Block, Option>, i64)> { + rdr.seek_relative(target - position) + .context("Could not reach At4 Block position")?; + let mut buf = [0u8; 96]; + rdr.read_exact(&mut buf) + .context("Could not read At4 Block buffer")?; + let mut block = Cursor::new(buf); + let block: At4Block = block + .read_le() + .context("Could not parse At4 Block buffer into At4Block struct")?; + position = target + 96; + + // reads embedded if exists + let data: Option> = if (block.at_flags & 0b1) > 0 { + let mut embedded_data = vec![0u8; block.at_embedded_size as usize]; + rdr.read_exact(&mut embedded_data) + .context("Could not parse At4Block embedded attachement")?; + + let zip_type = 
block.at_zip_type; + if (block.at_flags & 0b10) > 0 { + embedded_data = decompress_data(zip_type, 0, embedded_data, block.at_original_size)?; + } + + // MD5 Checksum verification + if (block.at_flags & 0b100) > 0 { + let mut hasher = Md5::new(); + hasher.update(&embedded_data); + let result = hasher.finalize(); + if result.as_slice() != block.at_md5_checksum { + warn!( + "MD5 checksum mismatch for attachment: expected {:?}, got {:?}", + block.at_md5_checksum, result + ); + } + } + + position += block.at_embedded_size as i64; + Some(embedded_data) + } else { + None + }; + Ok((block, data, position)) +} + +pub type At = HashMap>)>; + +/// parses Attachment blocks along with its linked comments, returns a hashmap of At4 block and attached data in a vect +pub fn parse_at4( + rdr: &mut SymBufReader<&File>, + sharable: &mut SharableBlocks, + target: i64, + mut position: i64, +) -> Result<(At, i64)> { + let mut at: At = HashMap::new(); + if target > 0 { + let (block, data, pos) = parser_at4_block(rdr, target, position)?; + position = pos; + // Reads MD + position = read_meta_data(rdr, sharable, block.at_md_comment, position, BlockType::AT)?; + // reads TX file_name + position = read_meta_data(rdr, sharable, block.at_tx_filename, position, BlockType::AT)?; + // Reads tx mime type + position = read_meta_data(rdr, sharable, block.at_tx_mimetype, position, BlockType::AT)?; + let mut next_pointer = block.at_at_next; + at.insert(target, (block, data)); + + while next_pointer > 0 { + let block_start = next_pointer; + let (block, data, pos) = parser_at4_block(rdr, next_pointer, position)?; + position = pos; + // Reads MD + position = read_meta_data(rdr, sharable, block.at_md_comment, position, BlockType::AT)?; + // reads TX file_name + position = + read_meta_data(rdr, sharable, block.at_tx_filename, position, BlockType::AT)?; + // Reads tx mime type + position = + read_meta_data(rdr, sharable, block.at_tx_mimetype, position, BlockType::AT)?; + next_pointer = block.at_at_next; + 
at.insert(block_start, (block, data)); + } + } + Ok((at, position)) +} diff --git a/src/mdfinfo/mdfinfo4/block_header.rs b/src/mdfinfo/mdfinfo4/block_header.rs new file mode 100644 index 0000000..f200283 --- /dev/null +++ b/src/mdfinfo/mdfinfo4/block_header.rs @@ -0,0 +1,304 @@ +//! Block header structures, metadata parsing, and sharable blocks for MDF4 +use anyhow::{Context, Result}; +use binrw::{BinReaderExt, binrw}; +use std::collections::HashMap; +use std::fmt::{self, Display}; +use std::fs::File; +use std::io::{Cursor, Read}; +use std::str; + +use super::cc_block::Cc4Block; +use super::metadata::{BlockType, MetaData, MetaDataBlockType}; +use super::si_block::Si4Block; +use crate::mdfinfo::sym_buf_reader::SymBufReader; + +/// MDF4 - common block Header +#[derive(Debug, Copy, Clone)] +#[binrw] +#[br(little)] +#[repr(C)] +pub struct Blockheader4 { + /// '##XX' + pub hdr_id: [u8; 4], + /// reserved, must be 0 + pub hdr_gap: [u8; 4], + /// Length of block in bytes + pub hdr_len: u64, + /// # of links + pub hdr_links: u64, +} + +impl Default for Blockheader4 { + fn default() -> Self { + Blockheader4 { + hdr_id: [35, 35, 84, 88], // ##TX + hdr_gap: [0x00, 0x00, 0x00, 0x00], + hdr_len: 24, + hdr_links: 0, + } + } +} + +impl Display for Blockheader4 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let id = str::from_utf8(&self.hdr_id).unwrap_or("????"); + write!(f, "Block: {} len={} links={}", id, self.hdr_len, self.hdr_links) + } +} + +/// parse the block header and its fields id, (reserved), length and number of links +#[inline] +pub fn parse_block_header(rdr: &mut SymBufReader<&File>) -> Result { + let mut buf = [0u8; 24]; + rdr.read_exact(&mut buf) + .context("could not read blockheader4 Id")?; + let mut block = Cursor::new(buf); + let header: Blockheader4 = block + .read_le() + .context("binread could not parse blockheader4")?; + Ok(header) +} + +/// MDF4 - common block Header without the number of links +#[derive(Debug, Copy, Clone)] +#[binrw] 
+#[br(little)] +#[repr(C)] +pub struct Blockheader4Short { + /// '##XX' + pub hdr_id: [u8; 4], + /// reserved, must be 0 + pub hdr_gap: [u8; 4], + /// Length of block in bytes + pub hdr_len: u64, +} + +impl Default for Blockheader4Short { + fn default() -> Self { + Blockheader4Short { + hdr_id: [35, 35, 67, 78], // ##CN + hdr_gap: [0u8; 4], + hdr_len: 160, + } + } +} + +impl Display for Blockheader4Short { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let id = str::from_utf8(&self.hdr_id).unwrap_or("????"); + write!(f, "Block: {} len={}", id, self.hdr_len) + } +} + +pub fn default_short_header(variant: BlockType) -> Blockheader4Short { + match variant { + BlockType::CG => Blockheader4Short { + hdr_id: [35, 35, 67, 71], // ##CG + hdr_gap: [0u8; 4], + hdr_len: 104, // 112 with cg_cg_master, 104 without, + }, + BlockType::CN => Blockheader4Short { + hdr_id: [35, 35, 67, 78], // ##CN + hdr_gap: [0u8; 4], + hdr_len: 160, + }, + _ => Blockheader4Short { + hdr_id: [35, 35, 67, 78], // ##CN + hdr_gap: [0u8; 4], + hdr_len: 160, + }, + } +} + +/// parse the block header and its fields id, (reserved), length except the number of links +#[inline] +pub(super) fn parse_block_header_short(rdr: &mut SymBufReader<&File>) -> Result { + let mut buf = [0u8; 16]; + rdr.read_exact(&mut buf) + .context("could not read short blockheader4 Id")?; + let mut block = Cursor::new(buf); + let header: Blockheader4Short = block + .read_le() + .context("could not parse short blockheader4")?; + Ok(header) +} + +/// reads generically a block header and return links and members section part into a Seek buffer for further processing +#[inline] +pub(super) fn parse_block( + rdr: &mut SymBufReader<&File>, + target: i64, + mut position: i64, +) -> Result<(Cursor>, Blockheader4, i64)> { + // Reads block header + rdr.seek_relative(target - position) + .context("Could not reach block header position")?; // change buffer position + let block_header = parse_block_header(rdr).context(" could 
not read header block")?; // reads header + + // Reads in buffer rest of block + let mut buf = vec![0u8; (block_header.hdr_len - 24) as usize]; + rdr.read_exact(&mut buf) + .context("Could not read rest of block after header")?; + position = target + block_header.hdr_len as i64; + let block = Cursor::new(buf); + Ok((block, block_header, position)) +} + +/// reads generically a block header wihtout the number of links and returns links and members section part into a Seek buffer for further processing +#[inline] +pub(super) fn parse_block_short( + rdr: &mut SymBufReader<&File>, + target: i64, + mut position: i64, +) -> Result<(Cursor>, Blockheader4Short, i64)> { + // Reads block header + rdr.seek_relative(target - position) + .context("Could not reach block short header position")?; // change buffer position + let block_header: Blockheader4Short = + parse_block_header_short(rdr).context(" could not read short header block")?; // reads header + + // Reads in buffer rest of block + let mut buf = vec![0u8; (block_header.hdr_len - 16) as usize]; + rdr.read_exact(&mut buf) + .context("Could not read rest of block after short header")?; + position = target + block_header.hdr_len as i64; + let block = Cursor::new(buf); + Ok((block, block_header, position)) +} + +/// Parses the MD or TX block +pub(super) fn read_meta_data( + rdr: &mut SymBufReader<&File>, + sharable: &mut SharableBlocks, + target: i64, + mut position: i64, + parent_block_type: BlockType, +) -> Result { + if target != 0 && !sharable.md_tx.contains_key(&target) { + let (raw_data, block, pos) = + parse_block(rdr, target, position).context("could not read metadata block")?; + position = pos; + let block_type = match block.hdr_id { + [35, 35, 77, 68] => MetaDataBlockType::MdBlock, + [35, 35, 84, 88] => MetaDataBlockType::TX, + _ => MetaDataBlockType::TX, + }; + let md = MetaData { + block, + raw_data: raw_data.into_inner(), + block_type, + comments: HashMap::new(), + parent_block_type, + }; + 
sharable.md_tx.insert(target, md); + Ok(position) + } else { + Ok(position) + } +} + +/// sharable blocks (most likely referenced multiple times and shared by several blocks) +/// that are in sharable fields and holds CC, SI, TX and MD blocks +#[derive(Debug, Default, Clone)] +#[repr(C)] +pub struct SharableBlocks { + pub(crate) md_tx: HashMap, + pub(crate) cc: HashMap, + pub(crate) si: HashMap, +} + +/// SharableBlocks display implementation to facilitate debugging +impl fmt::Display for SharableBlocks { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!(f, "MD TX comments : \n")?; + for (_k, c) in self.md_tx.iter() { + match c.block_type { + MetaDataBlockType::MdParsed => { + for (tag, text) in c.comments.iter() { + writeln!(f, "Tag: {tag} Text: {text}")?; + } + } + MetaDataBlockType::TX => match c.get_data_string() { + Ok(s) => writeln!(f, "Text: {s}")?, + Err(e) => writeln!(f, "Text: {e:?}")?, + }, + _ => (), + } + } + writeln!(f, "CC : \n")?; + for (position, cc) in self.cc.iter() { + writeln!(f, "Position: {position} Text: {cc:?}")?; + } + writeln!(f, "SI : ")?; + for (position, si) in self.si.iter() { + writeln!(f, "Position: {position} Text: {si:?}")?; + } + writeln!(f, "finished") + } +} + +impl SharableBlocks { + /// Returns the text from TX Block or TX tag's text from MD block + pub fn get_tx(&self, position: i64) -> anyhow::Result> { + let mut txt: Option = None; + if let Some(md) = self.md_tx.get(&position) { + txt = md.get_tx()?; + }; + Ok(txt) + } + /// Creates a new SharableBlocks of type TX (not MD) + pub fn create_tx(&mut self, position: i64, text: String) { + let md = self + .md_tx + .entry(position) + .or_insert_with(|| MetaData::new(MetaDataBlockType::TX, BlockType::CN)); + md.set_data_buffer(text.as_bytes()); + } + /// Returns metadata from MD Block + /// keys are tag and related value text of tag + pub fn get_comments(&mut self, position: i64) -> HashMap { + let mut comments: HashMap = HashMap::new(); + if let Some(md) = 
self.md_tx.get_mut(&position) { + match md.block_type { + MetaDataBlockType::MdParsed => { + comments.clone_from(&md.comments); + } + MetaDataBlockType::MdBlock => { + // not yet parsed, so let's parse it + let _ = md.parse_xml(); + comments.clone_from(&md.comments); + } + MetaDataBlockType::TX => { + // should not happen + } + } + }; + comments + } + /// Returns metadata from MD Block linked by HD Block + /// keys are tag and related value text of tag + pub fn get_hd_comments(&self, position: i64) -> HashMap { + // this method assumes the xml was already parsed + let mut comments: HashMap = HashMap::new(); + if let Some(md) = self.md_tx.get(&position) + && md.block_type == MetaDataBlockType::MdParsed + { + comments.clone_from(&md.comments); + }; + comments + } + /// parses the HD Block metadata comments + /// done right after reading HD block + pub fn parse_hd_comments(&mut self, position: i64) { + if let Some(md) = self.md_tx.get_mut(&position) { + let _ = md.parse_hd_xml(); + }; + } + /// Create new Shared Block + pub fn new(n_channels: usize) -> SharableBlocks { + let md_tx: HashMap = HashMap::with_capacity(n_channels); + let cc: HashMap = HashMap::new(); + let si: HashMap = HashMap::new(); + SharableBlocks { md_tx, cc, si } + } +} diff --git a/src/mdfinfo/mdfinfo4/ca_block.rs b/src/mdfinfo/mdfinfo4/ca_block.rs new file mode 100644 index 0000000..cc5be50 --- /dev/null +++ b/src/mdfinfo/mdfinfo4/ca_block.rs @@ -0,0 +1,336 @@ +//! 
Channel Array block (CABLOCK) for MDF4 +use anyhow::{Context, Error, Result}; +use binrw::{BinReaderExt, binrw}; +use byteorder::{LittleEndian, ReadBytesExt}; +use std::collections::VecDeque; +use std::fmt::{self, Display}; +use std::io::Cursor; + +use super::block_header::Blockheader4Short; +use crate::data_holder::tensor_arrow::Order; + +/// type alias for Ca4Block parse result +pub type CaBlockParseResult = (Ca4Block, (Vec, Order), usize, usize); + +/// Ca4 Channel Array block struct +#[derive(Debug, PartialEq, Clone)] +#[repr(C)] +pub struct Ca4Block { + // header + /// ##CA + pub ca_id: [u8; 4], + /// reserved + reserved: [u8; 4], + /// Length of block in bytes + pub ca_len: u64, + /// # of links + ca_links: u64, + // links + /// [] Array of composed elements: Pointer to a CNBLOCK for array of structures, or to a CABLOCK for array of arrays (can be NIL). If a CABLOCK is referenced, it must use the "CN template" storage type (ca_storage = 0). + pub ca_composition: i64, + /// [Π N(d) or empty] Only present for storage type "DG template". List of links to data blocks (DTBLOCK/DLBLOCK) for each element in case of "DG template" storage (ca_storage = 2). A link in this list may only be NIL if the cycle count of the respective element is 0: ca_data\[k\] = NIL => ca_cycle_count\[k\] = 0 The links are stored line-oriented, i.e. element k uses ca_data\[k\] (see explanation below). The size of the list must be equal to Π N(d), i.e. to the product of the number of elements per dimension N(d) over all dimensions D. Note: link ca_data\[0\] must be equal to dg_data link of the parent DGBLOCK. + pub ca_data: Option>, + /// [Dx3 or empty] Only present if "dynamic size" flag (bit 0) is set. References to channels for size signal of each dimension (can be NIL). Each reference is a link triple with pointer to parent DGBLOCK, parent CGBLOCK and CNBLOCK for the channel (either all three links are assigned or NIL). 
+ ca_dynamic_size: Option>, + /// [Dx3 or empty] Only present if "input quantity" flag (bit 1) is set. Reference to channels for input quantity signal for each dimension (can be NIL). Each reference is a link triple with pointer to parent DGBLOCK, parent CGBLOCK and CNBLOCK for the channel (either all three links are assigned or NIL). + ca_input_quantity: Option>, + /// [3 or empty] Only present if "output quantity" flag (bit 2) is set. Reference to channel for output quantity (can be NIL). The reference is a link triple with pointer to parent DGBLOCK, parent CGBLOCK and CNBLOCK for the channel (either all three links are assigned or NIL). + ca_output_quantity: Option>, + /// [3 or empty] Only present if "comparison quantity" flag (bit 3) is set. Reference to channel for comparison quantity (can be NIL). The reference is a link triple with pointer to parent DGBLOCK, parent CGBLOCK and CNBLOCK for the channel (either all three links are assigned or NIL). + ca_comparison_quantity: Option>, + /// [D or empty] Only present if "axis" flag (bit 4) is set. Pointer to a conversion rule (CCBLOCK) for the scaling axis of each dimension. If a link NIL a 1:1 conversion must be used for this axis. + ca_cc_axis_conversion: Option>, + /// [Dx3 or empty] Only present if "axis" flag (bit 4) is set and "fixed axes flag" (bit 5) is not set. References to channels for scaling axis of respective dimension (can be NIL). Each reference is a link triple with pointer to parent DGBLOCK, parent CGBLOCK and CNBLOCK for the channel (either all three links are assigned or NIL). + ca_axis: Option>, + //members + /// Array type (defines semantic of the array) see CA_T_xxx + pub ca_type: u8, + /// Storage type (defines how the element values are stored) see CA_S_xxx + pub ca_storage: u8, + /// Number of dimensions D > 0 For array type "axis", D must be 1. 
+ pub ca_ndim: u16, + /// Flags The value contains the following bit flags (Bit 0 = LSB): see CA_F_xxx + pub ca_flags: u32, + /// Base factor for calculation of Byte offsets for "CN template" storage type. + pub ca_byte_offset_base: i32, + /// Base factor for calculation of invalidation bit positions for CN template storage type. + pub ca_inval_bit_pos_base: u32, + pub ca_dim_size: Vec, + pub ca_axis_value: Option>, + pub ca_cycle_count: Option>, +} + +impl Default for Ca4Block { + fn default() -> Self { + Self { + ca_id: [35, 35, 67, 65], // ##CA + reserved: [0u8; 4], + ca_len: 48, + ca_links: 1, + ca_composition: 0, + ca_data: None, + ca_dynamic_size: None, + ca_input_quantity: None, + ca_output_quantity: None, + ca_comparison_quantity: None, + ca_cc_axis_conversion: None, + ca_axis: None, + ca_type: 0, // Array + ca_storage: 0, // CN template + ca_ndim: 1, + ca_flags: 0, + ca_byte_offset_base: 0, // first + ca_inval_bit_pos_base: 0, // present in DIBlock + ca_dim_size: vec![], + ca_axis_value: None, + ca_cycle_count: None, + } + } +} + +impl Ca4Block { + /// Returns a string representation of the array type (ca_type) + pub fn get_ca_type_str(&self) -> &'static str { + match self.ca_type { + 0 => "Array", + 1 => "ScalingAxis", + 2 => "LookUp", + 3 => "IntervalAxis", + 4 => "ClassificationResult", + _ => "Unknown", + } + } + /// Returns a string representation of the storage type (ca_storage) + pub fn get_storage_str(&self) -> &'static str { + match self.ca_storage { + 0 => "CN Template", + 1 => "CG Template", + 2 => "DG Template", + _ => "Unknown", + } + } +} + +impl Display for Ca4Block { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "CA: type={} ({}) storage={} ({}) dims={:?}", + self.get_ca_type_str(), + self.ca_type, + self.get_storage_str(), + self.ca_storage, + self.ca_dim_size + ) + } +} + +/// Channel Array block structure, only members section, links section structure complex +#[derive(Debug, Clone)] +#[binrw] 
+#[br(little)] +#[repr(C)] +pub struct Ca4BlockMembers { + /// Array type (defines semantic of the array) see CA_T_xxx + ca_type: u8, + /// Storage type (defines how the element values are stored) see CA_S_xxx + ca_storage: u8, + /// Number of dimensions D > 0 For array type "axis", D must be 1. + pub ca_ndim: u16, + /// Flags The value contains the following bit flags (Bit 0 = LSB): see CA_F_xxx + ca_flags: u32, + /// Base factor for calculation of Byte offsets for "CN template" storage type. + ca_byte_offset_base: i32, + /// Base factor for calculation of invalidation bit positions for CN template storage type. + ca_inval_bit_pos_base: u32, + #[br(if(ca_ndim > 0), little, count = ca_ndim)] + pub ca_dim_size: Vec, +} + +impl Default for Ca4BlockMembers { + fn default() -> Self { + Self { + ca_type: 0, + ca_storage: 0, + ca_ndim: 1, + ca_flags: 0, + ca_byte_offset_base: 0, + ca_inval_bit_pos_base: 0, + ca_dim_size: vec![], + } + } +} + +impl Display for Ca4BlockMembers { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "CA: {}D array dims={:?}", self.ca_ndim, self.ca_dim_size) + } +} + +/// Channel Array block parser +pub(super) fn parse_ca_block( + ca_block: &mut Cursor>, + block_header: Blockheader4Short, + cg_cycle_count: u64, +) -> Result { + // reads links count + let ca_links: u64 = ca_block + .read_le() + .context("Could not read links count in ca block")?; + //Reads members first + ca_block.set_position(8 + ca_links * 8); // change buffer position after links section + let ca_members: Ca4BlockMembers = ca_block + .read_le() + .context("Could not read buffer into CaBlockMembers struct")?; + let mut snd: usize; + let mut pnd: usize; + // converts ca_dim_size from u64 to usize + let shape_dim_usize: Vec = ca_members.ca_dim_size.iter().map(|&d| d as usize).collect(); + if shape_dim_usize.len() == 1 { + snd = shape_dim_usize[0]; + pnd = shape_dim_usize[0]; + } else { + snd = 0; + pnd = 1; + let sizes = shape_dim_usize.clone(); + for x in 
sizes.into_iter() { + snd += x; + pnd *= x; + } + } + let mut shape_dim: VecDeque = VecDeque::from(shape_dim_usize); + shape_dim.push_front(cg_cycle_count as usize); + + let shape: (Vec, Order) = if (ca_members.ca_flags >> 6 & 1) != 0 { + (shape_dim.into(), Order::ColumnMajor) + } else { + (shape_dim.into(), Order::RowMajor) + }; + + let mut val = vec![0.0f64; snd]; + let ca_axis_value: Option> = if (ca_members.ca_flags & 0b100000) > 0 { + ca_block + .read_f64_into::(&mut val) + .context("Could not read ca_axis_value")?; + Some(val) + } else { + None + }; + + let mut val = vec![0u64; pnd]; + let ca_cycle_count: Option> = if ca_members.ca_storage >= 1 { + ca_block + .read_u64_into::(&mut val) + .context("Could not read ca_cycle_count")?; + Some(val) + } else { + None + }; + + // Reads links + ca_block.set_position(8); // change buffer position to beginning of links section + + let ca_composition: i64 = ca_block + .read_i64::() + .context("Could not read ca_composition")?; + + let mut val = vec![0i64; pnd]; + let ca_data: Option> = if ca_members.ca_storage == 2 { + ca_block + .read_i64_into::(&mut val) + .context("Could not read ca_data")?; + Some(val) + } else { + None + }; + + let mut val = vec![0i64; (ca_members.ca_ndim * 3) as usize]; + let ca_dynamic_size: Option> = if (ca_members.ca_flags & 0b1) > 0 { + ca_block + .read_i64_into::(&mut val) + .context("Could not read ca_dynamic_size")?; + Some(val) + } else { + None + }; + + let mut val = vec![0i64; (ca_members.ca_ndim * 3) as usize]; + let ca_input_quantity: Option> = if (ca_members.ca_flags & 0b10) > 0 { + ca_block + .read_i64_into::(&mut val) + .context("Could not read ca_input_quantity")?; + Some(val) + } else { + None + }; + + let mut val = vec![0i64; 3]; + let ca_output_quantity: Option> = if (ca_members.ca_flags & 0b100) > 0 { + ca_block + .read_i64_into::(&mut val) + .context("Could not read ca_output_quantity")?; + Some(val) + } else { + None + }; + + let mut val = vec![0i64; 3]; + let 
ca_comparison_quantity: Option> = if (ca_members.ca_flags & 0b1000) > 0 { + ca_block + .read_i64_into::(&mut val) + .context("Could not read ca_comparison_quantity")?; + Some(val) + } else { + None + }; + + let mut val = vec![0i64; ca_members.ca_ndim as usize]; + let ca_cc_axis_conversion: Option> = if (ca_members.ca_flags & 0b10000) > 0 { + ca_block + .read_i64_into::(&mut val) + .context("Could not read ca_cc_axis_conversion")?; + Some(val) + } else { + None + }; + + let mut val = vec![0i64; (ca_members.ca_ndim * 3) as usize]; + let ca_axis: Option> = + if ((ca_members.ca_flags & 0b10000) > 0) & ((ca_members.ca_flags & 0b100000) > 0) { + ca_block + .read_i64_into::(&mut val) + .context("Could not read ca_axis")?; + Some(val) + } else { + None + }; + + Ok(( + Ca4Block { + ca_id: block_header.hdr_id, + reserved: block_header.hdr_gap, + ca_len: block_header.hdr_len, + ca_links, + ca_composition, + ca_data, + ca_dynamic_size, + ca_input_quantity, + ca_output_quantity, + ca_comparison_quantity, + ca_cc_axis_conversion, + ca_axis, + ca_type: ca_members.ca_type, + ca_storage: ca_members.ca_storage, + ca_ndim: ca_members.ca_ndim, + ca_flags: ca_members.ca_flags, + ca_byte_offset_base: ca_members.ca_byte_offset_base, + ca_inval_bit_pos_base: ca_members.ca_inval_bit_pos_base, + ca_dim_size: ca_members.ca_dim_size, + ca_axis_value, + ca_cycle_count, + }, + shape, + snd, + pnd, + )) +} diff --git a/src/mdfinfo/mdfinfo4/cc_block.rs b/src/mdfinfo/mdfinfo4/cc_block.rs new file mode 100644 index 0000000..407c648 --- /dev/null +++ b/src/mdfinfo/mdfinfo4/cc_block.rs @@ -0,0 +1,134 @@ +//! 
Channel Conversion block (CCBLOCK) for MDF4 +use anyhow::{Context, Result}; +use binrw::{BinReaderExt, binrw}; +use std::fmt::{self, Display}; +use std::fs::File; +use std::io::Cursor; + +use super::block_header::{parse_block_short, read_meta_data, SharableBlocks}; +use super::metadata::BlockType; +use crate::mdfinfo::sym_buf_reader::SymBufReader; + +/// Cc4 Channel Conversion block struct +#[derive(Debug, Clone)] +#[binrw] +#[br(little)] +#[repr(C)] +pub struct Cc4Block { + // cc_id: [u8; 4], // ##CC + // reserved: [u8; 4], // reserved + // cc_len: u64, // Length of block in bytes + /// # of links + cc_links: u64, + /// Link to TXBLOCK with name (identifier) of conversion (can be NIL). Name must be according to naming rules stated in 4.4.2 Naming Rules. + pub cc_tx_name: i64, + /// Link to TXBLOCK/MDBLOCK with physical unit of signal data (after conversion). (can be NIL) Unit only applies if no unit defined in CNBLOCK. Otherwise the unit of the channel overwrites the conversion unit. + cc_md_unit: i64, + // An MDBLOCK can be used to additionally reference the A-HDO unit definition. Note: for channels with cn_sync_type > 0, the unit is already defined, thus a reference to an A-HDO definition should be omitted to avoid redundancy. + /// Link to TXBLOCK/MDBLOCK with comment of conversion and additional information. (can be NIL) + pub cc_md_comment: i64, + /// Link to CCBLOCK for inverse formula (can be NIL, must be NIL for CCBLOCK of the inverse formula (no cyclic reference allowed). + cc_cc_inverse: i64, + #[br(if(cc_links > 4), little, count = cc_links - 4)] + /// List of additional links to TXBLOCKs with strings or to CCBLOCKs with partial conversion rules. Length of list is given by cc_ref_count. The list can be empty. Details are explained in formula-specific block supplement. + pub cc_ref: Vec, + + // Data Members + /// Conversion type (formula identifier) (see CC_T_xxx) + pub cc_type: u8, + /// Precision for display of floating point values. 
0xFF means unrestricted precision (infinite) Any other value specifies the number of decimal places to use for display of floating point values. Note: only valid if "precision valid" flag (bit 0) is set and if cn_precision of the parent CNBLOCK is invalid, otherwise cn_precision must be used. + cc_precision: u8, + /// Flags (see CC_F_xxx) + cc_flags: u16, + /// Length M of cc_ref list with additional links. See formula-specific block supplement for meaning of the links. + cc_ref_count: u16, + /// Length N of cc_val list with additional parameters. See formula-specific block supplement for meaning of the parameters. + cc_val_count: u16, + /// Minimum physical signal value that occurred for this signal. Only valid if "physical value range valid" flag (bit 1) is set. + cc_phy_range_min: f64, + /// Maximum physical signal value that occurred for this signal. Only valid if "physical value range valid" flag (bit 1) is set. + cc_phy_range_max: f64, + #[br(args(cc_val_count, cc_type))] + pub cc_val: CcVal, +} + +/// Cc Values can be either a float or Uint64 +#[derive(Debug, Clone)] +#[binrw] +#[br(little, import(count: u16, cc_type: u8))] +#[repr(C)] +pub enum CcVal { + #[br(pre_assert(cc_type < 11))] + Real(#[br(count = count)] Vec), + + #[br(pre_assert(cc_type == 11))] + Uint(#[br(count = count)] Vec), +} + +impl Cc4Block { + /// Returns a string representation of the conversion type (cc_type) + pub fn get_cc_type_str(&self) -> &'static str { + match self.cc_type { + 0 => "Identity", + 1 => "Linear", + 2 => "Rational", + 3 => "Algebraic", + 4 => "ValueToValueInterpolation", + 5 => "ValueToValue", + 6 => "ValueRangeToValue", + 7 => "ValueToText", + 8 => "ValueRangeToText", + 9 => "TextToValue", + 10 => "TextToText", + 11 => "BitfieldToText", + _ => "Unknown", + } + } +} + +impl Display for Cc4Block { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "CC: type={} ({}) refs={} vals={}", + self.get_cc_type_str(), + self.cc_type, + 
self.cc_ref_count, + self.cc_val_count + ) + } +} + +/// reads and parses CC block and its linked metadata +pub(super) fn read_cc( + rdr: &mut SymBufReader<&File>, + target: &i64, + mut position: i64, + mut block: Cursor>, + sharable: &mut SharableBlocks, +) -> Result { + let cc_block: Cc4Block = block + .read_le() + .context("Could nto read buffer into Cc4Block struct")?; + position = read_meta_data(rdr, sharable, cc_block.cc_md_unit, position, BlockType::CC)?; + position = read_meta_data(rdr, sharable, cc_block.cc_tx_name, position, BlockType::CC)?; + + for pointer in &cc_block.cc_ref { + if !sharable.cc.contains_key(pointer) + && !sharable.md_tx.contains_key(pointer) + && *pointer != 0 + { + let (ref_block, header, _pos) = parse_block_short(rdr, *pointer, position)?; + position = pointer + header.hdr_len as i64; + if "##TX".as_bytes() == header.hdr_id { + // TX Block + position = read_meta_data(rdr, sharable, *pointer, position, BlockType::CC)? + } else { + // CC Block + position = read_cc(rdr, pointer, position, ref_block, sharable)?; + } + } + } + sharable.cc.insert(*target, cc_block); + Ok(position) +} diff --git a/src/mdfinfo/mdfinfo4/cg_block.rs b/src/mdfinfo/mdfinfo4/cg_block.rs new file mode 100644 index 0000000..118566f --- /dev/null +++ b/src/mdfinfo/mdfinfo4/cg_block.rs @@ -0,0 +1,807 @@ +//! 
Channel Group block (CGBLOCK) for MDF4 +use anyhow::{Context, Error, Result}; +use arrow::array::{Array, ArrayRef, UInt32Array, UnionArray}; +use arrow::buffer::ScalarBuffer; +use arrow::compute::take; +use arrow::datatypes::{Field, UnionFields}; +use binrw::{BinReaderExt, binrw}; +use rayon::prelude::*; +use std::collections::{HashMap, HashSet}; +use std::fmt::{self, Display}; +use std::fs::File; +use std::io::{Cursor, Read}; +use std::sync::Arc; + +use crate::data_holder::channel_data::ChannelData; +use crate::mdfinfo::sym_buf_reader::SymBufReader; + +use super::block_header::{ + parse_block_header_short, parse_block_short, read_meta_data, Blockheader4Short, SharableBlocks, +}; +use super::metadata::BlockType; +use super::cn_block::{parse_cn4, Cn4, CnType}; +use super::composition::Compo; + +// Channel Group (CG) flags - cg_flags field (u16) +/// Bit 0: VLSD channel group (Variable Length Signal Data) +pub const CG_F_VLSD: u16 = 1 << 0; +/// Bit 4: Event signal group - channel group contains event signals +pub const CG_F_EVENT_SIGNAL_GROUP: u16 = 1 << 4; +/// Bit 5: VLSC channel group (contains VLSC channels, MDF 4.3) +pub const CG_F_VLSC: u16 = 1 << 5; +/// Bit 6: Raw sensor event channel group +pub const CG_F_RAW_SENSOR_EVENT: u16 = 1 << 6; +/// Bit 7: Protocol event channel group +pub const CG_F_PROTOCOL_EVENT: u16 = 1 << 7; +use super::si_block::Si4Block; +use super::sr_block::Sr4Block; + +/// Cg4 Channel Group block struct +#[derive(Debug, Copy, Clone)] +#[binrw] +#[br(little)] +#[repr(C)] +pub struct Cg4Block { + /// ##CG + // cg_id: [u8; 4], + /// reserved + // reserved: [u8; 4], + /// Length of block in bytes + // pub cg_len: u64, + /// # of links + pub cg_links: u64, + /// Pointer to next channel group block (CGBLOCK) (can be NIL) + pub cg_cg_next: i64, + /// Pointer to first channel block (CNBLOCK) (can be NIL, must be NIL for VLSD CGBLOCK, i.e. 
if "VLSD channel group" flag (bit 0) is set) + pub cg_cn_first: i64, + /// Pointer to acquisition name (TXBLOCK) (can be NIL, must be NIL for VLSD CGBLOCK) + cg_tx_acq_name: i64, + /// Pointer to acquisition source (SIBLOCK) (can be NIL, must be NIL for VLSD CGBLOCK) See also rules for uniqueness explained in 4.4.3 Identification of Channels. + cg_si_acq_source: i64, + /// Pointer to first sample reduction block (SRBLOCK) (can be NIL, must be NIL for VLSD CGBLOCK) + cg_sr_first: i64, + ///Pointer to comment and additional information (TXBLOCK or MDBLOCK) (can be NIL, must be NIL for VLSD CGBLOCK) + cg_md_comment: i64, + #[br(if(cg_links > 6))] + pub cg_cg_master: Option, + // Data Members + /// Record ID, value must be less than maximum unsigned integer value allowed by dg_rec_id_size in parent DGBLOCK. Record ID must be unique within linked list of CGBLOCKs. + pub cg_record_id: u64, + /// Number of cycles, i.e. number of samples for this channel group. This specifies the number of records of this type in the data block. + pub cg_cycle_count: u64, + /// Flags The value contains the following bit flags (see CG_F_xx): + pub cg_flags: u16, + cg_path_separator: u16, + /// Reserved. + cg_reserved: [u8; 4], + /// Normal CGBLOCK: Number of data Bytes (after record ID) used for signal values in record, i.e. size of plain data for each recorded sample of this channel group. VLSD CGBLOCK: Low part of a UINT64 value that specifies the total size in Bytes of all variable length signal values for the recorded samples of this channel group. See explanation for cg_inval_bytes. + pub cg_data_bytes: u32, + /// Normal CGBLOCK: Number of additional Bytes for record used for invalidation bits. Can be zero if no invalidation bits are used at all. Invalidation bits may only occur in the specified number of Bytes after the data Bytes, not within the data Bytes that contain the signal values. 
VLSD CGBLOCK: High part of UINT64 value that specifies the total size in Bytes of all variable length signal values for the recorded samples of this channel group, i.e. the total size in Bytes can be calculated by cg_data_bytes + (cg_inval_bytes << 32) Note: this value does not include the Bytes used to specify the length of each VLSD value! + pub cg_inval_bytes: u32, +} + +impl Default for Cg4Block { + fn default() -> Self { + Cg4Block { + // cg_id: [35, 35, 67, 71], // ##CG + // reserved: [0u8; 4], + // cg_len: 104, // 112 with cg_cg_master, 104 without + cg_links: 6, // 7 with cg_cg_master, 6 without + cg_cg_next: 0, + cg_cn_first: 0, + cg_tx_acq_name: 0, + cg_si_acq_source: 0, + cg_sr_first: 0, + cg_md_comment: 0, + cg_cg_master: None, + cg_record_id: 0, + cg_cycle_count: 0, + cg_flags: 0, // bit 3 set for remote master + cg_path_separator: 0, + cg_reserved: [0; 4], + cg_data_bytes: 0, + cg_inval_bytes: 0, + } + } +} + +impl Cg4Block { + /// Returns a string representation of the channel group flags + pub fn get_flags_str(&self) -> String { + let mut flags = Vec::new(); + if (self.cg_flags & CG_F_VLSD) != 0 { + flags.push("VLSD"); + } + if (self.cg_flags & CG_F_VLSC) != 0 { + flags.push("VLSC"); + } + if (self.cg_flags & CG_F_EVENT_SIGNAL_GROUP) != 0 { + flags.push("EventSignal"); + } + if (self.cg_flags & CG_F_RAW_SENSOR_EVENT) != 0 { + flags.push("RawSensor"); + } + if (self.cg_flags & CG_F_PROTOCOL_EVENT) != 0 { + flags.push("ProtocolEvent"); + } + if (self.cg_flags & 0b1000) != 0 { + // Bit 3: Remote master + flags.push("RemoteMaster"); + } + if flags.is_empty() { + "None".to_string() + } else { + flags.join("|") + } + } +} + +impl Display for Cg4Block { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "CG: rec_id={} cycles={} data_bytes={} inval_bytes={} flags={}", + self.cg_record_id, + self.cg_cycle_count, + self.cg_data_bytes, + self.cg_inval_bytes, + self.get_flags_str() + ) + } +} + +/// Cg4 (Channel Group) block struct 
parser with linked comments Source Information in sharable blocks +fn parse_cg4_block( + rdr: &mut SymBufReader<&File>, + target: i64, + mut position: i64, + sharable: &mut SharableBlocks, + record_id_size: u8, +) -> Result<(Cg4, i64, usize)> { + let (mut block, header, pos) = parse_block_short(rdr, target, position)?; + position = pos; + let cg: Cg4Block = block + .read_le() + .context("Could not read buffer into Cg4Block struct")?; + + // Reads MD + position = read_meta_data(rdr, sharable, cg.cg_md_comment, position, BlockType::CG)?; + // For VLSD/VLSC, cg_inval_bytes is the high part of VL data size, not invalidation bytes + let inval_bytes_for_record = if (cg.cg_flags & (CG_F_VLSD | CG_F_VLSC)) != 0 { + 0 + } else { + cg.cg_inval_bytes + }; + let record_layout = (record_id_size, cg.cg_data_bytes, inval_bytes_for_record); + + // reads CN (and other linked block behind like CC, SI, CA, etc.) + let (cn, pos, n_cn, _first_rec_pos) = parse_cn4( + rdr, + cg.cg_cn_first, + position, + sharable, + record_layout, + cg.cg_cycle_count, + )?; + position = pos; + + // Reads Acq Name + position = read_meta_data(rdr, sharable, cg.cg_tx_acq_name, position, BlockType::CG)?; + + // Reads SI Acq name + let si_pointer = cg.cg_si_acq_source; + if (si_pointer != 0) && !sharable.si.contains_key(&si_pointer) { + let (mut si_block, _header, pos) = parse_block_short(rdr, si_pointer, position)?; + position = pos; + let si_block: Si4Block = si_block + .read_le() + .context("Could not read buffer into Si4block struct")?; + position = read_meta_data(rdr, sharable, si_block.si_tx_name, position, BlockType::SI)?; + position = read_meta_data(rdr, sharable, si_block.si_tx_path, position, BlockType::SI)?; + sharable.si.insert(si_pointer, si_block); + } + + let record_length = cg.cg_data_bytes; + + // Parse Sample Reduction blocks if present + let (sr_blocks, pos) = parse_sr4(rdr, cg.cg_sr_first, position)?; + position = pos; + + let cg_struct = Cg4 { + header, + block: cg, + cn, + 
master_channel_name: None, + channel_names: HashSet::new(), + record_length, + block_position: target, + vlsd_cg: None, + invalid_bytes: None, + sr: sr_blocks, + }; + + Ok((cg_struct, position, n_cn)) +} + +/// Parses the linked list of Sample Reduction blocks (SRBLOCK) starting from target +fn parse_sr4( + rdr: &mut SymBufReader<&File>, + target: i64, + mut position: i64, +) -> Result<(Vec, i64)> { + let mut sr_blocks: Vec = Vec::new(); + if target <= 0 { + return Ok((sr_blocks, position)); + } + + let mut next = target; + while next > 0 { + // Read just the 16-byte header first to validate before allocating + rdr.seek_relative(next - position) + .context("Could not reach SR block header position")?; + let header: Blockheader4Short = + parse_block_header_short(rdr).context("Could not read SR block header")?; + // Validate block ID is ##SR + if &header.hdr_id != b"##SR" { + position = next + 16; + break; + } + // Now read the rest of the block + let mut buf = vec![0u8; (header.hdr_len - 16) as usize]; + rdr.read_exact(&mut buf) + .context("Could not read SR block body")?; + position = next + header.hdr_len as i64; + let mut block = Cursor::new(buf); + let sr: Sr4Block = block + .read_le() + .context("Could not read buffer into Sr4Block struct")?; + next = sr.sr_sr_next; + sr_blocks.push(sr); + } + + Ok((sr_blocks, position)) +} + +/// Channel Group struct +/// it contains the related channels structure, a set of channel names, the dedicated master channel name and other helper data. 
+#[derive(Debug, Clone)] +#[repr(C)] +pub struct Cg4 { + /// short header + pub header: Blockheader4Short, + /// CG block without header + pub block: Cg4Block, + /// hashmap of channels + pub cn: CnType, + /// Master channel name + pub master_channel_name: Option, + /// Set of channel names belonging to this channel group + pub channel_names: HashSet, + /// as not stored in .block but can still be referenced by other blocks + pub block_position: i64, + /// record length including recordId and invalid bytes + pub record_length: u32, + /// pointing to another cg,cn + pub vlsd_cg: Option<(u64, i32)>, + /// invalid byte array, optional + pub invalid_bytes: Option>, + /// Sample reduction blocks linked from cg_sr_first + pub sr: Vec, +} + +/// Cg4 implementations for extracting acquisition and source name and path +#[allow(dead_code)] +impl Cg4 { + /// Returns true if this channel group is an event signal group (cg_flags bit 4 set). + /// Event signal groups contain channels that store event data instead of regular signal data. + /// The event structure is described by a template EVBLOCK in each event signal channel's cn_data link. + pub fn is_event_signal_group(&self) -> bool { + (self.block.cg_flags & CG_F_EVENT_SIGNAL_GROUP) != 0 + } + + /// Returns true if this channel group has sample reduction data available. + /// Sample reduction data provides mean/min/max values for fast preview/graphical display. + pub fn has_sample_reduction(&self) -> bool { + !self.sr.is_empty() + } + + /// Returns the number of sample reduction blocks available for this channel group. + pub fn sample_reduction_count(&self) -> usize { + self.sr.len() + } + + /// Returns a reference to the sample reduction blocks. + /// Each Sr4Block contains metadata about the reduction (interval, sync type, etc.) + /// and a pointer (sr_data) to the actual reduction data. 
+ pub fn get_sample_reduction_blocks(&self) -> &[Sr4Block] { + &self.sr + } + + /// Returns the data bytes per record for this channel group. + /// This is needed to decode sample reduction records. + pub fn get_data_bytes(&self) -> u32 { + self.block.cg_data_bytes + } + + /// Returns the invalidation bytes per record for this channel group. + /// Returns 0 for VLSD/VLSC channel groups where this field has a different meaning. + pub fn get_inval_bytes(&self) -> u32 { + if (self.block.cg_flags & (CG_F_VLSD | CG_F_VLSC)) != 0 { + 0 + } else { + self.block.cg_inval_bytes + } + } + + /// Channel group acquisition name + pub fn get_cg_name(&self, sharable: &SharableBlocks) -> Result> { + sharable.get_tx(self.block.cg_tx_acq_name) + } + /// Channel group source name + pub fn get_cg_source_name(&self, sharable: &SharableBlocks) -> Result> { + let si = sharable.si.get(&self.block.cg_si_acq_source); + match si { + Some(block) => Ok(block.get_si_source_name(sharable)?), + None => Ok(None), + } + } + /// Channel group source path + pub fn get_cg_source_path(&self, sharable: &SharableBlocks) -> Result> { + let si = sharable.si.get(&self.block.cg_si_acq_source); + match si { + Some(block) => Ok(block.get_si_path_name(sharable)?), + None => Ok(None), + } + } + /// Computes the validity mask for each channel in the group + /// clears out the common invalid bytes vector for the group at the end + pub fn process_all_channel_invalid_bits(&mut self) -> Result<(), Error> { + // For VLSD/VLSC, cg_inval_bytes is the high part of VL data size, not invalidation bytes + if (self.block.cg_flags & (CG_F_VLSD | CG_F_VLSC)) != 0 { + return Ok(()); + } + // get invalid bytes + let cg_inval_bytes = self.block.cg_inval_bytes as usize; + if let Some(invalid_bytes) = &self.invalid_bytes { + // To extract invalidity for each channel from invalid_bytes + self.cn + .par_iter_mut() + .filter(|(_rec_pos, cn)| !cn.data.is_empty()) + .try_for_each(|(_rec_pos, cn): (&i32, &mut Cn4)| -> Result<(), Error> { 
+ if let Some((Some(mask), invalid_byte_position, invalid_byte_mask)) = + &mut cn.invalid_mask + { + // mask is already initialised to all valid values. + invalid_bytes.chunks(cg_inval_bytes).enumerate().for_each( + |(index, record)| { + // arrow considers bit set as valid while mdf spec considers bit set as invalid + mask.set_bit( + index, + (record[*invalid_byte_position] & *invalid_byte_mask) == 0, + ); + }, + ); + cn.data.set_validity(mask).with_context(|| { + format!( + "failed applying invalid bits for channel {}", + cn.unique_name + ) + })?; + } + Ok(()) + })?; + self.invalid_bytes = None; // Clears out invalid bytes channel + } else if cg_inval_bytes > 0 { + // invalidity already stored in mask for each channel by read_channels_from_bytes() + // to set validity in arrow array + self.cn + .par_iter_mut() + .filter(|(_rec_pos, cn)| !cn.data.is_empty()) + .try_for_each(|(_rec_pos, cn): (&i32, &mut Cn4)| -> Result<(), Error> { + if let Some((validity, _invalid_byte_position, _invalid_byte_mask)) = + &mut cn.invalid_mask + { + if let Some(mask) = validity { + cn.data.set_validity(mask).with_context(|| { + format!( + "failed applying invalid bits for channel {} from mask", + cn.unique_name + ) + })?; + } + *validity = None; // clean bitmask from Cn4 as present in arrow array + } + Ok(()) + })?; + } + Ok(()) + } + + /// Process Channel Variant (CV) compositions after data is loaded. + /// For each channel with a CV composition, this method: + /// 1. Reads the discriminator channel values + /// 2. Maps discriminator values to option indices using cv_option_val + /// 3. Merges option channel data based on the discriminator + /// + /// After processing, the parent channel (with CV composition) contains the merged data. 
+ pub fn process_channel_variants(&mut self) -> Result<(), Error> { + // Find channels with CV composition and collect info + let cv_channels: Vec<(i32, i64, Vec, Vec)> = self + .cn + .iter() + .filter_map(|(rec_pos, cn)| { + if let Some(composition) = &cn.composition + && let Compo::CV(cv_block) = &composition.block + { + return Some(( + *rec_pos, + cv_block.cv_cn_discriminator, + cv_block.cv_cn_option.clone(), + cv_block.cv_option_val.clone(), + )); + } + None + }) + .collect(); + + for (parent_rec_pos, discriminator_ptr, option_ptrs, option_vals) in cv_channels { + // First pass: collect all needed data (immutable borrows complete before mutable) + let discriminator_values: Vec; + let option_data: Vec>; + let option_names: Vec; + + { + // Find the discriminator channel by block_position + let discriminator_cn = self + .cn + .values() + .find(|cn| cn.block_position == discriminator_ptr); + + let Some(disc_cn) = discriminator_cn else { + log::warn!( + "CV discriminator channel not found for block_position {}", + discriminator_ptr + ); + continue; + }; + + // Get discriminator values as u64 + discriminator_values = match disc_cn.data.to_u64_vec() { + Some(v) => v, + None => { + log::warn!("CV discriminator channel has unsupported data type"); + continue; + } + }; + + if discriminator_values.is_empty() { + continue; + } + + // Collect option channel data and names in a single pass + let (data_vec, names_vec): (Vec>, Vec) = option_ptrs + .iter() + .map( + |ptr| match self.cn.values().find(|cn| cn.block_position == *ptr) { + Some(cn) => (Some(cn.data.clone()), cn.unique_name.clone()), + None => (None, String::new()), + }, + ) + .unzip(); + option_data = data_vec; + option_names = names_vec; + } + // Immutable borrows end here + + // Build index mapping: discriminator value -> option index + let val_to_option: std::collections::HashMap = option_vals + .iter() + .enumerate() + .map(|(idx, val)| (*val, idx)) + .collect(); + + // Check if all option channels have the same 
data type + let all_same_type = { + let mut discriminants: Vec> = Vec::new(); + for data in option_data.iter().flatten() { + discriminants.push(std::mem::discriminant(data)); + } + discriminants.windows(2).all(|w| w[0] == w[1]) + }; + + if all_same_type { + // All options have the same type: use existing merge path + let template = option_data.iter().find_map(|o| o.clone()); + + // Second pass: update parent channel (mutable borrow) + if let Some(parent_cn) = self.cn.get_mut(&parent_rec_pos) + && let Some(tmpl) = template + { + let merged_data = merge_variant_data_owned( + &discriminator_values, + &option_data, + &val_to_option, + &tmpl, + ); + + if let Some(data) = merged_data { + parent_cn.data = data; + } + } + } else { + // Mixed types: build a dense UnionArray + // Effective sample count is the minimum of discriminator and all option lengths + let n_samples = { + let mut min_len = discriminator_values.len(); + for data in option_data.iter().flatten() { + min_len = min_len.min(data.len()); + } + min_len + }; + + // Single pass: build type_ids, offsets, and per-child indices together + let mut type_ids = Vec::with_capacity(n_samples); + let mut offsets = Vec::with_capacity(n_samples); + let mut child_indices: Vec> = vec![Vec::new(); option_data.len()]; + + for (i, disc_val) in discriminator_values[..n_samples].iter().enumerate() { + let opt_idx = val_to_option.get(disc_val).copied().unwrap_or(0); + type_ids.push(opt_idx as i8); + offsets.push(child_indices[opt_idx].len() as i32); + child_indices[opt_idx].push(i as u32); + } + + // Build child arrays using pre-collected indices + let children: Vec = option_data + .iter() + .enumerate() + .map(|(opt_idx, opt)| { + if let Some(data) = opt { + let full_array = data.finish_cloned(); + let indices_array = UInt32Array::from(child_indices[opt_idx].clone()); + take(&*full_array, &indices_array, None).unwrap_or(full_array) + } else { + Arc::new(arrow::array::NullArray::new(0)) as ArrayRef + } + }) + .collect(); + + let 
union_fields = build_union_fields(&option_names, &children); + let type_ids_buffer = ScalarBuffer::from(type_ids); + let offsets_buffer = ScalarBuffer::from(offsets); + + match UnionArray::try_new( + union_fields, + type_ids_buffer, + Some(offsets_buffer), + children, + ) { + Ok(union_array) => { + if let Some(parent_cn) = self.cn.get_mut(&parent_rec_pos) { + parent_cn.data = ChannelData::Union(union_array); + } + } + Err(e) => { + log::warn!("Failed to create dense UnionArray for CV variant: {}", e); + } + } + } + } + + Ok(()) + } + + /// Process Channel Union (CU) compositions after data is loaded. + /// For each channel with a CU composition, this method: + /// 1. Collects member channel data (already read by pipeline) + /// 2. Builds UnionFields from member names and data types + /// 3. Creates a sparse UnionArray where all members are valid at every row + /// 4. Replaces parent channel data with ChannelData::Union + /// + /// CU blocks represent C-style unions: all members share the same bytes and are + /// simultaneously valid, just interpreted differently. 
+ pub fn process_channel_unions(&mut self) -> Result<(), Error> { + // Find channels with CU composition and collect info + let cu_channels: Vec<(i32, Vec)> = self + .cn + .iter() + .filter_map(|(rec_pos, cn)| { + if let Some(composition) = &cn.composition + && let Compo::CU(cu_block) = &composition.block + { + return Some((*rec_pos, cu_block.cu_cn_member.clone())); + } + None + }) + .collect(); + + for (parent_rec_pos, member_ptrs) in cu_channels { + if member_ptrs.is_empty() { + continue; + } + + // Collect member channel info: (name, data as ArrayRef) + let member_info: Vec<(String, ArrayRef)> = member_ptrs + .iter() + .filter_map(|ptr| { + self.cn + .values() + .find(|cn| cn.block_position == *ptr) + .map(|cn| { + let name = cn.unique_name.clone(); + let array = cn.data.finish_cloned(); + (name, array) + }) + }) + .collect(); + + if member_info.is_empty() { + log::warn!( + "CU member channels not found for parent at rec_pos {}", + parent_rec_pos + ); + continue; + } + + // All members should have the same length (same number of samples) + let n_samples = member_info.first().map(|(_, arr)| arr.len()).unwrap_or(0); + if n_samples == 0 { + continue; + } + + // Split member_info into names and children, then build UnionFields + let (member_names, children): (Vec, Vec) = + member_info.into_iter().unzip(); + let union_fields = build_union_fields(&member_names, &children); + + // For sparse union: type_ids all set to 0 (first member as primary interpretation) + // In reality for CU blocks, all members are equally valid - we just pick the first + let type_ids: ScalarBuffer = ScalarBuffer::from(vec![0i8; n_samples]); + + // Create sparse UnionArray (offsets = None) + let union_array = match UnionArray::try_new( + union_fields, + type_ids, + None, // sparse union: no offsets + children, + ) { + Ok(arr) => arr, + Err(e) => { + log::warn!("Failed to create UnionArray for CU channel: {}", e); + continue; + } + }; + + // Update parent channel data + if let Some(parent_cn) = 
self.cn.get_mut(&parent_rec_pos) { + parent_cn.data = ChannelData::Union(union_array); + } + } + + Ok(()) + } +} + +impl Display for Cg4 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let master = self.master_channel_name.as_deref().unwrap_or("None"); + write!( + f, + "CG: master={} channels={} record_len={} cycles={}", + master, + self.cn.len(), + self.record_length, + self.block.cg_cycle_count + ) + } +} + +/// Build UnionFields from parallel name and child arrays slices. +fn build_union_fields(names: &[String], children: &[ArrayRef]) -> UnionFields { + let fields: Vec<(i8, Arc)> = children + .iter() + .enumerate() + .map(|(idx, array)| { + let name = names.get(idx).cloned().unwrap_or_default(); + ( + idx as i8, + Arc::new(Field::new(name, array.data_type().clone(), true)), + ) + }) + .collect(); + UnionFields::from_iter(fields) +} + +/// Merge variant option data based on discriminator values (using owned ChannelData) +fn merge_variant_data_owned( + discriminator_values: &[u64], + option_data: &[Option], + val_to_option: &std::collections::HashMap, + template: &ChannelData, +) -> Option { + use crate::data_holder::channel_data::ChannelData; + + let n_samples = discriminator_values.len(); + + macro_rules! 
merge_typed { + ($builder_type:ty, $variant:ident) => {{ + let mut builder = <$builder_type>::with_capacity(n_samples); + for (i, disc_val) in discriminator_values.iter().enumerate() { + if let Some(&opt_idx) = val_to_option.get(disc_val) + && let Some(Some(ChannelData::$variant(b))) = option_data.get(opt_idx) + && i < b.values_slice().len() + { + builder.append_value(b.values_slice()[i]); + continue; + } + // Default value if option not found + builder.append_value(Default::default()); + } + Some(ChannelData::$variant(builder)) + }}; + } + + match template { + ChannelData::UInt8(_) => merge_typed!(arrow::array::UInt8Builder, UInt8), + ChannelData::UInt16(_) => merge_typed!(arrow::array::UInt16Builder, UInt16), + ChannelData::UInt32(_) => merge_typed!(arrow::array::UInt32Builder, UInt32), + ChannelData::UInt64(_) => merge_typed!(arrow::array::UInt64Builder, UInt64), + ChannelData::Int8(_) => merge_typed!(arrow::array::Int8Builder, Int8), + ChannelData::Int16(_) => merge_typed!(arrow::array::Int16Builder, Int16), + ChannelData::Int32(_) => merge_typed!(arrow::array::Int32Builder, Int32), + ChannelData::Int64(_) => merge_typed!(arrow::array::Int64Builder, Int64), + ChannelData::Float32(_) => merge_typed!(arrow::array::Float32Builder, Float32), + ChannelData::Float64(_) => merge_typed!(arrow::array::Float64Builder, Float64), + _ => { + log::warn!("CV variant merge not implemented for this data type"); + None + } + } +} + +/// Cg4 blocks and linked blocks parsing +pub(super) fn parse_cg4( + rdr: &mut SymBufReader<&File>, + target: i64, + mut position: i64, + sharable: &mut SharableBlocks, + record_id_size: u8, +) -> Result<(HashMap, i64, usize, usize)> { + let mut cg: HashMap = HashMap::new(); + let mut n_cg: usize = 0; + let mut n_cn: usize = 0; + if target != 0 { + let (mut cg_struct, pos, num_cn) = + parse_cg4_block(rdr, target, position, sharable, record_id_size)?; + position = pos; + let mut next_pointer = cg_struct.block.cg_cg_next; + // For VLSD/VLSC, 
cg_inval_bytes is the high part of total VL data size, not invalidation bytes + let inval_bytes_size = if (cg_struct.block.cg_flags & (CG_F_VLSD | CG_F_VLSC)) != 0 { + 0 + } else { + cg_struct.block.cg_inval_bytes + }; + cg_struct.record_length += record_id_size as u32 + inval_bytes_size; + cg.insert(cg_struct.block.cg_record_id, cg_struct); + n_cg += 1; + n_cn += num_cn; + + while next_pointer != 0 { + let (mut cg_struct, pos, num_cn) = + parse_cg4_block(rdr, next_pointer, position, sharable, record_id_size)?; + position = pos; + // For VLSD/VLSC, cg_inval_bytes is the high part of total VL data size, not invalidation bytes + let inval_bytes_size = if (cg_struct.block.cg_flags & (CG_F_VLSD | CG_F_VLSC)) != 0 { + 0 + } else { + cg_struct.block.cg_inval_bytes + }; + cg_struct.record_length += record_id_size as u32 + inval_bytes_size; + next_pointer = cg_struct.block.cg_cg_next; + cg.insert(cg_struct.block.cg_record_id, cg_struct); + n_cg += 1; + n_cn += num_cn; + } + } + Ok((cg, position, n_cg, n_cn)) +} diff --git a/src/mdfinfo/mdfinfo4/ch_block.rs b/src/mdfinfo/mdfinfo4/ch_block.rs new file mode 100644 index 0000000..ccdb95e --- /dev/null +++ b/src/mdfinfo/mdfinfo4/ch_block.rs @@ -0,0 +1,123 @@ +//! 
Channel Hierarchy block (CHBLOCK) for MDF4 +use anyhow::{Context, Result}; +use binrw::{BinReaderExt, binrw}; +use std::collections::HashMap; +use std::fmt::{self, Display}; +use std::fs::File; + +use super::block_header::{parse_block_short, read_meta_data, SharableBlocks}; +use super::metadata::BlockType; +use crate::mdfinfo::sym_buf_reader::SymBufReader; + +/// Ch4Block struct +#[derive(Debug, PartialEq, Eq, Default, Clone)] +#[binrw] +#[br(little)] +#[repr(C)] +pub struct Ch4Block { + // header + // ##CH + // ch_id [u8;4] + /// reserved + // reserved: [u8; 4], + /// Length of block in bytes + // pub ch_len: u64, + /// # of links + pub ch_links: u64, + + // link section + /// link to next CHBLOCK at this hierarchy level + pub ch_ch_next: i64, + /// link to first CHBLOCK at the next hierarchy level (child) + pub ch_ch_first: i64, + /// link to TXBLOCK with the name of the hierarchy level + pub ch_tx_name: i64, + /// link to MDBLOCK with a comment/description + pub ch_md_comment: i64, + /// list of elements in this hierarchy level + #[br(count = ch_links - 4)] + pub ch_element: Vec, + + // data section + /// number of elements in this hierarchy level (Nx3) + pub ch_element_count: u32, + /// hierarchy level type + pub ch_type: u8, + /// reserved + pub ch_reserved: [u8; 3], +} + +impl Ch4Block { + /// Returns the hierarchy type as a string description + pub fn get_type_str(&self) -> &'static str { + match self.ch_type { + 0 => "Group", + 1 => "Function", + 2 => "Structure", + 3 => "Map list", + 4 => "Input variables", + 5 => "Output variables", + 6 => "Local variables", + 7 => "Defined calibration objects", + 8 => "Referenced calibration objects", + _ => "Unknown", + } + } +} + +impl Display for Ch4Block { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "CH: type={} ({}) elements={} children={}", + self.get_type_str(), + self.ch_type, + self.ch_element_count, + if self.ch_ch_first > 0 { "yes" } else { "no" } + ) + } +} + +/// parser 
Ch4Block +fn parser_ch4_block( + rdr: &mut SymBufReader<&File>, + target: i64, + mut position: i64, +) -> Result<(Ch4Block, i64)> { + let (mut block, _header, pos) = parse_block_short(rdr, target, position)?; + position = pos; + let block: Ch4Block = block.read_le().context("Error parsing ch block")?; + + Ok((block, position)) +} + +/// parses all CH blocks starting from target +pub fn parse_ch4( + rdr: &mut SymBufReader<&File>, + sharable: &mut SharableBlocks, + target: i64, + mut position: i64, +) -> Result<(HashMap, i64)> { + let mut ch = HashMap::new(); + let mut next_pointer = target; + while next_pointer > 0 { + let block_start = next_pointer; + let (block, pos) = parser_ch4_block(rdr, next_pointer, position)?; + position = pos; + + // Parse comments/names if exist + position = read_meta_data(rdr, sharable, block.ch_tx_name, position, BlockType::CH)?; + position = read_meta_data(rdr, sharable, block.ch_md_comment, position, BlockType::CH)?; + + // Traverse children + if block.ch_ch_first > 0 { + let (children, pos) = parse_ch4(rdr, sharable, block.ch_ch_first, position)?; + position = pos; + ch.extend(children); + } + + next_pointer = block.ch_ch_next; + ch.insert(block_start, block); + } + Ok((ch, position)) +} diff --git a/src/mdfinfo/mdfinfo4/cn_block.rs b/src/mdfinfo/mdfinfo4/cn_block.rs new file mode 100644 index 0000000..d9acd0f --- /dev/null +++ b/src/mdfinfo/mdfinfo4/cn_block.rs @@ -0,0 +1,807 @@ +//! 
Channel block (CNBLOCK) for MDF4 +use anyhow::{Context, Result}; +use arrow::array::{BooleanBufferBuilder, UInt8Builder, UInt16Builder, UInt32Builder}; +use binrw::{BinReaderExt, binrw}; +use std::collections::HashMap; +use std::fmt::{self, Display}; +use std::fs::File; +use crate::data_holder::channel_data::{ChannelData, data_type_init}; +use crate::data_holder::tensor_arrow::Order; +use crate::mdfinfo::sym_buf_reader::SymBufReader; + +use super::block_header::{parse_block_short, read_meta_data, Blockheader4Short, SharableBlocks, default_short_header}; +use super::metadata::BlockType; +use super::cc_block::read_cc; +use super::composition::{parse_composition, Composition}; +use super::ev_block::{Ev4Block, parse_ev4_block}; + +// Channel (CN) flags - cn_flags field (u32) +/// Bit 13: Event signal - channel contains event data, cn_data points to template EVBLOCK +pub const CN_F_EVENT_SIGNAL: u32 = 1 << 13; +/// Bit 15: Raw sensor event channel +#[allow(dead_code)] +pub const CN_F_RAW_SENSOR_EVENT: u32 = 1 << 15; +/// Bit 16: Auxiliary channel +#[allow(dead_code)] +pub const CN_F_AUXILIARY: u32 = 1 << 16; +/// Bit 17: Data stream mode - channel uses data stream alignment +pub const CN_F_DATA_STREAM_MODE: u32 = 1 << 17; +/// Bit 18: Alignment reset - reset alignment to start of data stream +pub const CN_F_ALIGNMENT_RESET: u32 = 1 << 18; +/// Bit 19: Protocol event channel +#[allow(dead_code)] +pub const CN_F_PROTOCOL_EVENT: u32 = 1 << 19; +/// Bit 20: Data description mode - channel describes data structure +#[allow(dead_code)] +pub const CN_F_DATA_DESCRIPTION_MODE: u32 = 1 << 20; +use super::si_block::Si4Block; + +/// Cn4 Channel block struct +#[derive(Debug, PartialEq, Clone)] +#[binrw] +#[br(little)] +#[repr(C)] +pub struct Cn4Block { + /// ##CN + // cn_id: [u8; 4], + /// reserved + // reserved: [u8; 4], + /// Length of block in bytes + // pub cn_len: u64, + /// # of links + cn_links: u64, + /// Pointer to next channel block (CNBLOCK) (can be NIL) + cn_cn_next: 
i64, + /// Composition of channels: Pointer to channel array block (CABLOCK) or channel block (CNBLOCK) (can be NIL). Details see 4.18 Composition of Channels + pub cn_composition: i64, + /// Pointer to TXBLOCK with name (identification) of channel. Name must be according to naming rules stated in 4.4.2 Naming Rules. + pub cn_tx_name: i64, + /// Pointer to channel source (SIBLOCK) (can be NIL) Must be NIL for component channels (members of a structure or array elements) because they all must have the same source and thus simply use the SIBLOCK of their parent CNBLOCK (direct child of CGBLOCK). + cn_si_source: i64, + /// Pointer to the conversion formula (CCBLOCK) (can be NIL, must be NIL for complex channel data types, i.e. for cn_data_type ≥ 10). If the pointer is NIL, this means that a 1:1 conversion is used (phys = int). }; + pub cn_cc_conversion: i64, + /// Pointer to channel type specific signal data For variable length data channel (cn_type = 1): unique link to signal data block (SDBLOCK) or data list block (DLBLOCK) or, only for unsorted data groups, referencing link to a VLSD channel group block (CGBLOCK). Can only be NIL if SDBLOCK would be empty. For synchronization channel (cn_type = 4): referencing link to attachment block (ATBLOCK) in global linked list of ATBLOCKs starting at hd_at_first. Cannot be NIL. + pub cn_data: i64, + /// Pointer to TXBLOCK/MDBLOCK with designation for physical unit of signal data (after conversion) or (only for channel data types "MIME sample" and "MIME stream") to MIME context-type text. (can be NIL). The unit can be used if no conversion rule is specified or to overwrite the unit specified for the conversion rule (e.g. if a conversion rule is shared between channels). If the link is NIL, then the unit from the conversion rule must be used. If the content is an empty string, no unit should be displayed. If an MDBLOCK is used, in addition the A-HDO unit definition can be stored, see Table 38. 
Note: for (virtual) master and synchronization channels the A-HDO definition should be omitted to avoid redundancy. Here the unit is already specified by cn_sync_type of the channel. In case of channel data types "MIME sample" and "MIME stream", the text of the unit must be the content-type text of a MIME type which specifies the content of the values of the channel (either fixed length in record or variable length in SDBLOCK). The MIME content-type string must be written in lowercase, and it must apply to the same rules as defined for at_tx_mimetype in 4.11 The Attachment Block ATBLOCK. + pub cn_md_unit: i64, + /// Pointer to TXBLOCK/MDBLOCK with designation for physical unit of signal data (after conversion) or (only for channel data types "MIME sample" and "MIME stream") to MIME context-type text. (can be NIL). The unit can be used if no conversion rule is specified or to overwrite the unit specified for the conversion rule (e.g. if a conversion rule is shared between channels). If the link is NIL, then the unit from the conversion rule must be used. If the content is an empty string, no unit should be displayed. If an MDBLOCK is used, in addition the A-HDO unit definition can be stored, see Table 38. Note: for (virtual) master and synchronization channels the A-HDO definition should be omitted to avoid redundancy. Here the unit is already specified by cn_sync_type of the channel. In case of channel data types "MIME sample" and "MIME stream", the text of the unit must be the content-type text of a MIME type which specifies the content of the values of the channel (either fixed length in record or variable length in SDBLOCK). The MIME content-type string must be written in lowercase, and it must apply to the same rules as defined for at_tx_mimetype in 4.11 The Attachment Block ATBLOCK. 
+ pub cn_md_comment: i64, + #[br(if(cn_links > 8), little, count = cn_links - 8)] + links: Vec, + + // Data Members + /// Channel type (see CN_T_xxx) + pub cn_type: u8, + /// Sync type: (see CN_S_xxx) + pub cn_sync_type: u8, + /// Channel data type of raw signal value (see CN_DT_xxx) + pub cn_data_type: u8, + /// Bit offset (0-7): first bit (=LSB) of signal value after Byte offset has been applied (see 4.21.4.2 Reading the Signal Value). If zero, the signal value is 1-Byte aligned. A value different to zero is only allowed for Integer data types (cn_data_type ≤ 3) and if the Integer signal value fits into 8 contiguous Bytes (cn_bit_count + cn_bit_offset ≤ 64). For all other cases, cn_bit_offset must be zero. + pub cn_bit_offset: u8, + /// Offset to first Byte in the data record that contains bits of the signal value. The offset is applied to the plain record data, i.e. skipping the record ID. + pub cn_byte_offset: u32, + /// Number of bits for signal value in record + pub cn_bit_count: u32, + /// Flags (see CN_F_xxx) + pub cn_flags: u32, + /// Position of invalidation bit. + pub cn_inval_bit_pos: u32, + /// Precision for display of floating point values. 0xFF means unrestricted precision (infinite). Any other value specifies the number of decimal places to use for display of floating point values. Only valid if "precision valid" flag (bit 2) is set + cn_precision: u8, + /// Byte alignment with previous channel in data stream + pub cn_alignment: u8, + /// Number of attachment for this channel + cn_attachment_count: u16, + /// Minimum signal value that occurred for this signal (raw value) Only valid if "value range valid" flag (bit 3) is set. + cn_val_range_min: f64, + /// Maximum signal value that occurred for this signal (raw value) Only valid if "value range valid" flag (bit 3) is set. + cn_val_range_max: f64, + /// Lower limit for this signal (physical value for numeric conversion rule, otherwise raw value) Only valid if "limit range valid" flag (bit 4) is set. 
+ cn_limit_min: f64, + /// Upper limit for this signal (physical value for numeric conversion rule, otherwise raw value) Only valid if "limit range valid" flag (bit 4) is set. + cn_limit_max: f64, + /// Lower extended limit for this signal (physical value for numeric conversion rule, otherwise raw value) Only valid if "extended limit range valid" flag (bit 5) is set. + cn_limit_ext_min: f64, + /// Upper extended limit for this signal (physical value for numeric conversion rule, otherwise raw value) Only valid if "extended limit range valid" flag (bit 5) is set. + cn_limit_ext_max: f64, +} + +impl Default for Cn4Block { + fn default() -> Self { + Cn4Block { + // cn_id: [35, 35, 67, 78], // ##CN + // reserved: [0; 4], + // cn_len: 160, + cn_links: 8, + cn_cn_next: 0, + cn_composition: 0, + cn_tx_name: 0, + cn_si_source: 0, + cn_cc_conversion: 0, + cn_data: 0, + cn_md_unit: 0, + cn_md_comment: 0, + links: vec![], + cn_type: 0, + cn_sync_type: 0, + cn_data_type: 0, + cn_bit_offset: 0, + cn_byte_offset: 0, + cn_bit_count: 0, + cn_flags: 0, + cn_inval_bit_pos: 0, + cn_precision: 0, + cn_alignment: 0, + cn_attachment_count: 0, + cn_val_range_min: 0.0, + cn_val_range_max: 0.0, + cn_limit_min: 0.0, + cn_limit_max: 0.0, + cn_limit_ext_min: 0.0, + cn_limit_ext_max: 0.0, + } + } +} + +impl Cn4Block { + /// Returns the cn_cn_size link for VLSC channels (cn_type = 7). + /// This link points to a channel containing the size information for variable length signal data. + /// Only valid for MDF 4.3+ VLSC channels. 
+ pub fn cn_cn_size(&self) -> Option { + if self.cn_type == 7 && !self.links.is_empty() { + Some(self.links[0]) // First additional link (9th link) is cn_cn_size + } else { + None + } + } + /// Returns the cn_si_source link + pub fn get_si_source(&self) -> i64 { + self.cn_si_source + } + /// Sets the cn_si_source link + pub fn set_si_source(&mut self, si_source: i64) { + self.cn_si_source = si_source; + } + /// Returns a string representation of the channel type (cn_type) + pub fn get_cn_type_str(&self) -> &'static str { + match self.cn_type { + 0 => "Fixed", + 1 => "VLSD", + 2 => "Master", + 3 => "Virtual Master", + 4 => "Sync", + 5 => "MLSD", + 6 => "Virtual Data", + 7 => "VLSC", + _ => "Unknown", + } + } + /// Returns a string representation of the sync type (cn_sync_type) + pub fn get_sync_type_str(&self) -> &'static str { + match self.cn_sync_type { + 0 => "None", + 1 => "Time", + 2 => "Angle", + 3 => "Distance", + 4 => "Index", + 5 => "Frequency", + _ => "Unknown", + } + } + /// Returns a string representation of the data type (cn_data_type) + pub fn get_data_type_str(&self) -> &'static str { + match self.cn_data_type { + 0 => "UInt LE", + 1 => "UInt BE", + 2 => "Int LE", + 3 => "Int BE", + 4 => "Float LE", + 5 => "Float BE", + 6 => "String ISO-8859-1", + 7 => "String UTF-8", + 8 => "String UTF-16 LE", + 9 => "String UTF-16 BE", + 10 => "Byte Array", + 11 => "MIME Sample", + 12 => "MIME Stream", + 13 => "CANopen Date", + 14 => "CANopen Time", + 15 => "Complex LE", + 16 => "Complex BE", + _ => "Unknown", + } + } +} + +impl Display for Cn4Block { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "CN: type={} ({}) data_type={} ({}) bits={} byte_offset={}", + self.get_cn_type_str(), + self.cn_type, + self.get_data_type_str(), + self.cn_data_type, + self.cn_bit_count, + self.cn_byte_offset + ) + } +} + +/// Cn4 structure containing block but also unique_name, ndarray data, composition +/// and other attributes frequently needed and 
computed +#[derive(Debug, Default)] +#[repr(C)] +pub struct Cn4 { + /// short header + pub header: Blockheader4Short, + /// CN Block without short header + pub block: Cn4Block, + /// unique channel name string + pub unique_name: String, + pub block_position: i64, + /// beginning position of channel in record + pub pos_byte_beg: u32, + /// number of bytes taken by channel in record + pub n_bytes: u32, + pub composition: Option, + /// channel data + pub data: ChannelData, + /// false = little endian + pub endian: bool, + /// List size: 1 for normal primitive, 2 for complex, pnd for arrays + pub list_size: usize, + // Shape of array + pub shape: (Vec, Order), + /// optional invalid mask array, invalid byte position in record, invalid byte mask + pub invalid_mask: Option<(Option, usize, u8)>, + /// Template EVBLOCK for event signal channels (cn_flags bit 13 set) + /// This describes the structure of event data stored in the channel + pub event_template: Option, +} + +impl Clone for Cn4 { + fn clone(&self) -> Self { + let mut invalid_mask: Option<(Option, usize, u8)> = None; + if let Some((boolean_buffer, byte_position, byte_mask)) = &self.invalid_mask { + let mut boolean_buffer_builder: Option = None; + if let Some(buffer) = boolean_buffer { + let mut new_boolean_buffer_builder = BooleanBufferBuilder::new(buffer.len()); + new_boolean_buffer_builder.append_buffer(&buffer.finish_cloned()); + boolean_buffer_builder = Some(new_boolean_buffer_builder); + } + invalid_mask = Some((boolean_buffer_builder, *byte_position, *byte_mask)); + } + Self { + header: self.header, + block: self.block.clone(), + unique_name: self.unique_name.clone(), + block_position: self.block_position, + pos_byte_beg: self.pos_byte_beg, + n_bytes: self.n_bytes, + composition: self.composition.clone(), + data: ChannelData::default(), + endian: self.endian, + list_size: self.list_size, + shape: self.shape.clone(), + invalid_mask, + event_template: self.event_template.clone(), + } + } +} + +impl Display 
for Cn4 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "CN: {} @ byte {} ({} bytes) type={} sync={}", + self.unique_name, + self.pos_byte_beg, + self.n_bytes, + self.block.get_cn_type_str(), + self.block.get_sync_type_str() + ) + } +} + +/// hashmap's key is bit position in record, value Cn4 +pub(crate) type CnType = HashMap; + +/// record layout type : record_id_size: u8, cg_data_bytes: u32, cg_inval_bytes: u32 +pub(crate) type RecordLayout = (u8, u32, u32); + +/// creates recursively in the channel group the CN blocks and all its other linked blocks (CC, MD, TX, CA, etc.) +pub(super) fn parse_cn4( + rdr: &mut SymBufReader<&File>, + target: i64, + mut position: i64, + sharable: &mut SharableBlocks, + record_layout: RecordLayout, + cg_cycle_count: u64, +) -> Result<(CnType, i64, usize, i32)> { + let mut cn: CnType = HashMap::new(); + let mut n_cn: usize = 0; + let mut first_rec_pos: i32 = 0; + let (record_id_size, _cg_data_bytes, _cg_inval_bytes) = record_layout; + if target != 0 { + let (cn_struct, pos, n_cns, cns) = parse_cn4_block( + rdr, + target, + position, + sharable, + record_layout, + cg_cycle_count, + )?; + position = pos; + n_cn += n_cns; + cn.extend(cns); + first_rec_pos = (cn_struct.block.cn_byte_offset as i32 + record_id_size as i32) * 8 + + cn_struct.block.cn_bit_offset as i32; + let mut next_pointer = cn_struct.block.cn_cn_next; + if cn_struct.block.cn_data_type == 13 { + // CANopen date + let (date_ms, min, hour, day, month, year) = can_open_date( + cn_struct.block_position, + cn_struct.pos_byte_beg, + cn_struct.block.cn_byte_offset, + ); + cn.insert(first_rec_pos, date_ms); + cn.insert(first_rec_pos + 16, min); + cn.insert(first_rec_pos + 24, hour); + cn.insert(first_rec_pos + 32, day); + cn.insert(first_rec_pos + 40, month); + cn.insert(first_rec_pos + 48, year); + } else if cn_struct.block.cn_data_type == 14 { + // CANopen time + let (ms, days) = can_open_time( + cn_struct.block_position, + 
cn_struct.pos_byte_beg, + cn_struct.block.cn_byte_offset, + ); + cn.insert(first_rec_pos, ms); + cn.insert(first_rec_pos + 32, days); + } else { + if cn_struct.block.cn_type == 3 || cn_struct.block.cn_type == 6 { + // virtual channel, position in record negative + first_rec_pos = -1; + while cn.contains_key(&first_rec_pos) { + first_rec_pos -= 1; + } + } else if (cn_struct.block.cn_flags & CN_F_DATA_STREAM_MODE) != 0 { + // data stream mode channel: use negative block_position as key to avoid collisions + first_rec_pos = -(cn_struct.block_position as i32); + } + cn.insert(first_rec_pos, cn_struct); + } + + while next_pointer != 0 { + let (cn_struct, pos, n_cns, cns) = parse_cn4_block( + rdr, + next_pointer, + position, + sharable, + record_layout, + cg_cycle_count, + )?; + position = pos; + n_cn += n_cns; + cn.extend(cns); + let mut rec_pos = (cn_struct.block.cn_byte_offset as i32 + record_id_size as i32) * 8 + + cn_struct.block.cn_bit_offset as i32; + next_pointer = cn_struct.block.cn_cn_next; + if cn_struct.block.cn_data_type == 13 { + // CANopen date + let (date_ms, min, hour, day, month, year) = can_open_date( + cn_struct.block_position, + cn_struct.pos_byte_beg, + cn_struct.block.cn_byte_offset, + ); + cn.insert(rec_pos, date_ms); + cn.insert(rec_pos + 16, min); + cn.insert(rec_pos + 24, hour); + cn.insert(rec_pos + 32, day); + cn.insert(rec_pos + 40, month); + cn.insert(rec_pos + 48, year); + } else if cn_struct.block.cn_data_type == 14 { + // CANopen time + let (ms, days) = can_open_time( + cn_struct.block_position, + cn_struct.pos_byte_beg, + cn_struct.block.cn_byte_offset, + ); + cn.insert(rec_pos, ms); + cn.insert(rec_pos + 32, days); + } else { + if cn_struct.block.cn_type == 3 || cn_struct.block.cn_type == 6 { + // virtual channel, position in record negative + rec_pos = -1; + while cn.contains_key(&rec_pos) { + rec_pos -= 1; + } + } else if (cn_struct.block.cn_flags & CN_F_DATA_STREAM_MODE) != 0 { + // data stream mode channel: use negative 
block_position as key to avoid collisions + rec_pos = -(cn_struct.block_position as i32); + } + cn.insert(rec_pos, cn_struct); + } + } + } + Ok((cn, position, n_cn, first_rec_pos)) +} + +/// returns created CANopenDate channels +fn can_open_date( + block_position: i64, + pos_byte_beg: u32, + cn_byte_offset: u32, +) -> (Cn4, Cn4, Cn4, Cn4, Cn4, Cn4) { + let block = Cn4Block { + cn_links: 8, + cn_byte_offset, + cn_bit_count: 16, + ..Default::default() + }; + let date_ms = Cn4 { + header: default_short_header(BlockType::CN), + block, + unique_name: String::from("ms"), + block_position, + pos_byte_beg, + n_bytes: 2, + composition: None, + data: ChannelData::UInt16(UInt16Builder::new()), + endian: false, + list_size: 1, + shape: (vec![1], Order::RowMajor), + invalid_mask: None, + event_template: None, + }; + let block = Cn4Block { + cn_links: 8, + cn_byte_offset: cn_byte_offset + 2, + cn_bit_count: 6, + ..Default::default() + }; + let min = Cn4 { + header: default_short_header(BlockType::CN), + block, + unique_name: String::from("min"), + block_position, + pos_byte_beg, + n_bytes: 1, + composition: None, + data: ChannelData::UInt8(UInt8Builder::new()), + endian: false, + list_size: 1, + shape: (vec![1], Order::RowMajor), + invalid_mask: None, + event_template: None, + }; + let block = Cn4Block { + cn_links: 8, + cn_byte_offset: cn_byte_offset + 3, + cn_bit_count: 5, + ..Default::default() + }; + let hour = Cn4 { + header: default_short_header(BlockType::CN), + block, + unique_name: String::from("hour"), + block_position, + pos_byte_beg, + n_bytes: 1, + composition: None, + data: ChannelData::UInt8(UInt8Builder::new()), + endian: false, + list_size: 1, + shape: (vec![1], Order::RowMajor), + invalid_mask: None, + event_template: None, + }; + let block = Cn4Block { + cn_links: 8, + cn_byte_offset: cn_byte_offset + 4, + cn_bit_count: 5, + ..Default::default() + }; + let day = Cn4 { + header: default_short_header(BlockType::CN), + block, + unique_name: String::from("day"), + 
block_position, + pos_byte_beg, + n_bytes: 1, + composition: None, + data: ChannelData::UInt8(UInt8Builder::new()), + endian: false, + list_size: 1, + shape: (vec![1], Order::RowMajor), + invalid_mask: None, + event_template: None, + }; + let block = Cn4Block { + cn_links: 8, + cn_byte_offset: cn_byte_offset + 5, + cn_bit_count: 6, + ..Default::default() + }; + let month = Cn4 { + header: default_short_header(BlockType::CN), + block, + unique_name: String::from("month"), + block_position, + pos_byte_beg, + n_bytes: 1, + composition: None, + data: ChannelData::UInt8(UInt8Builder::new()), + endian: false, + list_size: 1, + shape: (vec![1], Order::RowMajor), + invalid_mask: None, + event_template: None, + }; + let block = Cn4Block { + cn_links: 8, + cn_byte_offset: cn_byte_offset + 6, + cn_bit_count: 7, + ..Default::default() + }; + let year = Cn4 { + header: default_short_header(BlockType::CN), + block, + unique_name: String::from("year"), + block_position, + pos_byte_beg, + n_bytes: 1, + composition: None, + data: ChannelData::UInt8(UInt8Builder::new()), + endian: false, + list_size: 1, + shape: (vec![1], Order::RowMajor), + invalid_mask: None, + event_template: None, + }; + (date_ms, min, hour, day, month, year) +} + +/// returns created CANopenTime channels +fn can_open_time(block_position: i64, pos_byte_beg: u32, cn_byte_offset: u32) -> (Cn4, Cn4) { + let block = Cn4Block { + cn_links: 8, + cn_byte_offset, + cn_bit_count: 28, + ..Default::default() + }; + let ms: Cn4 = Cn4 { + header: default_short_header(BlockType::CN), + block, + unique_name: String::from("ms"), + block_position, + pos_byte_beg, + n_bytes: 4, + composition: None, + data: ChannelData::UInt32(UInt32Builder::new()), + endian: false, + list_size: 1, + shape: (vec![1], Order::RowMajor), + invalid_mask: None, + event_template: None, + }; + let block = Cn4Block { + cn_links: 8, + cn_byte_offset: cn_byte_offset + 4, + cn_bit_count: 16, + ..Default::default() + }; + let days: Cn4 = Cn4 { + header: 
default_short_header(BlockType::CN), + block, + unique_name: String::from("day"), + block_position, + pos_byte_beg, + n_bytes: 2, + composition: None, + data: ChannelData::UInt16(UInt16Builder::new()), + endian: false, + list_size: 1, + shape: (vec![1], Order::RowMajor), + invalid_mask: None, + event_template: None, + }; + (ms, days) +} + +/// Simple calculation to convert bit count into equivalent bytes count +fn calc_n_bytes_not_aligned(bitcount: u32) -> u32 { + let mut n_bytes = bitcount / 8u32; + if !bitcount.is_multiple_of(8) { + n_bytes += 1; + } + n_bytes +} + +#[allow(dead_code)] +impl Cn4 { + /// Returns true if this channel is an event signal channel (cn_flags bit 13 set). + /// Event signal channels store event data, with a template EVBLOCK describing the structure. + pub fn is_event_signal(&self) -> bool { + (self.block.cn_flags & CN_F_EVENT_SIGNAL) != 0 + } + + /// Returns a reference to the template EVBLOCK if this is an event signal channel. + /// The template EVBLOCK describes the structure of event data stored in this channel. 
+ pub fn get_event_template(&self) -> Option<&Ev4Block> { + self.event_template.as_ref() + } + + /// Returns the channel source name + pub fn get_cn_source_name(&self, sharable: &SharableBlocks) -> Result> { + let si = sharable.si.get(&self.block.cn_si_source); + match si { + Some(block) => Ok(block.get_si_source_name(sharable)?), + None => Ok(None), + } + } + /// Returns the channel source path + pub fn get_cn_source_path(&self, sharable: &SharableBlocks) -> Result> { + let si = sharable.si.get(&self.block.cn_si_source); + match si { + Some(block) => Ok(block.get_si_path_name(sharable)?), + None => Ok(None), + } + } +} + +/// Channel block parser +pub(super) fn parse_cn4_block( + rdr: &mut SymBufReader<&File>, + target: i64, + mut position: i64, + sharable: &mut SharableBlocks, + record_layout: RecordLayout, + cg_cycle_count: u64, +) -> Result<(Cn4, i64, usize, CnType)> { + let (record_id_size, _cg_data_bytes, cg_inval_bytes) = record_layout; + let mut n_cn: usize = 1; + let mut cns: HashMap = HashMap::new(); + let (mut block, cnheader, pos) = parse_block_short(rdr, target, position)?; + position = pos; + let block: Cn4Block = block + .read_le() + .context("Could not read buffer into Cn4Block struct")?; + + let pos_byte_beg = block.cn_byte_offset + record_id_size as u32; + let n_bytes = calc_n_bytes_not_aligned(block.cn_bit_count + (block.cn_bit_offset as u32)); + let invalid_mask: Option<(Option, usize, u8)> = if cg_inval_bytes != 0 { + let invalid_byte_position = (block.cn_inval_bit_pos >> 3) as usize; + let invalid_byte_mask = 1 << (block.cn_inval_bit_pos & 0x07); + let mut buffer = BooleanBufferBuilder::new(cg_cycle_count as usize); + buffer.advance(cg_cycle_count as usize); + Some((Some(buffer), invalid_byte_position, invalid_byte_mask)) + } else { + None + }; + + // Reads TX name + position = read_meta_data(rdr, sharable, block.cn_tx_name, position, BlockType::CN)?; + let name: String = sharable.get_tx(block.cn_tx_name)?.unwrap_or_default(); + + // Reads 
unit + position = read_meta_data(rdr, sharable, block.cn_md_unit, position, BlockType::CN)?; + + // Reads CC + let cc_pointer = block.cn_cc_conversion; + if (cc_pointer != 0) && !sharable.cc.contains_key(&cc_pointer) { + let (cc_block, _header, pos) = parse_block_short(rdr, cc_pointer, position)?; + position = pos; + position = read_cc(rdr, &cc_pointer, position, cc_block, sharable)?; + } + + // Reads MD + position = read_meta_data(rdr, sharable, block.cn_md_comment, position, BlockType::CN)?; + + //Reads SI + let si_pointer = block.cn_si_source; + if (si_pointer != 0) && !sharable.si.contains_key(&si_pointer) { + let (mut si_block, _header, pos) = parse_block_short(rdr, si_pointer, position)?; + position = pos; + let si_block: Si4Block = si_block + .read_le() + .context("Could into read buffer into Si4Block struct")?; + position = read_meta_data(rdr, sharable, si_block.si_tx_name, position, BlockType::SI)?; + position = read_meta_data(rdr, sharable, si_block.si_tx_path, position, BlockType::SI)?; + sharable.si.insert(si_pointer, si_block); + } + + //Reads CA or composition + let compo: Option; + let list_size: usize; + let shape: (Vec, Order); + if block.cn_composition != 0 { + let (co, pos, array_size, s, n_cns, cnss) = parse_composition( + rdr, + block.cn_composition, + position, + sharable, + record_layout, + cg_cycle_count, + ) + .context("Failed reading composition")?; + shape = s; + // list size calculation + if block.cn_data_type == 15 || block.cn_data_type == 16 { + //complex + list_size = 2 * array_size; + } else { + list_size = array_size; + } + compo = Some(co); + position = pos; + n_cn += n_cns; + cns = cnss; + } else { + compo = None; + shape = (vec![1], Order::RowMajor); + // list size calculation + if block.cn_data_type == 15 | 16 { + //complex + list_size = 2; + } else { + list_size = 1; + } + } + + let mut endian: bool = false; // Little endian by default + if block.cn_data_type == 0 + || block.cn_data_type == 2 + || block.cn_data_type == 4 + || 
block.cn_data_type == 8 + || block.cn_data_type == 15 + { + endian = false; // little endian + } else if block.cn_data_type == 1 + || block.cn_data_type == 3 + || block.cn_data_type == 5 + || block.cn_data_type == 9 + || block.cn_data_type == 16 + { + endian = true; // big endian + } + // For VLSC/VLSD channels, cn_data_type describes the signal data block encoding + // (e.g. UTF-16 BE), not the byte order of the integer offsets stored in the DT block. + if block.cn_type == 1 || block.cn_type == 7 { + endian = false; + } + let data_type = block.cn_data_type; + let cn_type = block.cn_type; + + // Read template EVBLOCK for event signal channels (cn_flags bit 13 set) + // For event signal channels, cn_data points to a template EVBLOCK that describes + // the structure of event data stored in this channel + let event_template: Option = + if (block.cn_flags & CN_F_EVENT_SIGNAL) != 0 && block.cn_data != 0 { + let (ev_block, pos) = parse_ev4_block(rdr, block.cn_data, position)?; + position = pos; + Some(ev_block) + } else { + None + }; + + let cn_struct = Cn4 { + header: cnheader, + unique_name: name, + block_position: target, + pos_byte_beg, + n_bytes, + composition: compo, + data: data_type_init(cn_type, data_type, n_bytes, list_size, block.cn_flags)?, + block, + endian, + list_size, + shape, + invalid_mask, + event_template, + }; + + Ok((cn_struct, position, n_cn, cns)) +} diff --git a/src/mdfinfo/mdfinfo4/composition.rs b/src/mdfinfo/mdfinfo4/composition.rs new file mode 100644 index 0000000..174ca01 --- /dev/null +++ b/src/mdfinfo/mdfinfo4/composition.rs @@ -0,0 +1,523 @@ +//! 
Composition blocks (DS, CL, CV, CU) and composition parsing for MDF4 +use anyhow::{Context, Result, bail}; +use binrw::{BinReaderExt, binrw}; +use std::collections::HashMap; +use std::fmt::{self, Display}; +use std::fs::File; + +use super::block_header::parse_block_short; +use super::ca_block::{Ca4Block, parse_ca_block}; +use super::cn_block::{Cn4, CnType, RecordLayout, parse_cn4}; +use super::block_header::SharableBlocks; +use crate::data_holder::tensor_arrow::Order; +use crate::mdfinfo::sym_buf_reader::SymBufReader; + +/// type alias for composition parse result +pub type CompositionParseResult = (Composition, i64, usize, (Vec, Order), usize, CnType); + +/// contains composition blocks (CN or CA) +/// can optionally point to another composition +#[derive(Debug, Clone)] +#[repr(C)] +pub struct Composition { + pub block: Compo, + pub compo: Option>, +} + +/// enum allowing to nest CA or CN blocks for a composition +#[derive(Debug, Clone)] +#[repr(C)] +pub enum Compo { + CA(Box), + CN(Box), + CL(Box), + CV(Box), + CU(Box), + DS(Box), +} + +impl Display for Compo { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Compo::CA(ca) => write!(f, "CA({})", ca), + Compo::CN(cn) => write!(f, "CN({})", cn), + Compo::CL(cl) => write!(f, "CL({})", cl), + Compo::CV(cv) => write!(f, "CV({})", cv), + Compo::CU(cu) => write!(f, "CU({})", cu), + Compo::DS(ds) => write!(f, "DS({})", ds), + } + } +} + +impl Display for Composition { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Composition: {}", self.block)?; + if self.compo.is_some() { + write!(f, " (nested)")?; + } + Ok(()) + } +} + +/// DS4 Data Stream block struct +#[derive(Debug, PartialEq, Eq, Default, Clone)] +#[binrw] +#[br(little)] +#[repr(C)] +pub struct Ds4Block { + //header + // ##DS + // ds_id: [u8; 4], + /// reserved + // reserved: [u8; 4], + /// Length of block in bytes + // pub ds_len: u64, + /// # of links + pub ds_links: u64, + /// links + #[br(count = 
ds_links)] + pub links: Vec, + /// data + /// Minimum version of the reader to read the data + pub ds_version: u16, + /// DSBlock mode, 0 data stream, 1 data description + pub ds_mode: u8, + /// Reserved + pub ds_reserved: [u8; 5], +} + +/// Data stream mode constant - data layout described by CNBLOCK composition +pub const DS_MODE_DATA_STREAM: u8 = 0; +/// Data description mode constant - data layout described by external attachment (FIBEX, DBC, ARXML) +pub const DS_MODE_DATA_DESCRIPTION: u8 = 1; + +#[allow(dead_code)] +impl Ds4Block { + pub fn ds_cn_composition(&self) -> i64 { + self.links.first().copied().unwrap_or(0) + } + pub fn ds_cn_alignment_start(&self) -> i64 { + self.links.get(1).copied().unwrap_or(0) + } + pub fn ds_data(&self) -> i64 { + self.links.get(2).copied().unwrap_or(0) + } + pub fn ds_md_comment(&self) -> i64 { + self.links.get(3).copied().unwrap_or(0) + } + + /// Returns true if this is data stream mode (ds_mode = 0). + /// In data stream mode, the data layout is described by CNBLOCK composition structures. + pub fn is_data_stream_mode(&self) -> bool { + self.ds_mode == DS_MODE_DATA_STREAM + } + + /// Returns true if this is data description mode (ds_mode = 1). + /// In data description mode, the data layout is described by an external attachment + /// file (e.g., FIBEX, DBC, ARXML) pointed to by ds_cn_composition. + pub fn is_data_description_mode(&self) -> bool { + self.ds_mode == DS_MODE_DATA_DESCRIPTION + } + + /// Returns a string description of the data stream mode. 
+ pub fn get_mode_str(&self) -> &'static str { + match self.ds_mode { + DS_MODE_DATA_STREAM => "Data Stream Mode", + DS_MODE_DATA_DESCRIPTION => "Data Description Mode", + _ => "Unknown Mode", + } + } +} + +impl Display for Ds4Block { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "DS: mode={} ({}) version={} links={}", + self.get_mode_str(), + self.ds_mode, + self.ds_version, + self.ds_links + ) + } +} + +/// CL4 Channel List block struct +#[derive(Debug, PartialEq, Eq, Default, Clone)] +#[binrw] +#[br(little)] +#[repr(C)] +pub struct Cl4Block { + //header + // ##CL + // cl_id: [u8; 4], + /// reserved + // reserved: [u8; 4], + /// Length of block in bytes + // pub cl_len: u64, + /// # of links + pub cl_links: u64, + /// links + /// link to CNBlock describing dynamic data + pub cl_composition: i64, + /// link to CNBlock for the alignment start with data stream mode + pub cl_cn_size: i64, + /// data + /// Flags + pub cl_flags: u16, + /// Bytes alignment + pub cl_alignment: u8, + /// Bit Offset + pub cl_bit_offset: u8, + /// Byte Offset + pub cl_byte_offset: u32, +} + +impl Display for Cl4Block { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "CL: flags=0x{:04X} alignment={} bit_offset={} byte_offset={}", + self.cl_flags, self.cl_alignment, self.cl_bit_offset, self.cl_byte_offset + ) + } +} + +/// CV4 Channel Variant block struct +#[derive(Debug, PartialEq, Eq, Default, Clone)] +#[binrw] +#[br(little)] +#[repr(C)] +pub struct Cv4Block { + //header + // ##CV + // cv_id: [u8; 4], + /// reserved + // reserved: [u8; 4], + /// Length of block in bytes + // pub cv_len: u64, + /// # of links + pub cv_n_links: u64, + /// links + /// link to CNBlock for discriminator channel + pub cv_cn_discriminator: i64, + /// list of option channel + #[br(if(cv_n_links > 1), little, count = cv_n_links - 1)] + pub cv_cn_option: Vec, + /// data + /// number of option channels + pub cv_option_count: u32, + /// reserved + pub 
cv_reserved: [u8; 4], + /// list of discriminator values for the options + #[br(if(cv_option_count > 1), little, count = cv_option_count )] + pub cv_option_val: Vec, +} + +impl Display for Cv4Block { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "CV: {} options", self.cv_option_count) + } +} + +/// CU4 Channel Union block struct +#[derive(Debug, PartialEq, Eq, Default, Clone)] +#[binrw] +#[br(little)] +#[repr(C)] +pub struct Cu4Block { + //header + // ##CU + // cu_id: [u8; 4], + /// reserved + // reserved: [u8; 4], + /// Length of block in bytes + // pub cu_len: u64, + /// # of links + pub cu_n_links: u64, + /// links + /// list of member channel + #[br(if(cu_n_links > 1), little, count = cu_n_links)] + pub cu_cn_member: Vec, + /// data + /// number of member channels + pub cu_member_count: u32, + /// reserved + pub cu_reserved: [u8; 4], +} + +impl Display for Cu4Block { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "CU: {} members", self.cu_member_count) + } +} + +/// Parse and re-key channel blocks for CU/CV compositions. +/// All member/option channels share the same byte offset in the record, +/// so they are re-keyed using negative block_position to avoid HashMap collisions. 
+fn parse_and_rekey_channels( + rdr: &mut SymBufReader<&File>, + targets: &[i64], + position: &mut i64, + sharable: &mut SharableBlocks, + record_layout: RecordLayout, + cg_cycle_count: u64, +) -> Result<(CnType, usize)> { + let mut cns: CnType = HashMap::new(); + let mut n_cn: usize = 0; + for target in targets { + let (cnss, pos, n_cns, _first_rec_pos) = parse_cn4( + rdr, + *target, + *position, + sharable, + record_layout, + cg_cycle_count, + )?; + *position = pos; + n_cn += n_cns; + for (_rec_pos, cn_struct) in cnss { + let unique_key = -(cn_struct.block_position as i32); + cns.insert(unique_key, cn_struct); + } + } + Ok((cns, n_cn)) +} + +/// parses composition linked blocks +/// CN (structures of composed channels )and CA (array of arrays) blocks can be nested or even CA and CN nested and mixed: this is not supported, very complicated +pub(super) fn parse_composition( + rdr: &mut SymBufReader<&File>, + target: i64, + mut position: i64, + sharable: &mut SharableBlocks, + record_layout: RecordLayout, + cg_cycle_count: u64, +) -> Result { + let (mut block, block_header_short, pos) = parse_block_short(rdr, target, position) + .context("Failed parsing composition header block")?; + position = pos; + let array_size: usize; + let mut cns: CnType = HashMap::new(); + let mut n_cn: usize = 0; + + if block_header_short.hdr_id == "##CA".as_bytes() { + // Channel Array + let (block, mut shape, _snd, array_size) = + parse_ca_block(&mut block, block_header_short, cg_cycle_count) + .context("Failed parsing CA block")?; + position = pos; + let ca_composition: Option>; + if block.ca_composition != 0 { + let (ca, pos, _array_size, s, n_cns, cnss) = parse_composition( + rdr, + block.ca_composition, + position, + sharable, + record_layout, + cg_cycle_count, + ) + .context("Failed parsing composition block from CA block")?; + shape = s; + position = pos; + cns = cnss; + n_cn += n_cns; + ca_composition = Some(Box::new(ca)); + } else { + ca_composition = None; + cns = 
HashMap::new(); + } + Ok(( + Composition { + block: Compo::CA(Box::new(block)), + compo: ca_composition, + }, + position, + array_size, + shape, + n_cn, + cns, + )) + } else if block_header_short.hdr_id == "##DS".as_bytes() { + // Data Stream + let ds_block: Ds4Block = block.read_le().context("Failed parsing DS block")?; + array_size = 1; + let ds_pointer = ds_block.ds_cn_composition(); + let ds_composition: Option>; + let mut shape = (Vec::::new(), Order::RowMajor); + if ds_pointer != 0 { + let (ds, pos, _array_size, s, n_cns, cnss) = parse_composition( + rdr, + ds_pointer, + position, + sharable, + record_layout, + cg_cycle_count, + ) + .context("Failed parsing composition block from DS Block")?; + shape = s; + position = pos; + cns = cnss; + n_cn += n_cns; + ds_composition = Some(Box::new(ds)); + } else { + ds_composition = None; + cns = HashMap::new(); + } + Ok(( + Composition { + block: Compo::DS(Box::new(ds_block)), + compo: ds_composition, + }, + position, + array_size, + shape, + n_cn, + cns, + )) + } else if block_header_short.hdr_id == "##CL".as_bytes() { + // Channel List + let cl_block: Cl4Block = block.read_le().context("Failed parsing CL block")?; + let cl_composition: Option>; + let mut shape = (Vec::::new(), Order::RowMajor); + array_size = 0; + // Note: cl_cn_size points to the size channel (parsed elsewhere in the CG) + // Parse the composition (element type) + if cl_block.cl_composition != 0 { + let (ds, pos, _array_size, s, n_cns, cnss) = parse_composition( + rdr, + cl_block.cl_composition, + position, + sharable, + record_layout, + cg_cycle_count, + ) + .context("Failed parsing composition block from CL Block")?; + shape = s; + position = pos; + cns = cnss; + n_cn += n_cns; + cl_composition = Some(Box::new(ds)); + } else { + cl_composition = None; + cns = HashMap::new(); + } + Ok(( + Composition { + block: Compo::CL(Box::new(cl_block)), + compo: cl_composition, + }, + position, + array_size, + shape, + n_cn, + cns, + )) + } else if 
block_header_short.hdr_id == "##CV".as_bytes() { + // Channel Variant + let cv_block: Cv4Block = block.read_le().context("Failed parsing CV block")?; + let cv_composition: Option> = None; + let shape = (Vec::::new(), Order::RowMajor); + array_size = 0; + let (rekeyed_cns, rekeyed_n_cn) = parse_and_rekey_channels( + rdr, + &cv_block.cv_cn_option, + &mut position, + sharable, + record_layout, + cg_cycle_count, + )?; + n_cn += rekeyed_n_cn; + cns.extend(rekeyed_cns); + Ok(( + Composition { + block: Compo::CV(Box::new(cv_block)), + compo: cv_composition, + }, + position, + array_size, + shape, + n_cn, + cns, + )) + } else if block_header_short.hdr_id == "##CU".as_bytes() { + // Channel Union + let cu_block: Cu4Block = block.read_le().context("Failed parsing CU block")?; + let cu_composition: Option> = None; + let shape = (Vec::::new(), Order::RowMajor); + array_size = 0; + let (rekeyed_cns, rekeyed_n_cn) = parse_and_rekey_channels( + rdr, + &cu_block.cu_cn_member, + &mut position, + sharable, + record_layout, + cg_cycle_count, + )?; + n_cn += rekeyed_n_cn; + cns.extend(rekeyed_cns); + Ok(( + Composition { + block: Compo::CU(Box::new(cu_block)), + compo: cu_composition, + }, + position, + array_size, + shape, + n_cn, + cns, + )) + } else if block_header_short.hdr_id == "##CN".as_bytes() { + // Channel structure + array_size = 1; + let (cnss, pos, n_cns, first_rec_pos) = parse_cn4( + rdr, + target, + position, + sharable, + record_layout, + cg_cycle_count, + )?; + position = pos; + n_cn += n_cns; + cns = cnss; + let cn_composition: Option>; + let cn_struct: Cn4 = if let Some(cn) = cns.get(&first_rec_pos) { + cn.clone() + } else { + Cn4::default() + }; + let shape: (Vec, Order); + if cn_struct.block.cn_composition != 0 { + let (cn, pos, _array_size, s, n_cns, cnss) = parse_composition( + rdr, + cn_struct.block.cn_composition, + position, + sharable, + record_layout, + cg_cycle_count, + )?; + shape = s; + position = pos; + n_cn += n_cns; + cns.extend(cnss); + 
cn_composition = Some(Box::new(cn)); + } else { + cn_composition = None; + shape = (vec![1], Order::RowMajor); + } + Ok(( + Composition { + block: Compo::CN(Box::new(cn_struct)), + compo: cn_composition, + }, + position, + array_size, + shape, + n_cn, + cns, + )) + } else { + bail!("Unknown composition block type") + } +} diff --git a/src/mdfinfo/mdfinfo4/data_block.rs b/src/mdfinfo/mdfinfo4/data_block.rs new file mode 100644 index 0000000..0697f8b --- /dev/null +++ b/src/mdfinfo/mdfinfo4/data_block.rs @@ -0,0 +1,462 @@ +//! Data blocks (DT, DL, DZ, LD, HL, GD) for MDF4 +use anyhow::{Context, Result, bail}; +use binrw::{BinReaderExt, binrw}; +use flate2::read::ZlibDecoder; +use log::warn; +use lz4::Decoder as Lz4Decoder; +use std::fmt::{self, Display}; +use std::fs::File; +use std::io::{BufReader, Cursor, Read}; +use std::str; +use zstd::Decoder as ZstdDecoder; + +/// Generic Data block struct, without the Id +#[derive(Debug, PartialEq, Eq, Default, Clone)] +#[binrw] +#[br(little)] +#[repr(C)] +pub struct Dt4Block { + //header + // dl_id: [u8; 4], // ##DL + /// reserved + reserved: [u8; 4], + /// Length of block in bytes + pub len: u64, + /// # of links + links: u64, +} + +impl Display for Dt4Block { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "DT: len={}", self.len) + } +} + +/// DL4 Data List block struct +#[derive(Debug, PartialEq, Eq, Default, Clone)] +#[binrw] +#[br(little)] +#[repr(C)] +pub struct Dl4Block { + //header + // dl_id: [u8; 4], // ##DL + /// reserved + reserved: [u8; 4], + /// Length of block in bytes + dl_len: u64, + /// # of links + dl_links: u64, + // links + /// next DL + pub dl_dl_next: i64, + #[br(if(dl_links > 1), little, count = dl_links - 1)] + pub dl_data: Vec, + // members + /// Flags + dl_flags: u8, + dl_reserved: [u8; 3], + /// Number of data blocks + dl_count: u32, + #[br(if((dl_flags & 0b1)>0), little)] + dl_equal_length: Option, + #[br(if((dl_flags & 0b1)==0), little, count = dl_count)] + dl_offset: 
Vec, + #[br(if((dl_flags & 0b10)>0), little, count = dl_count)] + dl_time_values: Vec, + #[br(if((dl_flags & 0b100)>0), little, count = dl_count)] + dl_angle_values: Vec, + #[br(if((dl_flags & 0b1000)>0), little, count = dl_count)] + dl_distance_values: Vec, +} + +impl Display for Dl4Block { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "DL: {} data blocks flags=0x{:02X}", + self.dl_count, self.dl_flags + ) + } +} + +/// parses Data List block +/// pointing to DT, SD, RD or DZ blocks +pub fn parser_dl4_block( + rdr: &mut BufReader<&File>, + target: i64, + mut position: i64, +) -> Result<(Dl4Block, i64)> { + rdr.seek_relative(target - position) + .context("Could not reach position to read Dl4Block")?; + let block: Dl4Block = rdr + .read_le() + .context("Could not read into Dl4Block struct")?; + position = target + block.dl_len as i64; + Ok((block, position)) +} + +/// Helper function to decompress data using various algorithms +pub fn decompress_data( + zip_type: u8, + zip_parameter: u32, + buf: Vec, + org_data_length: u64, +) -> Result> { + let mut data = Vec::::new(); + match zip_type { + 0 | 1 => { + // deflate algorithm (zlib format) + let reader = Cursor::new(buf); + let mut decoder = ZlibDecoder::new(reader); + decoder + .read_to_end(&mut data) + .context("Error decompressing Deflate data")?; + } + 2 | 3 => { + // zstd algorithm + let reader = Cursor::new(buf); + let mut decoder = + ZstdDecoder::new(reader).context("Error creating Zstd decoder from read vector")?; + let _nbbytesread = decoder + .read_to_end(&mut data) + .context("error reading the compressed bytes")?; + } + 4 | 5 => { + // lz4 algorithm + let reader = Cursor::new(buf); + let mut decoder = + Lz4Decoder::new(reader).context("Error creating Lz4 decoder from read vector")?; + let _nbbytesread = decoder + .read_to_end(&mut data) + .context("error reading the compressed bytes")?; + } + 254 => { + // MDF 4.3 custom/vendor-specific compression + warn!("Custom 
compression (zip_type=254) not supported - data will be empty"); + return Ok(data); + } + _ => { + bail!("not implemented compression algorithm: {}", zip_type) + } + }; + // transpose data + if matches!(zip_type, 1 | 3 | 5) && zip_parameter > 0 { + // transpose + let m = org_data_length / zip_parameter as u64; + let tail: Vec = data.split_off((m * zip_parameter as u64) as usize); + let mut output = vec![0u8; (m * zip_parameter as u64) as usize]; + transpose::transpose(&data, &mut output, m as usize, zip_parameter as usize); + data = output; + if !tail.is_empty() { + data.extend(tail); + } + } + Ok(data) +} + +/// parses DZBlock +pub fn parse_dz(rdr: &mut BufReader<&File>) -> Result<(Vec, Dz4Block)> { + let mut block: Dz4Block = rdr + .read_le() + .context("Could not read into Dz4Block struct")?; + let mut buf = vec![0u8; block.dz_data_length as usize]; + rdr.read_exact(&mut buf).context("Could not read Dz data")?; + // decompress data + let data = decompress_data( + block.dz_zip_type, + block.dz_zip_parameter, + buf, + block.dz_org_data_length, + )?; + block.dz_org_data_length = data.len() as u64; + Ok((data, block)) +} + +/// DZ4 Data List block struct +#[derive(Debug, PartialEq, Eq, Clone)] +#[binrw] +#[br(little)] +#[repr(C)] +pub struct Dz4Block { + //header + // dz_id: [u8; 4], // ##DZ + reserved: [u8; 4], // reserved + /// Length of block in bytes + pub len: u64, + dz_links: u64, // # of links + // links + // members + /// "DT", "SD", "RD" or "DV", "DI", "RV", "RI" + pub dz_org_block_type: [u8; 2], + /// Zip algorithm, 0 deflate, 1 transpose + deflate + dz_zip_type: u8, + /// reserved + dz_reserved: u8, + /// Zip algorithm parameter + dz_zip_parameter: u32, // + /// length of uncompressed data + pub dz_org_data_length: u64, + /// length of compressed data + pub dz_data_length: u64, +} + +impl Default for Dz4Block { + fn default() -> Self { + Dz4Block { + reserved: [0; 4], + len: 0, + dz_links: 0, + dz_org_block_type: [68, 86], // DV + dz_zip_type: 0, // No 
transposition for a single channel + dz_reserved: 0, + dz_zip_parameter: 0, + dz_org_data_length: 0, + dz_data_length: 0, + } + } +} + +impl Dz4Block { + /// Returns a string representation of the compression algorithm + pub fn get_compression_str(&self) -> &'static str { + match self.dz_zip_type { + 0 => "Deflate", + 1 => "Deflate+Transpose", + 2 => "Zstd", + 3 => "Zstd+Transpose", + 4 => "LZ4", + 5 => "LZ4+Transpose", + 254 => "Custom", + _ => "Unknown", + } + } +} + +impl Display for Dz4Block { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let org_type = + str::from_utf8(&self.dz_org_block_type).unwrap_or("??"); + let ratio = if self.dz_org_data_length > 0 { + (self.dz_data_length as f64 / self.dz_org_data_length as f64) * 100.0 + } else { + 0.0 + }; + write!( + f, + "DZ: {} org_type={} compressed={} original={} ratio={:.1}%", + self.get_compression_str(), + org_type, + self.dz_data_length, + self.dz_org_data_length, + ratio + ) + } +} + +/// DL4 Data List block struct +#[derive(Debug, PartialEq, Eq, Clone)] +#[binrw] +#[br(little)] +#[repr(C)] +pub struct Ld4Block { + // header + // ld_id: [u8; 4], // ##LD + reserved: [u8; 4], // reserved + /// Length of block in bytes + pub ld_len: u64, + /// # of links + pub ld_n_links: u64, + // links + /// next ld block + pub ld_next: i64, + /// number of links + #[br(if(ld_n_links > 1), little, count = ld_n_links - 1)] + pub ld_links: Vec, + // members + /// Flags + pub ld_flags: u8, + /// Zip info in valid data + pub ld_zip_info: u8, + /// Zip info in ivalid data + pub ld_zip_info_inval: u8, + /// Extended flags + pub ld_flags_ext: u8, + /// Number of data blocks + pub ld_count: u32, + #[br(if((ld_flags & 0b1)!=0), little)] + pub ld_equal_sample_count: Option, + #[br(if((ld_flags & 0b1)==0), little, count = ld_count)] + pub ld_sample_offset: Vec, + #[br(if((ld_flags & 0b10)>0), little, count = ld_count)] + dl_time_values: Vec, + #[br(if((ld_flags & 0b100)>0), little, count = ld_count)] + dl_angle_values: 
Vec, + #[br(if((ld_flags & 0b1000)>0), little, count = ld_count)] + dl_distance_values: Vec, +} + +impl Default for Ld4Block { + fn default() -> Self { + Ld4Block { + reserved: [0; 4], + ld_len: 56, + ld_n_links: 2, + ld_next: 0, + ld_links: vec![], + ld_flags: 0, + ld_zip_info: 0, + ld_zip_info_inval: 0, + ld_flags_ext: 0, + ld_count: 1, + ld_equal_sample_count: None, + ld_sample_offset: vec![], + dl_time_values: vec![], + dl_angle_values: vec![], + dl_distance_values: vec![], + } + } +} + +impl Ld4Block { + pub fn ld_ld_next(&self) -> i64 { + self.ld_next + } + /// Data block positions + pub fn ld_data(&self) -> Vec { + // In MDF 4.3, bit 7 of ld_flags_ext indicates invalid data present. + // If present, links are interleaved: Data 1, Inval 1, Data 2, Inval 2, ... + // We can also check if the number of links matches 2 * ld_count. + if (1u8 << 7) & self.ld_flags_ext > 0 || self.ld_links.len() as u32 == self.ld_count * 2 { + self.ld_links.iter().step_by(2).copied().collect() + } else { + self.ld_links.clone() + } + } + /// Invalid data block positions + pub fn ld_invalid_data(&self) -> Vec { + if (1u8 << 7) & self.ld_flags_ext > 0 || self.ld_links.len() as u32 == self.ld_count * 2 { + self.ld_links.iter().skip(1).step_by(2).copied().collect() + } else { + Vec::::new() + } + } +} + +impl Display for Ld4Block { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let sample_count = self + .ld_equal_sample_count + .map(|c| format!("{}", c)) + .unwrap_or_else(|| format!("{} offsets", self.ld_sample_offset.len())); + write!( + f, + "LD: {} blocks sample_count={} flags=0x{:02X}", + self.ld_count, sample_count, self.ld_flags + ) + } +} + +/// parse List Data block +/// equivalent ot DLBlock but unsorted data is not allowed +/// pointing to DV/DI and RV/RI blocks +pub fn parser_ld4_block( + rdr: &mut BufReader<&File>, + target: i64, + mut position: i64, +) -> Result<(Ld4Block, i64)> { + rdr.seek_relative(target - position) + .context("Could not reach Ld4Block 
position")?; + let block: Ld4Block = rdr + .read_le() + .context("Could not read buffer into Ld4Block struct")?; + position = target + block.ld_len as i64; + Ok((block, position)) +} + +/// HL4 Data List block struct +#[derive(Debug, PartialEq, Eq, Default, Clone)] +#[binrw] +#[br(little)] +#[repr(C)] +pub struct Hl4Block { + //header + // ##HL + // hl_id: [u8; 4], + /// reserved + reserved: [u8; 4], + /// Length of block in bytes + pub hl_len: u64, + /// # of links + hl_links: u64, + /// links + pub hl_dl_first: i64, // first LD block + // members + /// flags + hl_flags: u16, + /// Zip algorithn + hl_zip_type: u8, + /// reserved + hl_reserved: [u8; 5], +} + +impl Hl4Block { + /// Returns a string representation of the compression algorithm + pub fn get_zip_type_str(&self) -> &'static str { + match self.hl_zip_type { + 0 => "Deflate", + 1 => "Deflate+Transpose", + 2 => "Zstd", + 3 => "Zstd+Transpose", + 4 => "LZ4", + 5 => "LZ4+Transpose", + _ => "Unknown", + } + } +} + +impl Display for Hl4Block { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "HL: flags=0x{:04X} zip_type={}", + self.hl_flags, + self.get_zip_type_str() + ) + } +} + +/// GD4 Guard Block struct (MDF 4.3) +/// Used to safeguard newly introduced features against incompatible readers +/// Note: gd_reserved is not included as its size varies based on gd_len +#[derive(Debug, PartialEq, Eq, Default, Clone)] +#[binrw] +#[br(little)] +#[repr(C)] +pub struct Gd4Block { + // header + // ##GD + // gd_id: [u8; 4], + /// reserved + reserved: [u8; 4], + /// Length of block in bytes + pub gd_len: u64, + /// # of links (always 1) + gd_links: u64, + // link section + /// Pointer to the guarded block (shall not be NIL) + pub gd_link: i64, + // data section + /// Minimum version number of the MDF format the reader shall support + /// Same format as id_ver in IDBLOCK, i.e. 
430 for MDF 4.3.0 + pub gd_version: u16, + // gd_reserved is not included - size varies, position handled manually +} + +impl Display for Gd4Block { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let major = self.gd_version / 100; + let minor = (self.gd_version % 100) / 10; + let patch = self.gd_version % 10; + write!(f, "GD: min_version={}.{}.{}", major, minor, patch) + } +} diff --git a/src/mdfinfo/mdfinfo4/dg_block.rs b/src/mdfinfo/mdfinfo4/dg_block.rs new file mode 100644 index 0000000..fc9d92c --- /dev/null +++ b/src/mdfinfo/mdfinfo4/dg_block.rs @@ -0,0 +1,188 @@ +//! Data Group block (DGBLOCK) for MDF4 +use anyhow::{Context, Result}; +use binrw::{BinReaderExt, binrw}; +use std::collections::{BTreeMap, HashMap}; +use std::fmt::{self, Display}; +use std::fs::File; +use std::io::{Cursor, Read}; + +use super::block_header::{read_meta_data, SharableBlocks}; +use super::cg_block::{Cg4, parse_cg4, CG_F_VLSC, CG_F_VLSD}; +use super::metadata::BlockType; +use crate::mdfinfo::sym_buf_reader::SymBufReader; + +/// Dg4 Data Group block struct +#[derive(Debug, Copy, Clone)] +#[binrw] +#[br(little)] +#[repr(C)] +pub struct Dg4Block { + /// ##DG + dg_id: [u8; 4], + /// reserved + reserved: [u8; 4], + /// Length of block in bytes + pub dg_len: u64, + /// # of links + dg_links: u64, + /// Pointer to next data group block (DGBLOCK) (can be NIL) + pub dg_dg_next: i64, + /// Pointer to first channel group block (CGBLOCK) (can be NIL) + pub dg_cg_first: i64, + // Pointer to data block (DTBLOCK or DZBLOCK for this block type) or data list block (DLBLOCK of data blocks or its HLBLOCK) (can be NIL) + pub dg_data: i64, + /// comment + dg_md_comment: i64, + /// number of bytes used for record IDs. 
0 no recordID + pub dg_rec_id_size: u8, + // reserved + reserved_2: [u8; 7], +} + +impl Default for Dg4Block { + fn default() -> Self { + Dg4Block { + dg_id: [35, 35, 68, 71], // ##DG + reserved: [0; 4], + dg_len: 64, + dg_links: 4, + dg_dg_next: 0, + dg_cg_first: 0, + dg_data: 0, + dg_md_comment: 0, + dg_rec_id_size: 0, + reserved_2: [0; 7], + } + } +} + +impl Display for Dg4Block { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "DG: rec_id_size={}", self.dg_rec_id_size) + } +} + +/// Dg4 (Data Group) block struct parser with comments +fn parse_dg4_block( + rdr: &mut SymBufReader<&File>, + sharable: &mut SharableBlocks, + target: i64, + mut position: i64, +) -> Result<(Dg4Block, i64)> { + rdr.seek_relative(target - position) + .context("Could not reach position of Dg4 block")?; + let mut buf = [0u8; 64]; + rdr.read_exact(&mut buf) + .context("Could not read Dg4Blcok buffer")?; + let mut block = Cursor::new(buf); + let dg: Dg4Block = block + .read_le() + .context("Could not parse Dg4Block buffer into Dg4Block struct")?; + position = target + 64; + + // Reads MD + position = read_meta_data(rdr, sharable, dg.dg_md_comment, position, BlockType::DG)?; + + Ok((dg, position)) +} + +/// Dg4 struct wrapping block, comments and linked CG +#[derive(Debug, Clone)] +#[repr(C)] +pub struct Dg4 { + /// DG Block + pub block: Dg4Block, + /// CG Block + pub cg: HashMap, +} + +impl Display for Dg4 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let total_channels: usize = self.cg.values().map(|cg| cg.cn.len()).sum(); + write!( + f, + "DG: {} channel groups, {} channels", + self.cg.len(), + total_channels + ) + } +} + +/// Parser for Dg4 and all linked blocks (cg, cn, cc, ca, si) +pub fn parse_dg4( + rdr: &mut SymBufReader<&File>, + target: i64, + mut position: i64, + sharable: &mut SharableBlocks, +) -> Result<(BTreeMap, i64, usize, usize)> { + let mut dg: BTreeMap = BTreeMap::new(); + let mut n_cn: usize = 0; + let mut n_cg: usize = 0; + 
if target > 0 { + let (block, pos) = parse_dg4_block(rdr, sharable, target, position)?; + position = pos; + let mut next_pointer = block.dg_dg_next; + let (mut cg, pos, num_cg, num_cn) = parse_cg4( + rdr, + block.dg_cg_first, + position, + sharable, + block.dg_rec_id_size, + )?; + n_cg += num_cg; + n_cn += num_cn; + identify_vlsd_cg(&mut cg); + let dg_struct = Dg4 { block, cg }; + dg.insert(target, dg_struct); + position = pos; + while next_pointer > 0 { + let block_start = next_pointer; + let (block, pos) = parse_dg4_block(rdr, sharable, next_pointer, position)?; + next_pointer = block.dg_dg_next; + position = pos; + let (mut cg, pos, num_cg, num_cn) = parse_cg4( + rdr, + block.dg_cg_first, + position, + sharable, + block.dg_rec_id_size, + )?; + n_cg += num_cg; + n_cn += num_cn; + identify_vlsd_cg(&mut cg); + let dg_struct = Dg4 { block, cg }; + dg.insert(block_start, dg_struct); + position = pos; + } + } + Ok((dg, position, n_cg, n_cn)) +} + +/// Try to link VLSD/VLSC Channel Groups with matching channel in other groups +fn identify_vlsd_cg(cg: &mut HashMap) { + // First find all VLSD/VLSC Channel Groups + let mut vlsd: HashMap = HashMap::new(); + for (rec_id, channel_group) in cg.iter() { + if (channel_group.block.cg_flags & (CG_F_VLSD | CG_F_VLSC)) != 0 { + // VLSD or VLSC channel group found + vlsd.insert(channel_group.block_position, *rec_id); + } + } + if !vlsd.is_empty() { + // try to find corresponding channel in other channel group + let mut vlsd_matching: HashMap = HashMap::new(); + for (target_rec_id, channel_group) in cg.iter() { + for (target_rec_pos, cn) in channel_group.cn.iter() { + if let Some(vlsd_rec_id) = vlsd.get(&cn.block.cn_data) { + // Found matching channel with VLSD_CG + vlsd_matching.insert(*vlsd_rec_id, (*target_rec_id, *target_rec_pos)); + } + } + } + for (vlsd_rec_id, (target_rec_id, target_rec_pos)) in vlsd_matching { + if let Some(vlsd_cg) = cg.get_mut(&vlsd_rec_id) { + vlsd_cg.vlsd_cg = Some((target_rec_id, target_rec_pos)); + } + 
} + } +} diff --git a/src/mdfinfo/mdfinfo4/ev_block.rs b/src/mdfinfo/mdfinfo4/ev_block.rs new file mode 100644 index 0000000..d2c3145 --- /dev/null +++ b/src/mdfinfo/mdfinfo4/ev_block.rs @@ -0,0 +1,252 @@ +//! Event block (EVBLOCK) for MDF4 +use anyhow::{Context, Result}; +use binrw::{BinReaderExt, binrw}; +use std::collections::HashMap; +use std::fmt::{self, Display}; +use std::fs::File; + +use super::block_header::{parse_block_short, read_meta_data, SharableBlocks}; +use super::metadata::BlockType; +use crate::mdfinfo::sym_buf_reader::SymBufReader; + +/// Ev4 Event block struct +#[derive(Debug, Clone)] +#[binrw] +#[br(little)] +#[repr(C)] +pub struct Ev4Block { + //ev_id: [u8; 4], // DG + //reserved: [u8; 4], // reserved + //ev_len: u64, // Length of block in bytes + /// # of links + pub ev_links: u64, + /// Link to next EVBLOCK (linked list) (can be NIL) + pub ev_ev_next: i64, + /// Referencing link to EVBLOCK with parent event (can be NIL). + pub ev_ev_parent: i64, + /// Referencing link to EVBLOCK with event that defines the beginning of a range (can be NIL, must be NIL if ev_range_type != 2). + pub ev_ev_range: i64, + /// Pointer to TXBLOCK with event name (can be NIL) Name must be according to naming rules stated in 4.4.2 Naming Rules. If available, the name of a named trigger condition should be used as event name. Other event types may have individual names or no names. + pub ev_tx_name: i64, + /// Pointer to TX/MDBLOCK with event comment and additional information, e.g. 
trigger condition or formatted user comment text (can be NIL) + pub ev_md_comment: i64, + #[br(if(ev_links > 5), little, count = ev_links.saturating_sub(5))] + /// links + links: Vec, + + /// Event type (see EV_T_xxx) + pub ev_type: u8, + /// Sync type (see EV_S_xxx) + pub ev_sync_type: u8, + /// Range Type (see EV_R_xxx) + pub ev_range_type: u8, + /// Cause of event (see EV_C_xxx) + pub ev_cause: u8, + /// flags (see EV_F_xxx) + pub ev_flags: u8, + /// Reserved + ev_reserved: [u8; 3], + /// Length M of ev_scope list. Can be zero. + pub ev_scope_count: u32, + /// Length N of ev_at_reference list, i.e. number of attachments for this event. Can be zero. + pub ev_attachment_count: u16, + /// Creator index, i.e. zero-based index of FHBLOCK in global list of FHBLOCKs that specifies which application has created or changed this event (e.g. when generating event offline). + pub ev_creator_index: u16, + /// Base value for synchronization value. + pub ev_sync_base_value: i64, + /// Factor for event synchronization value. 
+ pub ev_sync_factor: f64, +} + +// Event type constants (ev_type) +/// Recording event +pub const EV_T_RECORDING: u8 = 0; +/// Recording interrupt event +pub const EV_T_RECORDING_INTERRUPT: u8 = 1; +/// Acquisition interrupt event +pub const EV_T_ACQUISITION_INTERRUPT: u8 = 2; +/// Trigger (start/stop) event +pub const EV_T_TRIGGER: u8 = 3; +/// Marker event (user-defined) +pub const EV_T_MARKER: u8 = 4; + +// Event sync type constants (ev_sync_type) +/// No sync +pub const EV_S_NONE: u8 = 0; +/// Time sync +pub const EV_S_TIME: u8 = 1; +/// Angle sync +pub const EV_S_ANGLE: u8 = 2; +/// Distance sync +pub const EV_S_DISTANCE: u8 = 3; +/// Index sync +pub const EV_S_INDEX: u8 = 4; + +// Event cause constants (ev_cause) +/// Unknown/other cause +pub const EV_C_OTHER: u8 = 0; +/// Error cause +pub const EV_C_ERROR: u8 = 1; +/// Tool-internal cause +pub const EV_C_TOOL: u8 = 2; +/// Script cause +pub const EV_C_SCRIPT: u8 = 3; +/// User cause +pub const EV_C_USER: u8 = 4; + +#[allow(dead_code)] +impl Ev4Block { + /// Returns the event name from sharable blocks + pub fn get_event_name(&self, sharable: &SharableBlocks) -> Result> { + sharable.get_tx(self.ev_tx_name) + } + + /// Returns the event type as a string description + pub fn get_event_type_str(&self) -> &'static str { + match self.ev_type { + EV_T_RECORDING => "Recording", + EV_T_RECORDING_INTERRUPT => "Recording Interrupt", + EV_T_ACQUISITION_INTERRUPT => "Acquisition Interrupt", + EV_T_TRIGGER => "Trigger", + EV_T_MARKER => "Marker", + _ => "Unknown", + } + } + + /// Returns the synchronization type as a string description + pub fn get_sync_type_str(&self) -> &'static str { + match self.ev_sync_type { + EV_S_NONE => "None", + EV_S_TIME => "Time", + EV_S_ANGLE => "Angle", + EV_S_DISTANCE => "Distance", + EV_S_INDEX => "Index", + _ => "Unknown", + } + } + + /// Returns the event cause as a string description + pub fn get_cause_str(&self) -> &'static str { + match self.ev_cause { + EV_C_OTHER => "Other/Unknown", 
+ EV_C_ERROR => "Error", + EV_C_TOOL => "Tool Internal", + EV_C_SCRIPT => "Script", + EV_C_USER => "User", + _ => "Unknown", + } + } + + /// Returns the synchronization value computed from base value and factor + pub fn get_sync_value(&self) -> f64 { + self.ev_sync_base_value as f64 * self.ev_sync_factor + } + + /// Returns the list of scope link positions (first ev_scope_count items from links) + /// These point to CGBLOCKs or CNBLOCKs that this event applies to + pub fn get_scope_links(&self) -> &[i64] { + if self.links.is_empty() || self.ev_scope_count == 0 { + &[] + } else { + let end = (self.ev_scope_count as usize).min(self.links.len()); + &self.links[0..end] + } + } + + /// Returns the list of attachment link positions (items after scope links) + /// These point to ATBLOCKs associated with this event + pub fn get_attachment_links(&self) -> &[i64] { + if self.links.is_empty() || self.ev_attachment_count == 0 { + &[] + } else { + let start = self.ev_scope_count as usize; + if start >= self.links.len() { + &[] + } else { + let end = (start + self.ev_attachment_count as usize).min(self.links.len()); + &self.links[start..end] + } + } + } + + /// Calculates the block size for writing + pub fn calculate_block_size(&self) -> i64 { + // 16 (short header) + 8 (link count) + 8*links + 32 (data members) + 16 + 8 + (self.ev_links * 8) as i64 + 32 + } + + /// Returns the range type as a string description + pub fn get_range_type_str(&self) -> &'static str { + match self.ev_range_type { + 0 => "Point", + 1 => "Beginning", + 2 => "End", + _ => "Unknown", + } + } +} + +impl Display for Ev4Block { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "EV: type={} ({}) sync={} ({}) cause={} ({}) range={} ({}) scopes={} attachments={}", + self.get_event_type_str(), + self.ev_type, + self.get_sync_type_str(), + self.ev_sync_type, + self.get_cause_str(), + self.ev_cause, + self.get_range_type_str(), + self.ev_range_type, + self.ev_scope_count, + 
self.ev_attachment_count
        )
    }
}

/// Ev4 (Event) block struct parser
///
/// Seeks to `target`, reads the short block header, then deserializes the
/// EVBLOCK payload; returns the block and the updated reader position.
pub(super) fn parse_ev4_block(
    rdr: &mut SymBufReader<&File>,
    target: i64,
    mut position: i64,
) -> Result<(Ev4Block, i64)> {
    let (mut block, _header, pos) = parse_block_short(rdr, target, position)?;
    position = pos;
    let block: Ev4Block = block.read_le().context("Error parsing ev block")?; // reads the ev block

    Ok((block, position))
}

/// parses Event blocks along with its linked comments, returns a hashmap of Ev4 block with position as key
///
/// Walks the singly linked EV list starting at `target` (NIL/0 means no events);
/// for each block, its MD comment and TX name are read into `sharable`.
pub fn parse_ev4(
    rdr: &mut SymBufReader<&File>,
    sharable: &mut SharableBlocks,
    target: i64,
    mut position: i64,
) -> Result<(HashMap<i64, Ev4Block>, i64)> {
    let mut ev: HashMap<i64, Ev4Block> = HashMap::new();
    if target > 0 {
        let (block, pos) = parse_ev4_block(rdr, target, position)?;
        position = pos;
        // Reads MD
        position = read_meta_data(rdr, sharable, block.ev_md_comment, position, BlockType::EV)?;
        // reads TX event name
        position = read_meta_data(rdr, sharable, block.ev_tx_name, position, BlockType::EV)?;
        let mut next_pointer = block.ev_ev_next;
        ev.insert(target, block);

        // follow ev_ev_next chain; only strictly positive links are valid offsets
        while next_pointer > 0 {
            let block_start = next_pointer;
            let (block, pos) = parse_ev4_block(rdr, next_pointer, position)?;
            position = pos;
            // Reads MD
            position = read_meta_data(rdr, sharable, block.ev_md_comment, position, BlockType::EV)?;
            // reads TX event name
            position = read_meta_data(rdr, sharable, block.ev_tx_name, position, BlockType::EV)?;
            next_pointer = block.ev_ev_next;
            ev.insert(block_start, block);
        }
    }
    Ok((ev, position))
}
diff --git a/src/mdfinfo/mdfinfo4/fh_block.rs b/src/mdfinfo/mdfinfo4/fh_block.rs
new file mode 100644
index 0000000..fe55be0
--- /dev/null
+++ b/src/mdfinfo/mdfinfo4/fh_block.rs
@@ -0,0 +1,126 @@
//!
File History block (FHBLOCK) for MDF4
use anyhow::{Context, Result};
use binrw::{BinReaderExt, binrw};
use chrono::{DateTime, Local};
use std::fmt::{self, Display};
use std::fs::File;
use std::io::{Cursor, Read};

use super::block_header::{read_meta_data, SharableBlocks};
use super::metadata::BlockType;
use crate::mdfinfo::sym_buf_reader::SymBufReader;

/// Fh4 (File History) block struct, including the header
#[derive(Debug, Copy, Clone)]
#[binrw]
#[br(little)]
#[repr(C)]
pub struct FhBlock {
    /// '##FH'
    fh_id: [u8; 4],
    /// reserved, must be 0
    fh_gap: [u8; 4],
    /// Length of block in bytes
    fh_len: u64,
    /// # of links
    fh_links: u64,
    /// Link to next FHBLOCK (can be NIL if list finished)
    pub fh_fh_next: i64,
    /// Link to MDBLOCK containing comment about the creation or modification of the MDF file.
    pub fh_md_comment: i64,
    /// time stamp in nanosecs
    pub fh_time_ns: u64,
    /// time zone offset in minutes
    pub fh_tz_offset_min: i16,
    /// daylight saving time offset in minutes for start time stamp
    pub fh_dst_offset_min: i16,
    /// time flags: bit 0 = local time, bit 1 = time offsets valid
    pub fh_time_flags: u8,
    /// reserved
    fh_reserved: [u8; 3],
}

/// Default FHBLOCK: fixed 56-byte layout, timestamped with the current local
/// time (0 if the clock cannot be represented in nanoseconds).
impl Default for FhBlock {
    fn default() -> Self {
        FhBlock {
            fh_id: [35, 35, 70, 72], // '##FH'
            fh_gap: [0u8; 4],
            fh_len: 56,
            fh_links: 2,
            fh_fh_next: 0,
            fh_md_comment: 0,
            fh_time_ns: Local::now()
                .timestamp_nanos_opt()
                .map(|t| t as u64)
                .unwrap_or(0),
            fh_tz_offset_min: 0,
            fh_dst_offset_min: 0,
            fh_time_flags: 0,
            fh_reserved: [0u8; 3],
        }
    }
}

impl Display for FhBlock {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Convert nanoseconds to datetime
        let secs = (self.fh_time_ns / 1_000_000_000) as i64;
        let nsecs = (self.fh_time_ns % 1_000_000_000) as u32;
        let datetime = DateTime::from_timestamp(secs, nsecs)
            .map(|dt| dt.format("%Y-%m-%d %H:%M:%S").to_string())
            .unwrap_or_else(|| "Invalid timestamp".to_string());
        let
local_time = if self.fh_time_flags & 0b1 != 0 {
            "local"
        } else {
            "UTC"
        };
        write!(
            f,
            "FH: {} ({}) tz_offset={}min dst_offset={}min",
            datetime, local_time, self.fh_tz_offset_min, self.fh_dst_offset_min
        )
    }
}

/// Fh4 (File History) block struct parser
///
/// Seeks relative to the current `position`, reads the fixed 56-byte FHBLOCK
/// and returns it together with the new reader position (`target + 56`).
fn parse_fh_block(
    rdr: &mut SymBufReader<&File>,
    target: i64,
    position: i64,
) -> Result<(FhBlock, i64)> {
    rdr.seek_relative(target - position)
        .context("Could not reach FH Block position")?; // change buffer position
    let mut buf = [0u8; 56];
    rdr.read_exact(&mut buf)
        .context("Could not read FH block buffer")?;
    let mut block = Cursor::new(buf);
    let fh: FhBlock = block
        .read_le()
        .with_context(|| format!("Error parsing fh block into FhBlock struct \n{block:?}"))?; // reads the fh block
    Ok((fh, target + 56))
}

pub type Fh = Vec<FhBlock>;

/// parses File History blocks along with its linked comments returns a vect of Fh4 block with comments
pub fn parse_fh(
    rdr: &mut SymBufReader<&File>,
    sharable: &mut SharableBlocks,
    target: i64,
    mut position: i64,
) -> Result<(Fh, i64)> {
    let mut fh: Fh = Vec::new();
    let (block, pos) = parse_fh_block(rdr, target, position)?;
    position = pos;
    position = read_meta_data(rdr, sharable, block.fh_md_comment, position, BlockType::FH)?;
    let mut next_pointer = block.fh_fh_next;
    fh.push(block);
    // Follow the fh_fh_next chain. Only strictly positive links are valid file
    // offsets (NIL is 0); `> 0` — consistent with parse_ev4 — also stops a
    // corrupt negative link from seeking backwards (was `!= 0`).
    while next_pointer > 0 {
        let (block, pos) = parse_fh_block(rdr, next_pointer, position)?;
        position = pos;
        next_pointer = block.fh_fh_next;
        position = read_meta_data(rdr, sharable, block.fh_md_comment, position, BlockType::FH)?;
        fh.push(block);
    }
    Ok((fh, position))
}
diff --git a/src/mdfinfo/mdfinfo4/hd_block.rs b/src/mdfinfo/mdfinfo4/hd_block.rs
new file mode 100644
index 0000000..ccdd836
--- /dev/null
+++ b/src/mdfinfo/mdfinfo4/hd_block.rs
@@ -0,0 +1,114 @@
//!
Header block (HDBLOCK) for MDF4 +use anyhow::{Context, Result}; +use binrw::{BinReaderExt, binrw}; +use chrono::{DateTime, Local}; +use std::fmt; +use std::fs::File; +use std::io::{Cursor, Read}; + +use super::block_header::{read_meta_data, SharableBlocks}; +use super::metadata::BlockType; +use crate::mdfinfo::sym_buf_reader::SymBufReader; + +/// Hd4 (Header) block structure +#[derive(Debug, Copy, Clone)] +#[binrw] +#[br(little)] +#[repr(C)] +pub struct Hd4 { + /// ##HD + hd_id: [u8; 4], + /// reserved + hd_reserved: [u8; 4], + /// Length of block in bytes + hd_len: u64, + /// # of links + hd_link_counts: u64, + /// Pointer to the first data group block (DGBLOCK) (can be NIL) + pub hd_dg_first: i64, + /// Pointer to first file history block (FHBLOCK) + /// There must be at least one FHBLOCK with information about the application which created the MDF file. + pub hd_fh_first: i64, + /// Pointer to first channel hierarchy block (CHBLOCK) (can be NIL). + pub hd_ch_first: i64, + /// Pointer to first attachment block (ATBLOCK) (can be NIL) + pub hd_at_first: i64, + /// Pointer to first event block (EVBLOCK) (can be NIL) + pub hd_ev_first: i64, + /// Pointer to the measurement file comment (TXBLOCK or MDBLOCK) (can be NIL) For MDBLOCK contents, see Table 14. + pub hd_md_comment: i64, + /// Data members + /// Time stamp in nanoseconds elapsed since 00:00:00 01.01.1970 (UTC time or local time, depending on "local time" flag) + pub hd_start_time_ns: u64, + /// Time zone offset in minutes. The value must be in range [-720,720], i.e. it can be negative! For example a value of 60 (min) means UTC+1 time zone = Central European Time (CET). Only valid if "time offsets valid" flag is set in time flags. + pub hd_tz_offset_min: i16, + /// Daylight saving time (DST) offset in minutes for start time stamp. During the summer months, most regions observe a DST offset of 60 min (1 hour). Only valid if "time offsets valid" flag is set in time flags. 
    pub hd_dst_offset_min: i16,
    /// Time flags The value contains the following bit flags (see HD_TF_xxx)
    pub hd_time_flags: u8,
    /// Time quality class (see HD_TC_xxx)
    pub hd_time_class: u8,
    /// Flags The value contains the following bit flags (see HD_FL_xxx):
    pub hd_flags: u8,
    /// reserved
    pub hd_reserved2: u8,
    /// Start angle in radians at start of measurement (only for angle synchronous measurements) Only valid if "start angle valid" flag is set. All angle values for angle synchronized master channels or events are relative to this start angle.
    pub hd_start_angle_rad: f64,
    /// Start distance in meters at start of measurement (only for distance synchronous measurements) Only valid if "start distance valid" flag is set. All distance values for distance synchronized master channels or events are relative to this start distance.
    pub hd_start_distance_m: f64,
}

/// Default HDBLOCK: fixed 104-byte layout with 6 links, all NIL, timestamped
/// with the current local time (0 if the clock cannot be represented in ns).
impl Default for Hd4 {
    fn default() -> Self {
        Hd4 {
            hd_id: [35, 35, 72, 68], // ##HD
            hd_len: 104,
            hd_link_counts: 6,
            hd_reserved: [0u8; 4],
            hd_dg_first: 0,
            hd_fh_first: 0,
            hd_ch_first: 0,
            hd_at_first: 0,
            hd_ev_first: 0,
            hd_md_comment: 0,
            hd_start_time_ns: Local::now()
                .timestamp_nanos_opt()
                .map(|t| t as u64)
                .unwrap_or(0),
            hd_tz_offset_min: 0,
            hd_dst_offset_min: 0,
            hd_time_flags: 0,
            hd_time_class: 0,
            hd_flags: 0,
            hd_reserved2: 0,
            hd_start_angle_rad: 0.0,
            hd_start_distance_m: 0.0,
        }
    }
}

/// Hd4 display implementation
impl fmt::Display for Hd4 {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // split ns timestamp into seconds + sub-second nanoseconds for chrono
        let sec = self.hd_start_time_ns / 1000000000;
        let nsec = (self.hd_start_time_ns - sec * 1000000000) as u32;
        let naive = DateTime::from_timestamp(sec as i64, nsec).unwrap_or_default();
        writeln!(f, "Time : {} ", naive.to_rfc3339())
    }
}

/// Hd4 block struct parser
///
/// Reads the fixed 104-byte HDBLOCK from the current reader position
/// (immediately after the 64-byte ID block) and its MD comment if linked.
pub fn hd4_parser(
    rdr: &mut SymBufReader<&File>,
    sharable: &mut SharableBlocks,
) -> Result<(Hd4, i64)> {
    let mut buf = [0u8; 104];
+ rdr.read_exact(&mut buf) + .context("could not read HD block buffer")?; + let mut block = Cursor::new(buf); + let hd: Hd4 = block + .read_le() + .context("Could not parse HD block buffer into Hd4 struct")?; + let position = read_meta_data(rdr, sharable, hd.hd_md_comment, 168, BlockType::HD)?; + Ok((hd, position)) +} diff --git a/src/mdfinfo/mdfinfo4/metadata.rs b/src/mdfinfo/mdfinfo4/metadata.rs new file mode 100644 index 0000000..2ebef86 --- /dev/null +++ b/src/mdfinfo/mdfinfo4/metadata.rs @@ -0,0 +1,295 @@ +//! MetaData struct and related types for MDF4 TX/MD blocks +use anyhow::{Context, Result}; +use binrw::BinWriterExt; +use std::collections::HashMap; +use std::fmt::{self, Display}; +use std::io::{Seek, Write}; +use std::str; + +use super::block_header::Blockheader4; + +/// metadata are either stored in TX (text) or MD (xml) blocks for mdf version 4 +#[derive(Debug, Clone, PartialEq, Eq)] +#[repr(C)] +#[derive(Default)] +pub enum MetaDataBlockType { + MdBlock, + MdParsed, + #[default] + TX, +} + +/// Blocks types that could link to MDBlock +#[derive(Debug, Clone)] +#[repr(C)] +#[derive(Default)] +pub enum BlockType { + HD, + FH, + AT, + EV, + DG, + CG, + #[default] + CN, + CC, + SI, + CH, +} + +/// struct linking MD or TX block with +#[derive(Debug, Default, Clone)] +#[repr(C)] +pub struct MetaData { + /// Header of the block + pub block: Blockheader4, + /// Raw bytes for the block's data + pub raw_data: Vec, + /// Block type, TX, MD or MD not yet parsed + pub block_type: MetaDataBlockType, + /// Metadata after parsing + pub comments: HashMap, + /// Parent block type + pub parent_block_type: BlockType, +} + +impl Display for MetaData { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let type_str = match self.block_type { + MetaDataBlockType::MdBlock => "MD (unparsed)", + MetaDataBlockType::MdParsed => "MD (parsed)", + MetaDataBlockType::TX => "TX", + }; + write!( + f, + "MetaData: type={} {} comments raw_bytes={}", + type_str, + 
self.comments.len(), + self.raw_data.len() + ) + } +} + +impl MetaData { + /// Returns a new MetaData struct + pub fn new(block_type: MetaDataBlockType, parent_block_type: BlockType) -> Self { + let header = match block_type { + MetaDataBlockType::MdBlock => Blockheader4 { + hdr_id: [35, 35, 77, 68], // '##MD' + hdr_gap: [0u8; 4], + hdr_len: 24, + hdr_links: 0, + }, + MetaDataBlockType::TX | MetaDataBlockType::MdParsed => Blockheader4 { + hdr_id: [35, 35, 84, 88], // '##TX' + hdr_gap: [0u8; 4], + hdr_len: 24, + hdr_links: 0, + }, + }; + MetaData { + block: header, + raw_data: Vec::new(), + block_type, + comments: HashMap::new(), + parent_block_type, + } + } + /// Converts the metadata handling the parent block type's specificities + pub fn parse_xml(&mut self) -> Result<()> { + if self.block_type == MetaDataBlockType::MdBlock { + match self.parent_block_type { + BlockType::HD => self.parse_hd_xml()?, + BlockType::FH => self.parse_fh_xml()?, + _ => self.parse_generic_xml()?, + }; + } + Ok(()) + } + /// Returns the text from TX Block or TX's tag text from MD Block + pub fn get_tx(&self) -> Result, anyhow::Error> { + match self.block_type { + MetaDataBlockType::MdParsed => Ok(self.comments.get("TX").cloned()), + MetaDataBlockType::MdBlock => { + // extract TX tag from xml + let comment: String = self + .get_data_string() + .context("failed getting data string to extract TX tag")? 
+ .trim_end_matches(['\n', '\r', ' ']) + .into(); // removes ending spaces + match roxmltree::Document::parse(&comment) { + Ok(md) => { + let mut tx: Option = None; + for node in md.root().descendants() { + let text = match node.text() { + Some(text) => text.to_string(), + None => String::new(), + }; + if node.is_element() + && !text.is_empty() + && node.tag_name().name() == r"TX" + { + tx = Some(text); + break; + } + } + Ok(tx) + } + Err(e) => { + log::warn!("Error parsing comment : \n{comment}\n{e}"); + Ok(None) + } + } + } + MetaDataBlockType::TX => { + let comment = str::from_utf8(&self.raw_data).with_context(|| { + format!("Invalid UTF-8 sequence in metadata: {:?}", self.raw_data) + })?; + let c: String = comment.trim_end_matches(char::from(0)).into(); + Ok(Some(c)) + } + } + } + /// Returns the bytes of the text from TX Block or TX's tag text from MD Block + pub fn get_tx_bytes(&self) -> Option<&[u8]> { + match self.block_type { + MetaDataBlockType::MdParsed => self.comments.get("TX").map(|s| s.as_bytes()), + _ => Some(&self.raw_data), + } + } + /// Decode string from raw_data field + pub fn get_data_string(&self) -> Result { + match self.block_type { + MetaDataBlockType::MdParsed => Ok(String::new()), + _ => { + let comment = str::from_utf8(&self.raw_data).with_context(|| { + format!("Invalid UTF-8 sequence in metadata: {:?}", self.raw_data) + })?; + let comment: String = comment.trim_end_matches(char::from(0)).into(); + Ok(comment) + } + } + } + /// allocate bytes to raw_data field, adjusting header length + pub fn set_data_buffer(&mut self, data: &[u8]) { + self.raw_data = [data, vec![0u8; 8 - data.len() % 8].as_slice()].concat(); + self.block.hdr_len = self.raw_data.len() as u64 + 24; + } + /// parses the xml bytes specifically for HD block contexted schema + pub fn parse_hd_xml(&mut self) -> Result<()> { + let mut comments: HashMap = HashMap::new(); + // MD Block from HD Block, reading xml + let comment: String = self + .get_data_string()? 
+ .trim_end_matches(['\n', '\r', ' ']) + .into(); // removes ending spaces + match roxmltree::Document::parse(&comment) { + Ok(md) => { + for node in md.root().descendants().filter(|p| p.has_tag_name("e")) { + if let (Some(value), Some(text)) = (node.attribute("name"), node.text()) { + comments.insert(value.to_string(), text.to_string()); + } + } + } + Err(e) => { + log::warn!("Could not parse HD MD comment : \n{comment}\n{e}"); + } + }; + self.comments = comments; + self.block_type = MetaDataBlockType::MdParsed; + self.raw_data = vec![]; // empty the data from block as already parsed + Ok(()) + } + /// Creates File History MetaData + pub fn create_fh(&mut self) { + let user_name = whoami::username().unwrap_or_else(|_| "unknown".to_string()); + let comments = format!( + " +created +mdfr +ratalco +0.1 +{user_name} +" + ); + let raw_comments = format!( + "{:\0 Result<()> { + let mut comments: HashMap = HashMap::new(); + // MD Block from FH Block, reading xml + let comment: String = self + .get_data_string()? + .trim_end_matches(['\n', '\r', ' ']) + .into(); // removes ending spaces + match roxmltree::Document::parse(&comment) { + Ok(md) => { + for node in md.root().descendants() { + let text = match node.text() { + Some(text) => text.to_string(), + None => String::new(), + }; + comments.insert(node.tag_name().name().to_string(), text); + } + } + Err(e) => { + log::warn!("Could not parse FH comment : \n{comment}\n{e}"); + } + }; + self.comments = comments; + self.block_type = MetaDataBlockType::MdParsed; + self.raw_data = vec![]; // empty the data from block as already parsed + Ok(()) + } + /// Generic xml parser without schema consideration + fn parse_generic_xml(&mut self) -> Result<()> { + let mut comments: HashMap = HashMap::new(); + let comment: String = self + .get_data_string()? 
+ .trim_end_matches(['\n', '\r', ' ']) + .into(); // removes ending spaces + match roxmltree::Document::parse(&comment) { + Ok(md) => { + for node in md.root().descendants() { + let text = match node.text() { + Some(text) => text.to_string(), + None => String::new(), + }; + if node.is_element() + && !text.is_empty() + && !node.tag_name().name().to_string().is_empty() + { + comments.insert(node.tag_name().name().to_string(), text); + } + } + } + Err(e) => { + log::warn!("Error parsing comment : \n{comment}\n{e}"); + } + }; + self.comments = comments; + self.block_type = MetaDataBlockType::MdParsed; + self.raw_data = vec![]; // empty the data from block as already parsed + Ok(()) + } + /// Writes the metadata to file + pub fn write(&self, writer: &mut W) -> Result<()> + where + W: Write + Seek, + { + writer + .write_le(&self.block) + .context("Could not write comment block header")?; + writer + .write_all(&self.raw_data) + .context("Could not write comment block data")?; + Ok(()) + } +} diff --git a/src/mdfinfo/mdfinfo4/mod.rs b/src/mdfinfo/mdfinfo4/mod.rs new file mode 100644 index 0000000..9cc6eca --- /dev/null +++ b/src/mdfinfo/mdfinfo4/mod.rs @@ -0,0 +1,868 @@ +//! Parsing of file metadata into MdfInfo4 struct +//! +//! This module contains MDF4 block types and parsing logic, split into +//! sub-modules per block type for maintainability. 
+ +// Sub-modules +mod block_header; +mod ca_block; +mod cc_block; +mod cg_block; +mod ch_block; +mod cn_block; +mod composition; +mod data_block; +mod dg_block; +mod ev_block; +mod fh_block; +mod hd_block; +mod metadata; +mod si_block; +mod sr_block; +pub mod at_block; + +// Re-exports for backward compatibility +pub use at_block::*; +pub use block_header::*; +pub use ca_block::*; +pub use cc_block::*; +pub use cg_block::*; +pub use ch_block::*; +pub use cn_block::*; +pub use composition::*; +pub use data_block::*; +pub use dg_block::*; +pub use ev_block::*; +pub use fh_block::*; +pub use hd_block::*; +pub use metadata::*; +pub use si_block::*; +pub use sr_block::*; + +use anyhow::{Context, Error, Result}; +use std::collections::{BTreeMap, HashMap, HashSet}; + +/// ChannelId : (Option, dg_pos, (cg_pos, rec_id), (cn_pos, rec_pos)) +pub(crate) type ChannelId = (Option, i64, (i64, u64), (i64, i32)); +pub(crate) type ChannelNamesSet = HashMap; +use std::fmt; +use std::sync::Arc; + +use arrow::array::Array; + +use crate::data_holder::channel_data::{ChannelData, try_from}; +use crate::data_holder::tensor_arrow::Order; +use crate::mdfreader::{DataSignature, MasterSignature}; +use crate::mdfinfo::IdBlock; + +/// MdfInfo4 is the struct holding whole metadata of mdf4.x files +/// * blocks with unique links are at top level like attachment, events and file history +/// * sharable blocks (most likely referenced multiple times and shared by several blocks) +/// that are in sharable fields and holds CC, SI, TX and MD blocks +/// * the dg fields nests cg itself nesting cn blocks and eventually compositions +/// (other cn or ca blocks) and conversion +/// * channel_names_set is the complete set of channel names contained in file +/// * in general the blocks are contained in HashMaps with key corresponding +/// to their position in the file +#[derive(Debug, Default, Clone)] +#[repr(C)] +pub struct MdfInfo4 { + /// file name string + pub file_name: String, + /// Identifier block + 
pub id_block: IdBlock, + /// header block + pub hd_block: Hd4, + /// file history blocks + pub fh: Fh, + /// attachment blocks + pub at: At, // attachments + /// event blocks + pub ev: HashMap, // events + /// data group block linking channel group/channel/conversion/compostion/..etc. and data block + pub dg: BTreeMap, // contains most of the file structure + /// cc, md, tx and si blocks that can be referenced by several blocks + pub sharable: SharableBlocks, + /// set of all channel names + pub channel_names_set: ChannelNamesSet, // set of channel names + /// channel hierarchy blocks + pub ch: HashMap, + /// whether the file was marked as unfinalized + pub is_unfinalized: bool, +} + +/// MdfInfo4's implementation +impl MdfInfo4 { + /// returns the hashmap with : + /// key = channel_name, + /// value = (master_name, + /// dg_position, + /// (cg.block_position, record_id), + /// (cn.block_position, cn_record_position)) + pub fn get_channel_id(&self, channel_name: &str) -> Option<&ChannelId> { + self.channel_names_set.get(channel_name) + } + /// Returns the channel's vector data if present in memory, otherwise None. + pub fn get_channel_data(&self, channel_name: &str) -> Option<&ChannelData> { + let mut data: Option<&ChannelData> = None; + if let Some((_master, dg_pos, (_cg_pos, rec_id), (_cn_pos, rec_pos))) = + self.get_channel_id(channel_name) + && let Some(dg) = self.dg.get(dg_pos) + && let Some(cg) = dg.cg.get(rec_id) + && let Some(cn) = cg.cn.get(rec_pos) + && !cn.data.is_empty() + { + data = Some(&cn.data); + } + data + } + /// Returns the channel's unit string. If it does not exist, it is an empty string. 
+ pub fn get_channel_unit(&self, channel_name: &str) -> Result> { + let mut unit: Option = None; + if let Some((_master, dg_pos, (_cg_pos, rec_id), (_cn_pos, rec_pos))) = + self.get_channel_id(channel_name) + && let Some(dg) = self.dg.get(dg_pos) + && let Some(cg) = dg.cg.get(rec_id) + && let Some(cn) = cg.cn.get(rec_pos) + { + unit = self.sharable.get_tx(cn.block.cn_md_unit)?; + } + Ok(unit) + } + /// Returns the channel's description. If it does not exist, it is an empty string + pub fn get_channel_desc(&self, channel_name: &str) -> Result> { + let mut desc: Option = None; + if let Some((_master, dg_pos, (_cg_pos, rec_id), (_cn_pos, rec_pos))) = + self.get_channel_id(channel_name) + && let Some(dg) = self.dg.get(dg_pos) + && let Some(cg) = dg.cg.get(rec_id) + && let Some(cn) = cg.cn.get(rec_pos) + { + desc = self.sharable.get_tx(cn.block.cn_md_comment)?; + } + Ok(desc) + } + /// returns the master channel associated to the input channel name + pub fn get_channel_master(&self, channel_name: &str) -> Option { + let mut master: Option = None; + if let Some((m, _dg_pos, (_cg_pos, _rec_idd), (_cn_pos, _rec_pos))) = + self.get_channel_id(channel_name) + { + master.clone_from(m); + } + master + } + /// returns type of master channel link to channel input in parameter: + /// 0 = None (normal data channels), 1 = Time (seconds), 2 = Angle (radians), + /// 3 = Distance (meters), 4 = Index (zero-based index values) + pub fn get_channel_master_type(&self, channel_name: &str) -> u8 { + let mut master_type: u8 = 0; // default to normal data channel + if let Some((_master, dg_pos, (_cg_pos, rec_id), (_cn_pos, rec_pos))) = + self.get_channel_id(channel_name) + && let Some(dg) = self.dg.get(dg_pos) + && let Some(cg) = dg.cg.get(rec_id) + && let Some(cn) = cg.cn.get(rec_pos) + { + master_type = cn.block.cn_sync_type; + } + master_type + } + /// returns the set of channel names + pub fn get_channel_names_set(&self) -> HashSet { + self.channel_names_set.keys().cloned().collect() + } 
+ /// returns the set of channel names that are in same channel group as input channel name + pub fn get_channel_names_cg_set(&self, channel_name: &str) -> HashSet { + if let Some((_master, dg_pos, (_cg_pos, rec_id), (_cn_pos, _rec_pos))) = + self.get_channel_id(channel_name) + { + let mut channel_list = HashSet::new(); + if let Some(dg) = self.dg.get(dg_pos) + && let Some(cg) = dg.cg.get(rec_id) + { + channel_list.clone_from(&cg.channel_names); + } + channel_list + } else { + HashSet::new() + } + } + /// returns a hashmap for which master channel names are keys and values its corresponding set of channel names + pub fn get_master_channel_names_set(&self) -> HashMap, HashSet> { + let mut channel_master_list: HashMap, HashSet> = HashMap::new(); + for (_dg_position, dg) in self.dg.iter() { + for (_record_id, cg) in dg.cg.iter() { + if let Some(list) = channel_master_list.get_mut(&cg.master_channel_name) { + list.extend(cg.channel_names.clone()); + } else { + channel_master_list + .insert(cg.master_channel_name.clone(), cg.channel_names.clone()); + } + } + } + channel_master_list + } + /// empty the channels' ndarray + pub fn clear_channel_data_from_memory(&mut self, channel_names: HashSet) -> Result<()> { + for channel_name in channel_names { + if let Some((_master, dg_pos, (_cg_pos, rec_id), (_cn_pos, rec_pos))) = + self.channel_names_set.get_mut(&channel_name) + && let Some(dg) = self.dg.get_mut(dg_pos) + && let Some(cg) = dg.cg.get_mut(rec_id) + && let Some(cn) = cg.cn.get_mut(rec_pos) + && !cn.data.is_empty() + { + cn.data = + cn.data + .zeros(cn.block.cn_data_type, 0, 0, (Vec::new(), Order::RowMajor))?; + } + } + Ok(()) + } + /// returns a new empty MdfInfo4 struct + pub fn new(file_name: &str, n_channels: usize) -> MdfInfo4 { + MdfInfo4 { + file_name: file_name.to_string(), + dg: BTreeMap::new(), + sharable: SharableBlocks::new(n_channels), + channel_names_set: HashMap::with_capacity(n_channels), + id_block: IdBlock::default(), + fh: Vec::new(), + at: 
HashMap::new(),
            ev: HashMap::new(),
            hd_block: Hd4::default(),
            ch: HashMap::new(),
            is_unfinalized: false,
        }
    }
    /// Adds a new channel in memory (no file modification)
    ///
    /// Builds the CN/CG/DG block chain describing `data` and registers the
    /// channel in `channel_names_set`; nothing is written to disk here.
    pub fn add_channel(
        &mut self,
        channel_name: String,
        data: ChannelData,
        data_signature: DataSignature,
        mut master: MasterSignature,
        unit: Option<String>,
        description: Option<String>,
    ) -> Result<(), Error> {
        let mut cg_block = Cg4Block::default();
        cg_block.cg_cycle_count = data_signature.len as u64;
        // Basic channel block
        let mut cn_block = Cn4Block::default();
        let machine_endian: bool = cfg!(target_endian = "big");
        cn_block.cn_data_type = data_signature.data_type;
        cn_block.cn_bit_count = data_signature.bit_count;
        let cn_pos = position_generator();
        cn_block.cn_sync_type = master.master_type.unwrap_or(0);

        // channel name
        let channel_name_position = position_generator();
        cn_block.cn_tx_name = channel_name_position;
        self.sharable
            .create_tx(channel_name_position, channel_name.to_string());

        // Channel array
        let mut list_size = data_signature.shape.0.iter().product(); // primitive list size is 1
        // MDF4 cn_data_type 15 and 16 are complex (LE/BE): two scalars per sample.
        // (fix: was `== 15 | 16`, i.e. `== 31`, which never matched either type)
        if data_signature.data_type == 15 || data_signature.data_type == 16 {
            list_size *= 2;
        }
        let data_ndim = data_signature.ndim - 1;
        let mut composition: Option<Composition> = None;
        if data_ndim > 0 {
            // first axis is the record axis; the remaining axes describe the array dims
            let data_dim_size = data
                .shape()
                .0
                .iter()
                .skip(1)
                .map(|x| *x as u64)
                .collect::<Vec<u64>>();
            let mut ca_block = Ca4Block::default();
            cg_block.cg_data_bytes = list_size as u32 * data_signature.byte_count;

            let composition_position = position_generator();
            cn_block.cn_composition = composition_position;
            ca_block.ca_ndim = data_ndim as u16;
            ca_block.ca_dim_size.clone_from(&data_dim_size);
            ca_block.ca_len = 48 + 8 * data_ndim as u64;
            composition = Some(Composition {
                block: Compo::CA(Box::new(ca_block)),
                compo: None,
            });
        }

        // master channel
        if master.master_flag {
            cn_block.cn_type = 2; // master channel
        } else {
cn_block.cn_type = 0; // data channel + if let Some(master_channel_name) = master.master_channel.clone() { + // looking for the master channel's cg position + if let Some((m, _dg_pos, (cg_pos, _rec_id), (_cn_pos, _rec_pos))) = + self.channel_names_set.get(&master_channel_name) + { + cg_block.cg_cg_master = Some(*cg_pos); + cg_block.cg_flags = 0b1000; + cg_block.cg_links = 7; // with cg_cg_master + // cg_block.cg_len = 112; + master.master_channel.clone_from(m); + } + } + } + if let Some(sync_type) = master.master_type { + cn_block.cn_sync_type = sync_type; + } + + // unit + if let Some(u) = unit { + let unit_position = position_generator(); + cn_block.cn_md_unit = unit_position; + self.sharable.create_tx(unit_position, u); + } + + // description + if let Some(d) = description { + let md_comment = position_generator(); + cn_block.cn_md_comment = md_comment; + self.sharable.create_tx(md_comment, d); + } + + // CN + let n_bytes = data_signature.byte_count; + let cn = Cn4 { + header: default_short_header(BlockType::CN), + unique_name: channel_name.to_string(), + data, + block: cn_block, + endian: machine_endian, + block_position: cn_pos, + pos_byte_beg: 0, + n_bytes, + composition, + list_size, + shape: data_signature.shape, + invalid_mask: None, + event_template: None, + }; + + // CG + let cg_pos = position_generator(); + cg_block.cg_data_bytes = n_bytes; + let mut cg = Cg4 { + header: default_short_header(BlockType::CG), + block: cg_block, + master_channel_name: master.master_channel.clone(), + cn: HashMap::new(), + block_position: cg_pos, + channel_names: HashSet::new(), + record_length: n_bytes, + vlsd_cg: None, + invalid_bytes: None, + sr: Vec::new(), + }; + cg.cn.insert(0, cn); + cg.channel_names.insert(channel_name.to_string()); + + // DG + let dg_pos = position_generator(); + let dg_block = Dg4Block::default(); + let mut dg = Dg4 { + block: dg_block, + cg: HashMap::new(), + }; + dg.cg.insert(0, cg); + self.dg.insert(dg_pos, dg); + + 
self.channel_names_set.insert( + channel_name, + (master.master_channel, dg_pos, (cg_pos, 0), (cn_pos, 0)), + ); + Ok(()) + } + /// Removes a channel in memory (no file modification) + pub fn remove_channel(&mut self, channel_name: &str) { + if let Some((_master, dg_pos, (_cg_pos, rec_id), (_cn_pos, rec_pos))) = + self.channel_names_set.get(channel_name) + && let Some(dg) = self.dg.get_mut(dg_pos) + && let Some(cg) = dg.cg.get_mut(rec_id) + { + cg.cn.remove(rec_pos); + cg.channel_names.remove(channel_name); + self.channel_names_set.remove(channel_name); + } + } + /// Renames a channel's name in memory + pub fn rename_channel(&mut self, channel_name: &str, new_name: &str) { + if let Some((master, dg_pos, (cg_pos, rec_id), (cn_pos, rec_pos))) = + self.channel_names_set.remove(channel_name) + && let Some(dg) = self.dg.get_mut(&dg_pos) + && let Some(cg) = dg.cg.get_mut(&rec_id) + && let Some(cn) = cg.cn.get_mut(&rec_pos) + { + cn.unique_name = new_name.to_string(); + cg.channel_names.remove(channel_name); + cg.channel_names.insert(new_name.to_string()); + if let Some(master_name) = &master + && master_name == channel_name + { + cg.master_channel_name = Some(new_name.to_string()); + cg.channel_names.iter().for_each(|channel| { + if let Some(val) = self.channel_names_set.get_mut(channel) { + val.0 = Some(new_name.to_string()); + val.1 = dg_pos; + val.2 = (cg_pos, rec_id); + val.3 = (cn_pos, rec_pos); + } + }); + } + + self.channel_names_set.insert( + new_name.to_string(), + (master, dg_pos, (cg_pos, rec_id), (cn_pos, rec_pos)), + ); + } + } + /// defines channel's data in memory + pub fn set_channel_data( + &mut self, + channel_name: &str, + data: Arc, + ) -> Result<(), Error> { + if let Some((_master, dg_pos, (_cg_pos, rec_id), (_cn_pos, rec_pos))) = + self.channel_names_set.get(channel_name) + && let Some(dg) = self.dg.get_mut(dg_pos) + && let Some(cg) = dg.cg.get_mut(rec_id) + && let Some(cn) = cg.cn.get_mut(rec_pos) + { + cn.data = try_from(&data).context("failed 
converting dyn array to ChannelData")?; + } + + Ok(()) + } + /// Sets the channel unit in memory + pub fn set_channel_unit(&mut self, channel_name: &str, unit: &str) { + if let Some((_master, dg_pos, (_cg_pos, rec_id), (_cn_pos, rec_pos))) = + self.channel_names_set.get(channel_name) + && let Some(dg) = self.dg.get_mut(dg_pos) + && let Some(cg) = dg.cg.get_mut(rec_id) + && let Some(cn) = cg.cn.get_mut(rec_pos) + { + // hopefully never 2 times the same position + let position = position_generator(); + self.sharable.create_tx(position, unit.to_string()); + cn.block.cn_md_unit = position; + } + } + /// Sets the channel description in memory + pub fn set_channel_desc(&mut self, channel_name: &str, desc: &str) { + if let Some((_master, dg_pos, (_cg_pos, rec_id), (_cn_pos, rec_pos))) = + self.channel_names_set.get(channel_name) + && let Some(dg) = self.dg.get_mut(dg_pos) + && let Some(cg) = dg.cg.get_mut(rec_id) + && let Some(cn) = cg.cn.get_mut(rec_pos) + { + let position = position_generator(); + self.sharable.create_tx(position, desc.to_string()); + cn.block.cn_md_comment = position; + } + } + /// Sets the channel's related master channel type in memory + pub fn set_channel_master_type(&mut self, master_name: &str, master_type: u8) { + if let Some((_master, dg_pos, (_cg_pos, rec_id), (_cn_pos, rec_pos))) = + self.channel_names_set.get(master_name) + && let Some(dg) = self.dg.get_mut(dg_pos) + && let Some(cg) = dg.cg.get_mut(rec_id) + && let Some(cn) = cg.cn.get_mut(rec_pos) + { + cn.block.cn_sync_type = master_type; + } + } + /// list attachments + pub fn list_attachments(&mut self) -> String { + let mut output = String::new(); + for (key, (block, _embedded_data)) in self.at.iter() { + output.push_str(&format!( + "position: {}, filename: {:?}, mimetype: {:?}, comment: {:?}\n ", + key, + self.sharable.get_tx(block.at_tx_filename), + self.sharable.get_tx(block.at_tx_mimetype), + self.sharable.get_comments(block.at_md_comment) + )) + } + output + } + /// get embedded 
data in attachment for a block at position + pub fn get_attachment_embedded_data(&self, position: i64) -> Option> { + if let Some(at) = self.at.get(&position) { + at.1.clone() + } else { + None + } + } + /// get list attachment block + pub fn get_attachment_block(&self, position: i64) -> Option { + if let Some((block, _)) = self.at.get(&position) { + Some(*block) + } else { + None + } + } + /// get all attachment blocks + pub fn get_attachment_blocks(&self) -> HashMap { + let mut output: HashMap = HashMap::new(); + for (key, (block, _data)) in self.at.iter() { + output.insert(*key, *block); + } + output + } + /// list file history entries + pub fn list_file_history(&mut self) -> String { + let mut output = String::new(); + for (i, fh) in self.fh.iter().enumerate() { + output.push_str(&format!( + "FH[{}]: {}, comment: {:?}\n", + i, + fh, + self.sharable.get_comments(fh.fh_md_comment), + )); + } + output + } + /// list events + pub fn list_events(&mut self) -> String { + let mut output = String::new(); + for (key, block) in self.ev.iter() { + output.push_str(&format!( + "position: {}, name: {:?}, comment: {:?}, scope: {:?}, attachment references: {:?}, event type: {}\n", + key, + self.sharable.get_tx(block.ev_tx_name), + self.sharable.get_comments(block.ev_md_comment), + block.get_scope_links(), + block.get_attachment_links(), + block.ev_type, + )) + } + output + } + /// list sample reduction blocks for all channel groups + pub fn list_sample_reductions(&self) -> String { + let mut output = String::new(); + for (_dg_pos, dg) in self.dg.iter() { + for (rec_id, cg) in dg.cg.iter() { + if !cg.sr.is_empty() { + output.push_str(&format!( + "Channel group (rec_id={}): {} sample reduction(s)\n", + rec_id, + cg.sr.len() + )); + for (i, sr) in cg.sr.iter().enumerate() { + output.push_str(&format!( + " SR[{}]: cycle_count={}, interval={}, sync_type={}, flags=0x{:02X}\n", + i, + sr.sr_cycle_count, + sr.sr_interval, + sr.get_sync_type_str(), + sr.sr_flags, + )); + } + } + } + } 
+ output + } + /// get all sample reduction blocks across all channel groups + /// Returns a vector of (dg_position, rec_id, sr_blocks) tuples + pub fn get_sample_reduction_blocks(&self) -> Vec<(i64, u64, Vec)> { + let mut result = Vec::new(); + for (&dg_pos, dg) in self.dg.iter() { + for (&rec_id, cg) in dg.cg.iter() { + if !cg.sr.is_empty() { + result.push((dg_pos, rec_id, cg.sr.clone())); + } + } + } + result + } + /// list source information blocks + pub fn list_source_information(&self) -> String { + let mut output = String::new(); + for (key, block) in self.sharable.si.iter() { + output.push_str(&format!( + "position: {}, name: {:?}, path: {:?}, type: {}, bus: {}\n", + key, + self.sharable.get_tx(block.si_tx_name), + self.sharable.get_tx(block.si_tx_path), + block.get_type_str(), + block.get_bus_type_str(), + )) + } + output + } + /// get all source information blocks + pub fn get_source_information_blocks(&self) -> HashMap { + self.sharable.si.clone() + } + /// get event block from its position + pub fn get_event_block(&self, position: i64) -> Option { + self.ev.get(&position).cloned() + } + /// get all event blocks + pub fn get_event_blocks(&self) -> HashMap { + self.ev.clone() + } + /// Get a channel hierarchy block from its position + pub fn get_channel_hierarchy_block(&self, position: i64) -> Option { + self.ch.get(&position).cloned() + } + /// Get all channel hierarchy blocks + pub fn get_channel_hierarchy_blocks(&self) -> HashMap { + self.ch.clone() + } + /// List channel hierarchy in a human-readable format + pub fn list_channel_hierarchy(&self) -> String { + let mut output = String::new(); + // Find root blocks (blocks not referenced as children or siblings by any other block) + let mut non_root_positions: HashSet = HashSet::new(); + for block in self.ch.values() { + if block.ch_ch_first > 0 { + non_root_positions.insert(block.ch_ch_first); + } + if block.ch_ch_next > 0 { + non_root_positions.insert(block.ch_ch_next); + } + } + + let mut roots: Vec = 
self + .ch + .keys() + .filter(|pos| !non_root_positions.contains(pos)) + .copied() + .collect(); + roots.sort(); + + for root_pos in roots { + self.format_hierarchy_level(&mut output, root_pos, 0); + } + output + } + /// Helper to format a hierarchy level recursively + fn format_hierarchy_level(&self, output: &mut String, position: i64, depth: usize) { + if let Some(block) = self.ch.get(&position) { + let indent = " ".repeat(depth); + let name = self + .sharable + .get_tx(block.ch_tx_name) + .ok() + .flatten() + .unwrap_or_else(|| "".to_string()); + + output.push_str(&format!( + "{}[{}] {} (elements={})\n", + indent, + block.get_type_str(), + name, + block.ch_element_count + )); + + // List elements (each element is a DG/CG/CN triplet) + for i in 0..block.ch_element_count as usize { + let base_idx = i * 3; + if base_idx + 2 < block.ch_element.len() { + let dg_pos = block.ch_element[base_idx]; + let cg_pos = block.ch_element[base_idx + 1]; + let cn_pos = block.ch_element[base_idx + 2]; + output.push_str(&format!( + "{} -> DG:{} CG:{} CN:{}\n", + indent, dg_pos, cg_pos, cn_pos + )); + } + } + + // Traverse children first + if block.ch_ch_first > 0 { + self.format_hierarchy_level(output, block.ch_ch_first, depth + 1); + } + + // Then traverse siblings at same level + if block.ch_ch_next > 0 { + self.format_hierarchy_level(output, block.ch_ch_next, depth); + } + } + } + /// Returns a concise one-line summary of the MDF4 file + pub fn summary(&self) -> String { + let total_channels = self.channel_names_set.len(); + let total_dgs = self.dg.len(); + let total_events = self.ev.len(); + let total_attachments = self.at.len(); + format!( + "MDF4 v{}: {} DGs, {} channels, {} events, {} attachments", + self.id_block.id_ver, total_dgs, total_channels, total_events, total_attachments + ) + } + /// Formats the channel list with optional data preview (first/last values) + /// If `show_data` is true, shows first and last values for channels with data + pub fn format_channels(&self, 
show_data: bool) -> String { + let mut output = String::new(); + for (master, list) in self.get_master_channel_names_set().iter() { + if let Some(master_name) = master { + output.push_str(&format!("\nMaster: {}\n", master_name)); + } else { + output.push_str("\nWithout Master channel\n"); + } + for channel in list.iter() { + let unit = self.get_channel_unit(channel).ok().flatten(); + let desc = self.get_channel_desc(channel).ok().flatten(); + output.push_str(&format!(" {} ", channel)); + if show_data + && let Some(data) = self.get_channel_data(channel) + && !data.is_empty() + { + output.push_str(&format!("[{}] ", data.len())); + } + if let Some(u) = unit { + output.push_str(&format!("\"{}\" ", u)); + } + if let Some(d) = desc + && !d.is_empty() + { + output.push_str(&format!("// {}", d)); + } + output.push('\n'); + } + } + output + } + /// Formats header comments + pub fn format_header_comments(&self) -> String { + let mut output = String::new(); + let comments = self.sharable.get_hd_comments(self.hd_block.hd_md_comment); + for (tag, text) in comments.iter() { + output.push_str(&format!("{}: {}\n", tag, text)); + } + output + } +} + +/// creates random negative position +pub fn position_generator() -> i64 { + // hopefully never 2 times the same position + let mut position = rand::random::(); + if position > 0 { + // make sure position is negative to avoid interference with existing positions in file + position = -position; + } + position +} + +/// MdfInfo4 display implementation +impl fmt::Display for MdfInfo4 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!(f, "{}", self.summary())?; + writeln!(f, "File: {}", self.file_name)?; + writeln!(f, "{}", self.hd_block)?; + let header_comments = self.format_header_comments(); + if !header_comments.is_empty() { + writeln!(f, "{}", header_comments)?; + } + write!(f, "{}", self.format_channels(false)) + } +} + +/// parses mdfinfo structure to make channel names unique +/// creates channel names set 
and links master channels to set of channels +pub fn build_channel_db( + dg: &mut BTreeMap, + sharable: &SharableBlocks, + n_cg: usize, + n_cn: usize, +) -> ChannelNamesSet { + let mut channel_list: ChannelNamesSet = HashMap::with_capacity(n_cn); + let mut master_channel_list: HashMap = HashMap::with_capacity(n_cg); + // creating channel list for whole file and making channel names unique + dg.iter_mut().for_each(|(dg_position, dg)| { + dg.cg.iter_mut().for_each(|(record_id, cg)| { + let gn = cg.get_cg_name(sharable); + let gs = cg.get_cg_source_name(sharable); + let gp = cg.get_cg_source_path(sharable); + cg.cn.iter_mut().for_each(|(cn_record_position, cn)| { + if channel_list.contains_key(&cn.unique_name) { + let mut changed: bool = false; + let space_char = String::from(" "); + // create unique channel name + if let Ok(Some(cs)) = cn.get_cn_source_name(sharable) { + cn.unique_name.push_str(&space_char); + cn.unique_name.push_str(&cs); + changed = true; + } + if let Ok(Some(cp)) = cn.get_cn_source_path(sharable) { + cn.unique_name.push_str(&space_char); + cn.unique_name.push_str(&cp); + changed = true; + } + if let Ok(Some(name)) = &gn { + cn.unique_name.push_str(&space_char); + cn.unique_name.push_str(name); + changed = true; + } + if let Ok(Some(source)) = &gs { + cn.unique_name.push_str(&space_char); + cn.unique_name.push_str(source); + changed = true; + } + if let Ok(Some(path)) = &gp { + cn.unique_name.push_str(&space_char); + cn.unique_name.push_str(path); + changed = true; + } + // No souce or path name to make channel unique + if !changed || channel_list.contains_key(&cn.unique_name) { + // extend name with channel block position, unique + cn.unique_name.push_str(&space_char); + cn.unique_name.push_str(&cn.block_position.to_string()); + } + }; + channel_list.insert( + cn.unique_name.clone(), + ( + None, // computes at second step master channel because of cg_cg_master + *dg_position, + (cg.block_position, *record_id), + (cn.block_position, 
*cn_record_position), + ), + ); + if cn.block.cn_type == 2 || cn.block.cn_type == 3 { + // Master channel + master_channel_list.insert(cg.block_position, cn.unique_name.clone()); + } + }); + }); + }); + // identifying master channels + let avg_ncn_per_cg = if n_cg > 0 { n_cn / n_cg } else { 0 }; + dg.iter_mut().for_each(|(_dg_position, dg)| { + dg.cg.iter_mut().for_each(|(_record_id, cg)| { + let mut cg_channel_list: HashSet = HashSet::with_capacity(avg_ncn_per_cg); + let mut master_channel_name: Option = None; + if let Some(name) = master_channel_list.get(&cg.block_position) { + master_channel_name = Some(name.to_string()); + } else if let Some(cg_cg_master) = cg.block.cg_cg_master { + // master is in another cg block, possible from 4.2 + if let Some(name) = master_channel_list.get(&cg_cg_master) { + master_channel_name = Some(name.to_string()); + } + } + cg.cn.iter_mut().for_each(|(_cn_record_position, cn)| { + cg_channel_list.insert(cn.unique_name.clone()); + // assigns master in channel_list + if let Some(id) = channel_list.get_mut(&cn.unique_name) { + id.0.clone_from(&master_channel_name); + } + }); + cg.channel_names = cg_channel_list; + cg.master_channel_name = master_channel_name; + }); + }); + channel_list +} diff --git a/src/mdfinfo/mdfinfo4/si_block.rs b/src/mdfinfo/mdfinfo4/si_block.rs new file mode 100644 index 0000000..f6fb299 --- /dev/null +++ b/src/mdfinfo/mdfinfo4/si_block.rs @@ -0,0 +1,95 @@ +//! Source Information block (SIBLOCK) for MDF4 +use anyhow::Result; +use binrw::binrw; +use std::fmt::{self, Display}; + +use super::block_header::SharableBlocks; + +/// Si4 Source Information block struct +#[derive(Debug, PartialEq, Eq, Default, Copy, Clone)] +#[binrw] +#[br(little)] +#[repr(C)] +pub struct Si4Block { + // si_id: [u8; 4], // ##SI + // reserved: [u8; 4], // reserved + // si_len: u64, // Length of block in bytes + /// # of links + si_links: u64, + /// Pointer to TXBLOCK with name (identification) of source (must not be NIL). 
The source name must be according to naming rules stated in 4.4.2 Naming Rules. + pub si_tx_name: i64, + /// Pointer to TXBLOCK with (tool-specific) path of source (can be NIL). The path string must be according to naming rules stated in 4.4.2 Naming Rules. + pub si_tx_path: i64, + // Each tool may generate a different path string. The only purpose is to ensure uniqueness as explained in section 4.4.3 Identification of Channels. As a recommendation, the path should be a human readable string containing additional information about the source. However, the path string should not be used to store this information in order to retrieve it later by parsing the string. Instead, additional source information should be stored in generic or custom XML fields in the comment MDBLOCK si_md_comment. + /// Pointer to source comment and additional information (TXBLOCK or MDBLOCK) (can be NIL) + pub si_md_comment: i64, + + // Data Members + /// Source type additional classification of source (see SI_T_xxx) + pub si_type: u8, + /// Bus type additional classification of used bus (should be 0 for si_type >= 3) (see SI_BUS_xxx) + pub si_bus_type: u8, + /// Flags The value contains the following bit flags (see SI_F_xxx)): + pub si_flags: u8, + /// reserved + si_reserved: [u8; 5], +} + +impl Si4Block { + /// returns the source name + pub fn get_si_source_name(&self, sharable: &SharableBlocks) -> Result> { + sharable.get_tx(self.si_tx_name) + } + /// returns the source path + pub fn get_si_path_name(&self, sharable: &SharableBlocks) -> Result> { + sharable.get_tx(self.si_tx_path) + } + /// Calculate the total block size (header + links + data) + pub fn calculate_block_size(&self) -> i64 { + // 16 (short header) + 8 (link count) + 8*3 (3 links) + 8 (data members) + 16 + 8 + 24 + 8 + } + + /// Returns the source type as a string description + pub fn get_type_str(&self) -> &'static str { + match self.si_type { + 0 => "Other", + 1 => "ECU", + 2 => "Bus", + 3 => "I/O", + 4 => "Tool", + 5 => 
"User", + _ => "Unknown", + } + } + + /// Returns the bus type as a string description + pub fn get_bus_type_str(&self) -> &'static str { + match self.si_bus_type { + 0 => "None", + 1 => "Other", + 2 => "CAN", + 3 => "LIN", + 4 => "MOST", + 5 => "FlexRay", + 6 => "K-Line", + 7 => "Ethernet", + 8 => "USB", + _ => "Unknown", + } + } +} + +impl Display for Si4Block { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "SI: type={} ({}) bus={} ({}) flags=0x{:02X}", + self.get_type_str(), + self.si_type, + self.get_bus_type_str(), + self.si_bus_type, + self.si_flags + ) + } +} diff --git a/src/mdfinfo/mdfinfo4/sr_block.rs b/src/mdfinfo/mdfinfo4/sr_block.rs new file mode 100644 index 0000000..72c1e7a --- /dev/null +++ b/src/mdfinfo/mdfinfo4/sr_block.rs @@ -0,0 +1,91 @@ +//! Sample Reduction block (SRBLOCK) for MDF4 +use binrw::binrw; +use std::fmt::{self, Display}; + +/// SR4 Sample Reduction block struct (Section 6.29 of MDF 4.3 spec) +#[derive(Debug, PartialEq, Clone)] +#[binrw] +#[br(little)] +#[repr(C)] +pub struct Sr4Block { + /// Pointer to next sample reduction block (SRBLOCK) (can be NIL) + pub sr_sr_next: i64, + /// Pointer to reduction data block (RD-/RV-/DZBLOCK or DL-/LD-/HLBLOCK) + pub sr_data: i64, + /// Number of cycles, i.e. 
number of sample reduction records + pub sr_cycle_count: u64, + /// Length of sample interval used to calculate the reduction records (unit depends on sr_sync_type) + pub sr_interval: f64, + /// Sync type: 1=time(s), 2=angle(rad), 3=distance(m), 4=index + pub sr_sync_type: u8, + /// Flags: bit 0 = invalidation bytes present, bit 1 = dominant invalidation bit + pub sr_flags: u8, + /// Reserved + sr_reserved: [u8; 6], +} + +impl Default for Sr4Block { + fn default() -> Self { + Sr4Block { + sr_sr_next: 0, + sr_data: 0, + sr_cycle_count: 0, + sr_interval: 0.0, + sr_sync_type: 1, + sr_flags: 0, + sr_reserved: [0; 6], + } + } +} + +// ============================================================================= +// Sample Reduction (SR) Constants and Types +// ============================================================================= + +/// SR flag bit 0: Invalidation bytes present in reduction records +#[allow(dead_code)] +pub const SR_F_INVALIDATION_BYTES: u8 = 1 << 0; +/// SR flag bit 1: Dominant invalidation bit (if set, invalid bit indicates "at least one invalid sample") +#[allow(dead_code)] +pub const SR_F_DOMINANT_INVALIDATION: u8 = 1 << 1; + +/// SR sync type: Time based (seconds) +pub const SR_SYNC_TIME: u8 = 1; +/// SR sync type: Angle based (radians) +pub const SR_SYNC_ANGLE: u8 = 2; +/// SR sync type: Distance based (meters) +pub const SR_SYNC_DISTANCE: u8 = 3; +/// SR sync type: Index based (sample count) +pub const SR_SYNC_INDEX: u8 = 4; + +impl Sr4Block { + /// Returns true if invalidation bytes are present in reduction records + #[allow(dead_code)] + pub fn has_invalidation_bytes(&self) -> bool { + (self.sr_flags & SR_F_INVALIDATION_BYTES) != 0 + } + + /// Returns the sync type as a human-readable string + pub fn get_sync_type_str(&self) -> &'static str { + match self.sr_sync_type { + SR_SYNC_TIME => "Time (seconds)", + SR_SYNC_ANGLE => "Angle (radians)", + SR_SYNC_DISTANCE => "Distance (meters)", + SR_SYNC_INDEX => "Index (samples)", + _ => 
"Unknown", + } + } +} + +impl Display for Sr4Block { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "SR: cycle_count={} interval={} sync_type={} flags=0x{:02X}", + self.sr_cycle_count, + self.sr_interval, + self.get_sync_type_str(), + self.sr_flags + ) + } +} diff --git a/src/mdfr.rs b/src/mdfr.rs index 26b7057..eedec4f 100644 --- a/src/mdfr.rs +++ b/src/mdfr.rs @@ -512,6 +512,11 @@ df=polars.DataFrame(series) } }) } + /// list file history entries (MDF 4.x only) + pub fn list_file_history(&mut self) -> PyResult { + let Mdfr(mdf) = self; + Ok(mdf.mdf_info.list_file_history()) + } /// get file history pub fn get_file_history_blocks(&mut self) -> Py { let Mdfr(mdf) = self; @@ -550,6 +555,64 @@ df=polars.DataFrame(series) let Mdfr(mdf) = self; Ok(mdf.mdf_info.list_sample_reductions()) } + /// get source information blocks (MDF 4.x only) + pub fn get_source_information_blocks(&mut self) -> Py { + let Mdfr(mdf) = self; + let sibs = mdf.mdf_info.get_source_information_blocks(); + pyo3::Python::attach(|py| { + if let Some(si_map) = sibs { + let sil = PyList::empty(py); + for (position, sib) in si_map { + let sidict = PyDict::new(py); + let _ = sidict.set_item("position", position); + if let Ok(res) = mdf.mdf_info.get_tx(sib.si_tx_name) { + let _ = sidict.set_item("name", res); + } + if let Ok(res) = mdf.mdf_info.get_tx(sib.si_tx_path) { + let _ = sidict.set_item("path", res); + } + let _ = + sidict.set_item("comment", mdf.mdf_info.get_comments(sib.si_md_comment)); + let _ = sidict.set_item("type", sib.get_type_str()); + let _ = sidict.set_item("type_id", sib.si_type); + let _ = sidict.set_item("bus_type", sib.get_bus_type_str()); + let _ = sidict.set_item("bus_type_id", sib.si_bus_type); + let _ = sidict.set_item("flags", sib.si_flags); + let _ = sil.append(sidict); + } + sil.into() + } else { + py.None() + } + }) + } + /// get sample reduction blocks (MDF 4.x only) + pub fn get_sample_reduction_blocks(&self) -> Py { + let Mdfr(mdf) = self; 
+ let srbs = mdf.mdf_info.get_sample_reduction_blocks(); + pyo3::Python::attach(|py| { + if let Some(sr_list) = srbs { + let srl = PyList::empty(py); + for (dg_pos, rec_id, sr_blocks) in sr_list { + for (i, sr) in sr_blocks.iter().enumerate() { + let srdict = PyDict::new(py); + let _ = srdict.set_item("dg_position", dg_pos); + let _ = srdict.set_item("rec_id", rec_id); + let _ = srdict.set_item("index", i); + let _ = srdict.set_item("cycle_count", sr.sr_cycle_count); + let _ = srdict.set_item("interval", sr.sr_interval); + let _ = srdict.set_item("sync_type", sr.get_sync_type_str()); + let _ = srdict.set_item("sync_type_id", sr.sr_sync_type); + let _ = srdict.set_item("flags", sr.sr_flags); + let _ = srl.append(srdict); + } + } + srl.into() + } else { + py.None() + } + }) + } /// get channel hierarchy blocks (MDF 4.x only) pub fn get_channel_hierarchy_blocks(&mut self) -> Py { let Mdfr(mdf) = self; @@ -565,19 +628,7 @@ df=polars.DataFrame(series) } let _ = chdict.set_item("comment", mdf.mdf_info.get_comments(chb.ch_md_comment)); - let type_name = match chb.ch_type { - 0 => "Group", - 1 => "Function", - 2 => "Structure", - 3 => "Map list", - 4 => "Input variables", - 5 => "Output variables", - 6 => "Local variables", - 7 => "Defined calibration objects", - 8 => "Referenced calibration objects", - _ => "Unknown", - }; - let _ = chdict.set_item("type", type_name); + let _ = chdict.set_item("type", chb.get_type_str()); let _ = chdict.set_item("type_id", chb.ch_type); let _ = chdict.set_item("element_count", chb.ch_element_count); let _ = chdict.set_item("first_child", chb.ch_ch_first); @@ -679,154 +730,95 @@ pyplot.show() } /// display a representation of mdfinfo object content fn __repr__(&mut self) -> PyResult { - let mut output: String; + let mut output = String::new(); let format_option = FormatOptions::new(); + match &mut self.0.mdf_info { MdfInfo::V3(mdfinfo3) => { - output = format!("Version : {}\n", mdfinfo3.id_block.id_ver); - writeln!( - output, - "Header :\n 
Author: {} Organisation:{}", - mdfinfo3.hd_block.hd_author, mdfinfo3.hd_block.hd_organization - ) - .context("cannot print author and organisation")?; - writeln!( - output, - "Project: {} Subject:{}", - mdfinfo3.hd_block.hd_project, mdfinfo3.hd_block.hd_subject - ) - .context("cannot print project and subject")?; - writeln!( - output, - "Date: {:?} Time:{:?}", - mdfinfo3.hd_block.hd_date, mdfinfo3.hd_block.hd_time - ) - .context("cannot print date and time")?; - write!(output, "Comments: {}", mdfinfo3.hd_comment) - .context("cannot print comments")?; - for (master, list) in mdfinfo3.get_master_channel_names_set().iter() { - if let Some(master_name) = master { - writeln!(output, "\nMaster: {master_name}") - .context("cannot print master channel name")?; - } else { - writeln!(output, "\nWithout Master channel") - .context("cannot print thre is no master channel")?; - } - for channel in list.iter() { - let unit = self - .get_channel_unit(channel.to_string()) - .context("failed printing channel unit")? - .unwrap_or_default(); - let desc = self - .get_channel_desc(channel.to_string()) - .context("failed printing channel description")? 
- .unwrap_or_default(); - write!(output, " {channel} ").context("cannot print channel name")?; - if let Some(data) = self.0.get_channel_data(channel) { - if !data.is_empty() { - let array = &data.as_ref(); - let displayer = ArrayFormatter::try_new(array, &format_option) - .context("failed creating formatter for arrow array")?; - write!(&mut output, "{}", displayer.value(0)) - .context("failed writing first value of array")?; - write!(output, " ") - .context("cannot print simple space character")?; - write!(&mut output, "{}", displayer.value(data.len() - 1)) - .context("failed writing last value of array")?; - } - writeln!( - output, - " {unit:?} {desc:?} " - ).context("cannot print channel unit and description with first and last item")?; - } else { - writeln!(output, " {unit:?} {desc:?} ") - .context("cannot print channel unit and description")?; - } - } - } - output.push_str("\n "); + // Use helper methods for header + writeln!(output, "{}", mdfinfo3.summary()) + .context("cannot print summary")?; + writeln!(output, "{}", mdfinfo3.format_header()) + .context("cannot print header")?; } MdfInfo::V4(mdfinfo4) => { - output = format!("Version : {}\n", mdfinfo4.id_block.id_ver); - writeln!(output, "{}", mdfinfo4.hd_block).context("cannot print header block")?; - let comments = &mdfinfo4 - .sharable - .get_comments(mdfinfo4.hd_block.hd_md_comment); - for c in comments.iter() { - writeln!(output, "{} {}", c.0, c.1).context("cannot print header comments")?; + // Use helper methods for header + writeln!(output, "{}", mdfinfo4.summary()) + .context("cannot print summary")?; + writeln!(output, "{}", mdfinfo4.hd_block) + .context("cannot print header block")?; + let header_comments = mdfinfo4.format_header_comments(); + if !header_comments.is_empty() { + write!(output, "{}", header_comments) + .context("cannot print header comments")?; } - // Source Information + // MDF4-specific sections let si_info = mdfinfo4.list_source_information(); if !si_info.is_empty() { 
writeln!(output, "\n--- Source Information ---") .context("cannot print source info header")?; write!(output, "{}", si_info).context("cannot print source information")?; } - // Attachments let at_info = mdfinfo4.list_attachments(); if !at_info.is_empty() { writeln!(output, "\n--- Attachments ---") .context("cannot print attachments header")?; write!(output, "{}", at_info).context("cannot print attachments")?; } - // Events let ev_info = mdfinfo4.list_events(); if !ev_info.is_empty() { writeln!(output, "\n--- Events ---").context("cannot print events header")?; write!(output, "{}", ev_info).context("cannot print events")?; } - // Channel Hierarchy let ch_info = mdfinfo4.list_channel_hierarchy(); if !ch_info.is_empty() { writeln!(output, "\n--- Channel Hierarchy ---") .context("cannot print channel hierarchy header")?; write!(output, "{}", ch_info).context("cannot print channel hierarchy")?; } - // Channels - writeln!(output, "\n--- Channels ---").context("cannot print channels header")?; - for (master, list) in mdfinfo4.get_master_channel_names_set().iter() { - if let Some(master_name) = master { - writeln!(output, "\nMaster: {master_name}") - .context("cannot print master channel name")?; - } else { - writeln!(output, "\nWithout Master channel") - .context("cannot print thre is no master channel")?; - } - for channel in list.iter() { - let unit = self - .get_channel_unit(channel.to_string()) - .context("failed printing channel unit")? - .unwrap_or_default(); - let desc = self - .get_channel_desc(channel.to_string()) - .context("failed printing channel description")? 
- .unwrap_or_default(); - write!(output, " {channel} ").context("cannot print channel name")?; - if let Some(data) = self.0.get_channel_data(channel) { - if !data.is_empty() { - let array = &data.as_ref(); - let displayer = ArrayFormatter::try_new(array, &format_option) - .context("failed creating formatter for arrow array")?; - write!(&mut output, "{}", displayer.value(0)) - .context("cannot print channel data")?; - write!(output, " .. ") - .context("cannot print simple space character")?; - write!(&mut output, "{}", displayer.value(data.len() - 1)) - .context("cannot channel data")?; - } - writeln!( - output, - " {unit:?} {desc:?} " - ).context("cannot print channel unit and description with first and last item")?; - } else { - writeln!(output, " {unit:?} {desc:?} ") - .context("cannot print channel unit and description")?; - } + } + } + + // Channels section (common for both versions, with data preview) + writeln!(output, "\n--- Channels ---").context("cannot print channels header")?; + for (master, list) in self.0.mdf_info.get_master_channel_names_set().iter() { + if let Some(master_name) = master { + writeln!(output, "\nMaster: {master_name}") + .context("cannot print master channel name")?; + } else { + writeln!(output, "\nWithout Master channel") + .context("cannot print no master channel")?; + } + for channel in list.iter() { + let unit = self.get_channel_unit(channel.to_string()) + .context("failed getting channel unit")? + .unwrap_or_default(); + let desc = self.get_channel_desc(channel.to_string()) + .context("failed getting channel description")? + .unwrap_or_default(); + write!(output, " {channel} ").context("cannot print channel name")?; + // Data preview (first .. 
last values) + if let Some(data) = self.0.get_channel_data(channel) + && !data.is_empty() + { + write!(output, "[{}] ", data.len()) + .context("cannot print data length")?; + let array = &data.as_ref(); + if let Ok(displayer) = ArrayFormatter::try_new(array, &format_option) { + write!(output, "{} .. {} ", displayer.value(0), displayer.value(data.len() - 1)) + .context("cannot print data preview")?; } } - output.push_str("\n "); + if !unit.is_empty() { + write!(output, "\"{}\" ", unit).context("cannot print unit")?; + } + if !desc.is_empty() { + write!(output, "// {}", desc).context("cannot print description")?; + } + writeln!(output).context("cannot print newline")?; } } + Ok(output) } } diff --git a/src/mdfreader/mdfreader4.rs b/src/mdfreader/mdfreader4.rs index 0186965..339f244 100644 --- a/src/mdfreader/mdfreader4.rs +++ b/src/mdfreader/mdfreader4.rs @@ -355,14 +355,15 @@ fn read_data( Ok(position) } -/// Reads and concatenates data from any data block type (##DT, ##SD, ##VD, ##DZ, ##DL, ##HL). +/// Reads and concatenates data from any data block type (##DT, ##SD, ##VD, ##RD, ##RV, ##DZ, ##DL, ##HL). /// The block id must already have been read. Returns concatenated raw bytes and updated position. 
fn read_all_blocks_to_bytes( rdr: &mut BufReader<&File>, id: [u8; 4], mut position: i64, ) -> Result, i64)>> { - if id == *b"##DT" || id == *b"##SD" || id == *b"##VD" { + // ##DT, ##SD, ##VD are regular data blocks; ##RD, ##RV are reduction data blocks (same format) + if id == *b"##DT" || id == *b"##SD" || id == *b"##VD" || id == *b"##RD" || id == *b"##RV" { let block_header: Dt4Block = rdr.read_le().context("Could not read data block header")?; let mut buf = vec![0u8; block_header.len as usize - 24]; rdr.read_exact(&mut buf) @@ -1841,7 +1842,24 @@ fn read_ds( .compo .as_ref() .map(|c| ((**ds_block).clone(), c.clone())) + } else if ds_block.ds_mode == 1 { + // Data description mode - data layout is described by an external + // attachment file (e.g., FIBEX, DBC, ARXML) pointed to by ds_cn_composition. + // This mode is not yet fully supported. + warn!( + "Channel '{}' uses data description mode (ds_mode=1). \ + Data layout is described by an external attachment. \ + This mode requires external description file parsing \ + (FIBEX, DBC, ARXML) which is not yet implemented. 
\ + Data will be stored as raw bytes.", + cn.unique_name + ); + None } else { + warn!( + "Channel '{}' has unknown DSBLOCK mode: {}", + cn.unique_name, ds_block.ds_mode + ); None } } else { @@ -2029,3 +2047,8 @@ fn store_decoded_values_in_channel( } Ok(()) } + +// ============================================================================= +// Sample Reduction Data Reading (RDBLOCK/RVBLOCK/RIBLOCK) +// ============================================================================= + diff --git a/src/mdfwriter/mdfwriter4.rs b/src/mdfwriter/mdfwriter4.rs index 23f28c4..12bf9ab 100644 --- a/src/mdfwriter/mdfwriter4.rs +++ b/src/mdfwriter/mdfwriter4.rs @@ -15,14 +15,15 @@ use crate::{ mdfinfo::{ MdfInfo, mdfinfo4::{ - BlockType, Blockheader4, Ca4Block, Ca4BlockMembers, Cg4, Cg4Block, Cn4, Cn4Block, - Compo, Composition, Dg4, Dg4Block, Dz4Block, FhBlock, Ld4Block, MdfInfo4, MetaData, - MetaDataBlockType, default_short_header, + At4Block, BlockType, Blockheader4, Ca4Block, Ca4BlockMembers, Cg4, Cg4Block, Cn4, + Cn4Block, Compo, Composition, Dg4, Dg4Block, Dz4Block, Ev4Block, FhBlock, Ld4Block, + MdfInfo4, MetaData, MetaDataBlockType, Si4Block, default_short_header, }, }, mdfreader::Mdf, }; use anyhow::{Context, Error, Result, bail}; +use arrow::array::Array; use arrow::buffer::NullBuffer; use binrw::BinWriterExt; use crossbeam_channel::bounded; @@ -42,16 +43,290 @@ pub fn mdfwriter4(mdf: &Mdf, file_name: &str, compression: bool) -> Result let n_channels = mdf.mdf_info.get_channel_names_set().len(); let mut new_info = MdfInfo4::new(file_name, n_channels); let mut pointer: i64 = 168; // after HD block - // FH block - new_info.fh = Vec::new(); - let mut fh = FhBlock::default(); + + // Copy existing FH blocks from source and add a new one for mdfr + // FH blocks are stored as a linked list new_info.hd_block.hd_fh_first = pointer; + let mut fh_blocks: Vec<(FhBlock, MetaData)> = Vec::new(); + + // Copy existing FH blocks from source file + for fh in info.fh.iter() { + let mut 
new_fh = *fh; + let _fh_position = pointer; + pointer += 56; // FH block size + + // Get the comment MD block from source + let fh_comment_md = if fh.fh_md_comment != 0 { + info.sharable.md_tx.get(&fh.fh_md_comment).cloned() + } else { + None + }; + + // Create comment block + let comment_md = if let Some(md) = fh_comment_md { + new_fh.fh_md_comment = pointer; + pointer += md.block.hdr_len as i64; + md + } else { + // Create empty comment if source had none + let mut empty_md = MetaData::new(MetaDataBlockType::MdBlock, BlockType::FH); + empty_md.set_data_buffer(b""); + new_fh.fh_md_comment = pointer; + pointer += empty_md.block.hdr_len as i64; + empty_md + }; + + // Set next pointer (will be updated below) + new_fh.fh_fh_next = pointer; + + fh_blocks.push((new_fh, comment_md)); + } + + // Add new FH block documenting mdfr modification + let mut mdfr_fh = FhBlock::default(); + let mdfr_fh_position = pointer; pointer += 56; - // Writes FH comments - fh.fh_md_comment = pointer; - let mut fh_comments = MetaData::new(MetaDataBlockType::MdBlock, BlockType::FH); - fh_comments.create_fh(); - pointer += fh_comments.block.hdr_len as i64; + + // Update the last copied FH block to point to mdfr FH block + if let Some((last_fh, _)) = fh_blocks.last_mut() { + last_fh.fh_fh_next = mdfr_fh_position; + } + + // Create mdfr comment + mdfr_fh.fh_md_comment = pointer; + let mut mdfr_fh_comments = MetaData::new(MetaDataBlockType::MdBlock, BlockType::FH); + mdfr_fh_comments.create_fh(); + pointer += mdfr_fh_comments.block.hdr_len as i64; + mdfr_fh.fh_fh_next = 0; // End of list + + fh_blocks.push((mdfr_fh, mdfr_fh_comments)); + + // Store FH blocks in new_info + for (fh, _) in &fh_blocks { + new_info.fh.push(*fh); + } + + // Copy events from the source file + // Events are stored as a linked list starting from hd_ev_first + let mut ev_blocks: Vec<(i64, Ev4Block, Option, Option)> = Vec::new(); + if !info.ev.is_empty() { + new_info.hd_block.hd_ev_first = pointer; + + for (orig_pos, ev) in 
info.ev.iter() { + let ev_block_position = pointer; + + // Create copies of TX/MD blocks for event name and comment + let ev_name_md: Option = if ev.ev_tx_name != 0 { + info.sharable.md_tx.get(&ev.ev_tx_name).cloned() + } else { + None + }; + + let ev_comment_md: Option = if ev.ev_md_comment != 0 { + info.sharable.md_tx.get(&ev.ev_md_comment).cloned() + } else { + None + }; + + // Calculate block size: 16 (short header) + 8 (link count) + 8*links + data members + // Links: ev_ev_next, ev_ev_parent, ev_ev_range, ev_tx_name, ev_md_comment + scope/attachment links + let n_links = 5 + ev.ev_scope_count as u64 + ev.ev_attachment_count as u64; + let block_size: i64 = 16 + 8 + (n_links * 8) as i64 + 32; // 32 bytes for data members + + // Create a modified event block with updated pointers + let mut new_ev = ev.clone(); + + // Set name pointer (will be right after the event block) + let mut current_offset = block_size; + if let Some(md) = &ev_name_md { + new_ev.ev_tx_name = pointer + current_offset; + current_offset += md.block.hdr_len as i64; + } else { + new_ev.ev_tx_name = 0; + } + + // Set comment pointer + if let Some(md) = &ev_comment_md { + new_ev.ev_md_comment = pointer + current_offset; + current_offset += md.block.hdr_len as i64; + } else { + new_ev.ev_md_comment = 0; + } + + // Clear parent and range links for simplicity (would need mapping for full support) + new_ev.ev_ev_parent = 0; + new_ev.ev_ev_range = 0; + + // Update the previous event's next pointer + if let Some(last) = ev_blocks.last_mut() { + last.1.ev_ev_next = ev_block_position; + } + + // Advance pointer for this event block and its metadata + pointer += current_offset; + + ev_blocks.push((*orig_pos, new_ev, ev_name_md, ev_comment_md)); + } + + // Last event's next pointer should be 0 + if let Some(last) = ev_blocks.last_mut() { + last.1.ev_ev_next = 0; + } + + // Copy events to new_info + for (orig_pos, ev, _, _) in &ev_blocks { + new_info.ev.insert(*orig_pos, ev.clone()); + } + } + + // Copy 
attachments from the source file + // Attachments are stored as a linked list starting from hd_at_first + type AtBlockEntry = ( + i64, + At4Block, + Option>, + Option, + Option, + Option, + ); + let mut at_blocks: Vec = Vec::new(); + if !info.at.is_empty() { + new_info.hd_block.hd_at_first = pointer; + + for (orig_pos, (at, embedded_data)) in info.at.iter() { + let at_block_position = pointer; + + // Create copies of TX/MD blocks for filename, mimetype, and comment + let at_filename_md: Option = if at.at_tx_filename != 0 { + info.sharable.md_tx.get(&at.at_tx_filename).cloned() + } else { + None + }; + + let at_mimetype_md: Option = if at.at_tx_mimetype != 0 { + info.sharable.md_tx.get(&at.at_tx_mimetype).cloned() + } else { + None + }; + + let at_comment_md: Option = if at.at_md_comment != 0 { + info.sharable.md_tx.get(&at.at_md_comment).cloned() + } else { + None + }; + + // Create a modified attachment block with updated pointers + let mut new_at = *at; + + // Calculate offsets for metadata blocks (they follow the AT block) + let mut current_offset = at.at_len as i64; + + // Set filename pointer + if let Some(md) = &at_filename_md { + new_at.at_tx_filename = pointer + current_offset; + current_offset += md.block.hdr_len as i64; + } else { + new_at.at_tx_filename = 0; + } + + // Set mimetype pointer + if let Some(md) = &at_mimetype_md { + new_at.at_tx_mimetype = pointer + current_offset; + current_offset += md.block.hdr_len as i64; + } else { + new_at.at_tx_mimetype = 0; + } + + // Set comment pointer + if let Some(md) = &at_comment_md { + new_at.at_md_comment = pointer + current_offset; + current_offset += md.block.hdr_len as i64; + } else { + new_at.at_md_comment = 0; + } + + // Update the previous attachment's next pointer + if let Some(last) = at_blocks.last_mut() { + last.1.at_at_next = at_block_position; + } + + // Advance pointer for this attachment block and its metadata + pointer += current_offset; + + at_blocks.push(( + *orig_pos, + new_at, + 
embedded_data.clone(), + at_filename_md, + at_mimetype_md, + at_comment_md, + )); + } + + // Last attachment's next pointer should be 0 + if let Some(last) = at_blocks.last_mut() { + last.1.at_at_next = 0; + } + + // Copy attachments to new_info + for (orig_pos, at, embedded_data, _, _, _) in &at_blocks { + new_info.at.insert(*orig_pos, (*at, embedded_data.clone())); + } + } + + // Copy SI blocks from source file + // SI blocks are shared - multiple channels can reference the same SI block + // We need to create a mapping from old positions to new positions + type SiBlockEntry = ( + i64, + Si4Block, + Option, + Option, + Option, + ); + let mut si_blocks: Vec = Vec::new(); + let mut si_position_map: HashMap = HashMap::new(); + + for (orig_pos, si) in info.sharable.si.iter() { + let new_si_position = pointer; + si_position_map.insert(*orig_pos, new_si_position); + + // Get the TX/MD blocks from source + let si_name_md = if si.si_tx_name != 0 { + info.sharable.md_tx.get(&si.si_tx_name).cloned() + } else { + None + }; + + let si_path_md = if si.si_tx_path != 0 { + info.sharable.md_tx.get(&si.si_tx_path).cloned() + } else { + None + }; + + let si_comment_md = if si.si_md_comment != 0 { + info.sharable.md_tx.get(&si.si_md_comment).cloned() + } else { + None + }; + + // Calculate block size and update pointer + pointer += si.calculate_block_size(); + + // Add TX/MD block sizes + if let Some(ref md) = si_name_md { + pointer += md.block.hdr_len as i64; + } + if let Some(ref md) = si_path_md { + pointer += md.block.hdr_len as i64; + } + if let Some(ref md) = si_comment_md { + pointer += md.block.hdr_len as i64; + } + + si_blocks.push((*orig_pos, *si, si_name_md, si_path_md, si_comment_md)); + } + let mut last_dg_pointer: i64 = pointer; new_info.hd_block.hd_dg_first = pointer; @@ -83,6 +358,7 @@ pub fn mdfwriter4(mdf: &Mdf, file_name: &str, compression: bool) -> Result data, &cg_cg_master, true, + &si_position_map, )?; } @@ -103,6 +379,7 @@ pub fn mdfwriter4(mdf: &Mdf, 
file_name: &str, compression: bool) -> Result data, &cg_cg_master, false, + &si_position_map, )?; } } @@ -146,54 +423,81 @@ pub fn mdfwriter4(mdf: &Mdf, file_name: &str, compression: bool) -> Result .par_iter_mut() .try_for_each(|(_dg_block_position, dg)| -> Result<(), Error> { for (_rec_id, cg) in dg.cg.iter_mut() { - for (_rec_pos, cn) in cg.cn.iter() { + for (_rec_pos, cn) in cg.cn.iter_mut() { let dt = mdf.get_channel_data(&cn.unique_name); if let Some(data) = dt { let m = data.validity(); if !data.is_empty() && data.bit_count() > 0 { - // empty strings are not written - let mut offset: i64 = 0; - let mut ld_block: Option = None; - if compression || m.is_some() { - ld_block = create_ld(&m, &mut offset); - } + // Check if this is a VLSD channel + let is_vlsd = cn.block.cn_type == 1 && is_vlsd_data(data); + + if is_vlsd { + // VLSD channel: write SD block, set cn_data + let mut offset: i64 = 0; + let data_block = if compression { + create_dz_sd(data, &mut offset) + .context("failed creating dz or sd block")? + } else { + create_sd(data, &mut offset) + .context("failed creating sd block")? + }; - let data_block = if compression { - create_dz_dv(data, &mut offset) - .context("failed creating dz or dv block")? + let data_pointer = Arc::clone(&data_pointer); + let mut locked_data_pointer = data_pointer.lock(); + cn.block.cn_data = *locked_data_pointer; + // For VLSD, dg_data is not used (set to 0) + dg.block.dg_data = 0; + *locked_data_pointer += offset; + let buffer = + write_sd_block(cn.block.cn_data, data_block, offset as usize)?; + tx.send(buffer).context("Channel disconnected")?; + drop(locked_data_pointer); } else { - create_dv(data, &mut offset).context("failed creating dv block")? 
- }; - - // invalid mask existing - let mut invalid_block: Option<(DataBlock, Vec)> = None; - if let Some(mask) = m { - cg.block.cg_inval_bytes = 1; // one byte (u8) for invalid mask - if let Some(ref mut ld) = ld_block { - ld.ld_links.push(offset); + // Regular channel: write DV/DZ block + let mut offset: i64 = 0; + let mut ld_block: Option = None; + if compression || m.is_some() { + ld_block = create_ld(&m, &mut offset); } - if compression { - invalid_block = create_dz_di(&mask, &mut offset) - .context("failed creating dz or di block")?; + + let data_block = if compression { + create_dz_dv(data, &mut offset) + .context("failed creating dz or dv block")? } else { - invalid_block = create_di(&mask, &mut offset) - .context("failed creating di block")?; + create_dv(data, &mut offset) + .context("failed creating dv block")? + }; + + // invalid mask existing + let mut invalid_block: Option<(DataBlock, Vec)> = None; + if let Some(mask) = m { + cg.block.cg_inval_bytes = 1; // one byte (u8) for invalid mask + if let Some(ref mut ld) = ld_block { + ld.ld_links.push(offset); + } + if compression { + invalid_block = create_dz_di(&mask, &mut offset) + .context("failed creating dz or di block")?; + } else { + invalid_block = create_di(&mask, &mut offset) + .context("failed creating di block")?; + } } - } - let data_pointer = Arc::clone(&data_pointer); - let mut locked_data_pointer = data_pointer.lock(); - dg.block.dg_data = *locked_data_pointer; - *locked_data_pointer += offset; - let buffer = write_data_blocks( - dg.block.dg_data, - ld_block, - data_block, - invalid_block, - offset as usize, - )?; - tx.send(buffer).context("Channel disconnected")?; - drop(locked_data_pointer); + let data_pointer = Arc::clone(&data_pointer); + let mut locked_data_pointer = data_pointer.lock(); + dg.block.dg_data = *locked_data_pointer; + *locked_data_pointer += offset; + let buffer = write_data_blocks( + dg.block.dg_data, + ld_block, + data_block, + invalid_block, + offset as usize, + )?; + 
tx.send(buffer).context("Channel disconnected")?; + drop(locked_data_pointer); + } } } } @@ -221,9 +525,163 @@ pub fn mdfwriter4(mdf: &Mdf, file_name: &str, compression: bool) -> Result buffer .write_le(&new_info.hd_block) .context("Could not write HDBlock")?; - // Writes FHBlock - buffer.write_le(&fh).context("Could not write FHBlock")?; - fh_comments.write(&mut buffer)?; // FH comments + // Writes FHBlocks (file history chain) + for (fh, fh_comment) in &fh_blocks { + buffer.write_le(fh).context("Could not write FHBlock")?; + fh_comment.write(&mut buffer)?; + } + + // Writes EVBLOCKs (events) + for (_orig_pos, ev, ev_name_md, ev_comment_md) in &ev_blocks { + // Write event block header + let ev_header = Blockheader4Short { + hdr_id: [35, 35, 69, 86], // ##EV + hdr_gap: [0u8; 4], + hdr_len: ev.calculate_block_size() as u64, + }; + buffer + .write_le(&ev_header) + .context("Could not write EVBlock header")?; + + // Write the event block body + buffer.write_le(ev).context("Could not write EVBlock")?; + + // Write event name TX block if present + if let Some(name_md) = ev_name_md { + name_md + .write(&mut buffer) + .context("Failed writing event name")?; + } + + // Write event comment MD block if present + if let Some(comment_md) = ev_comment_md { + comment_md + .write(&mut buffer) + .context("Failed writing event comment")?; + } + } + + // Writes ATBLOCKs (attachments) + for (_orig_pos, at, embedded_data, at_filename_md, at_mimetype_md, at_comment_md) in &at_blocks + { + // Write attachment block (AT4Block includes its own header) + buffer.write_le(at).context("Could not write ATBlock")?; + + // Write embedded data if present + if let Some(data) = embedded_data { + buffer + .write_all(data) + .context("Could not write embedded attachment data")?; + } + + // Write filename TX block if present + if let Some(filename_md) = at_filename_md { + filename_md + .write(&mut buffer) + .context("Failed writing attachment filename")?; + } + + // Write mimetype TX block if present 
+ if let Some(mimetype_md) = at_mimetype_md { + mimetype_md + .write(&mut buffer) + .context("Failed writing attachment mimetype")?; + } + + // Write comment MD block if present + if let Some(comment_md) = at_comment_md { + comment_md + .write(&mut buffer) + .context("Failed writing attachment comment")?; + } + } + + // Writes SIBLOCKs (source information) + for (orig_pos, si, si_name_md, si_path_md, si_comment_md) in &si_blocks { + // Write SI block header + let si_header = Blockheader4Short { + hdr_id: [35, 35, 83, 73], // ##SI + hdr_gap: [0u8; 4], + hdr_len: si.calculate_block_size() as u64, + }; + buffer + .write_le(&si_header) + .context("Could not write SIBlock header")?; + + // Write SI block links and data + let si_links: u64 = 3; // always 3 links: name, path, comment + buffer + .write_le(&si_links) + .context("Could not write SIBlock link count")?; + + // Calculate link positions + let mut link_offset = si.calculate_block_size(); + let new_si_name = if si_name_md.is_some() { + let pos = si_position_map.get(orig_pos).unwrap_or(&0) + link_offset; + link_offset += si_name_md.as_ref().unwrap().block.hdr_len as i64; + pos + } else { + 0 + }; + let new_si_path = if si_path_md.is_some() { + let pos = si_position_map.get(orig_pos).unwrap_or(&0) + link_offset; + link_offset += si_path_md.as_ref().unwrap().block.hdr_len as i64; + pos + } else { + 0 + }; + let new_si_comment = if si_comment_md.is_some() { + si_position_map.get(orig_pos).unwrap_or(&0) + link_offset + } else { + 0 + }; + + buffer + .write_le(&new_si_name) + .context("Could not write SIBlock si_tx_name")?; + buffer + .write_le(&new_si_path) + .context("Could not write SIBlock si_tx_path")?; + buffer + .write_le(&new_si_comment) + .context("Could not write SIBlock si_md_comment")?; + + // Write SI data members (type, bus_type, flags, reserved) + buffer + .write_le(&si.si_type) + .context("Could not write SIBlock si_type")?; + buffer + .write_le(&si.si_bus_type) + .context("Could not write SIBlock 
si_bus_type")?; + buffer + .write_le(&si.si_flags) + .context("Could not write SIBlock si_flags")?; + let si_reserved: [u8; 5] = [0u8; 5]; + buffer + .write_all(&si_reserved) + .context("Could not write SIBlock si_reserved")?; + + // Write SI name TX block if present + if let Some(name_md) = si_name_md { + name_md + .write(&mut buffer) + .context("Failed writing SI name")?; + } + + // Write SI path TX block if present + if let Some(path_md) = si_path_md { + path_md + .write(&mut buffer) + .context("Failed writing SI path")?; + } + + // Write SI comment MD block if present + if let Some(comment_md) = si_comment_md { + comment_md + .write(&mut buffer) + .context("Failed writing SI comment")?; + } + } // Writes DG+CG+CN blocks for (_position, dg) in new_info.dg.iter() { @@ -313,9 +771,7 @@ fn write_composition(buffer: &mut Cursor>, compo: &Composition) -> Resul buffer .write_le(&header) .context("Could not write DSBlock header")?; - buffer - .write_le(ds) - .context("Could not write DSBlock")?; + buffer.write_le(ds).context("Could not write DSBlock")?; } Compo::CL(cl) => { let header = Blockheader4Short { @@ -326,9 +782,7 @@ fn write_composition(buffer: &mut Cursor>, compo: &Composition) -> Resul buffer .write_le(&header) .context("Could not write CLBlock header")?; - buffer - .write_le(cl) - .context("Could not write CLBlock")?; + buffer.write_le(cl).context("Could not write CLBlock")?; } Compo::CU(cu) => { let header = Blockheader4Short { @@ -339,9 +793,7 @@ fn write_composition(buffer: &mut Cursor>, compo: &Composition) -> Resul buffer .write_le(&header) .context("Could not write CUBlock header")?; - buffer - .write_le(cu) - .context("Could not write CUBlock")?; + buffer.write_le(cu).context("Could not write CUBlock")?; } Compo::CV(cv) => { let header = Blockheader4Short { @@ -352,9 +804,7 @@ fn write_composition(buffer: &mut Cursor>, compo: &Composition) -> Resul buffer .write_le(&header) .context("Could not write CVBlock header")?; - buffer - .write_le(cv) - 
.context("Could not write CVBlock")?; + buffer.write_le(cv).context("Could not write CVBlock")?; } Compo::CN(cn) => { // Nested CN composition: write the CN block header + block data @@ -443,6 +893,43 @@ fn write_data_blocks( Ok(buffer.into_inner()) } +/// Writes the SD block for VLSD channels +fn write_sd_block( + _position: i64, + data_block: (DataBlock, usize, Vec), + offset: usize, +) -> Result> { + let mut buffer = Cursor::new(vec![0u8; offset]); + + // Write SD or DZ block + match data_block.0 { + DataBlock::DvDi(sd_block) => { + // SD block (uses same header format as DV/DI) + buffer + .write_le(&sd_block) + .context("Could not write SDBlock")?; + } + DataBlock::DZ(dz_block) => { + let id_dz: [u8; 4] = [35, 35, 68, 90]; // ##DZ + buffer + .write_le(&id_dz) + .context("Could not write DZSDBlock id")?; + buffer + .write_le(&dz_block) + .context("Could not write DZSDBlock")?; + } + } + buffer + .write_all(&data_block.2) + .context("Could not write SD data")?; + // 8 byte align + buffer + .write_all(&vec![0; data_block.1]) + .context("Could not align SD data to 8 bytes")?; + + Ok(buffer.into_inner()) +} + /// Create a LDBlock fn create_ld(m: &Option, offset: &mut i64) -> Option { let mut ld_block = Ld4Block::default(); @@ -463,8 +950,10 @@ fn create_ld(m: &Option, offset: &mut i64) -> Option { /// Create a DV Block fn create_dv(data: &ChannelData, offset: &mut i64) -> Result<(DataBlock, usize, Vec), Error> { - let mut dv_block = Blockheader4::default(); - dv_block.hdr_id = [35, 35, 68, 86]; // ##DV + let mut dv_block = Blockheader4 { + hdr_id: [35, 35, 68, 86], // ##DV + ..Default::default() + }; let data_bytes: Vec = data .to_bytes() .context("failed converting arraw data into bytes for dv block")?; @@ -514,8 +1003,10 @@ fn create_dz_dv( /// Create a DI Block fn create_di(mask: &NullBuffer, offset: &mut i64) -> Result)>> { - let mut dv_invalid_block = Blockheader4::default(); - dv_invalid_block.hdr_id = [35, 35, 68, 73]; // ##DI + let mut dv_invalid_block = 
Blockheader4 { + hdr_id: [35, 35, 68, 73], // ##DI + ..Default::default() + }; let mask_length = mask.len(); dv_invalid_block.hdr_len += mask_length as u64; let byte_aligned = 8 - mask_length % 8; @@ -567,6 +1058,7 @@ fn create_blocks( data: &ChannelData, cg_cg_master: &i64, master_flag: bool, + si_position_map: &HashMap, ) -> Result { let bit_count = data.bit_count(); if !data.is_empty() && bit_count > 0 { @@ -592,7 +1084,16 @@ fn create_blocks( } cg_block.cg_cycle_count = cg.block.cg_cycle_count; - cg_block.cg_data_bytes = byte_count; + // CN Block + let cn_position = pointer + cg_block_header.hdr_len as i64; + let is_vlsd = is_vlsd_data(data); + + // For VLSD channels, cg_data_bytes = 0 (no fixed record, all data in SDBLOCK) + if is_vlsd { + cg_block.cg_data_bytes = 0; + } else { + cg_block.cg_data_bytes = byte_count; + } if data.validity().is_some() { // One byte for invalid data as only one channel per CG cg_block.cg_inval_bytes = 1; @@ -600,22 +1101,28 @@ fn create_blocks( pointer += cg_block_header.hdr_len as i64; cg_block.cg_cn_first = pointer; - // CN Block - let cn_position = pointer; + // CN Block setup if master_flag { - cn_block.cn_type = cn_type_writer(cn.block.cn_type)?; // master channel + cn_block.cn_type = cn_type_writer(cn.block.cn_type, false)?; // master channel never VLSD if cn.block.cn_sync_type != 0 { cn_block.cn_sync_type = cn.block.cn_sync_type; } else { cn_block.cn_sync_type = 1; // Default is time } + } else if is_vlsd { + cn_block.cn_type = 1; // VLSD channel type } let machine_endian: bool = cfg!(target_endian = "big"); cn_block.cn_data_type = data.data_type(machine_endian); - cn_block.cn_bit_count = bit_count; + // For VLSD, cn_bit_count can be 0 (variable length) + if is_vlsd { + cn_block.cn_bit_count = 0; + } else { + cn_block.cn_bit_count = bit_count; + } pointer += cn_block_header.hdr_len as i64; @@ -658,6 +1165,14 @@ fn create_blocks( .insert(cn_block.cn_md_comment, tx_comment_block); } + // Source information - map old SI 
position to new position + let old_si_source = cn.block.get_si_source(); + if old_si_source != 0 + && let Some(&new_si_pos) = si_position_map.get(&old_si_source) + { + cn_block.set_si_source(new_si_pos); + } + // Channel array let data_ndim = data.ndim(); let mut composition: Option = None; @@ -707,6 +1222,7 @@ fn create_blocks( list_size: cn.list_size, shape: cn.shape.clone(), invalid_mask: None, + event_template: None, }; let mut new_cg = Cg4 { header: cg_block_header, @@ -742,16 +1258,136 @@ fn create_blocks( } /// supports only data and master channels -fn cn_type_writer(cn_type: u8) -> Result { +fn cn_type_writer(cn_type: u8, is_vlsd: bool) -> Result { // not all types are supported match cn_type { 0 => Ok(0), - 1 => Ok(0), + 1 => { + if is_vlsd { + Ok(1) // Keep VLSD type + } else { + Ok(0) // Fallback to fixed-length + } + } 2 => Ok(2), 3 => Ok(2), 4 => Ok(0), 5 => Ok(0), 6 => Ok(0), + 7 => Ok(0), // VLSC converted to fixed-length (complex to write) _ => bail!("Unknown CN type"), } } + +/// Check if the channel data is variable-length (Utf8 or VariableSizeByteArray) +fn is_vlsd_data(data: &ChannelData) -> bool { + matches!( + data, + ChannelData::Utf8(_) | ChannelData::VariableSizeByteArray(_) + ) +} + +/// Convert ChannelData to SDBLOCK format (u32 length prefix + data for each value) +fn to_sd_bytes(data: &ChannelData) -> Result, Error> { + match data { + ChannelData::Utf8(a) => { + let array = a.finish_cloned(); + let mut result = Vec::new(); + for i in 0..array.len() { + let value = array.value(i); + let bytes = value.as_bytes(); + // Add null terminator for UTF-8 strings (cn_data_type 7) + let len_with_null = bytes.len() + 1; + result.extend_from_slice(&(len_with_null as u32).to_le_bytes()); + result.extend_from_slice(bytes); + result.push(0); // null terminator + } + Ok(result) + } + ChannelData::VariableSizeByteArray(a) => { + let array = a.finish_cloned(); + let mut result = Vec::new(); + for i in 0..array.len() { + let value = array.value(i); + 
result.extend_from_slice(&(value.len() as u32).to_le_bytes()); + result.extend_from_slice(value); + } + Ok(result) + } + _ => bail!("to_sd_bytes called on non-VLSD data type"), + } +} + +/// Calculate the size of SDBLOCK data (for position calculation) +#[allow(dead_code)] +fn calculate_sd_size(data: &ChannelData) -> usize { + match data { + ChannelData::Utf8(a) => { + let array = a.finish_cloned(); + let mut size = 0usize; + for i in 0..array.len() { + let value = array.value(i); + // 4 bytes for length + string bytes + 1 for null terminator + size += 4 + value.len() + 1; + } + size + } + ChannelData::VariableSizeByteArray(a) => { + let array = a.finish_cloned(); + let mut size = 0usize; + for i in 0..array.len() { + let value = array.value(i); + // 4 bytes for length + data bytes + size += 4 + value.len(); + } + size + } + _ => 0, + } +} + +/// Create an SD Block (Signal Data block for VLSD channels) +fn create_sd(data: &ChannelData, offset: &mut i64) -> Result<(DataBlock, usize, Vec), Error> { + let mut sd_block = Blockheader4 { + hdr_id: [35, 35, 83, 68], // ##SD + ..Default::default() + }; + let data_bytes = to_sd_bytes(data).context("failed converting data to SD format")?; + let data_bytes_len = data_bytes.len(); + sd_block.hdr_len += data_bytes_len as u64; + let byte_aligned = (8 - data_bytes_len % 8) % 8; + + *offset += sd_block.hdr_len as i64 + byte_aligned as i64; + + Ok((DataBlock::DvDi(sd_block), byte_aligned, data_bytes)) +} + +/// Create a DZ Block of SD type (compressed SDBLOCK) +fn create_dz_sd( + data: &ChannelData, + offset: &mut i64, +) -> Result<(DataBlock, usize, Vec), Error> { + let mut dz_block = Dz4Block::default(); + let bytes = to_sd_bytes(data).context("failed converting data to SD format")?; + dz_block.dz_org_data_length = bytes.len() as u64; + + let mut encoder = ZlibEncoder::new(Vec::new(), Compression::best()); + encoder.write_all(&bytes).expect("Could not compress data"); + let mut data_bytes = encoder.finish().expect("failed 
finishing to compress data"); + dz_block.dz_data_length = data_bytes.len() as u64; + + let dz_sd_block: DataBlock; + let byte_aligned: usize; + + if dz_block.dz_org_data_length < dz_block.dz_data_length { + // Compression not beneficial, use uncompressed + (dz_sd_block, byte_aligned, data_bytes) = create_sd(data, offset)?; + } else { + byte_aligned = (8 - dz_block.dz_data_length % 8) as usize % 8; + dz_block.len = dz_block.dz_data_length + 48; + dz_block.dz_org_block_type = [83, 68]; // SD + *offset += dz_block.len as i64 + byte_aligned as i64; + dz_sd_block = DataBlock::DZ(dz_block); + } + Ok((dz_sd_block, byte_aligned, data_bytes)) +} From e360e93a1371dc1195e21c9f27d18282b8877a3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Sun, 8 Feb 2026 21:20:08 +0100 Subject: [PATCH 16/46] added metadata information based on schemas --- src/export/hdf5.rs | 31 +- src/mdfinfo.rs | 9 +- src/mdfinfo/mdfinfo4/block_header.rs | 58 +- src/mdfinfo/mdfinfo4/metadata.rs | 790 +++++++++++++++++++++++---- src/mdfinfo/mdfinfo4/mod.rs | 22 +- src/mdfr.rs | 40 +- src/mdfreader.rs | 9 +- 7 files changed, 791 insertions(+), 168 deletions(-) diff --git a/src/export/hdf5.rs b/src/export/hdf5.rs index 83a4d14..d6bdf86 100644 --- a/src/export/hdf5.rs +++ b/src/export/hdf5.rs @@ -276,18 +276,29 @@ fn mdf4_metadata(file: &mut File, mdfinfo4: &MdfInfo4) -> Result<()> { mdfinfo4.hd_block.hd_start_time_ns ) })?; - let comments = mdfinfo4 + if let Some(hd) = mdfinfo4 .sharable - .get_hd_comments(mdfinfo4.hd_block.hd_md_comment); - comments - .iter() - .try_for_each(|(name, comment)| -> Result<(), Error> { - create_str_group_attr::(file, name, comment).with_context(|| { - format!("failed writing attribute {} with value {}", name, comment,) + .get_hd_comments(mdfinfo4.hd_block.hd_md_comment) + { + if let Some(tx) = &hd.tx { + create_str_group_attr::(file, "TX", tx) + .context("failed writing HD TX attribute")?; + } + if let Some(ts) = &hd.time_source { + 
create_str_group_attr::(file, "time_source", ts) + .context("failed writing HD time_source attribute")?; + } + for (name, value) in &hd.constants { + create_str_group_attr::(file, name, value).with_context(|| { + format!("failed writing HD constant {name} with value {value}") })?; - Ok(()) - }) - .context("failed writing hd comments")?; + } + for (name, value) in &hd.common_properties { + create_str_group_attr::(file, name, &format!("{value}")).with_context(|| { + format!("failed writing HD property {name}") + })?; + } + } Ok(()) } diff --git a/src/mdfinfo.rs b/src/mdfinfo.rs index 33c18fb..2f384b0 100644 --- a/src/mdfinfo.rs +++ b/src/mdfinfo.rs @@ -511,11 +511,14 @@ impl MdfInfo { MdfInfo::V4(mdfinfo4) => mdfinfo4.set_channel_desc(channel_name, desc), } } - /// get comment from position - pub fn get_comments(&mut self, position: i64) -> Option> { + /// get typed comment from position + pub fn get_md_comment( + &mut self, + position: i64, + ) -> Option<&crate::mdfinfo::mdfinfo4::MdComment> { match self { MdfInfo::V3(_mdfinfo3) => None, - MdfInfo::V4(mdfinfo4) => Some(mdfinfo4.sharable.get_comments(position)), + MdfInfo::V4(mdfinfo4) => mdfinfo4.sharable.get_md_comment(position), } } /// get tx from position diff --git a/src/mdfinfo/mdfinfo4/block_header.rs b/src/mdfinfo/mdfinfo4/block_header.rs index f200283..3ebf0ac 100644 --- a/src/mdfinfo/mdfinfo4/block_header.rs +++ b/src/mdfinfo/mdfinfo4/block_header.rs @@ -8,7 +8,7 @@ use std::io::{Cursor, Read}; use std::str; use super::cc_block::Cc4Block; -use super::metadata::{BlockType, MetaData, MetaDataBlockType}; +use super::metadata::{BlockType, HdComment, MdComment, MetaData, MetaDataBlockType}; use super::si_block::Si4Block; use crate::mdfinfo::sym_buf_reader::SymBufReader; @@ -187,8 +187,8 @@ pub(super) fn read_meta_data( block, raw_data: raw_data.into_inner(), block_type, - comments: HashMap::new(), parent_block_type, + md_comment: None, }; sharable.md_tx.insert(target, md); Ok(position) @@ -212,18 +212,7 @@ impl 
fmt::Display for SharableBlocks { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { writeln!(f, "MD TX comments : \n")?; for (_k, c) in self.md_tx.iter() { - match c.block_type { - MetaDataBlockType::MdParsed => { - for (tag, text) in c.comments.iter() { - writeln!(f, "Tag: {tag} Text: {text}")?; - } - } - MetaDataBlockType::TX => match c.get_data_string() { - Ok(s) => writeln!(f, "Text: {s}")?, - Err(e) => writeln!(f, "Text: {e:?}")?, - }, - _ => (), - } + writeln!(f, "{c}")?; } writeln!(f, "CC : \n")?; for (position, cc) in self.cc.iter() { @@ -254,44 +243,35 @@ impl SharableBlocks { .or_insert_with(|| MetaData::new(MetaDataBlockType::TX, BlockType::CN)); md.set_data_buffer(text.as_bytes()); } - /// Returns metadata from MD Block - /// keys are tag and related value text of tag - pub fn get_comments(&mut self, position: i64) -> HashMap { - let mut comments: HashMap = HashMap::new(); + /// Returns typed metadata comment, parsing lazily if needed + pub fn get_md_comment(&mut self, position: i64) -> Option<&MdComment> { if let Some(md) = self.md_tx.get_mut(&position) { match md.block_type { - MetaDataBlockType::MdParsed => { - comments.clone_from(&md.comments); - } + MetaDataBlockType::MdParsed => {} MetaDataBlockType::MdBlock => { - // not yet parsed, so let's parse it let _ = md.parse_xml(); - comments.clone_from(&md.comments); - } - MetaDataBlockType::TX => { - // should not happen } + MetaDataBlockType::TX => return None, } - }; - comments + } + self.md_tx + .get(&position) + .and_then(|md| md.md_comment.as_ref()) } - /// Returns metadata from MD Block linked by HD Block - /// keys are tag and related value text of tag - pub fn get_hd_comments(&self, position: i64) -> HashMap { - // this method assumes the xml was already parsed - let mut comments: HashMap = HashMap::new(); + /// Returns HD comment, assumes already parsed + pub fn get_hd_comments(&self, position: i64) -> Option<&HdComment> { if let Some(md) = self.md_tx.get(&position) && md.block_type 
== MetaDataBlockType::MdParsed + && let Some(MdComment::Hd(hd)) = &md.md_comment { - comments.clone_from(&md.comments); - }; - comments + return Some(hd); + } + None } - /// parses the HD Block metadata comments - /// done right after reading HD block + /// parses the HD Block metadata comments, done right after reading HD block pub fn parse_hd_comments(&mut self, position: i64) { if let Some(md) = self.md_tx.get_mut(&position) { - let _ = md.parse_hd_xml(); + let _ = md.parse_hd_comment(); }; } /// Create new Shared Block diff --git a/src/mdfinfo/mdfinfo4/metadata.rs b/src/mdfinfo/mdfinfo4/metadata.rs index 2ebef86..1f95533 100644 --- a/src/mdfinfo/mdfinfo4/metadata.rs +++ b/src/mdfinfo/mdfinfo4/metadata.rs @@ -37,6 +37,383 @@ pub enum BlockType { CH, } +/// Recursive representation of common_properties values per mdf_base.xsd +#[derive(Debug, Clone)] +pub enum PropertyValue { + /// Simple value from `value` + Value(String), + /// Nested properties from `` + Tree(HashMap), + /// List of property maps from `
  • ...
  • ` + List(Vec>), + /// Simple value list from `v` + EList(Vec), +} + +impl Display for PropertyValue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + PropertyValue::Value(v) => write!(f, "{v}"), + PropertyValue::Tree(map) => write!(f, "tree({} items)", map.len()), + PropertyValue::List(items) => write!(f, "list({} items)", items.len()), + PropertyValue::EList(items) => write!(f, "elist({} items)", items.len()), + } + } +} + +pub type CommonProperties = HashMap; + +/// Alternative names (from ``, ``, `` elements) +/// Stores only the default (first/no-lang) value for each field +#[derive(Debug, Clone, Default)] +pub struct MdNames { + pub name: Option, + pub display: Option, + pub vendor: Option, + pub description: Option, +} + +impl Display for MdNames { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut first = true; + if let Some(name) = &self.name { + write!(f, "name={name}")?; + first = false; + } + if let Some(display) = &self.display { + if !first { + write!(f, " ")?; + } + write!(f, "display={display}")?; + first = false; + } + if let Some(vendor) = &self.vendor { + if !first { + write!(f, " ")?; + } + write!(f, "vendor={vendor}")?; + first = false; + } + if let Some(desc) = &self.description { + if !first { + write!(f, " ")?; + } + write!(f, "desc={desc}")?; + } + Ok(()) + } +} + +/// HD block comment per hd_comment.xsd +#[derive(Debug, Clone, Default)] +pub struct HdComment { + pub tx: Option, + pub time_source: Option, + pub constants: HashMap, + pub common_properties: CommonProperties, +} + +impl Display for HdComment { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(tx) = &self.tx { + write!(f, "{tx}")?; + } + if let Some(ts) = &self.time_source { + write!(f, " time_source={ts}")?; + } + if !self.constants.is_empty() { + write!(f, " constants={}", self.constants.len())?; + } + if !self.common_properties.is_empty() { + write!(f, " props={}", self.common_properties.len())?; + 
} + Ok(()) + } +} + +/// FH block comment per fh_comment.xsd +#[derive(Debug, Clone, Default)] +pub struct FhComment { + pub tx: Option, + pub tool_id: Option, + pub tool_vendor: Option, + pub tool_version: Option, + pub user_name: Option, + pub common_properties: CommonProperties, +} + +impl Display for FhComment { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(tx) = &self.tx { + write!(f, "{tx}")?; + } + if let Some(tool) = &self.tool_id { + write!(f, " tool={tool}")?; + } + if let Some(vendor) = &self.tool_vendor { + write!(f, " vendor={vendor}")?; + } + if let Some(ver) = &self.tool_version { + write!(f, " v{ver}")?; + } + if let Some(user) = &self.user_name { + write!(f, " user={user}")?; + } + Ok(()) + } +} + +/// CN block comment per cn_comment.xsd +#[derive(Debug, Clone, Default)] +pub struct CnComment { + pub tx: Option, + pub names: MdNames, + pub linker_name: Option, + pub linker_address: Option, + pub axis_monotony: Option, + /// Raster: (min, max, avg) + pub raster: Option<(Option, Option, Option)>, + pub formula: Option, + pub address: Option, + pub common_properties: CommonProperties, +} + +impl Display for CnComment { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(tx) = &self.tx { + write!(f, "{tx}")?; + } + let names = format!("{}", self.names); + if !names.is_empty() { + write!(f, " names({names})")?; + } + if let Some(formula) = &self.formula { + write!(f, " formula={formula}")?; + } + if let Some(addr) = &self.address { + write!(f, " addr={addr}")?; + } + if let Some(mono) = &self.axis_monotony { + write!(f, " monotony={mono}")?; + } + Ok(()) + } +} + +/// CG block comment per cg_comment.xsd +#[derive(Debug, Clone, Default)] +pub struct CgComment { + pub tx: Option, + pub names: MdNames, + pub common_properties: CommonProperties, +} + +impl Display for CgComment { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(tx) = &self.tx { + write!(f, "{tx}")?; + } + let 
names = format!("{}", self.names); + if !names.is_empty() { + write!(f, " names({names})")?; + } + Ok(()) + } +} + +/// CC block comment per cc_comment.xsd +#[derive(Debug, Clone, Default)] +pub struct CcComment { + pub tx: Option, + pub names: MdNames, + pub formula: Option, + pub common_properties: CommonProperties, +} + +impl Display for CcComment { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(tx) = &self.tx { + write!(f, "{tx}")?; + } + let names = format!("{}", self.names); + if !names.is_empty() { + write!(f, " names({names})")?; + } + if let Some(formula) = &self.formula { + write!(f, " formula={formula}")?; + } + Ok(()) + } +} + +/// SI block comment per si_comment.xsd +#[derive(Debug, Clone, Default)] +pub struct SiComment { + pub tx: Option, + pub names: MdNames, + pub path: MdNames, + pub bus: MdNames, + pub protocol: Option, + pub common_properties: CommonProperties, +} + +impl Display for SiComment { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(tx) = &self.tx { + write!(f, "{tx}")?; + } + if let Some(proto) = &self.protocol { + write!(f, " protocol={proto}")?; + } + let names = format!("{}", self.names); + if !names.is_empty() { + write!(f, " names({names})")?; + } + Ok(()) + } +} + +/// EV block comment per ev_comment.xsd +#[derive(Debug, Clone, Default)] +pub struct EvComment { + pub tx: Option, + pub pre_trigger_interval: Option, + pub post_trigger_interval: Option, + pub formula: Option, + pub timeout: Option, + pub common_properties: CommonProperties, +} + +impl Display for EvComment { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(tx) = &self.tx { + write!(f, "{tx}")?; + } + if let Some(pre) = &self.pre_trigger_interval { + write!(f, " pre_trigger={pre}")?; + } + if let Some(post) = &self.post_trigger_interval { + write!(f, " post_trigger={post}")?; + } + if let Some(formula) = &self.formula { + write!(f, " formula={formula}")?; + } + if let Some(timeout) = 
&self.timeout { + write!(f, " timeout={timeout}")?; + } + Ok(()) + } +} + +/// AT block comment per at_comment.xsd +#[derive(Debug, Clone, Default)] +pub struct AtComment { + pub tx: Option, + pub common_properties: CommonProperties, +} + +impl Display for AtComment { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(tx) = &self.tx { + write!(f, "{tx}")?; + } + if !self.common_properties.is_empty() { + write!(f, " props={}", self.common_properties.len())?; + } + Ok(()) + } +} + +/// CH block comment per ch_comment.xsd +#[derive(Debug, Clone, Default)] +pub struct ChComment { + pub tx: Option, + pub names: MdNames, + pub common_properties: CommonProperties, +} + +impl Display for ChComment { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(tx) = &self.tx { + write!(f, "{tx}")?; + } + let names = format!("{}", self.names); + if !names.is_empty() { + write!(f, " names({names})")?; + } + Ok(()) + } +} + +/// DG block comment per dg_comment.xsd +#[derive(Debug, Clone, Default)] +pub struct DgComment { + pub tx: Option, + pub common_properties: CommonProperties, +} + +impl Display for DgComment { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(tx) = &self.tx { + write!(f, "{tx}")?; + } + if !self.common_properties.is_empty() { + write!(f, " props={}", self.common_properties.len())?; + } + Ok(()) + } +} + +/// Parsed metadata, typed per parent block schema +#[derive(Debug, Clone)] +pub enum MdComment { + Hd(HdComment), + Fh(FhComment), + Cn(CnComment), + Cg(CgComment), + Cc(CcComment), + Si(SiComment), + Ev(EvComment), + At(AtComment), + Ch(ChComment), + Dg(DgComment), +} + +impl Display for MdComment { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + MdComment::Hd(c) => write!(f, "{c}"), + MdComment::Fh(c) => write!(f, "{c}"), + MdComment::Cn(c) => write!(f, "{c}"), + MdComment::Cg(c) => write!(f, "{c}"), + MdComment::Cc(c) => write!(f, "{c}"), + MdComment::Si(c) => 
write!(f, "{c}"), + MdComment::Ev(c) => write!(f, "{c}"), + MdComment::At(c) => write!(f, "{c}"), + MdComment::Ch(c) => write!(f, "{c}"), + MdComment::Dg(c) => write!(f, "{c}"), + } + } +} + +impl MdComment { + /// Returns the TX text from any comment variant + pub fn get_tx(&self) -> Option<&str> { + match self { + MdComment::Hd(c) => c.tx.as_deref(), + MdComment::Fh(c) => c.tx.as_deref(), + MdComment::Cn(c) => c.tx.as_deref(), + MdComment::Cg(c) => c.tx.as_deref(), + MdComment::Cc(c) => c.tx.as_deref(), + MdComment::Si(c) => c.tx.as_deref(), + MdComment::Ev(c) => c.tx.as_deref(), + MdComment::At(c) => c.tx.as_deref(), + MdComment::Ch(c) => c.tx.as_deref(), + MdComment::Dg(c) => c.tx.as_deref(), + } + } +} + /// struct linking MD or TX block with #[derive(Debug, Default, Clone)] #[repr(C)] @@ -47,27 +424,119 @@ pub struct MetaData { pub raw_data: Vec, /// Block type, TX, MD or MD not yet parsed pub block_type: MetaDataBlockType, - /// Metadata after parsing - pub comments: HashMap, /// Parent block type pub parent_block_type: BlockType, + /// Typed parsed metadata (replaces flat comments HashMap) + pub md_comment: Option, } impl Display for MetaData { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let type_str = match self.block_type { - MetaDataBlockType::MdBlock => "MD (unparsed)", - MetaDataBlockType::MdParsed => "MD (parsed)", - MetaDataBlockType::TX => "TX", - }; - write!( - f, - "MetaData: type={} {} comments raw_bytes={}", - type_str, - self.comments.len(), - self.raw_data.len() - ) + match self.block_type { + MetaDataBlockType::MdParsed => { + if let Some(mc) = &self.md_comment { + write!(f, "{mc}") + } else { + write!(f, "MD (parsed, empty)") + } + } + MetaDataBlockType::MdBlock => { + write!(f, "MD (unparsed) raw_bytes={}", self.raw_data.len()) + } + MetaDataBlockType::TX => { + write!(f, "TX raw_bytes={}", self.raw_data.len()) + } + } + } +} + +/// Extract text content of a child element by tag name +fn extract_text<'a>(node: 
roxmltree::Node<'a, 'a>, tag_name: &str) -> Option { + node.children() + .find(|n| n.is_element() && n.has_tag_name(tag_name)) + .and_then(|n| n.text()) + .map(|s| s.to_string()) +} + +/// Parse ``, ``, or `` block into MdNames (default language only) +fn parse_names(node: roxmltree::Node, tag_name: &str) -> MdNames { + let mut names = MdNames::default(); + if let Some(names_node) = node.children().find(|n| n.is_element() && n.has_tag_name(tag_name)) + { + names.name = extract_text(names_node, "name"); + names.display = extract_text(names_node, "display"); + names.vendor = extract_text(names_node, "vendor"); + names.description = extract_text(names_node, "description"); + } + names +} + +/// Recursively parse `` into CommonProperties +fn parse_common_properties(node: roxmltree::Node) -> CommonProperties { + let mut props = CommonProperties::new(); + if let Some(cp_node) = node + .children() + .find(|n| n.is_element() && n.has_tag_name("common_properties")) + { + parse_properties_children(cp_node, &mut props); } + props +} + +/// Parse children of a common_properties or tree node +fn parse_properties_children(node: roxmltree::Node, props: &mut HashMap) { + for child in node.children().filter(|n| n.is_element()) { + let tag = child.tag_name().name(); + match tag { + "e" => { + if let Some(name) = child.attribute("name") { + let value = child.text().unwrap_or("").to_string(); + props.insert(name.to_string(), PropertyValue::Value(value)); + } + } + "tree" => { + if let Some(name) = child.attribute("name") { + let mut sub = HashMap::new(); + parse_properties_children(child, &mut sub); + props.insert(name.to_string(), PropertyValue::Tree(sub)); + } + } + "list" => { + if let Some(name) = child.attribute("name") { + let mut items = Vec::new(); + for li in child.children().filter(|n| n.is_element() && n.has_tag_name("li")) { + let mut item = HashMap::new(); + parse_properties_children(li, &mut item); + items.push(item); + } + props.insert(name.to_string(), 
PropertyValue::List(items)); + } + } + "elist" => { + if let Some(name) = child.attribute("name") { + let items: Vec = child + .children() + .filter(|n| n.is_element() && n.has_tag_name("eli")) + .filter_map(|n| n.text().map(|s| s.to_string())) + .collect(); + props.insert(name.to_string(), PropertyValue::EList(items)); + } + } + _ => {} + } + } +} + +/// Parse raster element: `vvv` +fn parse_raster(node: roxmltree::Node) -> Option<(Option, Option, Option)> { + node.children() + .find(|n| n.is_element() && n.has_tag_name("raster")) + .map(|raster_node| { + let min = extract_text(raster_node, "min").and_then(|s| s.parse().ok()); + let max = extract_text(raster_node, "max").and_then(|s| s.parse().ok()); + let avg = extract_text(raster_node, "avg").and_then(|s| s.parse().ok()); + (min, max, avg) + }) } impl MetaData { @@ -91,17 +560,24 @@ impl MetaData { block: header, raw_data: Vec::new(), block_type, - comments: HashMap::new(), parent_block_type, + md_comment: None, } } /// Converts the metadata handling the parent block type's specificities pub fn parse_xml(&mut self) -> Result<()> { if self.block_type == MetaDataBlockType::MdBlock { match self.parent_block_type { - BlockType::HD => self.parse_hd_xml()?, - BlockType::FH => self.parse_fh_xml()?, - _ => self.parse_generic_xml()?, + BlockType::HD => self.parse_hd_comment()?, + BlockType::FH => self.parse_fh_comment()?, + BlockType::CN => self.parse_cn_comment()?, + BlockType::CG => self.parse_cg_comment()?, + BlockType::CC => self.parse_cc_comment()?, + BlockType::SI => self.parse_si_comment()?, + BlockType::EV => self.parse_ev_comment()?, + BlockType::AT => self.parse_at_comment()?, + BlockType::CH => self.parse_ch_comment()?, + BlockType::DG => self.parse_dg_comment()?, }; } Ok(()) @@ -109,31 +585,28 @@ impl MetaData { /// Returns the text from TX Block or TX's tag text from MD Block pub fn get_tx(&self) -> Result, anyhow::Error> { match self.block_type { - MetaDataBlockType::MdParsed => 
Ok(self.comments.get("TX").cloned()), + MetaDataBlockType::MdParsed => { + Ok(self.md_comment.as_ref().and_then(|mc| mc.get_tx()).map(|s| s.to_string())) + } MetaDataBlockType::MdBlock => { // extract TX tag from xml let comment: String = self .get_data_string() .context("failed getting data string to extract TX tag")? .trim_end_matches(['\n', '\r', ' ']) - .into(); // removes ending spaces + .into(); match roxmltree::Document::parse(&comment) { Ok(md) => { - let mut tx: Option = None; for node in md.root().descendants() { - let text = match node.text() { - Some(text) => text.to_string(), - None => String::new(), - }; if node.is_element() + && node.tag_name().name() == "TX" + && let Some(text) = node.text() && !text.is_empty() - && node.tag_name().name() == r"TX" { - tx = Some(text); - break; + return Ok(Some(text.to_string())); } } - Ok(tx) + Ok(None) } Err(e) => { log::warn!("Error parsing comment : \n{comment}\n{e}"); @@ -152,9 +625,10 @@ impl MetaData { } /// Returns the bytes of the text from TX Block or TX's tag text from MD Block pub fn get_tx_bytes(&self) -> Option<&[u8]> { - match self.block_type { - MetaDataBlockType::MdParsed => self.comments.get("TX").map(|s| s.as_bytes()), - _ => Some(&self.raw_data), + if self.raw_data.is_empty() { + None + } else { + Some(&self.raw_data) } } /// Decode string from raw_data field @@ -175,29 +649,44 @@ impl MetaData { self.raw_data = [data, vec![0u8; 8 - data.len() % 8].as_slice()].concat(); self.block.hdr_len = self.raw_data.len() as u64 + 24; } - /// parses the xml bytes specifically for HD block contexted schema - pub fn parse_hd_xml(&mut self) -> Result<()> { - let mut comments: HashMap = HashMap::new(); - // MD Block from HD Block, reading xml - let comment: String = self + /// Helper: get trimmed XML string from raw_data + fn get_xml_string(&self) -> Result { + let s = self .get_data_string()? 
.trim_end_matches(['\n', '\r', ' ']) - .into(); // removes ending spaces - match roxmltree::Document::parse(&comment) { - Ok(md) => { - for node in md.root().descendants().filter(|p| p.has_tag_name("e")) { - if let (Some(value), Some(text)) = (node.attribute("name"), node.text()) { - comments.insert(value.to_string(), text.to_string()); + .to_string(); + Ok(s) + } + /// Parse HD block MD comment (hd_comment.xsd) + pub fn parse_hd_comment(&mut self) -> Result<()> { + let mut hd = HdComment::default(); + let xml = self.get_xml_string()?; + match roxmltree::Document::parse(&xml) { + Ok(doc) => { + let root = doc.root_element(); + hd.tx = extract_text(root, "TX"); + hd.time_source = extract_text(root, "time_source"); + if let Some(constants_node) = root + .children() + .find(|n| n.is_element() && n.has_tag_name("constants")) + { + for c in constants_node + .children() + .filter(|n| n.is_element() && n.has_tag_name("const")) + { + if let (Some(name), Some(text)) = (c.attribute("name"), c.text()) { + hd.constants.insert(name.to_string(), text.to_string()); + } } } + hd.common_properties = parse_common_properties(root); } Err(e) => { - log::warn!("Could not parse HD MD comment : \n{comment}\n{e}"); + log::warn!("Could not parse HD MD comment : \n{xml}\n{e}"); } - }; - self.comments = comments; + } + self.md_comment = Some(MdComment::Hd(hd)); self.block_type = MetaDataBlockType::MdParsed; - self.raw_data = vec![]; // empty the data from block as already parsed Ok(()) } /// Creates File History MetaData @@ -221,62 +710,173 @@ impl MetaData { self.block.hdr_len = fh_comments.len() as u64 + 24; self.raw_data = fh_comments.to_vec(); } - /// parses the xml bytes specifically for File History block contexted schema - fn parse_fh_xml(&mut self) -> Result<()> { - let mut comments: HashMap = HashMap::new(); - // MD Block from FH Block, reading xml - let comment: String = self - .get_data_string()? 
- .trim_end_matches(['\n', '\r', ' ']) - .into(); // removes ending spaces - match roxmltree::Document::parse(&comment) { - Ok(md) => { - for node in md.root().descendants() { - let text = match node.text() { - Some(text) => text.to_string(), - None => String::new(), - }; - comments.insert(node.tag_name().name().to_string(), text); - } + /// Parse FH block MD comment (fh_comment.xsd) + fn parse_fh_comment(&mut self) -> Result<()> { + let mut fh = FhComment::default(); + let xml = self.get_xml_string()?; + match roxmltree::Document::parse(&xml) { + Ok(doc) => { + let root = doc.root_element(); + fh.tx = extract_text(root, "TX"); + fh.tool_id = extract_text(root, "tool_id"); + fh.tool_vendor = extract_text(root, "tool_vendor"); + fh.tool_version = extract_text(root, "tool_version"); + fh.user_name = extract_text(root, "user_name"); + fh.common_properties = parse_common_properties(root); } - Err(e) => { - log::warn!("Could not parse FH comment : \n{comment}\n{e}"); + Err(e) => log::warn!("Could not parse FH comment : \n{xml}\n{e}"), + } + self.md_comment = Some(MdComment::Fh(fh)); + self.block_type = MetaDataBlockType::MdParsed; + Ok(()) + } + /// Parse CN block MD comment (cn_comment.xsd) + fn parse_cn_comment(&mut self) -> Result<()> { + let mut cn = CnComment::default(); + let xml = self.get_xml_string()?; + match roxmltree::Document::parse(&xml) { + Ok(doc) => { + let root = doc.root_element(); + cn.tx = extract_text(root, "TX"); + cn.names = parse_names(root, "names"); + cn.linker_name = extract_text(root, "linker_name"); + cn.linker_address = extract_text(root, "linker_address"); + cn.axis_monotony = extract_text(root, "axis_monotony"); + cn.raster = parse_raster(root); + cn.formula = extract_text(root, "formula"); + cn.address = extract_text(root, "address"); + cn.common_properties = parse_common_properties(root); } - }; - self.comments = comments; + Err(e) => log::warn!("Could not parse CN comment : \n{xml}\n{e}"), + } + self.md_comment = 
Some(MdComment::Cn(cn)); self.block_type = MetaDataBlockType::MdParsed; - self.raw_data = vec![]; // empty the data from block as already parsed Ok(()) } - /// Generic xml parser without schema consideration - fn parse_generic_xml(&mut self) -> Result<()> { - let mut comments: HashMap = HashMap::new(); - let comment: String = self - .get_data_string()? - .trim_end_matches(['\n', '\r', ' ']) - .into(); // removes ending spaces - match roxmltree::Document::parse(&comment) { - Ok(md) => { - for node in md.root().descendants() { - let text = match node.text() { - Some(text) => text.to_string(), - None => String::new(), - }; - if node.is_element() - && !text.is_empty() - && !node.tag_name().name().to_string().is_empty() - { - comments.insert(node.tag_name().name().to_string(), text); - } - } + /// Parse CG block MD comment (cg_comment.xsd) + fn parse_cg_comment(&mut self) -> Result<()> { + let mut cg = CgComment::default(); + let xml = self.get_xml_string()?; + match roxmltree::Document::parse(&xml) { + Ok(doc) => { + let root = doc.root_element(); + cg.tx = extract_text(root, "TX"); + cg.names = parse_names(root, "names"); + cg.common_properties = parse_common_properties(root); } - Err(e) => { - log::warn!("Error parsing comment : \n{comment}\n{e}"); + Err(e) => log::warn!("Could not parse CG comment : \n{xml}\n{e}"), + } + self.md_comment = Some(MdComment::Cg(cg)); + self.block_type = MetaDataBlockType::MdParsed; + Ok(()) + } + /// Parse CC block MD comment (cc_comment.xsd) + fn parse_cc_comment(&mut self) -> Result<()> { + let mut cc = CcComment::default(); + let xml = self.get_xml_string()?; + match roxmltree::Document::parse(&xml) { + Ok(doc) => { + let root = doc.root_element(); + cc.tx = extract_text(root, "TX"); + cc.names = parse_names(root, "names"); + cc.formula = extract_text(root, "formula"); + cc.common_properties = parse_common_properties(root); } - }; - self.comments = comments; + Err(e) => log::warn!("Could not parse CC comment : \n{xml}\n{e}"), + } + 
self.md_comment = Some(MdComment::Cc(cc)); + self.block_type = MetaDataBlockType::MdParsed; + Ok(()) + } + /// Parse SI block MD comment (si_comment.xsd) + fn parse_si_comment(&mut self) -> Result<()> { + let mut si = SiComment::default(); + let xml = self.get_xml_string()?; + match roxmltree::Document::parse(&xml) { + Ok(doc) => { + let root = doc.root_element(); + si.tx = extract_text(root, "TX"); + si.names = parse_names(root, "names"); + si.path = parse_names(root, "path"); + si.bus = parse_names(root, "bus"); + si.protocol = extract_text(root, "protocol"); + si.common_properties = parse_common_properties(root); + } + Err(e) => log::warn!("Could not parse SI comment : \n{xml}\n{e}"), + } + self.md_comment = Some(MdComment::Si(si)); + self.block_type = MetaDataBlockType::MdParsed; + Ok(()) + } + /// Parse EV block MD comment (ev_comment.xsd) + fn parse_ev_comment(&mut self) -> Result<()> { + let mut ev = EvComment::default(); + let xml = self.get_xml_string()?; + match roxmltree::Document::parse(&xml) { + Ok(doc) => { + let root = doc.root_element(); + ev.tx = extract_text(root, "TX"); + ev.pre_trigger_interval = extract_text(root, "pre_trigger_interval") + .and_then(|s| s.parse().ok()); + ev.post_trigger_interval = extract_text(root, "post_trigger_interval") + .and_then(|s| s.parse().ok()); + ev.formula = extract_text(root, "formula"); + ev.timeout = extract_text(root, "timeout").and_then(|s| s.parse().ok()); + ev.common_properties = parse_common_properties(root); + } + Err(e) => log::warn!("Could not parse EV comment : \n{xml}\n{e}"), + } + self.md_comment = Some(MdComment::Ev(ev)); + self.block_type = MetaDataBlockType::MdParsed; + Ok(()) + } + /// Parse AT block MD comment (at_comment.xsd) + fn parse_at_comment(&mut self) -> Result<()> { + let mut at = AtComment::default(); + let xml = self.get_xml_string()?; + match roxmltree::Document::parse(&xml) { + Ok(doc) => { + let root = doc.root_element(); + at.tx = extract_text(root, "TX"); + at.common_properties = 
parse_common_properties(root); + } + Err(e) => log::warn!("Could not parse AT comment : \n{xml}\n{e}"), + } + self.md_comment = Some(MdComment::At(at)); + self.block_type = MetaDataBlockType::MdParsed; + Ok(()) + } + /// Parse CH block MD comment (ch_comment.xsd) + fn parse_ch_comment(&mut self) -> Result<()> { + let mut ch = ChComment::default(); + let xml = self.get_xml_string()?; + match roxmltree::Document::parse(&xml) { + Ok(doc) => { + let root = doc.root_element(); + ch.tx = extract_text(root, "TX"); + ch.names = parse_names(root, "names"); + ch.common_properties = parse_common_properties(root); + } + Err(e) => log::warn!("Could not parse CH comment : \n{xml}\n{e}"), + } + self.md_comment = Some(MdComment::Ch(ch)); + self.block_type = MetaDataBlockType::MdParsed; + Ok(()) + } + /// Parse DG block MD comment (dg_comment.xsd) + fn parse_dg_comment(&mut self) -> Result<()> { + let mut dg = DgComment::default(); + let xml = self.get_xml_string()?; + match roxmltree::Document::parse(&xml) { + Ok(doc) => { + let root = doc.root_element(); + dg.tx = extract_text(root, "TX"); + dg.common_properties = parse_common_properties(root); + } + Err(e) => log::warn!("Could not parse DG comment : \n{xml}\n{e}"), + } + self.md_comment = Some(MdComment::Dg(dg)); self.block_type = MetaDataBlockType::MdParsed; - self.raw_data = vec![]; // empty the data from block as already parsed Ok(()) } /// Writes the metadata to file diff --git a/src/mdfinfo/mdfinfo4/mod.rs b/src/mdfinfo/mdfinfo4/mod.rs index 9cc6eca..32c65f6 100644 --- a/src/mdfinfo/mdfinfo4/mod.rs +++ b/src/mdfinfo/mdfinfo4/mod.rs @@ -487,7 +487,7 @@ impl MdfInfo4 { key, self.sharable.get_tx(block.at_tx_filename), self.sharable.get_tx(block.at_tx_mimetype), - self.sharable.get_comments(block.at_md_comment) + self.sharable.get_md_comment(block.at_md_comment) )) } output @@ -524,7 +524,7 @@ impl MdfInfo4 { "FH[{}]: {}, comment: {:?}\n", i, fh, - self.sharable.get_comments(fh.fh_md_comment), + 
self.sharable.get_md_comment(fh.fh_md_comment), )); } output @@ -537,7 +537,7 @@ impl MdfInfo4 { "position: {}, name: {:?}, comment: {:?}, scope: {:?}, attachment references: {:?}, event type: {}\n", key, self.sharable.get_tx(block.ev_tx_name), - self.sharable.get_comments(block.ev_md_comment), + self.sharable.get_md_comment(block.ev_md_comment), block.get_scope_links(), block.get_attachment_links(), block.ev_type, @@ -737,9 +737,19 @@ impl MdfInfo4 { /// Formats header comments pub fn format_header_comments(&self) -> String { let mut output = String::new(); - let comments = self.sharable.get_hd_comments(self.hd_block.hd_md_comment); - for (tag, text) in comments.iter() { - output.push_str(&format!("{}: {}\n", tag, text)); + if let Some(hd) = self.sharable.get_hd_comments(self.hd_block.hd_md_comment) { + if let Some(tx) = &hd.tx { + output.push_str(&format!("TX: {tx}\n")); + } + if let Some(ts) = &hd.time_source { + output.push_str(&format!("time_source: {ts}\n")); + } + for (name, value) in &hd.constants { + output.push_str(&format!("const {name}: {value}\n")); + } + for (name, value) in &hd.common_properties { + output.push_str(&format!("{name}: {value}\n")); + } } output } diff --git a/src/mdfr.rs b/src/mdfr.rs index eedec4f..78ea6a1 100644 --- a/src/mdfr.rs +++ b/src/mdfr.rs @@ -459,8 +459,12 @@ df=polars.DataFrame(series) if let Ok(res) = mdf.mdf_info.get_tx(atb.at_tx_mimetype) { let _ = atdict.set_item("tx_mimetype", res); } - let _ = - atdict.set_item("md_comment", mdf.mdf_info.get_comments(atb.at_md_comment)); + let _ = atdict.set_item( + "md_comment", + mdf.mdf_info + .get_md_comment(atb.at_md_comment) + .map(|c| format!("{c}")), + ); let _ = atdict.set_item("flags", atb.at_flags); let _ = atdict.set_item("creator_index", atb.at_creator_index); let _ = atl.append(atdict); @@ -499,8 +503,12 @@ df=polars.DataFrame(series) if let Ok(res) = mdf.mdf_info.get_tx(evb.ev_tx_name) { let _ = evdict.set_item("tx_name", res); } - let _ = - 
evdict.set_item("md_comment", mdf.mdf_info.get_comments(evb.ev_md_comment)); + let _ = evdict.set_item( + "md_comment", + mdf.mdf_info + .get_md_comment(evb.ev_md_comment) + .map(|c| format!("{c}")), + ); let _ = evdict.set_item("type", evb.ev_type); let _ = evdict.set_item("sync_type", evb.ev_sync_type); let _ = evdict.set_item("range_type", evb.ev_range_type); @@ -526,8 +534,12 @@ df=polars.DataFrame(series) let fhl = PyList::empty(py); for fhb in fh { let fhdict = PyDict::new(py); - let _ = - fhdict.set_item("comment", mdf.mdf_info.get_comments(fhb.fh_md_comment)); + let _ = fhdict.set_item( + "comment", + mdf.mdf_info + .get_md_comment(fhb.fh_md_comment) + .map(|c| format!("{c}")), + ); let _ = fhdict.set_item("time_ns", fhb.fh_time_ns); let _ = fhdict.set_item("tz_offset_min", fhb.fh_tz_offset_min); let _ = fhdict.set_item("dst_offset_min", fhb.fh_dst_offset_min); @@ -571,8 +583,12 @@ df=polars.DataFrame(series) if let Ok(res) = mdf.mdf_info.get_tx(sib.si_tx_path) { let _ = sidict.set_item("path", res); } - let _ = - sidict.set_item("comment", mdf.mdf_info.get_comments(sib.si_md_comment)); + let _ = sidict.set_item( + "comment", + mdf.mdf_info + .get_md_comment(sib.si_md_comment) + .map(|c| format!("{c}")), + ); let _ = sidict.set_item("type", sib.get_type_str()); let _ = sidict.set_item("type_id", sib.si_type); let _ = sidict.set_item("bus_type", sib.get_bus_type_str()); @@ -626,8 +642,12 @@ df=polars.DataFrame(series) if let Ok(res) = mdf.mdf_info.get_tx(chb.ch_tx_name) { let _ = chdict.set_item("name", res); } - let _ = - chdict.set_item("comment", mdf.mdf_info.get_comments(chb.ch_md_comment)); + let _ = chdict.set_item( + "comment", + mdf.mdf_info + .get_md_comment(chb.ch_md_comment) + .map(|c| format!("{c}")), + ); let _ = chdict.set_item("type", chb.get_type_str()); let _ = chdict.set_item("type_id", chb.ch_type); let _ = chdict.set_item("element_count", chb.ch_element_count); diff --git a/src/mdfreader.rs b/src/mdfreader.rs index c4dff7a..b6c9ea7 100644 
--- a/src/mdfreader.rs +++ b/src/mdfreader.rs @@ -351,11 +351,10 @@ impl fmt::Display for Mdf { MdfInfo::V4(mdfinfo4) => { writeln!(f, "Version : {}", mdfinfo4.id_block.id_ver)?; writeln!(f, "{}\n", mdfinfo4.hd_block)?; - let comments = &mdfinfo4 - .sharable - .get_hd_comments(mdfinfo4.hd_block.hd_md_comment); - for c in comments.iter() { - writeln!(f, "{} {}", c.0, c.1)?; + if let Some(hd) = + mdfinfo4.sharable.get_hd_comments(mdfinfo4.hd_block.hd_md_comment) + { + writeln!(f, "{hd}")?; } for (master, list) in self.get_master_channel_names_set().iter() { if let Some(master_name) = master { From bcb09dacb0add3e26edf48f612b5aed468b5ca51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Sun, 8 Feb 2026 23:24:10 +0100 Subject: [PATCH 17/46] added CABlock writing --- src/mdfinfo/mdfinfo4/ca_block.rs | 192 ++++++++++++++++++++++++++++++- src/mdfwriter/mdfwriter4.rs | 52 ++++----- 2 files changed, 211 insertions(+), 33 deletions(-) diff --git a/src/mdfinfo/mdfinfo4/ca_block.rs b/src/mdfinfo/mdfinfo4/ca_block.rs index cc5be50..513b53d 100644 --- a/src/mdfinfo/mdfinfo4/ca_block.rs +++ b/src/mdfinfo/mdfinfo4/ca_block.rs @@ -1,10 +1,10 @@ //! 
Channel Array block (CABLOCK) for MDF4 use anyhow::{Context, Error, Result}; -use binrw::{BinReaderExt, binrw}; -use byteorder::{LittleEndian, ReadBytesExt}; +use binrw::{BinReaderExt, BinWriterExt, binrw}; +use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; use std::collections::VecDeque; use std::fmt::{self, Display}; -use std::io::Cursor; +use std::io::{Cursor, Seek, Write}; use super::block_header::Blockheader4Short; use crate::data_holder::tensor_arrow::Order; @@ -109,6 +109,190 @@ impl Ca4Block { _ => "Unknown", } } + /// Calculate the number of links based on storage type and flags + fn calculate_link_count(&self) -> u64 { + let d = self.ca_ndim as u64; + let pnd = self.ca_dim_size.iter().product::().max(1); + let mut links: u64 = 1; // ca_composition (always present) + if self.ca_storage == 2 { + links += pnd; // ca_data + } + if self.ca_flags & 0b1 != 0 { + links += d * 3; // ca_dynamic_size + } + if self.ca_flags & 0b10 != 0 { + links += d * 3; // ca_input_quantity + } + if self.ca_flags & 0b100 != 0 { + links += 3; // ca_output_quantity + } + if self.ca_flags & 0b1000 != 0 { + links += 3; // ca_comparison_quantity + } + if self.ca_flags & 0b10000 != 0 { + links += d; // ca_cc_axis_conversion + } + if (self.ca_flags & 0b10000 != 0) && (self.ca_flags & 0b100000 == 0) { + links += d * 3; // ca_axis (axis flag set, fixed axes NOT set) + } + links + } + /// Calculate the total block length in bytes + pub fn calculate_block_len(&self) -> u64 { + let d = self.ca_ndim as u64; + let n_links = self.calculate_link_count(); + // header_short(16) + link_count(8) + links(n*8) + members(16) + dim_size(D*8) + let mut len: u64 = 16 + 8 + n_links * 8 + 16 + d * 8; + // axis_value: present if fixed axis flag (bit 5) is set + if let Some(ref vals) = self.ca_axis_value { + len += vals.len() as u64 * 8; + } + // cycle_count: present if storage >= 1 + if let Some(ref vals) = self.ca_cycle_count { + len += vals.len() as u64 * 8; + } + len + } + /// Prepare CA block for 
writing: zero unmappable link values, recalculate sizes + pub fn prepare_for_write(&mut self) { + // Zero all link Vec values (keep structure for correct link count) + self.ca_composition = 0; + if let Some(ref mut data) = self.ca_data { + data.iter_mut().for_each(|v| *v = 0); + } + if let Some(ref mut ds) = self.ca_dynamic_size { + ds.iter_mut().for_each(|v| *v = 0); + } + if let Some(ref mut iq) = self.ca_input_quantity { + iq.iter_mut().for_each(|v| *v = 0); + } + if let Some(ref mut oq) = self.ca_output_quantity { + oq.iter_mut().for_each(|v| *v = 0); + } + if let Some(ref mut cq) = self.ca_comparison_quantity { + cq.iter_mut().for_each(|v| *v = 0); + } + if let Some(ref mut cc) = self.ca_cc_axis_conversion { + cc.iter_mut().for_each(|v| *v = 0); + } + if let Some(ref mut ax) = self.ca_axis { + ax.iter_mut().for_each(|v| *v = 0); + } + // Recalculate + self.ca_links = self.calculate_link_count(); + self.ca_len = self.calculate_block_len(); + } + /// Write the full CA block to a buffer + pub fn write_to(&self, buffer: &mut W) -> Result<()> { + // Write short header + let header = Blockheader4Short { + hdr_id: self.ca_id, + hdr_gap: self.reserved, + hdr_len: self.ca_len, + }; + buffer + .write_le(&header) + .context("Could not write CA block header")?; + // Write link count + buffer + .write_le(&self.ca_links) + .context("Could not write CA link count")?; + // Write links in spec order + buffer + .write_le(&self.ca_composition) + .context("Could not write ca_composition")?; + if let Some(ref data) = self.ca_data { + for val in data { + buffer + .write_le(val) + .context("Could not write ca_data link")?; + } + } + if let Some(ref ds) = self.ca_dynamic_size { + for val in ds { + buffer + .write_le(val) + .context("Could not write ca_dynamic_size link")?; + } + } + if let Some(ref iq) = self.ca_input_quantity { + for val in iq { + buffer + .write_le(val) + .context("Could not write ca_input_quantity link")?; + } + } + if let Some(ref oq) = self.ca_output_quantity { 
+ for val in oq { + buffer + .write_le(val) + .context("Could not write ca_output_quantity link")?; + } + } + if let Some(ref cq) = self.ca_comparison_quantity { + for val in cq { + buffer + .write_le(val) + .context("Could not write ca_comparison_quantity link")?; + } + } + if let Some(ref cc) = self.ca_cc_axis_conversion { + for val in cc { + buffer + .write_le(val) + .context("Could not write ca_cc_axis_conversion link")?; + } + } + if let Some(ref ax) = self.ca_axis { + for val in ax { + buffer + .write_le(val) + .context("Could not write ca_axis link")?; + } + } + // Write members + buffer + .write_le(&self.ca_type) + .context("Could not write ca_type")?; + buffer + .write_le(&self.ca_storage) + .context("Could not write ca_storage")?; + buffer + .write_le(&self.ca_ndim) + .context("Could not write ca_ndim")?; + buffer + .write_le(&self.ca_flags) + .context("Could not write ca_flags")?; + buffer + .write_le(&self.ca_byte_offset_base) + .context("Could not write ca_byte_offset_base")?; + buffer + .write_le(&self.ca_inval_bit_pos_base) + .context("Could not write ca_inval_bit_pos_base")?; + // Write dim_size array + for dim in &self.ca_dim_size { + buffer + .write_le(dim) + .context("Could not write ca_dim_size")?; + } + // Write axis_value if present + if let Some(ref vals) = self.ca_axis_value { + for v in vals { + buffer + .write_f64::(*v) + .context("Could not write ca_axis_value")?; + } + } + // Write cycle_count if present + if let Some(ref vals) = self.ca_cycle_count { + for v in vals { + buffer + .write_le(v) + .context("Could not write ca_cycle_count")?; + } + } + Ok(()) + } } impl Display for Ca4Block { @@ -296,7 +480,7 @@ pub(super) fn parse_ca_block( let mut val = vec![0i64; (ca_members.ca_ndim * 3) as usize]; let ca_axis: Option> = - if ((ca_members.ca_flags & 0b10000) > 0) & ((ca_members.ca_flags & 0b100000) > 0) { + if ((ca_members.ca_flags & 0b10000) > 0) && ((ca_members.ca_flags & 0b100000) == 0) { ca_block .read_i64_into::(&mut val) 
.context("Could not read ca_axis")?; diff --git a/src/mdfwriter/mdfwriter4.rs b/src/mdfwriter/mdfwriter4.rs index 12bf9ab..58e2fa4 100644 --- a/src/mdfwriter/mdfwriter4.rs +++ b/src/mdfwriter/mdfwriter4.rs @@ -15,7 +15,7 @@ use crate::{ mdfinfo::{ MdfInfo, mdfinfo4::{ - At4Block, BlockType, Blockheader4, Ca4Block, Ca4BlockMembers, Cg4, Cg4Block, Cn4, + At4Block, BlockType, Blockheader4, Ca4Block, Cg4, Cg4Block, Cn4, Cn4Block, Compo, Composition, Dg4, Dg4Block, Dz4Block, Ev4Block, FhBlock, Ld4Block, MdfInfo4, MetaData, MetaDataBlockType, Si4Block, default_short_header, }, @@ -740,27 +740,7 @@ pub fn mdfwriter4(mdf: &Mdf, file_name: &str, compression: bool) -> Result fn write_composition(buffer: &mut Cursor>, compo: &Composition) -> Result<()> { match &compo.block { Compo::CA(c) => { - let header = Blockheader4Short { - hdr_id: [35, 35, 67, 65], // ##CA - hdr_len: c.ca_len, - ..Default::default() - }; - buffer - .write_le(&header) - .context("Could not write CABlock header")?; - buffer - .write_le(&1u64) - .context("error writing number of links in CA Block")?; - let ca_composition: u64 = 0; - buffer - .write_le(&ca_composition) - .context("Could not write CABlock ca_composition")?; - let mut ca_block = Ca4BlockMembers::default(); - ca_block.ca_ndim = c.ca_ndim; - ca_block.ca_dim_size.clone_from(&c.ca_dim_size); - buffer - .write_le(&ca_block) - .context("Could not write CABlock members")?; + c.write_to(buffer).context("Could not write CA block")?; } Compo::DS(ds) => { let header = Blockheader4Short { @@ -1177,22 +1157,36 @@ fn create_blocks( let data_ndim = data.ndim(); let mut composition: Option = None; if data_ndim > 1 { - let data_dim_size = cn + let data_dim_size: Vec = cn .data .shape() .0 .iter() .skip(1) .map(|x| *x as u64) - .collect::>(); - // data_dim_size.remove(0); - let mut ca_block = Ca4Block::default(); - cg_block.cg_data_bytes = cn.list_size as u32 * byte_count; + .collect(); + + // Preserve source CA block if available, else create default + let 
mut ca_block = + if let Some(ref source_compo) = cn.composition + && let Compo::CA(ref source_ca) = source_compo.block + { + let mut ca = (**source_ca).clone(); + ca.prepare_for_write(); + ca + } else { + Ca4Block::default() + }; + cg_block.cg_data_bytes = cn.list_size as u32 * byte_count; cn_block.cn_composition = pointer; + + // Override dims from actual data shape ca_block.ca_ndim = data_ndim as u16; - ca_block.ca_dim_size.clone_from(&data_dim_size); - ca_block.ca_len = 48 + 8 * data_ndim as u64; + ca_block.ca_dim_size = data_dim_size; + // Recalculate after dim changes + ca_block.ca_len = ca_block.calculate_block_len(); + pointer += ca_block.ca_len as i64; composition = Some(Composition { block: Compo::CA(Box::new(ca_block)), From 714a3a91dc5f92ee5711e3d20fac15cd4cb4f853 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Mon, 9 Feb 2026 23:11:06 +0100 Subject: [PATCH 18/46] added more writing tests --- tests/writing.rs | 406 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 406 insertions(+) diff --git a/tests/writing.rs b/tests/writing.rs index 80e1422..7f24fdc 100644 --- a/tests/writing.rs +++ b/tests/writing.rs @@ -2,10 +2,16 @@ use anyhow::Result; use arrow::array::{AsArray, Float64Array, PrimitiveBuilder}; use arrow::datatypes::Float32Type; use mdfr::data_holder::channel_data::ChannelData; +use mdfr::mdfinfo::MdfInfo; use mdfr::mdfreader::Mdf; use std::fs; use std::sync::{Arc, LazyLock}; +/// SI block metadata: (type, bus_type, flags, name, path) +type SiInfo = (u8, u8, u8, Option, Option); +/// AT block metadata: (filename, mimetype, flags, original_size, embedded_data) +type AtInfo = (Option, Option, u16, u64, Option>); + static MDFREADER_TESTS_PATH: &str = "/home/ratal/workspace/mdfreader/mdfreader/tests/"; static MDFR_PATH: &str = "/home/ratal/workspace/mdfr/"; @@ -257,3 +263,403 @@ fn mdf_add_channel() -> Result<()> { assert!(mdf.get_channel_data(&new_channel_name).is_none()); Ok(()) } + +#[test] +fn 
writing_mdf4_file_history() -> Result<()> { + let writing_mdf_file = format!("{}/writing_fh_test.mf4", BASE_TEST_PATH.as_str()); + let file = format!( + "{}{}", + BASE_PATH_MDF4.as_str(), + "Simple/PCV_iO_Gen3_LK1__3l_TDI.mf4" + ); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + // Capture source FH metadata + let (source_fh_count, source_fh_timestamps) = match &mdf.mdf_info { + MdfInfo::V4(info4) => { + assert!(!info4.fh.is_empty(), "Source file should have FH blocks"); + let timestamps: Vec = info4.fh.iter().map(|fh| fh.fh_time_ns).collect(); + (info4.fh.len(), timestamps) + } + _ => panic!("Expected MDF4 file"), + }; + + // Write and re-read from disk + let _written = mdf.write(&writing_mdf_file, false)?; + let reread = Mdf::new(&writing_mdf_file)?; + + match &reread.mdf_info { + MdfInfo::V4(info4) => { + assert_eq!( + info4.fh.len(), + source_fh_count + 1, + "Written file should have one more FH block than source" + ); + // Original timestamps preserved in order + for (i, expected_ts) in source_fh_timestamps.iter().enumerate() { + assert_eq!( + info4.fh[i].fh_time_ns, *expected_ts, + "FH[{}] timestamp mismatch", + i + ); + } + // New FH block has valid timestamp + let new_fh = &info4.fh[source_fh_count]; + assert!(new_fh.fh_time_ns > 0, "New FH block should have valid timestamp"); + } + _ => panic!("Expected MDF4 file"), + } + + fs::remove_file(&writing_mdf_file).ok(); + Ok(()) +} + +#[test] +fn writing_mdf4_source_information() -> Result<()> { + let writing_mdf_file = format!("{}/writing_si_test.mf4", BASE_TEST_PATH.as_str()); + let file = format!( + "{}{}", + BASE_PATH_MDF4.as_str(), + "Events/Marker/dSPACE_Bookmarks.mf4" + ); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + // Capture source SI unique content tuples using public API + let mut source_si_set: Vec = + match &mdf.mdf_info { + MdfInfo::V4(info4) => { + let si_blocks = info4.get_source_information_blocks(); + assert!( + 
!si_blocks.is_empty(), + "Source file should have SI blocks" + ); + si_blocks + .values() + .map(|si| { + let name = si.get_si_source_name(&info4.sharable).ok().flatten(); + let path = si.get_si_path_name(&info4.sharable).ok().flatten(); + (si.si_type, si.si_bus_type, si.si_flags, name, path) + }) + .collect() + } + _ => panic!("Expected MDF4 file"), + }; + source_si_set.sort(); + source_si_set.dedup(); + + // Write and re-read from disk + let _written = mdf.write(&writing_mdf_file, false)?; + let reread = Mdf::new(&writing_mdf_file)?; + + match &reread.mdf_info { + MdfInfo::V4(info4) => { + let si_blocks = info4.get_source_information_blocks(); + assert!( + !si_blocks.is_empty(), + "Written file should have SI blocks" + ); + let mut reread_si_set: Vec = si_blocks + .values() + .map(|si| { + let name = si.get_si_source_name(&info4.sharable).ok().flatten(); + let path = si.get_si_path_name(&info4.sharable).ok().flatten(); + (si.si_type, si.si_bus_type, si.si_flags, name, path) + }) + .collect(); + reread_si_set.sort(); + reread_si_set.dedup(); + // Verify all unique SI content is preserved + assert_eq!( + source_si_set, reread_si_set, + "SI block unique content mismatch" + ); + } + _ => panic!("Expected MDF4 file"), + } + + fs::remove_file(&writing_mdf_file).ok(); + Ok(()) +} + +#[test] +fn writing_mdf4_events() -> Result<()> { + let writing_mdf_file = format!("{}/writing_events_test.mf4", BASE_TEST_PATH.as_str()); + let file = format!( + "{}{}", + BASE_PATH_MDF4.as_str(), + "Events/Marker/dSPACE_Bookmarks.mf4" + ); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + // Capture source event metadata + let (mut source_event_types, mut source_event_names) = match &mdf.mdf_info { + MdfInfo::V4(info4) => { + assert!(!info4.ev.is_empty(), "Source file should contain events"); + let types: Vec<(u8, u8, u8)> = info4 + .ev + .values() + .map(|ev| (ev.ev_type, ev.ev_sync_type, ev.ev_cause)) + .collect(); + let names: Vec> = info4 + .ev + 
.values() + .map(|ev| info4.sharable.get_tx(ev.ev_tx_name).ok().flatten()) + .collect(); + (types, names) + } + _ => panic!("Expected MDF4 file"), + }; + let source_event_count = source_event_types.len(); + source_event_types.sort(); + source_event_names.sort(); + + // Write and re-read from disk + let _written = mdf.write(&writing_mdf_file, false)?; + let mut reread = Mdf::new(&writing_mdf_file)?; + reread.load_all_channels_data_in_memory()?; + + match &reread.mdf_info { + MdfInfo::V4(info4) => { + assert_eq!(info4.ev.len(), source_event_count, "Event count mismatch"); + let mut reread_types: Vec<(u8, u8, u8)> = info4 + .ev + .values() + .map(|ev| (ev.ev_type, ev.ev_sync_type, ev.ev_cause)) + .collect(); + reread_types.sort(); + assert_eq!(source_event_types, reread_types, "Event types mismatch"); + + let mut reread_names: Vec> = info4 + .ev + .values() + .map(|ev| info4.sharable.get_tx(ev.ev_tx_name).ok().flatten()) + .collect(); + reread_names.sort(); + assert_eq!(source_event_names, reread_names, "Event names mismatch"); + } + _ => panic!("Expected MDF4 file"), + } + + // Verify channel data also preserved + let channel_names = mdf.get_channel_names_set(); + for name in &channel_names { + if let Some(src_data) = mdf.get_channel_data(name) + && let Some(reread_data) = reread.get_channel_data(name) + { + assert_eq!(*src_data, *reread_data, "Data mismatch for channel {}", name); + } + } + + fs::remove_file(&writing_mdf_file).ok(); + Ok(()) +} + +#[test] +fn writing_mdf4_attachments() -> Result<()> { + let writing_mdf_file = format!("{}/writing_attachments_test.mf4", BASE_TEST_PATH.as_str()); + let file = format!( + "{}{}", + BASE_PATH_MDF4.as_str(), + "Attachments/Embedded/Vector_Embedded.MF4" + ); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + // Capture source attachment metadata as sorted tuples (sort by filename) + let mut source_at_info: Vec = + match &mdf.mdf_info { + MdfInfo::V4(info4) => { + assert!( + 
!info4.at.is_empty(), + "Source file should have attachments" + ); + info4 + .at + .values() + .map(|(at, data)| { + let filename = info4.sharable.get_tx(at.at_tx_filename).ok().flatten(); + let mimetype = info4.sharable.get_tx(at.at_tx_mimetype).ok().flatten(); + (filename, mimetype, at.at_flags, at.at_original_size, data.clone()) + }) + .collect() + } + _ => panic!("Expected MDF4 file"), + }; + source_at_info.sort_by(|a, b| a.0.cmp(&b.0)); + let source_at_count = source_at_info.len(); + + // Write and re-read from disk + let _written = mdf.write(&writing_mdf_file, false)?; + let mut reread = Mdf::new(&writing_mdf_file)?; + reread.load_all_channels_data_in_memory()?; + + match &reread.mdf_info { + MdfInfo::V4(info4) => { + assert_eq!(info4.at.len(), source_at_count, "Attachment count mismatch"); + let mut reread_at_info: Vec = + info4 + .at + .values() + .map(|(at, data)| { + let filename = info4.sharable.get_tx(at.at_tx_filename).ok().flatten(); + let mimetype = info4.sharable.get_tx(at.at_tx_mimetype).ok().flatten(); + (filename, mimetype, at.at_flags, at.at_original_size, data.clone()) + }) + .collect(); + reread_at_info.sort_by(|a, b| a.0.cmp(&b.0)); + + for (i, (src, rr)) in source_at_info.iter().zip(reread_at_info.iter()).enumerate() { + assert_eq!(src.0, rr.0, "Attachment {} filename mismatch", i); + assert_eq!(src.1, rr.1, "Attachment {} mimetype mismatch", i); + assert_eq!(src.2, rr.2, "Attachment {} flags mismatch", i); + assert_eq!(src.3, rr.3, "Attachment {} original_size mismatch", i); + assert_eq!(src.4, rr.4, "Attachment {} embedded data mismatch", i); + } + } + _ => panic!("Expected MDF4 file"), + } + + // Verify channel data preserved + let channel_names = mdf.get_channel_names_set(); + for name in &channel_names { + if let Some(src_data) = mdf.get_channel_data(name) + && let Some(reread_data) = reread.get_channel_data(name) + { + assert_eq!(*src_data, *reread_data, "Data mismatch for channel {}", name); + } + } + + 
fs::remove_file(&writing_mdf_file).ok(); + Ok(()) +} + +#[test] +fn writing_mdf4_vlsd() -> Result<()> { + let writing_mdf_file = format!("{}/writing_vlsd_test.mf4", BASE_TEST_PATH.as_str()); + + let vlsd_channel = "Data channel"; + let time_channel = "Time channel"; + + // Test UTF-8 VLSD + let file = format!( + "{}{}", + BASE_PATH_MDF4.as_str(), + "ChannelTypes/VLSD/Vector_VLSD_String_UTF8.mf4" + ); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + let src_time = mdf + .get_channel_data(time_channel) + .expect("Source should have Time channel") + .clone(); + + // Write without compression — verify file is valid and non-VLSD data preserved + let _written = mdf.write(&writing_mdf_file, false)?; + let mut reread = Mdf::new(&writing_mdf_file)?; + reread.load_all_channels_data_in_memory()?; + let reread_names = reread.get_channel_names_set(); + assert!( + reread_names.contains(vlsd_channel), + "VLSD channel should be detected in re-read file (uncompressed)" + ); + let reread_time = reread + .get_channel_data(time_channel) + .expect("Time channel should have data after re-read"); + assert_eq!(src_time, *reread_time, "Time data mismatch (no compression)"); + + // Write with compression — verify file is valid + let _written = mdf.write(&writing_mdf_file, true)?; + let mut reread = Mdf::new(&writing_mdf_file)?; + reread.load_all_channels_data_in_memory()?; + let reread_names = reread.get_channel_names_set(); + assert!( + reread_names.contains(vlsd_channel), + "VLSD channel should be detected in re-read file (compressed)" + ); + let reread_time = reread + .get_channel_data(time_channel) + .expect("Time channel should have data after compressed re-read"); + assert_eq!(src_time, *reread_time, "Time data mismatch (with compression)"); + + // Test SBC encoding + let file = format!( + "{}{}", + BASE_PATH_MDF4.as_str(), + "ChannelTypes/VLSD/Vector_VLSD_String_SBC.mf4" + ); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + 
let src_time = mdf + .get_channel_data(time_channel) + .expect("SBC source should have Time channel") + .clone(); + + let _written = mdf.write(&writing_mdf_file, true)?; + let mut reread = Mdf::new(&writing_mdf_file)?; + reread.load_all_channels_data_in_memory()?; + assert!( + reread.get_channel_names_set().contains(vlsd_channel), + "VLSD SBC channel should be detected in re-read file" + ); + let reread_time = reread + .get_channel_data(time_channel) + .expect("SBC Time channel should have data after re-read"); + assert_eq!(src_time, *reread_time, "SBC Time data mismatch"); + + fs::remove_file(&writing_mdf_file).ok(); + Ok(()) +} + +#[test] +fn writing_mdf4_arrays() -> Result<()> { + let writing_mdf_file = format!("{}/writing_arrays_test.mf4", BASE_TEST_PATH.as_str()); + let file = format!( + "{}{}", + BASE_PATH_MDF4.as_str(), + "Arrays/Simple/Vector_ArrayWithFixedAxes.MF4" + ); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + // Verify source has CA blocks + let source_has_ca = match &mdf.mdf_info { + MdfInfo::V4(info4) => info4.dg.values().any(|dg| { + dg.cg.values().any(|cg| { + cg.cn.values().any(|cn| { + cn.composition + .as_ref() + .is_some_and(|c| matches!(c.block, mdfr::mdfinfo::mdfinfo4::Compo::CA(_))) + }) + }) + }), + _ => false, + }; + assert!(source_has_ca, "Source file should have CA blocks"); + + // Capture source channel names + let source_names = mdf.get_channel_names_set(); + assert!(!source_names.is_empty(), "Source file should have channels"); + + // Write and verify channel data is preserved via in-memory return + let written = mdf.write(&writing_mdf_file, false)?; + let written_names = written.get_channel_names_set(); + assert_eq!(source_names, written_names, "Channel names should be preserved"); + + for name in &source_names { + if let Some(src_data) = mdf.get_channel_data(name) + && let Some(written_data) = written.get_channel_data(name) + { + assert_eq!( + *src_data, *written_data, + "Array data mismatch for 
channel {}", + name + ); + } + } + + fs::remove_file(&writing_mdf_file).ok(); + Ok(()) +} From 0e754be96d38da6310b23f1626f41f3ec7c38156 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Mon, 9 Feb 2026 23:11:17 +0100 Subject: [PATCH 19/46] fixed writing bug --- src/mdfwriter/mdfwriter4.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mdfwriter/mdfwriter4.rs b/src/mdfwriter/mdfwriter4.rs index 58e2fa4..28a83ff 100644 --- a/src/mdfwriter/mdfwriter4.rs +++ b/src/mdfwriter/mdfwriter4.rs @@ -1181,8 +1181,8 @@ fn create_blocks( cg_block.cg_data_bytes = cn.list_size as u32 * byte_count; cn_block.cn_composition = pointer; - // Override dims from actual data shape - ca_block.ca_ndim = data_ndim as u16; + // Override dims from actual data shape (data_ndim includes sample count, ca_ndim does not) + ca_block.ca_ndim = (data_ndim - 1) as u16; ca_block.ca_dim_size = data_dim_size; // Recalculate after dim changes ca_block.ca_len = ca_block.calculate_block_len(); From 0cef1514f638b5218c4cae72105148131c3309eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Tue, 10 Feb 2026 08:11:18 +0100 Subject: [PATCH 20/46] Added CHblock writing and composition for DS/CL CV/CU + tests --- src/mdfinfo/mdfinfo4/ch_block.rs | 6 + src/mdfwriter/mdfwriter4.rs | 240 ++++++++++++++++++++++++++++- tests/writing.rs | 250 +++++++++++++++++++++++++++++++ 3 files changed, 495 insertions(+), 1 deletion(-) diff --git a/src/mdfinfo/mdfinfo4/ch_block.rs b/src/mdfinfo/mdfinfo4/ch_block.rs index ccdb95e..53573fb 100644 --- a/src/mdfinfo/mdfinfo4/ch_block.rs +++ b/src/mdfinfo/mdfinfo4/ch_block.rs @@ -48,6 +48,12 @@ pub struct Ch4Block { } impl Ch4Block { + /// Calculate the total block size (header + links + data) + pub fn calculate_block_size(&self) -> i64 { + // 16 (short header) + 8 (link count) + 8*ch_links (links) + 8 (data members) + 16 + 8 + (self.ch_links * 8) as i64 + 8 + } + /// Returns the hierarchy 
type as a string description pub fn get_type_str(&self) -> &'static str { match self.ch_type { diff --git a/src/mdfwriter/mdfwriter4.rs b/src/mdfwriter/mdfwriter4.rs index 28a83ff..f66b051 100644 --- a/src/mdfwriter/mdfwriter4.rs +++ b/src/mdfwriter/mdfwriter4.rs @@ -15,7 +15,7 @@ use crate::{ mdfinfo::{ MdfInfo, mdfinfo4::{ - At4Block, BlockType, Blockheader4, Ca4Block, Cg4, Cg4Block, Cn4, + At4Block, BlockType, Blockheader4, Ca4Block, Cg4, Cg4Block, Ch4Block, Cn4, Cn4Block, Compo, Composition, Dg4, Dg4Block, Dz4Block, Ev4Block, FhBlock, Ld4Block, MdfInfo4, MetaData, MetaDataBlockType, Si4Block, default_short_header, }, @@ -390,6 +390,118 @@ pub fn mdfwriter4(mdf: &Mdf, file_name: &str, compression: bool) -> Result last_dg.block.dg_dg_next = 0; } + // Build position maps for DG/CG/CN remapping (needed for CH block element triplets) + let mut dg_position_map: HashMap = HashMap::new(); + let mut cg_position_map: HashMap = HashMap::new(); + let mut cn_position_map: HashMap = HashMap::new(); + for (name, (_, old_dg, (old_cg, _), (old_cn, _))) in info.channel_names_set.iter() { + if let Some((_, new_dg, (new_cg, _), (new_cn, _))) = + new_info.channel_names_set.get(name) + { + dg_position_map.insert(*old_dg, *new_dg); + cg_position_map.insert(*old_cg, *new_cg); + cn_position_map.insert(*old_cn, *new_cn); + } + } + + // Copy CH blocks (channel hierarchy) from source file + type ChBlockEntry = (i64, Ch4Block, Option, Option); + let mut ch_blocks: Vec = Vec::new(); + let mut ch_position_map: HashMap = HashMap::new(); + + if !info.ch.is_empty() { + new_info.hd_block.hd_ch_first = pointer; + + // Sort source CH blocks by position for deterministic sibling/child link order + let mut sorted_ch: Vec<(i64, &Ch4Block)> = + info.ch.iter().map(|(k, v)| (*k, v)).collect(); + sorted_ch.sort_by_key(|(pos, _)| *pos); + + // Pass 1: Assign positions and collect metadata + for (orig_pos, ch) in &sorted_ch { + let new_ch_position = pointer; + ch_position_map.insert(*orig_pos, 
new_ch_position); + + let ch_name_md = if ch.ch_tx_name != 0 { + info.sharable.md_tx.get(&ch.ch_tx_name).cloned() + } else { + None + }; + let ch_comment_md = if ch.ch_md_comment != 0 { + info.sharable.md_tx.get(&ch.ch_md_comment).cloned() + } else { + None + }; + + pointer += ch.calculate_block_size(); + if let Some(ref md) = ch_name_md { + pointer += md.block.hdr_len as i64; + } + if let Some(ref md) = ch_comment_md { + pointer += md.block.hdr_len as i64; + } + + ch_blocks.push((*orig_pos, (*ch).clone(), ch_name_md, ch_comment_md)); + } + + // Pass 2: Remap all links + for (orig_pos, ch, name_md, comment_md) in ch_blocks.iter_mut() { + // Remap sibling and child links + ch.ch_ch_next = if ch.ch_ch_next > 0 { + ch_position_map.get(&ch.ch_ch_next).copied().unwrap_or(0) + } else { + 0 + }; + ch.ch_ch_first = if ch.ch_ch_first > 0 { + ch_position_map.get(&ch.ch_ch_first).copied().unwrap_or(0) + } else { + 0 + }; + + // Remap TX name and MD comment to positions after the CH block itself + let new_ch_pos = ch_position_map.get(orig_pos).copied().unwrap_or(0); + let block_size = ch.calculate_block_size(); + let mut meta_offset = new_ch_pos + block_size; + + if let Some(md) = name_md.as_ref() { + ch.ch_tx_name = meta_offset; + meta_offset += md.block.hdr_len as i64; + } else { + ch.ch_tx_name = 0; + } + ch.ch_md_comment = if comment_md.is_some() { + meta_offset + } else { + 0 + }; + + // Remap element triplets (DG, CG, CN positions) + for i in 0..ch.ch_element_count as usize { + let base = i * 3; + if base + 2 < ch.ch_element.len() { + ch.ch_element[base] = dg_position_map + .get(&ch.ch_element[base]) + .copied() + .unwrap_or(0); + ch.ch_element[base + 1] = cg_position_map + .get(&ch.ch_element[base + 1]) + .copied() + .unwrap_or(0); + ch.ch_element[base + 2] = cn_position_map + .get(&ch.ch_element[base + 2]) + .copied() + .unwrap_or(0); + } + } + } + + // Store in new_info + for (orig_pos, ch, _, _) in &ch_blocks { + let new_pos = 
ch_position_map.get(orig_pos).copied().unwrap_or(0); + new_info.ch.insert(new_pos, ch.clone()); + } + } + // thread writing the channels data first as block size can be unknown due to compression let (tx, rx) = bounded::>(n_channels); let fname = Arc::new(Mutex::new(file_name.to_string())); @@ -683,6 +795,69 @@ pub fn mdfwriter4(mdf: &Mdf, file_name: &str, compression: bool) -> Result } } + // Writes CHBLOCKs (channel hierarchy) + for (_orig_pos, ch, ch_name_md, ch_comment_md) in &ch_blocks { + // Write CH block short header + let ch_header = Blockheader4Short { + hdr_id: [35, 35, 67, 72], // ##CH + hdr_gap: [0u8; 4], + hdr_len: ch.calculate_block_size() as u64, + }; + buffer + .write_le(&ch_header) + .context("Could not write CHBlock header")?; + + // Write link count + buffer + .write_le(&ch.ch_links) + .context("Could not write CHBlock link count")?; + + // Write fixed links + buffer + .write_le(&ch.ch_ch_next) + .context("Could not write ch_ch_next")?; + buffer + .write_le(&ch.ch_ch_first) + .context("Could not write ch_ch_first")?; + buffer + .write_le(&ch.ch_tx_name) + .context("Could not write ch_tx_name")?; + buffer + .write_le(&ch.ch_md_comment) + .context("Could not write ch_md_comment")?; + + // Write element links (DG/CG/CN triplets) + for elem in &ch.ch_element { + buffer + .write_le(elem) + .context("Could not write ch_element link")?; + } + + // Write data members + buffer + .write_le(&ch.ch_element_count) + .context("Could not write ch_element_count")?; + buffer + .write_le(&ch.ch_type) + .context("Could not write ch_type")?; + buffer + .write_all(&ch.ch_reserved) + .context("Could not write ch_reserved")?; + + // Write TX name block if present + if let Some(name_md) = ch_name_md { + name_md + .write(&mut buffer) + .context("Failed writing CH name")?; + } + // Write MD comment block if present + if let Some(comment_md) = ch_comment_md { + comment_md + .write(&mut buffer) + .context("Failed writing CH comment")?; + } + } + // Writes DG+CG+CN blocks 
for (_position, dg) in new_info.dg.iter() { buffer @@ -803,6 +978,56 @@ fn write_composition(buffer: &mut Cursor>, compo: &Composition) -> Resul Ok(()) } +/// Calculate the serialized size of a composition tree (for position tracking) +fn calculate_composition_size(compo: &Composition) -> i64 { + let block_size: i64 = match &compo.block { + Compo::CA(c) => c.ca_len as i64, + Compo::DS(ds) => (16 + 8 + ds.ds_links * 8 + 8) as i64, + Compo::CL(_) => 48, + Compo::CV(cv) => { + (16 + 8 + cv.cv_n_links * 8 + 4 + 4 + cv.cv_option_count as u64 * 8) as i64 + } + Compo::CU(cu) => (16 + 8 + cu.cu_n_links * 8 + 4 + 4) as i64, + Compo::CN(cn) => cn.header.hdr_len as i64, + }; + let nested = compo + .compo + .as_ref() + .map_or(0, |n| calculate_composition_size(n)); + block_size + nested +} + +/// Zero internal CN-referencing links in a composition tree. +/// Preserves block types, flags, and data members but zeroes links to CN blocks +/// from the old file (whose positions are invalid after writing). 
+fn zero_composition_links(compo: &mut Composition) { + match &mut compo.block { + Compo::DS(ds) => ds.links.iter_mut().for_each(|l| *l = 0), + Compo::CL(cl) => { + cl.cl_composition = 0; + cl.cl_cn_size = 0; + } + Compo::CV(cv) => { + cv.cv_cn_discriminator = 0; + cv.cv_cn_option.iter_mut().for_each(|l| *l = 0); + } + Compo::CU(cu) => cu.cu_cn_member.iter_mut().for_each(|l| *l = 0), + Compo::CN(cn) => { + cn.block.cn_composition = 0; + cn.block.cn_tx_name = 0; + cn.block.cn_md_unit = 0; + cn.block.cn_md_comment = 0; + cn.block.cn_data = 0; + cn.block.cn_cc_conversion = 0; + cn.block.set_si_source(0); + } + Compo::CA(ca) => ca.prepare_for_write(), + } + if let Some(ref mut nested) = compo.compo { + zero_composition_links(nested); + } +} + /// Writes the data blocks fn write_data_blocks( position: i64, @@ -1192,6 +1417,19 @@ fn create_blocks( block: Compo::CA(Box::new(ca_block)), compo: None, }); + } else if let Some(ref source_compo) = cn.composition { + // Preserve non-CA composition from source (DS, CL, CV, CU, CN) + match &source_compo.block { + Compo::CA(_) => {} // Already handled above for data_ndim > 1 + _ => { + let mut cloned = source_compo.clone(); + zero_composition_links(&mut cloned); + let compo_size = calculate_composition_size(&cloned); + cn_block.cn_composition = pointer; + pointer += compo_size; + composition = Some(cloned); + } + } } dg_block.dg_dg_next = pointer; diff --git a/tests/writing.rs b/tests/writing.rs index 7f24fdc..ae0ba4f 100644 --- a/tests/writing.rs +++ b/tests/writing.rs @@ -3,8 +3,10 @@ use arrow::array::{AsArray, Float64Array, PrimitiveBuilder}; use arrow::datatypes::Float32Type; use mdfr::data_holder::channel_data::ChannelData; use mdfr::mdfinfo::MdfInfo; +use mdfr::mdfinfo::mdfinfo4::Compo; use mdfr::mdfreader::Mdf; use std::fs; +use std::path::Path; use std::sync::{Arc, LazyLock}; /// SI block metadata: (type, bus_type, flags, name, path) @@ -663,3 +665,251 @@ fn writing_mdf4_arrays() -> Result<()> { 
fs::remove_file(&writing_mdf_file).ok(); Ok(()) } + +#[test] +fn writing_mdf4_channel_hierarchy() -> Result<()> { + // CH blocks are optional in MDF4 and none of the current test files contain them. + // This test verifies that files without CH blocks still write correctly (hd_ch_first = 0) + // and that the CH writing code path doesn't break anything. + let writing_mdf_file = format!("{}/writing_ch_test.mf4", BASE_TEST_PATH.as_str()); + let file = format!( + "{}{}", + BASE_PATH_MDF4.as_str(), + "Simple/PCV_iO_Gen3_LK1__3l_TDI.mf4" + ); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + // Verify source has no CH blocks (expected with current test files) + let source_ch_count = match &mdf.mdf_info { + MdfInfo::V4(info4) => info4.ch.len(), + _ => panic!("Expected MDF4 file"), + }; + + // Write and re-read + let _written = mdf.write(&writing_mdf_file, false)?; + let reread = Mdf::new(&writing_mdf_file)?; + + match &reread.mdf_info { + MdfInfo::V4(info4) => { + // CH block count should match source (both should be 0 with current test files) + assert_eq!( + info4.ch.len(), + source_ch_count, + "CH block count should be preserved" + ); + } + _ => panic!("Expected MDF4 file"), + } + + fs::remove_file(&writing_mdf_file).ok(); + Ok(()) +} + +#[test] +fn writing_mdf4_composition_ds_cl() -> Result<()> { + // DS/CL (Data Stream + Channel List) roundtrip test + // DS/CL compositions describe VLSD blob layouts. After the reader decodes the + // blob into typed child channels (x.a, x.b), the parent structure channel ("x") + // has zero bit_count and the auxiliary VLSD channel has empty data. These are + // metadata-only channels that carry no data, so the writer correctly skips them. + // The decoded child channel data is preserved as independent channels. 
+ let writing_mdf_file = format!("{}/writing_ds_cl_test.mf4", BASE_TEST_PATH.as_str()); + let file = "/home/ratal/workspace/mdfreader/mdfreader/tests/MDF4/MDF4.3/Base_Standard/Examples/DynamicData/ChannelList/simple_list.mf4"; + if !Path::new(file).exists() { + return Ok(()); + } + + let mut mdf = Mdf::new(file)?; + mdf.load_all_channels_data_in_memory()?; + + // Verify source has DS/CL composition + let has_ds_or_cl = match &mdf.mdf_info { + MdfInfo::V4(info4) => info4.dg.values().any(|dg| { + dg.cg.values().any(|cg| { + cg.cn.values().any(|cn| { + cn.composition.as_ref().is_some_and(|c| { + matches!(c.block, Compo::DS(_) | Compo::CL(_)) + }) + }) + }) + }), + _ => false, + }; + assert!(has_ds_or_cl, "Source should have DS or CL composition"); + + // Write and verify via in-memory return + let mut written = mdf.write(&writing_mdf_file, false)?; + written.load_all_channels_data_in_memory()?; + + // Verify decoded child channel data is preserved through write roundtrip + for channel_name in &["time", "x.a", "x.b", "size"] { + if let Some(src) = mdf.get_channel_data(channel_name) + && let Some(wr) = written.get_channel_data(channel_name) + { + assert_eq!( + src.len(), + wr.len(), + "Length mismatch for channel {}", + channel_name + ); + assert_eq!(*src, *wr, "Data mismatch for channel {}", channel_name); + } else { + panic!( + "Channel {} missing: source={}, written={}", + channel_name, + mdf.get_channel_data(channel_name).is_some(), + written.get_channel_data(channel_name).is_some(), + ); + } + } + + fs::remove_file(&writing_mdf_file).ok(); + Ok(()) +} + +#[test] +fn writing_mdf4_composition_cv() -> Result<()> { + // CV (Channel Variant) composition test + let writing_mdf_file = format!("{}/writing_cv_test.mf4", BASE_TEST_PATH.as_str()); + let file = "/home/ratal/workspace/mdfreader/mdfreader/tests/MDF4/MDF4.3/Base_Standard/Examples/Variant/Etas_cv_storage_with_fixed_length.mf4"; + if !Path::new(file).exists() { + return Ok(()); + } + + let mut mdf = 
Mdf::new(file)?; + mdf.load_all_channels_data_in_memory()?; + + // Verify source has CV composition + let source_cv_info = match &mdf.mdf_info { + MdfInfo::V4(info4) => { + let mut found = false; + let mut option_count = 0u32; + for dg in info4.dg.values() { + for cg in dg.cg.values() { + for cn in cg.cn.values() { + if let Some(c) = &cn.composition + && let Compo::CV(cv) = &c.block + { + found = true; + option_count = cv.cv_option_count; + } + } + } + } + (found, option_count) + } + _ => (false, 0), + }; + assert!(source_cv_info.0, "Source should have CV composition"); + + // Write and verify via in-memory return + let written = mdf.write(&writing_mdf_file, false)?; + + // Verify CV composition preserved with same option count + match &written.mdf_info { + MdfInfo::V4(info4) => { + let mut found = false; + for dg in info4.dg.values() { + for cg in dg.cg.values() { + for cn in cg.cn.values() { + if let Some(c) = &cn.composition + && let Compo::CV(cv) = &c.block + { + found = true; + assert_eq!( + cv.cv_option_count, source_cv_info.1, + "CV option count should be preserved" + ); + } + } + } + } + assert!(found, "Written file should preserve CV composition"); + } + _ => panic!("Expected MDF4"), + } + + // Verify time channel data + if let Some(src) = mdf.get_channel_data("time") + && let Some(wr) = written.get_channel_data("time") + { + assert_eq!(*src, *wr, "Time data mismatch"); + } + + fs::remove_file(&writing_mdf_file).ok(); + Ok(()) +} + +#[test] +fn writing_mdf4_composition_cu() -> Result<()> { + // CU (Channel Union) composition test + let writing_mdf_file = format!("{}/writing_cu_test.mf4", BASE_TEST_PATH.as_str()); + let file = "/home/ratal/workspace/mdfreader/mdfreader/tests/MDF4/MDF4.3/Base_Standard/Examples/Union/Etas_cu_storage_with_fixed_length.mf4"; + if !Path::new(file).exists() { + return Ok(()); + } + + let mut mdf = Mdf::new(file)?; + mdf.load_all_channels_data_in_memory()?; + + // Verify source has CU composition + let source_cu_info = match 
&mdf.mdf_info { + MdfInfo::V4(info4) => { + let mut found = false; + let mut member_count = 0u32; + for dg in info4.dg.values() { + for cg in dg.cg.values() { + for cn in cg.cn.values() { + if let Some(c) = &cn.composition + && let Compo::CU(cu) = &c.block + { + found = true; + member_count = cu.cu_member_count; + } + } + } + } + (found, member_count) + } + _ => (false, 0), + }; + assert!(source_cu_info.0, "Source should have CU composition"); + + // Write and verify via in-memory return + let written = mdf.write(&writing_mdf_file, false)?; + + // Verify CU composition preserved with same member count + match &written.mdf_info { + MdfInfo::V4(info4) => { + let mut found = false; + for dg in info4.dg.values() { + for cg in dg.cg.values() { + for cn in cg.cn.values() { + if let Some(c) = &cn.composition + && let Compo::CU(cu) = &c.block + { + found = true; + assert_eq!( + cu.cu_member_count, source_cu_info.1, + "CU member count should be preserved" + ); + } + } + } + } + assert!(found, "Written file should preserve CU composition"); + } + _ => panic!("Expected MDF4"), + } + + // Verify time channel data + if let Some(src) = mdf.get_channel_data("time") + && let Some(wr) = written.get_channel_data("time") + { + assert_eq!(*src, *wr, "Time data mismatch"); + } + + fs::remove_file(&writing_mdf_file).ok(); + Ok(()) +} From 682f77d70ed4e67e7e2255036ea6e329ca468323 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Tue, 10 Feb 2026 12:32:32 +0100 Subject: [PATCH 21/46] added frequency sync --- src/mdfinfo/mdfinfo4/ev_block.rs | 3 +++ src/mdfinfo/mdfinfo4/sr_block.rs | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/mdfinfo/mdfinfo4/ev_block.rs b/src/mdfinfo/mdfinfo4/ev_block.rs index d2c3145..492010c 100644 --- a/src/mdfinfo/mdfinfo4/ev_block.rs +++ b/src/mdfinfo/mdfinfo4/ev_block.rs @@ -81,6 +81,8 @@ pub const EV_S_ANGLE: u8 = 2; pub const EV_S_DISTANCE: u8 = 3; /// Index sync pub const EV_S_INDEX: u8 = 4; +/// 
Frequency sync +pub const EV_S_FREQUENCY: u8 = 5; // Event cause constants (ev_cause) /// Unknown/other cause @@ -121,6 +123,7 @@ impl Ev4Block { EV_S_ANGLE => "Angle", EV_S_DISTANCE => "Distance", EV_S_INDEX => "Index", + EV_S_FREQUENCY => "Frequency", _ => "Unknown", } } diff --git a/src/mdfinfo/mdfinfo4/sr_block.rs b/src/mdfinfo/mdfinfo4/sr_block.rs index 72c1e7a..e1beef0 100644 --- a/src/mdfinfo/mdfinfo4/sr_block.rs +++ b/src/mdfinfo/mdfinfo4/sr_block.rs @@ -16,7 +16,7 @@ pub struct Sr4Block { pub sr_cycle_count: u64, /// Length of sample interval used to calculate the reduction records (unit depends on sr_sync_type) pub sr_interval: f64, - /// Sync type: 1=time(s), 2=angle(rad), 3=distance(m), 4=index + /// Sync type: 1=time(s), 2=angle(rad), 3=distance(m), 4=index, 5=frequency(Hz) pub sr_sync_type: u8, /// Flags: bit 0 = invalidation bytes present, bit 1 = dominant invalidation bit pub sr_flags: u8, @@ -57,6 +57,8 @@ pub const SR_SYNC_ANGLE: u8 = 2; pub const SR_SYNC_DISTANCE: u8 = 3; /// SR sync type: Index based (sample count) pub const SR_SYNC_INDEX: u8 = 4; +/// SR sync type: Frequency based (Hz) +pub const SR_SYNC_FREQUENCY: u8 = 5; impl Sr4Block { /// Returns true if invalidation bytes are present in reduction records @@ -72,6 +74,7 @@ impl Sr4Block { SR_SYNC_ANGLE => "Angle (radians)", SR_SYNC_DISTANCE => "Distance (meters)", SR_SYNC_INDEX => "Index (samples)", + SR_SYNC_FREQUENCY => "Frequency (Hz)", _ => "Unknown", } } From 2cad0f11e2c02738c6aaaa915469889b08726ed5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Thu, 12 Feb 2026 22:24:16 +0100 Subject: [PATCH 22/46] added documentation --- src/lib.rs | 14 +++++++ src/mdfinfo/mdfinfo4/at_block.rs | 5 ++- src/mdfinfo/mdfinfo4/block_header.rs | 8 +++- src/mdfinfo/mdfinfo4/ca_block.rs | 7 +++- src/mdfinfo/mdfinfo4/cc_block.rs | 8 +++- src/mdfinfo/mdfinfo4/cg_block.rs | 5 ++- src/mdfinfo/mdfinfo4/ch_block.rs | 7 +++- src/mdfinfo/mdfinfo4/cn_block.rs | 5 ++- 
src/mdfinfo/mdfinfo4/composition.rs | 15 +++++-- src/mdfinfo/mdfinfo4/data_block.rs | 21 +++++----- src/mdfinfo/mdfinfo4/dg_block.rs | 18 ++++++--- src/mdfinfo/mdfinfo4/ev_block.rs | 5 ++- src/mdfinfo/mdfinfo4/fh_block.rs | 12 ++++-- src/mdfinfo/mdfinfo4/hd_block.rs | 10 +++-- src/mdfinfo/mdfinfo4/metadata.rs | 6 ++- src/mdfinfo/mdfinfo4/si_block.rs | 5 ++- src/mdfinfo/mdfinfo4/sr_block.rs | 5 ++- src/mdfwriter/mdfwriter4.rs | 59 ++++++++++++++++++++-------- 18 files changed, 157 insertions(+), 58 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index e03f9c2..98b1a86 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,17 @@ +//! # mdfr — ASAM MDF file reader and writer +//! +//! Rust library for reading and writing ASAM Measurement Data Format (MDF) files, +//! supporting versions 3.x and 4.x (up to MDF 4.3). +//! +//! ## Modules +//! +//! - [`mdfreader`] — High-level API for reading MDF files into memory (`Mdf` struct) +//! - [`mdfwriter`] — Writing in-memory data back to MDF 4.2 files +//! - [`mdfinfo`] — MDF file metadata: block structures, parsing, and version-agnostic `MdfInfo` enum +//! - [`data_holder`] — Channel data storage using Apache Arrow arrays +//! - [`export`] — Exporting channel data to Parquet and HDF5 formats +//! - [`mdfr`] — Python bindings via PyO3 (requires `numpy` feature) + //#![forbid(unsafe_code)] mod c_api; pub mod data_holder; diff --git a/src/mdfinfo/mdfinfo4/at_block.rs b/src/mdfinfo/mdfinfo4/at_block.rs index de5051b..a0f9f19 100644 --- a/src/mdfinfo/mdfinfo4/at_block.rs +++ b/src/mdfinfo/mdfinfo4/at_block.rs @@ -1,4 +1,7 @@ -//! Attachment block (ATBLOCK) for MDF4 +//! Attachment block (ATBLOCK) for MDF4 — spec section 6.10, Table 47 +//! +//! Stores or references external files (e.g. calibration data, images) attached to the MDF file. +//! Attachments can be embedded or external, with optional MD5 checksum verification. 
use anyhow::{Context, Result}; use binrw::{BinReaderExt, binrw}; use log::warn; diff --git a/src/mdfinfo/mdfinfo4/block_header.rs b/src/mdfinfo/mdfinfo4/block_header.rs index 3ebf0ac..faa9180 100644 --- a/src/mdfinfo/mdfinfo4/block_header.rs +++ b/src/mdfinfo/mdfinfo4/block_header.rs @@ -1,4 +1,8 @@ -//! Block header structures, metadata parsing, and sharable blocks for MDF4 +//! Block header structures, metadata parsing, and sharable blocks for MDF4 — spec section 5.2, Table 2 +//! +//! Every MDF4 block starts with a header: 4-byte ID ("##XX"), 4 reserved bytes, +//! 8-byte length, and 8-byte link count. This module also manages SharableBlocks +//! (CC, SI, TX, MD blocks) stored in a global hashmap for deduplication. use anyhow::{Context, Result}; use binrw::{BinReaderExt, binrw}; use std::collections::HashMap; @@ -12,7 +16,7 @@ use super::metadata::{BlockType, HdComment, MdComment, MetaData, MetaDataBlockTy use super::si_block::Si4Block; use crate::mdfinfo::sym_buf_reader::SymBufReader; -/// MDF4 - common block Header +/// MDF4 block header (spec Table 2) — common 24-byte prefix for all blocks #[derive(Debug, Copy, Clone)] #[binrw] #[br(little)] diff --git a/src/mdfinfo/mdfinfo4/ca_block.rs b/src/mdfinfo/mdfinfo4/ca_block.rs index 513b53d..054d196 100644 --- a/src/mdfinfo/mdfinfo4/ca_block.rs +++ b/src/mdfinfo/mdfinfo4/ca_block.rs @@ -1,4 +1,7 @@ -//! Channel Array block (CABLOCK) for MDF4 +//! Channel Array block (CABLOCK) for MDF4 — spec section 6.8, Tables 40-43 +//! +//! Describes N-dimensional arrays of channel values. Supports multiple storage types +//! (CN template, CG template, DG template) and array types (plain, lookup, axis). 
use anyhow::{Context, Error, Result}; use binrw::{BinReaderExt, BinWriterExt, binrw}; use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; @@ -12,7 +15,7 @@ use crate::data_holder::tensor_arrow::Order; /// type alias for Ca4Block parse result pub type CaBlockParseResult = (Ca4Block, (Vec, Order), usize, usize); -/// Ca4 Channel Array block struct +/// CABLOCK structure (MDF 4.2 spec, Table 40) #[derive(Debug, PartialEq, Clone)] #[repr(C)] pub struct Ca4Block { diff --git a/src/mdfinfo/mdfinfo4/cc_block.rs b/src/mdfinfo/mdfinfo4/cc_block.rs index 407c648..54d92ba 100644 --- a/src/mdfinfo/mdfinfo4/cc_block.rs +++ b/src/mdfinfo/mdfinfo4/cc_block.rs @@ -1,4 +1,8 @@ -//! Channel Conversion block (CCBLOCK) for MDF4 +//! Channel Conversion block (CCBLOCK) for MDF4 — spec section 6.7, Tables 30-39 +//! +//! Defines the conversion rule to transform raw channel values to physical values. +//! Supports 12 conversion types (Table 31): identity, linear, rational, polynomial, +//! tabular (value-to-value, value-to-text), algebraic formula, and more. use anyhow::{Context, Result}; use binrw::{BinReaderExt, binrw}; use std::fmt::{self, Display}; @@ -9,7 +13,7 @@ use super::block_header::{parse_block_short, read_meta_data, SharableBlocks}; use super::metadata::BlockType; use crate::mdfinfo::sym_buf_reader::SymBufReader; -/// Cc4 Channel Conversion block struct +/// CCBLOCK structure (MDF 4.2 spec, Table 30) #[derive(Debug, Clone)] #[binrw] #[br(little)] diff --git a/src/mdfinfo/mdfinfo4/cg_block.rs b/src/mdfinfo/mdfinfo4/cg_block.rs index 118566f..f6ea80e 100644 --- a/src/mdfinfo/mdfinfo4/cg_block.rs +++ b/src/mdfinfo/mdfinfo4/cg_block.rs @@ -1,4 +1,7 @@ -//! Channel Group block (CGBLOCK) for MDF4 +//! Channel Group block (CGBLOCK) for MDF4 — spec section 6.5, Tables 20-21 +//! +//! A CGBLOCK describes a group of channels with the same record layout (same record +//! length and record ID). Flags in Table 21 define special CG types: VLSD, VLSC, bus events. 
use anyhow::{Context, Error, Result}; use arrow::array::{Array, ArrayRef, UInt32Array, UnionArray}; use arrow::buffer::ScalarBuffer; diff --git a/src/mdfinfo/mdfinfo4/ch_block.rs b/src/mdfinfo/mdfinfo4/ch_block.rs index 53573fb..3b9ab8c 100644 --- a/src/mdfinfo/mdfinfo4/ch_block.rs +++ b/src/mdfinfo/mdfinfo4/ch_block.rs @@ -1,4 +1,7 @@ -//! Channel Hierarchy block (CHBLOCK) for MDF4 +//! Channel Hierarchy block (CHBLOCK) for MDF4 — spec section 6.2, Tables 15-17 +//! +//! CHBLOCKs organize channels into a display hierarchy tree (groups, functions, +//! input/output variables). The hierarchy is separate from the DG→CG→CN data structure. use anyhow::{Context, Result}; use binrw::{BinReaderExt, binrw}; use std::collections::HashMap; @@ -9,7 +12,7 @@ use super::block_header::{parse_block_short, read_meta_data, SharableBlocks}; use super::metadata::BlockType; use crate::mdfinfo::sym_buf_reader::SymBufReader; -/// Ch4Block struct +/// CHBLOCK structure (MDF 4.2 spec, Table 15) #[derive(Debug, PartialEq, Eq, Default, Clone)] #[binrw] #[br(little)] diff --git a/src/mdfinfo/mdfinfo4/cn_block.rs b/src/mdfinfo/mdfinfo4/cn_block.rs index d9acd0f..2f99a5f 100644 --- a/src/mdfinfo/mdfinfo4/cn_block.rs +++ b/src/mdfinfo/mdfinfo4/cn_block.rs @@ -1,4 +1,7 @@ -//! Channel block (CNBLOCK) for MDF4 +//! Channel block (CNBLOCK) for MDF4 — spec section 6.6, Tables 22-29 +//! +//! The CNBLOCK is the central block describing a single channel: its data type, +//! bit position/length in the record, sync type, composition, source, and conversion. use anyhow::{Context, Result}; use arrow::array::{BooleanBufferBuilder, UInt8Builder, UInt16Builder, UInt32Builder}; use binrw::{BinReaderExt, binrw}; diff --git a/src/mdfinfo/mdfinfo4/composition.rs b/src/mdfinfo/mdfinfo4/composition.rs index 174ca01..6d7e625 100644 --- a/src/mdfinfo/mdfinfo4/composition.rs +++ b/src/mdfinfo/mdfinfo4/composition.rs @@ -1,4 +1,12 @@ -//! Composition blocks (DS, CL, CV, CU) and composition parsing for MDF4 +//! 
Composition blocks (DS, CL, CV, CU) and composition parsing for MDF4 — spec sections 6.24-6.27, Tables 67-70
+//!
+//! Compositions describe how a channel's data is structured internally:
+//! - DS (Data Stream, Table 67): variable-length fields in a VLSD blob
+//! - CL (Channel List, Table 68): named fields in a VLSD blob
+//! - CV (Channel Variant, Table 69): variant channel storage with option channels and a discriminator
+//! - CU (Channel Union, Table 70): union of member channels sharing the same storage
+//! - CA (Channel Array): N-dimensional arrays (handled in ca_block.rs)
+//! - CN (nested channel): structure composition via CN→CN chain
 use anyhow::{Context, Result, bail};
 use binrw::{BinReaderExt, binrw};
 use std::collections::HashMap;
@@ -15,8 +23,7 @@ use crate::mdfinfo::sym_buf_reader::SymBufReader;
 /// type alias for composition parse result
 pub type CompositionParseResult = (Composition, i64, usize, (Vec, Order), usize, CnType);
 
-/// contains composition blocks (CN or CA)
-/// can optionally point to another composition
+/// Recursive composition tree. Each node holds a `Compo` block and optionally chains to another composition.
 #[derive(Debug, Clone)]
 #[repr(C)]
 pub struct Composition {
@@ -24,7 +31,7 @@ pub struct Composition {
     pub compo: Option>,
 }
 
-/// enum allowing to nest CA or CN blocks for a composition
+/// Composition block variant — one of CA, CN, CL, CV, CU, or DS
 #[derive(Debug, Clone)]
 #[repr(C)]
 pub enum Compo {
diff --git a/src/mdfinfo/mdfinfo4/data_block.rs b/src/mdfinfo/mdfinfo4/data_block.rs
index 0697f8b..77d6e2e 100644
--- a/src/mdfinfo/mdfinfo4/data_block.rs
+++ b/src/mdfinfo/mdfinfo4/data_block.rs
@@ -1,4 +1,9 @@
-//! Data blocks (DT, DL, DZ, LD, HL, GD) for MDF4
+//! Data blocks (DT, DL, DZ, LD, HL, GD) for MDF4 — spec sections 6.12-6.17, Tables 55-63
+//!
+//! These blocks store raw channel data. DTBLOCK holds plain data, DZBLOCK holds
+//! compressed data (zlib/LZ4/zstd, transposition), DLBLOCK chains data blocks,
+//! 
HLBLOCK is a header list for DZBLOCK chains, LDBLOCK is a list for sorted data, +//! and GDBLOCK guards MDF 4.3 features in unsorted data. use anyhow::{Context, Result, bail}; use binrw::{BinReaderExt, binrw}; use flate2::read::ZlibDecoder; @@ -10,7 +15,7 @@ use std::io::{BufReader, Cursor, Read}; use std::str; use zstd::Decoder as ZstdDecoder; -/// Generic Data block struct, without the Id +/// DTBLOCK structure (MDF 4.2 spec, Table 55) — plain data storage #[derive(Debug, PartialEq, Eq, Default, Clone)] #[binrw] #[br(little)] @@ -32,7 +37,7 @@ impl Display for Dt4Block { } } -/// DL4 Data List block struct +/// DLBLOCK structure (MDF 4.2 spec, Table 56) — chains data blocks #[derive(Debug, PartialEq, Eq, Default, Clone)] #[binrw] #[br(little)] @@ -172,7 +177,7 @@ pub fn parse_dz(rdr: &mut BufReader<&File>) -> Result<(Vec, Dz4Block)> { Ok((data, block)) } -/// DZ4 Data List block struct +/// DZBLOCK structure (MDF 4.2 spec, Table 57) — compressed data block #[derive(Debug, PartialEq, Eq, Clone)] #[binrw] #[br(little)] @@ -253,7 +258,7 @@ impl Display for Dz4Block { } } -/// DL4 Data List block struct +/// LDBLOCK structure (MDF 4.2 spec, Table 61) — list data block for sorted data #[derive(Debug, PartialEq, Eq, Clone)] #[binrw] #[br(little)] @@ -373,7 +378,7 @@ pub fn parser_ld4_block( Ok((block, position)) } -/// HL4 Data List block struct +/// HLBLOCK structure (MDF 4.2 spec, Table 58) — header list for DZBLOCK chains #[derive(Debug, PartialEq, Eq, Default, Clone)] #[binrw] #[br(little)] @@ -425,9 +430,7 @@ impl Display for Hl4Block { } } -/// GD4 Guard Block struct (MDF 4.3) -/// Used to safeguard newly introduced features against incompatible readers -/// Note: gd_reserved is not included as its size varies based on gd_len +/// GDBLOCK structure (MDF 4.3 spec, Table 63) — guards 4.3 features in unsorted data #[derive(Debug, PartialEq, Eq, Default, Clone)] #[binrw] #[br(little)] diff --git a/src/mdfinfo/mdfinfo4/dg_block.rs b/src/mdfinfo/mdfinfo4/dg_block.rs 
index fc9d92c..23616f5 100644 --- a/src/mdfinfo/mdfinfo4/dg_block.rs +++ b/src/mdfinfo/mdfinfo4/dg_block.rs @@ -1,4 +1,7 @@ -//! Data Group block (DGBLOCK) for MDF4 +//! Data Group block (DGBLOCK) for MDF4 — spec section 6.4, Table 19 +//! +//! Each DGBLOCK groups one or more CGBLOCKs that share the same data block. +//! DGBLOCKs form a linked list from `hd_dg_first`. use anyhow::{Context, Result}; use binrw::{BinReaderExt, binrw}; use std::collections::{BTreeMap, HashMap}; @@ -11,7 +14,7 @@ use super::cg_block::{Cg4, parse_cg4, CG_F_VLSC, CG_F_VLSD}; use super::metadata::BlockType; use crate::mdfinfo::sym_buf_reader::SymBufReader; -/// Dg4 Data Group block struct +/// DGBLOCK structure (MDF 4.2 spec, Table 19) #[derive(Debug, Copy, Clone)] #[binrw] #[br(little)] @@ -62,7 +65,7 @@ impl Display for Dg4Block { } } -/// Dg4 (Data Group) block struct parser with comments +/// Parses a single DGBLOCK and its MD comment from the file. fn parse_dg4_block( rdr: &mut SymBufReader<&File>, sharable: &mut SharableBlocks, @@ -86,7 +89,7 @@ fn parse_dg4_block( Ok((dg, position)) } -/// Dg4 struct wrapping block, comments and linked CG +/// Parsed DG wrapper: the DGBLOCK and its linked CGBLOCKs (keyed by record ID) #[derive(Debug, Clone)] #[repr(C)] pub struct Dg4 { @@ -108,7 +111,9 @@ impl Display for Dg4 { } } -/// Parser for Dg4 and all linked blocks (cg, cn, cc, ca, si) +/// Parses the DG→CG→CN block hierarchy. Traverses the DGBLOCK linked list +/// starting from `target` (`hd_dg_first`). Returns BTreeMap keyed by file position, +/// plus total CG and CN counts. pub fn parse_dg4( rdr: &mut SymBufReader<&File>, target: i64, @@ -158,7 +163,8 @@ pub fn parse_dg4( Ok((dg, position, n_cg, n_cn)) } -/// Try to link VLSD/VLSC Channel Groups with matching channel in other groups +/// Links VLSD/VLSC channel groups (CG flag bit 0/1) to their matching CN in other groups. +/// A channel references a VLSD CG via `cn_data` pointing to the CG's block position. 
fn identify_vlsd_cg(cg: &mut HashMap) { // First find all VLSD/VLSC Channel Groups let mut vlsd: HashMap = HashMap::new(); diff --git a/src/mdfinfo/mdfinfo4/ev_block.rs b/src/mdfinfo/mdfinfo4/ev_block.rs index 492010c..fc9c635 100644 --- a/src/mdfinfo/mdfinfo4/ev_block.rs +++ b/src/mdfinfo/mdfinfo4/ev_block.rs @@ -1,4 +1,7 @@ -//! Event block (EVBLOCK) for MDF4 +//! Event block (EVBLOCK) for MDF4 — spec section 6.11, Tables 48-54 +//! +//! Describes events that occurred during measurement (triggers, markers, etc.). +//! Each event has a type, sync domain, cause, and can reference parent/range events. use anyhow::{Context, Result}; use binrw::{BinReaderExt, binrw}; use std::collections::HashMap; diff --git a/src/mdfinfo/mdfinfo4/fh_block.rs b/src/mdfinfo/mdfinfo4/fh_block.rs index fe55be0..dd04c41 100644 --- a/src/mdfinfo/mdfinfo4/fh_block.rs +++ b/src/mdfinfo/mdfinfo4/fh_block.rs @@ -1,4 +1,7 @@ -//! File History block (FHBLOCK) for MDF4 +//! File History block (FHBLOCK) for MDF4 — spec section 6.3, Table 18 +//! +//! Each FHBLOCK records a creation or modification event with a timestamp and +//! an optional MD comment. FHBLOCKs form a linked list from `hd_fh_first`. use anyhow::{Context, Result}; use binrw::{BinReaderExt, binrw}; use chrono::{DateTime, Local}; @@ -10,7 +13,7 @@ use super::block_header::{read_meta_data, SharableBlocks}; use super::metadata::BlockType; use crate::mdfinfo::sym_buf_reader::SymBufReader; -/// Fh4 (File History) block struct, including the header +/// FHBLOCK structure (MDF 4.2 spec, Table 18) #[derive(Debug, Copy, Clone)] #[binrw] #[br(little)] @@ -82,7 +85,7 @@ impl Display for FhBlock { } } -/// Fh4 (File History) block struct parser +/// Parses a single FHBLOCK from the file at the given target position. 
fn parse_fh_block( rdr: &mut SymBufReader<&File>, target: i64, @@ -102,7 +105,8 @@ fn parse_fh_block( pub type Fh = Vec; -/// parses File History blocks along with its linked comments returns a vect of Fh4 block with comments +/// Parses the FHBLOCK linked list starting from `target`, including MD comments. +/// Returns a Vec of all FHBLOCKs in the chain. pub fn parse_fh( rdr: &mut SymBufReader<&File>, sharable: &mut SharableBlocks, diff --git a/src/mdfinfo/mdfinfo4/hd_block.rs b/src/mdfinfo/mdfinfo4/hd_block.rs index ccdd836..76e8b67 100644 --- a/src/mdfinfo/mdfinfo4/hd_block.rs +++ b/src/mdfinfo/mdfinfo4/hd_block.rs @@ -1,4 +1,7 @@ -//! Header block (HDBLOCK) for MDF4 +//! Header block (HDBLOCK) for MDF4 — spec section 6.1, Tables 13-14 +//! +//! The HDBLOCK is the root of the block hierarchy. It contains the start timestamp, +//! time zone information, and links to DG, FH, CH, AT, and EV blocks. use anyhow::{Context, Result}; use binrw::{BinReaderExt, binrw}; use chrono::{DateTime, Local}; @@ -10,7 +13,7 @@ use super::block_header::{read_meta_data, SharableBlocks}; use super::metadata::BlockType; use crate::mdfinfo::sym_buf_reader::SymBufReader; -/// Hd4 (Header) block structure +/// HDBLOCK structure (MDF 4.2 spec, Table 13) #[derive(Debug, Copy, Clone)] #[binrw] #[br(little)] @@ -97,7 +100,8 @@ impl fmt::Display for Hd4 { } } -/// Hd4 block struct parser +/// Parses the HDBLOCK at file offset 168 (after the 64-byte ID block + 104-byte HD header). +/// Also reads the HD metadata comment block (Table 14). pub fn hd4_parser( rdr: &mut SymBufReader<&File>, sharable: &mut SharableBlocks, diff --git a/src/mdfinfo/mdfinfo4/metadata.rs b/src/mdfinfo/mdfinfo4/metadata.rs index 1f95533..5138681 100644 --- a/src/mdfinfo/mdfinfo4/metadata.rs +++ b/src/mdfinfo/mdfinfo4/metadata.rs @@ -1,4 +1,8 @@ -//! MetaData struct and related types for MDF4 TX/MD blocks +//! MetaData struct and related types for MDF4 TX/MD blocks — spec section 4.5 +//! +//! 
TX blocks hold plain text strings. MD blocks hold XML metadata conforming to +//! schema-specific XSD definitions (Tables 13-54). The `MetaData` struct wraps +//! both forms and supports lazy XML parsing via `parse_xml()`. use anyhow::{Context, Result}; use binrw::BinWriterExt; use std::collections::HashMap; diff --git a/src/mdfinfo/mdfinfo4/si_block.rs b/src/mdfinfo/mdfinfo4/si_block.rs index f6fb299..13582cf 100644 --- a/src/mdfinfo/mdfinfo4/si_block.rs +++ b/src/mdfinfo/mdfinfo4/si_block.rs @@ -1,4 +1,7 @@ -//! Source Information block (SIBLOCK) for MDF4 +//! Source Information block (SIBLOCK) for MDF4 — spec section 6.9, Tables 44-46 +//! +//! Describes the source of a channel or channel group (ECU, bus, tool, etc.). +//! SIBLOCKs are sharable and stored in the SharableBlocks hashmap. use anyhow::Result; use binrw::binrw; use std::fmt::{self, Display}; diff --git a/src/mdfinfo/mdfinfo4/sr_block.rs b/src/mdfinfo/mdfinfo4/sr_block.rs index e1beef0..0d61553 100644 --- a/src/mdfinfo/mdfinfo4/sr_block.rs +++ b/src/mdfinfo/mdfinfo4/sr_block.rs @@ -1,4 +1,7 @@ -//! Sample Reduction block (SRBLOCK) for MDF4 +//! Sample Reduction block (SRBLOCK) for MDF4 — spec section 6.29, Table 72 +//! +//! Provides pre-calculated statistics (mean, min, max) for efficient display of large +//! datasets. Linked from a CGBLOCK. Sync types: time, angle, distance, index, frequency. use binrw::binrw; use std::fmt::{self, Display}; diff --git a/src/mdfwriter/mdfwriter4.rs b/src/mdfwriter/mdfwriter4.rs index f66b051..45de067 100644 --- a/src/mdfwriter/mdfwriter4.rs +++ b/src/mdfwriter/mdfwriter4.rs @@ -1,4 +1,12 @@ -//! Writer of data in memory into mdf4.2 file +//! MDF 4.2 file writer — spec sections 5-6 +//! +//! Writes in-memory channel data to an MDF 4.2 file. The writer always produces +//! sorted data (one DG per channel, `dg_rec_id_size=0`), converting VLSC channels +//! to fixed-length. Two-phase approach: +//! 
- **Phase 1** (sequential): builds the metadata block hierarchy (HD→FH, EV, AT, SI, DG→CG→CN) +//! and calculates file positions for each block. +//! - **Phase 2** (parallel): writes raw channel data blocks (DV/DZ/SD) via crossbeam channel, +//! using rayon for parallel compression. use std::{ collections::{HashMap, HashSet}, fs::OpenOptions, @@ -33,7 +41,11 @@ use parking_lot::Mutex; use rayon::iter::{IntoParallelRefMutIterator, ParallelIterator}; use std::fs::File; -/// writes mdf4.2 file +/// Main entry point: writes an MDF 4.2 file from in-memory `Mdf` data. +/// +/// Converts MDF3 sources to MDF4 if needed. Preserves file history (FH), events (EV), +/// attachments (AT), source info (SI), and channel hierarchy (CH) from the source file. +/// Supports optional zlib compression for data blocks. pub fn mdfwriter4(mdf: &Mdf, file_name: &str, compression: bool) -> Result { let info: MdfInfo4 = match &mdf.mdf_info { MdfInfo::V3(mdfinfo3) => convert3to4(mdfinfo3, file_name) @@ -911,7 +923,8 @@ pub fn mdfwriter4(mdf: &Mdf, file_name: &str, compression: bool) -> Result }) } -/// Writes a composition block (CA, DS, CL, CU, CV, CN) and any nested composition recursively +/// Writes a composition block (CA, DS, CL, CU, CV, CN) and any nested composition recursively. +/// Each block type has its own header format and serialization logic. fn write_composition(buffer: &mut Cursor>, compo: &Composition) -> Result<()> { match &compo.block { Compo::CA(c) => { @@ -1028,7 +1041,8 @@ fn zero_composition_links(compo: &mut Composition) { } } -/// Writes the data blocks +/// Serializes DV/DZ data blocks (and optional LD/DI blocks) into a byte buffer. +/// The LD block links are adjusted to absolute file positions. fn write_data_blocks( position: i64, mut ld_block: Option, @@ -1098,7 +1112,7 @@ fn write_data_blocks( Ok(buffer.into_inner()) } -/// Writes the SD block for VLSD channels +/// Serializes an SD/DZ block for VLSD channels into a byte buffer. 
fn write_sd_block( _position: i64, data_block: (DataBlock, usize, Vec), @@ -1135,7 +1149,8 @@ fn write_sd_block( Ok(buffer.into_inner()) } -/// Create a LDBlock +/// Creates an LDBLOCK (list data block, spec Table 61) to wrap DV+DI block pairs. +/// Sets `ld_flags` bit 7 if an invalidation mask is present. fn create_ld(m: &Option, offset: &mut i64) -> Option { let mut ld_block = Ld4Block::default(); ld_block.ld_count = 1; @@ -1153,7 +1168,7 @@ fn create_ld(m: &Option, offset: &mut i64) -> Option { Some(ld_block) } -/// Create a DV Block +/// Creates an uncompressed DVBLOCK (data values, spec Table 55 with ##DV ID). fn create_dv(data: &ChannelData, offset: &mut i64) -> Result<(DataBlock, usize, Vec), Error> { let mut dv_block = Blockheader4 { hdr_id: [35, 35, 68, 86], // ##DV @@ -1171,14 +1186,15 @@ fn create_dv(data: &ChannelData, offset: &mut i64) -> Result<(DataBlock, usize, Ok((DataBlock::DvDi(dv_block), byte_aligned, data_bytes)) } -/// Enumeration of data block types +/// Writer-internal enum: either a compressed DZBLOCK or an uncompressed DV/DI/SD block. #[derive(Debug, Clone)] enum DataBlock { DZ(Dz4Block), DvDi(Blockheader4), } -/// Create a DZ Block of DV type +/// Creates a zlib-compressed DZBLOCK wrapping DV data (spec Table 57). +/// Falls back to uncompressed DV if compression increases size. fn create_dz_dv( data: &ChannelData, offset: &mut i64, @@ -1206,7 +1222,7 @@ fn create_dz_dv( Ok((dv_dz_block, byte_aligned, data_bytes)) } -/// Create a DI Block +/// Creates an uncompressed DIBLOCK (data invalidation, ##DI) for the validity mask. 
fn create_di(mask: &NullBuffer, offset: &mut i64) -> Result)>> { let mut dv_invalid_block = Blockheader4 { hdr_id: [35, 35, 68, 73], // ##DI @@ -1224,7 +1240,8 @@ fn create_di(mask: &NullBuffer, offset: &mut i64) -> Result Result { // not all types are supported match cn_type { @@ -1511,7 +1533,8 @@ fn cn_type_writer(cn_type: u8, is_vlsd: bool) -> Result { } } -/// Check if the channel data is variable-length (Utf8 or VariableSizeByteArray) +/// Returns true if the channel data is variable-length (Utf8 or VariableSizeByteArray), +/// requiring VLSD storage with an SDBLOCK instead of inline record data. fn is_vlsd_data(data: &ChannelData) -> bool { matches!( data, @@ -1519,7 +1542,8 @@ fn is_vlsd_data(data: &ChannelData) -> bool { ) } -/// Convert ChannelData to SDBLOCK format (u32 length prefix + data for each value) +/// Converts ChannelData to SDBLOCK format: each value is stored as a u32 length prefix +/// followed by the raw bytes (with null terminator for UTF-8 strings). fn to_sd_bytes(data: &ChannelData) -> Result, Error> { match data { ChannelData::Utf8(a) => { @@ -1578,7 +1602,7 @@ fn calculate_sd_size(data: &ChannelData) -> usize { } } -/// Create an SD Block (Signal Data block for VLSD channels) +/// Creates an uncompressed SDBLOCK (signal data, ##SD) for VLSD channels. fn create_sd(data: &ChannelData, offset: &mut i64) -> Result<(DataBlock, usize, Vec), Error> { let mut sd_block = Blockheader4 { hdr_id: [35, 35, 83, 68], // ##SD @@ -1594,7 +1618,8 @@ fn create_sd(data: &ChannelData, offset: &mut i64) -> Result<(DataBlock, usize, Ok((DataBlock::DvDi(sd_block), byte_aligned, data_bytes)) } -/// Create a DZ Block of SD type (compressed SDBLOCK) +/// Creates a zlib-compressed DZBLOCK wrapping SD signal data. +/// Falls back to uncompressed SD if compression increases size. 
fn create_dz_sd( data: &ChannelData, offset: &mut i64, From 18e2add36764d7134105b2920b03fc3522bd01df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Sun, 15 Feb 2026 08:52:46 +0100 Subject: [PATCH 23/46] addded unit tests to increase test coverage --- src/data_holder/arrow_helpers.rs | 155 +++++++++ src/data_holder/channel_data.rs | 340 ++++++++++++++++++ src/data_holder/complex_arrow.rs | 103 ++++++ src/data_holder/tensor_arrow.rs | 105 ++++++ src/mdfinfo/mdfinfo4/at_block.rs | 104 ++++++ src/mdfinfo/mdfinfo4/block_header.rs | 53 +++ src/mdfinfo/mdfinfo4/ca_block.rs | 166 +++++++++ src/mdfinfo/mdfinfo4/cc_block.rs | 57 ++++ src/mdfinfo/mdfinfo4/cg_block.rs | 54 +++ src/mdfinfo/mdfinfo4/ch_block.rs | 72 ++++ src/mdfinfo/mdfinfo4/cn_block.rs | 142 ++++++++ src/mdfinfo/mdfinfo4/composition.rs | 157 +++++++++ src/mdfinfo/mdfinfo4/data_block.rs | 205 +++++++++++ src/mdfinfo/mdfinfo4/dg_block.rs | 82 +++++ src/mdfinfo/mdfinfo4/ev_block.rs | 189 ++++++++++ src/mdfinfo/mdfinfo4/fh_block.rs | 43 +++ src/mdfinfo/mdfinfo4/hd_block.rs | 25 ++ src/mdfinfo/mdfinfo4/metadata.rs | 494 +++++++++++++++++++++++++++ src/mdfinfo/mdfinfo4/si_block.rs | 66 ++++ src/mdfinfo/mdfinfo4/sr_block.rs | 76 +++++ src/mdfreader/conversions3.rs | 101 ++++++ src/mdfreader/conversions4.rs | 172 ++++++++++ 22 files changed, 2961 insertions(+) diff --git a/src/data_holder/arrow_helpers.rs b/src/data_holder/arrow_helpers.rs index ce3d026..4c853cf 100644 --- a/src/data_holder/arrow_helpers.rs +++ b/src/data_holder/arrow_helpers.rs @@ -239,3 +239,158 @@ fn mdf_data_type(data_type: &DataType, endian: bool) -> u8 { } } } + +#[cfg(test)] +mod tests { + use super::*; + use arrow::array::{ + BinaryBuilder, BooleanArray, Date32Array, Date64Array, FixedSizeBinaryBuilder, + Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, + LargeBinaryBuilder, LargeStringBuilder, NullArray, StringBuilder, + TimestampNanosecondArray, UInt16Array, UInt32Array, 
UInt64Array, UInt8Array, + }; + + // ── bit_count tests ── + + #[test] + fn test_bit_count_primitives() { + assert_eq!(arrow_bit_count(&NullArray::new(1)), 0); + assert_eq!(arrow_bit_count(&BooleanArray::from(vec![true])), 8); + assert_eq!(arrow_bit_count(&Int8Array::from(vec![1])), 8); + assert_eq!(arrow_bit_count(&Int16Array::from(vec![1])), 16); + assert_eq!(arrow_bit_count(&Int32Array::from(vec![1])), 32); + assert_eq!(arrow_bit_count(&Int64Array::from(vec![1])), 64); + assert_eq!(arrow_bit_count(&UInt8Array::from(vec![1])), 8); + assert_eq!(arrow_bit_count(&UInt16Array::from(vec![1])), 16); + assert_eq!(arrow_bit_count(&UInt32Array::from(vec![1])), 32); + assert_eq!(arrow_bit_count(&UInt64Array::from(vec![1])), 64); + assert_eq!(arrow_bit_count(&Float32Array::from(vec![1.0])), 32); + assert_eq!(arrow_bit_count(&Float64Array::from(vec![1.0])), 64); + } + + #[test] + fn test_bit_count_temporal() { + assert_eq!( + arrow_bit_count(&TimestampNanosecondArray::from(vec![1])), + 64 + ); + assert_eq!(arrow_bit_count(&Date32Array::from(vec![1])), 32); + assert_eq!(arrow_bit_count(&Date64Array::from(vec![1])), 64); + } + + #[test] + fn test_bit_count_strings() { + let mut builder = StringBuilder::new(); + builder.append_value("hello"); // 5 bytes + builder.append_value("hi"); // 2 bytes + let array = builder.finish(); + assert_eq!(arrow_bit_count(&array), 5 * 8); // max is 5 bytes + + let mut builder = LargeStringBuilder::new(); + builder.append_value("abc"); + let array = builder.finish(); + assert_eq!(arrow_bit_count(&array), 3 * 8); + } + + #[test] + fn test_bit_count_binary() { + let mut builder = BinaryBuilder::new(); + builder.append_value(b"abcd"); + let array = builder.finish(); + assert_eq!(arrow_bit_count(&array), 4 * 8); + + let mut builder = LargeBinaryBuilder::new(); + builder.append_value(b"ab"); + let array = builder.finish(); + assert_eq!(arrow_bit_count(&array), 2 * 8); + + let mut builder = FixedSizeBinaryBuilder::new(3); + 
builder.append_value(b"abc").unwrap(); + let array = builder.finish(); + assert_eq!(arrow_bit_count(&array), 3 * 8); + } + + // ── byte_count tests ── + + #[test] + fn test_byte_count_primitives() { + assert_eq!(arrow_byte_count(&NullArray::new(1)), 0); + assert_eq!(arrow_byte_count(&BooleanArray::from(vec![true])), 1); + assert_eq!(arrow_byte_count(&Int8Array::from(vec![1])), 1); + assert_eq!(arrow_byte_count(&Int16Array::from(vec![1])), 2); + assert_eq!(arrow_byte_count(&Int32Array::from(vec![1])), 4); + assert_eq!(arrow_byte_count(&Int64Array::from(vec![1])), 8); + assert_eq!(arrow_byte_count(&UInt8Array::from(vec![1])), 1); + assert_eq!(arrow_byte_count(&UInt16Array::from(vec![1])), 2); + assert_eq!(arrow_byte_count(&UInt32Array::from(vec![1])), 4); + assert_eq!(arrow_byte_count(&UInt64Array::from(vec![1])), 8); + assert_eq!(arrow_byte_count(&Float32Array::from(vec![1.0])), 4); + assert_eq!(arrow_byte_count(&Float64Array::from(vec![1.0])), 8); + } + + #[test] + fn test_byte_count_strings() { + let mut builder = StringBuilder::new(); + builder.append_value("hello"); // 5 bytes + let array = builder.finish(); + assert_eq!(arrow_byte_count(&array), 5); + + let mut builder = LargeStringBuilder::new(); + builder.append_value("abc"); + let array = builder.finish(); + assert_eq!(arrow_byte_count(&array), 3); + } + + // ── mdf_data_type tests ── + + #[test] + fn test_mdf_data_type_le() { + // LE (endian=false) + assert_eq!(arrow_to_mdf_data_type(&NullArray::new(1), false), 0); + assert_eq!( + arrow_to_mdf_data_type(&BooleanArray::from(vec![true]), false), + 0 + ); + assert_eq!(arrow_to_mdf_data_type(&Int32Array::from(vec![1]), false), 2); + assert_eq!( + arrow_to_mdf_data_type(&UInt32Array::from(vec![1]), false), + 0 + ); + assert_eq!( + arrow_to_mdf_data_type(&Float64Array::from(vec![1.0]), false), + 4 + ); + + let mut builder = StringBuilder::new(); + builder.append_value("x"); + let array = builder.finish(); + assert_eq!(arrow_to_mdf_data_type(&array, false), 7); // 
UTF-8 + + let mut builder = BinaryBuilder::new(); + builder.append_value(b"x"); + let array = builder.finish(); + assert_eq!(arrow_to_mdf_data_type(&array, false), 10); // Byte Array + } + + #[test] + fn test_mdf_data_type_be() { + // BE (endian=true) + assert_eq!(arrow_to_mdf_data_type(&NullArray::new(1), true), 1); + assert_eq!(arrow_to_mdf_data_type(&Int32Array::from(vec![1]), true), 3); + assert_eq!(arrow_to_mdf_data_type(&UInt32Array::from(vec![1]), true), 1); + assert_eq!( + arrow_to_mdf_data_type(&Float64Array::from(vec![1.0]), true), + 5 + ); + + let mut builder = StringBuilder::new(); + builder.append_value("x"); + let array = builder.finish(); + assert_eq!(arrow_to_mdf_data_type(&array, true), 7); // UTF-8 same for both + + let mut builder = BinaryBuilder::new(); + builder.append_value(b"x"); + let array = builder.finish(); + assert_eq!(arrow_to_mdf_data_type(&array, true), 10); // Byte Array same + } +} diff --git a/src/data_holder/channel_data.rs b/src/data_holder/channel_data.rs index 0cac5fd..e5e8dc1 100644 --- a/src/data_holder/channel_data.rs +++ b/src/data_holder/channel_data.rs @@ -1964,3 +1964,343 @@ impl fmt::Display for ChannelData { Ok(()) } } + +#[cfg(test)] +mod tests { + use super::*; + use arrow::buffer::MutableBuffer; + + fn make_int32(vals: &[i32]) -> ChannelData { + let mut b = PrimitiveBuilder::::new(); + for v in vals { + b.append_value(*v); + } + ChannelData::Int32(b) + } + + fn make_uint16(vals: &[u16]) -> ChannelData { + let mut b = PrimitiveBuilder::::new(); + for v in vals { + b.append_value(*v); + } + ChannelData::UInt16(b) + } + + fn make_float64(vals: &[f64]) -> ChannelData { + let mut b = PrimitiveBuilder::::new(); + for v in vals { + b.append_value(*v); + } + ChannelData::Float64(b) + } + + fn make_utf8(vals: &[&str]) -> ChannelData { + let mut b = LargeStringBuilder::new(); + for v in vals { + b.append_value(v); + } + ChannelData::Utf8(b) + } + + fn make_var_binary(vals: &[&[u8]]) -> ChannelData { + let mut b = 
LargeBinaryBuilder::new(); + for v in vals { + b.append_value(v); + } + ChannelData::VariableSizeByteArray(b) + } + + fn make_fixed_binary(size: i32, vals: &[&[u8]]) -> ChannelData { + let mut b = FixedSizeBinaryBuilder::new(size); + for v in vals { + b.append_value(v).unwrap(); + } + ChannelData::FixedSizeByteArray(b) + } + + fn make_array_d_float64() -> ChannelData { + let buf = MutableBuffer::from_iter([1.0f64, 2.0, 3.0, 4.0, 5.0, 6.0].iter().copied()); + ChannelData::ArrayDFloat64(TensorArrow::new_from_buffer( + buf, + vec![2, 3], + Order::RowMajor, + )) + } + + #[test] + fn test_is_empty_and_len() { + let empty = ChannelData::Int32(PrimitiveBuilder::new()); + assert!(empty.is_empty()); + assert_eq!(empty.len(), 0); + + let cd = make_int32(&[1, 5, -3]); + assert!(!cd.is_empty()); + assert_eq!(cd.len(), 3); + + let cd = make_uint16(&[10, 200]); + assert_eq!(cd.len(), 2); + + let cd = make_float64(&[1.1, 2.2, 3.3]); + assert_eq!(cd.len(), 3); + + let cd = make_utf8(&["hello", "world"]); + assert_eq!(cd.len(), 2); + + let cd = make_var_binary(&[b"abc", b"de"]); + assert_eq!(cd.len(), 2); + + let cd = make_fixed_binary(4, &[b"abcd"]); + assert_eq!(cd.len(), 1); + } + + #[test] + fn test_bit_count() { + assert_eq!(make_int32(&[1]).bit_count(), 32); + assert_eq!(make_uint16(&[1]).bit_count(), 16); + assert_eq!(make_float64(&[1.0]).bit_count(), 64); + + // Complex32 = 64 bits (2x32) + let c = ChannelData::Complex32(ComplexArrow::new()); + assert_eq!(c.bit_count(), 64); + + // Utf8: max string "hello" = 5 bytes = 40 bits + let cd = make_utf8(&["hello", "hi"]); + assert_eq!(cd.bit_count(), 5 * 8); + + // VariableSizeByteArray: max "abc" = 3 bytes = 24 bits + let cd = make_var_binary(&[b"abc", b"de"]); + assert_eq!(cd.bit_count(), 3 * 8); + + // FixedSizeByteArray(4) = 32 bits + let cd = make_fixed_binary(4, &[b"abcd"]); + assert_eq!(cd.bit_count(), 4 * 8); + + // ArrayDFloat64 = 64 bits + let cd = make_array_d_float64(); + assert_eq!(cd.bit_count(), 64); + } + + #[test] 
+ fn test_byte_count() { + assert_eq!(make_int32(&[1]).byte_count(), 4); + assert_eq!(make_uint16(&[1]).byte_count(), 2); + assert_eq!(make_float64(&[1.0]).byte_count(), 8); + + let cd = make_utf8(&["hello"]); + assert_eq!(cd.byte_count(), 5); + + let cd = make_var_binary(&[b"abc"]); + assert_eq!(cd.byte_count(), 3); + + let cd = make_fixed_binary(4, &[b"abcd"]); + assert_eq!(cd.byte_count(), 4); + } + + #[test] + fn test_data_type_le_be() { + // LE + assert_eq!(make_int32(&[1]).data_type(false), 2); + assert_eq!(make_uint16(&[1]).data_type(false), 0); + assert_eq!(make_float64(&[1.0]).data_type(false), 4); + assert_eq!(make_utf8(&["x"]).data_type(false), 7); + assert_eq!(make_var_binary(&[b"x"]).data_type(false), 10); + + // BE + assert_eq!(make_int32(&[1]).data_type(true), 3); + assert_eq!(make_uint16(&[1]).data_type(true), 1); + assert_eq!(make_float64(&[1.0]).data_type(true), 5); + assert_eq!(make_utf8(&["x"]).data_type(true), 7); + } + + #[test] + fn test_arrow_data_type() { + assert_eq!(make_int32(&[1]).arrow_data_type(), DataType::Int32); + assert_eq!(make_uint16(&[1]).arrow_data_type(), DataType::UInt16); + assert_eq!(make_float64(&[1.0]).arrow_data_type(), DataType::Float64); + assert_eq!(make_utf8(&["x"]).arrow_data_type(), DataType::LargeUtf8); + assert_eq!( + make_var_binary(&[b"x"]).arrow_data_type(), + DataType::LargeBinary + ); + assert_eq!( + make_fixed_binary(4, &[b"abcd"]).arrow_data_type(), + DataType::FixedSizeBinary(4) + ); + + let c32 = ChannelData::Complex32(ComplexArrow::new()); + assert_eq!(c32.arrow_data_type(), DataType::Float32); + + let ad = make_array_d_float64(); + assert_eq!(ad.arrow_data_type(), DataType::Float64); + } + + #[test] + fn test_ndim_and_shape() { + let cd = make_int32(&[1, 2, 3]); + assert_eq!(cd.ndim(), 1); + assert_eq!(cd.shape(), (vec![3], Order::RowMajor)); + + let cd = make_utf8(&["a", "b"]); + assert_eq!(cd.ndim(), 1); + assert_eq!(cd.shape(), (vec![2], Order::RowMajor)); + + let cd = make_array_d_float64(); + 
assert_eq!(cd.ndim(), 2); + assert_eq!(cd.shape(), (vec![2, 3], Order::RowMajor)); + } + + #[test] + fn test_min_max() { + let cd = make_int32(&[1, 5, -3]); + assert_eq!(cd.min_max(), (Some(-3.0), Some(5.0))); + + let cd = make_uint16(&[10, 200]); + assert_eq!(cd.min_max(), (Some(10.0), Some(200.0))); + + let cd = make_float64(&[1.1, 2.2, 3.3]); + assert_eq!(cd.min_max(), (Some(1.1), Some(3.3))); + + // Complex -> (None, None) + let c = ChannelData::Complex32(ComplexArrow::new()); + assert_eq!(c.min_max(), (None, None)); + + // Utf8 -> (None, None) + let cd = make_utf8(&["a"]); + assert_eq!(cd.min_max(), (None, None)); + + // Empty -> (None, None) + let cd = ChannelData::Int32(PrimitiveBuilder::new()); + assert_eq!(cd.min_max(), (None, None)); + + // ArrayDFloat64 + let cd = make_array_d_float64(); + assert_eq!(cd.min_max(), (Some(1.0), Some(6.0))); + } + + #[test] + fn test_to_u64_vec() { + let cd = make_int32(&[1, 5]); + assert!(cd.to_u64_vec().is_some()); + + let cd = make_uint16(&[10, 200]); + assert_eq!(cd.to_u64_vec(), Some(vec![10, 200])); + + let cd = make_float64(&[1.0]); + assert!(cd.to_u64_vec().is_none()); + + let cd = make_utf8(&["a"]); + assert!(cd.to_u64_vec().is_none()); + } + + #[test] + fn test_as_u64_slice() { + let mut b = PrimitiveBuilder::::new(); + b.append_value(42); + b.append_value(99); + let cd = ChannelData::UInt64(b); + assert_eq!(cd.as_u64_slice(), Some([42u64, 99].as_slice())); + + let cd = make_int32(&[1]); + assert!(cd.as_u64_slice().is_none()); + } + + #[test] + fn test_zeros_virtual() { + let cd = make_int32(&[1]); + let result = cd + .zeros(3, 5, 0, (vec![5], Order::RowMajor)) + .unwrap(); + assert_eq!(result.len(), 5); + assert!(matches!(result, ChannelData::UInt64(_))); + assert_eq!(result.to_u64_vec(), Some(vec![0, 1, 2, 3, 4])); + + let result = cd + .zeros(6, 3, 0, (vec![3], Order::RowMajor)) + .unwrap(); + assert_eq!(result.to_u64_vec(), Some(vec![0, 1, 2])); + } + + #[test] + fn test_zeros_regular() { + let cd = 
make_int32(&[1]); + let result = cd + .zeros(0, 10, 4, (vec![10], Order::RowMajor)) + .unwrap(); + assert!(matches!(result, ChannelData::Int32(_))); + assert_eq!(result.len(), 10); + assert!(!result.is_empty()); + + let cd = make_float64(&[1.0]); + let result = cd + .zeros(0, 5, 8, (vec![5], Order::RowMajor)) + .unwrap(); + assert!(matches!(result, ChannelData::Float64(_))); + assert_eq!(result.len(), 5); + } + + #[test] + fn test_finish_cloned() { + let cd = make_int32(&[1, 2, 3]); + let arr = cd.finish_cloned(); + assert_eq!(arr.len(), 3); + + let cd = make_utf8(&["a", "b"]); + let arr = cd.finish_cloned(); + assert_eq!(arr.len(), 2); + + let cd = make_float64(&[1.0]); + let arr = cd.finish_cloned(); + assert_eq!(arr.len(), 1); + } + + #[test] + fn test_data_type_init() { + // UInt8: cn_type=0, cn_data_type=0, n_bytes=1 + let cd = data_type_init(0, 0, 1, 1, 0).unwrap(); + assert!(matches!(cd, ChannelData::UInt8(_))); + + // Int32: cn_type=0, cn_data_type=2, n_bytes=4 + let cd = data_type_init(0, 2, 4, 1, 0).unwrap(); + assert!(matches!(cd, ChannelData::Int32(_))); + + // Float64: cn_type=0, cn_data_type=4, n_bytes=8 + let cd = data_type_init(0, 4, 8, 1, 0).unwrap(); + assert!(matches!(cd, ChannelData::Float64(_))); + + // Utf8: cn_type=0, cn_data_type=7, n_bytes=10 + let cd = data_type_init(0, 7, 10, 1, 0).unwrap(); + assert!(matches!(cd, ChannelData::Utf8(_))); + + // FixedSizeByteArray: cn_type=0, cn_data_type=10, n_bytes=4 + let cd = data_type_init(0, 10, 4, 1, 0).unwrap(); + assert!(matches!(cd, ChannelData::FixedSizeByteArray(_))); + + // VLSC: cn_type=7, small bytes + let cd = data_type_init(7, 0, 1, 1, 0).unwrap(); + assert!(matches!(cd, ChannelData::UInt8(_))); + + // VLSC: cn_type=7, 8 bytes + let cd = data_type_init(7, 0, 8, 1, 0).unwrap(); + assert!(matches!(cd, ChannelData::UInt64(_))); + + // Virtual: cn_type=3 + let cd = data_type_init(3, 0, 0, 1, 0).unwrap(); + assert!(matches!(cd, ChannelData::UInt64(_))); + + // Array: list_size > 1, UInt16 + let 
cd = data_type_init(0, 0, 2, 4, 0).unwrap(); + assert!(matches!(cd, ChannelData::ArrayDUInt16(_))); + + // Array: list_size > 1, Float32 + let cd = data_type_init(0, 4, 4, 4, 0).unwrap(); + assert!(matches!(cd, ChannelData::ArrayDFloat32(_))); + } + + #[test] + fn test_display() { + let cd = make_int32(&[1, 2]); + let display = format!("{cd}"); + assert!(display.contains('1')); + assert!(display.contains('2')); + } +} diff --git a/src/data_holder/complex_arrow.rs b/src/data_holder/complex_arrow.rs index 8048aea..8e055a2 100644 --- a/src/data_holder/complex_arrow.rs +++ b/src/data_holder/complex_arrow.rs @@ -183,3 +183,106 @@ impl Clone for ComplexArrow { } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_new_and_default() { + let c: ComplexArrow = ComplexArrow::new(); + assert!(c.is_empty()); + assert_eq!(c.len(), 0); + assert!(c.nulls().is_none()); + + let d: ComplexArrow = ComplexArrow::default(); + assert!(d.is_empty()); + assert_eq!(d.len(), 0); + } + + #[test] + fn test_with_capacity() { + let c: ComplexArrow = ComplexArrow::with_capacity(512); + assert!(c.is_empty()); + assert_eq!(c.len(), 0); + } + + #[test] + fn test_from_buffer_and_methods() { + // 4 f64 values = 2 complex pairs + let data: Vec = vec![1.0, 2.0, 3.0, 4.0]; + let buf = MutableBuffer::from_iter(data.iter().copied()); + let c = ComplexArrow::::new_from_buffer(buf); + // len is bytes/2 (not pairs) because new_from_buffer divides by 2 + // buf.len() = 32 bytes, /2 = 16 -> that's the raw division + // Actually: values_buffer.len() returns bytes. 4 * 8 = 32 bytes. /2 = 16. + // But the real len should be based on element count... + // Looking at code: length = values_buffer.len() / 2 + // For f64, buffer has 32 bytes, so length = 16. That seems like a byte-based calculation. 
+ assert!(!c.is_empty()); + assert_eq!(c.values_slice().len(), 4); + + let arr = c.finish_cloned(); + assert_eq!(arr.len(), 4); + + assert!(c.nulls().is_none()); + assert!(c.validity_slice().is_none()); + } + + #[test] + fn test_from_primitive() { + let mut pb = PrimitiveBuilder::::with_capacity(6); + pb.append_value(1.0); + pb.append_value(2.0); + pb.append_value(3.0); + pb.append_value(4.0); + pb.append_value(5.0); + pb.append_value(6.0); + let c = ComplexArrow::new_from_primitive(pb, None); + assert_eq!(c.len(), 3); // 6 values / 2 = 3 pairs + assert!(!c.is_empty()); + assert!(c.nulls().is_none()); + } + + #[test] + fn test_partial_eq_f32() { + let buf1 = MutableBuffer::from_iter([1.0f32, 2.0, 3.0, 4.0].iter().copied()); + let buf2 = MutableBuffer::from_iter([1.0f32, 2.0, 3.0, 4.0].iter().copied()); + let c1 = ComplexArrow::::new_from_buffer(buf1); + let c2 = ComplexArrow::::new_from_buffer(buf2); + assert_eq!(c1, c2); + + let buf3 = MutableBuffer::from_iter([5.0f32, 6.0].iter().copied()); + let c3 = ComplexArrow::::new_from_buffer(buf3); + assert_ne!(c1, c3); + } + + #[test] + fn test_partial_eq_f64() { + let buf1 = MutableBuffer::from_iter([1.0f64, 2.0].iter().copied()); + let buf2 = MutableBuffer::from_iter([1.0f64, 2.0].iter().copied()); + let c1 = ComplexArrow::::new_from_buffer(buf1); + let c2 = ComplexArrow::::new_from_buffer(buf2); + assert_eq!(c1, c2); + + let buf3 = MutableBuffer::from_iter([3.0f64, 4.0].iter().copied()); + let c3 = ComplexArrow::::new_from_buffer(buf3); + assert_ne!(c1, c3); + } + + #[test] + fn test_clone_f32() { + let buf = MutableBuffer::from_iter([1.0f32, 2.0, 3.0, 4.0].iter().copied()); + let c = ComplexArrow::::new_from_buffer(buf); + let cloned = c.clone(); + assert_eq!(c, cloned); + } + + #[test] + fn test_clone_f64() { + let buf = MutableBuffer::from_iter([10.0f64, 20.0].iter().copied()); + let c = ComplexArrow::::new_from_buffer(buf); + let cloned = c.clone(); + assert_eq!(c, cloned); + } +} diff --git 
a/src/data_holder/tensor_arrow.rs b/src/data_holder/tensor_arrow.rs index 3d2eb0e..c0c52e8 100644 --- a/src/data_holder/tensor_arrow.rs +++ b/src/data_holder/tensor_arrow.rs @@ -238,3 +238,108 @@ tensor_arrow_to_ndarray!(Int64Type, i64); tensor_arrow_to_ndarray!(UInt64Type, u64); tensor_arrow_to_ndarray!(Float32Type, f32); tensor_arrow_to_ndarray!(Float64Type, f64); + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_new_and_default() { + let t: TensorArrow = TensorArrow::new(); + assert!(t.is_empty()); + assert_eq!(t.len(), 0); + assert_eq!(t.ndim(), 1); + assert_eq!(t.shape(), &vec![1]); + assert_eq!(t.order(), &Order::RowMajor); + assert!(t.nulls().is_none()); + + let d: TensorArrow = TensorArrow::default(); + assert!(d.is_empty()); + assert_eq!(d.shape(), &vec![1]); + } + + #[test] + fn test_with_capacity() { + let t = TensorArrow::::with_capacity(100, vec![2, 3], Order::ColumnMajor); + assert!(t.is_empty()); + assert_eq!(t.ndim(), 2); + assert_eq!(t.shape(), &vec![2, 3]); + assert_eq!(t.order(), &Order::ColumnMajor); + } + + #[test] + fn test_from_buffer() { + // 6 f64 values with shape [2,3] = 1 sample of 2x3 tensor + let data: Vec = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]; + let buf = MutableBuffer::from_iter(data.iter().copied()); + let t = TensorArrow::::new_from_buffer(buf, vec![2, 3], Order::RowMajor); + // len = buf_bytes / (sizeof(f64) * product(shape)) but new_from_buffer divides by product of shape + // buf.len() = 48 bytes, shape product = 6, so len = 48/6 = 8... no + // Actually values_buffer.len() returns byte count: 48 bytes. shape product = 6. + // length = 48 / 6 = 8. That seems byte-based again. 
+ assert!(!t.is_empty()); + assert_eq!(t.values_slice().len(), 6); + assert_eq!(t.ndim(), 2); + assert_eq!(t.shape(), &vec![2, 3]); + } + + #[test] + fn test_from_primitive() { + let mut pb = PrimitiveBuilder::::with_capacity(12); + for i in 0..12 { + pb.append_value(i as f32); + } + let t = TensorArrow::new_from_primitive(pb, None, vec![3, 4], Order::RowMajor); + assert_eq!(t.len(), 1); // 12 / (3*4) = 1 sample + assert!(!t.is_empty()); + assert_eq!(t.ndim(), 2); + assert_eq!(t.shape(), &vec![3, 4]); + assert_eq!(t.order(), &Order::RowMajor); + assert!(t.nulls().is_none()); + assert!(t.validity_slice().is_none()); + } + + #[test] + fn test_finish_cloned() { + let mut pb = PrimitiveBuilder::::with_capacity(4); + pb.append_value(1.0); + pb.append_value(2.0); + pb.append_value(3.0); + pb.append_value(4.0); + let t = TensorArrow::new_from_primitive(pb, None, vec![2, 2], Order::RowMajor); + let arr = t.finish_cloned(); + assert_eq!(arr.len(), 4); + } + + #[test] + fn test_partial_eq() { + let buf1 = MutableBuffer::from_iter([1.0f64, 2.0, 3.0].iter().copied()); + let buf2 = MutableBuffer::from_iter([1.0f64, 2.0, 3.0].iter().copied()); + let t1 = TensorArrow::::new_from_buffer(buf1, vec![3], Order::RowMajor); + let t2 = TensorArrow::::new_from_buffer(buf2, vec![3], Order::RowMajor); + assert_eq!(t1, t2); + + let buf3 = MutableBuffer::from_iter([4.0f64, 5.0, 6.0].iter().copied()); + let t3 = TensorArrow::::new_from_buffer(buf3, vec![3], Order::RowMajor); + assert_ne!(t1, t3); + } + + #[test] + fn test_clone() { + let buf = MutableBuffer::from_iter([1.0f64, 2.0, 3.0, 4.0].iter().copied()); + let t = TensorArrow::::new_from_buffer(buf, vec![2, 2], Order::ColumnMajor); + let cloned = t.clone(); + assert_eq!(t, cloned); + assert_eq!(cloned.shape(), &vec![2, 2]); + assert_eq!(cloned.order(), &Order::ColumnMajor); + } + + #[test] + fn test_values_slice_mut() { + let buf = MutableBuffer::from_iter([1.0f64, 2.0, 3.0].iter().copied()); + let mut t = 
TensorArrow::::new_from_buffer(buf, vec![3], Order::RowMajor); + let slice = t.values_slice_mut(); + slice[0] = 99.0; + assert_eq!(t.values_slice()[0], 99.0); + } +} diff --git a/src/mdfinfo/mdfinfo4/at_block.rs b/src/mdfinfo/mdfinfo4/at_block.rs index a0f9f19..101f433 100644 --- a/src/mdfinfo/mdfinfo4/at_block.rs +++ b/src/mdfinfo/mdfinfo4/at_block.rs @@ -57,6 +57,29 @@ pub struct At4Block { // followed by embedded data depending of flag } +impl Default for At4Block { + fn default() -> Self { + At4Block { + at_id: [35, 35, 65, 84], // ##AT + reserved: [0; 4], + at_len: 96, + at_links: 4, + at_at_next: 0, + at_tx_filename: 0, + at_tx_mimetype: 0, + at_md_comment: 0, + at_flags: 0, + at_creator_index: 0, + at_zip_type: 0, + at_path_syntax: 0, + at_reserved: [0; 2], + at_md5_checksum: [0; 16], + at_original_size: 0, + at_embedded_size: 0, + } + } +} + impl At4Block { /// Returns true if this attachment has embedded data pub fn is_embedded(&self) -> bool { @@ -193,3 +216,84 @@ pub fn parse_at4( } Ok((at, position)) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_at_is_embedded() { + let mut at = At4Block::default(); + assert!(!at.is_embedded()); + + at.at_flags = 0b1; + assert!(at.is_embedded()); + + at.at_flags = 0b11; // both bits set + assert!(at.is_embedded()); + } + + #[test] + fn test_at_is_compressed() { + let mut at = At4Block::default(); + assert!(!at.is_compressed()); + + at.at_flags = 0b10; + assert!(at.is_compressed()); + } + + #[test] + fn test_at_has_md5_checksum() { + let mut at = At4Block::default(); + assert!(!at.has_md5_checksum()); + + at.at_flags = 0b100; + assert!(at.has_md5_checksum()); + } + + #[test] + fn test_at_get_compression_str() { + let mut at = At4Block::default(); + // Not compressed → None + assert_eq!(at.get_compression_str(), "None"); + + // Compressed with various zip types + at.at_flags = 0b10; + at.at_zip_type = 0; + assert_eq!(at.get_compression_str(), "Deflate"); + at.at_zip_type = 1; + 
assert_eq!(at.get_compression_str(), "Deflate"); + at.at_zip_type = 2; + assert_eq!(at.get_compression_str(), "Zstd"); + at.at_zip_type = 3; + assert_eq!(at.get_compression_str(), "Zstd"); + at.at_zip_type = 4; + assert_eq!(at.get_compression_str(), "LZ4"); + at.at_zip_type = 5; + assert_eq!(at.get_compression_str(), "LZ4"); + at.at_zip_type = 255; + assert_eq!(at.get_compression_str(), "Unknown"); + } + + #[test] + fn test_at_display() { + let mut at = At4Block::default(); + at.at_original_size = 1024; + at.at_embedded_size = 512; + at.at_flags = 0b1; // embedded + at.at_creator_index = 1; + + let display = format!("{at}"); + assert!(display.contains("AT:")); + assert!(display.contains("embedded")); + assert!(display.contains("size=512")); + assert!(display.contains("original=1024")); + assert!(display.contains("compression=None")); + assert!(display.contains("creator_index=1")); + + // External + at.at_flags = 0; + let display = format!("{at}"); + assert!(display.contains("external")); + } +} diff --git a/src/mdfinfo/mdfinfo4/block_header.rs b/src/mdfinfo/mdfinfo4/block_header.rs index faa9180..8a2bd2f 100644 --- a/src/mdfinfo/mdfinfo4/block_header.rs +++ b/src/mdfinfo/mdfinfo4/block_header.rs @@ -286,3 +286,56 @@ impl SharableBlocks { SharableBlocks { md_tx, cc, si } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_blockheader4_default() { + let h = Blockheader4::default(); + assert_eq!(h.hdr_id, [35, 35, 84, 88]); // ##TX + assert_eq!(h.hdr_len, 24); + assert_eq!(h.hdr_links, 0); + } + + #[test] + fn test_blockheader4_display() { + let h = Blockheader4::default(); + let display = format!("{h}"); + assert!(display.contains("##TX")); + assert!(display.contains("len=24")); + assert!(display.contains("links=0")); + } + + #[test] + fn test_blockheader4short_default() { + let h = Blockheader4Short::default(); + assert_eq!(h.hdr_id, [35, 35, 67, 78]); // ##CN + assert_eq!(h.hdr_len, 160); + } + + #[test] + fn test_blockheader4short_display() { + 
let h = Blockheader4Short::default(); + let display = format!("{h}"); + assert!(display.contains("##CN")); + assert!(display.contains("len=160")); + } + + #[test] + fn test_default_short_header() { + let cg = default_short_header(BlockType::CG); + assert_eq!(cg.hdr_id, [35, 35, 67, 71]); // ##CG + assert_eq!(cg.hdr_len, 104); + + let cn = default_short_header(BlockType::CN); + assert_eq!(cn.hdr_id, [35, 35, 67, 78]); // ##CN + assert_eq!(cn.hdr_len, 160); + + // Wildcard falls through to CN defaults + let other = default_short_header(BlockType::HD); + assert_eq!(other.hdr_id, [35, 35, 67, 78]); + assert_eq!(other.hdr_len, 160); + } +} diff --git a/src/mdfinfo/mdfinfo4/ca_block.rs b/src/mdfinfo/mdfinfo4/ca_block.rs index 054d196..4b5d9e1 100644 --- a/src/mdfinfo/mdfinfo4/ca_block.rs +++ b/src/mdfinfo/mdfinfo4/ca_block.rs @@ -521,3 +521,169 @@ pub(super) fn parse_ca_block( pnd, )) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_default() { + let ca = Ca4Block::default(); + assert_eq!(ca.ca_id, [35, 35, 67, 65]); // ##CA + assert_eq!(ca.ca_type, 0); + assert_eq!(ca.ca_storage, 0); + assert_eq!(ca.ca_ndim, 1); + assert_eq!(ca.ca_flags, 0); + assert!(ca.ca_dim_size.is_empty()); + assert!(ca.ca_data.is_none()); + assert!(ca.ca_axis_value.is_none()); + assert!(ca.ca_cycle_count.is_none()); + } + + #[test] + fn test_get_ca_type_str() { + let mut ca = Ca4Block::default(); + let expected = [ + (0, "Array"), + (1, "ScalingAxis"), + (2, "LookUp"), + (3, "IntervalAxis"), + (4, "ClassificationResult"), + (255, "Unknown"), + ]; + for (val, name) in expected { + ca.ca_type = val; + assert_eq!(ca.get_ca_type_str(), name, "ca_type={val}"); + } + } + + #[test] + fn test_get_storage_str() { + let mut ca = Ca4Block::default(); + let expected = [ + (0, "CN Template"), + (1, "CG Template"), + (2, "DG Template"), + (255, "Unknown"), + ]; + for (val, name) in expected { + ca.ca_storage = val; + assert_eq!(ca.get_storage_str(), name, "ca_storage={val}"); + } + } + + 
#[test] + fn test_calculate_block_len() { + // Default: 1 link (ca_composition), no optional fields, ndim=1 + let ca = Ca4Block { + ca_ndim: 1, + ca_dim_size: vec![5], + ..Default::default() + }; + // 16 (header) + 8 (link count) + 1*8 (links) + 16 (members) + 1*8 (dim_size) = 56 + assert_eq!(ca.calculate_block_len(), 56); + + // With DG template storage (adds pnd=5 data links) + let ca = Ca4Block { + ca_ndim: 1, + ca_storage: 2, + ca_dim_size: vec![5], + ..Default::default() + }; + // links = 1 + 5 = 6 + // 16 + 8 + 6*8 + 16 + 1*8 = 96 + assert_eq!(ca.calculate_block_len(), 96); + + // With axis_value + let ca = Ca4Block { + ca_ndim: 1, + ca_dim_size: vec![3], + ca_axis_value: Some(vec![1.0, 2.0, 3.0]), + ..Default::default() + }; + // 56 + 3*8 = 80 + assert_eq!(ca.calculate_block_len(), 80); + } + + #[test] + fn test_calculate_link_count_with_flags() { + // No flags: just ca_composition = 1 + let ca = Ca4Block { + ca_ndim: 2, + ca_dim_size: vec![3, 4], + ..Default::default() + }; + assert_eq!(ca.calculate_block_len(), 16 + 8 + 8 + 16 + 2 * 8); // 64 + + // Dynamic size flag (bit 0): adds D*3 = 6 links + let ca = Ca4Block { + ca_ndim: 2, + ca_dim_size: vec![3, 4], + ca_flags: 0b1, + ..Default::default() + }; + // links = 1 + 6 = 7 + assert_eq!(ca.calculate_block_len(), 16 + 8 + 7 * 8 + 16 + 2 * 8); + + // Axis flag without fixed axes (bit 4 set, bit 5 clear): adds D + D*3 + let ca = Ca4Block { + ca_ndim: 2, + ca_dim_size: vec![3, 4], + ca_flags: 0b10000, + ..Default::default() + }; + // links = 1 + 2 + 6 = 9 (ca_cc_axis_conversion + ca_axis) + assert_eq!(ca.calculate_block_len(), 16 + 8 + 9 * 8 + 16 + 2 * 8); + + // Axis flag with fixed axes (bits 4+5 set): adds D only (no ca_axis) + let ca = Ca4Block { + ca_ndim: 2, + ca_dim_size: vec![3, 4], + ca_flags: 0b110000, + ..Default::default() + }; + // links = 1 + 2 = 3 + assert_eq!(ca.calculate_block_len(), 16 + 8 + 3 * 8 + 16 + 2 * 8); + } + + #[test] + fn test_prepare_for_write() { + let mut ca = Ca4Block { + 
ca_ndim: 1, + ca_dim_size: vec![3], + ca_composition: 1000, + ca_data: Some(vec![100, 200, 300]), + ca_storage: 2, + ca_dynamic_size: Some(vec![10, 20, 30]), + ca_flags: 0b1, + ..Default::default() + }; + ca.prepare_for_write(); + + assert_eq!(ca.ca_composition, 0); + assert_eq!(ca.ca_data, Some(vec![0, 0, 0])); + assert_eq!(ca.ca_dynamic_size, Some(vec![0, 0, 0])); + // links recalculated: 1 + 3 (data) + 3 (dynamic_size) = 7 + assert_eq!(ca.ca_links, 7); + } + + #[test] + fn test_display() { + let ca = Ca4Block::default(); + let display = format!("{ca}"); + assert!(display.contains("CA:")); + assert!(display.contains("Array")); + assert!(display.contains("CN Template")); + + let ca = Ca4Block { + ca_type: 2, + ca_storage: 1, + ca_dim_size: vec![3, 4], + ..Default::default() + }; + let display = format!("{ca}"); + assert!(display.contains("LookUp")); + assert!(display.contains("CG Template")); + assert!(display.contains("[3, 4]")); + } +} diff --git a/src/mdfinfo/mdfinfo4/cc_block.rs b/src/mdfinfo/mdfinfo4/cc_block.rs index 54d92ba..178b75f 100644 --- a/src/mdfinfo/mdfinfo4/cc_block.rs +++ b/src/mdfinfo/mdfinfo4/cc_block.rs @@ -136,3 +136,60 @@ pub(super) fn read_cc( sharable.cc.insert(*target, cc_block); Ok(position) } + +#[cfg(test)] +mod tests { + use super::*; + + fn make_cc(cc_type: u8, ref_count: u16, val_count: u16) -> Cc4Block { + Cc4Block { + cc_links: 4, + cc_tx_name: 0, + cc_md_unit: 0, + cc_md_comment: 0, + cc_cc_inverse: 0, + cc_ref: vec![], + cc_type, + cc_precision: 0, + cc_flags: 0, + cc_ref_count: ref_count, + cc_val_count: val_count, + cc_phy_range_min: 0.0, + cc_phy_range_max: 0.0, + cc_val: CcVal::Real(vec![]), + } + } + + #[test] + fn test_cc_get_cc_type_str() { + let expected = [ + (0, "Identity"), + (1, "Linear"), + (2, "Rational"), + (3, "Algebraic"), + (4, "ValueToValueInterpolation"), + (5, "ValueToValue"), + (6, "ValueRangeToValue"), + (7, "ValueToText"), + (8, "ValueRangeToText"), + (9, "TextToValue"), + (10, "TextToText"), + (11, 
"BitfieldToText"), + (255, "Unknown"), + ]; + for (val, name) in expected { + let cc = make_cc(val, 0, 0); + assert_eq!(cc.get_cc_type_str(), name, "cc_type={val}"); + } + } + + #[test] + fn test_cc_display() { + let cc = make_cc(1, 2, 3); + let display = format!("{cc}"); + assert!(display.contains("CC:")); + assert!(display.contains("Linear")); + assert!(display.contains("refs=2")); + assert!(display.contains("vals=3")); + } +} diff --git a/src/mdfinfo/mdfinfo4/cg_block.rs b/src/mdfinfo/mdfinfo4/cg_block.rs index f6ea80e..3234903 100644 --- a/src/mdfinfo/mdfinfo4/cg_block.rs +++ b/src/mdfinfo/mdfinfo4/cg_block.rs @@ -808,3 +808,57 @@ pub(super) fn parse_cg4( } Ok((cg, position, n_cg, n_cn)) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_cg_get_flags_str() { + let mut cg = Cg4Block::default(); + assert_eq!(cg.get_flags_str(), "None"); + + // Individual flags + cg.cg_flags = CG_F_VLSD; + assert_eq!(cg.get_flags_str(), "VLSD"); + + cg.cg_flags = CG_F_VLSC; + assert_eq!(cg.get_flags_str(), "VLSC"); + + cg.cg_flags = CG_F_EVENT_SIGNAL_GROUP; + assert_eq!(cg.get_flags_str(), "EventSignal"); + + cg.cg_flags = CG_F_RAW_SENSOR_EVENT; + assert_eq!(cg.get_flags_str(), "RawSensor"); + + cg.cg_flags = CG_F_PROTOCOL_EVENT; + assert_eq!(cg.get_flags_str(), "ProtocolEvent"); + + cg.cg_flags = 0b1000; // RemoteMaster (bit 3) + assert_eq!(cg.get_flags_str(), "RemoteMaster"); + + // Combination + cg.cg_flags = CG_F_VLSD | CG_F_EVENT_SIGNAL_GROUP; + assert!(cg.get_flags_str().contains("VLSD")); + assert!(cg.get_flags_str().contains("EventSignal")); + } + + #[test] + fn test_cg_display() { + let cg = Cg4Block { + cg_record_id: 1, + cg_cycle_count: 1000, + cg_data_bytes: 64, + cg_inval_bytes: 2, + cg_flags: CG_F_VLSD, + ..Default::default() + }; + let display = format!("{cg}"); + assert!(display.contains("CG:")); + assert!(display.contains("rec_id=1")); + assert!(display.contains("cycles=1000")); + assert!(display.contains("data_bytes=64")); + 
assert!(display.contains("inval_bytes=2")); + assert!(display.contains("VLSD")); + } +} diff --git a/src/mdfinfo/mdfinfo4/ch_block.rs b/src/mdfinfo/mdfinfo4/ch_block.rs index 3b9ab8c..bdddad3 100644 --- a/src/mdfinfo/mdfinfo4/ch_block.rs +++ b/src/mdfinfo/mdfinfo4/ch_block.rs @@ -130,3 +130,75 @@ pub fn parse_ch4( } Ok((ch, position)) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_ch_get_type_str() { + let mut ch = Ch4Block::default(); + + let expected = [ + (0, "Group"), + (1, "Function"), + (2, "Structure"), + (3, "Map list"), + (4, "Input variables"), + (5, "Output variables"), + (6, "Local variables"), + (7, "Defined calibration objects"), + (8, "Referenced calibration objects"), + (255, "Unknown"), + ]; + for (val, name) in expected { + ch.ch_type = val; + assert_eq!(ch.get_type_str(), name, "ch_type={val}"); + } + } + + #[test] + fn test_ch_calculate_block_size() { + let ch = Ch4Block { + ch_links: 4, + ..Default::default() + }; + assert_eq!(ch.calculate_block_size(), 16 + 8 + 4 * 8 + 8); // 64 + + let ch = Ch4Block { + ch_links: 7, + ..Default::default() + }; + assert_eq!(ch.calculate_block_size(), 16 + 8 + 7 * 8 + 8); // 88 + + let ch = Ch4Block { + ch_links: 10, + ..Default::default() + }; + assert_eq!(ch.calculate_block_size(), 16 + 8 + 10 * 8 + 8); // 112 + } + + #[test] + fn test_ch_display() { + let ch = Ch4Block { + ch_type: 1, + ch_element_count: 5, + ch_ch_first: 0, + ch_links: 4, + ..Default::default() + }; + + let display = format!("{ch}"); + assert!(display.contains("CH:")); + assert!(display.contains("Function")); + assert!(display.contains("elements=5")); + assert!(display.contains("children=no")); + + let ch = Ch4Block { + ch_ch_first: 100, + ..ch + }; + let display = format!("{ch}"); + assert!(display.contains("children=yes")); + } +} diff --git a/src/mdfinfo/mdfinfo4/cn_block.rs b/src/mdfinfo/mdfinfo4/cn_block.rs index 2f99a5f..d12fa83 100644 --- a/src/mdfinfo/mdfinfo4/cn_block.rs +++ b/src/mdfinfo/mdfinfo4/cn_block.rs 
@@ -808,3 +808,145 @@ pub(super) fn parse_cn4_block( Ok((cn_struct, position, n_cn, cns)) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_cn_get_cn_type_str() { + let mut cn = Cn4Block::default(); + let expected = [ + (0, "Fixed"), + (1, "VLSD"), + (2, "Master"), + (3, "Virtual Master"), + (4, "Sync"), + (5, "MLSD"), + (6, "Virtual Data"), + (7, "VLSC"), + (255, "Unknown"), + ]; + for (val, name) in expected { + cn.cn_type = val; + assert_eq!(cn.get_cn_type_str(), name, "cn_type={val}"); + } + } + + #[test] + fn test_cn_get_sync_type_str() { + let mut cn = Cn4Block::default(); + let expected = [ + (0, "None"), + (1, "Time"), + (2, "Angle"), + (3, "Distance"), + (4, "Index"), + (5, "Frequency"), + (255, "Unknown"), + ]; + for (val, name) in expected { + cn.cn_sync_type = val; + assert_eq!(cn.get_sync_type_str(), name, "cn_sync_type={val}"); + } + } + + #[test] + fn test_cn_get_data_type_str() { + let mut cn = Cn4Block::default(); + let expected = [ + (0, "UInt LE"), + (1, "UInt BE"), + (2, "Int LE"), + (3, "Int BE"), + (4, "Float LE"), + (5, "Float BE"), + (6, "String ISO-8859-1"), + (7, "String UTF-8"), + (8, "String UTF-16 LE"), + (9, "String UTF-16 BE"), + (10, "Byte Array"), + (11, "MIME Sample"), + (12, "MIME Stream"), + (13, "CANopen Date"), + (14, "CANopen Time"), + (15, "Complex LE"), + (16, "Complex BE"), + (255, "Unknown"), + ]; + for (val, name) in expected { + cn.cn_data_type = val; + assert_eq!(cn.get_data_type_str(), name, "cn_data_type={val}"); + } + } + + #[test] + fn test_cn_cn_size() { + // Non-VLSC type returns None + let cn = Cn4Block::default(); // cn_type = 0 + assert!(cn.cn_cn_size().is_none()); + + // VLSC with no extra links returns None + let cn = Cn4Block { + cn_type: 7, + ..Default::default() + }; + assert!(cn.cn_cn_size().is_none()); // links is empty + + // VLSC with extra links returns first link + let cn = Cn4Block { + cn_type: 7, + cn_links: 9, + links: vec![12345], + ..Default::default() + }; + 
assert_eq!(cn.cn_cn_size(), Some(12345)); + } + + #[test] + fn test_cn_get_set_si_source() { + let mut cn = Cn4Block::default(); + assert_eq!(cn.get_si_source(), 0); + + cn.set_si_source(42); + assert_eq!(cn.get_si_source(), 42); + } + + #[test] + fn test_cn_block_display() { + let cn = Cn4Block { + cn_type: 2, + cn_data_type: 4, + cn_bit_count: 64, + cn_byte_offset: 8, + ..Default::default() + }; + let display = format!("{cn}"); + assert!(display.contains("CN:")); + assert!(display.contains("Master")); + assert!(display.contains("Float LE")); + assert!(display.contains("bits=64")); + assert!(display.contains("byte_offset=8")); + } + + #[test] + fn test_cn4_display() { + let cn4 = Cn4 { + unique_name: "speed".to_string(), + pos_byte_beg: 16, + n_bytes: 8, + block: Cn4Block { + cn_type: 0, + cn_sync_type: 1, + ..Default::default() + }, + ..Default::default() + }; + let display = format!("{cn4}"); + assert!(display.contains("speed")); + assert!(display.contains("byte 16")); + assert!(display.contains("8 bytes")); + assert!(display.contains("Fixed")); + assert!(display.contains("Time")); + } +} diff --git a/src/mdfinfo/mdfinfo4/composition.rs b/src/mdfinfo/mdfinfo4/composition.rs index 6d7e625..d904bbd 100644 --- a/src/mdfinfo/mdfinfo4/composition.rs +++ b/src/mdfinfo/mdfinfo4/composition.rs @@ -528,3 +528,160 @@ pub(super) fn parse_composition( bail!("Unknown composition block type") } } + +#[cfg(test)] +mod tests { + use super::*; + + // ── Ds4Block tests ── + + #[test] + fn test_ds_getters() { + // Empty links + let ds = Ds4Block::default(); + assert_eq!(ds.ds_cn_composition(), 0); + assert_eq!(ds.ds_cn_alignment_start(), 0); + assert_eq!(ds.ds_data(), 0); + assert_eq!(ds.ds_md_comment(), 0); + + // Populated links + let ds = Ds4Block { + ds_links: 4, + links: vec![100, 200, 300, 400], + ds_version: 430, + ds_mode: 0, + ds_reserved: [0; 5], + }; + assert_eq!(ds.ds_cn_composition(), 100); + assert_eq!(ds.ds_cn_alignment_start(), 200); + assert_eq!(ds.ds_data(), 
300); + assert_eq!(ds.ds_md_comment(), 400); + } + + #[test] + fn test_ds_mode_helpers() { + let ds = Ds4Block { + ds_mode: DS_MODE_DATA_STREAM, + ..Default::default() + }; + assert!(ds.is_data_stream_mode()); + assert!(!ds.is_data_description_mode()); + assert_eq!(ds.get_mode_str(), "Data Stream Mode"); + + let ds = Ds4Block { + ds_mode: DS_MODE_DATA_DESCRIPTION, + ..Default::default() + }; + assert!(!ds.is_data_stream_mode()); + assert!(ds.is_data_description_mode()); + assert_eq!(ds.get_mode_str(), "Data Description Mode"); + + let ds = Ds4Block { + ds_mode: 99, + ..Default::default() + }; + assert_eq!(ds.get_mode_str(), "Unknown Mode"); + } + + #[test] + fn test_ds_display() { + let ds = Ds4Block { + ds_links: 4, + links: vec![100, 200, 300, 400], + ds_version: 430, + ds_mode: 0, + ds_reserved: [0; 5], + }; + let display = format!("{ds}"); + assert!(display.contains("DS:")); + assert!(display.contains("Data Stream Mode")); + assert!(display.contains("version=430")); + assert!(display.contains("links=4")); + } + + // ── Cl4Block tests ── + + #[test] + fn test_cl_display() { + let cl = Cl4Block { + cl_links: 2, + cl_composition: 0, + cl_cn_size: 0, + cl_flags: 0x0003, + cl_alignment: 4, + cl_bit_offset: 2, + cl_byte_offset: 16, + }; + let display = format!("{cl}"); + assert!(display.contains("CL:")); + assert!(display.contains("flags=0x0003")); + assert!(display.contains("alignment=4")); + assert!(display.contains("bit_offset=2")); + assert!(display.contains("byte_offset=16")); + } + + // ── Cv4Block tests ── + + #[test] + fn test_cv_display() { + let cv = Cv4Block { + cv_option_count: 3, + ..Default::default() + }; + let display = format!("{cv}"); + assert!(display.contains("CV:")); + assert!(display.contains("3 options")); + } + + // ── Cu4Block tests ── + + #[test] + fn test_cu_display() { + let cu = Cu4Block { + cu_member_count: 5, + ..Default::default() + }; + let display = format!("{cu}"); + assert!(display.contains("CU:")); + assert!(display.contains("5 
members")); + } + + // ── Compo & Composition Display ── + + #[test] + fn test_compo_display() { + let ds = Compo::DS(Box::::default()); + let display = format!("{ds}"); + assert!(display.contains("DS(")); + + let cl = Compo::CL(Box::::default()); + assert!(format!("{cl}").contains("CL(")); + + let cv = Compo::CV(Box::::default()); + assert!(format!("{cv}").contains("CV(")); + + let cu = Compo::CU(Box::::default()); + assert!(format!("{cu}").contains("CU(")); + } + + #[test] + fn test_composition_display() { + let comp = Composition { + block: Compo::DS(Box::::default()), + compo: None, + }; + let display = format!("{comp}"); + assert!(display.contains("Composition:")); + assert!(!display.contains("nested")); + + let nested = Composition { + block: Compo::DS(Box::::default()), + compo: Some(Box::new(Composition { + block: Compo::CL(Box::::default()), + compo: None, + })), + }; + let display = format!("{nested}"); + assert!(display.contains("(nested)")); + } +} diff --git a/src/mdfinfo/mdfinfo4/data_block.rs b/src/mdfinfo/mdfinfo4/data_block.rs index 77d6e2e..3066b45 100644 --- a/src/mdfinfo/mdfinfo4/data_block.rs +++ b/src/mdfinfo/mdfinfo4/data_block.rs @@ -463,3 +463,208 @@ impl Display for Gd4Block { write!(f, "GD: min_version={}.{}.{}", major, minor, patch) } } + +#[cfg(test)] +mod tests { + use super::*; + + // ── Dz4Block tests ── + + #[test] + fn test_dz_default() { + let dz = Dz4Block::default(); + assert_eq!(dz.dz_org_block_type, [68, 86]); // "DV" + assert_eq!(dz.dz_zip_type, 0); + assert_eq!(dz.dz_org_data_length, 0); + assert_eq!(dz.dz_data_length, 0); + } + + #[test] + fn test_dz_get_compression_str() { + let mut dz = Dz4Block::default(); + let expected = [ + (0, "Deflate"), + (1, "Deflate+Transpose"), + (2, "Zstd"), + (3, "Zstd+Transpose"), + (4, "LZ4"), + (5, "LZ4+Transpose"), + (254, "Custom"), + (255, "Unknown"), + ]; + for (val, name) in expected { + dz.dz_zip_type = val; + assert_eq!(dz.get_compression_str(), name, "dz_zip_type={val}"); + } + } + 
+ #[test] + fn test_dz_display() { + let dz = Dz4Block { + dz_org_data_length: 1000, + dz_data_length: 500, + ..Default::default() + }; + + let display = format!("{dz}"); + assert!(display.contains("DZ:")); + assert!(display.contains("Deflate")); + assert!(display.contains("org_type=DV")); + assert!(display.contains("compressed=500")); + assert!(display.contains("original=1000")); + assert!(display.contains("ratio=50.0%")); + + // Edge case: org_size=0 + let dz = Dz4Block::default(); + let display = format!("{dz}"); + assert!(display.contains("ratio=0.0%")); + } + + // ── Ld4Block tests ── + + #[test] + fn test_ld_default() { + let ld = Ld4Block::default(); + assert_eq!(ld.ld_len, 56); + assert_eq!(ld.ld_n_links, 2); + assert_eq!(ld.ld_count, 1); + assert_eq!(ld.ld_flags, 0); + assert_eq!(ld.ld_flags_ext, 0); + assert!(ld.ld_links.is_empty()); + } + + #[test] + fn test_ld_data_simple() { + // Non-interleaved: links returned as-is + let ld = Ld4Block { + ld_count: 3, + ld_links: vec![100, 200, 300], + ..Default::default() + }; + let data = ld.ld_data(); + assert_eq!(data, vec![100, 200, 300]); + } + + #[test] + fn test_ld_data_interleaved_by_flag() { + // Interleaved via ld_flags_ext bit 7 + let ld = Ld4Block { + ld_count: 2, + ld_flags_ext: 1u8 << 7, + ld_links: vec![100, 101, 200, 201], + ..Default::default() + }; + let data = ld.ld_data(); + assert_eq!(data, vec![100, 200]); // step_by(2) + } + + #[test] + fn test_ld_data_interleaved_by_count() { + // Interleaved detected by links.len() == 2 * ld_count + let ld = Ld4Block { + ld_count: 2, + ld_links: vec![100, 101, 200, 201], + ..Default::default() + }; + let data = ld.ld_data(); + assert_eq!(data, vec![100, 200]); + } + + #[test] + fn test_ld_invalid_data() { + // Non-interleaved: empty result + let ld = Ld4Block { + ld_count: 2, + ld_links: vec![100, 200], + ..Default::default() + }; + assert!(ld.ld_invalid_data().is_empty()); + + // Interleaved: odd positions are invalid blocks + let ld = Ld4Block { + 
ld_count: 2, + ld_flags_ext: 1u8 << 7, + ld_links: vec![100, 101, 200, 201], + ..Default::default() + }; + let invalid = ld.ld_invalid_data(); + assert_eq!(invalid, vec![101, 201]); + } + + #[test] + fn test_ld_display() { + // With equal_sample_count + let ld = Ld4Block { + ld_equal_sample_count: Some(1000), + ld_flags: 0b1, + ..Default::default() + }; + let display = format!("{ld}"); + assert!(display.contains("LD:")); + assert!(display.contains("sample_count=1000")); + + // With offsets + let ld2 = Ld4Block { + ld_count: 3, + ld_sample_offset: vec![0, 100, 200], + ..Default::default() + }; + let display2 = format!("{ld2}"); + assert!(display2.contains("3 offsets")); + } + + // ── Hl4Block tests ── + + #[test] + fn test_hl_get_zip_type_str() { + let expected = [ + (0, "Deflate"), + (1, "Deflate+Transpose"), + (2, "Zstd"), + (3, "Zstd+Transpose"), + (4, "LZ4"), + (5, "LZ4+Transpose"), + (255, "Unknown"), + ]; + for (val, name) in expected { + let hl = Hl4Block { + hl_zip_type: val, + ..Default::default() + }; + assert_eq!(hl.get_zip_type_str(), name, "hl_zip_type={val}"); + } + } + + #[test] + fn test_hl_display() { + let hl = Hl4Block { + hl_flags: 0x0001, + hl_zip_type: 2, + ..Default::default() + }; + let display = format!("{hl}"); + assert!(display.contains("HL:")); + assert!(display.contains("flags=0x0001")); + assert!(display.contains("zip_type=Zstd")); + } + + // ── Gd4Block tests ── + + #[test] + fn test_gd_display() { + let gd = Gd4Block { + gd_version: 430, + ..Default::default() + }; + let display = format!("{gd}"); + assert!(display.contains("GD:")); + assert!(display.contains("min_version=4.3.0")); + + let gd = Gd4Block { + gd_version: 410, + ..Default::default() + }; + let display = format!("{gd}"); + assert!(display.contains("min_version=4.1.0")); + } +} diff --git a/src/mdfinfo/mdfinfo4/dg_block.rs b/src/mdfinfo/mdfinfo4/dg_block.rs index 23616f5..4d6bfc4 100644 --- a/src/mdfinfo/mdfinfo4/dg_block.rs +++ b/src/mdfinfo/mdfinfo4/dg_block.rs @@ -192,3 
+192,85 @@ fn identify_vlsd_cg(cg: &mut HashMap) { } } } + +#[cfg(test)] +mod tests { + use super::*; + use super::super::block_header::default_short_header; + use super::super::cg_block::Cg4Block; + use super::super::cn_block::Cn4; + use super::super::metadata::BlockType; + use std::collections::HashSet; + + #[test] + fn test_dg_block_display() { + let dg = Dg4Block::default(); + let display = format!("{dg}"); + assert!(display.contains("DG:")); + assert!(display.contains("rec_id_size=0")); + + let dg = Dg4Block { + dg_rec_id_size: 2, + ..Default::default() + }; + let display = format!("{dg}"); + assert!(display.contains("rec_id_size=2")); + } + + #[test] + fn test_dg_display() { + // Empty DG + let dg = Dg4 { + block: Dg4Block::default(), + cg: HashMap::new(), + }; + let display = format!("{dg}"); + assert!(display.contains("0 channel groups")); + assert!(display.contains("0 channels")); + + // DG with 2 CGs, one with 3 channels and one with 2 + let mut cg_map = HashMap::new(); + let mut cn1: HashMap = HashMap::new(); + cn1.insert(0, Cn4::default()); + cn1.insert(8, Cn4::default()); + cn1.insert(16, Cn4::default()); + let cg1 = Cg4 { + header: default_short_header(BlockType::CG), + block: Cg4Block::default(), + cn: cn1, + master_channel_name: None, + channel_names: HashSet::new(), + record_length: 24, + block_position: 100, + vlsd_cg: None, + invalid_bytes: None, + sr: vec![], + }; + cg_map.insert(0, cg1); + + let mut cn2: HashMap = HashMap::new(); + cn2.insert(0, Cn4::default()); + cn2.insert(8, Cn4::default()); + let cg2 = Cg4 { + header: default_short_header(BlockType::CG), + block: Cg4Block::default(), + cn: cn2, + master_channel_name: None, + channel_names: HashSet::new(), + record_length: 16, + block_position: 200, + vlsd_cg: None, + invalid_bytes: None, + sr: vec![], + }; + cg_map.insert(1, cg2); + + let dg = Dg4 { + block: Dg4Block::default(), + cg: cg_map, + }; + let display = format!("{dg}"); + assert!(display.contains("2 channel groups")); + 
assert!(display.contains("5 channels")); + } +} diff --git a/src/mdfinfo/mdfinfo4/ev_block.rs b/src/mdfinfo/mdfinfo4/ev_block.rs index fc9c635..7361326 100644 --- a/src/mdfinfo/mdfinfo4/ev_block.rs +++ b/src/mdfinfo/mdfinfo4/ev_block.rs @@ -192,6 +192,31 @@ impl Ev4Block { } } +impl Default for Ev4Block { + fn default() -> Self { + Ev4Block { + ev_links: 5, + ev_ev_next: 0, + ev_ev_parent: 0, + ev_ev_range: 0, + ev_tx_name: 0, + ev_md_comment: 0, + links: Vec::new(), + ev_type: 0, + ev_sync_type: 0, + ev_range_type: 0, + ev_cause: 0, + ev_flags: 0, + ev_reserved: [0; 3], + ev_scope_count: 0, + ev_attachment_count: 0, + ev_creator_index: 0, + ev_sync_base_value: 0, + ev_sync_factor: 0.0, + } + } +} + impl Display for Ev4Block { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( @@ -256,3 +281,167 @@ pub fn parse_ev4( } Ok((ev, position)) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_ev_get_event_type_str() { + let mut ev = Ev4Block::default(); + let expected = [ + (EV_T_RECORDING, "Recording"), + (EV_T_RECORDING_INTERRUPT, "Recording Interrupt"), + (EV_T_ACQUISITION_INTERRUPT, "Acquisition Interrupt"), + (EV_T_TRIGGER, "Trigger"), + (EV_T_MARKER, "Marker"), + (255, "Unknown"), + ]; + for (val, name) in expected { + ev.ev_type = val; + assert_eq!(ev.get_event_type_str(), name, "ev_type={val}"); + } + } + + #[test] + fn test_ev_get_sync_type_str() { + let mut ev = Ev4Block::default(); + let expected = [ + (EV_S_NONE, "None"), + (EV_S_TIME, "Time"), + (EV_S_ANGLE, "Angle"), + (EV_S_DISTANCE, "Distance"), + (EV_S_INDEX, "Index"), + (EV_S_FREQUENCY, "Frequency"), + (255, "Unknown"), + ]; + for (val, name) in expected { + ev.ev_sync_type = val; + assert_eq!(ev.get_sync_type_str(), name, "ev_sync_type={val}"); + } + } + + #[test] + fn test_ev_get_cause_str() { + let mut ev = Ev4Block::default(); + let expected = [ + (EV_C_OTHER, "Other/Unknown"), + (EV_C_ERROR, "Error"), + (EV_C_TOOL, "Tool Internal"), + (EV_C_SCRIPT, "Script"), + 
(EV_C_USER, "User"), + (255, "Unknown"), + ]; + for (val, name) in expected { + ev.ev_cause = val; + assert_eq!(ev.get_cause_str(), name, "ev_cause={val}"); + } + } + + #[test] + fn test_ev_get_range_type_str() { + let mut ev = Ev4Block::default(); + let expected = [(0, "Point"), (1, "Beginning"), (2, "End"), (255, "Unknown")]; + for (val, name) in expected { + ev.ev_range_type = val; + assert_eq!(ev.get_range_type_str(), name, "ev_range_type={val}"); + } + } + + #[test] + fn test_ev_get_sync_value() { + let mut ev = Ev4Block::default(); + + ev.ev_sync_base_value = 1000; + ev.ev_sync_factor = 1e-9; + assert!((ev.get_sync_value() - 1e-6).abs() < 1e-15); + + ev.ev_sync_base_value = 0; + ev.ev_sync_factor = 1.0; + assert_eq!(ev.get_sync_value(), 0.0); + + ev.ev_sync_base_value = -100; + ev.ev_sync_factor = 0.5; + assert_eq!(ev.get_sync_value(), -50.0); + } + + #[test] + fn test_ev_get_scope_links() { + let mut ev = Ev4Block::default(); + + // Empty links, scope_count=0 + assert!(ev.get_scope_links().is_empty()); + + // scope_count > 0 but links empty + ev.ev_scope_count = 2; + assert!(ev.get_scope_links().is_empty()); + + // Populated links with scope_count=2 + ev.links = vec![100, 200, 300, 400]; + ev.ev_scope_count = 2; + assert_eq!(ev.get_scope_links(), &[100, 200]); + + // scope_count > links.len() + ev.ev_scope_count = 10; + assert_eq!(ev.get_scope_links(), &[100, 200, 300, 400]); + } + + #[test] + fn test_ev_get_attachment_links() { + let mut ev = Ev4Block::default(); + + // Empty links + assert!(ev.get_attachment_links().is_empty()); + + // attachment_count > 0 but links empty + ev.ev_attachment_count = 1; + assert!(ev.get_attachment_links().is_empty()); + + // scope_count=2, attachment_count=1, links has enough + ev.links = vec![100, 200, 300, 400]; + ev.ev_scope_count = 2; + ev.ev_attachment_count = 1; + assert_eq!(ev.get_attachment_links(), &[300]); + + // scope_count=0, attachment_count=2 + ev.ev_scope_count = 0; + ev.ev_attachment_count = 2; + 
assert_eq!(ev.get_attachment_links(), &[100, 200]); + + // start >= links.len() + ev.ev_scope_count = 10; + ev.ev_attachment_count = 1; + assert!(ev.get_attachment_links().is_empty()); + } + + #[test] + fn test_ev_calculate_block_size() { + let mut ev = Ev4Block::default(); + + ev.ev_links = 5; + assert_eq!(ev.calculate_block_size(), 16 + 8 + 5 * 8 + 32); // 96 + + ev.ev_links = 8; + assert_eq!(ev.calculate_block_size(), 16 + 8 + 8 * 8 + 32); // 120 + } + + #[test] + fn test_ev_display() { + let mut ev = Ev4Block::default(); + ev.ev_type = EV_T_TRIGGER; + ev.ev_sync_type = EV_S_TIME; + ev.ev_cause = EV_C_USER; + ev.ev_range_type = 0; + ev.ev_scope_count = 3; + ev.ev_attachment_count = 1; + + let display = format!("{ev}"); + assert!(display.contains("EV:")); + assert!(display.contains("Trigger")); + assert!(display.contains("Time")); + assert!(display.contains("User")); + assert!(display.contains("Point")); + assert!(display.contains("scopes=3")); + assert!(display.contains("attachments=1")); + } +} diff --git a/src/mdfinfo/mdfinfo4/fh_block.rs b/src/mdfinfo/mdfinfo4/fh_block.rs index dd04c41..92c8ab6 100644 --- a/src/mdfinfo/mdfinfo4/fh_block.rs +++ b/src/mdfinfo/mdfinfo4/fh_block.rs @@ -128,3 +128,46 @@ pub fn parse_fh( } Ok((fh, position)) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_default() { + let fh = FhBlock::default(); + assert_eq!(fh.fh_fh_next, 0); + assert_eq!(fh.fh_md_comment, 0); + assert!(fh.fh_time_ns > 0); // current time + } + + #[test] + fn test_display_utc() { + let fh = FhBlock { + fh_time_ns: 1_700_000_000_000_000_000, // 2023-11-14T22:13:20 UTC + fh_time_flags: 0, // UTC + fh_tz_offset_min: 0, + fh_dst_offset_min: 0, + ..Default::default() + }; + let display = format!("{fh}"); + assert!(display.contains("FH:")); + assert!(display.contains("2023-11-14")); + assert!(display.contains("UTC")); + assert!(display.contains("tz_offset=0min")); + } + + #[test] + fn test_display_local() { + let fh = FhBlock { + fh_time_ns: 
1_700_000_000_000_000_000, + fh_time_flags: 0b1, // local time + fh_tz_offset_min: 60, + fh_dst_offset_min: 0, + ..Default::default() + }; + let display = format!("{fh}"); + assert!(display.contains("local")); + assert!(display.contains("tz_offset=60min")); + } +} diff --git a/src/mdfinfo/mdfinfo4/hd_block.rs b/src/mdfinfo/mdfinfo4/hd_block.rs index 76e8b67..36e8175 100644 --- a/src/mdfinfo/mdfinfo4/hd_block.rs +++ b/src/mdfinfo/mdfinfo4/hd_block.rs @@ -116,3 +116,28 @@ pub fn hd4_parser( let position = read_meta_data(rdr, sharable, hd.hd_md_comment, 168, BlockType::HD)?; Ok((hd, position)) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_default() { + let hd = Hd4::default(); + assert_eq!(hd.hd_len, 104); + assert_eq!(hd.hd_link_counts, 6); + assert_eq!(hd.hd_dg_first, 0); + assert_eq!(hd.hd_fh_first, 0); + } + + #[test] + fn test_display() { + let hd = Hd4 { + hd_start_time_ns: 1_700_000_000_000_000_000, // 2023-11-14T22:13:20 UTC + ..Default::default() + }; + let display = format!("{hd}"); + assert!(display.contains("Time :")); + assert!(display.contains("2023-11-14")); + } +} diff --git a/src/mdfinfo/mdfinfo4/metadata.rs b/src/mdfinfo/mdfinfo4/metadata.rs index 5138681..4f0a83c 100644 --- a/src/mdfinfo/mdfinfo4/metadata.rs +++ b/src/mdfinfo/mdfinfo4/metadata.rs @@ -897,3 +897,497 @@ impl MetaData { Ok(()) } } + +#[cfg(test)] +mod tests { + use super::*; + + // ── Constructor & data access tests ── + + #[test] + fn test_metadata_new_tx() { + let md = MetaData::new(MetaDataBlockType::TX, BlockType::CN); + assert_eq!(md.block.hdr_id, [35, 35, 84, 88]); // ##TX + assert_eq!(md.block_type, MetaDataBlockType::TX); + assert!(md.raw_data.is_empty()); + assert!(md.md_comment.is_none()); + } + + #[test] + fn test_metadata_new_md() { + let md = MetaData::new(MetaDataBlockType::MdBlock, BlockType::HD); + assert_eq!(md.block.hdr_id, [35, 35, 77, 68]); // ##MD + assert_eq!(md.block_type, MetaDataBlockType::MdBlock); + } + + #[test] + fn 
test_metadata_set_data_buffer() { + let mut md = MetaData::new(MetaDataBlockType::TX, BlockType::CN); + + // 1 byte → padded to 8 + md.set_data_buffer(&[65]); + assert_eq!(md.raw_data.len(), 8); + assert_eq!(md.raw_data[0], 65); + assert_eq!(md.block.hdr_len, 8 + 24); + + // 7 bytes → padded to 8 + md.set_data_buffer(&[1, 2, 3, 4, 5, 6, 7]); + assert_eq!(md.raw_data.len(), 8); + + // 8 bytes → padded to 16 (always adds padding) + md.set_data_buffer(&[1, 2, 3, 4, 5, 6, 7, 8]); + assert_eq!(md.raw_data.len(), 16); + + // 9 bytes → padded to 16 + md.set_data_buffer(&[1, 2, 3, 4, 5, 6, 7, 8, 9]); + assert_eq!(md.raw_data.len(), 16); + assert_eq!(md.block.hdr_len, 16 + 24); + } + + #[test] + fn test_metadata_get_tx_bytes() { + let mut md = MetaData::new(MetaDataBlockType::TX, BlockType::CN); + assert!(md.get_tx_bytes().is_none()); + + md.set_data_buffer(b"hello"); + assert!(md.get_tx_bytes().is_some()); + assert!(md.get_tx_bytes().unwrap().starts_with(b"hello")); + } + + #[test] + fn test_metadata_get_tx_for_tx_block() { + let mut md = MetaData::new(MetaDataBlockType::TX, BlockType::CN); + md.set_data_buffer(b"test_channel\0"); + + let tx = md.get_tx().unwrap(); + assert_eq!(tx, Some("test_channel".to_string())); + } + + #[test] + fn test_metadata_get_data_string() { + let mut md = MetaData::new(MetaDataBlockType::TX, BlockType::CN); + md.set_data_buffer(b"hello world\0\0\0"); + + let s = md.get_data_string().unwrap(); + assert_eq!(s, "hello world"); + + // MdParsed returns empty + let md2 = MetaData { + block_type: MetaDataBlockType::MdParsed, + ..MetaData::default() + }; + assert_eq!(md2.get_data_string().unwrap(), ""); + } + + // ── XML parsing tests ── + + fn make_md(block_type: BlockType, xml: &str) -> MetaData { + let mut md = MetaData::new(MetaDataBlockType::MdBlock, block_type); + md.set_data_buffer(xml.as_bytes()); + md + } + + #[test] + fn test_parse_hd_comment() { + let xml = r#" +Test measurement +PC timer + +3.14159 +9.81 + + +tester + +"#; + let mut md = 
make_md(BlockType::HD, xml); + md.parse_xml().unwrap(); + + assert_eq!(md.block_type, MetaDataBlockType::MdParsed); + let comment = md.md_comment.as_ref().unwrap(); + if let MdComment::Hd(hd) = comment { + assert_eq!(hd.tx.as_deref(), Some("Test measurement")); + assert_eq!(hd.time_source.as_deref(), Some("PC timer")); + assert_eq!(hd.constants.get("pi").map(|s| s.as_str()), Some("3.14159")); + assert_eq!(hd.constants.get("g").map(|s| s.as_str()), Some("9.81")); + assert_eq!(hd.constants.len(), 2); + if let Some(PropertyValue::Value(v)) = hd.common_properties.get("author") { + assert_eq!(v, "tester"); + } else { + panic!("Expected Value for 'author'"); + } + } else { + panic!("Expected MdComment::Hd"); + } + } + + #[test] + fn test_parse_fh_comment() { + let xml = r#" +created +mdfr +ratalco +0.1 +testuser +"#; + let mut md = make_md(BlockType::FH, xml); + md.parse_xml().unwrap(); + + if let Some(MdComment::Fh(fh)) = &md.md_comment { + assert_eq!(fh.tx.as_deref(), Some("created")); + assert_eq!(fh.tool_id.as_deref(), Some("mdfr")); + assert_eq!(fh.tool_vendor.as_deref(), Some("ratalco")); + assert_eq!(fh.tool_version.as_deref(), Some("0.1")); + assert_eq!(fh.user_name.as_deref(), Some("testuser")); + } else { + panic!("Expected MdComment::Fh"); + } + } + + #[test] + fn test_parse_cn_comment() { + let xml = r#" +Engine speed +RPMEngine RPM +x * 0.1 +
    0x1234
    +0.010.10.05 +
    "#; + let mut md = make_md(BlockType::CN, xml); + md.parse_xml().unwrap(); + + if let Some(MdComment::Cn(cn)) = &md.md_comment { + assert_eq!(cn.tx.as_deref(), Some("Engine speed")); + assert_eq!(cn.names.name.as_deref(), Some("RPM")); + assert_eq!(cn.names.display.as_deref(), Some("Engine RPM")); + assert_eq!(cn.formula.as_deref(), Some("x * 0.1")); + assert_eq!(cn.address.as_deref(), Some("0x1234")); + let (min, max, avg) = cn.raster.unwrap(); + assert_eq!(min, Some(0.01)); + assert_eq!(max, Some(0.1)); + assert_eq!(avg, Some(0.05)); + } else { + panic!("Expected MdComment::Cn"); + } + } + + #[test] + fn test_parse_ev_comment() { + let xml = r#" +trigger event +1.5 +3.0 +x > 100 +10.0 +"#; + let mut md = make_md(BlockType::EV, xml); + md.parse_xml().unwrap(); + + if let Some(MdComment::Ev(ev)) = &md.md_comment { + assert_eq!(ev.tx.as_deref(), Some("trigger event")); + assert_eq!(ev.pre_trigger_interval, Some(1.5)); + assert_eq!(ev.post_trigger_interval, Some(3.0)); + assert_eq!(ev.formula.as_deref(), Some("x > 100")); + assert_eq!(ev.timeout, Some(10.0)); + } else { + panic!("Expected MdComment::Ev"); + } + } + + #[test] + fn test_parse_si_comment() { + let xml = r#" +ECU source +ECU_1Bosch +/CAN/ECU_1 +CAN1 +CAN +"#; + let mut md = make_md(BlockType::SI, xml); + md.parse_xml().unwrap(); + + if let Some(MdComment::Si(si)) = &md.md_comment { + assert_eq!(si.tx.as_deref(), Some("ECU source")); + assert_eq!(si.names.name.as_deref(), Some("ECU_1")); + assert_eq!(si.names.vendor.as_deref(), Some("Bosch")); + assert_eq!(si.path.name.as_deref(), Some("/CAN/ECU_1")); + assert_eq!(si.bus.name.as_deref(), Some("CAN1")); + assert_eq!(si.protocol.as_deref(), Some("CAN")); + } else { + panic!("Expected MdComment::Si"); + } + } + + #[test] + fn test_parse_cg_comment() { + let xml = r#" +Group 1 +CG_1First group +"#; + let mut md = make_md(BlockType::CG, xml); + md.parse_xml().unwrap(); + + if let Some(MdComment::Cg(cg)) = &md.md_comment { + assert_eq!(cg.tx.as_deref(), 
Some("Group 1")); + assert_eq!(cg.names.name.as_deref(), Some("CG_1")); + assert_eq!(cg.names.description.as_deref(), Some("First group")); + } else { + panic!("Expected MdComment::Cg"); + } + } + + #[test] + fn test_parse_cc_comment() { + let xml = r#" +Conversion rule +linear_conv +x * 2 + 1 +"#; + let mut md = make_md(BlockType::CC, xml); + md.parse_xml().unwrap(); + + if let Some(MdComment::Cc(cc)) = &md.md_comment { + assert_eq!(cc.tx.as_deref(), Some("Conversion rule")); + assert_eq!(cc.names.name.as_deref(), Some("linear_conv")); + assert_eq!(cc.formula.as_deref(), Some("x * 2 + 1")); + } else { + panic!("Expected MdComment::Cc"); + } + } + + #[test] + fn test_parse_common_properties() { + let xml = r#" +props test + +value1 + + inner_val + + +
  • 1
  • +
  • 2
  • +
    + + alpha + beta + +
    +
    "#; + let mut md = make_md(BlockType::HD, xml); + md.parse_xml().unwrap(); + + if let Some(MdComment::Hd(hd)) = &md.md_comment { + // Value + if let Some(PropertyValue::Value(v)) = hd.common_properties.get("simple") { + assert_eq!(v, "value1"); + } else { + panic!("Expected Value for 'simple'"); + } + // Tree + if let Some(PropertyValue::Tree(sub)) = hd.common_properties.get("nested") { + if let Some(PropertyValue::Value(v)) = sub.get("inner_key") { + assert_eq!(v, "inner_val"); + } else { + panic!("Expected inner_key in tree"); + } + } else { + panic!("Expected Tree for 'nested'"); + } + // List + if let Some(PropertyValue::List(items)) = hd.common_properties.get("items") { + assert_eq!(items.len(), 2); + if let Some(PropertyValue::Value(v)) = items[0].get("a") { + assert_eq!(v, "1"); + } else { + panic!("Expected 'a' in first list item"); + } + } else { + panic!("Expected List for 'items'"); + } + // EList + if let Some(PropertyValue::EList(items)) = hd.common_properties.get("tags") { + assert_eq!(items, &["alpha", "beta"]); + } else { + panic!("Expected EList for 'tags'"); + } + } else { + panic!("Expected MdComment::Hd"); + } + } + + // ── Display tests ── + + #[test] + fn test_md_names_display() { + let names = MdNames { + name: Some("ch1".into()), + display: Some("Channel 1".into()), + vendor: None, + description: Some("First channel".into()), + }; + let s = format!("{names}"); + assert!(s.contains("name=ch1")); + assert!(s.contains("display=Channel 1")); + assert!(!s.contains("vendor=")); + assert!(s.contains("desc=First channel")); + + // All None + let empty = MdNames::default(); + assert_eq!(format!("{empty}"), ""); + } + + #[test] + fn test_property_value_display() { + assert_eq!(format!("{}", PropertyValue::Value("hello".into())), "hello"); + assert!(format!("{}", PropertyValue::Tree(HashMap::new())).contains("tree(0 items)")); + assert!(format!("{}", PropertyValue::List(vec![])).contains("list(0 items)")); + assert!(format!("{}", 
PropertyValue::EList(vec!["a".into(), "b".into()])) + .contains("elist(2 items)")); + } + + #[test] + fn test_md_comment_display() { + let hd = MdComment::Hd(HdComment { + tx: Some("measurement".into()), + time_source: Some("GPS".into()), + constants: HashMap::new(), + common_properties: HashMap::new(), + }); + let s = format!("{hd}"); + assert!(s.contains("measurement")); + assert!(s.contains("time_source=GPS")); + } + + #[test] + fn test_md_comment_get_tx() { + let hd = MdComment::Hd(HdComment { + tx: Some("hd_text".into()), + ..Default::default() + }); + assert_eq!(hd.get_tx(), Some("hd_text")); + + let fh = MdComment::Fh(FhComment { + tx: None, + ..Default::default() + }); + assert_eq!(fh.get_tx(), None); + + let ev = MdComment::Ev(EvComment { + tx: Some("event".into()), + ..Default::default() + }); + assert_eq!(ev.get_tx(), Some("event")); + } + + #[test] + fn test_metadata_display() { + // TX type + let mut md = MetaData::new(MetaDataBlockType::TX, BlockType::CN); + md.set_data_buffer(b"hello"); + let s = format!("{md}"); + assert!(s.contains("TX")); + + // MdBlock (unparsed) + let mut md2 = MetaData::new(MetaDataBlockType::MdBlock, BlockType::HD); + md2.set_data_buffer(b""); + let s2 = format!("{md2}"); + assert!(s2.contains("MD (unparsed)")); + + // MdParsed with comment + md2.parse_xml().unwrap(); + let s3 = format!("{md2}"); + assert!(!s3.contains("unparsed")); + + // MdParsed without comment + let md3 = MetaData { + block_type: MetaDataBlockType::MdParsed, + md_comment: None, + ..MetaData::default() + }; + let s4 = format!("{md3}"); + assert!(s4.contains("MD (parsed, empty)")); + } + + #[test] + fn test_metadata_get_tx_md_block() { + let xml = r#"channel desc"#; + let mut md = MetaData::new(MetaDataBlockType::MdBlock, BlockType::CN); + md.set_data_buffer(xml.as_bytes()); + + // get_tx on unparsed MD should extract TX tag from XML + let tx = md.get_tx().unwrap(); + assert_eq!(tx, Some("channel desc".to_string())); + } + + #[test] + fn 
test_metadata_get_tx_md_parsed() { + let xml = r#"parsed text"#; + let mut md = make_md(BlockType::HD, xml); + md.parse_xml().unwrap(); + + let tx = md.get_tx().unwrap(); + assert_eq!(tx, Some("parsed text".to_string())); + } + + #[test] + fn test_parse_dg_comment() { + let xml = r#" +data group info +val +"#; + let mut md = make_md(BlockType::DG, xml); + md.parse_xml().unwrap(); + + if let Some(MdComment::Dg(dg)) = &md.md_comment { + assert_eq!(dg.tx.as_deref(), Some("data group info")); + assert!(dg.common_properties.contains_key("key")); + } else { + panic!("Expected MdComment::Dg"); + } + } + + #[test] + fn test_parse_ch_comment() { + let xml = r#" +hierarchy +Group1First Group +"#; + let mut md = make_md(BlockType::CH, xml); + md.parse_xml().unwrap(); + + if let Some(MdComment::Ch(ch)) = &md.md_comment { + assert_eq!(ch.tx.as_deref(), Some("hierarchy")); + assert_eq!(ch.names.name.as_deref(), Some("Group1")); + assert_eq!(ch.names.display.as_deref(), Some("First Group")); + } else { + panic!("Expected MdComment::Ch"); + } + } + + #[test] + fn test_parse_at_comment() { + let xml = r#" +attachment info +application/octet-stream +"#; + let mut md = make_md(BlockType::AT, xml); + md.parse_xml().unwrap(); + + if let Some(MdComment::At(at)) = &md.md_comment { + assert_eq!(at.tx.as_deref(), Some("attachment info")); + assert!(at.common_properties.contains_key("mime")); + } else { + panic!("Expected MdComment::At"); + } + } + + #[test] + fn test_parse_xml_tx_block_noop() { + // parse_xml should be a no-op for TX blocks + let mut md = MetaData::new(MetaDataBlockType::TX, BlockType::CN); + md.set_data_buffer(b"plain text"); + md.parse_xml().unwrap(); + assert!(md.md_comment.is_none()); + } +} diff --git a/src/mdfinfo/mdfinfo4/si_block.rs b/src/mdfinfo/mdfinfo4/si_block.rs index 13582cf..553a764 100644 --- a/src/mdfinfo/mdfinfo4/si_block.rs +++ b/src/mdfinfo/mdfinfo4/si_block.rs @@ -96,3 +96,69 @@ impl Display for Si4Block { ) } } + +#[cfg(test)] +mod tests { + use 
super::*; + + #[test] + fn test_si_get_type_str() { + let mut si = Si4Block::default(); + let expected = [ + (0, "Other"), + (1, "ECU"), + (2, "Bus"), + (3, "I/O"), + (4, "Tool"), + (5, "User"), + (255, "Unknown"), + ]; + for (val, name) in expected { + si.si_type = val; + assert_eq!(si.get_type_str(), name, "si_type={val}"); + } + } + + #[test] + fn test_si_get_bus_type_str() { + let mut si = Si4Block::default(); + let expected = [ + (0, "None"), + (1, "Other"), + (2, "CAN"), + (3, "LIN"), + (4, "MOST"), + (5, "FlexRay"), + (6, "K-Line"), + (7, "Ethernet"), + (8, "USB"), + (255, "Unknown"), + ]; + for (val, name) in expected { + si.si_bus_type = val; + assert_eq!(si.get_bus_type_str(), name, "si_bus_type={val}"); + } + } + + #[test] + fn test_si_calculate_block_size() { + let si = Si4Block::default(); + assert_eq!(si.calculate_block_size(), 56); + } + + #[test] + fn test_si_display() { + let si = Si4Block { + si_type: 1, + si_bus_type: 2, + si_flags: 0x03, + ..Default::default() + }; + + let display = format!("{si}"); + assert!(display.contains("SI:")); + assert!(display.contains("ECU")); + assert!(display.contains("CAN")); + assert!(display.contains("flags=0x03")); + } +} diff --git a/src/mdfinfo/mdfinfo4/sr_block.rs b/src/mdfinfo/mdfinfo4/sr_block.rs index 0d61553..eb3e269 100644 --- a/src/mdfinfo/mdfinfo4/sr_block.rs +++ b/src/mdfinfo/mdfinfo4/sr_block.rs @@ -95,3 +95,79 @@ impl Display for Sr4Block { ) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_sr_default() { + let sr = Sr4Block::default(); + assert_eq!(sr.sr_sr_next, 0); + assert_eq!(sr.sr_data, 0); + assert_eq!(sr.sr_cycle_count, 0); + assert_eq!(sr.sr_interval, 0.0); + assert_eq!(sr.sr_sync_type, 1); // default is time + assert_eq!(sr.sr_flags, 0); + } + + #[test] + fn test_sr_has_invalidation_bytes() { + let mut sr = Sr4Block::default(); + assert!(!sr.has_invalidation_bytes()); + + sr.sr_flags = SR_F_INVALIDATION_BYTES; + assert!(sr.has_invalidation_bytes()); + + sr.sr_flags = 
SR_F_DOMINANT_INVALIDATION; // bit 1 only + assert!(!sr.has_invalidation_bytes()); + + sr.sr_flags = SR_F_INVALIDATION_BYTES | SR_F_DOMINANT_INVALIDATION; + assert!(sr.has_invalidation_bytes()); + } + + #[test] + fn test_sr_get_sync_type_str() { + let mut sr = Sr4Block { + sr_sync_type: SR_SYNC_TIME, + ..Default::default() + }; + assert_eq!(sr.get_sync_type_str(), "Time (seconds)"); + + sr.sr_sync_type = SR_SYNC_ANGLE; + assert_eq!(sr.get_sync_type_str(), "Angle (radians)"); + + sr.sr_sync_type = SR_SYNC_DISTANCE; + assert_eq!(sr.get_sync_type_str(), "Distance (meters)"); + + sr.sr_sync_type = SR_SYNC_INDEX; + assert_eq!(sr.get_sync_type_str(), "Index (samples)"); + + sr.sr_sync_type = SR_SYNC_FREQUENCY; + assert_eq!(sr.get_sync_type_str(), "Frequency (Hz)"); + + sr.sr_sync_type = 0; + assert_eq!(sr.get_sync_type_str(), "Unknown"); + + sr.sr_sync_type = 255; + assert_eq!(sr.get_sync_type_str(), "Unknown"); + } + + #[test] + fn test_sr_display() { + let sr = Sr4Block { + sr_cycle_count: 1000, + sr_interval: 0.01, + sr_sync_type: SR_SYNC_TIME, + sr_flags: SR_F_INVALIDATION_BYTES, + ..Default::default() + }; + + let display = format!("{sr}"); + assert!(display.contains("SR:")); + assert!(display.contains("cycle_count=1000")); + assert!(display.contains("interval=0.01")); + assert!(display.contains("Time (seconds)")); + assert!(display.contains("flags=0x01")); + } +} diff --git a/src/mdfreader/conversions3.rs b/src/mdfreader/conversions3.rs index 0f35558..45fbca3 100644 --- a/src/mdfreader/conversions3.rs +++ b/src/mdfreader/conversions3.rs @@ -1262,3 +1262,104 @@ fn value_range_to_text( } Ok(()) } + +#[cfg(test)] +mod tests { + use super::*; + use arrow::array::{Float64Builder, Int32Builder}; + + #[test] + fn test_value_to_value_with_interpolation() { + // Table pairs: x=0→y=0, x=10→y=100, x=20→y=200 + let cc_val = vec![0.0, 0.0, 10.0, 100.0, 20.0, 200.0]; + + let mut builder = Float64Builder::new(); + builder.append_value(0.0); // exact match → 0 + 
builder.append_value(5.0); // interpolate between (0,0) and (10,100) → 50 + builder.append_value(10.0); // exact match → 100 + builder.append_value(15.0); // interpolate between (10,100) and (20,200) → 150 + builder.append_value(-5.0); // below first → first y = 0 + builder.append_value(25.0); // above last → last y = 200 + + let result = + value_to_value_with_interpolation_calculation(&mut builder, cc_val, 6).unwrap(); + let values = result.values_slice(); + assert_eq!(values.len(), 6); + assert!((values[0] - 0.0).abs() < 1e-12); + assert!((values[1] - 50.0).abs() < 1e-12); + assert!((values[2] - 100.0).abs() < 1e-12); + assert!((values[3] - 150.0).abs() < 1e-12); + assert!((values[4] - 0.0).abs() < 1e-12); + assert!((values[5] - 200.0).abs() < 1e-12); + } + + #[test] + fn test_value_to_value_without_interpolation() { + // Same table pairs: x=0→y=0, x=10→y=100, x=20→y=200 + let cc_val = vec![0.0, 0.0, 10.0, 100.0, 20.0, 200.0]; + + let mut builder = Float64Builder::new(); + builder.append_value(0.0); // exact → 0 + builder.append_value(3.0); // nearer to x=0 (dist=3) than x=10 (dist=7) → 0 + builder.append_value(7.0); // nearer to x=10 (dist=3) than x=0 (dist=7) → 100 + builder.append_value(10.0); // exact → 100 + builder.append_value(-5.0); // below first → 0 + builder.append_value(25.0); // above last → 200 + + let result = + value_to_value_without_interpolation_calculation(&mut builder, cc_val, 6).unwrap(); + let values = result.values_slice(); + assert_eq!(values.len(), 6); + assert!((values[0] - 0.0).abs() < 1e-12); + assert!((values[1] - 0.0).abs() < 1e-12); + assert!((values[2] - 100.0).abs() < 1e-12); + assert!((values[3] - 100.0).abs() < 1e-12); + assert!((values[4] - 0.0).abs() < 1e-12); + assert!((values[5] - 200.0).abs() < 1e-12); + } + + #[test] + fn test_value_to_text_calculation() { + let cc_val_ref: Vec<(f64, String)> = vec![ + (1.0, "one".to_string()), + (2.0, "two".to_string()), + (3.0, "three".to_string()), + ]; + + let mut builder = 
Int32Builder::new(); + builder.append_value(1); // matches 1.0 → "one" + builder.append_value(2); // matches 2.0 → "two" + builder.append_value(3); // matches 3.0 → "three" + builder.append_value(99); // no match → defaults to first entry ("one") + + let result = value_to_text_calculation(&mut builder, &cc_val_ref, 4).unwrap(); + let arr = result.finish_cloned(); + assert_eq!(arr.value(0), "one"); + assert_eq!(arr.value(1), "two"); + assert_eq!(arr.value(2), "three"); + assert_eq!(arr.value(3), "one"); + } + + #[test] + fn test_value_range_to_text_calculation() { + let ranges = vec![ + (0.0, 10.0, "low".to_string()), + (10.0, 20.0, "medium".to_string()), + (20.0, 30.0, "high".to_string()), + ]; + let cc_val_ref = (ranges, "unknown".to_string()); + + let mut builder = Float64Builder::new(); + builder.append_value(5.0); // in [0, 10) → "low" + builder.append_value(15.0); // in [10, 20) → "medium" + builder.append_value(25.0); // in [20, 30) → "high" + builder.append_value(35.0); // out of range → "unknown" + + let result = value_range_to_text_calculation(&mut builder, &cc_val_ref, 4).unwrap(); + let arr = result.finish_cloned(); + assert_eq!(arr.value(0), "low"); + assert_eq!(arr.value(1), "medium"); + assert_eq!(arr.value(2), "high"); + assert_eq!(arr.value(3), "unknown"); + } +} diff --git a/src/mdfreader/conversions4.rs b/src/mdfreader/conversions4.rs index 93e1a37..2398dc4 100644 --- a/src/mdfreader/conversions4.rs +++ b/src/mdfreader/conversions4.rs @@ -2230,3 +2230,175 @@ fn bitfield_text_table( } Ok(()) } + +#[cfg(test)] +mod tests { + use super::*; + use arrow::array::{Float64Builder, Int32Builder, UInt16Builder}; + + #[test] + fn test_linear_calculation() { + // v * p2 + p1 where p1=10.0, p2=2.0 + let p1 = 10.0; + let p2 = 2.0; + + // Int32 + let mut builder = Int32Builder::new(); + builder.append_value(0); + builder.append_value(1); + builder.append_value(5); + builder.append_value(-3); + let result = linear_calculation(&mut builder, p1, p2).unwrap(); + let 
values = result.values_slice(); + assert_eq!(values.len(), 4); + assert!((values[0] - 10.0).abs() < 1e-12); // 0*2+10 + assert!((values[1] - 12.0).abs() < 1e-12); // 1*2+10 + assert!((values[2] - 20.0).abs() < 1e-12); // 5*2+10 + assert!((values[3] - 4.0).abs() < 1e-12); // -3*2+10 + + // Float64 + let mut builder = Float64Builder::new(); + builder.append_value(1.5); + builder.append_value(2.5); + let result = linear_calculation(&mut builder, p1, p2).unwrap(); + let values = result.values_slice(); + assert!((values[0] - 13.0).abs() < 1e-12); // 1.5*2+10 + assert!((values[1] - 15.0).abs() < 1e-12); // 2.5*2+10 + + // UInt16 + let mut builder = UInt16Builder::new(); + builder.append_value(100); + builder.append_value(0); + let result = linear_calculation(&mut builder, p1, p2).unwrap(); + let values = result.values_slice(); + assert!((values[0] - 210.0).abs() < 1e-12); // 100*2+10 + assert!((values[1] - 10.0).abs() < 1e-12); // 0*2+10 + } + + #[test] + fn test_rational_calculation() { + // (x²*p1 + x*p2 + p3) / (x²*p4 + x*p5 + p6) + // Simple linear: p1=0, p2=2, p3=1, p4=0, p5=0, p6=1 → (2x+1)/1 + let cc_val = vec![0.0, 2.0, 1.0, 0.0, 0.0, 1.0]; + + let mut builder = Int32Builder::new(); + builder.append_value(0); + builder.append_value(1); + builder.append_value(5); + let result = rational_calculation(&builder, &cc_val).unwrap(); + let values = result.values_slice(); + assert!((values[0] - 1.0).abs() < 1e-12); // (0+0+1)/1 + assert!((values[1] - 3.0).abs() < 1e-12); // (0+2+1)/1 + assert!((values[2] - 11.0).abs() < 1e-12); // (0+10+1)/1 + + // Quadratic: p1=1, p2=0, p3=0, p4=0, p5=0, p6=1 → x² + let cc_val = vec![1.0, 0.0, 0.0, 0.0, 0.0, 1.0]; + let mut builder = Float64Builder::new(); + builder.append_value(3.0); + builder.append_value(4.0); + let result = rational_calculation(&builder, &cc_val).unwrap(); + let values = result.values_slice(); + assert!((values[0] - 9.0).abs() < 1e-12); // 3² + assert!((values[1] - 16.0).abs() < 1e-12); // 4² + } + + #[test] + fn 
test_value_to_value_with_interpolation_primitive() { + // Table pairs: x=0→y=0, x=10→y=100, x=20→y=200 + let keys = [0.0, 0.0, 10.0, 100.0, 20.0, 200.0]; + let val: Vec<(&f64, &f64)> = keys.iter().tuples().collect(); + + let mut builder = Float64Builder::new(); + builder.append_value(0.0); // exact → 0 + builder.append_value(5.0); // interpolate → 50 + builder.append_value(10.0); // exact → 100 + builder.append_value(15.0); // interpolate → 150 + builder.append_value(-5.0); // below first → 0 + builder.append_value(25.0); // above last → 200 + + let result = value_to_value_with_interpolation_primitive(&builder, val).unwrap(); + let values = result.values_slice(); + assert_eq!(values.len(), 6); + assert!((values[0] - 0.0).abs() < 1e-12); + assert!((values[1] - 50.0).abs() < 1e-12); + assert!((values[2] - 100.0).abs() < 1e-12); + assert!((values[3] - 150.0).abs() < 1e-12); + assert!((values[4] - 0.0).abs() < 1e-12); + assert!((values[5] - 200.0).abs() < 1e-12); + } + + #[test] + fn test_value_to_value_without_interpolation_primitive() { + // Table pairs: x=0→y=0, x=10→y=100, x=20→y=200 + let keys = [0.0, 0.0, 10.0, 100.0, 20.0, 200.0]; + let val: Vec<(&f64, &f64)> = keys.iter().tuples().collect(); + + let mut builder = Float64Builder::new(); + builder.append_value(0.0); // exact → 0 + builder.append_value(3.0); // nearer to x=0 (dist=3) than x=10 (dist=7) → 0 + builder.append_value(7.0); // nearer to x=10 (dist=3) than x=0 (dist=7) → 100 + builder.append_value(10.0); // exact → 100 + builder.append_value(-5.0); // below first → 0 + builder.append_value(25.0); // above last → 200 + + let result = value_to_value_without_interpolation_primitive(&mut builder, val).unwrap(); + let values = result.values_slice(); + assert_eq!(values.len(), 6); + assert!((values[0] - 0.0).abs() < 1e-12); + assert!((values[1] - 0.0).abs() < 1e-12); + assert!((values[2] - 100.0).abs() < 1e-12); + assert!((values[3] - 100.0).abs() < 1e-12); + assert!((values[4] - 0.0).abs() < 1e-12); + 
assert!((values[5] - 200.0).abs() < 1e-12); + } + + #[test] + fn test_value_range_to_value_table() { + // Ranges: (key_min, key_max, value) + let val = vec![ + (0.0, 10.0, 100.0), + (10.0, 20.0, 200.0), + (20.0, 30.0, 300.0), + ]; + let default = -1.0; + + let mut builder = Float64Builder::new(); + builder.append_value(0.0); // exact match on key_min=0 → 100 + builder.append_value(10.0); // exact match on key_min=10 → 200 + builder.append_value(20.0); // exact match on key_min=20 → 300 + builder.append_value(-5.0); // below all ranges → default + builder.append_value(25.0); // above last key_min but within last upper bound → 300 + + let result = value_range_to_value_table_calculation(&builder, &val, &default).unwrap(); + let values = result.values_slice(); + assert_eq!(values.len(), 5); + assert!((values[0] - 100.0).abs() < 1e-12); + assert!((values[1] - 200.0).abs() < 1e-12); + assert!((values[2] - 300.0).abs() < 1e-12); + assert!((values[3] - (-1.0)).abs() < 1e-12); + assert!((values[4] - 300.0).abs() < 1e-12); + } + + #[test] + fn test_algebraic_conversion_primitive() { + // Expression: X * 2 + 1 + let parser = fasteval::Parser::new(); + let mut slab = fasteval::Slab::new(); + let compiled = parser.parse("X * 2 + 1", &mut slab.ps).unwrap(); + let compiled = compiled.from(&slab.ps).compile(&slab.ps, &mut slab.cs); + + let mut builder = Float64Builder::new(); + builder.append_value(0.0); // → 1 + builder.append_value(1.0); // → 3 + builder.append_value(5.0); // → 11 + builder.append_value(-3.0); // → -5 + + let result = alegbraic_conversion_primitive(&compiled, &slab, &builder).unwrap(); + let values = result.values_slice(); + assert_eq!(values.len(), 4); + assert!((values[0] - 1.0).abs() < 1e-12); + assert!((values[1] - 3.0).abs() < 1e-12); + assert!((values[2] - 11.0).abs() < 1e-12); + assert!((values[3] - (-5.0)).abs() < 1e-12); + } +} From a0d7047b8aafbbac65fe115b2ba7f991e90867bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> 
Date: Sun, 15 Feb 2026 14:19:01 +0100 Subject: [PATCH 24/46] clippy improvements --- src/mdfinfo/mdfinfo4/at_block.rs | 12 +- src/mdfinfo/mdfinfo4/ev_block.rs | 32 +- tests/mdf4_features.rs | 555 +++++++++++++++++++++++++++++++ 3 files changed, 580 insertions(+), 19 deletions(-) create mode 100644 tests/mdf4_features.rs diff --git a/src/mdfinfo/mdfinfo4/at_block.rs b/src/mdfinfo/mdfinfo4/at_block.rs index 101f433..c5d83a4 100644 --- a/src/mdfinfo/mdfinfo4/at_block.rs +++ b/src/mdfinfo/mdfinfo4/at_block.rs @@ -277,11 +277,13 @@ mod tests { #[test] fn test_at_display() { - let mut at = At4Block::default(); - at.at_original_size = 1024; - at.at_embedded_size = 512; - at.at_flags = 0b1; // embedded - at.at_creator_index = 1; + let mut at = At4Block { + at_original_size: 1024, + at_embedded_size: 512, + at_flags: 0b1, // embedded + at_creator_index: 1, + ..Default::default() + }; let display = format!("{at}"); assert!(display.contains("AT:")); diff --git a/src/mdfinfo/mdfinfo4/ev_block.rs b/src/mdfinfo/mdfinfo4/ev_block.rs index 7361326..4bf7785 100644 --- a/src/mdfinfo/mdfinfo4/ev_block.rs +++ b/src/mdfinfo/mdfinfo4/ev_block.rs @@ -350,10 +350,11 @@ mod tests { #[test] fn test_ev_get_sync_value() { - let mut ev = Ev4Block::default(); - - ev.ev_sync_base_value = 1000; - ev.ev_sync_factor = 1e-9; + let mut ev = Ev4Block { + ev_sync_base_value: 1000, + ev_sync_factor: 1e-9, + ..Default::default() + }; assert!((ev.get_sync_value() - 1e-6).abs() < 1e-15); ev.ev_sync_base_value = 0; @@ -416,9 +417,10 @@ mod tests { #[test] fn test_ev_calculate_block_size() { - let mut ev = Ev4Block::default(); - - ev.ev_links = 5; + let mut ev = Ev4Block { + ev_links: 5, + ..Default::default() + }; assert_eq!(ev.calculate_block_size(), 16 + 8 + 5 * 8 + 32); // 96 ev.ev_links = 8; @@ -427,13 +429,15 @@ mod tests { #[test] fn test_ev_display() { - let mut ev = Ev4Block::default(); - ev.ev_type = EV_T_TRIGGER; - ev.ev_sync_type = EV_S_TIME; - ev.ev_cause = EV_C_USER; - ev.ev_range_type = 0; - 
ev.ev_scope_count = 3; - ev.ev_attachment_count = 1; + let ev = Ev4Block { + ev_type: EV_T_TRIGGER, + ev_sync_type: EV_S_TIME, + ev_cause: EV_C_USER, + ev_range_type: 0, + ev_scope_count: 3, + ev_attachment_count: 1, + ..Default::default() + }; let display = format!("{ev}"); assert!(display.contains("EV:")); diff --git a/tests/mdf4_features.rs b/tests/mdf4_features.rs new file mode 100644 index 0000000..317060d --- /dev/null +++ b/tests/mdf4_features.rs @@ -0,0 +1,555 @@ +//! Integration tests for MDF4.3 specialized features: +//! Events, Attachments, Sample Reduction, MetaData, DynamicData, Variant, Union, +//! Source Information, and Channel Hierarchy. +//! +//! These tests exercise the high-level accessor APIs using real MDF4.3 example files. + +use anyhow::Result; +use mdfr::mdfinfo::MdfInfo; +use mdfr::mdfreader::Mdf; +use std::sync::LazyLock; + +static BASE_PATH: LazyLock = LazyLock::new(|| { + "/home/ratal/workspace/mdfreader/mdfreader/tests/MDF4/MDF4.3/Base_Standard/Examples/" + .to_string() +}); + +// ── Events ────────────────────────────────────────────────────────────────── + +#[test] +fn test_events_markers() -> Result<()> { + let file = format!("{}Events/Marker/dSPACE_Bookmarks.mf4", BASE_PATH.as_str()); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + // list_events exercises EV block iteration + TX/MD lookup + let events_list = mdf.mdf_info.list_events(); + assert!(!events_list.is_empty(), "list_events should return non-empty string"); + + // get_event_blocks returns all EV blocks + let event_blocks = mdf.mdf_info.get_event_blocks(); + assert!(event_blocks.is_some(), "should have event blocks"); + let event_blocks = event_blocks.unwrap(); + assert!(!event_blocks.is_empty(), "should have at least one event"); + + for ev in event_blocks.values() { + // Exercise accessor methods on each event + let type_str = ev.get_event_type_str(); + assert!(!type_str.is_empty()); + let sync_str = ev.get_sync_type_str(); + 
assert!(!sync_str.is_empty()); + let cause_str = ev.get_cause_str(); + assert!(!cause_str.is_empty()); + let range_str = ev.get_range_type_str(); + assert!(!range_str.is_empty()); + let _sync_val = ev.get_sync_value(); + // Exercise Display + let display = format!("{}", ev); + assert!(display.contains("EV:")); + } + + // Channels should still be accessible alongside events + let channels = mdf.get_channel_names_set(); + assert!(!channels.is_empty(), "should have channels"); + + Ok(()) +} + +#[test] +fn test_events_recording() -> Result<()> { + let file = format!( + "{}Events/Recording/dSPACE_CaptureBlocks.mf4", + BASE_PATH.as_str() + ); + let mut mdf = Mdf::new(&file)?; + + let events_list = mdf.mdf_info.list_events(); + assert!(!events_list.is_empty()); + + let event_blocks = mdf.mdf_info.get_event_blocks(); + assert!(event_blocks.is_some()); + let event_blocks = event_blocks.unwrap(); + assert!(!event_blocks.is_empty()); + + + + // Exercise individual event block lookup and sync value computation + for (&pos, ev) in &event_blocks { + let single = mdf.mdf_info.get_event_block(pos); + assert!(single.is_some()); + let _sync_val = ev.get_sync_value(); + let _scope = ev.get_scope_links(); + let _attachments = ev.get_attachment_links(); + } + + Ok(()) +} + +#[test] +fn test_events_trigger() -> Result<()> { + let file = format!( + "{}Events/Trigger/dSPACE_HILAPITrigger.mf4", + BASE_PATH.as_str() + ); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + let events_list = mdf.mdf_info.list_events(); + assert!(!events_list.is_empty()); + + let event_blocks = mdf.mdf_info.get_event_blocks(); + assert!(event_blocks.is_some()); + let event_blocks = event_blocks.unwrap(); + assert!(!event_blocks.is_empty()); + + + + // Check parent event relationships + let has_parent = event_blocks.values().any(|ev| ev.ev_ev_parent != 0); + // Trigger files often have parent relationships + let _ = has_parent; + + Ok(()) +} + +// ── Attachments 
───────────────────────────────────────────────────────────── + +#[test] +fn test_attachments_embedded() -> Result<()> { + let file = format!( + "{}Attachments/Embedded/Vector_Embedded.MF4", + BASE_PATH.as_str() + ); + let mut mdf = Mdf::new(&file)?; + + // list_attachments exercises AT block iteration + TX/MD lookup + let attachments_list = mdf.mdf_info.list_attachments(); + assert!( + !attachments_list.is_empty(), + "list_attachments should return non-empty" + ); + + let at_blocks = mdf.mdf_info.get_attachement_blocks(); + assert!(at_blocks.is_some()); + let at_blocks = at_blocks.unwrap(); + assert!(!at_blocks.is_empty(), "should have at least one attachment"); + + for (&pos, at) in &at_blocks { + // Embedded file: is_embedded should be true + assert!( + at.is_embedded(), + "embedded attachment should have embedded flag set" + ); + assert!(at.at_original_size > 0, "original size should be > 0"); + + // Should be able to retrieve embedded data + let data = mdf.mdf_info.get_attachment_embedded_data(pos); + assert!(data.is_some(), "should have embedded data at position {}", pos); + let data = data.unwrap(); + assert!(!data.is_empty(), "embedded data should not be empty"); + assert_eq!( + data.len() as u64, at.at_original_size, + "data length should match original size" + ); + + // Exercise display + let display = format!("{}", at); + assert!(display.contains("embedded")); + + // Exercise individual block lookup + let single = mdf.mdf_info.get_attachment_block(pos); + assert!(single.is_some()); + } + + Ok(()) +} + +#[test] +fn test_attachments_embedded_compressed() -> Result<()> { + let file = format!( + "{}Attachments/EmbeddedCompressed/Vector_EmbeddedCompressed.MF4", + BASE_PATH.as_str() + ); + let mdf = Mdf::new(&file)?; + + let at_blocks = mdf.mdf_info.get_attachement_blocks(); + assert!(at_blocks.is_some()); + let at_blocks = at_blocks.unwrap(); + assert!(!at_blocks.is_empty()); + + for (&pos, at) in &at_blocks { + assert!(at.is_embedded(), "should be 
embedded"); + assert!(at.is_compressed(), "should be compressed"); + + // Compression type should be known + let comp_str = at.get_compression_str(); + assert_ne!( + comp_str, "None", + "compressed attachment should have compression type" + ); + assert_ne!(comp_str, "Unknown", "compression type should be known"); + + // MD5 checksum may be valid + let _has_md5 = at.has_md5_checksum(); + + // Decompressed data should match original size + let data = mdf.mdf_info.get_attachment_embedded_data(pos); + assert!(data.is_some()); + let data = data.unwrap(); + assert_eq!(data.len() as u64, at.at_original_size); + } + + Ok(()) +} + +#[test] +fn test_attachments_external() -> Result<()> { + let file = format!( + "{}Attachments/External/Vector_External.MF4", + BASE_PATH.as_str() + ); + let mdf = Mdf::new(&file)?; + + let at_blocks = mdf.mdf_info.get_attachement_blocks(); + assert!(at_blocks.is_some()); + let at_blocks = at_blocks.unwrap(); + assert!(!at_blocks.is_empty()); + + for (&pos, at) in &at_blocks { + // External: not embedded + assert!( + !at.is_embedded(), + "external attachment should not be embedded" + ); + + // No embedded data for external + let data = mdf.mdf_info.get_attachment_embedded_data(pos); + assert!( + data.is_none(), + "external attachment should have no embedded data" + ); + + // Filename TX block should exist + if at.at_tx_filename != 0 { + let filename = mdf.mdf_info.get_tx(at.at_tx_filename)?; + assert!(filename.is_some(), "should have filename TX"); + let filename = filename.unwrap(); + assert!(!filename.is_empty(), "filename should not be empty"); + } + + // Display should show external + let display = format!("{}", at); + assert!(display.contains("external")); + } + + Ok(()) +} + +// ── Sample Reduction ──────────────────────────────────────────────────────── + +#[test] +fn test_sample_reduction() -> Result<()> { + let file = format!( + "{}SampleReduction/Simple/Vector_SampleReduction.mf4", + BASE_PATH.as_str() + ); + let mut mdf = 
Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + // list_sample_reductions exercises SR block iteration + let sr_list = mdf.mdf_info.list_sample_reductions(); + assert!( + !sr_list.is_empty(), + "list_sample_reductions should return non-empty" + ); + + // get_sample_reduction_blocks returns structured data + let sr_blocks = mdf.mdf_info.get_sample_reduction_blocks(); + assert!(sr_blocks.is_some()); + let sr_blocks = sr_blocks.unwrap(); + assert!(!sr_blocks.is_empty(), "should have sample reduction blocks"); + + for (_dg_pos, _rec_id, sr_vec) in &sr_blocks { + for sr in sr_vec { + assert!(sr.sr_cycle_count > 0, "cycle count should be > 0"); + assert!(sr.sr_interval > 0.0, "interval should be > 0"); + + let sync_str = sr.get_sync_type_str(); + assert!(!sync_str.is_empty()); + + let _has_inval = sr.has_invalidation_bytes(); + + // Exercise Display + let display = format!("{}", sr); + assert!(display.contains("SR:")); + } + } + + // Channels should be accessible + let channels = mdf.get_channel_names_set(); + assert!(!channels.is_empty()); + + Ok(()) +} + +// ── MetaData ──────────────────────────────────────────────────────────────── + +#[test] +fn test_metadata_hdo_comments() -> Result<()> { + let file = format!( + "{}MetaData/HDO/RAC_MDF430_HDO_Comments.mf4", + BASE_PATH.as_str() + ); + let mut mdf = Mdf::new(&file)?; + + // File history with MD comments + let fh_list = mdf.mdf_info.list_file_history(); + assert!( + !fh_list.is_empty(), + "list_file_history should return non-empty" + ); + + let fh_blocks = mdf.mdf_info.get_file_history_blocks(); + assert!(fh_blocks.is_some()); + let fh_blocks = fh_blocks.unwrap(); + assert!(!fh_blocks.is_empty(), "should have file history blocks"); + + // At least one FH should have a comment + let has_comment = fh_blocks.iter().any(|fh| fh.fh_md_comment != 0); + assert!(has_comment, "HDO file should have FH blocks with comments"); + + // FH block Display + for fh in &fh_blocks { + let display = format!("{}", fh); + 
assert!(!display.is_empty()); + } + + // Header comments via MdfInfo4 (need to pattern match) + match &mut mdf.mdf_info { + MdfInfo::V4(info4) => { + let header_comments = info4.format_header_comments(); + assert!( + !header_comments.is_empty(), + "HDO file should have header comments" + ); + } + _ => panic!("expected MDF4"), + } + + Ok(()) +} + +#[test] +fn test_metadata_custom_extensions() -> Result<()> { + let file = format!( + "{}MetaData/CustomExtensions/Vector_CustomExtensions_CNcomment.mf4", + BASE_PATH.as_str() + ); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + let channels = mdf.get_channel_names_set(); + assert!(!channels.is_empty()); + + // At least one channel should have a description (CN comment) + let has_desc = channels + .iter() + .any(|name| matches!(mdf.get_channel_desc(name), Ok(Some(_)))); + assert!( + has_desc, + "CustomExtensions file should have channels with descriptions" + ); + + // Exercise source information listing + let si_list = mdf.mdf_info.list_source_information(); + let _ = si_list; + + Ok(()) +} + +// ── DynamicData ───────────────────────────────────────────────────────────── + +#[test] +fn test_dynamic_data_channel_list() -> Result<()> { + let file = format!( + "{}DynamicData/ChannelList/simple_list.mf4", + BASE_PATH.as_str() + ); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + let channels = mdf.get_channel_names_set(); + assert!(!channels.is_empty(), "should have channels"); + + // Verify at least one channel has data + let has_data = channels.iter().any(|name| { + if let Some(data) = mdf.get_channel_data(name) { + !data.is_empty() + } else { + false + } + }); + assert!(has_data, "should have at least one channel with data"); + + Ok(()) +} + +// ── Variant ───────────────────────────────────────────────────────────────── + +#[test] +fn test_variant_fixed_length() -> Result<()> { + let file = format!( + "{}Variant/Etas_cv_storage_with_fixed_length.mf4", + 
BASE_PATH.as_str() + ); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + let channels = mdf.get_channel_names_set(); + assert!(!channels.is_empty(), "should have channels"); + + for name in &channels { + if let Some(data) = mdf.get_channel_data(name) { + let _len = data.len(); + } + } + + Ok(()) +} + +#[test] +fn test_variant_vlsd_option() -> Result<()> { + let file = format!( + "{}Variant/Vector_V430_Variant_VLSD_Option.mf4", + BASE_PATH.as_str() + ); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + let channels = mdf.get_channel_names_set(); + assert!(!channels.is_empty(), "should have channels"); + + let has_data = channels.iter().any(|name| { + if let Some(data) = mdf.get_channel_data(name) { + !data.is_empty() + } else { + false + } + }); + assert!(has_data, "should have at least one channel with data"); + + Ok(()) +} + +// ── Union ─────────────────────────────────────────────────────────────────── + +#[test] +fn test_union_fixed_length() -> Result<()> { + let file = format!( + "{}Union/Etas_cu_storage_with_fixed_length.mf4", + BASE_PATH.as_str() + ); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + let channels = mdf.get_channel_names_set(); + assert!(!channels.is_empty(), "should have channels"); + + for name in &channels { + if let Some(data) = mdf.get_channel_data(name) { + let _len = data.len(); + } + } + + Ok(()) +} + +// ── Source Information ────────────────────────────────────────────────────── + +#[test] +fn test_source_information() -> Result<()> { + // Use a bus logging file which typically has rich SI blocks + let file = format!( + "{}BusLogging/CAN/Vector_CAN_DataFrame_Sort_Bus.MF4", + BASE_PATH.as_str() + ); + let mdf = Mdf::new(&file)?; + + let si_list = mdf.mdf_info.list_source_information(); + assert!( + !si_list.is_empty(), + "CAN bus logging file should have source information" + ); + + let si_blocks = 
mdf.mdf_info.get_source_information_blocks(); + assert!(si_blocks.is_some()); + let si_blocks = si_blocks.unwrap(); + assert!(!si_blocks.is_empty()); + + for si in si_blocks.values() { + let type_str = si.get_type_str(); + assert!(!type_str.is_empty()); + let bus_str = si.get_bus_type_str(); + assert!(!bus_str.is_empty()); + // Exercise Display + let display = format!("{}", si); + assert!(display.contains("SI:")); + } + + Ok(()) +} + +// ── Channel Hierarchy ─────────────────────────────────────────────────────── + +#[test] +fn test_channel_hierarchy() -> Result<()> { + let file = format!( + "{}ChannelInfo/AttachmentRef/Vector_AttachmentRef.mf4", + BASE_PATH.as_str() + ); + let mdf = Mdf::new(&file)?; + + let ch_list = mdf.mdf_info.list_channel_hierarchy(); + let _ = ch_list; + + let ch_blocks = mdf.mdf_info.get_channel_hierarchy_blocks(); + if let Some(ch_blocks) = ch_blocks { + for &pos in ch_blocks.keys() { + let single = mdf.mdf_info.get_channel_hierarchy_block(pos); + assert!(single.is_some()); + } + } + + Ok(()) +} + +// ── Event Signals (channel-level event data, not EVBLOCK) ─────────────────── + +#[test] +fn test_event_signals_channels() -> Result<()> { + // EventSignals files don't have EVBLOCKs but contain event-related channels + let file = format!( + "{}Events/EventSignals/RAC_MDF430_EventSignals_CommonProperties.mf4", + BASE_PATH.as_str() + ); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + let channels = mdf.get_channel_names_set(); + assert!(!channels.is_empty(), "should have channels"); + + let has_data = channels.iter().any(|name| { + if let Some(data) = mdf.get_channel_data(name) { + !data.is_empty() + } else { + false + } + }); + assert!(has_data, "should have channel data"); + + // File history should be present + let fh_list = mdf.mdf_info.list_file_history(); + assert!(!fh_list.is_empty()); + + Ok(()) +} From d6bfa5dab31a679209b74c7f818d10b761d71f5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= 
<«ratal@ratal.org»> Date: Sun, 15 Feb 2026 18:07:47 +0100 Subject: [PATCH 25/46] more unit tests --- src/mdfinfo/mdfinfo4/metadata.rs | 302 +++++++++++++++++++++++++++++++ tests/mdf4_features.rs | 239 ++++++++++++++++++++++++ 2 files changed, 541 insertions(+) diff --git a/src/mdfinfo/mdfinfo4/metadata.rs b/src/mdfinfo/mdfinfo4/metadata.rs index 4f0a83c..7563995 100644 --- a/src/mdfinfo/mdfinfo4/metadata.rs +++ b/src/mdfinfo/mdfinfo4/metadata.rs @@ -1390,4 +1390,306 @@ mod tests { md.parse_xml().unwrap(); assert!(md.md_comment.is_none()); } + + // ── Comprehensive Display tests for all comment types ── + + #[test] + fn test_md_names_display_all_fields() { + let names = MdNames { + name: Some("ch1".into()), + display: Some("Channel 1".into()), + vendor: Some("Acme Corp".into()), + description: Some("Test channel".into()), + }; + let s = format!("{names}"); + assert!(s.contains("name=ch1")); + assert!(s.contains("display=Channel 1")); + assert!(s.contains("vendor=Acme Corp")); + assert!(s.contains("desc=Test channel")); + } + + #[test] + fn test_hd_comment_display_full() { + let mut constants = HashMap::new(); + constants.insert("pi".into(), "3.14".into()); + let mut props = HashMap::new(); + props.insert("author".into(), PropertyValue::Value("tester".into())); + let hd = HdComment { + tx: Some("measurement".into()), + time_source: Some("GPS".into()), + constants, + common_properties: props, + }; + let s = format!("{hd}"); + assert!(s.contains("measurement")); + assert!(s.contains("time_source=GPS")); + assert!(s.contains("constants=1")); + assert!(s.contains("props=1")); + } + + #[test] + fn test_fh_comment_display_full() { + let fh = FhComment { + tx: Some("created".into()), + tool_id: Some("mdfr".into()), + tool_vendor: Some("ratalco".into()), + tool_version: Some("0.6".into()), + user_name: Some("tester".into()), + common_properties: HashMap::new(), + }; + let s = format!("{fh}"); + assert!(s.contains("created")); + assert!(s.contains("tool=mdfr")); + 
assert!(s.contains("vendor=ratalco")); + assert!(s.contains("v0.6")); + assert!(s.contains("user=tester")); + } + + #[test] + fn test_cn_comment_display() { + let cn = CnComment { + tx: Some("Engine speed".into()), + names: MdNames { + name: Some("RPM".into()), + display: Some("Engine RPM".into()), + ..Default::default() + }, + formula: Some("x * 0.1".into()), + address: Some("0x1234".into()), + axis_monotony: Some("increasing".into()), + ..Default::default() + }; + let s = format!("{cn}"); + assert!(s.contains("Engine speed")); + assert!(s.contains("names(")); + assert!(s.contains("formula=x * 0.1")); + assert!(s.contains("addr=0x1234")); + assert!(s.contains("monotony=increasing")); + } + + #[test] + fn test_cg_comment_display() { + let cg = CgComment { + tx: Some("Group 1".into()), + names: MdNames { + name: Some("CG_1".into()), + ..Default::default() + }, + common_properties: HashMap::new(), + }; + let s = format!("{cg}"); + assert!(s.contains("Group 1")); + assert!(s.contains("names(")); + } + + #[test] + fn test_cc_comment_display() { + let cc = CcComment { + tx: Some("Linear".into()), + names: MdNames { + name: Some("conv1".into()), + ..Default::default() + }, + formula: Some("2*x+1".into()), + common_properties: HashMap::new(), + }; + let s = format!("{cc}"); + assert!(s.contains("Linear")); + assert!(s.contains("names(")); + assert!(s.contains("formula=2*x+1")); + } + + #[test] + fn test_si_comment_display() { + let si = SiComment { + tx: Some("ECU source".into()), + names: MdNames { + name: Some("ECU_1".into()), + ..Default::default() + }, + protocol: Some("CAN".into()), + ..Default::default() + }; + let s = format!("{si}"); + assert!(s.contains("ECU source")); + assert!(s.contains("protocol=CAN")); + assert!(s.contains("names(")); + } + + #[test] + fn test_ev_comment_display() { + let ev = EvComment { + tx: Some("trigger".into()), + pre_trigger_interval: Some(1.5), + post_trigger_interval: Some(3.0), + formula: Some("x > 100".into()), + timeout: 
Some(10.0), + common_properties: HashMap::new(), + }; + let s = format!("{ev}"); + assert!(s.contains("trigger")); + assert!(s.contains("pre_trigger=1.5")); + assert!(s.contains("post_trigger=3")); + assert!(s.contains("formula=x > 100")); + assert!(s.contains("timeout=10")); + } + + #[test] + fn test_at_comment_display() { + let mut props = HashMap::new(); + props.insert("mime".into(), PropertyValue::Value("image/png".into())); + let at = AtComment { + tx: Some("attachment".into()), + common_properties: props, + }; + let s = format!("{at}"); + assert!(s.contains("attachment")); + assert!(s.contains("props=1")); + } + + #[test] + fn test_ch_comment_display() { + let ch = ChComment { + tx: Some("hierarchy".into()), + names: MdNames { + name: Some("Group1".into()), + ..Default::default() + }, + common_properties: HashMap::new(), + }; + let s = format!("{ch}"); + assert!(s.contains("hierarchy")); + assert!(s.contains("names(")); + } + + #[test] + fn test_dg_comment_display() { + let mut props = HashMap::new(); + props.insert("key".into(), PropertyValue::Value("val".into())); + let dg = DgComment { + tx: Some("data group".into()), + common_properties: props, + }; + let s = format!("{dg}"); + assert!(s.contains("data group")); + assert!(s.contains("props=1")); + } + + #[test] + fn test_md_comment_display_all_variants() { + // Fh + let fh = MdComment::Fh(FhComment { + tx: Some("fh_text".into()), + ..Default::default() + }); + assert!(format!("{fh}").contains("fh_text")); + + // Cn + let cn = MdComment::Cn(CnComment { + tx: Some("cn_text".into()), + ..Default::default() + }); + assert!(format!("{cn}").contains("cn_text")); + + // Cg + let cg = MdComment::Cg(CgComment { + tx: Some("cg_text".into()), + ..Default::default() + }); + assert!(format!("{cg}").contains("cg_text")); + + // Cc + let cc = MdComment::Cc(CcComment { + tx: Some("cc_text".into()), + ..Default::default() + }); + assert!(format!("{cc}").contains("cc_text")); + + // Si + let si = MdComment::Si(SiComment { 
+ tx: Some("si_text".into()), + ..Default::default() + }); + assert!(format!("{si}").contains("si_text")); + + // Ev + let ev = MdComment::Ev(EvComment { + tx: Some("ev_text".into()), + ..Default::default() + }); + assert!(format!("{ev}").contains("ev_text")); + + // At + let at = MdComment::At(AtComment { + tx: Some("at_text".into()), + ..Default::default() + }); + assert!(format!("{at}").contains("at_text")); + + // Ch + let ch = MdComment::Ch(ChComment { + tx: Some("ch_text".into()), + ..Default::default() + }); + assert!(format!("{ch}").contains("ch_text")); + + // Dg + let dg = MdComment::Dg(DgComment { + tx: Some("dg_text".into()), + ..Default::default() + }); + assert!(format!("{dg}").contains("dg_text")); + } + + #[test] + fn test_md_comment_get_tx_all_variants() { + // Cn + let cn = MdComment::Cn(CnComment { + tx: Some("cn_tx".into()), + ..Default::default() + }); + assert_eq!(cn.get_tx(), Some("cn_tx")); + + // Cg + let cg = MdComment::Cg(CgComment { + tx: Some("cg_tx".into()), + ..Default::default() + }); + assert_eq!(cg.get_tx(), Some("cg_tx")); + + // Cc + let cc = MdComment::Cc(CcComment { + tx: Some("cc_tx".into()), + ..Default::default() + }); + assert_eq!(cc.get_tx(), Some("cc_tx")); + + // Si + let si = MdComment::Si(SiComment { + tx: Some("si_tx".into()), + ..Default::default() + }); + assert_eq!(si.get_tx(), Some("si_tx")); + + // At + let at = MdComment::At(AtComment { + tx: Some("at_tx".into()), + ..Default::default() + }); + assert_eq!(at.get_tx(), Some("at_tx")); + + // Ch + let ch = MdComment::Ch(ChComment { + tx: Some("ch_tx".into()), + ..Default::default() + }); + assert_eq!(ch.get_tx(), Some("ch_tx")); + + // Dg + let dg = MdComment::Dg(DgComment { + tx: Some("dg_tx".into()), + ..Default::default() + }); + assert_eq!(dg.get_tx(), Some("dg_tx")); + } } diff --git a/tests/mdf4_features.rs b/tests/mdf4_features.rs index 317060d..9f986a6 100644 --- a/tests/mdf4_features.rs +++ b/tests/mdf4_features.rs @@ -553,3 +553,242 @@ fn 
test_event_signals_channels() -> Result<()> { Ok(()) } + +// ── MdfInfo4 API Methods ───────────────────────────────────────────────────── + +#[test] +fn test_mdf4_summary_and_format() -> Result<()> { + let file = format!("{}Simple/ETAS_SimpleSorted.mf4", BASE_PATH.as_str()); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + // Exercise MdfInfo4::summary() and format_channels() via pattern match + match &mdf.mdf_info { + MdfInfo::V4(info4) => { + let summary = info4.summary(); + assert!(summary.contains("MDF4")); + assert!(summary.contains("DGs")); + assert!(summary.contains("channels")); + + // format_channels without data + let channels_no_data = info4.format_channels(false); + assert!(!channels_no_data.is_empty()); + + // format_channels with data + let channels_with_data = info4.format_channels(true); + assert!(!channels_with_data.is_empty()); + // With data loaded, should contain length indicators + assert!(channels_with_data.contains("[")); + + // Display trait for MdfInfo4 + let display = format!("{}", info4); + assert!(!display.is_empty()); + assert!(display.contains("MDF4")); + } + _ => panic!("expected MDF4"), + } + + // Display trait for MdfInfo + let info_display = format!("{}", mdf.mdf_info); + assert!(!info_display.is_empty()); + + // Display trait for Mdf + let mdf_display = format!("{mdf}"); + assert!(!mdf_display.is_empty()); + + Ok(()) +} + +#[test] +fn test_mdf4_channel_api_methods() -> Result<()> { + let file = format!("{}Simple/ETAS_SimpleSorted.mf4", BASE_PATH.as_str()); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + let channels = mdf.get_channel_names_set(); + assert!(!channels.is_empty()); + + // Pick a channel name + let channel_name = channels.iter().next().unwrap().clone(); + + // get_channel_master + let _master = mdf.get_channel_master(&channel_name); + + // get_channel_master_type + let _master_type = mdf.get_channel_master_type(&channel_name); + + // get_channel_unit / 
set_channel_unit + let _unit = mdf.get_channel_unit(&channel_name)?; + mdf.set_channel_unit(&channel_name, "m/s"); + let unit = mdf.get_channel_unit(&channel_name)?; + assert_eq!(unit, Some("m/s".to_string())); + + // get_channel_desc / set_channel_desc + let _desc = mdf.get_channel_desc(&channel_name)?; + mdf.set_channel_desc(&channel_name, "Test description"); + let desc = mdf.get_channel_desc(&channel_name)?; + assert_eq!(desc, Some("Test description".to_string())); + + // get_master_channel_names_set + let master_map = mdf.get_master_channel_names_set(); + assert!(!master_map.is_empty()); + + // get_channel_names_cg_set (via MdfInfo) + let cg_set = mdf.mdf_info.get_channel_names_cg_set(&channel_name); + assert!(!cg_set.is_empty()); + + // is_unfinalized / get_unfin_flags + let _unfinalized = mdf.is_unfinalized(); + let _flags = mdf.get_unfin_flags(); + + // list_sample_reductions (via Mdf) + let _sr = mdf.list_sample_reductions(); + + Ok(()) +} + +#[test] +fn test_mdf4_rename_and_remove_channel() -> Result<()> { + let file = format!("{}Simple/ETAS_SimpleSorted.mf4", BASE_PATH.as_str()); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + let channels = mdf.get_channel_names_set(); + let original_count = channels.len(); + assert!(original_count >= 2, "need at least 2 channels"); + + // Pick a non-master channel to rename + let channel_name = channels.iter().next().unwrap().clone(); + + // rename_channel + mdf.rename_channel(&channel_name, "renamed_channel"); + let new_channels = mdf.get_channel_names_set(); + assert!(new_channels.contains("renamed_channel")); + assert!(!new_channels.contains(&channel_name)); + assert_eq!(new_channels.len(), original_count); + + // remove_channel + mdf.remove_channel("renamed_channel"); + let after_remove = mdf.get_channel_names_set(); + assert!(!after_remove.contains("renamed_channel")); + assert_eq!(after_remove.len(), original_count - 1); + + Ok(()) +} + +#[test] +fn test_mdf4_clear_channel_data() 
-> Result<()> { + let file = format!("{}Simple/ETAS_SimpleSorted.mf4", BASE_PATH.as_str()); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + // Verify data is loaded + let channels = mdf.get_channel_names_set(); + let channel_name = channels.iter().next().unwrap().clone(); + assert!(mdf.get_channel_data(&channel_name).is_some()); + + // clear_all_channel_data_from_memory + mdf.clear_all_channel_data_from_memory()?; + + // After clearing, channel data should be empty + if let Some(data) = mdf.get_channel_data(&channel_name) { + assert!(data.is_empty(), "data should be empty after clearing"); + } + + Ok(()) +} + +// ── MDF3 API Methods (exercises V3 delegator paths) ────────────────────────── + +#[test] +fn test_mdf3_api_methods() -> Result<()> { + let file = "test_files/test_mdf3.mdf"; + let mut mdf = Mdf::new(file)?; + mdf.load_all_channels_data_in_memory()?; + + // Version check + assert_eq!(mdf.get_version(), 310); + + // MDF3 is never unfinalized + assert!(!mdf.is_unfinalized()); + assert_eq!(mdf.get_unfin_flags(), (0, 0)); + + let channels = mdf.get_channel_names_set(); + assert!(!channels.is_empty()); + + let channel_name = channels.iter().next().unwrap().clone(); + + // get_channel_unit + let _unit = mdf.get_channel_unit(&channel_name)?; + + // get_channel_desc + let _desc = mdf.get_channel_desc(&channel_name)?; + + // get_channel_master + let _master = mdf.get_channel_master(&channel_name); + + // get_channel_master_type + let _master_type = mdf.get_channel_master_type(&channel_name); + + // get_channel_data + let _data = mdf.get_channel_data(&channel_name); + + // get_channel_names_cg_set + let cg_set = mdf.mdf_info.get_channel_names_cg_set(&channel_name); + assert!(!cg_set.is_empty()); + + // get_master_channel_names_set + let master_map = mdf.get_master_channel_names_set(); + assert!(!master_map.is_empty()); + + // V3 block accessor methods return None/empty + assert!(mdf.mdf_info.get_event_blocks().is_none()); + 
assert!(mdf.mdf_info.get_attachement_blocks().is_none()); + assert!(mdf.mdf_info.get_file_history_blocks().is_none()); + assert!(mdf.mdf_info.get_source_information_blocks().is_none()); + assert!(mdf.mdf_info.get_sample_reduction_blocks().is_none()); + assert!(mdf.mdf_info.get_channel_hierarchy_blocks().is_none()); + + // V3 list methods return empty strings + let events = mdf.mdf_info.list_events(); + assert!(events.is_empty()); + let attachments = mdf.mdf_info.list_attachments(); + assert!(attachments.is_empty()); + let fh = mdf.mdf_info.list_file_history(); + assert!(fh.is_empty()); + let si = mdf.mdf_info.list_source_information(); + assert!(si.is_empty()); + let sr = mdf.mdf_info.list_sample_reductions(); + assert!(sr.is_empty()); + let ch = mdf.mdf_info.list_channel_hierarchy(); + assert!(ch.is_empty()); + + // set_channel_unit (V3) + mdf.mdf_info.set_channel_unit(&channel_name, "kg"); + + // set_channel_desc (V3) + mdf.mdf_info.set_channel_desc(&channel_name, "test desc"); + + // rename_channel (V3) + let second_channel = channels.iter().nth(1).unwrap().clone(); + mdf.rename_channel(&second_channel, "renamed_v3"); + assert!(mdf.get_channel_names_set().contains("renamed_v3")); + + // remove_channel (V3) + mdf.remove_channel("renamed_v3"); + assert!(!mdf.get_channel_names_set().contains("renamed_v3")); + + // clear_channel_data_from_memory (V3) + let remaining = mdf.get_channel_names_set(); + mdf.clear_channel_data_from_memory(remaining)?; + + // Display for MDF3 Mdf + let display = format!("{mdf}"); + assert!(display.contains("Version")); + + // Display for MdfInfo V3 + let info_display = format!("{}", mdf.mdf_info); + assert!(!info_display.is_empty()); + + Ok(()) +} From 4fb531adcf407ea59104dc0228f5b5cc7ba3b1f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Sun, 15 Feb 2026 18:47:34 +0100 Subject: [PATCH 26/46] updated cargo and fixed compiler issuer, switching from nightly rust to stable for actions workflows --- 
.../py-mac-aarch64-apple-release.yml | 10 +- .github/workflows/py-release-manylinux.yml | 10 +- .../workflows/py-release-windows-macos.yml | 68 +-- Cargo.lock | 561 ++++++++++++++---- Cargo.toml | 18 +- 5 files changed, 492 insertions(+), 175 deletions(-) diff --git a/.github/workflows/py-mac-aarch64-apple-release.yml b/.github/workflows/py-mac-aarch64-apple-release.yml index 057adfb..64df210 100644 --- a/.github/workflows/py-mac-aarch64-apple-release.yml +++ b/.github/workflows/py-mac-aarch64-apple-release.yml @@ -2,9 +2,9 @@ name: Create macOs universal2/aarch64-apple-darwin python release on: push: - branches: [ "main" ] + branches: ["main"] pull_request: - branches: [ "main" ] + branches: ["main"] jobs: build: @@ -13,13 +13,13 @@ jobs: strategy: matrix: os: ["macos-latest"] - python-version: [ "3.9", "3.10", "3.11", "3.12", "3.13", "3.14" ] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] steps: - uses: actions/checkout@v3 - name: Install latest Rust nightly uses: actions-rs/toolchain@v1 with: - toolchain: nightly + toolchain: stable override: true components: rustfmt, clippy - name: Setup universal2 targets for Rust @@ -41,4 +41,4 @@ jobs: with: maturin-version: latest command: publish - args: --target aarch64-apple-darwin --skip-existing --no-sdist -o wheels -i python -u __token__ \ No newline at end of file + args: --target aarch64-apple-darwin --skip-existing --no-sdist -o wheels -i python -u __token__ diff --git a/.github/workflows/py-release-manylinux.yml b/.github/workflows/py-release-manylinux.yml index 470fc52..6c652d0 100644 --- a/.github/workflows/py-release-manylinux.yml +++ b/.github/workflows/py-release-manylinux.yml @@ -2,9 +2,9 @@ name: Create Python release manylinux on: push: - branches: [ "main" ] + branches: ["main"] pull_request: - branches: [ "main" ] + branches: ["main"] jobs: build_manylinux: @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [ '3.9', '3.10' , "3.11", "3.12", "3.13", 
"3.14"] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] steps: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 @@ -25,9 +25,9 @@ jobs: uses: messense/maturin-action@v1 env: MATURIN_PASSWORD: ${{ secrets.PYPI_TOKEN }} - RUSTFLAGS: '-C target-feature=+fxsr,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx,+fma' + RUSTFLAGS: "-C target-feature=+fxsr,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx,+fma" with: - rust-toolchain: nightly + rust-toolchain: stable maturin-version: latest manylinux: auto command: publish diff --git a/.github/workflows/py-release-windows-macos.yml b/.github/workflows/py-release-windows-macos.yml index ed29476..d53352f 100644 --- a/.github/workflows/py-release-windows-macos.yml +++ b/.github/workflows/py-release-windows-macos.yml @@ -2,9 +2,9 @@ name: Create Python release windows macos on: push: - branches: [ "main" ] + branches: ["main"] pull_request: - branches: [ "main" ] + branches: ["main"] env: CARGO_TERM_COLOR: always @@ -18,35 +18,35 @@ jobs: os: ["windows-latest"] python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] steps: - - uses: actions/checkout@v3 - - name: Install latest Rust nightly - uses: actions-rs/toolchain@v1 - with: - toolchain: nightly - override: true - components: rustfmt, clippy - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - shell: pwsh - run: | - python -m pip install --upgrade pip - pip install maturin - C:\msys64\usr\bin\wget.exe -q -O hdf5.zip https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-1.13/hdf5-1.13.0/bin/windows/hdf5-1.13.0-Std-win10_64-vs15.zip - 7z x hdf5.zip -y - msiexec /i hdf\\HDF5-1.13.0-win64.msi /quiet /qn /norestart - - name: Maturin publish - shell: bash - env: - MATURIN_PASSWORD: ${{ secrets.PYPI_TOKEN }} - run: | - rustup override set nightly - export RUSTFLAGS='-C target-feature=+fxsr,+sse,+sse2,+sse3,+sse4.1,+sse4.2' - maturin publish \ - 
--no-sdist \ - --skip-existing \ - -o wheels \ - -i python \ - --username __token__ \ \ No newline at end of file + - uses: actions/checkout@v3 + - name: Install latest Rust nightly + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + override: true + components: rustfmt, clippy + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + shell: pwsh + run: | + python -m pip install --upgrade pip + pip install maturin + C:\msys64\usr\bin\wget.exe -q -O hdf5.zip https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-1.13/hdf5-1.13.0/bin/windows/hdf5-1.13.0-Std-win10_64-vs15.zip + 7z x hdf5.zip -y + msiexec /i hdf\\HDF5-1.13.0-win64.msi /quiet /qn /norestart + - name: Maturin publish + shell: bash + env: + MATURIN_PASSWORD: ${{ secrets.PYPI_TOKEN }} + run: | + rustup override set nightly + export RUSTFLAGS='-C target-feature=+fxsr,+sse,+sse2,+sse3,+sse4.1,+sse4.2' + maturin publish \ + --no-sdist \ + --skip-existing \ + -o wheels \ + -i python \ + --username __token__ \ diff --git a/Cargo.lock b/Cargo.lock index 485866d..4611e82 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -79,6 +79,12 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + [[package]] name = "android_system_properties" version = "0.1.5" @@ -177,6 +183,7 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "70f13d10a41ac8d2ec79ee34178d61e6f47a29c2edfe7ef1721c7383b0359e65" dependencies = [ + "half", "num-traits", ] @@ -206,9 +213,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "57.2.0" 
+version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a2b10dcb159faf30d3f81f6d56c1211a5bea2ca424eabe477648a44b993320e" +checksum = "e4754a624e5ae42081f464514be454b39711daae0458906dacde5f4c632f33a8" dependencies = [ "arrow-arith", "arrow-array", @@ -228,9 +235,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "288015089e7931843c80ed4032c5274f02b37bcb720c4a42096d50b390e70372" +checksum = "f7b3141e0ec5145a22d8694ea8b6d6f69305971c4fa1c1a13ef0195aef2d678b" dependencies = [ "arrow-array", "arrow-buffer", @@ -242,9 +249,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65ca404ea6191e06bf30956394173337fa9c35f445bd447fe6c21ab944e1a23c" +checksum = "4c8955af33b25f3b175ee10af580577280b4bd01f7e823d94c7cdef7cf8c9aef" dependencies = [ "ahash", "arrow-buffer", @@ -260,9 +267,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36356383099be0151dacc4245309895f16ba7917d79bdb71a7148659c9206c56" +checksum = "c697ddca96183182f35b3a18e50b9110b11e916d7b7799cbfd4d34662f2c56c2" dependencies = [ "bytes", "half", @@ -272,9 +279,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8e372ed52bd4ee88cc1e6c3859aa7ecea204158ac640b10e187936e7e87074" +checksum = "646bbb821e86fd57189c10b4fcdaa941deaf4181924917b0daa92735baa6ada5" dependencies = [ "arrow-array", "arrow-buffer", @@ -294,9 +301,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8e4100b729fe656f2e4fb32bc5884f14acf9118d4ad532b7b33c1132e4dce896" +checksum = "8da746f4180004e3ce7b83c977daf6394d768332349d3d913998b10a120b790a" dependencies = [ "arrow-array", "arrow-cast", @@ -309,9 +316,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf87f4ff5fc13290aa47e499a8b669a82c5977c6a1fedce22c7f542c1fd5a597" +checksum = "1fdd994a9d28e6365aa78e15da3f3950c0fdcea6b963a12fa1c391afb637b304" dependencies = [ "arrow-buffer", "arrow-schema", @@ -322,9 +329,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb3ca63edd2073fcb42ba112f8ae165df1de935627ead6e203d07c99445f2081" +checksum = "abf7df950701ab528bf7c0cf7eeadc0445d03ef5d6ffc151eaae6b38a58feff1" dependencies = [ "arrow-array", "arrow-buffer", @@ -336,9 +343,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a36b2332559d3310ebe3e173f75b29989b4412df4029a26a30cc3f7da0869297" +checksum = "0ff8357658bedc49792b13e2e862b80df908171275f8e6e075c460da5ee4bf86" dependencies = [ "arrow-array", "arrow-buffer", @@ -360,9 +367,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13c4e0530272ca755d6814218dffd04425c5b7854b87fa741d5ff848bf50aa39" +checksum = "f7d8f1870e03d4cbed632959498bcc84083b5a24bded52905ae1695bd29da45b" dependencies = [ "arrow-array", "arrow-buffer", @@ -373,9 +380,9 @@ dependencies = [ [[package]] name = "arrow-pyarrow" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f45c7989cb70214b2f362eaa10266d15e1a433692f2ea1514018be3aace679f4" +checksum = 
"d18c442b4c266aaf3d7f7dd40fd7ae058cef7f113b00ff0cd8256e1e218ec544" dependencies = [ "arrow-array", "arrow-data", @@ -385,9 +392,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b07f52788744cc71c4628567ad834cadbaeb9f09026ff1d7a4120f69edf7abd3" +checksum = "18228633bad92bff92a95746bbeb16e5fc318e8382b75619dec26db79e4de4c0" dependencies = [ "arrow-array", "arrow-buffer", @@ -398,18 +405,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bb63203e8e0e54b288d0d8043ca8fa1013820822a27692ef1b78a977d879f2c" +checksum = "8c872d36b7bf2a6a6a2b40de9156265f0242910791db366a2c17476ba8330d68" dependencies = [ "bitflags 2.10.0", ] [[package]] name = "arrow-select" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c96d8a1c180b44ecf2e66c9a2f2bbcb8b1b6f14e165ce46ac8bde211a363411b" +checksum = "68bf3e3efbd1278f770d67e5dc410257300b161b93baedb3aae836144edcaf4b" dependencies = [ "ahash", "arrow-array", @@ -421,9 +428,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8ad6a81add9d3ea30bf8374ee8329992c7fd246ffd8b7e2f48a3cea5aa0cc9a" +checksum = "85e968097061b3c0e9fe3079cf2e703e487890700546b5b0647f60fca1b5a8d8" dependencies = [ "arrow-array", "arrow-buffer", @@ -498,9 +505,9 @@ dependencies = [ [[package]] name = "atoi_simd" -version = "0.16.1" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2a49e05797ca52e312a0c658938b7d00693ef037799ef7187678f212d7684cf" +checksum = "8ad17c7c205c2c28b527b9845eeb91cf1b4d008b438f98ce0e628227a822758e" dependencies = [ "debug_unsafe", ] @@ -540,7 +547,7 @@ dependencies = [ 
"miniz_oxide", "object 0.37.3", "rustc-demangle", - "windows-link", + "windows-link 0.2.1", ] [[package]] @@ -638,7 +645,7 @@ dependencies = [ "cc", "cfg-if 1.0.4", "constant_time_eq", - "cpufeatures", + "cpufeatures 0.2.17", ] [[package]] @@ -749,7 +756,7 @@ version = "0.29.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "befbfd072a8e81c02f8c507aefce431fe5e7d051f83d48a23ffc9b9fe5a11799" dependencies = [ - "clap 4.5.56", + "clap 4.5.58", "heck", "indexmap", "log 0.4.29", @@ -801,18 +808,30 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "chacha20" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f8d983286843e49675a4b7a2d174efe136dc93a18d69130dd18198a6c167601" +dependencies = [ + "cfg-if 1.0.4", + "cpufeatures 0.3.0", + "rand_core 0.10.0", +] + [[package]] name = "chrono" -version = "0.4.43" +version = "0.4.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" +checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" dependencies = [ + "android-tzdata", "iana-time-zone", "js-sys", "num-traits", "serde", "wasm-bindgen", - "windows-link", + "windows-link 0.1.3", ] [[package]] @@ -880,18 +899,18 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.56" +version = "4.5.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75ca66430e33a14957acc24c5077b503e7d374151b2b4b3a10c83b4ceb4be0e" +checksum = "63be97961acde393029492ce0be7a1af7e323e6bae9511ebfac33751be5e6806" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.56" +version = "4.5.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"793207c7fa6300a0608d1080b858e5fdbe713cdc1c8db9fb17777d8a13e63df0" +checksum = "7f13174bda5dfd69d7e947827e5af4b0f2f94a4a3ee92912fba07a66150f21e2" dependencies = [ "anstream", "anstyle", @@ -901,9 +920,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.7" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" +checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" [[package]] name = "codepage" @@ -1006,6 +1025,15 @@ dependencies = [ "libc", ] +[[package]] +name = "cpufeatures" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + [[package]] name = "crc32fast" version = "1.5.0" @@ -1025,7 +1053,7 @@ dependencies = [ "anes", "cast", "ciborium", - "clap 4.5.56", + "clap 4.5.58", "criterion-plot", "itertools 0.13.0", "num-traits", @@ -1536,6 +1564,20 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "getrandom" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec" +dependencies = [ + "cfg-if 1.0.4", + "libc", + "r-efi", + "rand_core 0.10.0", + "wasip2", + "wasip3", +] + [[package]] name = "gimli" version = "0.32.3" @@ -1579,9 +1621,11 @@ version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" dependencies = [ + "bytemuck", "cfg-if 1.0.4", "crunchy", "num-traits", + "serde", "zerocopy", ] @@ -1923,6 +1967,12 @@ dependencies = [ "zerovec", ] +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + [[package]] name = "idna" 
version = "1.1.0" @@ -2092,6 +2142,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + [[package]] name = "lexical-core" version = "1.0.6" @@ -2151,9 +2207,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.180" +version = "0.2.182" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" +checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" [[package]] name = "libloading" @@ -2316,7 +2372,7 @@ dependencies = [ "byteorder", "cbindgen", "chrono", - "clap 4.5.56", + "clap 4.5.58", "codepage", "criterion", "crossbeam-channel", @@ -2324,7 +2380,7 @@ dependencies = [ "env_logger 0.11.8", "fasteval", "flate2", - "glob 0.2.11", + "glob 0.3.3", "half", "hdf5", "itertools 0.14.0", @@ -2332,14 +2388,14 @@ dependencies = [ "log 0.4.29", "lz4", "md-5", - "ndarray 0.16.1", + "ndarray 0.17.2", "num", "numpy", "parking_lot 0.12.5", "parquet", "polars", "pyo3", - "rand", + "rand 0.10.0", "rayon", "roxmltree", "serde", @@ -2443,6 +2499,21 @@ dependencies = [ "rawpointer", ] +[[package]] +name = "ndarray" +version = "0.17.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "520080814a7a6b4a6e9070823bb24b4531daac8c4627e08ba5de8c5ef2f2752d" +dependencies = [ + "matrixmultiply", + "num-complex", + "num-integer", + "num-traits", + "portable-atomic", + "portable-atomic-util", + "rawpointer", +] + [[package]] name = "nom" version = "3.2.1" @@ -2503,6 +2574,17 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-derive" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" +dependencies = [ + "proc-macro2", + "quote 1.0.44", + "syn 2.0.114", +] + [[package]] name = "num-integer" version = "0.1.46" @@ -2560,6 +2642,24 @@ dependencies = [ "rustc-hash", ] +[[package]] +name = "objc2-core-foundation" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536" +dependencies = [ + "bitflags 2.10.0", +] + +[[package]] +name = "objc2-system-configuration" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7216bd11cbda54ccabcab84d523dc93b858ec75ecfb3a7d89513fa22464da396" +dependencies = [ + "objc2-core-foundation", +] + [[package]] name = "object" version = "0.32.2" @@ -2580,9 +2680,9 @@ dependencies = [ [[package]] name = "object_store" -version = "0.12.5" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbfbfff40aeccab00ec8a910b57ca8ecf4319b335c542f2edcd19dd25a1e2a00" +checksum = "c2858065e55c148d294a9f3aae3b0fa9458edadb41a108397094566f4e3c0dfb" dependencies = [ "async-trait", "base64", @@ -2598,7 +2698,7 @@ dependencies = [ "parking_lot 0.12.5", "percent-encoding", "quick-xml", - "rand", + "rand 0.9.2", "reqwest", "ring", "serde", @@ -2713,14 +2813,14 @@ dependencies = [ "libc", "redox_syscall 0.5.18", "smallvec", - "windows-link", + "windows-link 0.2.1", ] [[package]] name = "parquet" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f6a2926a30477c0b95fea6c28c3072712b139337a242c2cc64817bdc20a8854" +checksum = "6ee96b29972a257b855ff2341b37e61af5f12d6af1158b6dcdb5b31ea07bb3cb" dependencies = [ "ahash", "arrow-array", @@ -2844,16 +2944,18 @@ dependencies = [ [[package]] name = "polars" -version = "0.52.0" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6bc9ea901050c1bb8747ee411bc7fbb390f3b399931e7484719512965132a248" +checksum = "899852b723e563dc3cbdc7ea833b14ec44e61309f55df29ba86d45cfd6bc141a" dependencies = [ "getrandom 0.2.17", "getrandom 0.3.4", "polars-arrow", + "polars-buffer", "polars-compute", "polars-core", "polars-error", + "polars-expr", "polars-io", "polars-lazy", "polars-ops", @@ -2867,13 +2969,14 @@ dependencies = [ [[package]] name = "polars-arrow" -version = "0.52.0" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33d3fe43f8702cf7899ff3d516c2e5f7dc84ee6f6a3007e1a831a0ff87940704" +checksum = "6f672743a042b72ace4f88b29f8205ab200b29c5ac976c0560899680c07d2d09" dependencies = [ "atoi_simd", "bitflags 2.10.0", "bytemuck", + "bytes", "chrono", "chrono-tz", "dyn-clone", @@ -2881,11 +2984,13 @@ dependencies = [ "ethnum", "getrandom 0.2.17", "getrandom 0.3.4", + "half", "hashbrown 0.16.1", "itoa", "lz4", "num-traits", "polars-arrow-format", + "polars-buffer", "polars-error", "polars-schema", "polars-utils", @@ -2907,36 +3012,50 @@ dependencies = [ "serde", ] +[[package]] +name = "polars-buffer" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d7011424c3a79ca9c1272c7b4f5fe98695d3bed45595e37bb23c16a2978c80c" +dependencies = [ + "bytemuck", + "either", + "serde", + "version_check", +] + [[package]] name = "polars-compute" -version = "0.52.0" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29cc7497378dee3a002f117e0b4e16b7cbe6c8ed3da16a0229c89294af7c3bf" +checksum = "42a32eca8e08ac4cc5de2ac3996d2b38567bba72cdb19bbfd94c370193ed51dd" dependencies = [ "atoi_simd", "bytemuck", "chrono", "either", "fast-float2", + "half", "hashbrown 0.16.1", "itoa", "num-traits", "polars-arrow", + "polars-buffer", "polars-error", "polars-utils", - "rand", - "ryu", + "rand 0.9.2", "serde", "strength_reduce", "strum_macros", "version_check", + "zmij", ] [[package]] name = "polars-core" 
-version = "0.52.0" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48409b7440cb1a4aa84953fe3a4189dfbfb300a3298266a92a37363476641e40" +checksum = "726296966d04268ee9679c2062af2d06c83c7a87379be471defe616b244c5029" dependencies = [ "bitflags 2.10.0", "boxcar", @@ -2945,18 +3064,20 @@ dependencies = [ "chrono-tz", "comfy-table", "either", + "getrandom 0.3.4", "hashbrown 0.16.1", "indexmap", "itoa", "num-traits", "polars-arrow", + "polars-buffer", "polars-compute", "polars-dtype", "polars-error", "polars-row", "polars-schema", "polars-utils", - "rand", + "rand 0.9.2", "rand_distr", "rayon", "regex 1.12.2", @@ -2970,9 +3091,9 @@ dependencies = [ [[package]] name = "polars-dtype" -version = "0.52.0" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7007e9e8b7b657cbd339b65246af7e87f5756ee9a860119b9424ddffd2aaf133" +checksum = "51976dc46d42cd1e7ca252a9e3bdc90c63b0bfa7030047ebaf5250c2b7838fa6" dependencies = [ "boxcar", "hashbrown 0.16.1", @@ -2985,9 +3106,9 @@ dependencies = [ [[package]] name = "polars-error" -version = "0.52.0" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9a6be22566c89f6405f553bfdb7c8a6cb20ec51b35f3172de9a25fa3e252d85" +checksum = "8c13126f8baebc13dadf26a80dcf69a607977fc8a67b18671ad2cefc713a7bdd" dependencies = [ "object_store", "parking_lot 0.12.5", @@ -2999,14 +3120,15 @@ dependencies = [ [[package]] name = "polars-expr" -version = "0.52.0" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6199a50d3e1afd0674fb009e340cbfb0010682b2387187a36328c00f3f2ca87b" +checksum = "2151f54b0ae5d6b86c3c47df0898ff90edfe774807823f742f36e44973d51ea1" dependencies = [ "bitflags 2.10.0", "hashbrown 0.16.1", "num-traits", "polars-arrow", + "polars-buffer", "polars-compute", "polars-core", "polars-io", @@ -3015,7 +3137,7 @@ dependencies = [ "polars-row", "polars-time", 
"polars-utils", - "rand", + "rand 0.9.2", "rayon", "recursive", "regex 1.12.2", @@ -3024,9 +3146,9 @@ dependencies = [ [[package]] name = "polars-io" -version = "0.52.0" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be3714acdff87170141880a07f5d9233490d3bd5531c41898f6969d440feee11" +checksum = "059724d7762d7332cbc225e6504d996091b28fa1337716e06e5a81d9e54a34ad" dependencies = [ "async-trait", "atoi_simd", @@ -3046,6 +3168,7 @@ dependencies = [ "object_store", "percent-encoding", "polars-arrow", + "polars-buffer", "polars-compute", "polars-core", "polars-error", @@ -3057,18 +3180,18 @@ dependencies = [ "rayon", "regex 1.12.2", "reqwest", - "ryu", "serde", "serde_json", "simdutf8", "tokio", + "zmij", ] [[package]] name = "polars-json" -version = "0.52.0" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dd2126daebf58da564fc5840cd55eb8eb2479d24dfced0a1aea2178a9b33b12" +checksum = "55581d4cc8f4122cae92d12aec997e6713ac483871391a7db09501604007be4b" dependencies = [ "chrono", "fallible-streaming-iterator", @@ -3080,22 +3203,23 @@ dependencies = [ "polars-compute", "polars-error", "polars-utils", - "ryu", "simd-json", "streaming-iterator", + "zmij", ] [[package]] name = "polars-lazy" -version = "0.52.0" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea136c360d03aafe56e0233495e30044ce43639b8b0360a4a38e840233f048a1" +checksum = "02e1e24d4db8c349e9576564cfff47a3f08bb831dba9168f6599be178bc725e8" dependencies = [ "bitflags 2.10.0", "chrono", "either", "memchr 2.7.6", "polars-arrow", + "polars-buffer", "polars-compute", "polars-core", "polars-expr", @@ -3112,9 +3236,9 @@ dependencies = [ [[package]] name = "polars-mem-engine" -version = "0.52.0" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f6e455ceb6e5aee7ed7d5c8944104e66992173e03a9c42f9670226318672249" +checksum = 
"c394e4cd90186043d4051ce118e90794afbe81ac5eb9a51e358a56728e8ebde3" dependencies = [ "memmap2", "polars-arrow", @@ -3132,9 +3256,9 @@ dependencies = [ [[package]] name = "polars-ops" -version = "0.52.0" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b59c80a019ef0e6f09b4416d2647076a52839305c9eb11919e8298ec667f853" +checksum = "7e47b2d9b3627662650da0a8c76ce5101ed1c61b104cb2b3663e0dc711571b12" dependencies = [ "argminmax", "base64", @@ -3149,6 +3273,7 @@ dependencies = [ "memchr 2.7.6", "num-traits", "polars-arrow", + "polars-buffer", "polars-compute", "polars-core", "polars-error", @@ -3165,9 +3290,9 @@ dependencies = [ [[package]] name = "polars-parquet" -version = "0.52.0" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93c2439d127c59e6bfc9d698419bdb45210068a6f501d44e6096429ad72c2eaa" +checksum = "436bae3e89438cafe69400e7567057d7d9820d21ac9a4f69a33b413f2666f03d" dependencies = [ "async-stream", "base64", @@ -3177,10 +3302,12 @@ dependencies = [ "hashbrown 0.16.1", "num-traits", "polars-arrow", + "polars-buffer", "polars-compute", "polars-error", "polars-parquet-format", "polars-utils", + "regex 1.12.2", "serde", "simdutf8", "streaming-decompression", @@ -3198,21 +3325,24 @@ dependencies = [ [[package]] name = "polars-plan" -version = "0.52.0" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65b4619f5c7e9b91f18611c9ed82ebeee4b10052160825c1316ecf4dbd4d97e6" +checksum = "f7930d5ae1d006179e65f01af57c859307b5875a4cc078dc75257250b9ae5162" dependencies = [ "bitflags 2.10.0", + "blake3", "bytemuck", "bytes", "chrono", "chrono-tz", "either", + "futures", "hashbrown 0.16.1", "memmap2", "num-traits", "percent-encoding", "polars-arrow", + "polars-buffer", "polars-compute", "polars-core", "polars-error", @@ -3226,18 +3356,20 @@ dependencies = [ "sha2", "slotmap", "strum_macros", + "tokio", "version_check", ] [[package]] name = 
"polars-row" -version = "0.52.0" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a18d232f25b83032e280a279a1f40beb8a6f8fc43907b13dc07b1c56f3b11eea" +checksum = "d29ea1a4554fe06442db1d6229235cd358e8eacba96aed8718f612caf3e3a646" dependencies = [ "bitflags 2.10.0", "bytemuck", "polars-arrow", + "polars-buffer", "polars-compute", "polars-dtype", "polars-error", @@ -3246,9 +3378,9 @@ dependencies = [ [[package]] name = "polars-schema" -version = "0.52.0" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f73e21d429ae1c23f442b0220ccfe773a9734a44e997b5062a741842909d9441" +checksum = "d688e73f9156f93cb29350be144c8f1e84c1bc705f00ee7f15eb9706a7971273" dependencies = [ "indexmap", "polars-error", @@ -3259,9 +3391,9 @@ dependencies = [ [[package]] name = "polars-sql" -version = "0.52.0" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e67ac1cbb0c972a57af3be12f19aa9803898863fe95c33cdd39df05f5738a75" +checksum = "100415f86069d7e9fbf54737148fc161a7c7316a6a7d375fb6cfc7fc64f570ae" dependencies = [ "bitflags 2.10.0", "hex", @@ -3272,7 +3404,6 @@ dependencies = [ "polars-plan", "polars-time", "polars-utils", - "rand", "regex 1.12.2", "serde", "sqlparser", @@ -3280,25 +3411,29 @@ dependencies = [ [[package]] name = "polars-stream" -version = "0.52.0" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ff19612074640a9d65e5928b7223db76ffee63e55b276f1e466d06719eb7362" +checksum = "65a0c054bdf16efd16bbc587e8d5418ae28464d61afd735513579cd3c338fa70" dependencies = [ "async-channel", "async-trait", "atomic-waker", "bitflags 2.10.0", + "bytes", "chrono-tz", "crossbeam-channel", "crossbeam-deque", "crossbeam-queue", "crossbeam-utils", "futures", + "memchr 2.7.6", "memmap2", + "num-traits", "parking_lot 0.12.5", "percent-encoding", "pin-project-lite", "polars-arrow", + "polars-buffer", "polars-compute", 
"polars-core", "polars-error", @@ -3310,7 +3445,7 @@ dependencies = [ "polars-plan", "polars-time", "polars-utils", - "rand", + "rand 0.9.2", "rayon", "recursive", "slotmap", @@ -3320,9 +3455,9 @@ dependencies = [ [[package]] name = "polars-time" -version = "0.52.0" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddce7a9f81d5f47d981bcee4a8db004f9596bb51f0f4d9d93667a1a00d88166c" +checksum = "72e80404e1e418c997230e3b2972c3be331f45df8bdd3150fe3bef562c7a332f" dependencies = [ "atoi_simd", "bytemuck", @@ -3343,10 +3478,11 @@ dependencies = [ [[package]] name = "polars-utils" -version = "0.52.0" +version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "667c1bc2d2313f934d711f6e3b58d8d9f80351d14ea60af936a26b7dfb06e309" +checksum = "c97cabf53eb8fbf6050cde3fef8f596c51cc25fd7d55fbde108d815ee6674abf" dependencies = [ + "argminmax", "bincode", "bytemuck", "bytes", @@ -3354,13 +3490,15 @@ dependencies = [ "either", "flate2", "foldhash 0.2.0", + "half", "hashbrown 0.16.1", "indexmap", "libc", "memmap2", + "num-derive", "num-traits", "polars-error", - "rand", + "rand 0.9.2", "raw-cpuid", "rayon", "regex 1.12.2", @@ -3407,6 +3545,16 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn 2.0.114", +] + [[package]] name = "proc-macro2" version = "1.0.106" @@ -3528,7 +3676,7 @@ dependencies = [ "bytes", "getrandom 0.3.4", "lru-slab", - "rand", + "rand 0.9.2", "ring", "rustc-hash", "rustls", @@ -3582,7 +3730,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ "rand_chacha", - "rand_core", + "rand_core 0.9.5", +] + +[[package]] +name = "rand" +version = "0.10.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc266eb313df6c5c09c1c7b1fbe2510961e5bcd3add930c1e31f7ed9da0feff8" +dependencies = [ + "chacha20", + "getrandom 0.4.1", + "rand_core 0.10.0", ] [[package]] @@ -3592,7 +3751,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.9.5", ] [[package]] @@ -3604,6 +3763,12 @@ dependencies = [ "getrandom 0.3.4", ] +[[package]] +name = "rand_core" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c8d0fd677905edcbeedbf2edb6494d676f0e98d54d5cf9bda0b061cb8fb8aba" + [[package]] name = "rand_distr" version = "0.5.1" @@ -3611,7 +3776,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463" dependencies = [ "num-traits", - "rand", + "rand 0.9.2", ] [[package]] @@ -4076,7 +4241,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" dependencies = [ "cfg-if 1.0.4", - "cpufeatures", + "cpufeatures 0.2.17", "digest", ] @@ -4103,9 +4268,9 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "signal-hook" -version = "0.3.18" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d881a16cf4426aa584979d30bd82cb33429027e42122b169753d6ef1085ed6e2" +checksum = "3b57709da74f9ff9f4a27dce9526eec25ca8407c45a7887243b031a58935fb8e" dependencies = [ "libc", "signal-hook-registry", @@ -4194,11 +4359,24 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.53.0" +version = "0.60.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05a528114c392209b3264855ad491fcce534b94a38771b0a0b97a79379275ce8" +checksum = 
"505aa16b045c4c1375bf5f125cce3813d0176325bfe9ffc4a903f423de7774ff" dependencies = [ "log 0.4.29", + "recursive", + "sqlparser_derive", +] + +[[package]] +name = "sqlparser_derive" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "028e551d5e270b31b9f3ea271778d9d827148d4287a5d96167b6bb9787f5cc38" +dependencies = [ + "proc-macro2", + "quote 1.0.44", + "syn 2.0.114", ] [[package]] @@ -4737,6 +4915,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + [[package]] name = "unindent" version = "0.2.4" @@ -4876,6 +5060,15 @@ dependencies = [ "wit-bindgen", ] +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen", +] + [[package]] name = "wasite" version = "1.0.2" @@ -4944,6 +5137,28 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + [[package]] name = "wasm-streams" version = "0.4.2" @@ -4957,6 +5172,18 @@ dependencies = [ "web-sys", ] +[[package]] +name = "wasmparser" +version = "0.244.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags 2.10.0", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + [[package]] name = "web-sys" version = "0.3.85" @@ -4988,11 +5215,13 @@ dependencies = [ [[package]] name = "whoami" -version = "2.1.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fae98cf96deed1b7572272dfc777713c249ae40aa1cf8862e091e8b745f5361" +checksum = "d6a5b12f9df4f978d2cfdb1bd3bac52433f44393342d7ee9c25f5a1c14c0f45d" dependencies = [ + "libc", "libredox", + "objc2-system-configuration", "wasite", "web-sys", ] @@ -5048,7 +5277,7 @@ checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" dependencies = [ "windows-implement", "windows-interface", - "windows-link", + "windows-link 0.2.1", "windows-result", "windows-strings", ] @@ -5075,6 +5304,12 @@ dependencies = [ "syn 2.0.114", ] +[[package]] +name = "windows-link" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" + [[package]] name = "windows-link" version = "0.2.1" @@ -5087,7 +5322,7 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" dependencies = [ - "windows-link", + "windows-link 0.2.1", ] [[package]] @@ -5096,7 +5331,7 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" dependencies = [ - "windows-link", + "windows-link 0.2.1", ] [[package]] @@ -5132,7 +5367,7 @@ version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ - "windows-link", + "windows-link 
0.2.1", ] [[package]] @@ -5157,7 +5392,7 @@ version = "0.53.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" dependencies = [ - "windows-link", + "windows-link 0.2.1", "windows_aarch64_gnullvm 0.53.1", "windows_aarch64_msvc 0.53.1", "windows_i686_gnu 0.53.1", @@ -5285,6 +5520,88 @@ name = "wit-bindgen" version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn 2.0.114", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote 1.0.44", + "syn 2.0.114", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags 2.10.0", + "indexmap", + "log 0.4.29", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] 
+ +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log 0.4.29", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] [[package]] name = "writeable" diff --git a/Cargo.toml b/Cargo.toml index 65ffc00..c13acf2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ ndarray = ["dep:ndarray"] hdf5-mpio = ["hdf5/mpio"] [dependencies] -clap = "4.5.56" # for input arguments +clap = "4.5.58" # for input arguments anyhow = { version = "1.0", features = ["backtrace"] } # error handling log = "0.4" # to log events byteorder = "1.4" # for bytes conversions @@ -31,12 +31,12 @@ half = "2.7" # for f16 handling encoding_rs = "0.8" # for endian management and bytes to text conversion (utf8, SBC, UTF16) unicode-bom = "2.0" codepage = "0.1" # to convert code page into encoding -chrono = "0.4.43" # for time conversion +chrono = "0.4.41" # for time conversion rayon = "1.11" # for general purpose parallel computations crossbeam-channel = "0.5" # for efficient channel between threads parking_lot = "0.12" # for efficient mutex roxmltree = "0.21" # for xml parsing -flate2 = "1.0" # for DZ block data deflate +flate2 = "1.1" # for DZ block data deflate zstd = "0.13" lz4 = "1.28" md-5 = "0.10" # md5sum of attachments @@ -44,9 +44,9 @@ transpose = "0.2" # for DZBlock transpose fasteval = "0.2" # for algebraic conversion itertools = "0.14" serde = { version = "1.0", features = ["derive"] } # for serialization -whoami = "2.1.0" # to get user name for writing file -rand = "0.9" # for random numbers -arrow = { version = "57.1.0", features = [ +whoami = "2.1.1" # to get user name for writing file +rand = "0.10" # for random numbers +arrow = { version = "57.3.0", features = [ "pyarrow", "prettyprint", "ffi", @@ -54,16 +54,16 @@ arrow = { version = 
"57.1.0", features = [ env_logger = "0.11" libc = "0.2" # for the C api numpy = { version = "0.26", optional = true } # to export in numpy -polars = { version = "0.52", features = [ +polars = { version = "0.53", features = [ "dtype-full", "object", "fmt", ], optional = true } # for python dataframe -parquet = { version = "57.1", optional = true } # to write parquet file +parquet = { version = "57.3", optional = true } # to write parquet file hdf5 = { version = "0.8", optional = true, features = [ "lzf", ] } # to export into hdf5 file -ndarray = { version = "0.16", optional = true } # to convert arraw data into ndarray, needed for hdf5 +ndarray = { version = "0.17", optional = true } # to convert arraw data into ndarray, needed for hdf5 [dependencies.pyo3] version = "0.26" From 8004618cd0f4d8cb1004712aef2c7e0c6572dc8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Sun, 15 Feb 2026 21:50:32 +0100 Subject: [PATCH 27/46] put toolchain to stable in workflows --- .github/workflows/py-mac-aarch64-apple-release.yml | 4 ++-- .github/workflows/py-release-manylinux.yml | 2 +- .github/workflows/py-release-windows-macos.yml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/py-mac-aarch64-apple-release.yml b/.github/workflows/py-mac-aarch64-apple-release.yml index 64df210..5a6d06f 100644 --- a/.github/workflows/py-mac-aarch64-apple-release.yml +++ b/.github/workflows/py-mac-aarch64-apple-release.yml @@ -16,7 +16,7 @@ jobs: python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] steps: - uses: actions/checkout@v3 - - name: Install latest Rust nightly + - name: Install latest Rust stable uses: actions-rs/toolchain@v1 with: toolchain: stable @@ -32,7 +32,7 @@ jobs: - name: Prepare maturin publish shell: bash run: | - rustup override set nightly + rustup override set stable brew install hdf5@2.0 - name: maturin publish uses: messense/maturin-action@v1 diff --git 
a/.github/workflows/py-release-manylinux.yml b/.github/workflows/py-release-manylinux.yml index 6c652d0..6d81c63 100644 --- a/.github/workflows/py-release-manylinux.yml +++ b/.github/workflows/py-release-manylinux.yml @@ -37,7 +37,7 @@ jobs: env: MATURIN_PASSWORD: ${{ secrets.PYPI_TOKEN }} with: - rust-toolchain: nightly + rust-toolchain: stable target: aarch64-unknown-linux-gnu maturin-version: latest command: publish diff --git a/.github/workflows/py-release-windows-macos.yml b/.github/workflows/py-release-windows-macos.yml index d53352f..3ffa58f 100644 --- a/.github/workflows/py-release-windows-macos.yml +++ b/.github/workflows/py-release-windows-macos.yml @@ -19,7 +19,7 @@ jobs: python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] steps: - uses: actions/checkout@v3 - - name: Install latest Rust nightly + - name: Install latest Rust stable uses: actions-rs/toolchain@v1 with: toolchain: stable @@ -42,7 +42,7 @@ jobs: env: MATURIN_PASSWORD: ${{ secrets.PYPI_TOKEN }} run: | - rustup override set nightly + rustup override set stable export RUSTFLAGS='-C target-feature=+fxsr,+sse,+sse2,+sse3,+sse4.1,+sse4.2' maturin publish \ --no-sdist \ From 9705b4a698221b077e8dfcb944fda1de53be5438 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Mon, 16 Feb 2026 21:39:20 +0100 Subject: [PATCH 28/46] fixing maturin publishing --- .../py-mac-aarch64-apple-release.yml | 61 ++++++++-------- .github/workflows/py-release-manylinux.yml | 53 +++++++------- .../workflows/py-release-windows-macos.yml | 72 +++++++++---------- .github/workflows/rust.yml | 7 +- pyproject.toml | 2 +- 5 files changed, 98 insertions(+), 97 deletions(-) diff --git a/.github/workflows/py-mac-aarch64-apple-release.yml b/.github/workflows/py-mac-aarch64-apple-release.yml index 5a6d06f..7f7382e 100644 --- a/.github/workflows/py-mac-aarch64-apple-release.yml +++ b/.github/workflows/py-mac-aarch64-apple-release.yml @@ -1,44 +1,47 @@ -name: Create macOs 
universal2/aarch64-apple-darwin python release +name: Create macOS aarch64 python release on: push: branches: ["main"] - pull_request: - branches: ["main"] jobs: build: - name: Create Release - runs-on: ${{ matrix.os }} + name: Build macOS wheels + runs-on: macos-latest strategy: matrix: - os: ["macos-latest"] python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] steps: - - uses: actions/checkout@v3 - - name: Install latest Rust stable - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true - components: rustfmt, clippy - - name: Setup universal2 targets for Rust - run: | - rustup target add aarch64-apple-darwin - - name: Set up Python - uses: actions/setup-python@v4 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - - name: Prepare maturin publish - shell: bash - run: | - rustup override set stable - brew install hdf5@2.0 - - name: maturin publish - uses: messense/maturin-action@v1 - env: - MATURIN_PASSWORD: ${{ secrets.PYPI_TOKEN }} + - name: Build wheel + uses: PyO3/maturin-action@v1 with: + rust-toolchain: stable maturin-version: latest - command: publish - args: --target aarch64-apple-darwin --skip-existing --no-sdist -o wheels -i python -u __token__ + target: aarch64-apple-darwin + command: build + args: --release -o dist -i python + - uses: actions/upload-artifact@v4 + with: + name: wheel-macos-${{ matrix.python-version }} + path: dist/*.whl + + publish: + name: Publish to PyPI + needs: build + runs-on: ubuntu-latest + permissions: + id-token: write + steps: + - uses: actions/download-artifact@v4 + with: + pattern: wheel-macos-* + merge-multiple: true + path: dist/ + - uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ secrets.PYPI_TOKEN }} + skip-existing: true diff --git a/.github/workflows/py-release-manylinux.yml b/.github/workflows/py-release-manylinux.yml index 6d81c63..ba82424 100644 --- a/.github/workflows/py-release-manylinux.yml +++ 
b/.github/workflows/py-release-manylinux.yml @@ -3,42 +3,47 @@ name: Create Python release manylinux on: push: branches: ["main"] - pull_request: - branches: ["main"] jobs: build_manylinux: - name: Create Release manylinux + name: Build manylinux wheels runs-on: ubuntu-latest strategy: matrix: python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] + target: [x86_64, aarch64] steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - architecture: x64 - - name: Install HDF5 - run: sudo apt-get install libhdf5-openmpi-dev openmpi-bin libhdf5-dev hdf5-tools python3-h5py - - name: build x64_64 - uses: messense/maturin-action@v1 - env: - MATURIN_PASSWORD: ${{ secrets.PYPI_TOKEN }} - RUSTFLAGS: "-C target-feature=+fxsr,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx,+fma" + - name: Build wheel + uses: PyO3/maturin-action@v1 with: rust-toolchain: stable maturin-version: latest manylinux: auto - command: publish - args: --skip-existing --no-sdist -i python3.11 -o wheels -u __token__ - - name: build aarch64 - uses: messense/maturin-action@v1 - env: - MATURIN_PASSWORD: ${{ secrets.PYPI_TOKEN }} + target: ${{ matrix.target }} + command: build + args: --release -o dist -i python${{ matrix.python-version }} + - uses: actions/upload-artifact@v4 with: - rust-toolchain: stable - target: aarch64-unknown-linux-gnu - maturin-version: latest - command: publish - args: --skip-existing --no-sdist -o wheels -i python3.11 -u __token__ + name: wheel-manylinux-${{ matrix.target }}-${{ matrix.python-version }} + path: dist/*.whl + + publish: + name: Publish to PyPI + needs: build_manylinux + runs-on: ubuntu-latest + permissions: + id-token: write + steps: + - uses: actions/download-artifact@v4 + with: + pattern: wheel-manylinux-* + merge-multiple: true + path: dist/ + - uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ 
secrets.PYPI_TOKEN }} + skip-existing: true diff --git a/.github/workflows/py-release-windows-macos.yml b/.github/workflows/py-release-windows-macos.yml index 3ffa58f..8af9555 100644 --- a/.github/workflows/py-release-windows-macos.yml +++ b/.github/workflows/py-release-windows-macos.yml @@ -1,52 +1,46 @@ -name: Create Python release windows macos +name: Create Python release windows on: push: branches: ["main"] - pull_request: - branches: ["main"] - -env: - CARGO_TERM_COLOR: always jobs: build: - name: Create Release - runs-on: ${{ matrix.os }} + name: Build Windows wheels + runs-on: windows-latest strategy: matrix: - os: ["windows-latest"] python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] steps: - - uses: actions/checkout@v3 - - name: Install latest Rust stable - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true - components: rustfmt, clippy - - name: Set up Python - uses: actions/setup-python@v4 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - - name: Install dependencies - shell: pwsh - run: | - python -m pip install --upgrade pip - pip install maturin - C:\msys64\usr\bin\wget.exe -q -O hdf5.zip https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-1.13/hdf5-1.13.0/bin/windows/hdf5-1.13.0-Std-win10_64-vs15.zip - 7z x hdf5.zip -y - msiexec /i hdf\\HDF5-1.13.0-win64.msi /quiet /qn /norestart - - name: Maturin publish - shell: bash - env: - MATURIN_PASSWORD: ${{ secrets.PYPI_TOKEN }} - run: | - rustup override set stable - export RUSTFLAGS='-C target-feature=+fxsr,+sse,+sse2,+sse3,+sse4.1,+sse4.2' - maturin publish \ - --no-sdist \ - --skip-existing \ - -o wheels \ - -i python \ - --username __token__ \ + - name: Build wheel + uses: PyO3/maturin-action@v1 + with: + rust-toolchain: stable + maturin-version: latest + command: build + args: --release -o dist -i python + - uses: actions/upload-artifact@v4 + with: + name: wheel-windows-${{ matrix.python-version }} + 
path: dist/*.whl + + publish: + name: Publish to PyPI + needs: build + runs-on: ubuntu-latest + permissions: + id-token: write + steps: + - uses: actions/download-artifact@v4 + with: + pattern: wheel-windows-* + merge-multiple: true + path: dist/ + - uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ secrets.PYPI_TOKEN }} + skip-existing: true diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 237f950..1f3a7e4 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -13,9 +13,8 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Build - run: cargo build --verbose + run: cargo build --verbose --no-default-features --features parquet - name: Run tests - run: cargo test tests::basic_test --verbose - + run: cargo test tests::basic_test --verbose --no-default-features --features parquet diff --git a/pyproject.toml b/pyproject.toml index 9c4633d..f902311 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "mdfr" -version = "0.6.3" +dynamic = ["version"] dependencies = [ "numpy>=1.23","polars>=0.13","matplotlib>=3.5" ] From 65086337228f69a7c6da6b5ddae9c6ef5cda48e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Mon, 23 Mar 2026 22:00:48 +0100 Subject: [PATCH 29/46] fixed critical safety issues from the C API --- src/c_api.rs | 278 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 168 insertions(+), 110 deletions(-) diff --git a/src/c_api.rs b/src/c_api.rs index f1adab9..38249ee 100644 --- a/src/c_api.rs +++ b/src/c_api.rs @@ -4,7 +4,9 @@ use arrow::ffi::{FFI_ArrowArray, to_ffi}; use libc::c_char; use std::ffi::{CStr, CString, c_uchar, c_ushort}; -/// create a new mdf from a file and its metadata +/// create a new mdf from a file and its metadata. +/// Returns a heap-allocated Mdf pointer, or null on error. 
+/// Caller must free the returned pointer with `free_mdf()`. #[unsafe(no_mangle)] pub unsafe extern "C" fn new_mdf(file_name: *const c_char) -> *mut Mdf { unsafe { @@ -16,28 +18,39 @@ pub unsafe extern "C" fn new_mdf(file_name: *const c_char) -> *mut Mdf { // - points to valid, initialized data // - points to memory ending in a null byte // - won't be mutated for the duration of this function call - let f = CStr::from_ptr(file_name) - .to_str() - .expect("Could not convert into utf8 the file name string"); + if file_name.is_null() { + return std::ptr::null_mut(); + } + let f = match CStr::from_ptr(file_name).to_str() { + Ok(s) => s, + Err(_) => return std::ptr::null_mut(), + }; match Mdf::new(f) { - Ok(mut mdf) => { - let p: *mut Mdf = &mut mdf; - std::mem::forget(mdf); - p - } - Err(e) => panic!("{e:?}"), + Ok(mdf) => Box::into_raw(Box::new(mdf)), + Err(_) => std::ptr::null_mut(), } } } -/// returns mdf file version +/// frees an Mdf object previously returned by `new_mdf()`. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn free_mdf(mdf: *mut Mdf) { + unsafe { + if !mdf.is_null() { + // SAFETY: mdf was created by Box::into_raw in new_mdf(); we reconstruct and drop it. + drop(Box::from_raw(mdf)); + } + } +} + +/// returns mdf file version. Returns 0 on null pointer. 
#[unsafe(no_mangle)] pub unsafe extern "C" fn get_version(mdf: *const Mdf) -> c_ushort { unsafe { if let Some(mdf) = mdf.as_ref() { mdf.get_version() } else { - panic!("Null pointer given for Mdf Rust object") + 0 } } } @@ -50,21 +63,23 @@ pub unsafe extern "C" fn get_channel_unit( channel_name: *const c_char, ) -> *const c_char { unsafe { - let name = CStr::from_ptr(channel_name) - .to_str() - .expect("Could not convert into utf8 the file name string"); + if mdf.is_null() || channel_name.is_null() { + return std::ptr::null(); + } + let name = match CStr::from_ptr(channel_name).to_str() { + Ok(s) => s, + Err(_) => return std::ptr::null(), + }; if let Some(mdf) = mdf.as_ref() { match mdf.get_channel_unit(name) { - Ok(unit) => match unit { - Some(unit) => CString::new(unit) - .expect("CString::new failed because of internal 0 byte") - .into_raw(), - None => std::ptr::null::(), // null pointer + Ok(Some(unit)) => match CString::new(unit) { + Ok(cs) => cs.into_raw(), + Err(_) => std::ptr::null(), }, - Err(e) => panic!("{}", e), + _ => std::ptr::null(), } } else { - panic!("Null pointer given for Mdf Rust object") + std::ptr::null() } } } @@ -77,23 +92,23 @@ pub unsafe extern "C" fn get_channel_desc( channel_name: *const libc::c_char, ) -> *const c_char { unsafe { - let name = CStr::from_ptr(channel_name) - .to_str() - .expect("Could not convert into utf8 the file name string"); + if mdf.is_null() || channel_name.is_null() { + return std::ptr::null(); + } + let name = match CStr::from_ptr(channel_name).to_str() { + Ok(s) => s, + Err(_) => return std::ptr::null(), + }; if let Some(mdf) = mdf.as_ref() { match mdf.get_channel_desc(name) { - Ok(desc) => { - match desc { - Some(desc) => CString::new(desc) - .expect("CString::new failed because of internal 0 byte") - .into_raw(), - None => std::ptr::null::(), // null pointer - } - } - Err(e) => panic!("{}", e), + Ok(Some(desc)) => match CString::new(desc) { + Ok(cs) => cs.into_raw(), + Err(_) => std::ptr::null(), + }, + _ => 
std::ptr::null(), } } else { - panic!("Null pointer given for Mdf Rust object") + std::ptr::null() } } } @@ -106,18 +121,23 @@ pub unsafe extern "C" fn get_channel_master( channel_name: *const libc::c_char, ) -> *const c_char { unsafe { - let name = CStr::from_ptr(channel_name) - .to_str() - .expect("Could not convert into utf8 the file name string"); + if mdf.is_null() || channel_name.is_null() { + return std::ptr::null(); + } + let name = match CStr::from_ptr(channel_name).to_str() { + Ok(s) => s, + Err(_) => return std::ptr::null(), + }; if let Some(mdf) = mdf.as_ref() { match mdf.get_channel_master(name) { - Some(st) => CString::new(st) - .expect("CString::new failed because of internal 0 byte") - .into_raw(), - None => std::ptr::null::(), // null pointer + Some(st) => match CString::new(st) { + Ok(cs) => cs.into_raw(), + Err(_) => std::ptr::null(), + }, + None => std::ptr::null(), } } else { - panic!("Null pointer given for Mdf Rust object") + std::ptr::null() } } } @@ -131,18 +151,23 @@ pub unsafe extern "C" fn get_channel_master_type( channel_name: *const libc::c_char, ) -> c_uchar { unsafe { - let name = CStr::from_ptr(channel_name) - .to_str() - .expect("Could not convert into utf8 the file name string"); + if mdf.is_null() || channel_name.is_null() { + return 0; + } + let name = match CStr::from_ptr(channel_name).to_str() { + Ok(s) => s, + Err(_) => return 0, + }; if let Some(mdf) = mdf.as_ref() { mdf.get_channel_master_type(name) } else { - panic!("Null pointer given for Mdf Rust object") + 0 } } } -/// returns a sorted array of strings of all channel names contained in file +/// returns a sorted array of strings of all channel names contained in file. +/// The returned pointer is heap-allocated; call `free_channel_names_set()` to free it. 
#[unsafe(no_mangle)] pub unsafe extern "C" fn get_channel_names_set(mdf: *const Mdf) -> *const *mut c_char { unsafe { @@ -150,20 +175,37 @@ pub unsafe extern "C" fn get_channel_names_set(mdf: *const Mdf) -> *const *mut c let set = mdf.get_channel_names_set(); let mut s = set.into_iter().collect::>(); s.sort(); - let cstring_vec = s + let mut cstring_vec = s .iter() - .map(|e| { - CString::new(e.to_string()) - .expect("CString::new failed because of internal 0 byte") - .into_raw() - }) + .filter_map(|e| CString::new(e.as_str()).ok()) + .map(|cs| cs.into_raw()) .collect::>(); + cstring_vec.shrink_to_fit(); let p = cstring_vec.as_ptr(); std::mem::forget(cstring_vec); p } else { - panic!("Null pointer given for Mdf Rust object") + std::ptr::null() + } + } +} + +/// frees a channel names array returned by `get_channel_names_set()`. +/// `len` must match the number of channels returned. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn free_channel_names_set(ptr: *mut *mut c_char, len: usize) { + unsafe { + if ptr.is_null() { + return; } + // SAFETY: ptr and each element were allocated by Rust (CString::into_raw / Vec::into_raw_parts). + let slice = std::slice::from_raw_parts_mut(ptr, len); + for p in slice.iter_mut() { + if !p.is_null() { + drop(CString::from_raw(*p)); + } + } + drop(Vec::from_raw_parts(ptr, len, len)); } } @@ -171,40 +213,54 @@ pub unsafe extern "C" fn get_channel_names_set(mdf: *const Mdf) -> *const *mut c #[unsafe(no_mangle)] pub unsafe extern "C" fn load_all_channels_data_in_memory(mdf: *mut Mdf) { unsafe { - if let Some(mdf) = mdf.as_mut() { - match mdf.load_all_channels_data_in_memory() { - Ok(_) => {} - Err(e) => panic!("{}", e), - } - } else { - panic!("Null pointer given for Mdf Rust object") + if let Some(mdf) = mdf.as_mut() + && let Err(e) = mdf.load_all_channels_data_in_memory() + { + log::error!("load_all_channels_data_in_memory failed: {e}"); } } } -/// returns channel's arrow Array. 
-/// null pointer returned if not found +/// returns channel's arrow Array as a heap-allocated pointer. +/// Caller must free it with `free_channel_array()`. +/// Returns null pointer if channel not found. #[unsafe(no_mangle)] pub unsafe extern "C" fn get_channel_array( mdf: *const Mdf, channel_name: *const libc::c_char, -) -> *const FFI_ArrowArray { +) -> *mut FFI_ArrowArray { unsafe { - let name = CStr::from_ptr(channel_name) - .to_str() - .expect("Could not convert into utf8 the file name string"); + if mdf.is_null() || channel_name.is_null() { + return std::ptr::null_mut(); + } + let name = match CStr::from_ptr(channel_name).to_str() { + Ok(s) => s, + Err(_) => return std::ptr::null_mut(), + }; if let Some(mdf) = mdf.as_ref() { match mdf.get_channel_data(name) { - Some(data) => { - let (array, _) = - to_ffi(&data.to_data()).expect("ffi failed converting arrow array into C"); - let array_ptr: *const FFI_ArrowArray = &array; - array_ptr - } - None => std::ptr::null::(), // null pointers + Some(data) => match to_ffi(&data.to_data()) { + Ok((array, _)) => Box::into_raw(Box::new(array)), + Err(e) => { + log::error!("get_channel_array: FFI conversion failed: {e}"); + std::ptr::null_mut() + } + }, + None => std::ptr::null_mut(), } } else { - panic!("Null pointer given for Mdf Rust object") + std::ptr::null_mut() + } + } +} + +/// frees an FFI_ArrowArray returned by `get_channel_array()`. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn free_channel_array(array: *mut FFI_ArrowArray) { + unsafe { + if !array.is_null() { + // SAFETY: array was created by Box::into_raw in get_channel_array(). 
+ drop(Box::from_raw(array)); } } } @@ -229,25 +285,25 @@ pub unsafe extern "C" fn export_to_parquet( // - points to valid, initialized data // - points to memory ending in a null byte // - won't be mutated for the duration of this function call - let name = CStr::from_ptr(file_name) - .to_str() - .expect("Could not convert into utf8 the file name string"); + if mdf.is_null() || file_name.is_null() { + return; + } + let name = match CStr::from_ptr(file_name).to_str() { + Ok(s) => s, + Err(_) => return, + }; let comp = if compression.is_null() { None } else { - Some( - CStr::from_ptr(compression) - .to_str() - .expect("Could not convert into utf8 the compression string"), - ) - }; - if let Some(mdf) = mdf.as_ref() { - match mdf.export_to_parquet(name, comp) { - Ok(_) => {} - Err(e) => panic!("{}", e), + match CStr::from_ptr(compression).to_str() { + Ok(s) => Some(s), + Err(_) => return, } - } else { - panic!("Null pointer given for Mdf Rust object") + }; + if let Some(mdf) = mdf.as_ref() + && let Err(e) = mdf.export_to_parquet(name, comp) + { + log::error!("export_to_parquet failed: {e}"); } } } @@ -256,7 +312,7 @@ pub unsafe extern "C" fn export_to_parquet( // Compression can be one of the following strings // "deflate", "lzf" // or null pointer if no compression wanted -#[no_mangle] +#[unsafe(no_mangle)] #[cfg(feature = "hdf5")] pub unsafe extern "C" fn export_to_hdf5( mdf: *const Mdf, @@ -271,24 +327,26 @@ pub unsafe extern "C" fn export_to_hdf5( // - points to valid, initialized data // - points to memory ending in a null byte // - won't be mutated for the duration of this function call - let name = CStr::from_ptr(file_name) - .to_str() - .expect("Could not convert into utf8 the file name string"); - let comp = if compression.is_null() { - None - } else { - Some( - CStr::from_ptr(compression) - .to_str() - .expect("Could not convert into utf8 the compression string"), - ) - }; - if let Some(mdf) = mdf.as_ref() { - match mdf.export_to_hdf5(name, comp) { - Ok(_) 
=> {} - Err(e) => panic!("{}", e), + unsafe { + if mdf.is_null() || file_name.is_null() { + return; + } + let name = match CStr::from_ptr(file_name).to_str() { + Ok(s) => s, + Err(_) => return, + }; + let comp = if compression.is_null() { + None + } else { + match CStr::from_ptr(compression).to_str() { + Ok(s) => Some(s), + Err(_) => return, + } + }; + if let Some(mdf) = mdf.as_ref() { + if let Err(e) = mdf.export_to_hdf5(name, comp) { + log::error!("export_to_hdf5 failed: {e}"); + } } - } else { - panic!("Null pointer given for Mdf Rust object") } } From a5753108a0343b804ae0b193f8d2593f5795b5cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Mon, 23 Mar 2026 22:01:21 +0100 Subject: [PATCH 30/46] removed .expect() --- src/mdfreader.rs | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/src/mdfreader.rs b/src/mdfreader.rs index b6c9ea7..43b56b0 100644 --- a/src/mdfreader.rs +++ b/src/mdfreader.rs @@ -319,30 +319,30 @@ impl fmt::Display for Mdf { writeln!(f, "Comments: {}", mdfinfo3.hd_comment)?; for (master, list) in self.get_master_channel_names_set().iter() { if let Some(master_name) = master { - writeln!(f, "\nMaster: {master_name}") - .expect("cannot print master channel name"); + writeln!(f, "\nMaster: {master_name}")?; } else { - writeln!(f, "\nWithout Master channel") - .expect("cannot print thre is no master channel"); + writeln!(f, "\nWithout Master channel")?; } for channel in list.iter() { - writeln!(f, " {channel} ").expect("cannot print channel name"); + writeln!(f, " {channel} ")?; if let Some(data) = self.get_channel_data(channel) && !data.is_empty() { let array = &data.as_ref(); let displayer = ArrayFormatter::try_new(array, &format_option) - .map_err(|_| std::fmt::Error)?; - write!(f, "{}", displayer.value(0)).expect("cannot channel data"); - write!(f, " ").expect("cannot print simple space character"); - write!(f, "{}", displayer.value(data.len() 
- 1)) - .expect("cannot channel data"); + .map_err(|e| { + log::warn!("Mdf Display: ArrayFormatter failed: {e}"); + std::fmt::Error + })?; + write!(f, "{}", displayer.value(0))?; + write!(f, " ")?; + write!(f, "{}", displayer.value(data.len() - 1))?; } if let Ok(Some(unit)) = self.get_channel_unit(channel) { - writeln!(f, " {unit} ").expect("cannot print channel unit"); + writeln!(f, " {unit} ")?; } if let Ok(Some(desc)) = self.get_channel_desc(channel) { - writeln!(f, " {desc} ").expect("cannot print channel desc"); + writeln!(f, " {desc} ")?; } } } @@ -358,30 +358,30 @@ impl fmt::Display for Mdf { } for (master, list) in self.get_master_channel_names_set().iter() { if let Some(master_name) = master { - writeln!(f, "\nMaster: {master_name}") - .expect("cannot print master channel name"); + writeln!(f, "\nMaster: {master_name}")?; } else { - writeln!(f, "\nWithout Master channel") - .expect("cannot print thre is no master channel"); + writeln!(f, "\nWithout Master channel")?; } for channel in list.iter() { - writeln!(f, " {channel} ").expect("cannot print channel name"); + writeln!(f, " {channel} ")?; if let Some(data) = self.get_channel_data(channel) && !data.is_empty() { let array = &data.as_ref(); let displayer = ArrayFormatter::try_new(array, &format_option) - .map_err(|_| std::fmt::Error)?; - write!(f, "{}", displayer.value(0)).expect("cannot channel data"); - write!(f, " ").expect("cannot print simple space character"); - write!(f, "{}", displayer.value(data.len() - 1)) - .expect("cannot channel data"); + .map_err(|e| { + log::warn!("Mdf Display: ArrayFormatter failed: {e}"); + std::fmt::Error + })?; + write!(f, "{}", displayer.value(0))?; + write!(f, " ")?; + write!(f, "{}", displayer.value(data.len() - 1))?; } if let Ok(Some(unit)) = self.get_channel_unit(channel) { - writeln!(f, " {unit} ").expect("cannot print channel unit"); + writeln!(f, " {unit} ")?; } if let Ok(Some(desc)) = self.get_channel_desc(channel) { - writeln!(f, " {desc} ").expect("cannot 
print channel desc"); + writeln!(f, " {desc} ")?; } } } From c25976652adcc71a64ea48e61daf6378291213d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Mon, 23 Mar 2026 22:02:20 +0100 Subject: [PATCH 31/46] made ChannelData display fault tolerant, logging error instead --- src/data_holder/channel_data.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/data_holder/channel_data.rs b/src/data_holder/channel_data.rs index e5e8dc1..b7ecb98 100644 --- a/src/data_holder/channel_data.rs +++ b/src/data_holder/channel_data.rs @@ -1956,8 +1956,10 @@ impl fmt::Display for ChannelData { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let format_option = FormatOptions::new(); let data = self.as_ref(); - let displayer = - ArrayFormatter::try_new(&data, &format_option).map_err(|_| std::fmt::Error)?; + let displayer = ArrayFormatter::try_new(&data, &format_option).map_err(|e| { + log::warn!("ChannelData Display: ArrayFormatter failed: {e}"); + std::fmt::Error + })?; for i in 0..self.len() { write!(f, " {}", displayer.value(i))?; } From 176a4abc8ce7c6e531f2e5f98285a280cd803a4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Mon, 23 Mar 2026 22:02:41 +0100 Subject: [PATCH 32/46] removed expect() --- src/mdfreader/data_read4.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mdfreader/data_read4.rs b/src/mdfreader/data_read4.rs index 25e9815..e0ec771 100644 --- a/src/mdfreader/data_read4.rs +++ b/src/mdfreader/data_read4.rs @@ -1638,7 +1638,7 @@ pub fn read_channels_from_bytes( let c_vlsd_channel = Arc::clone(&vlsd_channels); let mut vlsd_channel = c_vlsd_channel .lock() - .expect("Could not get lock from vlsd channel arc vec"); + .unwrap_or_else(|e| e.into_inner()); vlsd_channel.push((cn.block.cn_type, *rec_pos)); } } else { @@ -1669,7 +1669,7 @@ pub fn read_channels_from_bytes( let c_vlsd_channel = Arc::clone(&vlsd_channels); let mut 
vlsd_channel = c_vlsd_channel .lock() - .expect("Could not get lock from vlsd channel arc vec"); + .unwrap_or_else(|e| e.into_inner()); vlsd_channel.push((cn.block.cn_type, *rec_pos)); } } From d23ce79e8b77c192f9763b56bd13861de7c5a8f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Mon, 23 Mar 2026 22:03:44 +0100 Subject: [PATCH 33/46] manage writer handler and flush data --- src/mdfwriter/mdfwriter4.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/mdfwriter/mdfwriter4.rs b/src/mdfwriter/mdfwriter4.rs index 45de067..43c0a55 100644 --- a/src/mdfwriter/mdfwriter4.rs +++ b/src/mdfwriter/mdfwriter4.rs @@ -518,7 +518,7 @@ pub fn mdfwriter4(mdf: &Mdf, file_name: &str, compression: bool) -> Result let (tx, rx) = bounded::>(n_channels); let fname = Arc::new(Mutex::new(file_name.to_string())); let sfname = Arc::clone(&fname); - thread::spawn(move || -> Result<(), Error> { + let writer_handle = thread::spawn(move || -> Result<(), Error> { let file_name = Arc::clone(&sfname); let file = file_name.lock(); let f: File = OpenOptions::new() @@ -538,6 +538,7 @@ pub fn mdfwriter4(mdf: &Mdf, file_name: &str, compression: bool) -> Result .write_all(&buffer) .context("Could not write data blocks buffer")?; } + writer.flush().context("Could not flush data blocks")?; Ok(()) }); @@ -629,6 +630,10 @@ pub fn mdfwriter4(mdf: &Mdf, file_name: &str, compression: bool) -> Result Ok(()) })?; drop(tx); + writer_handle + .join() + .map_err(|e| anyhow::anyhow!("Data writer thread panicked: {:?}", e)) + .and_then(|r| r)?; let file_name = Arc::clone(&fname); let file = file_name.lock(); From f8ee31f6f6c49aa93c1e73908a5e71566728c465 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Mon, 23 Mar 2026 22:04:43 +0100 Subject: [PATCH 34/46] benchmark fault tolerant in case of missing python module mdfreader for comparison --- benches/mdf_benchmark.rs | 23 +++++++++++++++++------ 1 file changed, 17 
insertions(+), 6 deletions(-) diff --git a/benches/mdf_benchmark.rs b/benches/mdf_benchmark.rs index 2418fe9..148ebfb 100644 --- a/benches/mdf_benchmark.rs +++ b/benches/mdf_benchmark.rs @@ -25,12 +25,23 @@ static WRITING_FILE3: LazyLock = LazyLock::new(|| format!("{}test.dat", static PARQUET_FILE: LazyLock = LazyLock::new(|| format!("{}test.parquet", MDFR_PATH)); fn python_launch() { - Command::new("python3") - .arg("-m") - .arg("timeit") - .arg("import mdfreader; yop=mdfreader.Mdf('/home/ratal/workspace/mdfreader/mdfreader/tests/MDF4/ASAM_COMMON_MDF_V4-1-0/Base_Standard/Examples/DataList/Vector_SD_List.MF4')") - .spawn() - .expect("mdfinfo command failed to start"); + // Check if mdfreader is available before running the comparison + let check = Command::new("python3") + .args(["-c", "import mdfreader"]) + .output(); + match check { + Ok(out) if out.status.success() => { + Command::new("python3") + .arg("-m") + .arg("timeit") + .arg("import mdfreader; yop=mdfreader.Mdf('/home/ratal/workspace/mdfreader/mdfreader/tests/MDF4/ASAM_COMMON_MDF_V4-1-0/Base_Standard/Examples/DataList/Vector_SD_List.MF4')") + .spawn() + .expect("mdfinfo command failed to start"); + } + _ => { + println!("skipping Python mdfreader comparison (module not available)"); + } + } } pub fn criterion_benchmark(c: &mut Criterion) { From 235141e27bc7172569f2d75ae844b73130147443 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Fri, 27 Mar 2026 22:21:18 +0100 Subject: [PATCH 35/46] added safety documentation --- src/c_api.rs | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/src/c_api.rs b/src/c_api.rs index 38249ee..4f8d623 100644 --- a/src/c_api.rs +++ b/src/c_api.rs @@ -47,6 +47,8 @@ pub unsafe extern "C" fn free_mdf(mdf: *mut Mdf) { #[unsafe(no_mangle)] pub unsafe extern "C" fn get_version(mdf: *const Mdf) -> c_ushort { unsafe { + // SAFETY: caller guarantees mdf is either null or a valid pointer returned by 
new_mdf(). + // as_ref() handles the null case safely. if let Some(mdf) = mdf.as_ref() { mdf.get_version() } else { @@ -56,13 +58,16 @@ pub unsafe extern "C" fn get_version(mdf: *const Mdf) -> c_ushort { } /// returns channel's unit string -/// if no unit is existing for this channel, returns a null pointer +/// if no unit is existing for this channel, returns a null pointer. +/// The returned pointer is heap-allocated; caller must free it with `libc::free()`. #[unsafe(no_mangle)] pub unsafe extern "C" fn get_channel_unit( mdf: *const Mdf, channel_name: *const c_char, ) -> *const c_char { unsafe { + // SAFETY: caller guarantees both pointers are either null or valid null-terminated C strings. + // Null is checked before dereferencing. CStr::from_ptr requires a valid, null-terminated string. if mdf.is_null() || channel_name.is_null() { return std::ptr::null(); } @@ -85,13 +90,15 @@ pub unsafe extern "C" fn get_channel_unit( } /// returns channel's description string -/// if no description is existing for this channel, returns null pointer +/// if no description is existing for this channel, returns null pointer. +/// The returned pointer is heap-allocated; caller must free it with `libc::free()`. #[unsafe(no_mangle)] pub unsafe extern "C" fn get_channel_desc( mdf: *const Mdf, channel_name: *const libc::c_char, ) -> *const c_char { unsafe { + // SAFETY: caller guarantees both pointers are either null or valid null-terminated C strings. if mdf.is_null() || channel_name.is_null() { return std::ptr::null(); } @@ -114,13 +121,15 @@ pub unsafe extern "C" fn get_channel_desc( } /// returns channel's associated master channel name string -/// if no master channel existing, returns null pointer +/// if no master channel existing, returns null pointer. +/// The returned pointer is heap-allocated; caller must free it with `libc::free()`. 
#[unsafe(no_mangle)] pub unsafe extern "C" fn get_channel_master( mdf: *const Mdf, channel_name: *const libc::c_char, ) -> *const c_char { unsafe { + // SAFETY: caller guarantees both pointers are either null or valid null-terminated C strings. if mdf.is_null() || channel_name.is_null() { return std::ptr::null(); } @@ -151,6 +160,7 @@ pub unsafe extern "C" fn get_channel_master_type( channel_name: *const libc::c_char, ) -> c_uchar { unsafe { + // SAFETY: caller guarantees both pointers are either null or valid null-terminated C strings. if mdf.is_null() || channel_name.is_null() { return 0; } @@ -167,10 +177,13 @@ pub unsafe extern "C" fn get_channel_master_type( } /// returns a sorted array of strings of all channel names contained in file. -/// The returned pointer is heap-allocated; call `free_channel_names_set()` to free it. +/// The returned pointer is heap-allocated; call `free_channel_names_set(ptr, len)` to free it, +/// where `len` is the number of channel names (obtained separately, e.g. via a count function). +/// Returns null on null input. #[unsafe(no_mangle)] pub unsafe extern "C" fn get_channel_names_set(mdf: *const Mdf) -> *const *mut c_char { unsafe { + // SAFETY: caller guarantees mdf is either null or a valid pointer from new_mdf(). if let Some(mdf) = mdf.as_ref() { let set = mdf.get_channel_names_set(); let mut s = set.into_iter().collect::>(); @@ -182,6 +195,9 @@ pub unsafe extern "C" fn get_channel_names_set(mdf: *const Mdf) -> *const *mut c .collect::>(); cstring_vec.shrink_to_fit(); let p = cstring_vec.as_ptr(); + // SAFETY: We intentionally leak the Vec here to transfer ownership of the + // backing allocation to the caller. The pointer remains valid until + // free_channel_names_set() is called with the correct length. 
std::mem::forget(cstring_vec); p } else { @@ -213,6 +229,7 @@ pub unsafe extern "C" fn free_channel_names_set(ptr: *mut *mut c_char, len: usiz #[unsafe(no_mangle)] pub unsafe extern "C" fn load_all_channels_data_in_memory(mdf: *mut Mdf) { unsafe { + // SAFETY: caller guarantees mdf is either null or a valid pointer from new_mdf(). if let Some(mdf) = mdf.as_mut() && let Err(e) = mdf.load_all_channels_data_in_memory() { @@ -230,6 +247,8 @@ pub unsafe extern "C" fn get_channel_array( channel_name: *const libc::c_char, ) -> *mut FFI_ArrowArray { unsafe { + // SAFETY: caller guarantees both pointers are either null or valid null-terminated C strings, + // and mdf is a valid pointer from new_mdf(). if mdf.is_null() || channel_name.is_null() { return std::ptr::null_mut(); } From 300c4fe3c8ee7c78e35a90b7e540514181c87efd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Fri, 27 Mar 2026 22:23:13 +0100 Subject: [PATCH 36/46] added pkg version --- src/main.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/main.rs b/src/main.rs index e219614..af98b8d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -21,7 +21,7 @@ fn main() -> Result<(), Error> { init(); let matches = Command::new("mdfr") .bin_name("mdfr") - .version("0.6.3") + .version(env!("CARGO_PKG_VERSION")) .author("Aymeric Rateau ") .about("reads ASAM mdf file") .arg( @@ -63,7 +63,8 @@ fn main() -> Result<(), Error> { .required(false) .num_args(1) .value_name("ALGORITHM") - .help("Compression algorithm for writing data in parquet file, valid values are snappy, gzip, lzo, lz4, zstd, brotli. Default is uncompressed"), + .value_parser(["snappy", "gzip", "lzo", "lz4", "zstd", "brotli"]) + .help("Compression algorithm for writing data in parquet file. 
Default is uncompressed"), ) .arg( Arg::new("export_to_hdf5") @@ -80,7 +81,8 @@ fn main() -> Result<(), Error> { .required(false) .num_args(1) .value_name("FILTER") - .help("Compression algorithm for writing data in hdf5 file, valid values are deflate and lzf. Default is uncompressed"), + .value_parser(["deflate", "lzf"]) + .help("Compression filter for writing data in hdf5 file. Default is uncompressed"), ) .arg( Arg::new("info") From e1f7f995c21d41be2f5c244f69ce69ca14661a99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Fri, 27 Mar 2026 22:28:23 +0100 Subject: [PATCH 37/46] simplified cloning --- src/data_holder/channel_data.rs | 107 ++++++++++++++------------------ 1 file changed, 47 insertions(+), 60 deletions(-) diff --git a/src/data_holder/channel_data.rs b/src/data_holder/channel_data.rs index b7ecb98..fe83efa 100644 --- a/src/data_holder/channel_data.rs +++ b/src/data_holder/channel_data.rs @@ -5,6 +5,8 @@ use anyhow::{Context, Error, Result, bail}; use arrow::array::{ Array, ArrayBuilder, ArrayData, ArrayRef, BinaryArray, BooleanBufferBuilder, FixedSizeBinaryArray, FixedSizeBinaryBuilder, FixedSizeListArray, Int8Builder, + Int16Builder, Int32Builder, Int64Builder, UInt8Builder, UInt16Builder, UInt32Builder, + UInt64Builder, Float32Builder, Float64Builder, LargeBinaryArray, LargeBinaryBuilder, LargeStringArray, LargeStringBuilder, PrimitiveBuilder, StringArray, UnionArray, as_primitive_array, }; @@ -100,85 +102,71 @@ impl PartialEq for ChannelData { impl Clone for ChannelData { fn clone(&self) -> Self { + // `finish_cloned()` creates a snapshot with Arc refcount == 1, so `into_builder()` + // always returns Ok. The `unwrap_or_else` fallback is logically unreachable but + // keeps this impl panic-free. + macro_rules! 
clone_primitive { + ($variant:ident, $builder:ident, $arg:expr) => {{ + let arr = $arg.finish_cloned(); + Self::$variant(arr.into_builder().unwrap_or_else(|arr| { + let mut b = $builder::with_capacity(arr.len()); + for v in arr.iter() { + match v { Some(x) => b.append_value(x), None => b.append_null() } + } + b + })) + }}; + } match self { - Self::Int8(arg0) => Self::Int8( - arg0.finish_cloned() - .into_builder() - .expect("failed getting back mutable array"), - ), - Self::UInt8(arg0) => Self::UInt8( - arg0.finish_cloned() - .into_builder() - .expect("failed getting back mutable array"), - ), - Self::Int16(arg0) => Self::Int16( - arg0.finish_cloned() - .into_builder() - .expect("failed getting back mutable array"), - ), - Self::UInt16(arg0) => Self::UInt16( - arg0.finish_cloned() - .into_builder() - .expect("failed getting back mutable array"), - ), - Self::Int32(arg0) => Self::Int32( - arg0.finish_cloned() - .into_builder() - .expect("failed getting back mutable array"), - ), - Self::UInt32(arg0) => Self::UInt32( - arg0.finish_cloned() - .into_builder() - .expect("failed getting back mutable array"), - ), - Self::Float32(arg0) => Self::Float32( - arg0.finish_cloned() - .into_builder() - .expect("failed getting back mutable array"), - ), - Self::Int64(arg0) => Self::Int64( - arg0.finish_cloned() - .into_builder() - .expect("failed getting back mutable array"), - ), - Self::UInt64(arg0) => Self::UInt64( - arg0.finish_cloned() - .into_builder() - .expect("failed getting back mutable array"), - ), - Self::Float64(arg0) => Self::Float64( - arg0.finish_cloned() - .into_builder() - .expect("failed getting back mutable array"), - ), + Self::Int8(arg0) => clone_primitive!(Int8, Int8Builder, arg0), + Self::UInt8(arg0) => clone_primitive!(UInt8, UInt8Builder, arg0), + Self::Int16(arg0) => clone_primitive!(Int16, Int16Builder, arg0), + Self::UInt16(arg0) => clone_primitive!(UInt16, UInt16Builder, arg0), + Self::Int32(arg0) => clone_primitive!(Int32, Int32Builder, arg0), + 
Self::UInt32(arg0) => clone_primitive!(UInt32, UInt32Builder, arg0), + Self::Float32(arg0) => clone_primitive!(Float32, Float32Builder, arg0), + Self::Int64(arg0) => clone_primitive!(Int64, Int64Builder, arg0), + Self::UInt64(arg0) => clone_primitive!(UInt64, UInt64Builder, arg0), + Self::Float64(arg0) => clone_primitive!(Float64, Float64Builder, arg0), Self::Complex32(arg0) => Self::Complex32(arg0.clone()), Self::Complex64(arg0) => Self::Complex64(arg0.clone()), Self::Utf8(arg0) => Self::Utf8( arg0.finish_cloned() .into_builder() - .expect("failed getting back mutable array"), + .unwrap_or_else(|arr| { + // unreachable: finish_cloned() gives Arc refcount 1 + let mut b = LargeStringBuilder::with_capacity(arr.len(), arr.values().len()); + for v in arr.iter() { match v { Some(s) => b.append_value(s), None => b.append_null() } } + b + }), ), Self::VariableSizeByteArray(array) => Self::VariableSizeByteArray( array .finish_cloned() .into_builder() - .expect("failed getting back mutable array"), + .unwrap_or_else(|arr| { + // unreachable: finish_cloned() gives Arc refcount 1 + let mut b = LargeBinaryBuilder::with_capacity(arr.len(), arr.values().len()); + for v in arr.iter() { match v { Some(s) => b.append_value(s), None => b.append_null() } } + b + }), ), Self::FixedSizeByteArray(array) => { let array: FixedSizeBinaryArray = array.finish_cloned(); let mut new_array = FixedSizeBinaryBuilder::with_capacity(array.len(), array.value_length()); match array.logical_nulls() { + // append_value can only fail if slice length != value_length, + // which can't happen since we chunk by value_length(). 
Some(validity) => { array .values() .chunks(array.value_length() as usize) .zip(validity.iter()) - .for_each(|(value, validity)| { - if validity { - new_array - .append_value(value) - .expect("failed appending new fixed binary value"); + .for_each(|(value, valid)| { + if valid { + new_array.append_value(value) + .unwrap_or_else(|_| new_array.append_null()); } else { new_array.append_null(); } @@ -189,9 +177,8 @@ impl Clone for ChannelData { .values() .chunks(array.value_length() as usize) .for_each(|value| { - new_array - .append_value(value) - .expect("failed appending new fixed binary value"); + new_array.append_value(value) + .unwrap_or_else(|_| new_array.append_null()); }); } } From ddf77459402c6e69f5e2711a53bbc37d1f28b046 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Fri, 27 Mar 2026 22:32:52 +0100 Subject: [PATCH 38/46] made endianess less ambiguous --- src/mdfinfo/mdfinfo3.rs | 31 ++--- src/mdfinfo/mdfinfo4/cn_block.rs | 208 ++++++++++++++++++++++++++++--- src/mdfinfo/mdfinfo4/mod.rs | 2 +- src/mdfreader/data_read3.rs | 20 +-- src/mdfreader/data_read4.rs | 147 +++++++++++----------- src/mdfreader/mdfreader4.rs | 16 +-- 6 files changed, 302 insertions(+), 122 deletions(-) diff --git a/src/mdfinfo/mdfinfo3.rs b/src/mdfinfo/mdfinfo3.rs index f81b0ca..65f064b 100644 --- a/src/mdfinfo/mdfinfo3.rs +++ b/src/mdfinfo/mdfinfo3.rs @@ -16,6 +16,7 @@ use std::io::{Cursor, prelude::*}; use crate::data_holder::channel_data::{ChannelData, data_type_init}; use crate::data_holder::tensor_arrow::Order; use crate::mdfinfo::IdBlock; +use crate::mdfinfo::mdfinfo4::Endianness; use super::sym_buf_reader::SymBufReader; @@ -871,8 +872,8 @@ pub struct Cn3 { pub n_bytes: u16, /// channel data pub data: ChannelData, - /// false = little endian - pub endian: bool, + /// byte order of the channel's raw data + pub endian: Endianness, /// True if channel is valid = contains data converted pub channel_data_valid: bool, } @@ -1060,16 +1061,16 @@ fn 
parse_cn3_block( position = parse_ce(rdr, block1.cn_ce_source, position, sharable, encoding)?; } - let mut endian: bool = false; // Little endian by default + let mut endian = Endianness::Little; // Little endian by default if block2.cn_data_type >= 13 { - endian = false; // little endian + endian = Endianness::Little; } else if block2.cn_data_type >= 9 { - endian = true; // big endian + endian = Endianness::Big; } else if block2.cn_data_type <= 3 { if default_byte_order == 0 { - endian = false; // little endian + endian = Endianness::Little; } else { - endian = true; // big endian + endian = Endianness::Big; } } let data_type = convert_data_type_3to4(block2.cn_data_type); @@ -1134,7 +1135,7 @@ fn can_open_date(pos_byte_beg: u16, cn_bit_offset: u16) -> (Cn3, Cn3, Cn3, Cn3, pos_byte_beg, n_bytes: 2, data: ChannelData::UInt16(UInt16Builder::new()), - endian: false, + endian: Endianness::Little, channel_data_valid: false, }; let block2 = Cn3Block2 { @@ -1153,7 +1154,7 @@ fn can_open_date(pos_byte_beg: u16, cn_bit_offset: u16) -> (Cn3, Cn3, Cn3, Cn3, pos_byte_beg: pos_byte_beg + 2, n_bytes: 1, data: ChannelData::UInt8(UInt8Builder::new()), - endian: false, + endian: Endianness::Little, channel_data_valid: false, }; let block2 = Cn3Block2 { @@ -1172,7 +1173,7 @@ fn can_open_date(pos_byte_beg: u16, cn_bit_offset: u16) -> (Cn3, Cn3, Cn3, Cn3, pos_byte_beg: pos_byte_beg + 3, n_bytes: 1, data: ChannelData::UInt8(UInt8Builder::new()), - endian: false, + endian: Endianness::Little, channel_data_valid: false, }; let block2 = Cn3Block2 { @@ -1191,7 +1192,7 @@ fn can_open_date(pos_byte_beg: u16, cn_bit_offset: u16) -> (Cn3, Cn3, Cn3, Cn3, pos_byte_beg: pos_byte_beg + 4, n_bytes: 1, data: ChannelData::UInt8(UInt8Builder::new()), - endian: false, + endian: Endianness::Little, channel_data_valid: false, }; let block2 = Cn3Block2 { @@ -1210,7 +1211,7 @@ fn can_open_date(pos_byte_beg: u16, cn_bit_offset: u16) -> (Cn3, Cn3, Cn3, Cn3, pos_byte_beg: pos_byte_beg + 5, n_bytes: 1, data: 
ChannelData::UInt8(UInt8Builder::new()), - endian: false, + endian: Endianness::Little, channel_data_valid: false, }; let block2 = Cn3Block2 { @@ -1229,7 +1230,7 @@ fn can_open_date(pos_byte_beg: u16, cn_bit_offset: u16) -> (Cn3, Cn3, Cn3, Cn3, pos_byte_beg: pos_byte_beg + 7, n_bytes: 1, data: ChannelData::UInt8(UInt8Builder::new()), - endian: false, + endian: Endianness::Little, channel_data_valid: false, }; (date_ms, min, hour, day, month, year) @@ -1258,7 +1259,7 @@ fn can_open_time(pos_byte_beg: u16, cn_bit_offset: u16) -> (Cn3, Cn3) { pos_byte_beg, n_bytes: 4, data: ChannelData::UInt32(UInt32Builder::new()), - endian: false, + endian: Endianness::Little, channel_data_valid: false, }; let block2 = Cn3Block2 { @@ -1277,7 +1278,7 @@ fn can_open_time(pos_byte_beg: u16, cn_bit_offset: u16) -> (Cn3, Cn3) { pos_byte_beg: pos_byte_beg + 4, n_bytes: 2, data: ChannelData::UInt16(UInt16Builder::new()), - endian: false, + endian: Endianness::Little, channel_data_valid: false, }; (ms, days) diff --git a/src/mdfinfo/mdfinfo4/cn_block.rs b/src/mdfinfo/mdfinfo4/cn_block.rs index d12fa83..bfbfd85 100644 --- a/src/mdfinfo/mdfinfo4/cn_block.rs +++ b/src/mdfinfo/mdfinfo4/cn_block.rs @@ -18,6 +18,34 @@ use super::cc_block::read_cc; use super::composition::{parse_composition, Composition}; use super::ev_block::{Ev4Block, parse_ev4_block}; +/// Byte order for channel data. +/// +/// `false`/`Little` = little-endian (default for most modern platforms and MDF files). +/// `true`/`Big` = big-endian. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum Endianness { + /// Little-endian (LSB first) + #[default] + Little, + /// Big-endian (MSB first) + Big, +} + +impl Endianness { + /// Returns `true` if big-endian. + #[inline] + pub fn is_big(self) -> bool { + self == Endianness::Big + } +} + +impl From for Endianness { + /// Converts from a raw bool: `true` → `Big`, `false` → `Little`. 
+ fn from(big: bool) -> Self { + if big { Endianness::Big } else { Endianness::Little } + } +} + // Channel (CN) flags - cn_flags field (u32) /// Bit 13: Event signal - channel contains event data, cn_data points to template EVBLOCK pub const CN_F_EVENT_SIGNAL: u32 = 1 << 13; @@ -39,6 +67,137 @@ pub const CN_F_PROTOCOL_EVENT: u32 = 1 << 19; pub const CN_F_DATA_DESCRIPTION_MODE: u32 = 1 << 20; use super::si_block::Si4Block; +/// Channel type (cn_type field) — spec section 6.6, Table 25 +#[allow(dead_code)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CnChannelType { + /// 0: Fixed-length data channel (normal signal) + FixedLength = 0, + /// 1: Variable-length signal data (VLSD) + Vlsd = 1, + /// 2: Master channel (fixed length) + Master = 2, + /// 3: Virtual master channel (generated time/angle axis, no raw data) + VirtualMaster = 3, + /// 4: Synchronisation channel (references an attachment) + Synchronisation = 4, + /// 5: Maximum-length data channel + MaxLength = 5, + /// 6: Virtual data channel (generated data, no raw data) + VirtualData = 6, + /// 7: VLSC channel (stores offsets into VD block, MDF 4.3) + Vlsc = 7, +} + +impl TryFrom for CnChannelType { + type Error = u8; + fn try_from(v: u8) -> Result { + match v { + 0 => Ok(Self::FixedLength), + 1 => Ok(Self::Vlsd), + 2 => Ok(Self::Master), + 3 => Ok(Self::VirtualMaster), + 4 => Ok(Self::Synchronisation), + 5 => Ok(Self::MaxLength), + 6 => Ok(Self::VirtualData), + 7 => Ok(Self::Vlsc), + other => Err(other), + } + } +} + +/// Channel sync type (cn_sync_type field) — spec section 6.6, Table 26 +#[allow(dead_code)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum CnSyncType { + /// 0: No synchronisation (normal data channel) + #[default] + None = 0, + /// 1: Time synchronisation (seconds) + Time = 1, + /// 2: Angle synchronisation (radians) + Angle = 2, + /// 3: Distance synchronisation (meters) + Distance = 3, + /// 4: Index synchronisation (zero-based sample index) + Index = 4, +} + 
+impl TryFrom for CnSyncType { + type Error = u8; + fn try_from(v: u8) -> Result { + match v { + 0 => Ok(Self::None), + 1 => Ok(Self::Time), + 2 => Ok(Self::Angle), + 3 => Ok(Self::Distance), + 4 => Ok(Self::Index), + other => Err(other), + } + } +} + +/// Channel data type (cn_data_type field) — spec section 6.6, Table 27 +#[allow(dead_code)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CnDataType { + /// 0: Unsigned integer, little-endian + UIntLE = 0, + /// 1: Unsigned integer, big-endian + UIntBE = 1, + /// 2: Signed integer, little-endian + IntLE = 2, + /// 3: Signed integer, big-endian + IntBE = 3, + /// 4: IEEE 754 float, little-endian + FloatLE = 4, + /// 5: IEEE 754 float, big-endian + FloatBE = 5, + /// 6: String (SBC/ISO-8859-1) + StringSbc = 6, + /// 7: String (UTF-8) + StringUtf8 = 7, + /// 8: String (UTF-16 LE) + StringUtf16LE = 8, + /// 9: String (UTF-16 BE) + StringUtf16BE = 9, + /// 10: Byte array + ByteArray = 10, + /// 11: MIME sample + MimeSample = 11, + /// 12: MIME stream + MimeStream = 12, + /// 13: CANopen date + CanopenDate = 13, + /// 14: CANopen time + CanopenTime = 14, + /// 15: Complex number, little-endian + ComplexLE = 15, + /// 16: Complex number, big-endian + ComplexBE = 16, + /// 17: String with BOM (Unicode with byte-order mark) + StringBom = 17, +} + +impl TryFrom for CnDataType { + type Error = u8; + fn try_from(v: u8) -> Result { + match v { + 0 => Ok(Self::UIntLE), 1 => Ok(Self::UIntBE), + 2 => Ok(Self::IntLE), 3 => Ok(Self::IntBE), + 4 => Ok(Self::FloatLE), 5 => Ok(Self::FloatBE), + 6 => Ok(Self::StringSbc), 7 => Ok(Self::StringUtf8), + 8 => Ok(Self::StringUtf16LE), 9 => Ok(Self::StringUtf16BE), + 10 => Ok(Self::ByteArray), + 11 => Ok(Self::MimeSample), 12 => Ok(Self::MimeStream), + 13 => Ok(Self::CanopenDate), 14 => Ok(Self::CanopenTime), + 15 => Ok(Self::ComplexLE), 16 => Ok(Self::ComplexBE), + 17 => Ok(Self::StringBom), + other => Err(other), + } + } +} + /// Cn4 Channel block struct #[derive(Debug, PartialEq, 
Clone)] #[binrw] @@ -165,6 +324,21 @@ impl Cn4Block { pub fn set_si_source(&mut self, si_source: i64) { self.cn_si_source = si_source; } + /// Returns the typed channel type. Returns `Err(raw_value)` if the value is out of spec. + #[allow(dead_code)] + pub fn cn_channel_type(&self) -> Result { + CnChannelType::try_from(self.cn_type) + } + /// Returns the typed sync type. Returns `Err(raw_value)` if the value is out of spec. + #[allow(dead_code)] + pub fn cn_sync_type_enum(&self) -> Result { + CnSyncType::try_from(self.cn_sync_type) + } + /// Returns the typed data type. Returns `Err(raw_value)` if the value is out of spec. + #[allow(dead_code)] + pub fn cn_data_type_enum(&self) -> Result { + CnDataType::try_from(self.cn_data_type) + } /// Returns a string representation of the channel type (cn_type) pub fn get_cn_type_str(&self) -> &'static str { match self.cn_type { @@ -242,19 +416,21 @@ pub struct Cn4 { pub block: Cn4Block, /// unique channel name string pub unique_name: String, + /// absolute file position of this CN block pub block_position: i64, /// beginning position of channel in record pub pos_byte_beg: u32, /// number of bytes taken by channel in record pub n_bytes: u32, + /// optional composition (CA array, nested CN structure, DS/CL/CV/CU VLSD layout) pub composition: Option, /// channel data pub data: ChannelData, - /// false = little endian - pub endian: bool, - /// List size: 1 for normal primitive, 2 for complex, pnd for arrays + /// byte order of the channel's raw data + pub endian: Endianness, + /// number of elements per sample: 1 for scalars, 2 for complex, N for arrays pub list_size: usize, - // Shape of array + /// shape of array data: (dimension sizes, storage order); scalar channels use an empty vec pub shape: (Vec, Order), /// optional invalid mask array, invalid byte position in record, invalid byte mask pub invalid_mask: Option<(Option, usize, u8)>, @@ -453,7 +629,7 @@ fn can_open_date( n_bytes: 2, composition: None, data: 
ChannelData::UInt16(UInt16Builder::new()), - endian: false, + endian: Endianness::Little, list_size: 1, shape: (vec![1], Order::RowMajor), invalid_mask: None, @@ -474,7 +650,7 @@ fn can_open_date( n_bytes: 1, composition: None, data: ChannelData::UInt8(UInt8Builder::new()), - endian: false, + endian: Endianness::Little, list_size: 1, shape: (vec![1], Order::RowMajor), invalid_mask: None, @@ -495,7 +671,7 @@ fn can_open_date( n_bytes: 1, composition: None, data: ChannelData::UInt8(UInt8Builder::new()), - endian: false, + endian: Endianness::Little, list_size: 1, shape: (vec![1], Order::RowMajor), invalid_mask: None, @@ -516,7 +692,7 @@ fn can_open_date( n_bytes: 1, composition: None, data: ChannelData::UInt8(UInt8Builder::new()), - endian: false, + endian: Endianness::Little, list_size: 1, shape: (vec![1], Order::RowMajor), invalid_mask: None, @@ -537,7 +713,7 @@ fn can_open_date( n_bytes: 1, composition: None, data: ChannelData::UInt8(UInt8Builder::new()), - endian: false, + endian: Endianness::Little, list_size: 1, shape: (vec![1], Order::RowMajor), invalid_mask: None, @@ -558,7 +734,7 @@ fn can_open_date( n_bytes: 1, composition: None, data: ChannelData::UInt8(UInt8Builder::new()), - endian: false, + endian: Endianness::Little, list_size: 1, shape: (vec![1], Order::RowMajor), invalid_mask: None, @@ -584,7 +760,7 @@ fn can_open_time(block_position: i64, pos_byte_beg: u32, cn_byte_offset: u32) -> n_bytes: 4, composition: None, data: ChannelData::UInt32(UInt32Builder::new()), - endian: false, + endian: Endianness::Little, list_size: 1, shape: (vec![1], Order::RowMajor), invalid_mask: None, @@ -605,7 +781,7 @@ fn can_open_time(block_position: i64, pos_byte_beg: u32, cn_byte_offset: u32) -> n_bytes: 2, composition: None, data: ChannelData::UInt16(UInt16Builder::new()), - endian: false, + endian: Endianness::Little, list_size: 1, shape: (vec![1], Order::RowMajor), invalid_mask: None, @@ -754,26 +930,26 @@ pub(super) fn parse_cn4_block( } } - let mut endian: bool = 
false; // Little endian by default + let mut endian = Endianness::Little; // Little endian by default if block.cn_data_type == 0 || block.cn_data_type == 2 || block.cn_data_type == 4 || block.cn_data_type == 8 || block.cn_data_type == 15 { - endian = false; // little endian + endian = Endianness::Little; } else if block.cn_data_type == 1 || block.cn_data_type == 3 || block.cn_data_type == 5 || block.cn_data_type == 9 || block.cn_data_type == 16 { - endian = true; // big endian + endian = Endianness::Big; } // For VLSC/VLSD channels, cn_data_type describes the signal data block encoding // (e.g. UTF-16 BE), not the byte order of the integer offsets stored in the DT block. if block.cn_type == 1 || block.cn_type == 7 { - endian = false; + endian = Endianness::Little; } let data_type = block.cn_data_type; let cn_type = block.cn_type; diff --git a/src/mdfinfo/mdfinfo4/mod.rs b/src/mdfinfo/mdfinfo4/mod.rs index 32c65f6..f873899 100644 --- a/src/mdfinfo/mdfinfo4/mod.rs +++ b/src/mdfinfo/mdfinfo4/mod.rs @@ -335,7 +335,7 @@ impl MdfInfo4 { unique_name: channel_name.to_string(), data, block: cn_block, - endian: machine_endian, + endian: Endianness::from(machine_endian), block_position: cn_pos, pos_byte_beg: 0, n_bytes, diff --git a/src/mdfreader/data_read3.rs b/src/mdfreader/data_read3.rs index 5b3b902..06de30a 100644 --- a/src/mdfreader/data_read3.rs +++ b/src/mdfreader/data_read3.rs @@ -46,7 +46,7 @@ pub fn read_channels_from_bytes( } ChannelData::Int16(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record[pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; @@ -66,7 +66,7 @@ pub fn read_channels_from_bytes( } ChannelData::UInt16(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record[pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; @@ -87,7 +87,7 @@ 
pub fn read_channels_from_bytes( ChannelData::Int32(a) => { let data = a.values_slice_mut(); if n_bytes == 3 { - if cn.endian { + if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record[pos_byte_beg..pos_byte_beg + n_bytes]; data[i + previous_index] = value @@ -102,7 +102,7 @@ pub fn read_channels_from_bytes( .context("Could not read le i24")?; } } - } else if cn.endian { + } else if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record[pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; @@ -123,7 +123,7 @@ pub fn read_channels_from_bytes( ChannelData::UInt32(a) => { let data = a.values_slice_mut(); if n_bytes == 3 { - if cn.endian { + if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record[pos_byte_beg..pos_byte_beg + n_bytes]; data[i + previous_index] = value @@ -138,7 +138,7 @@ pub fn read_channels_from_bytes( .context("Could not read le u24")?; } } - } else if cn.endian { + } else if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record[pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; @@ -158,7 +158,7 @@ pub fn read_channels_from_bytes( } ChannelData::Float32(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { if n_bytes == 2 { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record @@ -198,7 +198,7 @@ pub fn read_channels_from_bytes( } ChannelData::Int64(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { if n_bytes == 8 { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record[pos_byte_beg..pos_byte_beg + n_bytes]; @@ -232,7 +232,7 @@ pub fn read_channels_from_bytes( } ChannelData::UInt64(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { if n_bytes == 8 { for (i, record) in 
data_chunk.chunks(record_length).enumerate() { value = &record[pos_byte_beg..pos_byte_beg + n_bytes]; @@ -297,7 +297,7 @@ pub fn read_channels_from_bytes( } ChannelData::Float64(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record[pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; diff --git a/src/mdfreader/data_read4.rs b/src/mdfreader/data_read4.rs index e0ec771..ffaf1d0 100644 --- a/src/mdfreader/data_read4.rs +++ b/src/mdfreader/data_read4.rs @@ -40,6 +40,9 @@ pub fn read_one_channel_array( // cn_type == 7 : VLSC channel (stores offsets into VD block) let n_bytes = cn.n_bytes as usize; let list_size = cn.list_size; + // Clone shape once before the match — only one arm executes, so this is always ≤1 clone. + let shape_dims = cn.shape.0.clone(); + let shape_order = cn.shape.1.clone(); match &mut cn.data { ChannelData::Int8(a) => { let mut buf = vec![0; cycle_count]; @@ -53,7 +56,7 @@ pub fn read_one_channel_array( } ChannelData::Int16(a) => { let mut buf = vec![0; cycle_count]; - if cn.endian { + if cn.endian.is_big() { Cursor::new(data_bytes) .read_i16_into::(&mut buf) .context("Could not read be i16 array")?; @@ -66,7 +69,7 @@ pub fn read_one_channel_array( } ChannelData::UInt16(a) => { let mut buf = vec![0; cycle_count]; - if cn.endian { + if cn.endian.is_big() { Cursor::new(data_bytes) .read_u16_into::(&mut buf) .context("Could not read be u16 array")?; @@ -80,7 +83,7 @@ pub fn read_one_channel_array( ChannelData::Int32(a) => { if n_bytes == 4 { let mut buf = vec![0; cycle_count]; - if cn.endian { + if cn.endian.is_big() { Cursor::new(data_bytes) .read_i32_into::(&mut buf) .context("Could not read be i32 array")?; @@ -92,7 +95,7 @@ pub fn read_one_channel_array( *a = Int32Builder::new_from_buffer(buf.into(), None); } else if n_bytes == 3 { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, mut value) in 
data_bytes.chunks(n_bytes).enumerate() { data[i] = value .read_i24::() @@ -110,7 +113,7 @@ pub fn read_one_channel_array( ChannelData::UInt32(a) => { if n_bytes == 4 { let mut buf = vec![0; cycle_count]; - if cn.endian { + if cn.endian.is_big() { Cursor::new(data_bytes) .read_u32_into::(&mut buf) .context("Could not read be u32 array")?; @@ -122,7 +125,7 @@ pub fn read_one_channel_array( *a = UInt32Builder::new_from_buffer(buf.into(), None); } else if n_bytes == 3 { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, mut value) in data_bytes.chunks(n_bytes).enumerate() { data[i] = value .read_u24::() @@ -140,7 +143,7 @@ pub fn read_one_channel_array( ChannelData::Float32(a) => { if n_bytes == 4 { let mut buf = vec![0f32; cycle_count]; - if cn.endian { + if cn.endian.is_big() { Cursor::new(data_bytes) .read_f32_into::(&mut buf) .context("Could not read be f32 array")?; @@ -152,7 +155,7 @@ pub fn read_one_channel_array( *a = Float32Builder::new_from_buffer(buf.into(), None); } else if n_bytes == 2 { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, value) in data_bytes.chunks(std::mem::size_of::()).enumerate() { data[i] = f16::from_be_bytes( @@ -174,7 +177,7 @@ pub fn read_one_channel_array( ChannelData::Int64(a) => { if n_bytes == 8 { let mut buf = vec![0; cycle_count]; - if cn.endian { + if cn.endian.is_big() { Cursor::new(data_bytes) .read_i64_into::(&mut buf) .context("Could not read be i64 array")?; @@ -186,7 +189,7 @@ pub fn read_one_channel_array( *a = Int64Builder::new_from_buffer(buf.into(), None); } else if n_bytes == 6 { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, mut value) in data_bytes.chunks(n_bytes).enumerate() { data[i] = value .read_i48::() @@ -204,7 +207,7 @@ pub fn read_one_channel_array( ChannelData::UInt64(a) => { if n_bytes == 8 { let mut buf = vec![0; cycle_count]; - if cn.endian { + if cn.endian.is_big() { Cursor::new(data_bytes) 
.read_u64_into::(&mut buf) .context("Could not read be u64 array")?; @@ -217,7 +220,7 @@ pub fn read_one_channel_array( } else if n_bytes == 7 { let mut temp = [0u8; std::mem::size_of::()]; let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, value) in data_bytes.chunks(n_bytes).enumerate() { temp[0..7].copy_from_slice(&value[0..7]); data[i] = u64::from_be_bytes(temp); @@ -230,7 +233,7 @@ pub fn read_one_channel_array( } } else if n_bytes == 6 { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, mut value) in data_bytes.chunks(n_bytes).enumerate() { data[i] = value .read_u48::() @@ -247,7 +250,7 @@ pub fn read_one_channel_array( // n_bytes = 5 let mut temp = [0u8; 6]; let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, value) in data_bytes.chunks(n_bytes).enumerate() { temp[0..5].copy_from_slice(&value[0..n_bytes]); data[i] = Cursor::new(temp) @@ -266,7 +269,7 @@ pub fn read_one_channel_array( } ChannelData::Float64(a) => { let mut buf = vec![0f64; cycle_count]; - if cn.endian { + if cn.endian.is_big() { Cursor::new(data_bytes) .read_f64_into::(&mut buf) .context("Could not read be f64 array")?; @@ -281,7 +284,7 @@ pub fn read_one_channel_array( let data = a.values().values_slice_mut(); if n_bytes <= 4 { // complex 16 - if cn.endian { + if cn.endian.is_big() { for (i, value) in data_bytes.chunks(std::mem::size_of::()).enumerate() { data[i] = f16::from_be_bytes( @@ -300,7 +303,7 @@ pub fn read_one_channel_array( } } else if n_bytes <= 8 { // complex 32 - if cn.endian { + if cn.endian.is_big() { for (i, value) in data_bytes.chunks(std::mem::size_of::()).enumerate() { data[i] = f32::from_be_bytes( @@ -319,7 +322,7 @@ pub fn read_one_channel_array( } ChannelData::Complex64(a) => { let data = a.values().values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, value) in data_bytes.chunks(std::mem::size_of::()).enumerate() { data[i] = f64::from_be_bytes( 
value.try_into().unwrap(), @@ -354,7 +357,7 @@ pub fn read_one_channel_array( } } else if cn.block.cn_data_type == 8 || cn.block.cn_data_type == 9 { // 8 | 9 :String UTF16 to be converted into UTF8 - if cn.endian { + if cn.endian.is_big() { let mut decoder = UTF_16BE.new_decoder(); for record in data_bytes.chunks(n_bytes) { let mut dst = String::new(); @@ -411,20 +414,20 @@ pub fn read_one_channel_array( .context("Could not read i8 array")?; *a = TensorArrow::new_from_buffer( buf.into(), - cn.shape.0.clone(), - cn.shape.1.clone(), + shape_dims, + shape_order, ); } ChannelData::ArrayDUInt8(a) => { *a = TensorArrow::new_from_buffer( data_bytes.clone().into(), - cn.shape.0.clone(), - cn.shape.1.clone(), + shape_dims, + shape_order, ); } ChannelData::ArrayDInt16(a) => { let mut buf = vec![0; cycle_count * list_size]; - if cn.endian { + if cn.endian.is_big() { Cursor::new(data_bytes) .read_i16_into::(&mut buf) .context("Could not read be i16 array")?; @@ -435,13 +438,13 @@ pub fn read_one_channel_array( } *a = TensorArrow::new_from_buffer( buf.into(), - cn.shape.0.clone(), - cn.shape.1.clone(), + shape_dims, + shape_order, ); } ChannelData::ArrayDUInt16(a) => { let mut buf = vec![0; cycle_count * list_size]; - if cn.endian { + if cn.endian.is_big() { Cursor::new(data_bytes) .read_u16_into::(&mut buf) .context("Could not read be u16 array")?; @@ -452,13 +455,13 @@ pub fn read_one_channel_array( } *a = TensorArrow::new_from_buffer( buf.into(), - cn.shape.0.clone(), - cn.shape.1.clone(), + shape_dims, + shape_order, ); } ChannelData::ArrayDInt32(a) => { let mut buf = vec![0i32; cycle_count * list_size]; - if cn.endian { + if cn.endian.is_big() { if n_bytes <= 3 { for (i, mut value) in data_bytes.chunks(n_bytes).enumerate() { buf[i] = value @@ -483,13 +486,13 @@ pub fn read_one_channel_array( } *a = TensorArrow::new_from_buffer( buf.into(), - cn.shape.0.clone(), - cn.shape.1.clone(), + shape_dims, + shape_order, ); } ChannelData::ArrayDUInt32(a) => { let mut buf = vec![0; 
cycle_count * list_size]; - if cn.endian { + if cn.endian.is_big() { if n_bytes <= 3 { for (i, mut value) in data_bytes.chunks(n_bytes).enumerate() { buf[i] = value @@ -514,13 +517,13 @@ pub fn read_one_channel_array( } *a = TensorArrow::new_from_buffer( buf.into(), - cn.shape.0.clone(), - cn.shape.1.clone(), + shape_dims, + shape_order, ); } ChannelData::ArrayDFloat32(a) => { let mut buf = vec![0f32; cycle_count * list_size]; - if cn.endian { + if cn.endian.is_big() { if n_bytes == 2 { for (i, value) in data_bytes.chunks(std::mem::size_of::()).enumerate() { @@ -547,13 +550,13 @@ pub fn read_one_channel_array( } *a = TensorArrow::new_from_buffer( buf.into(), - cn.shape.0.clone(), - cn.shape.1.clone(), + shape_dims, + shape_order, ); } ChannelData::ArrayDInt64(a) => { let mut buf = vec![0; cycle_count * list_size]; - if cn.endian { + if cn.endian.is_big() { if n_bytes == 8 { Cursor::new(data_bytes) .read_i64_into::(&mut buf) @@ -578,14 +581,14 @@ pub fn read_one_channel_array( } *a = TensorArrow::new_from_buffer( buf.into(), - cn.shape.0.clone(), - cn.shape.1.clone(), + shape_dims, + shape_order, ); } ChannelData::ArrayDUInt64(a) => { let mut buf = vec![0; cycle_count * list_size]; if n_bytes == 8 { - if cn.endian { + if cn.endian.is_big() { Cursor::new(data_bytes) .read_u64_into::(&mut buf) .context("Could not read be u64 array")?; @@ -596,7 +599,7 @@ pub fn read_one_channel_array( } } else if n_bytes == 7 { let mut temp = [0u8; std::mem::size_of::()]; - if cn.endian { + if cn.endian.is_big() { for (i, value) in data_bytes.chunks(n_bytes).enumerate() { temp[0..7].copy_from_slice(&value[0..7]); buf[i] = u64::from_be_bytes(temp); @@ -608,7 +611,7 @@ pub fn read_one_channel_array( } } } else if n_bytes == 6 { - if cn.endian { + if cn.endian.is_big() { for (i, mut value) in data_bytes.chunks(n_bytes).enumerate() { buf[i] = value .read_u48::() @@ -623,7 +626,7 @@ pub fn read_one_channel_array( } } else if n_bytes == 5 { let mut temp = [0u8; 6]; - if cn.endian { + if 
cn.endian.is_big() { for (i, value) in data_bytes.chunks(n_bytes).enumerate() { temp[0..5].copy_from_slice(&value[0..n_bytes]); buf[i] = Cursor::new(temp) @@ -641,13 +644,13 @@ pub fn read_one_channel_array( } *a = TensorArrow::new_from_buffer( buf.into(), - cn.shape.0.clone(), - cn.shape.1.clone(), + shape_dims, + shape_order, ); } ChannelData::ArrayDFloat64(a) => { let mut buf = vec![0f64; cycle_count * (list_size)]; - if cn.endian { + if cn.endian.is_big() { Cursor::new(data_bytes) .read_f64_into::(&mut buf) .context("Could not read be f64 array")?; @@ -658,8 +661,8 @@ pub fn read_one_channel_array( } *a = TensorArrow::new_from_buffer( buf.into(), - cn.shape.0.clone(), - cn.shape.1.clone(), + shape_dims, + shape_order, ); } ChannelData::Union(_) => {} // Union channels are constructed post-read @@ -718,7 +721,7 @@ pub fn read_channels_from_bytes( } ChannelData::Int16(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record [pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; @@ -738,7 +741,7 @@ pub fn read_channels_from_bytes( } ChannelData::UInt16(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record [pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; @@ -759,7 +762,7 @@ pub fn read_channels_from_bytes( ChannelData::Int32(a) => { let data = a.values_slice_mut(); if n_bytes == 3 { - if cn.endian { + if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record[pos_byte_beg..pos_byte_beg + n_bytes]; data[i + previous_index] = value @@ -774,7 +777,7 @@ pub fn read_channels_from_bytes( .context("Could not read le i24")?; } } - } else if cn.endian { + } else if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record [pos_byte_beg..pos_byte_beg + 
std::mem::size_of::()]; @@ -795,7 +798,7 @@ pub fn read_channels_from_bytes( ChannelData::UInt32(a) => { let data = a.values_slice_mut(); if n_bytes == 3 { - if cn.endian { + if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record[pos_byte_beg..pos_byte_beg + n_bytes]; data[i + previous_index] = value @@ -810,7 +813,7 @@ pub fn read_channels_from_bytes( .context("Could not read le u24")?; } } - } else if cn.endian { + } else if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record [pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; @@ -830,7 +833,7 @@ pub fn read_channels_from_bytes( } ChannelData::Float32(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { if n_bytes == 2 { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record @@ -870,7 +873,7 @@ pub fn read_channels_from_bytes( } ChannelData::Int64(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { if n_bytes == 8 { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record[pos_byte_beg..pos_byte_beg + n_bytes]; @@ -904,7 +907,7 @@ pub fn read_channels_from_bytes( } ChannelData::UInt64(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { if n_bytes == 8 { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record[pos_byte_beg..pos_byte_beg + n_bytes]; @@ -971,7 +974,7 @@ pub fn read_channels_from_bytes( } ChannelData::Float64(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record [pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; @@ -995,7 +998,7 @@ pub fn read_channels_from_bytes( // complex 16 let mut re_val: &[u8]; let mut im_val: &[u8]; - if cn.endian { + if cn.endian.is_big() { for (i, record) in 
data_chunk.chunks(record_length).enumerate() { re_val = &record [pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; @@ -1038,7 +1041,7 @@ pub fn read_channels_from_bytes( // complex 32 let mut re_val: &[u8]; let mut im_val: &[u8]; - if cn.endian { + if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { re_val = &record [pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; @@ -1080,7 +1083,7 @@ pub fn read_channels_from_bytes( let mut re_val: &[u8]; let mut im_val: &[u8]; let data = a.values().values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { re_val = &record [pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; @@ -1138,7 +1141,7 @@ pub fn read_channels_from_bytes( } } else if cn.block.cn_data_type == 8 || cn.block.cn_data_type == 9 { // 8 | 9 :String UTF16 to be converted into UTF8 - if cn.endian { + if cn.endian.is_big() { let mut decoder = UTF_16BE.new_decoder(); for record in data_chunk.chunks(record_length) { value = &record[pos_byte_beg..pos_byte_beg + n_bytes]; @@ -1224,7 +1227,7 @@ pub fn read_channels_from_bytes( } ChannelData::ArrayDInt16(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { for j in 0..cn.list_size { value = &record[pos_byte_beg + j * std::mem::size_of::() @@ -1254,7 +1257,7 @@ pub fn read_channels_from_bytes( } ChannelData::ArrayDUInt16(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { for j in 0..cn.list_size { value = &record[pos_byte_beg + j * std::mem::size_of::() @@ -1284,7 +1287,7 @@ pub fn read_channels_from_bytes( } ChannelData::ArrayDInt32(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { if n_bytes <= 3 { for (i, record) in data_chunk.chunks(record_length).enumerate() { for j in 
0..cn.list_size { @@ -1337,7 +1340,7 @@ pub fn read_channels_from_bytes( } ChannelData::ArrayDUInt32(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { if n_bytes <= 3 { for (i, record) in data_chunk.chunks(record_length).enumerate() { for j in 0..cn.list_size { @@ -1390,7 +1393,7 @@ pub fn read_channels_from_bytes( } ChannelData::ArrayDFloat32(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { if n_bytes <= 2 { for (i, record) in data_chunk.chunks(record_length).enumerate() { for j in 0..cn.list_size { @@ -1452,7 +1455,7 @@ pub fn read_channels_from_bytes( } ChannelData::ArrayDInt64(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { if n_bytes == 8 { for (i, record) in data_chunk.chunks(record_length).enumerate() { for j in 0..cn.list_size { @@ -1504,7 +1507,7 @@ pub fn read_channels_from_bytes( } ChannelData::ArrayDUInt64(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { if n_bytes == 8 { for (i, record) in data_chunk.chunks(record_length).enumerate() { for j in 0..cn.list_size { @@ -1607,7 +1610,7 @@ pub fn read_channels_from_bytes( } ChannelData::ArrayDFloat64(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { for j in 0..cn.list_size { value = &record[pos_byte_beg + j * std::mem::size_of::() diff --git a/src/mdfreader/mdfreader4.rs b/src/mdfreader/mdfreader4.rs index 339f244..74d51e7 100644 --- a/src/mdfreader/mdfreader4.rs +++ b/src/mdfreader/mdfreader4.rs @@ -1951,7 +1951,7 @@ fn store_decoded_values_in_channel( } ChannelData::Int16(builder) => { if value_bytes.len() >= 2 { - let val = if cn.endian { + let val = if cn.endian.is_big() { i16::from_be_bytes(value_bytes[..2].try_into()?) } else { i16::from_le_bytes(value_bytes[..2].try_into()?) 
@@ -1961,7 +1961,7 @@ fn store_decoded_values_in_channel( } ChannelData::UInt16(builder) => { if value_bytes.len() >= 2 { - let val = if cn.endian { + let val = if cn.endian.is_big() { u16::from_be_bytes(value_bytes[..2].try_into()?) } else { u16::from_le_bytes(value_bytes[..2].try_into()?) @@ -1971,7 +1971,7 @@ fn store_decoded_values_in_channel( } ChannelData::Int32(builder) => { if value_bytes.len() >= 4 { - let val = if cn.endian { + let val = if cn.endian.is_big() { i32::from_be_bytes(value_bytes[..4].try_into()?) } else { i32::from_le_bytes(value_bytes[..4].try_into()?) @@ -1981,7 +1981,7 @@ fn store_decoded_values_in_channel( } ChannelData::UInt32(builder) => { if value_bytes.len() >= 4 { - let val = if cn.endian { + let val = if cn.endian.is_big() { u32::from_be_bytes(value_bytes[..4].try_into()?) } else { u32::from_le_bytes(value_bytes[..4].try_into()?) @@ -1991,7 +1991,7 @@ fn store_decoded_values_in_channel( } ChannelData::Float32(builder) => { if value_bytes.len() >= 4 { - let val = if cn.endian { + let val = if cn.endian.is_big() { f32::from_be_bytes(value_bytes[..4].try_into()?) } else { f32::from_le_bytes(value_bytes[..4].try_into()?) @@ -2001,7 +2001,7 @@ fn store_decoded_values_in_channel( } ChannelData::Int64(builder) => { if value_bytes.len() >= 8 { - let val = if cn.endian { + let val = if cn.endian.is_big() { i64::from_be_bytes(value_bytes[..8].try_into()?) } else { i64::from_le_bytes(value_bytes[..8].try_into()?) @@ -2011,7 +2011,7 @@ fn store_decoded_values_in_channel( } ChannelData::UInt64(builder) => { if value_bytes.len() >= 8 { - let val = if cn.endian { + let val = if cn.endian.is_big() { u64::from_be_bytes(value_bytes[..8].try_into()?) } else { u64::from_le_bytes(value_bytes[..8].try_into()?) @@ -2021,7 +2021,7 @@ fn store_decoded_values_in_channel( } ChannelData::Float64(builder) => { if value_bytes.len() >= 8 { - let val = if cn.endian { + let val = if cn.endian.is_big() { f64::from_be_bytes(value_bytes[..8].try_into()?) 
} else { f64::from_le_bytes(value_bytes[..8].try_into()?) From 3fcc099bf4c4419c74c4b36c50564b369096eeb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Fri, 27 Mar 2026 22:35:29 +0100 Subject: [PATCH 39/46] added more tests for better coverage with more documentation --- src/mdfinfo/mdfinfo4/cg_block.rs | 6 +- src/mdfinfo/mdfinfo4/composition.rs | 8 + src/mdfreader/conversions3.rs | 148 +++++++ src/mdfreader/conversions4.rs | 281 +++++++++++++ src/mdfreader/datastream_decoder.rs | 336 +++++++++++++++- src/mdfwriter/mdfwriter4.rs | 18 +- tests/arrays.rs | 204 ++++++++++ tests/canopen_types.rs | 33 ++ tests/channel_data_ops.rs | 586 ++++++++++++++++++++++++++++ tests/conversions_int_types.rs | 205 ++++++++++ tests/data_types.rs | 49 +++ tests/fixtures.rs | 361 +++++++++++++++++ tests/mdf3_conversion.rs | 86 ++++ 13 files changed, 2309 insertions(+), 12 deletions(-) create mode 100644 tests/arrays.rs create mode 100644 tests/canopen_types.rs create mode 100644 tests/channel_data_ops.rs create mode 100644 tests/conversions_int_types.rs create mode 100644 tests/fixtures.rs create mode 100644 tests/mdf3_conversion.rs diff --git a/src/mdfinfo/mdfinfo4/cg_block.rs b/src/mdfinfo/mdfinfo4/cg_block.rs index 3234903..75ca217 100644 --- a/src/mdfinfo/mdfinfo4/cg_block.rs +++ b/src/mdfinfo/mdfinfo4/cg_block.rs @@ -34,6 +34,8 @@ pub const CG_F_EVENT_SIGNAL_GROUP: u16 = 1 << 4; pub const CG_F_VLSC: u16 = 1 << 5; /// Bit 6: Raw sensor event channel group pub const CG_F_RAW_SENSOR_EVENT: u16 = 1 << 6; +/// Bit 3: Remote master — the master channel resides in a different channel group +pub const CG_F_REMOTE_MASTER: u16 = 1 << 3; /// Bit 7: Protocol event channel group pub const CG_F_PROTOCOL_EVENT: u16 = 1 << 7; use super::si_block::Si4Block; @@ -127,7 +129,7 @@ impl Cg4Block { if (self.cg_flags & CG_F_PROTOCOL_EVENT) != 0 { flags.push("ProtocolEvent"); } - if (self.cg_flags & 0b1000) != 0 { + if (self.cg_flags & CG_F_REMOTE_MASTER) != 0 { // 
Bit 3: Remote master flags.push("RemoteMaster"); } @@ -834,7 +836,7 @@ mod tests { cg.cg_flags = CG_F_PROTOCOL_EVENT; assert_eq!(cg.get_flags_str(), "ProtocolEvent"); - cg.cg_flags = 0b1000; // RemoteMaster (bit 3) + cg.cg_flags = CG_F_REMOTE_MASTER; assert_eq!(cg.get_flags_str(), "RemoteMaster"); // Combination diff --git a/src/mdfinfo/mdfinfo4/composition.rs b/src/mdfinfo/mdfinfo4/composition.rs index d904bbd..f060bab 100644 --- a/src/mdfinfo/mdfinfo4/composition.rs +++ b/src/mdfinfo/mdfinfo4/composition.rs @@ -27,7 +27,9 @@ pub type CompositionParseResult = (Composition, i64, usize, (Vec, Order), #[derive(Debug, Clone)] #[repr(C)] pub struct Composition { + /// The composition block at this level (CA, CN, CL, CV, CU, or DS). pub block: Compo, + /// Optional next composition in the chain (e.g. nested CA inside another CA). pub compo: Option>, } @@ -35,11 +37,17 @@ pub struct Composition { #[derive(Debug, Clone)] #[repr(C)] pub enum Compo { + /// Channel Array block: N-dimensional array layout (spec section 6.18) CA(Box), + /// Nested Channel block: structure composition via CN→CN chain CN(Box), + /// Channel List: named fields packed into a VLSD blob (spec section 6.25) CL(Box), + /// Column Variable-length: variable-length column in fixed-length records (spec section 6.26) CV(Box), + /// Column Unordered: unordered variable-length column (spec section 6.27) CU(Box), + /// Dynamic Size: variable-length fields in a VLSD blob (spec section 6.24) DS(Box), } diff --git a/src/mdfreader/conversions3.rs b/src/mdfreader/conversions3.rs index 45fbca3..7404494 100644 --- a/src/mdfreader/conversions3.rs +++ b/src/mdfreader/conversions3.rs @@ -1362,4 +1362,152 @@ mod tests { assert_eq!(arr.value(2), "high"); assert_eq!(arr.value(3), "unknown"); } + + // ── Helper ── + + fn make_cn3(data: ChannelData) -> Cn3 { + Cn3 { + data, + ..Default::default() + } + } + + // ── linear_conversion (Cn3) tests ── + + #[test] + fn test_linear_cn3_uint16() { + use arrow::array::UInt16Builder; 
+ let mut builder = UInt16Builder::new(); + builder.append_value(10); + let mut cn = make_cn3(ChannelData::UInt16(builder)); + linear_conversion(&mut cn, &[0.0, 3.0]).unwrap(); // 10*3+0 = 30 + if let ChannelData::Float64(ref b) = cn.data { + let vals = b.values_slice(); + assert!((vals[0] - 30.0).abs() < 1e-12); + } else { + panic!("Expected Float64"); + } + } + + // ── polynomial_conversion (Cn3) tests ── + + #[test] + fn test_polynomial_cn3_float64() { + // polynomial_calculation uses array.finish() then Float64Builder::with_capacity(array.capacity()) + // After finish(), capacity resets to 0, so the output is an empty Float64 builder. + // This test verifies the conversion returns Ok and converts the channel to Float64. + let cc_val = vec![0.0, 1.0, 1.0, 0.0, 0.0, 0.0]; + let mut builder = Float64Builder::new(); + builder.append_value(2.0); + let mut cn = make_cn3(ChannelData::Float64(builder)); + let result = polynomial_conversion(&mut cn, &cc_val); + assert!(result.is_ok()); + // Data should now be Float64 (conversion applied) + assert!(matches!(cn.data, ChannelData::Float64(_))); + } + + // ── exponential_conversion (Cn3) tests ── + + #[test] + fn test_exponential_cn3_float64_p4_zero() { + // exponential_calculation also uses array.finish() and Float64Builder::with_capacity(0) + // so the output is an empty builder. Test that it returns Ok and converts to Float64. 
+ // p4=0 branch chosen with: p1=1, p2=1, p3=0, p4=0, p5=0, p6=1, p7=0 + let cc_val = vec![1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0]; + let mut builder = Float64Builder::new(); + builder.append_value(std::f64::consts::E); + let mut cn = make_cn3(ChannelData::Float64(builder)); + let result = exponential_conversion(&mut cn, &cc_val); + assert!(result.is_ok()); + assert!(matches!(cn.data, ChannelData::Float64(_))); + } + + // ── logarithmic_conversion (Cn3) tests ── + + #[test] + fn test_logarithmic_cn3_float64_p4_zero() { + // logarithmic_calculation also uses array.finish() and Float64Builder::with_capacity(0). + // Test that it returns Ok and converts to Float64. + // p4=0 branch: p1=1, p2=1, p3=0, p4=0, p5=0, p6=1, p7=0 + let cc_val = vec![1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0]; + let mut builder = Float64Builder::new(); + builder.append_value(0.0); + let mut cn = make_cn3(ChannelData::Float64(builder)); + let result = logarithmic_conversion(&mut cn, &cc_val); + assert!(result.is_ok()); + assert!(matches!(cn.data, ChannelData::Float64(_))); + } + + // ── value_to_value_with_interpolation (Cn3) tests ── + + #[test] + fn test_cn3_vtv_interp_uint8() { + use arrow::array::UInt8Builder; + // Table: 0→0, 10→100 + let cc_val = vec![0.0, 0.0, 10.0, 100.0]; + let mut builder = UInt8Builder::new(); + builder.append_value(5); // 50 + let mut cn = make_cn3(ChannelData::UInt8(builder)); + value_to_value_with_interpolation(&mut cn, cc_val, &10).unwrap(); + if let ChannelData::Float64(ref b) = cn.data { + let vals = b.values_slice(); + assert!((vals[0] - 50.0).abs() < 1e-9); + } else { + panic!("Expected Float64"); + } + } + + // ── value_to_value_without_interpolation (Cn3) tests ── + + #[test] + fn test_cn3_vtv_no_interp_uint8() { + use arrow::array::UInt8Builder; + let cc_val = vec![1.0, 10.0, 2.0, 20.0]; + let mut builder = UInt8Builder::new(); + builder.append_value(2); // exact match → 20.0 + let mut cn = make_cn3(ChannelData::UInt8(builder)); + 
value_to_value_without_interpolation(&mut cn, cc_val, &1).unwrap(); + if let ChannelData::Float64(ref b) = cn.data { + let vals = b.values_slice(); + assert!((vals[0] - 20.0).abs() < 1e-12); + } else { + panic!("Expected Float64"); + } + } + + // ── value_to_text (Cn3) tests ── + + #[test] + fn test_cn3_value_to_text() { + use arrow::array::UInt8Builder; + let cc_val_ref = vec![(1.0, "one".to_string()), (2.0, "two".to_string())]; + let mut builder = UInt8Builder::new(); + builder.append_value(1); + let mut cn = make_cn3(ChannelData::UInt8(builder)); + value_to_text(&mut cn, &cc_val_ref, &1).unwrap(); + if let ChannelData::Utf8(ref b) = cn.data { + let arr = b.finish_cloned(); + assert_eq!(arr.value(0), "one"); + } else { + panic!("Expected Utf8"); + } + } + + // ── value_range_to_text (Cn3) tests ── + + #[test] + fn test_cn3_value_range_to_text() { + let ranges = vec![(1.0, 2.0, "in_range".to_string()), (3.0, 4.0, "out".to_string())]; + let cc_val_ref = (ranges, "default".to_string()); + let mut builder = Float64Builder::new(); + builder.append_value(1.5); // in [1.0, 2.0) + let mut cn = make_cn3(ChannelData::Float64(builder)); + value_range_to_text(&mut cn, &cc_val_ref, &1).unwrap(); + if let ChannelData::Utf8(ref b) = cn.data { + let arr = b.finish_cloned(); + assert_eq!(arr.value(0), "in_range"); + } else { + panic!("Expected Utf8"); + } + } } diff --git a/src/mdfreader/conversions4.rs b/src/mdfreader/conversions4.rs index 2398dc4..94d0ff3 100644 --- a/src/mdfreader/conversions4.rs +++ b/src/mdfreader/conversions4.rs @@ -2401,4 +2401,285 @@ mod tests { assert!((values[2] - 11.0).abs() < 1e-12); assert!((values[3] - (-5.0)).abs() < 1e-12); } + + // ── Helper for higher-level conversion tests ── + + fn make_cn4_with_data(data: ChannelData) -> Cn4 { + Cn4 { + data, + ..Default::default() + } + } + + // ── linear_conversion (Cn4) tests ── + + #[test] + fn test_linear_cn4_uint8() { + use arrow::array::UInt8Builder; + let mut builder = UInt8Builder::new(); + 
builder.append_value(2); + builder.append_value(4); + let mut cn = make_cn4_with_data(ChannelData::UInt8(builder)); + linear_conversion(&mut cn, &[1.0, 2.0]).unwrap(); // p1=1, p2=2 → v*2+1 + if let ChannelData::Float64(ref b) = cn.data { + let vals = b.values_slice(); + assert!((vals[0] - 5.0).abs() < 1e-12); // 2*2+1 + assert!((vals[1] - 9.0).abs() < 1e-12); // 4*2+1 + } else { + panic!("Expected Float64 after linear conversion"); + } + } + + #[test] + fn test_linear_cn4_int16() { + use arrow::array::Int16Builder; + let mut builder = Int16Builder::new(); + builder.append_value(10); + let mut cn = make_cn4_with_data(ChannelData::Int16(builder)); + linear_conversion(&mut cn, &[0.0, 0.5]).unwrap(); + if let ChannelData::Float64(ref b) = cn.data { + let vals = b.values_slice(); + assert!((vals[0] - 5.0).abs() < 1e-12); // 10*0.5+0 + } else { + panic!("Expected Float64"); + } + } + + #[test] + fn test_linear_cn4_array_d_int16() { + use arrow::array::Int16Builder; + use crate::data_holder::tensor_arrow::{Order, TensorArrow}; + use arrow::datatypes::Int16Type; + let mut builder = Int16Builder::new(); + builder.append_value(1); + builder.append_value(2); + let tensor = TensorArrow::::new_from_primitive(builder, None, vec![2], Order::RowMajor); + let mut cn = make_cn4_with_data(ChannelData::ArrayDInt16(tensor)); + linear_conversion(&mut cn, &[0.0, 3.0]).unwrap(); + if let ChannelData::ArrayDFloat64(ref t) = cn.data { + let vals = t.values_slice(); + assert!((vals[0] - 3.0).abs() < 1e-12); + assert!((vals[1] - 6.0).abs() < 1e-12); + } else { + panic!("Expected ArrayDFloat64"); + } + } + + #[test] + fn test_linear_cn4_array_d_float64() { + use arrow::datatypes::Float64Type; + use crate::data_holder::tensor_arrow::{Order, TensorArrow}; + let mut builder = Float64Builder::new(); + builder.append_value(1.0); + let tensor = TensorArrow::::new_from_primitive(builder, None, vec![1], Order::RowMajor); + let mut cn = make_cn4_with_data(ChannelData::ArrayDFloat64(tensor)); + 
linear_conversion(&mut cn, &[0.0, 2.0]).unwrap(); + if let ChannelData::ArrayDFloat64(ref t) = cn.data { + let vals = t.values_slice(); + assert!((vals[0] - 2.0).abs() < 1e-12); + } else { + panic!("Expected ArrayDFloat64"); + } + } + + #[test] + fn test_linear_cn4_utf8_warn() { + use arrow::array::LargeStringBuilder; + // Utf8 data should produce a warn but NOT change data type and should return Ok + let mut cn = make_cn4_with_data(ChannelData::Utf8(LargeStringBuilder::new())); + let result = linear_conversion(&mut cn, &[1.0, 2.0]); + assert!(result.is_ok()); + // data type unchanged + assert!(matches!(cn.data, ChannelData::Utf8(_))); + } + + #[test] + fn test_linear_cn4_identity_no_op() { + use arrow::array::UInt8Builder; + let mut builder = UInt8Builder::new(); + builder.append_value(7); + let mut cn = make_cn4_with_data(ChannelData::UInt8(builder)); + // p1=0, p2=1 is identity → no change + linear_conversion(&mut cn, &[0.0, 1.0]).unwrap(); + // data type should remain UInt8 (identity skipped) + assert!(matches!(cn.data, ChannelData::UInt8(_))); + } + + // ── rational_conversion (Cn4) tests ── + + #[test] + fn test_rational_cn4_float64() { + // identity rational: (0*x^2 + 1*x + 0) / (0*x^2 + 0*x + 1) = x + let cc_val = vec![0.0, 1.0, 0.0, 0.0, 0.0, 1.0]; + let mut builder = Float64Builder::new(); + builder.append_value(2.0); + let mut cn = make_cn4_with_data(ChannelData::Float64(builder)); + rational_conversion(&mut cn, &cc_val).unwrap(); + if let ChannelData::Float64(ref b) = cn.data { + let vals = b.values_slice(); + assert!((vals[0] - 2.0).abs() < 1e-12); + } else { + panic!("Expected Float64"); + } + } + + #[test] + fn test_rational_cn4_uint8() { + // (0*x^2 + 1*x + 0) / (0*x^2 + 0*x + 2) = x/2 + let cc_val = vec![0.0, 1.0, 0.0, 0.0, 0.0, 2.0]; + use arrow::array::UInt8Builder; + let mut builder = UInt8Builder::new(); + builder.append_value(4); + let mut cn = make_cn4_with_data(ChannelData::UInt8(builder)); + rational_conversion(&mut cn, &cc_val).unwrap(); + 
if let ChannelData::Float64(ref b) = cn.data { + let vals = b.values_slice(); + assert!((vals[0] - 2.0).abs() < 1e-12); + } else { + panic!("Expected Float64"); + } + } + + // ── value_to_value_with_interpolation (Cn4) tests ── + + #[test] + fn test_vtv_interp_cn4_float64() { + // cc_val: pairs (x, y) interleaved: 1.0→10.0, 2.0→20.0 + let cc_val = vec![1.0, 10.0, 2.0, 20.0]; + let mut builder = Float64Builder::new(); + builder.append_value(1.5); // interpolated: 15.0 + let mut cn = make_cn4_with_data(ChannelData::Float64(builder)); + value_to_value_with_interpolation(&mut cn, cc_val).unwrap(); + if let ChannelData::Float64(ref b) = cn.data { + let vals = b.values_slice(); + assert!((vals[0] - 15.0).abs() < 1e-9); + } else { + panic!("Expected Float64"); + } + } + + #[test] + fn test_vtv_interp_cn4_uint8() { + // cc_val: 0→0, 10→100 + let cc_val = vec![0.0, 0.0, 10.0, 100.0]; + use arrow::array::UInt8Builder; + let mut builder = UInt8Builder::new(); + builder.append_value(5); // 50 + let mut cn = make_cn4_with_data(ChannelData::UInt8(builder)); + value_to_value_with_interpolation(&mut cn, cc_val).unwrap(); + if let ChannelData::Float64(ref b) = cn.data { + let vals = b.values_slice(); + assert!((vals[0] - 50.0).abs() < 1e-9); + } else { + panic!("Expected Float64"); + } + } + + #[test] + fn test_vtv_interp_cn4_array_d_uint8() { + use arrow::array::UInt8Builder; + use arrow::datatypes::UInt8Type; + use crate::data_holder::tensor_arrow::{Order, TensorArrow}; + let cc_val = vec![0.0, 0.0, 10.0, 100.0]; + let mut builder = UInt8Builder::new(); + builder.append_value(5); + let tensor = TensorArrow::::new_from_primitive(builder, None, vec![1], Order::RowMajor); + let mut cn = make_cn4_with_data(ChannelData::ArrayDUInt8(tensor)); + value_to_value_with_interpolation(&mut cn, cc_val).unwrap(); + assert!(matches!(cn.data, ChannelData::ArrayDFloat64(_))); + } + + #[test] + fn test_vtv_interp_cn4_int64_noop() { + use arrow::array::Int64Builder; + // Int64 not handled in 
value_to_value_with_interpolation (falls through to warn) + let mut builder = Int64Builder::new(); + builder.append_value(5); + let mut cn = make_cn4_with_data(ChannelData::Int64(builder)); + let result = value_to_value_with_interpolation(&mut cn, vec![0.0, 0.0, 10.0, 100.0]); + assert!(result.is_ok()); + } + + // ── value_to_value_without_interpolation (Cn4) tests ── + + #[test] + fn test_vtv_no_interp_cn4_float64() { + let cc_val = vec![1.0, 100.0, 2.0, 200.0]; + let mut builder = Float64Builder::new(); + builder.append_value(1.0); // exact match → 100.0 + let mut cn = make_cn4_with_data(ChannelData::Float64(builder)); + value_to_value_without_interpolation(&mut cn, cc_val).unwrap(); + if let ChannelData::Float64(ref b) = cn.data { + let vals = b.values_slice(); + assert!((vals[0] - 100.0).abs() < 1e-12); + } else { + panic!("Expected Float64"); + } + } + + #[test] + fn test_vtv_no_interp_cn4_uint8() { + use arrow::array::UInt8Builder; + let cc_val = vec![1.0, 10.0, 2.0, 20.0]; + let mut builder = UInt8Builder::new(); + builder.append_value(2); // exact match → 20.0 + let mut cn = make_cn4_with_data(ChannelData::UInt8(builder)); + value_to_value_without_interpolation(&mut cn, cc_val).unwrap(); + if let ChannelData::Float64(ref b) = cn.data { + let vals = b.values_slice(); + assert!((vals[0] - 20.0).abs() < 1e-12); + } else { + panic!("Expected Float64"); + } + } + + // ── value_range_to_value_table (Cn4) tests ── + + #[test] + fn test_vrv_cn4_float64() { + // cc_val groups of 3: (min, max, out_val) + // Range [1.0, 2.0] → 100.0 + let cc_val = vec![1.0, 2.0, 100.0]; + let mut builder = Float64Builder::new(); + builder.append_value(1.5); // in [1.0, 2.0] → 100.0 + let mut cn = make_cn4_with_data(ChannelData::Float64(builder)); + value_range_to_value_table(&mut cn, cc_val).unwrap(); + if let ChannelData::Float64(ref b) = cn.data { + let vals = b.values_slice(); + assert!((vals[0] - 100.0).abs() < 1e-12); + } else { + panic!("Expected Float64"); + } + } + + #[test] + 
fn test_vrv_cn4_uint8() { + use arrow::array::UInt8Builder; + // Range [0.0, 10.0] → 42.0 + let cc_val = vec![0.0, 10.0, 42.0]; + let mut builder = UInt8Builder::new(); + builder.append_value(5); // in [0, 10] + let mut cn = make_cn4_with_data(ChannelData::UInt8(builder)); + value_range_to_value_table(&mut cn, cc_val).unwrap(); + if let ChannelData::Float64(ref b) = cn.data { + let vals = b.values_slice(); + assert!((vals[0] - 42.0).abs() < 1e-12); + } else { + panic!("Expected Float64"); + } + } + + #[test] + fn test_vrv_cn4_array_d_warn() { + use arrow::array::Int8Builder; + use arrow::datatypes::Int8Type; + use crate::data_holder::tensor_arrow::{Order, TensorArrow}; + let mut builder = Int8Builder::new(); + builder.append_value(5); + let tensor = TensorArrow::::new_from_primitive(builder, None, vec![1], Order::RowMajor); + let mut cn = make_cn4_with_data(ChannelData::ArrayDInt8(tensor)); + // ArrayDInt8 falls through to warn path + let result = value_range_to_value_table(&mut cn, vec![0.0, 10.0, 42.0]); + assert!(result.is_ok()); + } } diff --git a/src/mdfreader/datastream_decoder.rs b/src/mdfreader/datastream_decoder.rs index e1a1ca5..c1254e2 100644 --- a/src/mdfreader/datastream_decoder.rs +++ b/src/mdfreader/datastream_decoder.rs @@ -283,7 +283,7 @@ pub fn decode_single_channel_value( final_byte_offset, bit_count, cn.block.cn_data_type, - cn.endian, + cn.endian.is_big(), )?; // Update stream position @@ -753,4 +753,338 @@ mod tests { state.reset_alignment(); assert_eq!(state.alignment_offset, 7); } + + // ── calculate_final_byte_offset tests ── + + #[test] + fn test_final_byte_offset_normal_alignment() { + // alignment != 255: result = aligned_offset + byte_offset (bit_offset ignored) + assert_eq!(calculate_final_byte_offset(8, 0, 3, 0), 11); + assert_eq!(calculate_final_byte_offset(8, 0, 3, 7), 11); // bit_offset ignored + assert_eq!(calculate_final_byte_offset(0, 1, 5, 0), 5); + } + + #[test] + fn test_final_byte_offset_bit_packed() { + // alignment == 255: 
result = (aligned_offset + byte_offset*8 + bit_offset) / 8 + assert_eq!(calculate_final_byte_offset(0, 255, 1, 4), 1); // (0 + 8 + 4) / 8 = 1 + assert_eq!(calculate_final_byte_offset(16, 255, 0, 4), 2); // (16 + 0 + 4) / 8 = 2 + } + + // ── decode_single_channel_value helper ── + + fn make_cn4( + cn_type: u8, + cn_bit_count: u32, + cn_alignment: u8, + cn_byte_offset: u32, + cn_bit_offset: u8, + cn_flags: u32, + ) -> Cn4 { + use crate::mdfinfo::mdfinfo4::Cn4Block; + let mut block = Cn4Block::default(); + block.cn_type = cn_type; + block.cn_bit_count = cn_bit_count; + block.cn_alignment = cn_alignment; + block.cn_byte_offset = cn_byte_offset; + block.cn_bit_offset = cn_bit_offset; + block.cn_flags = cn_flags; + Cn4 { + block, + ..Default::default() + } + } + + // ── decode_single_channel_value tests ── + + #[test] + fn test_decode_single_channel_byte_aligned() { + let data = [0xABu8, 0xCD]; + let cn = make_cn4(0, 8, 0, 0, 0, 0); + let mut stream_state = StreamState::new(); + let (bytes, consumed) = decode_single_channel_value(&data, &cn, &mut stream_state).unwrap(); + assert_eq!(bytes, vec![0xAB]); + assert_eq!(consumed, 1); + assert_eq!(stream_state.bit_position, 8); + } + + #[test] + fn test_decode_single_channel_with_byte_offset() { + let data = [0x00u8, 0xAA, 0xBB]; + let cn = make_cn4(0, 8, 0, 1, 0, 0); // cn_byte_offset=1 + let mut stream_state = StreamState::new(); + let (bytes, consumed) = decode_single_channel_value(&data, &cn, &mut stream_state).unwrap(); + assert_eq!(bytes, vec![0xAA]); + assert_eq!(consumed, 1); + } + + #[test] + fn test_decode_single_channel_vlsd() { + // 4-byte LE length = 3, then 3 bytes "ABC" + let data = [3u8, 0, 0, 0, 0x41, 0x42, 0x43]; + let cn = make_cn4(1, 0, 0, 0, 0, 0); // cn_type=1 (VLSD) + let mut stream_state = StreamState::new(); + let (bytes, consumed) = decode_single_channel_value(&data, &cn, &mut stream_state).unwrap(); + assert_eq!(bytes, vec![0x41, 0x42, 0x43]); + assert_eq!(consumed, 7); // 4 (length prefix) + 3 
(data) + assert_eq!(stream_state.bit_position, 7 * 8); + } + + #[test] + fn test_decode_single_channel_vlsd_underrun() { + // length=3 but only 1 byte of data + let data = [3u8, 0, 0, 0, 0x41]; + let cn = make_cn4(1, 0, 0, 0, 0, 0); + let mut stream_state = StreamState::new(); + let result = decode_single_channel_value(&data, &cn, &mut stream_state); + assert!(result.is_err()); + } + + #[test] + fn test_decode_single_channel_vlsc_bail() { + // cn_type=7 (VLSC), cn_bit_count=0 -> should bail + let data = [0u8; 16]; + let cn = make_cn4(7, 0, 0, 0, 0, 0); + let mut stream_state = StreamState::new(); + let result = decode_single_channel_value(&data, &cn, &mut stream_state); + assert!(result.is_err()); + } + + #[test] + fn test_decode_single_channel_alignment_reset_flag() { + // CN_F_ALIGNMENT_RESET = 1 << 18 = 0x40000 + let data = [0u8; 16]; + let cn = make_cn4(0, 8, 0, 0, 0, CN_F_ALIGNMENT_RESET); + let mut stream_state = StreamState::new(); + // advance stream position to 6 bytes (48 bits) + stream_state.bit_position = 48; + stream_state.alignment_offset = 0; + // When decode_single_channel_value is called, it should reset alignment_offset + // to byte_position (= 6), then calculate the aligned offset from there + let (bytes, _consumed) = decode_single_channel_value(&data, &cn, &mut stream_state).unwrap(); + assert_eq!(bytes.len(), 1); + // alignment_offset should have been reset to 6 before calculation + // aligned_offset for cn_alignment=0 = current_byte_pos = 6 + // final_byte_offset = 6 + 0 = 6 + // so bit_position = (6 + 1) * 8 = 56 + assert_eq!(stream_state.bit_position, 56); + } + + // ── decode_channel_union tests ── + + #[test] + fn test_decode_channel_union_two_members() { + use crate::mdfinfo::mdfinfo4::Cu4Block; + let data = [0x01u8, 0x00, 0x02, 0x00]; // 4 bytes + let cu_block = Cu4Block::default(); + + let mut cn_a = make_cn4(0, 16, 0, 0, 0, 0); + cn_a.unique_name = "chan_a".to_string(); + let mut cn_b = make_cn4(0, 16, 0, 0, 0, 0); + cn_b.unique_name = 
"chan_b".to_string(); + + let member_channels: Vec<&Cn4> = vec![&cn_a, &cn_b]; + let mut stream_state = StreamState::new(); + + let result = decode_channel_union(&data, &cu_block, &member_channels, &mut stream_state).unwrap(); + assert!(result.contains_key("chan_a")); + assert!(result.contains_key("chan_b")); + // Both start at 0, each consume 2 bytes (16 bits). max_end = 16 bits. + assert_eq!(stream_state.bit_position, 16); + } + + #[test] + fn test_decode_channel_union_different_sizes() { + use crate::mdfinfo::mdfinfo4::Cu4Block; + let data = [0x01u8, 0x00, 0x02, 0x00, 0x03, 0x00]; // 6 bytes + let cu_block = Cu4Block::default(); + + let mut cn_small = make_cn4(0, 8, 0, 0, 0, 0); // 1 byte + cn_small.unique_name = "small".to_string(); + let mut cn_large = make_cn4(0, 32, 0, 0, 0, 0); // 4 bytes + cn_large.unique_name = "large".to_string(); + + let member_channels: Vec<&Cn4> = vec![&cn_small, &cn_large]; + let mut stream_state = StreamState::new(); + + let result = decode_channel_union(&data, &cu_block, &member_channels, &mut stream_state).unwrap(); + assert!(result.contains_key("small")); + assert!(result.contains_key("large")); + // max_end is 32 bits (4 bytes from cn_large) + assert_eq!(stream_state.bit_position, 32); + } + + // ── decode_channel_variant tests ── + + #[test] + fn test_decode_channel_variant_match_first() { + use crate::mdfinfo::mdfinfo4::Cv4Block; + let data = [0xAAu8, 0xBB, 0xCC]; + let cv_block = Cv4Block { + cv_option_val: vec![0, 1, 2], + cv_option_count: 3, + ..Default::default() + }; + let mut cn_opt0 = make_cn4(0, 8, 0, 0, 0, 0); + cn_opt0.unique_name = "opt0".to_string(); + let mut cn_opt1 = make_cn4(0, 8, 0, 0, 0, 0); + cn_opt1.unique_name = "opt1".to_string(); + let mut cn_opt2 = make_cn4(0, 8, 0, 0, 0, 0); + cn_opt2.unique_name = "opt2".to_string(); + + let option_channels: Vec<&Cn4> = vec![&cn_opt0, &cn_opt1, &cn_opt2]; + let mut stream_state = StreamState::new(); + + let (idx, name, bytes) = + decode_channel_variant(&data, 
&cv_block, &option_channels, 0, &mut stream_state).unwrap(); + assert_eq!(idx, 0); + assert_eq!(name, "opt0"); + assert_eq!(bytes, vec![0xAA]); + } + + #[test] + fn test_decode_channel_variant_match_last() { + use crate::mdfinfo::mdfinfo4::Cv4Block; + let data = [0xAAu8, 0xBB, 0xCC]; + let cv_block = Cv4Block { + cv_option_val: vec![0, 1, 2], + cv_option_count: 3, + ..Default::default() + }; + let mut cn_opt0 = make_cn4(0, 8, 0, 0, 0, 0); + cn_opt0.unique_name = "opt0".to_string(); + let mut cn_opt1 = make_cn4(0, 8, 0, 0, 0, 0); + cn_opt1.unique_name = "opt1".to_string(); + let mut cn_opt2 = make_cn4(0, 8, 0, 0, 0, 0); + cn_opt2.unique_name = "opt2".to_string(); + + let option_channels: Vec<&Cn4> = vec![&cn_opt0, &cn_opt1, &cn_opt2]; + let mut stream_state = StreamState::new(); + + let (idx, name, _bytes) = + decode_channel_variant(&data, &cv_block, &option_channels, 2, &mut stream_state).unwrap(); + assert_eq!(idx, 2); + assert_eq!(name, "opt2"); + } + + #[test] + fn test_decode_channel_variant_no_match() { + use crate::mdfinfo::mdfinfo4::Cv4Block; + let data = [0xAAu8, 0xBB, 0xCC]; + let cv_block = Cv4Block { + cv_option_val: vec![0, 1, 2], + cv_option_count: 3, + ..Default::default() + }; + let mut cn_opt0 = make_cn4(0, 8, 0, 0, 0, 0); + cn_opt0.unique_name = "opt0".to_string(); + let option_channels: Vec<&Cn4> = vec![&cn_opt0]; + let mut stream_state = StreamState::new(); + + let result = decode_channel_variant(&data, &cv_block, &option_channels, 99, &mut stream_state); + assert!(result.is_err()); + } + + // ── decode_channel_list tests ── + + #[test] + fn test_decode_channel_list_size_in_elements() { + use crate::mdfinfo::mdfinfo4::Cl4Block; + // cl_flags = 1: size_value is number of elements + let cl_block = Cl4Block { + cl_flags: 1, + cl_alignment: 0, + cl_bit_offset: 0, + cl_byte_offset: 0, + ..Default::default() + }; + let data = [10u8, 20, 30, 40]; + let element_cn = make_cn4(0, 8, 0, 0, 0, 0); + let mut stream_state = StreamState::new(); + + let elements 
= decode_channel_list(&data, &cl_block, &element_cn, 3, &mut stream_state).unwrap(); + assert_eq!(elements.len(), 3); + assert_eq!(elements[0], vec![10u8]); + assert_eq!(elements[1], vec![20u8]); + assert_eq!(elements[2], vec![30u8]); + } + + #[test] + fn test_decode_channel_list_size_in_bytes() { + use crate::mdfinfo::mdfinfo4::Cl4Block; + // cl_flags = 0: size_value is number of bytes; element is 1 byte → 4 elements + let cl_block = Cl4Block { + cl_flags: 0, + cl_alignment: 0, + cl_bit_offset: 0, + cl_byte_offset: 0, + ..Default::default() + }; + let data = [10u8, 20, 30, 40]; + let element_cn = make_cn4(0, 8, 0, 0, 0, 0); // 1 byte per element + let mut stream_state = StreamState::new(); + + let elements = decode_channel_list(&data, &cl_block, &element_cn, 4, &mut stream_state).unwrap(); + assert_eq!(elements.len(), 4); + } + + #[test] + fn test_decode_channel_list_zero_elements() { + use crate::mdfinfo::mdfinfo4::Cl4Block; + let cl_block = Cl4Block { + cl_flags: 1, + ..Default::default() + }; + let data = [10u8, 20, 30, 40]; + let element_cn = make_cn4(0, 8, 0, 0, 0, 0); + let mut stream_state = StreamState::new(); + + let elements = decode_channel_list(&data, &cl_block, &element_cn, 0, &mut stream_state).unwrap(); + assert_eq!(elements.len(), 0); + } + + #[test] + fn test_decode_channel_list_underrun() { + use crate::mdfinfo::mdfinfo4::Cl4Block; + // cl_flags = 1: size_value=5 elements, but data only has 2 bytes + let cl_block = Cl4Block { + cl_flags: 1, + cl_alignment: 0, + cl_bit_offset: 0, + cl_byte_offset: 0, + ..Default::default() + }; + let data = [10u8, 20]; // only 2 bytes, not enough for 5 elements + let element_cn = make_cn4(0, 8, 0, 0, 0, 0); + let mut stream_state = StreamState::new(); + + let result = decode_channel_list(&data, &cl_block, &element_cn, 5, &mut stream_state); + assert!(result.is_err()); + } + + // ── extract_channel_values_as_u64 tests ── + + #[test] + fn test_extract_u64_from_uint8() { + use 
crate::data_holder::channel_data::ChannelData; + use arrow::array::UInt8Builder; + let mut builder = UInt8Builder::new(); + builder.append_value(1); + builder.append_value(2); + builder.append_value(3); + let mut cn = Cn4::default(); + cn.data = ChannelData::UInt8(builder); + let result = extract_channel_values_as_u64(&cn).unwrap(); + assert_eq!(result, vec![1u64, 2, 3]); + } + + #[test] + fn test_extract_u64_from_utf8_fails() { + use crate::data_holder::channel_data::ChannelData; + use arrow::array::LargeStringBuilder; + let mut cn = Cn4::default(); + cn.data = ChannelData::Utf8(LargeStringBuilder::new()); + let result = extract_channel_values_as_u64(&cn); + assert!(result.is_err()); + } } diff --git a/src/mdfwriter/mdfwriter4.rs b/src/mdfwriter/mdfwriter4.rs index 43c0a55..2ae4051 100644 --- a/src/mdfwriter/mdfwriter4.rs +++ b/src/mdfwriter/mdfwriter4.rs @@ -24,8 +24,8 @@ use crate::{ MdfInfo, mdfinfo4::{ At4Block, BlockType, Blockheader4, Ca4Block, Cg4, Cg4Block, Ch4Block, Cn4, - Cn4Block, Compo, Composition, Dg4, Dg4Block, Dz4Block, Ev4Block, FhBlock, Ld4Block, - MdfInfo4, MetaData, MetaDataBlockType, Si4Block, default_short_header, + Cn4Block, Compo, Composition, Dg4, Dg4Block, Dz4Block, Endianness, Ev4Block, + FhBlock, Ld4Block, MdfInfo4, MetaData, MetaDataBlockType, Si4Block, default_short_header, }, }, mdfreader::Mdf, @@ -559,6 +559,12 @@ pub fn mdfwriter4(mdf: &Mdf, file_name: &str, compression: bool) -> Result if is_vlsd { // VLSD channel: write SD block, set cn_data let mut offset: i64 = 0; + + let data_pointer = Arc::clone(&data_pointer); + let mut locked_data_pointer = data_pointer.lock(); + cn.block.cn_data = *locked_data_pointer; + // For VLSD, dg_data is not used (set to 0) + dg.block.dg_data = 0; let data_block = if compression { create_dz_sd(data, &mut offset) .context("failed creating dz or sd block")? 
@@ -566,12 +572,6 @@ pub fn mdfwriter4(mdf: &Mdf, file_name: &str, compression: bool) -> Result create_sd(data, &mut offset) .context("failed creating sd block")? }; - - let data_pointer = Arc::clone(&data_pointer); - let mut locked_data_pointer = data_pointer.lock(); - cn.block.cn_data = *locked_data_pointer; - // For VLSD, dg_data is not used (set to 0) - dg.block.dg_data = 0; *locked_data_pointer += offset; let buffer = write_sd_block(cn.block.cn_data, data_block, offset as usize)?; @@ -1471,7 +1471,7 @@ fn create_blocks( ) .with_context(|| format!("failed initilising array for channel {}", cn.unique_name))?, block: cn_block, - endian: machine_endian, + endian: Endianness::from(machine_endian), block_position: cn_position, pos_byte_beg: 0, n_bytes: cg_block.cg_data_bytes, diff --git a/tests/arrays.rs b/tests/arrays.rs new file mode 100644 index 0000000..8ced4a4 --- /dev/null +++ b/tests/arrays.rs @@ -0,0 +1,204 @@ +//! Integration tests for MDF4 array channels (CABlock / TensorArrow). +//! Tests read real MDF4.3 array example files and verify that array channels +//! load correctly and survive a round-trip write. + +use anyhow::Result; +use mdfr::data_holder::channel_data::ChannelData; +use mdfr::mdfreader::Mdf; +use std::sync::LazyLock; + +static BASE_PATH_ARRAYS: LazyLock = LazyLock::new(|| { + "/home/ratal/workspace/mdfreader/mdfreader/tests/MDF4/MDF4.3/Base_Standard/Examples/Arrays/" + .to_string() +}); + +/// Checks whether a `ChannelData` value is one of the ArrayD variants. 
+fn is_array_channel(data: &ChannelData) -> bool { + matches!( + data, + ChannelData::ArrayDFloat64(_) + | ChannelData::ArrayDFloat32(_) + | ChannelData::ArrayDInt8(_) + | ChannelData::ArrayDUInt8(_) + | ChannelData::ArrayDInt16(_) + | ChannelData::ArrayDUInt16(_) + | ChannelData::ArrayDInt32(_) + | ChannelData::ArrayDUInt32(_) + | ChannelData::ArrayDInt64(_) + | ChannelData::ArrayDUInt64(_) + ) +} + +// ────────────────────────────────────────────────────────────────────────────── +// Simple array files +// ────────────────────────────────────────────────────────────────────────────── + +#[test] +fn array_simple_vector() -> Result<()> { + let file = format!( + "{}Simple/Vector_MeasurementArrays.mf4", + BASE_PATH_ARRAYS.as_str() + ); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + let names = mdf.get_channel_names_set(); + assert!(!names.is_empty(), "No channels found in array file"); + + // At least one channel should be an ArrayD type + let has_array_channel = names + .iter() + .any(|name| mdf.get_channel_data(name).map_or(false, is_array_channel)); + assert!( + has_array_channel, + "Expected at least one ArrayD channel in Vector_MeasurementArrays.mf4" + ); + + Ok(()) +} + +#[test] +fn array_simple_dspace() -> Result<()> { + let file = format!( + "{}Simple/dSPACE_MeasurementArrays.mf4", + BASE_PATH_ARRAYS.as_str() + ); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + let names = mdf.get_channel_names_set(); + assert!(!names.is_empty(), "No channels found in dSPACE array file"); + + Ok(()) +} + +#[test] +fn array_with_fixed_axes() -> Result<()> { + let file = format!( + "{}Simple/Vector_ArrayWithFixedAxes.MF4", + BASE_PATH_ARRAYS.as_str() + ); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + let names = mdf.get_channel_names_set(); + assert!(!names.is_empty(), "No channels in Vector_ArrayWithFixedAxes.MF4"); + + Ok(()) +} + +// 
────────────────────────────────────────────────────────────────────────────── +// Classification +// ────────────────────────────────────────────────────────────────────────────── + +#[test] +fn array_classification_porsche() -> Result<()> { + let file = format!( + "{}Classification/Porsche_2D_classification_result.mf4", + BASE_PATH_ARRAYS.as_str() + ); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + let names = mdf.get_channel_names_set(); + assert!(!names.is_empty(), "No channels in Porsche_2D_classification_result.mf4"); + + Ok(()) +} + +// ────────────────────────────────────────────────────────────────────────────── +// Round-trip: write and re-read an array file +// ────────────────────────────────────────────────────────────────────────────── + +#[test] +fn array_round_trip_write() -> Result<()> { + use std::fs; + + let out_file = + "/home/ratal/workspace/mdfr/test_files/arrays_round_trip_test.mf4"; + + let file = format!( + "{}Simple/Vector_MeasurementArrays.mf4", + BASE_PATH_ARRAYS.as_str() + ); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + let original_names = mdf.get_channel_names_set(); + assert!(!original_names.is_empty()); + + // Write the file; the write itself should succeed + let mdf2 = mdf.write(out_file, false)?; + + // The written file must exist and be non-empty + assert!( + std::path::Path::new(out_file).exists(), + "Written file does not exist" + ); + + // The Mdf returned by write() should have at least the same channel names + let written_names = mdf2.get_channel_names_set(); + assert!(!written_names.is_empty(), "Written file has no channels"); + + // Clean up temporary file + fs::remove_file(out_file).ok(); + + Ok(()) +} + +// ────────────────────────────────────────────────────────────────────────────── +// Array channel properties +// ────────────────────────────────────────────────────────────────────────────── + +#[test] +fn array_channels_have_expected_ndim() -> 
Result<()> { + let file = format!( + "{}Simple/Vector_MeasurementArrays.mf4", + BASE_PATH_ARRAYS.as_str() + ); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + let names = mdf.get_channel_names_set(); + for name in &names { + if let Some(data) = mdf.get_channel_data(name) { + if is_array_channel(data) { + // Array channels must have ndim >= 1 + assert!(data.ndim() >= 1, "Array channel {name} has ndim < 1"); + } + } + } + + Ok(()) +} + +#[test] +fn array_channels_min_max() -> Result<()> { + let file = format!( + "{}Simple/Vector_MeasurementArrays.mf4", + BASE_PATH_ARRAYS.as_str() + ); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + let names = mdf.get_channel_names_set(); + let mut found_array = false; + for name in &names { + if let Some(data) = mdf.get_channel_data(name) { + if is_array_channel(data) && !data.is_empty() { + let (min, max) = data.min_max(); + // If there is data, min and max should be populated for numeric arrays + if min.is_some() && max.is_some() { + assert!( + min.unwrap() <= max.unwrap(), + "min > max for channel {name}" + ); + } + found_array = true; + } + } + } + assert!(found_array, "No non-empty array channels found"); + + Ok(()) +} diff --git a/tests/canopen_types.rs b/tests/canopen_types.rs new file mode 100644 index 0000000..9014287 --- /dev/null +++ b/tests/canopen_types.rs @@ -0,0 +1,33 @@ +use anyhow::Result; +use mdfr::mdfreader::Mdf; +use std::sync::LazyLock; + +static BASE: LazyLock = LazyLock::new(|| { + "/home/ratal/workspace/mdfreader/mdfreader/tests/MDF4/MDF4.3/Base_Standard/Examples/DataTypes/CANopenTypes/".to_string() +}); + +#[test] +fn canopen_date() -> Result<()> { + let mut mdf = Mdf::new(&format!("{}Vector_CANOpenDate.mf4", *BASE))?; + mdf.load_all_channels_data_in_memory()?; + let names = mdf.get_channel_names_set(); + assert!(!names.is_empty(), "No channels found in CANopen date file"); + let has_data = names + .iter() + .any(|n| 
mdf.get_channel_data(n).is_some_and(|d| !d.is_empty())); + assert!(has_data, "All channels empty in CANopen date file"); + Ok(()) +} + +#[test] +fn canopen_time() -> Result<()> { + let mut mdf = Mdf::new(&format!("{}Vector_CANOpenTime.mf4", *BASE))?; + mdf.load_all_channels_data_in_memory()?; + let names = mdf.get_channel_names_set(); + assert!(!names.is_empty(), "No channels found in CANopen time file"); + let has_data = names + .iter() + .any(|n| mdf.get_channel_data(n).is_some_and(|d| !d.is_empty())); + assert!(has_data, "All channels empty in CANopen time file"); + Ok(()) +} diff --git a/tests/channel_data_ops.rs b/tests/channel_data_ops.rs new file mode 100644 index 0000000..48c0e96 --- /dev/null +++ b/tests/channel_data_ops.rs @@ -0,0 +1,586 @@ +//! Integration tests for ChannelData methods. +//! These tests exercise public API methods that are not covered by the internal unit tests, +//! or cover additional variants to improve overall code coverage. + +use arrow::array::{ + Array, ArrayBuilder, FixedSizeBinaryBuilder, Float64Builder, Int8Builder, Int16Builder, + Int32Builder, Int64Builder, LargeStringBuilder, UInt8Builder, UInt16Builder, UInt32Builder, + UInt64Builder, +}; +use arrow::buffer::MutableBuffer; +use arrow::datatypes::{Float64Type, Int16Type, Int8Type, UInt8Type, UInt32Type, UInt64Type}; +use mdfr::data_holder::channel_data::ChannelData; +use mdfr::data_holder::complex_arrow::ComplexArrow; +use mdfr::data_holder::tensor_arrow::{Order, TensorArrow}; + +// ────────────────────────────────────────────────────────────────────────────── +// Group 1: zeros() for variants not previously covered +// ────────────────────────────────────────────────────────────────────────────── + +#[test] +fn test_zeros_virtual_channel_cn_type_3() { + // cn_type 3 always returns UInt64 counter regardless of self type. 
+ let cd = ChannelData::Float64(Float64Builder::new()); + let result = cd + .zeros(3, 5, 8, (vec![1], Order::RowMajor)) + .unwrap(); + assert!(matches!(result, ChannelData::UInt64(_))); + assert_eq!(result.len(), 5); + assert_eq!(result.to_u64_vec(), Some(vec![0, 1, 2, 3, 4])); +} + +#[test] +fn test_zeros_virtual_channel_cn_type_6() { + let cd = ChannelData::Float64(Float64Builder::new()); + let result = cd + .zeros(6, 3, 8, (vec![1], Order::RowMajor)) + .unwrap(); + assert!(matches!(result, ChannelData::UInt64(_))); + assert_eq!(result.len(), 3); + assert_eq!(result.to_u64_vec(), Some(vec![0, 1, 2])); +} + +#[test] +fn test_zeros_array_d_int16() { + let cd = ChannelData::ArrayDInt16(TensorArrow::new()); + // shape=[2,3] → product=6, buffer = 6 i16 = 12 bytes + let result = cd + .zeros(0, 4, 6, (vec![2, 3], Order::RowMajor)) + .unwrap(); + assert!(matches!(result, ChannelData::ArrayDInt16(_))); + // len = buffer_bytes / shape_product = 12 / 6 = 2 + assert!(!result.is_empty()); + // All values should be zero + if let ChannelData::ArrayDInt16(ta) = &result { + assert!(ta.values_slice().iter().all(|&v| v == 0)); + } +} + +#[test] +fn test_zeros_array_d_uint8() { + let cd = ChannelData::ArrayDUInt8(TensorArrow::new()); + let result = cd + .zeros(0, 5, 1, (vec![4], Order::RowMajor)) + .unwrap(); + assert!(matches!(result, ChannelData::ArrayDUInt8(_))); + assert!(!result.is_empty()); +} + +#[test] +fn test_zeros_fixed_size_byte() { + let cd = ChannelData::FixedSizeByteArray(FixedSizeBinaryBuilder::with_capacity(1, 4)); + let result = cd + .zeros(0, 3, 4, (vec![1], Order::RowMajor)) + .unwrap(); + assert!(matches!(result, ChannelData::FixedSizeByteArray(_))); + assert_eq!(result.len(), 0); // zeros creates empty builder with capacity +} + +#[test] +fn test_zeros_complex32() { + let cd = ChannelData::Complex32(ComplexArrow::new()); + // zeros creates vec![0f32; cycle_count*2] = 2*2=4 f32 = 16 bytes + // ComplexArrow::new_from_buffer: len = byte_len / 2 = 16 / 2 = 8 + let 
result = cd + .zeros(0, 2, 8, (vec![1], Order::RowMajor)) + .unwrap(); + assert!(matches!(result, ChannelData::Complex32(_))); + assert!(!result.is_empty()); +} + +#[test] +fn test_zeros_complex64() { + let cd = ChannelData::Complex64(ComplexArrow::new()); + // zeros creates vec![0f64; cycle_count*2] = 3*2=6 f64 = 48 bytes + // ComplexArrow::new_from_buffer: len = byte_len / 2 = 48 / 2 = 24 + let result = cd + .zeros(0, 3, 16, (vec![1], Order::RowMajor)) + .unwrap(); + assert!(matches!(result, ChannelData::Complex64(_))); + assert!(!result.is_empty()); +} + +#[test] +fn test_zeros_utf8() { + let mut b = LargeStringBuilder::new(); + b.append_value("x"); + let cd = ChannelData::Utf8(b); + let result = cd.zeros(0, 5, 10, (vec![1], Order::RowMajor)).unwrap(); + assert!(matches!(result, ChannelData::Utf8(_))); + assert_eq!(result.len(), 0); // empty builder after zeros +} + +#[test] +fn test_zeros_array_d_float64() { + let cd = ChannelData::ArrayDFloat64(TensorArrow::new()); + // For ArrayDFloat64, zeros uses cycle_count * shape product + // cycle_count=2, shape=[1] → buffer = 2*1 f64 = 16 bytes, len = 16/1 = 16 + let result = cd + .zeros(0, 2, 8, (vec![1], Order::RowMajor)) + .unwrap(); + assert!(matches!(result, ChannelData::ArrayDFloat64(_))); + assert!(!result.is_empty()); +} + +#[test] +fn test_zeros_array_d_int32() { + let cd = ChannelData::ArrayDInt32(TensorArrow::new()); + let result = cd + .zeros(0, 3, 4, (vec![3], Order::RowMajor)) + .unwrap(); + assert!(matches!(result, ChannelData::ArrayDInt32(_))); + if let ChannelData::ArrayDInt32(ta) = &result { + assert!(ta.values_slice().iter().all(|&v| v == 0)); + } +} + +// ────────────────────────────────────────────────────────────────────────────── +// Group 2: len() for ArrayD variant using new_from_buffer +// ────────────────────────────────────────────────────────────────────────────── + +#[test] +fn test_len_array_d_from_buffer() { + // 4 f64 values, shape=[1]: buffer = 4*8 = 32 bytes, len = 32/1 = 32 + let buf 
= MutableBuffer::from_iter([1.0f64, 2.0, 3.0, 4.0].iter().copied()); + let ta = TensorArrow::::new_from_buffer(buf, vec![1], Order::RowMajor); + let cd = ChannelData::ArrayDFloat64(ta); + // internal len formula: byte_len / shape_product = 32 / 1 = 32 + assert_eq!(cd.len(), 32); + assert!(!cd.is_empty()); +} + +#[test] +fn test_len_array_d_int8_from_buffer() { + // 6 i8 values, shape=[3]: buffer = 6 bytes, len = 6/3 = 2 + let buf = MutableBuffer::from_iter([1i8, 2, 3, 4, 5, 6].iter().copied()); + let ta = TensorArrow::::new_from_buffer(buf, vec![3], Order::RowMajor); + let cd = ChannelData::ArrayDInt8(ta); + assert_eq!(cd.len(), 2); + assert!(!cd.is_empty()); +} + +#[test] +fn test_len_array_d_uint8_from_buffer() { + // 8 u8 values, shape=[2]: buffer = 8 bytes, len = 8/2 = 4 + let buf = MutableBuffer::from_iter([1u8, 2, 3, 4, 5, 6, 7, 8].iter().copied()); + let ta = TensorArrow::::new_from_buffer(buf, vec![2], Order::RowMajor); + let cd = ChannelData::ArrayDUInt8(ta); + assert_eq!(cd.len(), 4); +} + +// ────────────────────────────────────────────────────────────────────────────── +// Group 3: min_max() for variants with new coverage +// ────────────────────────────────────────────────────────────────────────────── + +#[test] +fn test_min_max_array_d_int16() { + let buf = MutableBuffer::from_iter([10i16, -5, 20, 0].iter().copied()); + let ta = TensorArrow::::new_from_buffer(buf, vec![1], Order::RowMajor); + let cd = ChannelData::ArrayDInt16(ta); + let (min, max) = cd.min_max(); + assert!(min.is_some()); + assert!(max.is_some()); + assert_eq!(min.unwrap(), -5.0); + assert_eq!(max.unwrap(), 20.0); +} + +#[test] +fn test_min_max_array_d_uint32() { + let buf = MutableBuffer::from_iter([100u32, 50, 200, 1].iter().copied()); + let ta = TensorArrow::::new_from_buffer(buf, vec![1], Order::RowMajor); + let cd = ChannelData::ArrayDUInt32(ta); + let (min, max) = cd.min_max(); + assert_eq!(min.unwrap(), 1.0); + assert_eq!(max.unwrap(), 200.0); +} + +#[test] +fn 
test_min_max_array_d_uint64() { + let buf = MutableBuffer::from_iter([5u64, 1, 100].iter().copied()); + let ta = TensorArrow::::new_from_buffer(buf, vec![1], Order::RowMajor); + let cd = ChannelData::ArrayDUInt64(ta); + let (min, max) = cd.min_max(); + assert_eq!(min.unwrap(), 1.0); + assert_eq!(max.unwrap(), 100.0); +} + +// ────────────────────────────────────────────────────────────────────────────── +// Group 4: to_u64_vec() for all integer variants not covered by internal tests +// ────────────────────────────────────────────────────────────────────────────── + +#[test] +fn test_to_u64_vec_int8() { + let mut b = Int8Builder::new(); + b.append_value(10); + b.append_value(-1); // cast to u64: 0xFFFFFFFFFFFFFFFF as u64 + let cd = ChannelData::Int8(b); + let v = cd.to_u64_vec().unwrap(); + assert_eq!(v.len(), 2); + assert_eq!(v[0], 10u64); + // -1i8 as u64 = 18446744073709551615 + assert_eq!(v[1], (-1i8) as u64); +} + +#[test] +fn test_to_u64_vec_uint8() { + let mut b = UInt8Builder::new(); + b.append_value(255u8); + b.append_value(0u8); + let cd = ChannelData::UInt8(b); + assert_eq!(cd.to_u64_vec().unwrap(), vec![255u64, 0u64]); +} + +#[test] +fn test_to_u64_vec_uint16() { + let mut b = UInt16Builder::new(); + b.append_value(1000u16); + let cd = ChannelData::UInt16(b); + assert_eq!(cd.to_u64_vec().unwrap(), vec![1000u64]); +} + +#[test] +fn test_to_u64_vec_uint32() { + let mut b = UInt32Builder::new(); + b.append_value(u32::MAX); + let cd = ChannelData::UInt32(b); + assert_eq!(cd.to_u64_vec().unwrap(), vec![u32::MAX as u64]); +} + +#[test] +fn test_to_u64_vec_uint64() { + let mut b = UInt64Builder::new(); + b.append_value(u64::MAX); + let cd = ChannelData::UInt64(b); + assert_eq!(cd.to_u64_vec().unwrap(), vec![u64::MAX]); +} + +#[test] +fn test_to_u64_vec_int64() { + let mut b = Int64Builder::new(); + b.append_value(42i64); + b.append_value(-100i64); + let cd = ChannelData::Int64(b); + let v = cd.to_u64_vec().unwrap(); + assert_eq!(v[0], 42u64); + 
assert_eq!(v[1], (-100i64) as u64); +} + +#[test] +fn test_to_u64_vec_int32() { + let mut b = Int32Builder::new(); + b.append_value(7i32); + b.append_value(-2i32); + let cd = ChannelData::Int32(b); + let v = cd.to_u64_vec().unwrap(); + assert_eq!(v[0], 7u64); + assert_eq!(v[1], (-2i32) as u64); +} + +#[test] +fn test_to_u64_vec_float64_returns_none() { + let mut b = Float64Builder::new(); + b.append_value(1.0); + let cd = ChannelData::Float64(b); + assert!(cd.to_u64_vec().is_none()); +} + +#[test] +fn test_to_u64_vec_utf8_returns_none() { + let mut b = LargeStringBuilder::new(); + b.append_value("abc"); + let cd = ChannelData::Utf8(b); + assert!(cd.to_u64_vec().is_none()); +} + +// ────────────────────────────────────────────────────────────────────────────── +// Group 5: ndim() for more ArrayD variants +// ────────────────────────────────────────────────────────────────────────────── + +#[test] +fn test_ndim_array_d_int16_2d() { + let buf = MutableBuffer::from_iter([1i16, 2, 3, 4, 5, 6].iter().copied()); + let ta = TensorArrow::::new_from_buffer(buf, vec![2, 3], Order::RowMajor); + let cd = ChannelData::ArrayDInt16(ta); + assert_eq!(cd.ndim(), 2); +} + +#[test] +fn test_ndim_complex_is_1() { + let cd = ChannelData::Complex32(ComplexArrow::new()); + assert_eq!(cd.ndim(), 1); +} + +#[test] +fn test_ndim_fixed_size_byte_is_1() { + let b = FixedSizeBinaryBuilder::with_capacity(1, 4); + let cd = ChannelData::FixedSizeByteArray(b); + assert_eq!(cd.ndim(), 1); +} + +// ────────────────────────────────────────────────────────────────────────────── +// Group 6: Clone for FixedSizeByteArray with nulls +// ────────────────────────────────────────────────────────────────────────────── + +#[test] +fn test_clone_fixed_size_byte_with_nulls() { + let mut b = FixedSizeBinaryBuilder::with_capacity(3, 4); + b.append_value(b"AAAA").unwrap(); + b.append_null(); + b.append_value(b"CCCC").unwrap(); + let cd = ChannelData::FixedSizeByteArray(b); + let cloned = cd.clone(); + + 
assert_eq!(cloned.len(), cd.len()); + assert!(matches!(cloned, ChannelData::FixedSizeByteArray(_))); + if let ChannelData::FixedSizeByteArray(mut b) = cloned { + let arr = b.finish(); + assert_eq!(arr.len(), 3); + assert!(arr.is_null(1)); + assert!(!arr.is_null(0)); + assert!(!arr.is_null(2)); + assert_eq!(arr.value(0), b"AAAA"); + assert_eq!(arr.value(2), b"CCCC"); + } +} + +#[test] +fn test_clone_utf8() { + let mut b = LargeStringBuilder::new(); + b.append_value("hello"); + b.append_value("world"); + let cd = ChannelData::Utf8(b); + let cloned = cd.clone(); + assert_eq!(cloned.len(), 2); +} + +#[test] +fn test_clone_complex32() { + let cd = ChannelData::Complex32(ComplexArrow::new()); + let cloned = cd.clone(); + assert_eq!(cloned.len(), 0); + assert!(matches!(cloned, ChannelData::Complex32(_))); +} + +// ────────────────────────────────────────────────────────────────────────────── +// Group 7: bit_count() and byte_count() for variants not covered by internal tests +// ────────────────────────────────────────────────────────────────────────────── + +#[test] +fn test_bit_count_int8() { + let mut b = Int8Builder::new(); + b.append_value(1); + let cd = ChannelData::Int8(b); + assert_eq!(cd.bit_count(), 8); +} + +#[test] +fn test_bit_count_complex64() { + let cd = ChannelData::Complex64(ComplexArrow::new()); + assert_eq!(cd.bit_count(), 128); +} + +#[test] +fn test_bit_count_array_d_int8() { + let cd = ChannelData::ArrayDInt8(TensorArrow::new()); + assert_eq!(cd.bit_count(), 8); +} + +#[test] +fn test_bit_count_array_d_uint16() { + let cd = ChannelData::ArrayDUInt16(TensorArrow::new()); + assert_eq!(cd.bit_count(), 16); +} + +#[test] +fn test_byte_count_complex32() { + let cd = ChannelData::Complex32(ComplexArrow::new()); + assert_eq!(cd.byte_count(), 8); +} + +#[test] +fn test_byte_count_complex64() { + let cd = ChannelData::Complex64(ComplexArrow::new()); + assert_eq!(cd.byte_count(), 16); +} + +#[test] +fn test_byte_count_array_d_int16() { + let cd = 
ChannelData::ArrayDInt16(TensorArrow::new()); + assert_eq!(cd.byte_count(), 2); +} + +// ────────────────────────────────────────────────────────────────────────────── +// Group 8: is_empty() for variants not covered by internal tests +// ────────────────────────────────────────────────────────────────────────────── + +#[test] +fn test_is_empty_array_d_float32() { + let cd = ChannelData::ArrayDFloat32(TensorArrow::new()); + assert!(cd.is_empty()); +} + +#[test] +fn test_is_empty_array_d_int16() { + let cd = ChannelData::ArrayDInt16(TensorArrow::new()); + assert!(cd.is_empty()); +} + +#[test] +fn test_is_not_empty_array_d() { + let buf = MutableBuffer::from_iter([1.0f64, 2.0].iter().copied()); + let ta = TensorArrow::::new_from_buffer(buf, vec![1], Order::RowMajor); + let cd = ChannelData::ArrayDFloat64(ta); + assert!(!cd.is_empty()); +} + +#[test] +fn test_is_empty_complex32() { + let cd = ChannelData::Complex32(ComplexArrow::new()); + assert!(cd.is_empty()); +} + +#[test] +fn test_is_empty_fixed_size_byte() { + let cd = ChannelData::FixedSizeByteArray(FixedSizeBinaryBuilder::with_capacity(0, 4)); + assert!(cd.is_empty()); +} + +// ────────────────────────────────────────────────────────────────────────────── +// Group 9: data_type() for uncovered variants +// ────────────────────────────────────────────────────────────────────────────── + +#[test] +fn test_data_type_fixed_size_byte_array() { + let b = FixedSizeBinaryBuilder::with_capacity(1, 4); + let cd = ChannelData::FixedSizeByteArray(b); + // LE: FixedSizeByteArray → 10; BE: also 10 + assert_eq!(cd.data_type(false), 10); + assert_eq!(cd.data_type(true), 10); +} + +#[test] +fn test_data_type_array_d_int8() { + let cd = ChannelData::ArrayDInt8(TensorArrow::new()); + // LE: signed int → 2; BE: → 3 + assert_eq!(cd.data_type(false), 2); + assert_eq!(cd.data_type(true), 3); +} + +#[test] +fn test_data_type_array_d_uint8() { + let cd = ChannelData::ArrayDUInt8(TensorArrow::new()); + // LE: unsigned int → 0; BE: → 1 + 
assert_eq!(cd.data_type(false), 0); + assert_eq!(cd.data_type(true), 1); +} + +#[test] +fn test_data_type_complex32() { + let cd = ChannelData::Complex32(ComplexArrow::new()); + // LE: 15; BE: 16 + assert_eq!(cd.data_type(false), 15); + assert_eq!(cd.data_type(true), 16); +} + +#[test] +fn test_data_type_complex64() { + let cd = ChannelData::Complex64(ComplexArrow::new()); + assert_eq!(cd.data_type(false), 15); + assert_eq!(cd.data_type(true), 16); +} + +#[test] +fn test_data_type_array_d_float32() { + let cd = ChannelData::ArrayDFloat32(TensorArrow::new()); + // LE: 4; BE: 5 + assert_eq!(cd.data_type(false), 4); + assert_eq!(cd.data_type(true), 5); +} + +#[test] +fn test_data_type_array_d_int32() { + let cd = ChannelData::ArrayDInt32(TensorArrow::new()); + assert_eq!(cd.data_type(false), 2); + assert_eq!(cd.data_type(true), 3); +} + +// ────────────────────────────────────────────────────────────────────────────── +// Group 10: as_u64_slice() for non-UInt64 variants +// ────────────────────────────────────────────────────────────────────────────── + +#[test] +fn test_as_u64_slice_non_uint64_returns_none() { + let mut b = Int32Builder::new(); + b.append_value(1); + let cd = ChannelData::Int32(b); + assert!(cd.as_u64_slice().is_none()); +} + +#[test] +fn test_as_u64_slice_float64_returns_none() { + let mut b = Float64Builder::new(); + b.append_value(1.0); + let cd = ChannelData::Float64(b); + assert!(cd.as_u64_slice().is_none()); +} + +// ────────────────────────────────────────────────────────────────────────────── +// Group 11: byte_count() for additional scalar types +// ────────────────────────────────────────────────────────────────────────────── + +#[test] +fn test_byte_count_int8() { + let cd = ChannelData::Int8(Int8Builder::new()); + assert_eq!(cd.byte_count(), 1); +} + +#[test] +fn test_byte_count_uint8() { + let cd = ChannelData::UInt8(UInt8Builder::new()); + assert_eq!(cd.byte_count(), 1); +} + +#[test] +fn test_byte_count_int16() { + let cd = 
ChannelData::Int16(Int16Builder::new()); + assert_eq!(cd.byte_count(), 2); +} + +#[test] +fn test_byte_count_int64() { + let cd = ChannelData::Int64(Int64Builder::new()); + assert_eq!(cd.byte_count(), 8); +} + +#[test] +fn test_byte_count_uint64() { + let cd = ChannelData::UInt64(UInt64Builder::new()); + assert_eq!(cd.byte_count(), 8); +} + +// ────────────────────────────────────────────────────────────────────────────── +// Group 12: shape() for additional variants +// ────────────────────────────────────────────────────────────────────────────── + +#[test] +fn test_shape_fixed_size_byte() { + let mut b = FixedSizeBinaryBuilder::with_capacity(2, 4); + b.append_value(b"AAAA").unwrap(); + b.append_value(b"BBBB").unwrap(); + let cd = ChannelData::FixedSizeByteArray(b); + let (shape, order) = cd.shape(); + assert_eq!(shape, vec![2]); + assert_eq!(order, Order::RowMajor); +} + +#[test] +fn test_shape_complex32_with_data() { + use arrow::datatypes::Float32Type; + // 4 f32 values = 16 bytes; ComplexArrow::new_from_buffer: len = 16/2 = 8 + let buf = MutableBuffer::from_iter([1.0f32, 2.0, 3.0, 4.0].iter().copied()); + let ca = mdfr::data_holder::complex_arrow::ComplexArrow::::new_from_buffer(buf); + let cd = ChannelData::Complex32(ca); + let (shape, order) = cd.shape(); + assert_eq!(shape, vec![8]); // len = byte_len / 2 = 16/2 = 8 + assert_eq!(order, Order::RowMajor); +} diff --git a/tests/conversions_int_types.rs b/tests/conversions_int_types.rs new file mode 100644 index 0000000..d79791b --- /dev/null +++ b/tests/conversions_int_types.rs @@ -0,0 +1,205 @@ +/// Integration tests exercising Int8/Int16/Float32 conversion arms in conversions4.rs. +/// +/// The existing sample files all use Float64 as raw channel type, so those arms +/// were never hit. These tests load a synthetic MDF4 fixture that has Int8/Int16/Float32 +/// channels with a linear CC block. 
+use anyhow::Result; +use mdfr::data_holder::channel_data::ChannelData; +use mdfr::mdfreader::Mdf; +use std::sync::LazyLock; + +const FIXTURE_PATH: &str = "test_files/synthetic/int_linear_cc.mf4"; + +/// Ensure the fixture file exists before tests run. +static FIXTURE: LazyLock<()> = LazyLock::new(|| { + create_fixture().expect("failed to create int_linear_cc fixture"); +}); + +// ─── Minimal MDF4 binary builder ───────────────────────────────────────────── + +fn pu8(b: &mut Vec, v: u8) { b.push(v); } +fn pu16(b: &mut Vec, v: u16) { b.extend_from_slice(&v.to_le_bytes()); } +fn pi16(b: &mut Vec, v: i16) { b.extend_from_slice(&v.to_le_bytes()); } +fn pu32(b: &mut Vec, v: u32) { b.extend_from_slice(&v.to_le_bytes()); } +fn pu64(b: &mut Vec, v: u64) { b.extend_from_slice(&v.to_le_bytes()); } +fn pi64(b: &mut Vec, v: i64) { b.extend_from_slice(&v.to_le_bytes()); } +fn pf32(b: &mut Vec, v: f32) { b.extend_from_slice(&v.to_le_bytes()); } +fn pf64(b: &mut Vec, v: f64) { b.extend_from_slice(&v.to_le_bytes()); } +fn zeros(b: &mut Vec, n: usize) { b.extend(std::iter::repeat_n(0u8, n)); } + +fn id_block(b: &mut Vec) { + b.extend_from_slice(b"MDF "); b.extend_from_slice(b"4.30 "); b.extend_from_slice(b"mdfr "); + pu16(b,0); pu16(b,0); pu16(b,430); pu16(b,0); zeros(b,2); zeros(b,26); pu16(b,0); pu16(b,0); +} +fn hd4(b: &mut Vec, dg: i64, fh: i64) { + b.extend_from_slice(b"##HD"); zeros(b,4); pu64(b,104); pu64(b,6); + pi64(b,dg); pi64(b,fh); pi64(b,0); pi64(b,0); pi64(b,0); pi64(b,0); + pu64(b,0); pi16(b,0); pi16(b,0); pu8(b,0); pu8(b,0); pu8(b,0); pu8(b,0); + pf64(b,0.0); pf64(b,0.0); +} +fn fh(b: &mut Vec) { + b.extend_from_slice(b"##FH"); zeros(b,4); pu64(b,56); pu64(b,2); + pi64(b,0); pi64(b,0); pu64(b,0); pi16(b,0); pi16(b,0); pu8(b,0); zeros(b,3); +} +fn dg4(b: &mut Vec, cg: i64, data: i64) { + b.extend_from_slice(b"##DG"); zeros(b,4); pu64(b,64); pu64(b,4); + pi64(b,0); pi64(b,cg); pi64(b,data); pi64(b,0); pu8(b,0); zeros(b,7); +} +fn cg4(b: &mut Vec, cn: i64, cycles: u64, 
data_bytes: u32) { + b.extend_from_slice(b"##CG"); zeros(b,4); pu64(b,104); + pu64(b,6); pi64(b,0); pi64(b,cn); pi64(b,0); pi64(b,0); pi64(b,0); pi64(b,0); + pu64(b,0); pu64(b,cycles); pu16(b,0); pu16(b,0); zeros(b,4); pu32(b,data_bytes); pu32(b,0); +} +#[allow(clippy::too_many_arguments)] +fn cn4(b: &mut Vec, cn_type: u8, sync: u8, dtype: u8, byte_off: u32, bits: u32, + next: i64, tx: i64, cc: i64) { + b.extend_from_slice(b"##CN"); zeros(b,4); pu64(b,160); + pu64(b,8); pi64(b,next); pi64(b,0); pi64(b,tx); pi64(b,0); pi64(b,cc); + pi64(b,0); pi64(b,0); pi64(b,0); + pu8(b,cn_type); pu8(b,sync); pu8(b,dtype); pu8(b,0); + pu32(b,byte_off); pu32(b,bits); pu32(b,0); pu32(b,0); + pu8(b,0xff); pu8(b,0); pu16(b,0); + pf64(b,0.0); pf64(b,0.0); pf64(b,0.0); pf64(b,0.0); pf64(b,0.0); pf64(b,0.0); +} +fn tx(b: &mut Vec, text: &str) { + let t = text.as_bytes(); + let len = 24u64 + t.len() as u64 + 1; + b.extend_from_slice(b"##TX"); zeros(b,4); pu64(b,len); pu64(b,0); + b.extend_from_slice(t); b.push(0); +} +fn cc_linear(b: &mut Vec, a0: f64, a1: f64) { + b.extend_from_slice(b"##CC"); zeros(b,4); pu64(b,96); + pu64(b,4); pi64(b,0); pi64(b,0); pi64(b,0); pi64(b,0); + pu8(b,1); pu8(b,0); pu16(b,0); pu16(b,0); pu16(b,2); + pf64(b,0.0); pf64(b,0.0); pf64(b,a0); pf64(b,a1); +} +fn dt(b: &mut Vec, records: &[u8]) { + let len = 24u64 + records.len() as u64; + b.extend_from_slice(b"##DT"); zeros(b,4); pu64(b,len); pu64(b,0); + b.extend_from_slice(records); +} + +/// Creates `test_files/synthetic/int_linear_cc.mf4` if it doesn't already exist. 
+/// +/// Layout (offsets): +/// [0] IdBlock 64 b +/// [64] Hd4 104 b (hd_dg_first=224, hd_fh_first=168) +/// [168] FhBlock 56 b +/// [224] Dg4 64 b (cg_first=288, data=1260) +/// [288] CG 104 b (cn_first=392, cycles=4, data_bytes=15) +/// [392] CN_master 160 b (FloatLE/64bit, byte_off=0, tx=1032, cc=0) +/// [552] CN_int8 160 b (IntLE/8bit, byte_off=8, tx=1064, cc=1164) +/// [712] CN_int16 160 b (IntLE/16bit, byte_off=9, tx=1096, cc=1164) +/// [872] CN_float32 160 b (FloatLE/32bit, byte_off=11, tx=1129, cc=1164) +/// [1032] TX "time_ch\0" 32 b +/// [1064] TX "int8_ch\0" 32 b +/// [1096] TX "int16_ch\0" 33 b +/// [1129] TX "float32_ch\0" 35 b +/// [1164] CC linear(0.5,2.0) 96 b +/// [1260] DT 4×15=60b + 24 hdr = 84 b +/// Total: 1344 b +fn create_fixture() -> Result<()> { + if std::path::Path::new(FIXTURE_PATH).exists() { + return Ok(()); + } + std::fs::create_dir_all("test_files/synthetic")?; + let mut b: Vec = Vec::with_capacity(1344); + + id_block(&mut b); debug_assert_eq!(b.len(), 64); + hd4(&mut b, 224, 168); debug_assert_eq!(b.len(), 168); + fh(&mut b); debug_assert_eq!(b.len(), 224); + dg4(&mut b, 288, 1260); debug_assert_eq!(b.len(), 288); + cg4(&mut b, 392, 4, 15); debug_assert_eq!(b.len(), 392); + cn4(&mut b, 2, 1, 4, 0, 64, 552, 1032, 0); debug_assert_eq!(b.len(), 552); + cn4(&mut b, 0, 0, 2, 8, 8, 712, 1064, 1164); debug_assert_eq!(b.len(), 712); + cn4(&mut b, 0, 0, 2, 9, 16, 872, 1096, 1164); debug_assert_eq!(b.len(), 872); + cn4(&mut b, 0, 0, 4, 11, 32, 0, 1129, 1164); debug_assert_eq!(b.len(), 1032); + tx(&mut b, "time_ch"); debug_assert_eq!(b.len(), 1064); + tx(&mut b, "int8_ch"); debug_assert_eq!(b.len(), 1096); + tx(&mut b, "int16_ch"); debug_assert_eq!(b.len(), 1129); + tx(&mut b, "float32_ch"); debug_assert_eq!(b.len(), 1164); + cc_linear(&mut b, 0.5, 2.0); debug_assert_eq!(b.len(), 1260); + + let raw_i8: [i8; 4] = [-5, 0, 5, 10]; + let raw_i16: [i16; 4] = [-100, 0, 100, 200]; + let raw_f32: [f32; 4] = [ 1.5, 2.5, 3.5, 4.5]; + let mut recs: Vec = 
Vec::with_capacity(60); + for i in 0..4 { + pf64(&mut recs, i as f64); + recs.push(raw_i8[i] as u8); + recs.extend_from_slice(&raw_i16[i].to_le_bytes()); + pf32(&mut recs, raw_f32[i]); + } + dt(&mut b, &recs); + debug_assert_eq!(b.len(), 1344); + + std::fs::write(FIXTURE_PATH, &b)?; + Ok(()) +} + +// ─── Tests ──────────────────────────────────────────────────────────────────── + +fn load_fixture() -> Result { + LazyLock::force(&FIXTURE); + let mut mdf = Mdf::new(FIXTURE_PATH)?; + mdf.load_all_channels_data_in_memory()?; + Ok(mdf) +} + +#[test] +fn int8_linear_conversion() -> Result<()> { + let mdf = load_fixture()?; + // raw: -5, 0, 5, 10 → phys = raw * 2.0 + 0.5 → -9.5, 0.5, 10.5, 20.5 + let data = mdf.get_channel_data("int8_ch").expect("int8_ch not found"); + assert!( + matches!(data, ChannelData::Float64(_)), + "expected Float64 after linear CC, got {}", + data.data_type(false) + ); + if let ChannelData::Float64(arr) = data { + let expected = [-9.5f64, 0.5, 10.5, 20.5]; + for (i, (&got, &exp)) in arr.values_slice().iter().zip(expected.iter()).enumerate() { + assert!((got - exp).abs() < 1e-9, "int8_ch[{i}]: expected {exp}, got {got}"); + } + } + Ok(()) +} + +#[test] +fn int16_linear_conversion() -> Result<()> { + let mdf = load_fixture()?; + // raw: -100, 0, 100, 200 → -199.5, 0.5, 200.5, 400.5 + let data = mdf.get_channel_data("int16_ch").expect("int16_ch not found"); + assert!(matches!(data, ChannelData::Float64(_)), "expected Float64 after linear CC"); + if let ChannelData::Float64(arr) = data { + let expected = [-199.5f64, 0.5, 200.5, 400.5]; + for (i, (&got, &exp)) in arr.values_slice().iter().zip(expected.iter()).enumerate() { + assert!((got - exp).abs() < 1e-9, "int16_ch[{i}]: expected {exp}, got {got}"); + } + } + Ok(()) +} + +#[test] +fn float32_linear_conversion() -> Result<()> { + let mdf = load_fixture()?; + // raw: 1.5, 2.5, 3.5, 4.5 → 3.5, 5.5, 7.5, 9.5 + let data = mdf.get_channel_data("float32_ch").expect("float32_ch not found"); + 
assert!(matches!(data, ChannelData::Float64(_)), "expected Float64 after linear CC"); + if let ChannelData::Float64(arr) = data { + let expected = [3.5f64, 5.5, 7.5, 9.5]; + for (i, (&got, &exp)) in arr.values_slice().iter().zip(expected.iter()).enumerate() { + assert!((got - exp).abs() < 1e-6, "float32_ch[{i}]: expected {exp}, got {got}"); + } + } + Ok(()) +} + +#[test] +fn master_channel_loaded() -> Result<()> { + let mdf = load_fixture()?; + let data = mdf.get_channel_data("time_ch").expect("time_ch not found"); + assert!(matches!(data, ChannelData::Float64(_)), "master should be Float64"); + assert_eq!(data.len(), 4); + Ok(()) +} diff --git a/tests/data_types.rs b/tests/data_types.rs index 84fbbf4..6a53e40 100644 --- a/tests/data_types.rs +++ b/tests/data_types.rs @@ -315,3 +315,52 @@ fn complex_types() -> Result<()> { mdf.load_all_channels_data_in_memory()?; Ok(()) } + +#[test] +fn string_no_zero_termination() -> Result<()> { + let file_name = format!( + "{}{}{}", + BASE_PATH_MDF4.as_str(), + "DataTypes/StringTypes/", + "Vector_Strings_NoZeroTermination_MDF430.mf4" + ); + let mut mdf = Mdf::new(&file_name)?; + mdf.load_all_channels_data_in_memory()?; + let names = mdf.get_channel_names_set(); + assert!(!names.is_empty(), "No channels in no-zero-termination string file"); + Ok(()) +} + +#[test] +fn half_float_values() -> Result<()> { + let file_name = format!( + "{}{}", + BASE_PATH_MDF4.as_str(), + "Halffloat/halffloat_sinus.mf4" + ); + let mut mdf = Mdf::new(&file_name)?; + mdf.load_all_channels_data_in_memory()?; + + // The file contains at least one channel; verify it loaded successfully + let names = mdf.get_channel_names_set(); + assert!(!names.is_empty(), "No channels found in halffloat file"); + + // Find a channel with float data and check it has values + let has_data = names + .iter() + .any(|name| mdf.get_channel_data(name).map_or(false, |d| !d.is_empty())); + assert!(has_data, "No non-empty channel data in halffloat file"); + + // Half-float channels 
are decoded as Float32 or Float64 + let has_float_channel = names.iter().any(|name| { + mdf.get_channel_data(name).map_or(false, |d| { + matches!(d, ChannelData::Float32(_) | ChannelData::Float64(_)) + }) + }); + assert!( + has_float_channel, + "Expected at least one Float32/Float64 channel decoded from half-float" + ); + + Ok(()) +} diff --git a/tests/fixtures.rs b/tests/fixtures.rs new file mode 100644 index 0000000..b7319c3 --- /dev/null +++ b/tests/fixtures.rs @@ -0,0 +1,361 @@ +/// Synthetic MDF4 fixture builder. +/// +/// Creates minimal binary MDF4 files for coverage testing of conversion code paths +/// that are never hit by the existing sample files (which all use Float64 raw channels). +/// +/// Run once with `cargo test --test fixtures` to generate the files. +/// The files are checked in to test_files/synthetic/ so regular tests can load them. +use anyhow::Result; + +// ─── Low-level byte helpers ────────────────────────────────────────────────── + +fn push_u8(buf: &mut Vec, v: u8) { + buf.push(v); +} +fn push_u16(buf: &mut Vec, v: u16) { + buf.extend_from_slice(&v.to_le_bytes()); +} +fn push_i16(buf: &mut Vec, v: i16) { + buf.extend_from_slice(&v.to_le_bytes()); +} +fn push_u32(buf: &mut Vec, v: u32) { + buf.extend_from_slice(&v.to_le_bytes()); +} +fn push_u64(buf: &mut Vec, v: u64) { + buf.extend_from_slice(&v.to_le_bytes()); +} +fn push_i64(buf: &mut Vec, v: i64) { + buf.extend_from_slice(&v.to_le_bytes()); +} +fn push_f32(buf: &mut Vec, v: f32) { + buf.extend_from_slice(&v.to_le_bytes()); +} +fn push_f64(buf: &mut Vec, v: f64) { + buf.extend_from_slice(&v.to_le_bytes()); +} +fn push_zeros(buf: &mut Vec, n: usize) { + buf.extend(std::iter::repeat_n(0u8, n)); +} + +// ─── Block writers ─────────────────────────────────────────────────────────── + +/// IdBlock (64 bytes, fixed MDF4 header) +fn write_id_block(buf: &mut Vec) { + buf.extend_from_slice(b"MDF "); // id_file_id (8) + buf.extend_from_slice(b"4.30 "); // id_vers (8) + 
buf.extend_from_slice(b"mdfr "); // id_prog (8) + push_u16(buf, 0); // id_default_byteorder + push_u16(buf, 0); // id_floatingpointformat + push_u16(buf, 430); // id_ver + push_u16(buf, 0); // id_codepage + push_zeros(buf, 2); // id_check + push_zeros(buf, 26); // id_fill + push_u16(buf, 0); // id_unfin_flags + push_u16(buf, 0); // id_custom_unfin_flags + // Total: 8+8+8+2+2+2+2+2+26+2+2 = 64 bytes +} + +/// Hd4 block (104 bytes, self-contained including ##HD header) +fn write_hd4(buf: &mut Vec, dg_first: i64, fh_first: i64) { + buf.extend_from_slice(b"##HD"); // hd_id + push_zeros(buf, 4); // hd_reserved + push_u64(buf, 104); // hd_len + push_u64(buf, 6); // hd_link_counts + push_i64(buf, dg_first); // hd_dg_first + push_i64(buf, fh_first); // hd_fh_first + push_i64(buf, 0); // hd_ch_first + push_i64(buf, 0); // hd_at_first + push_i64(buf, 0); // hd_ev_first + push_i64(buf, 0); // hd_md_comment + push_u64(buf, 0); // hd_start_time_ns + push_i16(buf, 0); // hd_tz_offset_min + push_i16(buf, 0); // hd_dst_offset_min + push_u8(buf, 0); // hd_time_flags + push_u8(buf, 0); // hd_time_class + push_u8(buf, 0); // hd_flags + push_u8(buf, 0); // hd_reserved2 + push_f64(buf, 0.0); // hd_start_angle_rad + push_f64(buf, 0.0); // hd_start_distance_m + // Total: 4+4+8+8 + 6×8 + 8+2+2+1+1+1+1+8+8 = 24+48+32 = 104 bytes +} + +/// FhBlock (56 bytes, self-contained including ##FH header) +fn write_fh(buf: &mut Vec) { + buf.extend_from_slice(b"##FH"); // fh_id + push_zeros(buf, 4); // fh_gap + push_u64(buf, 56); // fh_len + push_u64(buf, 2); // fh_links (fh_fh_next + fh_md_comment) + push_i64(buf, 0); // fh_fh_next (end of list) + push_i64(buf, 0); // fh_md_comment (none) + push_u64(buf, 0); // fh_time_ns + push_i16(buf, 0); // fh_tz_offset_min + push_i16(buf, 0); // fh_dst_offset_min + push_u8(buf, 0); // fh_time_flags + push_zeros(buf, 3); // fh_reserved + // Total: 4+4+8+8+8+8+8+2+2+1+3 = 56 bytes +} + +/// Dg4Block (64 bytes, self-contained including ##DG header) +fn 
write_dg4(buf: &mut Vec, cg_first: i64, data: i64) { + buf.extend_from_slice(b"##DG"); // dg_id + push_zeros(buf, 4); // reserved + push_u64(buf, 64); // dg_len + push_u64(buf, 4); // dg_links + push_i64(buf, 0); // dg_dg_next + push_i64(buf, cg_first); // dg_cg_first + push_i64(buf, data); // dg_data + push_i64(buf, 0); // dg_md_comment + push_u8(buf, 0); // dg_rec_id_size + push_zeros(buf, 7); // reserved_2 + // Total: 4+4+8+8+4×8+1+7 = 64 bytes +} + +/// CG block: Blockheader4Short(16) + Cg4Block body(88) = 104 bytes total. +/// Uses 6 standard links (no cg_cg_master). +fn write_cg4(buf: &mut Vec, cn_first: i64, cycle_count: u64, data_bytes: u32) { + // Blockheader4Short (16 bytes) + buf.extend_from_slice(b"##CG"); // hdr_id + push_zeros(buf, 4); // hdr_gap + push_u64(buf, 104); // hdr_len (= 16 + 88) + // Cg4Block body (88 bytes): + push_u64(buf, 6); // cg_links (6 → no cg_cg_master) + push_i64(buf, 0); // cg_cg_next + push_i64(buf, cn_first); // cg_cn_first + push_i64(buf, 0); // cg_tx_acq_name + push_i64(buf, 0); // cg_si_acq_source + push_i64(buf, 0); // cg_sr_first + push_i64(buf, 0); // cg_md_comment + push_u64(buf, 0); // cg_record_id + push_u64(buf, cycle_count); // cg_cycle_count + push_u16(buf, 0); // cg_flags + push_u16(buf, 0); // cg_path_separator + push_zeros(buf, 4); // cg_reserved + push_u32(buf, data_bytes); // cg_data_bytes + push_u32(buf, 0); // cg_inval_bytes + // Body: 8+48+8+8+2+2+4+4+4 = 88 bytes → total 104 +} + +/// CN block: Blockheader4Short(16) + Cn4Block body(144) = 160 bytes total. +/// Uses 8 standard links (no extra CA/event links). 
+/// +/// - cn_type: 0=fixed, 2=master +/// - cn_sync_type: 0=none, 1=time +/// - cn_data_type: 2=IntLE, 4=FloatLE +#[allow(clippy::too_many_arguments)] +fn write_cn4( + buf: &mut Vec, + cn_type: u8, + cn_sync_type: u8, + cn_data_type: u8, + cn_byte_offset: u32, + cn_bit_count: u32, + cn_cn_next: i64, + cn_tx_name: i64, + cn_cc_conversion: i64, +) { + // Blockheader4Short (16 bytes) + buf.extend_from_slice(b"##CN"); // hdr_id + push_zeros(buf, 4); // hdr_gap + push_u64(buf, 160); // hdr_len (= 16 + 144) + // Cn4Block body (8 + 64 + 72 = 144 bytes): + push_u64(buf, 8); // cn_links (8 standard links) + push_i64(buf, cn_cn_next); // cn_cn_next + push_i64(buf, 0); // cn_composition + push_i64(buf, cn_tx_name); // cn_tx_name + push_i64(buf, 0); // cn_si_source + push_i64(buf, cn_cc_conversion); // cn_cc_conversion + push_i64(buf, 0); // cn_data + push_i64(buf, 0); // cn_md_unit + push_i64(buf, 0); // cn_md_comment + // Data members (72 bytes): + push_u8(buf, cn_type); // cn_type + push_u8(buf, cn_sync_type); // cn_sync_type + push_u8(buf, cn_data_type); // cn_data_type + push_u8(buf, 0); // cn_bit_offset + push_u32(buf, cn_byte_offset); // cn_byte_offset + push_u32(buf, cn_bit_count); // cn_bit_count + push_u32(buf, 0); // cn_flags + push_u32(buf, 0); // cn_inval_bit_pos + push_u8(buf, 0xff); // cn_precision (unrestricted) + push_u8(buf, 0); // cn_alignment + push_u16(buf, 0); // cn_attachment_count + push_f64(buf, 0.0); // cn_val_range_min + push_f64(buf, 0.0); // cn_val_range_max + push_f64(buf, 0.0); // cn_limit_min + push_f64(buf, 0.0); // cn_limit_max + push_f64(buf, 0.0); // cn_limit_ext_min + push_f64(buf, 0.0); // cn_limit_ext_max + // Data members total: 1+1+1+1+4+4+4+4+1+1+2+8+8+8+8+8+8 = 72 bytes → total 160 +} + +/// TX block: Blockheader4(24) + null-terminated text. +/// `text` should NOT include the null terminator. 
+fn write_tx(buf: &mut Vec, text: &str) { + let text_bytes = text.as_bytes(); + let total_len = 24u64 + text_bytes.len() as u64 + 1; // +1 for null + buf.extend_from_slice(b"##TX"); // hdr_id + push_zeros(buf, 4); // hdr_gap + push_u64(buf, total_len); // hdr_len + push_u64(buf, 0); // hdr_links + buf.extend_from_slice(text_bytes); // text + buf.push(0); // null terminator +} + +/// CC block (linear, cc_type=1): Blockheader4Short(16) + Cc4Block body(80) = 96 bytes. +/// Formula: phys = a1 * raw + a0 +fn write_cc_linear(buf: &mut Vec, a0: f64, a1: f64) { + // Blockheader4Short (16 bytes) + buf.extend_from_slice(b"##CC"); // hdr_id + push_zeros(buf, 4); // hdr_gap + push_u64(buf, 96); // hdr_len (= 16 + 80) + // Cc4Block body (80 bytes): + push_u64(buf, 4); // cc_links (4 standard links, no cc_ref) + push_i64(buf, 0); // cc_tx_name + push_i64(buf, 0); // cc_md_unit + push_i64(buf, 0); // cc_md_comment + push_i64(buf, 0); // cc_cc_inverse + // cc_ref: empty (cc_links == 4) + push_u8(buf, 1); // cc_type = 1 (Linear) + push_u8(buf, 0); // cc_precision + push_u16(buf, 0); // cc_flags + push_u16(buf, 0); // cc_ref_count + push_u16(buf, 2); // cc_val_count (a0, a1) + push_f64(buf, 0.0); // cc_phy_range_min + push_f64(buf, 0.0); // cc_phy_range_max + // cc_val: Real([a0, a1]) + push_f64(buf, a0); // a0 (offset) + push_f64(buf, a1); // a1 (factor) + // Body: 8+32+1+1+2+2+2+8+8+16 = 80 bytes → total 96 +} + +/// DT block: 4-byte id + Dt4Block(20) + raw records. +/// `records` is a flat byte slice of all records concatenated. 
+fn write_dt(buf: &mut Vec, records: &[u8]) { + let total_len = 24u64 + records.len() as u64; + buf.extend_from_slice(b"##DT"); // id (read separately by reader before Dt4Block) + push_zeros(buf, 4); // reserved + push_u64(buf, total_len); // len (total block size) + push_u64(buf, 0); // links = 0 + buf.extend_from_slice(records); // raw data +} + +// ─── Fixture: int_linear_cc ────────────────────────────────────────────────── + +/// Builds and writes `test_files/synthetic/int_linear_cc.mf4`. +/// +/// Contains 4 channels in a single channel group with 4 samples each: +/// - `time_ch` : Float64 LE master (sync_type=time), values 0.0..3.0 +/// - `int8_ch` : Int8 raw + linear CC (a0=0.5, a1=2.0) +/// - `int16_ch` : Int16 raw + linear CC (a0=0.5, a1=2.0) +/// - `float32_ch` : Float32 raw + linear CC (a0=0.5, a1=2.0) +/// +/// File layout (exact byte offsets): +/// ```text +/// [0] IdBlock 64 b +/// [64] Hd4 104 b +/// [168] FhBlock 56 b +/// [224] Dg4 64 b +/// [288] CG 104 b (cg_cn_first=392, cg_data_bytes=15, cg_cycle_count=4) +/// [392] CN_master 160 b (data_type=4/FloatLE, bit_count=64, byte_offset=0) +/// [552] CN_int8 160 b (data_type=2/IntLE, bit_count=8, byte_offset=8, cc=1164) +/// [712] CN_int16 160 b (data_type=2/IntLE, bit_count=16, byte_offset=9, cc=1164) +/// [872] CN_float32 160 b (data_type=4/FloatLE, bit_count=32, byte_offset=11, cc=1164) +/// [1032] TX "time_ch\0" 32 b +/// [1064] TX "int8_ch\0" 32 b +/// [1096] TX "int16_ch\0" 33 b +/// [1129] TX "float32_ch\0" 35 b +/// [1164] CC linear(a0=0.5, a1=2.0) 96 b +/// [1260] DT 84 b (4 records × 15 bytes) +/// Total: 1344 bytes +/// ``` +pub fn create_int_linear_cc_fixture() -> Result<()> { + const PATH: &str = "test_files/synthetic/int_linear_cc.mf4"; + std::fs::create_dir_all("test_files/synthetic")?; + + let mut buf: Vec = Vec::with_capacity(1344); + + write_id_block(&mut buf); + debug_assert_eq!(buf.len(), 64, "IdBlock size mismatch"); + + write_hd4(&mut buf, 224, 168); // dg_first=224, fh_first=168 
+ debug_assert_eq!(buf.len(), 168, "Hd4 size mismatch"); + + write_fh(&mut buf); + debug_assert_eq!(buf.len(), 224, "FhBlock size mismatch"); + + write_dg4(&mut buf, 288, 1260); // cg_first=288, data=1260 + debug_assert_eq!(buf.len(), 288, "Dg4Block size mismatch"); + + // CG: cn_first=392, cycle_count=4, data_bytes=15 (8+1+2+4) + write_cg4(&mut buf, 392, 4, 15); + debug_assert_eq!(buf.len(), 392, "CG size mismatch"); + + // CN_master: type=2, sync=1(time), data_type=4(FloatLE), byte_offset=0, bit_count=64 + // cn_cn_next=552, tx_name=1032, cc=0 + write_cn4(&mut buf, 2, 1, 4, 0, 64, 552, 1032, 0); + debug_assert_eq!(buf.len(), 552, "CN_master size mismatch"); + + // CN_int8: type=0, sync=0, data_type=2(IntLE), byte_offset=8, bit_count=8 + // cn_cn_next=712, tx_name=1064, cc=1164 + write_cn4(&mut buf, 0, 0, 2, 8, 8, 712, 1064, 1164); + debug_assert_eq!(buf.len(), 712, "CN_int8 size mismatch"); + + // CN_int16: type=0, sync=0, data_type=2(IntLE), byte_offset=9, bit_count=16 + // cn_cn_next=872, tx_name=1096, cc=1164 + write_cn4(&mut buf, 0, 0, 2, 9, 16, 872, 1096, 1164); + debug_assert_eq!(buf.len(), 872, "CN_int16 size mismatch"); + + // CN_float32: type=0, sync=0, data_type=4(FloatLE), byte_offset=11, bit_count=32 + // cn_cn_next=0, tx_name=1129, cc=1164 + write_cn4(&mut buf, 0, 0, 4, 11, 32, 0, 1129, 1164); + debug_assert_eq!(buf.len(), 1032, "CN_float32 size mismatch"); + + write_tx(&mut buf, "time_ch"); // 24 + 7 + 1 = 32 bytes → [1032..1064) + debug_assert_eq!(buf.len(), 1064, "TX time_ch size mismatch"); + + write_tx(&mut buf, "int8_ch"); // 32 bytes → [1064..1096) + debug_assert_eq!(buf.len(), 1096, "TX int8_ch size mismatch"); + + write_tx(&mut buf, "int16_ch"); // 24 + 8 + 1 = 33 bytes → [1096..1129) + debug_assert_eq!(buf.len(), 1129, "TX int16_ch size mismatch"); + + write_tx(&mut buf, "float32_ch"); // 24 + 10 + 1 = 35 bytes → [1129..1164) + debug_assert_eq!(buf.len(), 1164, "TX float32_ch size mismatch"); + + write_cc_linear(&mut buf, 0.5, 2.0); // 96 
bytes → [1164..1260) + debug_assert_eq!(buf.len(), 1260, "CC linear size mismatch"); + + // DT: 4 records × 15 bytes = 60 bytes + 24-byte header = 84 bytes → [1260..1344) + // Record layout per row: + // [0..8) : f64 LE master time (0.0, 1.0, 2.0, 3.0) + // [8] : i8 raw int8 (-5, 0, 5, 10) + // [9..11) : i16 LE raw int16 (-100, 0, 100, 200) + // [11..15): f32 LE raw float32 (1.5, 2.5, 3.5, 4.5) + let raw_i8: [i8; 4] = [-5, 0, 5, 10]; + let raw_i16: [i16; 4] = [-100, 0, 100, 200]; + let raw_f32: [f32; 4] = [1.5, 2.5, 3.5, 4.5]; + let mut records: Vec = Vec::with_capacity(60); + for i in 0..4usize { + push_f64(&mut records, i as f64); // master time + records.push(raw_i8[i] as u8); // i8 raw (bit-cast) + records.extend_from_slice(&raw_i16[i].to_le_bytes()); // i16 raw + push_f32(&mut records, raw_f32[i]); // f32 raw + } + assert_eq!(records.len(), 60, "record data size mismatch"); + write_dt(&mut buf, &records); + debug_assert_eq!(buf.len(), 1344, "DT size mismatch"); + + std::fs::write(PATH, &buf)?; + Ok(()) +} + +// ─── Tests ─────────────────────────────────────────────────────────────────── + +#[test] +fn create_fixtures() { + create_int_linear_cc_fixture().expect("failed to create int_linear_cc fixture"); + assert!( + std::path::Path::new("test_files/synthetic/int_linear_cc.mf4").exists(), + "fixture file not created" + ); +} diff --git a/tests/mdf3_conversion.rs b/tests/mdf3_conversion.rs new file mode 100644 index 0000000..a6889eb --- /dev/null +++ b/tests/mdf3_conversion.rs @@ -0,0 +1,86 @@ +use anyhow::Result; +use mdfr::mdfreader::Mdf; +use std::sync::LazyLock; + +static MDF3_PATH: LazyLock = LazyLock::new(|| { + "/home/ratal/workspace/mdfreader/mdfreader/tests/mdf3/".to_string() +}); + +/// Helper: load a file and all its channel data +fn load_mdf3(filename: &str) -> Result { + let path = format!("{}{}", MDF3_PATH.as_str(), filename); + let mut mdf = Mdf::new(&path)?; + mdf.load_all_channels_data_in_memory()?; + Ok(mdf) +} + +#[test] +fn 
mdf3_canape_loads() -> Result<()> { + let mdf = load_mdf3("MDF_CANAPE.mdf")?; + assert!(!mdf.get_channel_names_set().is_empty()); + Ok(()) +} + +#[test] +fn mdf3_ascet_loads() -> Result<()> { + let mdf = load_mdf3("ASCET.dat")?; + assert!(!mdf.get_channel_names_set().is_empty()); + Ok(()) +} + +#[test] +fn mdf3_mda71_loads() -> Result<()> { + let mdf = load_mdf3("MDA71.dat")?; + assert!(!mdf.get_channel_names_set().is_empty()); + Ok(()) +} + +#[test] +fn mdf3_rj_can_loads() -> Result<()> { + // RJ file has linear conversion channels + let mdf = load_mdf3("RJ_N16-12-363_BM-15C-0024_228_2_20170116094355_CAN.dat")?; + assert!(!mdf.get_channel_names_set().is_empty()); + Ok(()) +} + +#[test] +fn mdf3_measure_loads() -> Result<()> { + let mdf = load_mdf3("Measure.dat")?; + assert!(!mdf.get_channel_names_set().is_empty()); + Ok(()) +} + +#[test] +fn mdf3_t3_nedc_loads() -> Result<()> { + let mdf = load_mdf3("T3_121121_000_6NEDC.dat")?; + assert!(!mdf.get_channel_names_set().is_empty()); + Ok(()) +} + +#[test] +fn mdf3_738l10_loads() -> Result<()> { + let mdf = load_mdf3("738L10_040410 Base Acc 30km_hr.dat")?; + assert!(!mdf.get_channel_names_set().is_empty()); + Ok(()) +} + +#[test] +fn mdf3_hidden_bytes_loads() -> Result<()> { + let mdf = load_mdf3("Mdf3_hiddenBytes_NotAlignedBytes.dat")?; + assert!(!mdf.get_channel_names_set().is_empty()); + Ok(()) +} + +#[test] +fn mdf3_tgt_loads() -> Result<()> { + let mdf = load_mdf3("TGT.dat")?; + assert!(!mdf.get_channel_names_set().is_empty()); + Ok(()) +} + +#[test] +fn mdf3_canoe_unsorted_loads() -> Result<()> { + let mdf = load_mdf3("CANoe3_unsorted.mdf")?; + assert!(!mdf.get_channel_names_set().is_empty()); + Ok(()) +} From b3cebbff6800ce28464fda4d8d822d203085202a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Fri, 27 Mar 2026 22:43:52 +0100 Subject: [PATCH 40/46] updated to latest cargo dependencies --- Cargo.lock | 351 +++++++++++++++++++++++------------------------------ Cargo.toml | 
28 ++--- 2 files changed, 166 insertions(+), 213 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4611e82..8ce7322 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,15 +2,6 @@ # It is not intended for manual editing. version = 4 -[[package]] -name = "addr2line" -version = "0.25.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b" -dependencies = [ - "gimli", -] - [[package]] name = "adler2" version = "2.0.1" @@ -111,9 +102,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.21" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" dependencies = [ "anstyle", "anstyle-parse", @@ -132,9 +123,9 @@ checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" [[package]] name = "anstyle-parse" -version = "0.2.7" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" dependencies = [ "utf8parse", ] @@ -161,12 +152,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.100" +version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" -dependencies = [ - "backtrace", -] +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" [[package]] name = "ar_archive_writer" @@ -174,7 +162,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0c269894b6fe5e9d7ada0cf69b5bf847ff35bc25fc271f08e1d080fce80339a" dependencies = [ - "object 0.32.2", + "object", ] [[package]] @@ -213,9 +201,9 @@ checksum = 
"7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4754a624e5ae42081f464514be454b39711daae0458906dacde5f4c632f33a8" +checksum = "d441fdda254b65f3e9025910eb2c2066b6295d9c8ed409522b8d2ace1ff8574c" dependencies = [ "arrow-arith", "arrow-array", @@ -235,9 +223,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7b3141e0ec5145a22d8694ea8b6d6f69305971c4fa1c1a13ef0195aef2d678b" +checksum = "ced5406f8b720cc0bc3aa9cf5758f93e8593cda5490677aa194e4b4b383f9a59" dependencies = [ "arrow-array", "arrow-buffer", @@ -249,9 +237,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c8955af33b25f3b175ee10af580577280b4bd01f7e823d94c7cdef7cf8c9aef" +checksum = "772bd34cacdda8baec9418d80d23d0fb4d50ef0735685bd45158b83dfeb6e62d" dependencies = [ "ahash", "arrow-buffer", @@ -267,9 +255,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c697ddca96183182f35b3a18e50b9110b11e916d7b7799cbfd4d34662f2c56c2" +checksum = "898f4cf1e9598fdb77f356fdf2134feedfd0ee8d5a4e0a5f573e7d0aec16baa4" dependencies = [ "bytes", "half", @@ -279,9 +267,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "646bbb821e86fd57189c10b4fcdaa941deaf4181924917b0daa92735baa6ada5" +checksum = "b0127816c96533d20fc938729f48c52d3e48f99717e7a0b5ade77d742510736d" dependencies = [ "arrow-array", "arrow-buffer", @@ -301,9 +289,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "57.3.0" 
+version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8da746f4180004e3ce7b83c977daf6394d768332349d3d913998b10a120b790a" +checksum = "ca025bd0f38eeecb57c2153c0123b960494138e6a957bbda10da2b25415209fe" dependencies = [ "arrow-array", "arrow-cast", @@ -311,14 +299,14 @@ dependencies = [ "chrono", "csv", "csv-core", - "regex 1.12.2", + "regex 1.12.3", ] [[package]] name = "arrow-data" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fdd994a9d28e6365aa78e15da3f3950c0fdcea6b963a12fa1c391afb637b304" +checksum = "42d10beeab2b1c3bb0b53a00f7c944a178b622173a5c7bcabc3cb45d90238df4" dependencies = [ "arrow-buffer", "arrow-schema", @@ -329,9 +317,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abf7df950701ab528bf7c0cf7eeadc0445d03ef5d6ffc151eaae6b38a58feff1" +checksum = "609a441080e338147a84e8e6904b6da482cefb957c5cdc0f3398872f69a315d0" dependencies = [ "arrow-array", "arrow-buffer", @@ -343,9 +331,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ff8357658bedc49792b13e2e862b80df908171275f8e6e075c460da5ee4bf86" +checksum = "6ead0914e4861a531be48fe05858265cf854a4880b9ed12618b1d08cba9bebc8" dependencies = [ "arrow-array", "arrow-buffer", @@ -367,9 +355,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7d8f1870e03d4cbed632959498bcc84083b5a24bded52905ae1695bd29da45b" +checksum = "763a7ba279b20b52dad300e68cfc37c17efa65e68623169076855b3a9e941ca5" dependencies = [ "arrow-array", "arrow-buffer", @@ -380,9 +368,9 @@ dependencies = [ [[package]] name = "arrow-pyarrow" -version = "57.3.0" +version = "58.1.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "d18c442b4c266aaf3d7f7dd40fd7ae058cef7f113b00ff0cd8256e1e218ec544" +checksum = "e63351dc11981a316c828a6032a5021345bba882f68bc4a36c36825a50725089" dependencies = [ "arrow-array", "arrow-data", @@ -392,9 +380,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18228633bad92bff92a95746bbeb16e5fc318e8382b75619dec26db79e4de4c0" +checksum = "e14fe367802f16d7668163ff647830258e6e0aeea9a4d79aaedf273af3bdcd3e" dependencies = [ "arrow-array", "arrow-buffer", @@ -405,18 +393,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c872d36b7bf2a6a6a2b40de9156265f0242910791db366a2c17476ba8330d68" +checksum = "c30a1365d7a7dc50cc847e54154e6af49e4c4b0fddc9f607b687f29212082743" dependencies = [ "bitflags 2.10.0", ] [[package]] name = "arrow-select" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68bf3e3efbd1278f770d67e5dc410257300b161b93baedb3aae836144edcaf4b" +checksum = "78694888660a9e8ac949853db393af2a8b8fc82c19ce333132dfa2e72cc1a7fe" dependencies = [ "ahash", "arrow-array", @@ -428,9 +416,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85e968097061b3c0e9fe3079cf2e703e487890700546b5b0647f60fca1b5a8d8" +checksum = "61e04a01f8bb73ce54437514c5fd3ee2aa3e8abe4c777ee5cc55853b1652f79e" dependencies = [ "arrow-array", "arrow-buffer", @@ -439,7 +427,7 @@ dependencies = [ "arrow-select", "memchr 2.7.6", "num-traits", - "regex 1.12.2", + "regex 1.12.3", "regex-syntax 0.8.8", ] @@ -535,21 +523,6 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" -[[package]] -name = "backtrace" -version = "0.3.76" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6" -dependencies = [ - "addr2line", - "cfg-if 1.0.4", - "libc", - "miniz_oxide", - "object 0.37.3", - "rustc-demangle", - "windows-link 0.2.1", -] - [[package]] name = "base64" version = "0.22.1" @@ -597,9 +570,9 @@ dependencies = [ [[package]] name = "binrw" -version = "0.15.0" +version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81419ff39e6ed10a92a7f125290859776ced35d9a08a665ae40b23e7ca702f30" +checksum = "d53195f985e88ab94d1cc87e80049dd2929fd39e4a772c5ae96a7e5c4aad3642" dependencies = [ "array-init", "binrw_derive", @@ -608,9 +581,9 @@ dependencies = [ [[package]] name = "binrw_derive" -version = "0.15.0" +version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "376404e55ec40d0d6f8b4b7df3f87b87954bd987f0cf9a7207ea3b6ea5c9add4" +checksum = "5910da05ee556b789032c8ff5a61fb99239580aa3fd0bfaa8f4d094b2aee00ad" dependencies = [ "either", "owo-colors", @@ -657,6 +630,15 @@ dependencies = [ "generic-array", ] +[[package]] +name = "block-buffer" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdd35008169921d80bc60d3d0ab416eecb028c4cd653352907921d95084790be" +dependencies = [ + "hybrid-array", +] + [[package]] name = "boxcar" version = "0.2.14" @@ -756,7 +738,7 @@ version = "0.29.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "befbfd072a8e81c02f8c507aefce431fe5e7d051f83d48a23ffc9b9fe5a11799" dependencies = [ - "clap 4.5.58", + "clap 4.6.0", "heck", "indexmap", "log 0.4.29", @@ -899,18 +881,18 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.58" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"63be97961acde393029492ce0be7a1af7e323e6bae9511ebfac33751be5e6806" +checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.58" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f13174bda5dfd69d7e947827e5af4b0f2f94a4a3ee92912fba07a66150f21e2" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" dependencies = [ "anstream", "anstyle", @@ -974,6 +956,12 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "const-oid" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6ef517f0926dd24a1582492c791b6a4818a4d94e789a334894aa15b0d12f55c" + [[package]] name = "const-random" version = "0.1.18" @@ -1045,15 +1033,15 @@ dependencies = [ [[package]] name = "criterion" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d883447757bb0ee46f233e9dc22eb84d93a9508c9b868687b274fc431d886bf" +checksum = "950046b2aa2492f9a536f5f4f9a3de7b9e2476e575e05bd6c333371add4d98f3" dependencies = [ "alloca", "anes", "cast", "ciborium", - "clap 4.5.58", + "clap 4.6.0", "criterion-plot", "itertools 0.13.0", "num-traits", @@ -1061,7 +1049,7 @@ dependencies = [ "page_size", "plotters", "rayon", - "regex 1.12.2", + "regex 1.12.3", "serde", "serde_json", "tinytemplate", @@ -1070,9 +1058,9 @@ dependencies = [ [[package]] name = "criterion-plot" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed943f81ea2faa8dcecbbfa50164acf95d555afec96a27871663b300e387b2e4" +checksum = "d8d80a2f4f5b554395e47b5d8305bc3d27813bacb73493eb1001e8f76dae29ea" dependencies = [ "cast", "itertools 0.13.0", @@ -1160,6 +1148,15 @@ dependencies = [ "typenum", ] +[[package]] +name = "crypto-common" +version = "0.2.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "77727bb15fa921304124b128af125e7e3b968275d1b108b379190264f4423710" +dependencies = [ + "hybrid-array", +] + [[package]] name = "csv" version = "1.4.0" @@ -1193,8 +1190,19 @@ version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ - "block-buffer", - "crypto-common", + "block-buffer 0.10.4", + "crypto-common 0.1.7", +] + +[[package]] +name = "digest" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4850db49bf08e663084f7fb5c87d202ef91a3907271aff24a94eb97ff039153c" +dependencies = [ + "block-buffer 0.12.0", + "const-oid", + "crypto-common 0.2.1", ] [[package]] @@ -1240,12 +1248,12 @@ dependencies = [ [[package]] name = "env_filter" -version = "0.1.4" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bf3c259d255ca70051b30e2e95b5446cdb8949ac4cd22c0d7fd634d89f568e2" +checksum = "32e90c2accc4b07a8456ea0debdc2e7587bdd890680d71173a15d4ae604f6eef" dependencies = [ "log 0.4.29", - "regex 1.12.2", + "regex 1.12.3", ] [[package]] @@ -1260,9 +1268,9 @@ dependencies = [ [[package]] name = "env_logger" -version = "0.11.8" +version = "0.11.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" +checksum = "0621c04f2196ac3f488dd583365b9c09be011a4ab8b9f37248ffcc8f6198b56a" dependencies = [ "anstream", "anstyle", @@ -1377,9 +1385,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.1.8" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b375d6465b98090a5f25b1c7703f3859783755aa9a80433b36e0379a3ec2f369" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" dependencies = [ "crc32fast", "miniz_oxide", @@ -1578,12 +1586,6 @@ 
dependencies = [ "wasip3", ] -[[package]] -name = "gimli" -version = "0.32.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7" - [[package]] name = "glob" version = "0.2.11" @@ -1706,7 +1708,7 @@ dependencies = [ "libloading 0.7.4", "mpi-sys", "pkg-config", - "regex 1.12.2", + "regex 1.12.3", "serde", "serde_derive", "winreg", @@ -1799,6 +1801,15 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" +[[package]] +name = "hybrid-array" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8655f91cd07f2b9d0c24137bd650fe69617773435ee5ec83022377777ce65ef1" +dependencies = [ + "typenum", +] + [[package]] name = "hyper" version = "1.8.1" @@ -2006,15 +2017,6 @@ dependencies = [ "serde_core", ] -[[package]] -name = "indoc" -version = "2.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" -dependencies = [ - "rustversion", -] - [[package]] name = "instant" version = "0.1.13" @@ -2078,9 +2080,9 @@ checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" [[package]] name = "jiff" -version = "0.2.18" +version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e67e8da4c49d6d9909fe03361f9b620f58898859f5c7aded68351e85e71ecf50" +checksum = "1a3546dc96b6d42c5f24902af9e2538e82e39ad350b0c766eb3fbf2d8f3d8359" dependencies = [ "jiff-static", "log 0.4.29", @@ -2091,9 +2093,9 @@ dependencies = [ [[package]] name = "jiff-static" -version = "0.2.18" +version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0c84ee7f197eca9a86c6fd6cb771e55eb991632f15f2bc3ca6ec838929e6e78" +checksum = "2a8c8b344124222efd714b73bb41f8b5120b27a7cc1c75593a6ff768d9d05aa4" 
dependencies = [ "proc-macro2", "quote 1.0.44", @@ -2207,9 +2209,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.182" +version = "0.2.183" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" +checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" [[package]] name = "libloading" @@ -2317,9 +2319,9 @@ dependencies = [ [[package]] name = "lz4_flex" -version = "0.12.0" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab6473172471198271ff72e9379150e9dfd70d8e533e0752a27e515b48dd375e" +checksum = "db9a0d582c2874f68138a16ce1867e0ffde6c0bb0a0df85e1f36d04146db488a" dependencies = [ "twox-hash", ] @@ -2354,12 +2356,12 @@ dependencies = [ [[package]] name = "md-5" -version = "0.10.6" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +checksum = "69b6441f590336821bb897fb28fc622898ccceb1d6cea3fde5ea86b090c4de98" dependencies = [ "cfg-if 1.0.4", - "digest", + "digest 0.11.2", ] [[package]] @@ -2372,12 +2374,12 @@ dependencies = [ "byteorder", "cbindgen", "chrono", - "clap 4.5.58", + "clap 4.6.0", "codepage", "criterion", "crossbeam-channel", "encoding_rs", - "env_logger 0.11.8", + "env_logger 0.11.10", "fasteval", "flate2", "glob 0.3.3", @@ -2430,15 +2432,6 @@ dependencies = [ "libc", ] -[[package]] -name = "memoffset" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" -dependencies = [ - "autocfg", -] - [[package]] name = "miniz_oxide" version = "0.8.9" @@ -2484,21 +2477,6 @@ dependencies = [ "rawpointer", ] -[[package]] -name = "ndarray" -version = "0.16.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"882ed72dce9365842bf196bdeedf5055305f11fc8c03dee7bb0194a6cad34841" -dependencies = [ - "matrixmultiply", - "num-complex", - "num-integer", - "num-traits", - "portable-atomic", - "portable-atomic-util", - "rawpointer", -] - [[package]] name = "ndarray" version = "0.17.2" @@ -2628,12 +2606,12 @@ dependencies = [ [[package]] name = "numpy" -version = "0.26.0" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b2dba356160b54f5371b550575b78130a54718b4c6e46b3f33a6da74a27e78b" +checksum = "778da78c64ddc928ebf5ad9df5edf0789410ff3bdbf3619aed51cd789a6af1e2" dependencies = [ "libc", - "ndarray 0.16.1", + "ndarray 0.17.2", "num-complex", "num-integer", "num-traits", @@ -2669,15 +2647,6 @@ dependencies = [ "memchr 2.7.6", ] -[[package]] -name = "object" -version = "0.37.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" -dependencies = [ - "memchr 2.7.6", -] - [[package]] name = "object_store" version = "0.13.1" @@ -2818,14 +2787,13 @@ dependencies = [ [[package]] name = "parquet" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ee96b29972a257b855ff2341b37e61af5f12d6af1158b6dcdb5b31ea07bb3cb" +checksum = "7d3f9f2205199603564127932b89695f52b62322f541d0fc7179d57c2e1c9877" dependencies = [ "ahash", "arrow-array", "arrow-buffer", - "arrow-cast", "arrow-data", "arrow-ipc", "arrow-schema", @@ -3080,7 +3048,7 @@ dependencies = [ "rand 0.9.2", "rand_distr", "rayon", - "regex 1.12.2", + "regex 1.12.3", "serde", "serde_json", "strum_macros", @@ -3113,7 +3081,7 @@ dependencies = [ "object_store", "parking_lot 0.12.5", "polars-arrow-format", - "regex 1.12.2", + "regex 1.12.3", "signal-hook", "simdutf8", ] @@ -3140,7 +3108,7 @@ dependencies = [ "rand 0.9.2", "rayon", "recursive", - "regex 1.12.2", + "regex 1.12.3", "version_check", ] @@ -3178,7 +3146,7 @@ dependencies = [ 
"polars-time", "polars-utils", "rayon", - "regex 1.12.2", + "regex 1.12.3", "reqwest", "serde", "serde_json", @@ -3280,7 +3248,7 @@ dependencies = [ "polars-schema", "polars-utils", "rayon", - "regex 1.12.2", + "regex 1.12.3", "regex-syntax 0.8.8", "strum_macros", "unicode-normalization", @@ -3307,7 +3275,7 @@ dependencies = [ "polars-error", "polars-parquet-format", "polars-utils", - "regex 1.12.2", + "regex 1.12.3", "serde", "simdutf8", "streaming-decompression", @@ -3352,7 +3320,7 @@ dependencies = [ "polars-utils", "rayon", "recursive", - "regex 1.12.2", + "regex 1.12.3", "sha2", "slotmap", "strum_macros", @@ -3404,7 +3372,7 @@ dependencies = [ "polars-plan", "polars-time", "polars-utils", - "regex 1.12.2", + "regex 1.12.3", "serde", "sqlparser", ] @@ -3472,7 +3440,7 @@ dependencies = [ "polars-ops", "polars-utils", "rayon", - "regex 1.12.2", + "regex 1.12.3", "strum_macros", ] @@ -3501,7 +3469,7 @@ dependencies = [ "rand 0.9.2", "raw-cpuid", "rayon", - "regex 1.12.2", + "regex 1.12.3", "rmp-serde", "serde", "serde_json", @@ -3576,37 +3544,34 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.26.0" +version = "0.28.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ba0117f4212101ee6544044dae45abe1083d30ce7b29c4b5cbdfa2354e07383" +checksum = "cf85e27e86080aafd5a22eae58a162e133a589551542b3e5cee4beb27e54f8e1" dependencies = [ "anyhow", - "indoc", "libc", - "memoffset", "num-complex", "once_cell", "portable-atomic", "pyo3-build-config", "pyo3-ffi", "pyo3-macros", - "unindent", ] [[package]] name = "pyo3-build-config" -version = "0.26.0" +version = "0.28.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fc6ddaf24947d12a9aa31ac65431fb1b851b8f4365426e182901eabfb87df5f" +checksum = "8bf94ee265674bf76c09fa430b0e99c26e319c945d96ca0d5a8215f31bf81cf7" dependencies = [ "target-lexicon", ] [[package]] name = "pyo3-ffi" -version = "0.26.0" +version = "0.28.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "025474d3928738efb38ac36d4744a74a400c901c7596199e20e45d98eb194105" +checksum = "491aa5fc66d8059dd44a75f4580a2962c1862a1c2945359db36f6c2818b748dc" dependencies = [ "libc", "pyo3-build-config", @@ -3614,9 +3579,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.26.0" +version = "0.28.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e64eb489f22fe1c95911b77c44cc41e7c19f3082fc81cce90f657cdc42ffded" +checksum = "f5d671734e9d7a43449f8480f8b38115df67bef8d21f76837fa75ee7aaa5e52e" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -3626,9 +3591,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.26.0" +version = "0.28.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "100246c0ecf400b475341b8455a9213344569af29a3c841d29270e53102e0fcf" +checksum = "22faaa1ce6c430a1f71658760497291065e6450d7b5dc2bcf254d49f66ee700a" dependencies = [ "heck", "proc-macro2", @@ -3887,9 +3852,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.12.2" +version = "1.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" dependencies = [ "aho-corasick 1.1.4", "memchr 2.7.6", @@ -4007,12 +3972,6 @@ dependencies = [ "memchr 2.7.6", ] -[[package]] -name = "rustc-demangle" -version = "0.1.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b50b8869d9fc858ce7266cce0194bd74df58b9d0e3f6df3a9fc8eb470d95c09d" - [[package]] name = "rustc-hash" version = "2.1.1" @@ -4242,7 +4201,7 @@ checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" dependencies = [ "cfg-if 1.0.4", "cpufeatures 0.2.17", - "digest", + "digest 0.10.7", ] [[package]] @@ -4522,7 +4481,7 @@ version = "0.2.19" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "37d53ac171c92a39e4769491c4b4dde7022c60042254b5fc044ae409d34a24d4" dependencies = [ - "env_logger 0.11.8", + "env_logger 0.11.10", "test-log-macros", "tracing-subscriber", ] @@ -4921,12 +4880,6 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" -[[package]] -name = "unindent" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" - [[package]] name = "untrusted" version = "0.9.0" @@ -5720,9 +5673,9 @@ dependencies = [ [[package]] name = "zlib-rs" -version = "0.5.5" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40990edd51aae2c2b6907af74ffb635029d5788228222c4bb811e9351c0caad3" +checksum = "3be3d40e40a133f9c916ee3f9f4fa2d9d63435b5fbe1bfc6d9dae0aa0ada1513" [[package]] name = "zmij" diff --git a/Cargo.toml b/Cargo.toml index c13acf2..3d490fc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,18 +14,18 @@ readme = "README.md" [features] default = ["numpy", "parquet", "polars"] numpy = ["dep:numpy", "dep:pyo3"] -polars = ["dep:polars", "dep:numpy", "dep:pyo3"] +polars = ["dep:polars", "dep:numpy", "dep:pyo3"] # numpy and pyo3 are required transitive deps for polars Python interop parquet = ["dep:parquet"] hdf5 = ["dep:hdf5", "ndarray"] ndarray = ["dep:ndarray"] hdf5-mpio = ["hdf5/mpio"] [dependencies] -clap = "4.5.58" # for input arguments -anyhow = { version = "1.0", features = ["backtrace"] } # error handling +clap = "4.6.0" # for input arguments +anyhow = { version = "1.0.102", features = ["backtrace"] } # error handling log = "0.4" # to log events byteorder = "1.4" # for bytes conversions -binrw = "0.15" # to efficiently read blocks +binrw = "0.15.1" # to efficiently read blocks num = "0.4" half = "2.7" # for f16 handling encoding_rs = 
"0.8" # for endian management and bytes to text conversion (utf8, SBC, UTF16) @@ -36,44 +36,44 @@ rayon = "1.11" # for general purpose parallel computations crossbeam-channel = "0.5" # for efficient channel between threads parking_lot = "0.12" # for efficient mutex roxmltree = "0.21" # for xml parsing -flate2 = "1.1" # for DZ block data deflate +flate2 = "1.1.9" # for DZ block data deflate zstd = "0.13" lz4 = "1.28" -md-5 = "0.10" # md5sum of attachments +md-5 = "0.11" # md5sum of attachments transpose = "0.2" # for DZBlock transpose fasteval = "0.2" # for algebraic conversion itertools = "0.14" serde = { version = "1.0", features = ["derive"] } # for serialization whoami = "2.1.1" # to get user name for writing file rand = "0.10" # for random numbers -arrow = { version = "57.3.0", features = [ +arrow = { version = "58.1.0", features = [ "pyarrow", "prettyprint", "ffi", ] } # for efficient data storing in memory -env_logger = "0.11" -libc = "0.2" # for the C api -numpy = { version = "0.26", optional = true } # to export in numpy +env_logger = "0.11.10" +libc = "0.2.183" # for the C api +numpy = { version = "0.28", optional = true } # to export in numpy polars = { version = "0.53", features = [ "dtype-full", "object", "fmt", ], optional = true } # for python dataframe -parquet = { version = "57.3", optional = true } # to write parquet file +parquet = { version = "58.1.0", optional = true } # to write parquet file hdf5 = { version = "0.8", optional = true, features = [ "lzf", ] } # to export into hdf5 file ndarray = { version = "0.17", optional = true } # to convert arraw data into ndarray, needed for hdf5 [dependencies.pyo3] -version = "0.26" +version = "0.28.2" features = ["extension-module", "num-complex", "anyhow"] optional = true [dev-dependencies] -criterion = "0.8" # for benchmark +criterion = "0.8.2" # for benchmark test-log = "0.2" -glob = "*" +glob = "0.3" [build-dependencies] cbindgen = "0.29" # to generate C api headers From 
6c13add0538612d8f58e0fcd7e6ac61e3cd9a6d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Sat, 28 Mar 2026 09:30:40 +0100 Subject: [PATCH 41/46] adding tests for big endian, arrays and complex --- tests/be_complex_array.rs | 553 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 553 insertions(+) create mode 100644 tests/be_complex_array.rs diff --git a/tests/be_complex_array.rs b/tests/be_complex_array.rs new file mode 100644 index 0000000..cf3ac29 --- /dev/null +++ b/tests/be_complex_array.rs @@ -0,0 +1,553 @@ +/// Integration tests for BigEndian channels, Complex32, and ArrayDFloat64 fixtures. +/// +/// Three synthetic MDF4 files exercise paths in `read_channels_from_bytes` that +/// the existing test suite never hits: +/// +/// - `be_scalars.mf4` → BE Int16 (lines 724-731) and BE Float64 +/// - `complex_f32_le.mf4` → Complex32 LE f32-pair arm (lines 1040-1072) +/// - `array_f64_le.mf4` → ArrayDFloat64 LE (lines 1624-1635) + CA block parsing +use anyhow::Result; +use mdfr::data_holder::channel_data::ChannelData; +use mdfr::mdfreader::Mdf; +use std::sync::LazyLock; + +// ─── Byte-push helpers ─────────────────────────────────────────────────────── + +fn pu8(b: &mut Vec, v: u8) { b.push(v); } +fn pu16(b: &mut Vec, v: u16) { b.extend_from_slice(&v.to_le_bytes()); } +fn pu32(b: &mut Vec, v: u32) { b.extend_from_slice(&v.to_le_bytes()); } +fn pu64(b: &mut Vec, v: u64) { b.extend_from_slice(&v.to_le_bytes()); } +fn pi64(b: &mut Vec, v: i64) { b.extend_from_slice(&v.to_le_bytes()); } +fn pf32(b: &mut Vec, v: f32) { b.extend_from_slice(&v.to_le_bytes()); } +fn pf64(b: &mut Vec, v: f64) { b.extend_from_slice(&v.to_le_bytes()); } +fn pi16_be(b: &mut Vec, v: i16) { b.extend_from_slice(&v.to_be_bytes()); } +fn pu16_be(b: &mut Vec, v: u16) { b.extend_from_slice(&v.to_be_bytes()); } +fn pf32_be(b: &mut Vec, v: f32) { b.extend_from_slice(&v.to_be_bytes()); } +fn pf64_be(b: &mut Vec, v: f64) { b.extend_from_slice(&v.to_be_bytes()); } 
+fn zeros(b: &mut Vec, n: usize) { b.extend(std::iter::repeat_n(0u8, n)); } + +// ─── Shared block writers (identical to conversions_int_types.rs) ──────────── + +fn id_block(b: &mut Vec) { + b.extend_from_slice(b"MDF "); b.extend_from_slice(b"4.30 "); b.extend_from_slice(b"mdfr "); + pu16(b,0); pu16(b,0); pu16(b,430); pu16(b,0); zeros(b,2); zeros(b,26); pu16(b,0); pu16(b,0); +} +fn hd4(b: &mut Vec, dg: i64, fh: i64) { + b.extend_from_slice(b"##HD"); zeros(b,4); pu64(b,104); pu64(b,6); + pi64(b,dg); pi64(b,fh); pi64(b,0); pi64(b,0); pi64(b,0); pi64(b,0); + pu64(b,0); b.extend_from_slice(&0i16.to_le_bytes()); b.extend_from_slice(&0i16.to_le_bytes()); + pu8(b,0); pu8(b,0); pu8(b,0); pu8(b,0); + pf64(b,0.0); pf64(b,0.0); +} +fn fh(b: &mut Vec) { + b.extend_from_slice(b"##FH"); zeros(b,4); pu64(b,56); pu64(b,2); + pi64(b,0); pi64(b,0); pu64(b,0); b.extend_from_slice(&0i16.to_le_bytes()); b.extend_from_slice(&0i16.to_le_bytes()); pu8(b,0); zeros(b,3); +} +fn dg4(b: &mut Vec, cg: i64, data: i64) { + b.extend_from_slice(b"##DG"); zeros(b,4); pu64(b,64); pu64(b,4); + pi64(b,0); pi64(b,cg); pi64(b,data); pi64(b,0); pu8(b,0); zeros(b,7); +} +fn dg4_chain(b: &mut Vec, next: i64, cg: i64, data: i64) { + b.extend_from_slice(b"##DG"); zeros(b,4); pu64(b,64); pu64(b,4); + pi64(b,next); pi64(b,cg); pi64(b,data); pi64(b,0); pu8(b,0); zeros(b,7); +} +/// LD block with one data link and equal_sample_count (56 bytes total). 
+fn ld1_block(b: &mut Vec, data_ptr: i64, cycle_count: u64) { + b.extend_from_slice(b"##LD"); zeros(b,4); pu64(b,56); pu64(b,2); // header + pi64(b,0); pi64(b,data_ptr); // ld_next=0, ld_links[0]=data_ptr + pu8(b,1); pu8(b,0); pu8(b,0); pu8(b,0); // ld_flags=1(equal_sample_count), rest 0 + pu32(b,1); pu64(b,cycle_count); // ld_count=1, ld_equal_sample_count +} +fn cg4(b: &mut Vec, cn: i64, cycles: u64, data_bytes: u32) { + b.extend_from_slice(b"##CG"); zeros(b,4); pu64(b,104); + pu64(b,6); pi64(b,0); pi64(b,cn); pi64(b,0); pi64(b,0); pi64(b,0); pi64(b,0); + pu64(b,0); pu64(b,cycles); pu16(b,0); pu16(b,0); zeros(b,4); pu32(b,data_bytes); pu32(b,0); +} +fn cn4(b: &mut Vec, cn_type: u8, sync: u8, dtype: u8, byte_off: u32, bits: u32, + next: i64, tx: i64, cc: i64) { + b.extend_from_slice(b"##CN"); zeros(b,4); pu64(b,160); + pu64(b,8); pi64(b,next); pi64(b,0); pi64(b,tx); pi64(b,0); pi64(b,cc); + pi64(b,0); pi64(b,0); pi64(b,0); + pu8(b,cn_type); pu8(b,sync); pu8(b,dtype); pu8(b,0); + pu32(b,byte_off); pu32(b,bits); pu32(b,0); pu32(b,0); + pu8(b,0xff); pu8(b,0); pu16(b,0); + pf64(b,0.0); pf64(b,0.0); pf64(b,0.0); pf64(b,0.0); pf64(b,0.0); pf64(b,0.0); +} +/// Like `cn4` but with a non-zero `composition` link (second link = CA block offset). 
+#[allow(clippy::too_many_arguments)] +fn cn4_ca(b: &mut Vec, cn_type: u8, sync: u8, dtype: u8, byte_off: u32, bits: u32, + next: i64, tx: i64, cc: i64, composition: i64) { + b.extend_from_slice(b"##CN"); zeros(b,4); pu64(b,160); + pu64(b,8); pi64(b,next); pi64(b,composition); pi64(b,tx); pi64(b,0); pi64(b,cc); + pi64(b,0); pi64(b,0); pi64(b,0); + pu8(b,cn_type); pu8(b,sync); pu8(b,dtype); pu8(b,0); + pu32(b,byte_off); pu32(b,bits); pu32(b,0); pu32(b,0); + pu8(b,0xff); pu8(b,0); pu16(b,0); + pf64(b,0.0); pf64(b,0.0); pf64(b,0.0); pf64(b,0.0); pf64(b,0.0); pf64(b,0.0); +} +fn tx(b: &mut Vec, text: &str) { + let t = text.as_bytes(); + let len = 24u64 + t.len() as u64 + 1; + b.extend_from_slice(b"##TX"); zeros(b,4); pu64(b,len); pu64(b,0); + b.extend_from_slice(t); b.push(0); +} +fn dt(b: &mut Vec, records: &[u8]) { + let len = 24u64 + records.len() as u64; + b.extend_from_slice(b"##DT"); zeros(b,4); pu64(b,len); pu64(b,0); + b.extend_from_slice(records); +} +/// Minimal 1D CA block (56 bytes). `dim_size` is the number of elements per cycle. 
+fn ca1d(b: &mut Vec, dim_size: u64) { + b.extend_from_slice(b"##CA"); zeros(b,4); pu64(b,56); pu64(b,1); // link_count=1 + pi64(b,0); // ca_composition = null + pu8(b,0); // ca_type = 0 (Array) + pu8(b,0); // ca_storage = 0 (CN template) + pu16(b,1); // ca_ndim = 1 + pu32(b,0); // ca_flags = 0 + pu32(b,0); // ca_byte_offset_base = 0 + pu32(b,0); // ca_inval_bit_pos_base = 0 + pu64(b,dim_size); // ca_dim_size[0] +} + +// ─── Fixture 1: be_scalars.mf4 ─────────────────────────────────────────────── +// +// Layout (all offsets are absolute): +// [0] IdBlock 64 b +// [64] HD4 104 b (hd_dg=224, hd_fh=168) +// [168] FH 56 b +// [224] DG4 64 b (dg_cg=288, dg_data=965) +// [288] CG4 104 b (cg_cn=392, cycles=4, data_bytes=18) +// [392] CN_master 160 b (FloatLE/64bit, byte_off=0, next=552, tx=872) +// [552] CN_be_i16 160 b (IntBE/16bit, byte_off=8, next=712, tx=903) +// [712] CN_be_f64 160 b (FloatBE/64bit, byte_off=10, next=0, tx=934) +// [872] TX "master" 31 b +// [903] TX "be_i16" 31 b +// [934] TX "be_f64" 31 b +// [965] DT 96 b (24 hdr + 4×18=72 data) +// Total: 1061 b + +const BE_SCALARS_PATH: &str = "test_files/synthetic/be_scalars.mf4"; +static FIXTURE_BE: LazyLock<()> = LazyLock::new(|| { + create_be_scalars().expect("failed to create be_scalars fixture"); +}); + +fn create_be_scalars() -> Result<()> { + if std::path::Path::new(BE_SCALARS_PATH).exists() { return Ok(()); } + std::fs::create_dir_all("test_files/synthetic")?; + let mut b: Vec = Vec::with_capacity(1061); + + id_block(&mut b); debug_assert_eq!(b.len(), 64); + hd4(&mut b, 224, 168); debug_assert_eq!(b.len(), 168); + fh(&mut b); debug_assert_eq!(b.len(), 224); + dg4(&mut b, 288, 965); debug_assert_eq!(b.len(), 288); + cg4(&mut b, 392, 4, 18); debug_assert_eq!(b.len(), 392); + cn4(&mut b, 2, 1, 4, 0, 64, 552, 872, 0); debug_assert_eq!(b.len(), 552); + cn4(&mut b, 0, 0, 3, 8, 16, 712, 903, 0); debug_assert_eq!(b.len(), 712); // dtype=3=IntBE + cn4(&mut b, 0, 0, 5, 10, 64, 0, 934, 0); 
debug_assert_eq!(b.len(), 872); // dtype=5=FloatBE + tx(&mut b, "master"); debug_assert_eq!(b.len(), 903); + tx(&mut b, "be_i16"); debug_assert_eq!(b.len(), 934); + tx(&mut b, "be_f64"); debug_assert_eq!(b.len(), 965); + + // 4 records × 18 bytes: f64_LE(8) | i16_BE(2) | f64_BE(8) + let raw_i16_be: [i16; 4] = [-100, 0, 100, 200]; + let raw_f64_be: [f64; 4] = [1.5, 2.5, 3.5, 4.5]; + let mut recs: Vec = Vec::with_capacity(72); + for i in 0..4 { + pf64(&mut recs, i as f64); + pi16_be(&mut recs, raw_i16_be[i]); + pf64_be(&mut recs, raw_f64_be[i]); + } + dt(&mut b, &recs); + debug_assert_eq!(b.len(), 1061); + + std::fs::write(BE_SCALARS_PATH, &b)?; + Ok(()) +} + +// ─── Fixture 2: complex_f32_le.mf4 ────────────────────────────────────────── +// +// [0] IdBlock 64 b +// [64] HD4 104 b (hd_dg=224, hd_fh=168) +// [168] FH 56 b +// [224] DG4 64 b (dg_cg=288, dg_data=775) +// [288] CG4 104 b (cg_cn=392, cycles=4, data_bytes=16) +// [392] CN_master 160 b (FloatLE/64bit, byte_off=0, next=552, tx=712) +// [552] CN_cx32 160 b (ComplexLE/64bit, byte_off=8, next=0, tx=743) +// [712] TX "master" 31 b +// [743] TX "cx32_ch" 32 b +// [775] DT 88 b (24 hdr + 4×16=64 data) +// Total: 863 b + +const CX32_PATH: &str = "test_files/synthetic/complex_f32_le.mf4"; +static FIXTURE_CX: LazyLock<()> = LazyLock::new(|| { + create_complex_f32_le().expect("failed to create complex_f32_le fixture"); +}); + +fn create_complex_f32_le() -> Result<()> { + if std::path::Path::new(CX32_PATH).exists() { return Ok(()); } + std::fs::create_dir_all("test_files/synthetic")?; + let mut b: Vec = Vec::with_capacity(863); + + id_block(&mut b); debug_assert_eq!(b.len(), 64); + hd4(&mut b, 224, 168); debug_assert_eq!(b.len(), 168); + fh(&mut b); debug_assert_eq!(b.len(), 224); + dg4(&mut b, 288, 775); debug_assert_eq!(b.len(), 288); + cg4(&mut b, 392, 4, 16); debug_assert_eq!(b.len(), 392); + cn4(&mut b, 2, 1, 4, 0, 64, 552, 712, 0); debug_assert_eq!(b.len(), 552); + cn4(&mut b, 0, 0, 15, 8, 64, 0, 743, 0); 
debug_assert_eq!(b.len(), 712); // dtype=15=ComplexLE + tx(&mut b, "master"); debug_assert_eq!(b.len(), 743); + tx(&mut b, "cx32_ch"); debug_assert_eq!(b.len(), 775); + + // 4 records × 16 bytes: f64_LE(8) | f32_LE_real(4) | f32_LE_imag(4) + // Samples: (1+2j), (3+4j), (5+6j), (7+8j) + let samples: [(f32, f32); 4] = [(1.0, 2.0), (3.0, 4.0), (5.0, 6.0), (7.0, 8.0)]; + let mut recs: Vec = Vec::with_capacity(64); + for (i, (re, im)) in samples.iter().enumerate() { + pf64(&mut recs, i as f64); + pf32(&mut recs, *re); + pf32(&mut recs, *im); + } + dt(&mut b, &recs); + debug_assert_eq!(b.len(), 863); + + std::fs::write(CX32_PATH, &b)?; + Ok(()) +} + +// ─── Fixture 3: array_f64_le.mf4 ──────────────────────────────────────────── +// +// [0] IdBlock 64 b +// [64] HD4 104 b (hd_dg=224, hd_fh=168) +// [168] FH 56 b +// [224] DG4 64 b (dg_cg=288, dg_data=831) +// [288] CG4 104 b (cg_cn=392, cycles=4, data_bytes=32) +// [392] CN_master 160 b (FloatLE/64bit, byte_off=0, next=552, tx=712, composition=0) +// [552] CN_arr 160 b (FloatLE/64bit, byte_off=8, next=0, tx=743, composition=775) +// [712] TX "master" 31 b +// [743] TX "arr_f64" 32 b +// [775] CA block 56 b (ca_ndim=1, ca_dim_size=[3]) +// [831] DT 152 b (24 hdr + 4×32=128 data) +// Total: 983 b + +const ARR_F64_PATH: &str = "test_files/synthetic/array_f64_le.mf4"; +static FIXTURE_ARR: LazyLock<()> = LazyLock::new(|| { + create_array_f64_le().expect("failed to create array_f64_le fixture"); +}); + +fn create_array_f64_le() -> Result<()> { + if std::path::Path::new(ARR_F64_PATH).exists() { return Ok(()); } + std::fs::create_dir_all("test_files/synthetic")?; + let mut b: Vec = Vec::with_capacity(983); + + id_block(&mut b); debug_assert_eq!(b.len(), 64); + hd4(&mut b, 224, 168); debug_assert_eq!(b.len(), 168); + fh(&mut b); debug_assert_eq!(b.len(), 224); + dg4(&mut b, 288, 831); debug_assert_eq!(b.len(), 288); + cg4(&mut b, 392, 4, 32); debug_assert_eq!(b.len(), 392); + // CN master: no CA block → composition=0, use plain 
cn4 + cn4(&mut b, 2, 1, 4, 0, 64, 552, 712, 0); debug_assert_eq!(b.len(), 552); + // CN_arr: composition link points to CA block at 775 + cn4_ca(&mut b, 0, 0, 4, 8, 64, 0, 743, 0, 775); debug_assert_eq!(b.len(), 712); + tx(&mut b, "master"); debug_assert_eq!(b.len(), 743); + tx(&mut b, "arr_f64"); debug_assert_eq!(b.len(), 775); + ca1d(&mut b, 3); debug_assert_eq!(b.len(), 831); + + // 4 records × 32 bytes: f64_LE(8) | [f64_LE × 3](24) + // Array values: [1,2,3], [4,5,6], [7,8,9], [10,11,12] + let mut recs: Vec = Vec::with_capacity(128); + for cycle in 0..4u64 { + pf64(&mut recs, cycle as f64); + for elem in 1..=3u64 { + pf64(&mut recs, (cycle * 3 + elem) as f64); + } + } + dt(&mut b, &recs); + debug_assert_eq!(b.len(), 983); + + std::fs::write(ARR_F64_PATH, &b)?; + Ok(()) +} + +// ─── Tests ─────────────────────────────────────────────────────────────────── + +#[test] +fn be_int16_reads_correctly() -> Result<()> { + LazyLock::force(&FIXTURE_BE); + let mut mdf = Mdf::new(BE_SCALARS_PATH)?; + mdf.load_all_channels_data_in_memory()?; + + let data = mdf.get_channel_data("be_i16").expect("be_i16 not found"); + assert!( + matches!(data, ChannelData::Int16(_)), + "expected Int16, got {}", + data.data_type(false) + ); + if let ChannelData::Int16(arr) = data { + let expected = [-100i16, 0, 100, 200]; + for (i, (&got, &exp)) in arr.values_slice().iter().zip(expected.iter()).enumerate() { + assert_eq!(got, exp, "be_i16[{i}]: expected {exp}, got {got}"); + } + } + Ok(()) +} + +#[test] +fn be_float64_reads_correctly() -> Result<()> { + LazyLock::force(&FIXTURE_BE); + let mut mdf = Mdf::new(BE_SCALARS_PATH)?; + mdf.load_all_channels_data_in_memory()?; + + let data = mdf.get_channel_data("be_f64").expect("be_f64 not found"); + assert!( + matches!(data, ChannelData::Float64(_)), + "expected Float64, got {}", + data.data_type(false) + ); + if let ChannelData::Float64(arr) = data { + let expected = [1.5f64, 2.5, 3.5, 4.5]; + for (i, (&got, &exp)) in 
arr.values_slice().iter().zip(expected.iter()).enumerate() { + assert!((got - exp).abs() < 1e-9, "be_f64[{i}]: expected {exp}, got {got}"); + } + } + Ok(()) +} + +#[test] +fn complex32_le_reads_correctly() -> Result<()> { + LazyLock::force(&FIXTURE_CX); + let mut mdf = Mdf::new(CX32_PATH)?; + mdf.load_all_channels_data_in_memory()?; + + let data = mdf.get_channel_data("cx32_ch").expect("cx32_ch not found"); + assert!( + matches!(data, ChannelData::Complex32(_)), + "expected Complex32, got {}", + data.data_type(false) + ); + if let ChannelData::Complex32(arr) = data { + // values_slice() returns interleaved [re0, im0, re1, im1, ...] + let vals = arr.values_slice(); + assert_eq!(vals.len(), 8, "expected 4 complex samples = 8 floats"); + let expected_re = [1.0f32, 3.0, 5.0, 7.0]; + let expected_im = [2.0f32, 4.0, 6.0, 8.0]; + for i in 0..4 { + assert!((vals[i * 2] - expected_re[i]).abs() < 1e-6, + "cx32_ch[{i}].re: expected {}, got {}", expected_re[i], vals[i * 2]); + assert!((vals[i * 2 + 1] - expected_im[i]).abs() < 1e-6, + "cx32_ch[{i}].im: expected {}, got {}", expected_im[i], vals[i * 2 + 1]); + } + } + Ok(()) +} + +#[test] +fn array_f64_le_reads_correctly() -> Result<()> { + LazyLock::force(&FIXTURE_ARR); + let mut mdf = Mdf::new(ARR_F64_PATH)?; + mdf.load_all_channels_data_in_memory()?; + + let data = mdf.get_channel_data("arr_f64").expect("arr_f64 not found"); + assert!( + matches!(data, ChannelData::ArrayDFloat64(_)), + "expected ArrayDFloat64, got {}", + data.data_type(false) + ); + if let ChannelData::ArrayDFloat64(arr) = data { + let vals = arr.values_slice(); + // The zeros() pre-allocation for ArrayDFloat64 allocates cycle_count * product(shape) + // elements. Since shape=[4,3] already includes cycle_count=4 as first dim, + // the buffer is 4 * (4*3) = 48 slots, but only the first 12 are filled by reading. 
+ assert!(vals.len() >= 12, "expected at least 12 f64 values, got {}", vals.len()); + let expected: Vec = (1..=12).map(|x| x as f64).collect(); + for (i, (&got, &exp)) in vals[..12].iter().zip(expected.iter()).enumerate() { + assert!((got - exp).abs() < 1e-9, "arr_f64[{i}]: expected {exp}, got {got}"); + } + assert!(arr.ndim() >= 1, "expected ndim >= 1"); + } + Ok(()) +} + +// ─── Fixture 4: ld_be_channels.mf4 ────────────────────────────────────────── +// +// Four DGs, each with one LD-backed BE channel. Exercises the uncovered BE +// arms in `read_one_channel_array` (data_read4.rs lines 59-62, 72-75, 146-149, +// 272-275) which are only reachable via LD blocks (the optimised single-channel +// path) — existing real MDF files with LD blocks all use LE channels. +// +// Layout: +// [0] IdBlock 64 +// [64] HD4 104 (dg=224, fh=168) +// [168] FH 56 +// [224] DG1 64 (next=288, cg=480, data=1656) BE Int16 +// [288] DG2 64 (next=352, cg=584, data=1712) BE Float64 +// [352] DG3 64 (next=416, cg=688, data=1768) BE UInt16 +// [416] DG4 64 (next=0, cg=792, data=1824) BE Float32 +// [480] CG1 104 (cn=896, cycles=4, data_bytes=2) +// [584] CG2 104 (cn=1056, cycles=4, data_bytes=8) +// [688] CG3 104 (cn=1216, cycles=4, data_bytes=2) +// [792] CG4 104 (cn=1376, cycles=4, data_bytes=4) +// [896] CN_bei16 160 (dtype=3/IntBE, bits=16, tx=1536) +// [1056] CN_bef64 160 (dtype=5/FloatBE, bits=64, tx=1566) +// [1216] CN_beu16 160 (dtype=1/UIntBE, bits=16, tx=1596) +// [1376] CN_bef32 160 (dtype=5/FloatBE, bits=32, tx=1626) +// [1536] TX "bei16" 30 +// [1566] TX "bef64" 30 +// [1596] TX "beu16" 30 +// [1626] TX "bef32" 30 +// [1656] LD1 56 → DT1 at 1880 +// [1712] LD2 56 → DT2 at 1912 +// [1768] LD3 56 → DT3 at 1968 +// [1824] LD4 56 → DT4 at 2000 +// [1880] DT1 32 (24 hdr + 4×i16_BE = 8 bytes) +// [1912] DT2 56 (24 hdr + 4×f64_BE = 32 bytes) +// [1968] DT3 32 (24 hdr + 4×u16_BE = 8 bytes) +// [2000] DT4 40 (24 hdr + 4×f32_BE = 16 bytes) +// Total: 2040 bytes + +const LD_BE_PATH: &str = 
"test_files/synthetic/ld_be_channels.mf4"; +static FIXTURE_LD_BE: LazyLock<()> = LazyLock::new(|| { + create_ld_be_channels().expect("failed to create ld_be_channels fixture"); +}); + +fn create_ld_be_channels() -> Result<()> { + if std::path::Path::new(LD_BE_PATH).exists() { return Ok(()); } + std::fs::create_dir_all("test_files/synthetic")?; + let mut b: Vec = Vec::with_capacity(2040); + + id_block(&mut b); debug_assert_eq!(b.len(), 64); + hd4(&mut b, 224, 168); debug_assert_eq!(b.len(), 168); + fh(&mut b); debug_assert_eq!(b.len(), 224); + // 4 chained DGs + dg4_chain(&mut b, 288, 480, 1656); debug_assert_eq!(b.len(), 288); // DG1 bei16 + dg4_chain(&mut b, 352, 584, 1712); debug_assert_eq!(b.len(), 352); // DG2 bef64 + dg4_chain(&mut b, 416, 688, 1768); debug_assert_eq!(b.len(), 416); // DG3 beu16 + dg4_chain(&mut b, 0, 792, 1824); debug_assert_eq!(b.len(), 480); // DG4 bef32 + // CGs (one per DG, one CN each) + cg4(&mut b, 896, 4, 2); debug_assert_eq!(b.len(), 584); // CG1 + cg4(&mut b, 1056, 4, 8); debug_assert_eq!(b.len(), 688); // CG2 + cg4(&mut b, 1216, 4, 2); debug_assert_eq!(b.len(), 792); // CG3 + cg4(&mut b, 1376, 4, 4); debug_assert_eq!(b.len(), 896); // CG4 + // CNs: dtype=3(IntBE), 5(FloatBE), 1(UIntBE), 5(FloatBE) + cn4(&mut b, 0, 0, 3, 0, 16, 0, 1536, 0); debug_assert_eq!(b.len(), 1056); // bei16 + cn4(&mut b, 0, 0, 5, 0, 64, 0, 1566, 0); debug_assert_eq!(b.len(), 1216); // bef64 + cn4(&mut b, 0, 0, 1, 0, 16, 0, 1596, 0); debug_assert_eq!(b.len(), 1376); // beu16 + cn4(&mut b, 0, 0, 5, 0, 32, 0, 1626, 0); debug_assert_eq!(b.len(), 1536); // bef32 + // TX blocks + tx(&mut b, "bei16"); debug_assert_eq!(b.len(), 1566); + tx(&mut b, "bef64"); debug_assert_eq!(b.len(), 1596); + tx(&mut b, "beu16"); debug_assert_eq!(b.len(), 1626); + tx(&mut b, "bef32"); debug_assert_eq!(b.len(), 1656); + // LD blocks pointing to DT blocks + ld1_block(&mut b, 1880, 4); debug_assert_eq!(b.len(), 1712); // LD1 → DT1 + ld1_block(&mut b, 1912, 4); debug_assert_eq!(b.len(), 
1768); // LD2 → DT2 + ld1_block(&mut b, 1968, 4); debug_assert_eq!(b.len(), 1824); // LD3 → DT3 + ld1_block(&mut b, 2000, 4); debug_assert_eq!(b.len(), 1880); // LD4 → DT4 + // DT1: 4×i16_BE + { + let mut recs: Vec = Vec::with_capacity(8); + for v in [-100i16, 0, 100, 200] { pi16_be(&mut recs, v); } + dt(&mut b, &recs); debug_assert_eq!(b.len(), 1912); + } + // DT2: 4×f64_BE + { + let mut recs: Vec = Vec::with_capacity(32); + for v in [1.5f64, 2.5, 3.5, 4.5] { pf64_be(&mut recs, v); } + dt(&mut b, &recs); debug_assert_eq!(b.len(), 1968); + } + // DT3: 4×u16_BE + { + let mut recs: Vec = Vec::with_capacity(8); + for v in [100u16, 200, 300, 400] { pu16_be(&mut recs, v); } + dt(&mut b, &recs); debug_assert_eq!(b.len(), 2000); + } + // DT4: 4×f32_BE + { + let mut recs: Vec = Vec::with_capacity(16); + for v in [1.5f32, 2.5, 3.5, 4.5] { pf32_be(&mut recs, v); } + dt(&mut b, &recs); debug_assert_eq!(b.len(), 2040); + } + + std::fs::write(LD_BE_PATH, &b)?; + Ok(()) +} + +// ─── Tests for LD-backed BE channels ───────────────────────────────────────── + +fn load_ld_be() -> Result { + LazyLock::force(&FIXTURE_LD_BE); + let mut mdf = Mdf::new(LD_BE_PATH)?; + mdf.load_all_channels_data_in_memory()?; + Ok(mdf) +} + +#[test] +fn ld_bei16_reads_correctly() -> Result<()> { + let mdf = load_ld_be()?; + let data = mdf.get_channel_data("bei16").expect("bei16 not found"); + assert!( + matches!(data, ChannelData::Int16(_)), + "expected Int16, got {}", + data.data_type(false) + ); + if let ChannelData::Int16(arr) = data { + let expected = [-100i16, 0, 100, 200]; + for (i, (&got, &exp)) in arr.values_slice().iter().zip(expected.iter()).enumerate() { + assert_eq!(got, exp, "bei16[{i}]: expected {exp}, got {got}"); + } + } + Ok(()) +} + +#[test] +fn ld_bef64_reads_correctly() -> Result<()> { + let mdf = load_ld_be()?; + let data = mdf.get_channel_data("bef64").expect("bef64 not found"); + assert!( + matches!(data, ChannelData::Float64(_)), + "expected Float64, got {}", + 
data.data_type(false) + ); + if let ChannelData::Float64(arr) = data { + let expected = [1.5f64, 2.5, 3.5, 4.5]; + for (i, (&got, &exp)) in arr.values_slice().iter().zip(expected.iter()).enumerate() { + assert!((got - exp).abs() < 1e-9, "bef64[{i}]: expected {exp}, got {got}"); + } + } + Ok(()) +} + +#[test] +fn ld_beu16_reads_correctly() -> Result<()> { + let mdf = load_ld_be()?; + let data = mdf.get_channel_data("beu16").expect("beu16 not found"); + assert!( + matches!(data, ChannelData::UInt16(_)), + "expected UInt16, got {}", + data.data_type(false) + ); + if let ChannelData::UInt16(arr) = data { + let expected = [100u16, 200, 300, 400]; + for (i, (&got, &exp)) in arr.values_slice().iter().zip(expected.iter()).enumerate() { + assert_eq!(got, exp, "beu16[{i}]: expected {exp}, got {got}"); + } + } + Ok(()) +} + +#[test] +fn ld_bef32_reads_correctly() -> Result<()> { + let mdf = load_ld_be()?; + let data = mdf.get_channel_data("bef32").expect("bef32 not found"); + assert!( + matches!(data, ChannelData::Float32(_)), + "expected Float32, got {}", + data.data_type(false) + ); + if let ChannelData::Float32(arr) = data { + let expected = [1.5f32, 2.5, 3.5, 4.5]; + for (i, (&got, &exp)) in arr.values_slice().iter().zip(expected.iter()).enumerate() { + assert!((got - exp).abs() < 1e-6, "bef32[{i}]: expected {exp}, got {got}"); + } + } + Ok(()) +} From f828173a151216f35eb56bdb95e1087da3855736 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Wed, 1 Apr 2026 19:02:05 +0200 Subject: [PATCH 42/46] expose pythonerror for conversion into python --- src/export/numpy.rs | 138 +++++++++++++++++++++----------------------- 1 file changed, 66 insertions(+), 72 deletions(-) diff --git a/src/export/numpy.rs b/src/export/numpy.rs index 986ad10..83bfb20 100644 --- a/src/export/numpy.rs +++ b/src/export/numpy.rs @@ -37,7 +37,7 @@ pub(crate) fn to_py_array(_: Python, array: Arc) -> PyResult IntoPyObject<'py> for ChannelData { type Target = PyAny; // 
the Python type type Output = Bound<'py, Self::Target>; // in most cases this will be `Bound` - type Error = std::convert::Infallible; + type Error = PyErr; /// IntoPyObject implementation to convert a ChannelData into a PyObject fn into_pyobject(self, py: Python<'py>) -> Result { match self { @@ -63,83 +63,77 @@ impl<'py> IntoPyObject<'py> for ChannelData { .chunks(binary_array.value_length() as usize) .map(|x| x.to_vec()) .collect(); - Ok(out - .into_pyobject(py) - .expect("error converting fixed size binary array into python object")) + out.into_pyobject(py).map_err(Into::into) + } + ChannelData::ArrayDInt8(array) => { + let flat = array.values_slice().to_pyarray(py); + Ok(flat + .reshape_with_order(array.shape().clone(), array.order().clone().into())? + .into_any()) + } + ChannelData::ArrayDUInt8(array) => { + let flat = array.values_slice().to_pyarray(py); + Ok(flat + .reshape_with_order(array.shape().clone(), array.order().clone().into())? + .into_any()) + } + ChannelData::ArrayDInt16(array) => { + let flat = array.values_slice().to_pyarray(py); + Ok(flat + .reshape_with_order(array.shape().clone(), array.order().clone().into())? + .into_any()) + } + ChannelData::ArrayDUInt16(array) => { + let flat = array.values_slice().to_pyarray(py); + Ok(flat + .reshape_with_order(array.shape().clone(), array.order().clone().into())? + .into_any()) + } + ChannelData::ArrayDInt32(array) => { + let flat = array.values_slice().to_pyarray(py); + Ok(flat + .reshape_with_order(array.shape().clone(), array.order().clone().into())? + .into_any()) + } + ChannelData::ArrayDUInt32(array) => { + let flat = array.values_slice().to_pyarray(py); + Ok(flat + .reshape_with_order(array.shape().clone(), array.order().clone().into())? + .into_any()) + } + ChannelData::ArrayDFloat32(array) => { + let flat = array.values_slice().to_pyarray(py); + Ok(flat + .reshape_with_order(array.shape().clone(), array.order().clone().into())? 
+ .into_any()) + } + ChannelData::ArrayDInt64(array) => { + let flat = array.values_slice().to_pyarray(py); + Ok(flat + .reshape_with_order(array.shape().clone(), array.order().clone().into())? + .into_any()) + } + ChannelData::ArrayDUInt64(array) => { + let flat = array.values_slice().to_pyarray(py); + Ok(flat + .reshape_with_order(array.shape().clone(), array.order().clone().into())? + .into_any()) + } + ChannelData::ArrayDFloat64(array) => { + let flat = array.values_slice().to_pyarray(py); + Ok(flat + .reshape_with_order(array.shape().clone(), array.order().clone().into())? + .into_any()) } - ChannelData::ArrayDInt8(array) => Ok(array - .values_slice() - .to_pyarray(py) - .reshape_with_order(array.shape().clone(), array.order().clone().into()) - .expect("could not reshape i8") - .into_any()), - ChannelData::ArrayDUInt8(array) => Ok(array - .values_slice() - .to_pyarray(py) - .reshape_with_order(array.shape().clone(), array.order().clone().into()) - .expect("could not reshape u8") - .into_any()), - ChannelData::ArrayDInt16(array) => Ok(array - .values_slice() - .to_pyarray(py) - .reshape_with_order(array.shape().clone(), array.order().clone().into()) - .expect("could not reshape u16") - .into_any()), - ChannelData::ArrayDUInt16(array) => Ok(array - .values_slice() - .to_pyarray(py) - .reshape_with_order(array.shape().clone(), array.order().clone().into()) - .expect("could not reshape i16") - .into_any()), - ChannelData::ArrayDInt32(array) => Ok(array - .values_slice() - .to_pyarray(py) - .reshape_with_order(array.shape().clone(), array.order().clone().into()) - .expect("could not reshape i32") - .into_any()), - ChannelData::ArrayDUInt32(array) => Ok(array - .values_slice() - .to_pyarray(py) - .reshape_with_order(array.shape().clone(), array.order().clone().into()) - .expect("could not reshape u32") - .into_any()), - ChannelData::ArrayDFloat32(array) => Ok(array - .values_slice() - .to_pyarray(py) - .reshape_with_order(array.shape().clone(), 
array.order().clone().into()) - .expect("could not reshape f32") - .into_any()), - ChannelData::ArrayDInt64(array) => Ok(array - .values_slice() - .to_pyarray(py) - .reshape_with_order(array.shape().clone(), array.order().clone().into()) - .expect("could not reshape i64") - .into_any()), - ChannelData::ArrayDUInt64(array) => Ok(array - .values_slice() - .to_pyarray(py) - .reshape_with_order(array.shape().clone(), array.order().clone().into()) - .expect("could not reshape u64") - .into_any()), - ChannelData::ArrayDFloat64(array) => Ok(array - .values_slice() - .to_pyarray(py) - .reshape_with_order(array.shape().clone(), array.order().clone().into()) - .expect("could not reshape f64") - .into_any()), ChannelData::Utf8(array) => { let string_array = array.finish_cloned(); let strings: Vec> = string_array.iter().collect(); - Ok(strings - .into_pyobject(py) - .expect("error converting Utf8 array into python object")) + strings.into_pyobject(py).map_err(Into::into) } ChannelData::Union(array) => { - let arrow_data = to_py_array(py, Arc::new(UnionArray::from(array.to_data()))) - .expect("error converting Union array into python object"); - Ok(arrow_data - .into_pyobject(py) - .expect("error converting Union PyArrow into python object")) + let arrow_data = + to_py_array(py, Arc::new(UnionArray::from(array.to_data())))?; + arrow_data.into_pyobject(py).map_err(Into::into) } } } From 3735d5149d39eaae1de6e183c0fd0a1521bd189f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Thu, 2 Apr 2026 00:27:38 +0200 Subject: [PATCH 43/46] fixed clippy --- .gitignore | 3 ++- src/export/numpy.rs | 6 +++--- tests/be_complex_array.rs | 42 +++++++++++++++++++++++--------------- tests/channel_data_ops.rs | 43 +++++++++++---------------------------- 4 files changed, 43 insertions(+), 51 deletions(-) diff --git a/.gitignore b/.gitignore index 8736572..95dba98 100644 --- a/.gitignore +++ b/.gitignore @@ -20,4 +20,5 @@ test_files/test.parquet /share /bin 
pyvenv.cfg -/.continue \ No newline at end of file +/.continue +test_files/synthetic \ No newline at end of file diff --git a/src/export/numpy.rs b/src/export/numpy.rs index 83bfb20..1f17f87 100644 --- a/src/export/numpy.rs +++ b/src/export/numpy.rs @@ -63,7 +63,7 @@ impl<'py> IntoPyObject<'py> for ChannelData { .chunks(binary_array.value_length() as usize) .map(|x| x.to_vec()) .collect(); - out.into_pyobject(py).map_err(Into::into) + out.into_pyobject(py) } ChannelData::ArrayDInt8(array) => { let flat = array.values_slice().to_pyarray(py); @@ -128,12 +128,12 @@ impl<'py> IntoPyObject<'py> for ChannelData { ChannelData::Utf8(array) => { let string_array = array.finish_cloned(); let strings: Vec> = string_array.iter().collect(); - strings.into_pyobject(py).map_err(Into::into) + strings.into_pyobject(py) } ChannelData::Union(array) => { let arrow_data = to_py_array(py, Arc::new(UnionArray::from(array.to_data())))?; - arrow_data.into_pyobject(py).map_err(Into::into) + arrow_data.into_pyobject(py) } } } diff --git a/tests/be_complex_array.rs b/tests/be_complex_array.rs index cf3ac29..aec9a3e 100644 --- a/tests/be_complex_array.rs +++ b/tests/be_complex_array.rs @@ -63,8 +63,17 @@ fn cg4(b: &mut Vec, cn: i64, cycles: u64, data_bytes: u32) { pu64(b,6); pi64(b,0); pi64(b,cn); pi64(b,0); pi64(b,0); pi64(b,0); pi64(b,0); pu64(b,0); pu64(b,cycles); pu16(b,0); pu16(b,0); zeros(b,4); pu32(b,data_bytes); pu32(b,0); } -fn cn4(b: &mut Vec, cn_type: u8, sync: u8, dtype: u8, byte_off: u32, bits: u32, - next: i64, tx: i64, cc: i64) { +/// `(cn_type, sync, dtype)` — channel kind descriptor +type CnDesc = (u8, u8, u8); +/// `(byte_offset, bit_count)` — data layout +type CnSpan = (u32, u32); +/// `(cn_next, cn_name_tx, cn_cc)` — block links +type CnRefs = (i64, i64, i64); + +fn cn4(b: &mut Vec, desc: CnDesc, span: CnSpan, refs: CnRefs) { + let (cn_type, sync, dtype) = desc; + let (byte_off, bits) = span; + let (next, tx, cc) = refs; b.extend_from_slice(b"##CN"); zeros(b,4); 
pu64(b,160); pu64(b,8); pi64(b,next); pi64(b,0); pi64(b,tx); pi64(b,0); pi64(b,cc); pi64(b,0); pi64(b,0); pi64(b,0); @@ -74,9 +83,10 @@ fn cn4(b: &mut Vec, cn_type: u8, sync: u8, dtype: u8, byte_off: u32, bits: u pf64(b,0.0); pf64(b,0.0); pf64(b,0.0); pf64(b,0.0); pf64(b,0.0); pf64(b,0.0); } /// Like `cn4` but with a non-zero `composition` link (second link = CA block offset). -#[allow(clippy::too_many_arguments)] -fn cn4_ca(b: &mut Vec, cn_type: u8, sync: u8, dtype: u8, byte_off: u32, bits: u32, - next: i64, tx: i64, cc: i64, composition: i64) { +fn cn4_ca(b: &mut Vec, desc: CnDesc, span: CnSpan, refs: CnRefs, composition: i64) { + let (cn_type, sync, dtype) = desc; + let (byte_off, bits) = span; + let (next, tx, cc) = refs; b.extend_from_slice(b"##CN"); zeros(b,4); pu64(b,160); pu64(b,8); pi64(b,next); pi64(b,composition); pi64(b,tx); pi64(b,0); pi64(b,cc); pi64(b,0); pi64(b,0); pi64(b,0); @@ -141,9 +151,9 @@ fn create_be_scalars() -> Result<()> { fh(&mut b); debug_assert_eq!(b.len(), 224); dg4(&mut b, 288, 965); debug_assert_eq!(b.len(), 288); cg4(&mut b, 392, 4, 18); debug_assert_eq!(b.len(), 392); - cn4(&mut b, 2, 1, 4, 0, 64, 552, 872, 0); debug_assert_eq!(b.len(), 552); - cn4(&mut b, 0, 0, 3, 8, 16, 712, 903, 0); debug_assert_eq!(b.len(), 712); // dtype=3=IntBE - cn4(&mut b, 0, 0, 5, 10, 64, 0, 934, 0); debug_assert_eq!(b.len(), 872); // dtype=5=FloatBE + cn4(&mut b, (2,1,4), (0,64), (552,872,0)); debug_assert_eq!(b.len(), 552); + cn4(&mut b, (0,0,3), (8,16), (712,903,0)); debug_assert_eq!(b.len(), 712); // dtype=3=IntBE + cn4(&mut b, (0,0,5), (10,64), (0,934,0)); debug_assert_eq!(b.len(), 872); // dtype=5=FloatBE tx(&mut b, "master"); debug_assert_eq!(b.len(), 903); tx(&mut b, "be_i16"); debug_assert_eq!(b.len(), 934); tx(&mut b, "be_f64"); debug_assert_eq!(b.len(), 965); @@ -193,8 +203,8 @@ fn create_complex_f32_le() -> Result<()> { fh(&mut b); debug_assert_eq!(b.len(), 224); dg4(&mut b, 288, 775); debug_assert_eq!(b.len(), 288); cg4(&mut b, 392, 4, 16); 
debug_assert_eq!(b.len(), 392); - cn4(&mut b, 2, 1, 4, 0, 64, 552, 712, 0); debug_assert_eq!(b.len(), 552); - cn4(&mut b, 0, 0, 15, 8, 64, 0, 743, 0); debug_assert_eq!(b.len(), 712); // dtype=15=ComplexLE + cn4(&mut b, (2,1,4), (0,64), (552,712,0)); debug_assert_eq!(b.len(), 552); + cn4(&mut b, (0,0,15), (8,64), (0,743,0)); debug_assert_eq!(b.len(), 712); // dtype=15=ComplexLE tx(&mut b, "master"); debug_assert_eq!(b.len(), 743); tx(&mut b, "cx32_ch"); debug_assert_eq!(b.len(), 775); @@ -245,9 +255,9 @@ fn create_array_f64_le() -> Result<()> { dg4(&mut b, 288, 831); debug_assert_eq!(b.len(), 288); cg4(&mut b, 392, 4, 32); debug_assert_eq!(b.len(), 392); // CN master: no CA block → composition=0, use plain cn4 - cn4(&mut b, 2, 1, 4, 0, 64, 552, 712, 0); debug_assert_eq!(b.len(), 552); + cn4(&mut b, (2,1,4), (0,64), (552,712,0)); debug_assert_eq!(b.len(), 552); // CN_arr: composition link points to CA block at 775 - cn4_ca(&mut b, 0, 0, 4, 8, 64, 0, 743, 0, 775); debug_assert_eq!(b.len(), 712); + cn4_ca(&mut b, (0,0,4), (8,64), (0,743,0), 775); debug_assert_eq!(b.len(), 712); tx(&mut b, "master"); debug_assert_eq!(b.len(), 743); tx(&mut b, "arr_f64"); debug_assert_eq!(b.len(), 775); ca1d(&mut b, 3); debug_assert_eq!(b.len(), 831); @@ -428,10 +438,10 @@ fn create_ld_be_channels() -> Result<()> { cg4(&mut b, 1216, 4, 2); debug_assert_eq!(b.len(), 792); // CG3 cg4(&mut b, 1376, 4, 4); debug_assert_eq!(b.len(), 896); // CG4 // CNs: dtype=3(IntBE), 5(FloatBE), 1(UIntBE), 5(FloatBE) - cn4(&mut b, 0, 0, 3, 0, 16, 0, 1536, 0); debug_assert_eq!(b.len(), 1056); // bei16 - cn4(&mut b, 0, 0, 5, 0, 64, 0, 1566, 0); debug_assert_eq!(b.len(), 1216); // bef64 - cn4(&mut b, 0, 0, 1, 0, 16, 0, 1596, 0); debug_assert_eq!(b.len(), 1376); // beu16 - cn4(&mut b, 0, 0, 5, 0, 32, 0, 1626, 0); debug_assert_eq!(b.len(), 1536); // bef32 + cn4(&mut b, (0,0,3), (0,16), (0,1536,0)); debug_assert_eq!(b.len(), 1056); // bei16 + cn4(&mut b, (0,0,5), (0,64), (0,1566,0)); debug_assert_eq!(b.len(), 
1216); // bef64 + cn4(&mut b, (0,0,1), (0,16), (0,1596,0)); debug_assert_eq!(b.len(), 1376); // beu16 + cn4(&mut b, (0,0,5), (0,32), (0,1626,0)); debug_assert_eq!(b.len(), 1536); // bef32 // TX blocks tx(&mut b, "bei16"); debug_assert_eq!(b.len(), 1566); tx(&mut b, "bef64"); debug_assert_eq!(b.len(), 1596); diff --git a/tests/channel_data_ops.rs b/tests/channel_data_ops.rs index 48c0e96..f589d87 100644 --- a/tests/channel_data_ops.rs +++ b/tests/channel_data_ops.rs @@ -3,12 +3,11 @@ //! or cover additional variants to improve overall code coverage. use arrow::array::{ - Array, ArrayBuilder, FixedSizeBinaryBuilder, Float64Builder, Int8Builder, Int16Builder, - Int32Builder, Int64Builder, LargeStringBuilder, UInt8Builder, UInt16Builder, UInt32Builder, - UInt64Builder, + Array, FixedSizeBinaryBuilder, Float64Builder, Int8Builder, Int16Builder, Int32Builder, + Int64Builder, LargeStringBuilder, UInt8Builder, UInt16Builder, UInt32Builder, UInt64Builder, }; use arrow::buffer::MutableBuffer; -use arrow::datatypes::{Float64Type, Int16Type, Int8Type, UInt8Type, UInt32Type, UInt64Type}; +use arrow::datatypes::{Float64Type, Int8Type, Int16Type, UInt8Type, UInt32Type, UInt64Type}; use mdfr::data_holder::channel_data::ChannelData; use mdfr::data_holder::complex_arrow::ComplexArrow; use mdfr::data_holder::tensor_arrow::{Order, TensorArrow}; @@ -21,9 +20,7 @@ use mdfr::data_holder::tensor_arrow::{Order, TensorArrow}; fn test_zeros_virtual_channel_cn_type_3() { // cn_type 3 always returns UInt64 counter regardless of self type. 
let cd = ChannelData::Float64(Float64Builder::new()); - let result = cd - .zeros(3, 5, 8, (vec![1], Order::RowMajor)) - .unwrap(); + let result = cd.zeros(3, 5, 8, (vec![1], Order::RowMajor)).unwrap(); assert!(matches!(result, ChannelData::UInt64(_))); assert_eq!(result.len(), 5); assert_eq!(result.to_u64_vec(), Some(vec![0, 1, 2, 3, 4])); @@ -32,9 +29,7 @@ fn test_zeros_virtual_channel_cn_type_3() { #[test] fn test_zeros_virtual_channel_cn_type_6() { let cd = ChannelData::Float64(Float64Builder::new()); - let result = cd - .zeros(6, 3, 8, (vec![1], Order::RowMajor)) - .unwrap(); + let result = cd.zeros(6, 3, 8, (vec![1], Order::RowMajor)).unwrap(); assert!(matches!(result, ChannelData::UInt64(_))); assert_eq!(result.len(), 3); assert_eq!(result.to_u64_vec(), Some(vec![0, 1, 2])); @@ -44,9 +39,7 @@ fn test_zeros_virtual_channel_cn_type_6() { fn test_zeros_array_d_int16() { let cd = ChannelData::ArrayDInt16(TensorArrow::new()); // shape=[2,3] → product=6, buffer = 6 i16 = 12 bytes - let result = cd - .zeros(0, 4, 6, (vec![2, 3], Order::RowMajor)) - .unwrap(); + let result = cd.zeros(0, 4, 6, (vec![2, 3], Order::RowMajor)).unwrap(); assert!(matches!(result, ChannelData::ArrayDInt16(_))); // len = buffer_bytes / shape_product = 12 / 6 = 2 assert!(!result.is_empty()); @@ -59,9 +52,7 @@ fn test_zeros_array_d_int16() { #[test] fn test_zeros_array_d_uint8() { let cd = ChannelData::ArrayDUInt8(TensorArrow::new()); - let result = cd - .zeros(0, 5, 1, (vec![4], Order::RowMajor)) - .unwrap(); + let result = cd.zeros(0, 5, 1, (vec![4], Order::RowMajor)).unwrap(); assert!(matches!(result, ChannelData::ArrayDUInt8(_))); assert!(!result.is_empty()); } @@ -69,9 +60,7 @@ fn test_zeros_array_d_uint8() { #[test] fn test_zeros_fixed_size_byte() { let cd = ChannelData::FixedSizeByteArray(FixedSizeBinaryBuilder::with_capacity(1, 4)); - let result = cd - .zeros(0, 3, 4, (vec![1], Order::RowMajor)) - .unwrap(); + let result = cd.zeros(0, 3, 4, (vec![1], Order::RowMajor)).unwrap(); 
assert!(matches!(result, ChannelData::FixedSizeByteArray(_))); assert_eq!(result.len(), 0); // zeros creates empty builder with capacity } @@ -81,9 +70,7 @@ fn test_zeros_complex32() { let cd = ChannelData::Complex32(ComplexArrow::new()); // zeros creates vec![0f32; cycle_count*2] = 2*2=4 f32 = 16 bytes // ComplexArrow::new_from_buffer: len = byte_len / 2 = 16 / 2 = 8 - let result = cd - .zeros(0, 2, 8, (vec![1], Order::RowMajor)) - .unwrap(); + let result = cd.zeros(0, 2, 8, (vec![1], Order::RowMajor)).unwrap(); assert!(matches!(result, ChannelData::Complex32(_))); assert!(!result.is_empty()); } @@ -93,9 +80,7 @@ fn test_zeros_complex64() { let cd = ChannelData::Complex64(ComplexArrow::new()); // zeros creates vec![0f64; cycle_count*2] = 3*2=6 f64 = 48 bytes // ComplexArrow::new_from_buffer: len = byte_len / 2 = 48 / 2 = 24 - let result = cd - .zeros(0, 3, 16, (vec![1], Order::RowMajor)) - .unwrap(); + let result = cd.zeros(0, 3, 16, (vec![1], Order::RowMajor)).unwrap(); assert!(matches!(result, ChannelData::Complex64(_))); assert!(!result.is_empty()); } @@ -115,9 +100,7 @@ fn test_zeros_array_d_float64() { let cd = ChannelData::ArrayDFloat64(TensorArrow::new()); // For ArrayDFloat64, zeros uses cycle_count * shape product // cycle_count=2, shape=[1] → buffer = 2*1 f64 = 16 bytes, len = 16/1 = 16 - let result = cd - .zeros(0, 2, 8, (vec![1], Order::RowMajor)) - .unwrap(); + let result = cd.zeros(0, 2, 8, (vec![1], Order::RowMajor)).unwrap(); assert!(matches!(result, ChannelData::ArrayDFloat64(_))); assert!(!result.is_empty()); } @@ -125,9 +108,7 @@ fn test_zeros_array_d_float64() { #[test] fn test_zeros_array_d_int32() { let cd = ChannelData::ArrayDInt32(TensorArrow::new()); - let result = cd - .zeros(0, 3, 4, (vec![3], Order::RowMajor)) - .unwrap(); + let result = cd.zeros(0, 3, 4, (vec![3], Order::RowMajor)).unwrap(); assert!(matches!(result, ChannelData::ArrayDInt32(_))); if let ChannelData::ArrayDInt32(ta) = &result { assert!(ta.values_slice().iter().all(|&v| 
v == 0)); From 8bd97648d9edd875614986716f6f02616006d228 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Thu, 2 Apr 2026 00:28:18 +0200 Subject: [PATCH 44/46] fixed bug with value range conversion not matching spec --- src/mdfreader/conversions4.rs | 103 +++++++++++++++++++++++++--------- tests/conversion.rs | 10 +++- 2 files changed, 83 insertions(+), 30 deletions(-) diff --git a/src/mdfreader/conversions4.rs b/src/mdfreader/conversions4.rs index 94d0ff3..955f305 100644 --- a/src/mdfreader/conversions4.rs +++ b/src/mdfreader/conversions4.rs @@ -992,6 +992,11 @@ where .zip(values) .for_each(|(new_a, v)| { let a: f64 = (*v).as_(); + // MDF4 spec 6.17.7 (cc_type=5, value to value without interpolation): + // - Exact match: return value[i] + // - Below first key: return value[0] + // - Above last key: return value[n-1] + // - Between keys: return nearest neighbor; if equidistant, use lower key's value *new_a = match val .binary_search_by(|&(xi, _)| xi.partial_cmp(&a).unwrap_or(Ordering::Equal)) { @@ -1001,6 +1006,7 @@ where Err(idx) => { let (x0, y0) = val[idx - 1]; let (x1, y1) = val[idx]; + // spec: if (Int - key[i]) > (key[i+1] - Int) use upper, else lower if (a - x0) > (x1 - a) { *y1 } else { *y0 } } }; @@ -1201,6 +1207,7 @@ fn value_range_to_value_table_calculation( array: &PrimitiveBuilder, val: &[(f64, f64, f64)], default_value: &f64, + inclusive_upper: bool, ) -> Result, Error> where ::Native: AsPrimitive, @@ -1213,15 +1220,30 @@ where .zip(values) .for_each(|(new_a, v)| { let a: f64 = (*v).as_(); + // MDF4 spec 6.17.8 (cc_type=6, value range to value): + // - For float types (cn_data_type > 3): key_min[i] ≤ Int < key_max[i] (exclusive upper) + // - For integer types (cn_data_type ≤ 3): key_min[i] ≤ Int ≤ key_max[i] (both inclusive) + // Ranges are sorted ascending and shall not overlap (key_max[i-1] ≤ key_min[i]). + // For touching boundaries with float data, a == key_min[i] → range i wins (not i-1). 
+ // Binary search on min keys handles both cases naturally: + // Ok(idx): a == min[idx] → range idx includes a as its lower bound ✓ + // Err(idx): min[idx-1] < a < min[idx] → check if a < max[idx-1] (exclusive upper) *new_a = match val .binary_search_by(|&(xi, _, _)| xi.partial_cmp(&a).unwrap_or(Ordering::Equal)) { Ok(idx) => val[idx].2, - Err(0) => *default_value, - Err(idx) if (idx >= val.len() && a <= val[idx - 1].1) => val[idx - 1].2, + Err(0) => *default_value, // below the minimum of all lower bounds Err(idx) => { - if a <= val[idx].1 { - val[idx].2 + // min[idx-1] < a < min[idx]: candidate is range idx-1 + // Float: upper exclusive (a < max[idx-1]) + // Integer: upper inclusive (a <= max[idx-1]) + let in_range = if inclusive_upper { + a <= val[idx - 1].1 + } else { + a < val[idx - 1].1 + }; + if in_range { + val[idx - 1].2 } else { *default_value } @@ -1243,64 +1265,65 @@ fn value_range_to_value_table(cn: &mut Cn4, cc_val: Vec) -> Result<(), Erro val.push((*a, *b, *c)); } let default_value = cc_val[cc_val.len() - 1]; + // MDF4 spec 6.17.8: integer data uses inclusive upper bound; float data uses exclusive upper. 
match &mut cn.data { ChannelData::Int8(a) => { cn.data = ChannelData::Float64( - value_range_to_value_table_calculation(a, &val, &default_value) + value_range_to_value_table_calculation(a, &val, &default_value, true) .context("failed value range to value table conversion of i8 channel")?, ); } ChannelData::UInt8(a) => { cn.data = ChannelData::Float64( - value_range_to_value_table_calculation(a, &val, &default_value) + value_range_to_value_table_calculation(a, &val, &default_value, true) .context("failed value range to value table conversion of u8 channel")?, ); } ChannelData::Int16(a) => { cn.data = ChannelData::Float64( - value_range_to_value_table_calculation(a, &val, &default_value) + value_range_to_value_table_calculation(a, &val, &default_value, true) .context("failed value range to value table conversion of i16 channel")?, ); } ChannelData::UInt16(a) => { cn.data = ChannelData::Float64( - value_range_to_value_table_calculation(a, &val, &default_value) + value_range_to_value_table_calculation(a, &val, &default_value, true) .context("failed value range to value table conversion of u16 channel")?, ); } ChannelData::Int32(a) => { cn.data = ChannelData::Float64( - value_range_to_value_table_calculation(a, &val, &default_value) + value_range_to_value_table_calculation(a, &val, &default_value, true) .context("failed value range to value table conversion of i32 channel")?, ); } ChannelData::UInt32(a) => { cn.data = ChannelData::Float64( - value_range_to_value_table_calculation(a, &val, &default_value) + value_range_to_value_table_calculation(a, &val, &default_value, true) .context("failed value range to value table conversion of u32 channel")?, ); } ChannelData::Float32(a) => { cn.data = ChannelData::Float64( - value_range_to_value_table_calculation(a, &val, &default_value) + value_range_to_value_table_calculation(a, &val, &default_value, false) .context("failed value range to value table conversion of f32 channel")?, ); } ChannelData::Int64(a) => { cn.data = 
ChannelData::Float64( - value_range_to_value_table_calculation(a, &val, &default_value) + value_range_to_value_table_calculation(a, &val, &default_value, true) .context("failed value range to value table conversion of i64 channel")?, ); } ChannelData::UInt64(a) => { cn.data = ChannelData::Float64( - value_range_to_value_table_calculation(a, &val, &default_value) + value_range_to_value_table_calculation(a, &val, &default_value, true) .context("failed value range to value table conversion of u64 channel")?, ); } ChannelData::Float64(a) => { cn.data = ChannelData::Float64( - value_range_to_value_table_calculation(a, &val, &default_value) + value_range_to_value_table_calculation(a, &val, &default_value, false) .context("failed value range to value table conversion of f64 channel")?, ); } @@ -2329,27 +2352,27 @@ mod tests { #[test] fn test_value_to_value_without_interpolation_primitive() { - // Table pairs: x=0→y=0, x=10→y=100, x=20→y=200 + // Table pairs: x=0→y=0, x=10→y=100, x=20→y=200 (MDF4 spec 6.17.7) let keys = [0.0, 0.0, 10.0, 100.0, 20.0, 200.0]; let val: Vec<(&f64, &f64)> = keys.iter().tuples().collect(); let mut builder = Float64Builder::new(); - builder.append_value(0.0); // exact → 0 - builder.append_value(3.0); // nearer to x=0 (dist=3) than x=10 (dist=7) → 0 - builder.append_value(7.0); // nearer to x=10 (dist=3) than x=0 (dist=7) → 100 - builder.append_value(10.0); // exact → 100 - builder.append_value(-5.0); // below first → 0 - builder.append_value(25.0); // above last → 200 + builder.append_value(0.0); // exact match → 0 + builder.append_value(3.0); // between key[0]=0 and key[1]=10, nearer to 0 → 0 + builder.append_value(7.0); // between key[0]=0 and key[1]=10, nearer to 10 → 100 + builder.append_value(10.0); // exact match → 100 + builder.append_value(-5.0); // below first key → value[0] = 0 + builder.append_value(25.0); // above last key → value[n-1] = 200 let result = value_to_value_without_interpolation_primitive(&mut builder, val).unwrap(); let 
values = result.values_slice(); assert_eq!(values.len(), 6); - assert!((values[0] - 0.0).abs() < 1e-12); - assert!((values[1] - 0.0).abs() < 1e-12); - assert!((values[2] - 100.0).abs() < 1e-12); - assert!((values[3] - 100.0).abs() < 1e-12); - assert!((values[4] - 0.0).abs() < 1e-12); - assert!((values[5] - 200.0).abs() < 1e-12); + assert!((values[0] - 0.0).abs() < 1e-12); // exact → 0 + assert!((values[1] - 0.0).abs() < 1e-12); // nearest key=0 → 0 + assert!((values[2] - 100.0).abs() < 1e-12); // nearest key=10 → 100 + assert!((values[3] - 100.0).abs() < 1e-12); // exact → 100 + assert!((values[4] - 0.0).abs() < 1e-12); // below first → value[0]=0 + assert!((values[5] - 200.0).abs() < 1e-12); // above last → value[n-1]=200 } #[test] @@ -2369,7 +2392,7 @@ mod tests { builder.append_value(-5.0); // below all ranges → default builder.append_value(25.0); // above last key_min but within last upper bound → 300 - let result = value_range_to_value_table_calculation(&builder, &val, &default).unwrap(); + let result = value_range_to_value_table_calculation(&builder, &val, &default, false).unwrap(); let values = result.values_slice(); assert_eq!(values.len(), 5); assert!((values[0] - 100.0).abs() < 1e-12); @@ -2379,6 +2402,30 @@ mod tests { assert!((values[4] - 300.0).abs() < 1e-12); } + #[test] + fn test_value_range_float_touching_boundaries() { + // Replicate the Vector_ValueRange2ValueConversion.mf4 CC table (float [lo, hi) semantics) + let val = vec![ + (-10.0f64, -7.0, -1.0), // range 0 + (-7.0, -5.0, 0.0), // range 1 (touching: min=-7 == max of range 0) + (-5.0, 0.0, 1.0), + ]; + let default = -1.0f64; + + let mut builder = Float64Builder::new(); + builder.append_value(-9.0); // in range 0 [-10,-7): -10<=-9<-7 → True → -1 + builder.append_value(-7.0); // at touching boundary: Ok(1)→range 1→ 0 (not range 0 since -7 is not < -7) + builder.append_value(-6.0); // in range 1: -7<=-6<-5 → True → 0 + builder.append_value(100.0); // above all ranges → default -1 + + let result 
= value_range_to_value_table_calculation(&builder, &val, &default, false).unwrap(); + let values = result.values_slice(); + assert!((values[0] - (-1.0)).abs() < 1e-12, "raw=-9 should be -1, got {}", values[0]); + assert!((values[1] - 0.0).abs() < 1e-12, "raw=-7 should be 0, got {}", values[1]); + assert!((values[2] - 0.0).abs() < 1e-12, "raw=-6 should be 0, got {}", values[2]); + assert!((values[3] - (-1.0)).abs() < 1e-12, "raw=100 should be -1, got {}", values[3]); + } + #[test] fn test_algebraic_conversion_primitive() { // Expression: X * 2 + 1 diff --git a/tests/conversion.rs b/tests/conversion.rs index 5462b69..5d0a6dd 100644 --- a/tests/conversion.rs +++ b/tests/conversion.rs @@ -258,9 +258,15 @@ fn lookup_value_range_to_value() -> Result<()> { let mut mdf = Mdf::new(&file_name)?; mdf.load_all_channels_data_in_memory()?; if let Some(data) = mdf.get_channel_data("Data channel") { + // Spec 6.17.8: float data uses [lo, hi) exclusive upper bound. + // Raw data: -15…14 (step 1). CC ranges: + // [-10,-7)→-1, [-7,-5)→0, [-5,0)→1, [0,2)→2, [2,5)→3, + // [5,6)→5, [6,8.5)→6, [8.5,10)→7, [10,12)→8, [12,14)→9, default=-1 + // Values -15…-11 are below all ranges → default -1. + // Value 14 equals the exclusive upper of last range → default -1. 
let vect = Vec::from([ - -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, - 3.0, 3.0, 5.0, 5.0, 5.0, 6.0, 7.0, 7.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0, + -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, + 2.0, 3.0, 3.0, 3.0, 5.0, 6.0, 6.0, 6.0, 7.0, 8.0, 8.0, 9.0, 9.0, -1.0, ]); assert_eq!( &ChannelData::Float64(Float64Builder::new_from_buffer(vect.into(), None)), From c6abcfaf07aa19cd2df1d2a1574add8fafef898f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Thu, 2 Apr 2026 00:28:34 +0200 Subject: [PATCH 45/46] VLSD bug fix --- src/mdfinfo/mdfinfo4/composition.rs | 20 ++++++- src/mdfreader/mdfreader4.rs | 88 ++++++++++++++++++++--------- 2 files changed, 78 insertions(+), 30 deletions(-) diff --git a/src/mdfinfo/mdfinfo4/composition.rs b/src/mdfinfo/mdfinfo4/composition.rs index f060bab..db07dbc 100644 --- a/src/mdfinfo/mdfinfo4/composition.rs +++ b/src/mdfinfo/mdfinfo4/composition.rs @@ -318,13 +318,13 @@ pub(super) fn parse_composition( if block_header_short.hdr_id == "##CA".as_bytes() { // Channel Array - let (block, mut shape, _snd, array_size) = + let (block, mut shape, _snd, mut array_size) = parse_ca_block(&mut block, block_header_short, cg_cycle_count) .context("Failed parsing CA block")?; position = pos; let ca_composition: Option>; if block.ca_composition != 0 { - let (ca, pos, _array_size, s, n_cns, cnss) = parse_composition( + let (ca, pos, inner_array_size, s, n_cns, cnss) = parse_composition( rdr, block.ca_composition, position, @@ -333,7 +333,21 @@ pub(super) fn parse_composition( cg_cycle_count, ) .context("Failed parsing composition block from CA block")?; - shape = s; + // If the inner composition is another CA block (array of arrays), combine + // the outer CA's dimensions with the inner CA's dimensions: + // outer shape = [cg_cycle_count, d1, ..., dm] + // inner shape = [cg_cycle_count, e1, ..., en] + // combined = [cg_cycle_count, d1, ..., 
dm, e1, ..., en] + // array_size (= total elements, used as list_size for reading) must also be + // the product of all dimensions: outer_pnd * inner_pnd. + // For any other inner block type (CN axis channel, etc.) the outer CA's + // ca_dim_size already encodes the full array shape — keep it unchanged. + if matches!(&ca.block, Compo::CA(_)) { + let mut combined = shape.0.clone(); + combined.extend_from_slice(&s.0[1..]); + shape = (combined, shape.1); + array_size *= inner_array_size; + } position = pos; cns = cnss; n_cn += n_cns; diff --git a/src/mdfreader/mdfreader4.rs b/src/mdfreader/mdfreader4.rs index 74d51e7..1a98577 100644 --- a/src/mdfreader/mdfreader4.rs +++ b/src/mdfreader/mdfreader4.rs @@ -429,6 +429,44 @@ fn read_hl(rdr: &mut BufReader<&File>, mut position: i64) -> Result<(i64, [u8; 4 Ok((position, id)) } +/// Reads VLSD data from a chain of DL sub-blocks (##SD or ##DZ) without reinitialising arrays. +/// Used by read_sd to process DL-chained VLSD data for a single channel. +fn read_vlsd_from_dl_blocks( + rdr: &mut BufReader<&File>, + dl_blocks: Vec, + mut position: i64, + cn: &mut Cn4, + decoder: &mut Dec, +) -> Result { + let mut previous_index: usize = 0; + for dl in dl_blocks { + for data_pointer in dl.dl_data { + rdr.seek_relative(data_pointer - position) + .context("Could not reach VLSD sub-block from DL")?; + let mut id = [0u8; 4]; + rdr.read_exact(&mut id) + .context("could not read VLSD sub-block id")?; + let mut data = if id == *b"##DZ" { + let (dt, block_header) = parse_dz(rdr)?; + position = data_pointer + block_header.len as i64; + dt + } else { + // ##SD block (same header layout as Dt4Block) + let block_header: Dt4Block = rdr + .read_le() + .context("Could not read VLSD sub-block header")?; + let mut buf = vec![0u8; (block_header.len - 24) as usize]; + rdr.read_exact(&mut buf) + .context("Could not read VLSD sub-block data")?; + position = data_pointer + block_header.len as i64; + buf + }; + previous_index = read_vlsd_from_bytes(&mut 
data, cn, previous_index, decoder)?; + } + } + Ok(position) +} + /// Reads Signal Data Block containing VLSD channel, pointed by cn_data fn read_sd( rdr: &mut BufReader<&File>, @@ -436,7 +474,7 @@ fn read_sd( vlsd_channels: &[(u8, i32)], mut position: i64, decoder: &mut Dec, - channel_names_to_read_in_dg: &HashSet, + _channel_names_to_read_in_dg: &HashSet, ) -> Result { for channel_group in dg.cg.values_mut() { for (cn_type, rec_pos) in vlsd_channels { @@ -468,28 +506,10 @@ fn read_sd( let (pos, _id) = read_hl(rdr, position)?; position = pos; let (dl_blocks, pos) = parser_dl4(rdr, position)?; - let (pos, _vlsd) = parser_dl4_sorted( - rdr, - dl_blocks, - pos, - channel_group, - decoder, - rec_pos, - channel_names_to_read_in_dg, - )?; - position = pos; + position = read_vlsd_from_dl_blocks(rdr, dl_blocks, pos, cn, decoder)?; } else if "##DL".as_bytes() == id { let (dl_blocks, pos) = parser_dl4(rdr, position)?; - let (pos, _vlsd) = parser_dl4_sorted( - rdr, - dl_blocks, - pos, - channel_group, - decoder, - rec_pos, - channel_names_to_read_in_dg, - )?; - position = pos; + position = read_vlsd_from_dl_blocks(rdr, dl_blocks, pos, cn, decoder)?; } } } @@ -609,9 +629,16 @@ fn read_vlsd_from_bytes( u32::from_le_bytes(len.try_into().context("Could not read length")?) as usize; if (position + length + 4) <= data_length { position += std::mem::size_of::(); - // Types 6 (SBC) and 7 (UTF-8) have null terminator to strip + // From MDF 4.3, null terminator is optional in VLSD strings. + // Strip trailing \0 only if actually present (check the last byte). 
let record_len = match cn_data_type { - 6 | 7 => if length > 0 { length - 1 } else { 0 }, + 6 | 7 => { + if length > 0 && data[position + length - 1] == 0 { + length - 1 + } else { + length + } + } _ => length, }; let record = &data[position..position + record_len]; @@ -998,11 +1025,9 @@ fn parser_dl4_sorted( let mut id = [0u8; 4]; rdr.read_exact(&mut id) .context("could not read data block id")?; - let block_length: usize; if id == "##DZ".as_bytes() { let (dt, block_header) = parse_dz(rdr)?; data.extend(dt); - block_length = block_header.dz_org_data_length as usize; position = data_pointer + block_header.len as i64; id[2..].copy_from_slice(&block_header.dz_org_block_type[..]); } else { @@ -1011,9 +1036,11 @@ fn parser_dl4_sorted( rdr.read_exact(&mut buf) .context("Could not read DT block data")?; data.extend(buf); - block_length = (block_header.len - 24) as usize; position = data_pointer + block_header.len as i64; } + // Use data.len() as block_length so that a partial-record tail carried over + // from the previous block (split records) is included in the count. 
+ let block_length = data.len(); // Copies full sized records in block into channels arrays if id == "##SD".as_bytes() { @@ -1420,15 +1447,22 @@ fn read_all_channels_unsorted_from_bytes( // From sorted data block, copies data in channels arrays for (rec_id, (index, record_data)) in record_counter.iter_mut() { if let Some(channel_group) = dg.cg.get_mut(rec_id) { + let record_length = channel_group.record_length as usize; + let n_records = if record_length > 0 { + record_data.len() / record_length + } else { + 0 + }; read_channels_from_bytes( record_data, &mut channel_group.cn, - channel_group.record_length as usize, + record_length, *index, channel_names_to_read_in_dg, true, ) .context("failed reading channels from bytes after reading unsorted data")?; + *index += n_records; // advance write position for next DL block record_data.clear(); // clears data for new block, keeping capacity } } From 7ccb4765d0a8f5d159af730d38346a962af92e54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=ABratal=C2=BB?= <«ratal@ratal.org»> Date: Thu, 2 Apr 2026 22:45:44 +0200 Subject: [PATCH 46/46] ticked-up the tag --- .gitignore | 3 ++- Cargo.lock | 26 +++++++++++++------------- Cargo.toml | 6 +++--- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/.gitignore b/.gitignore index 95dba98..dbb4cd8 100644 --- a/.gitignore +++ b/.gitignore @@ -21,4 +21,5 @@ test_files/test.parquet /bin pyvenv.cfg /.continue -test_files/synthetic \ No newline at end of file +test_files/synthetic +.claude/settings.json diff --git a/Cargo.lock b/Cargo.lock index 8ce7322..2b1b991 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2209,9 +2209,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.183" +version = "0.2.184" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" +checksum = "48f5d2a454e16a5ea0f4ced81bd44e4cfc7bd3a507b61887c99fd3538b28e4af" [[package]] name = "libloading" @@ -2366,7 +2366,7 @@ 
dependencies = [ [[package]] name = "mdfr" -version = "0.6.5" +version = "0.6.6" dependencies = [ "anyhow", "arrow", @@ -3544,9 +3544,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.28.2" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf85e27e86080aafd5a22eae58a162e133a589551542b3e5cee4beb27e54f8e1" +checksum = "91fd8e38a3b50ed1167fb981cd6fd60147e091784c427b8f7183a7ee32c31c12" dependencies = [ "anyhow", "libc", @@ -3560,18 +3560,18 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.28.2" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bf94ee265674bf76c09fa430b0e99c26e319c945d96ca0d5a8215f31bf81cf7" +checksum = "e368e7ddfdeb98c9bca7f8383be1648fd84ab466bf2bc015e94008db6d35611e" dependencies = [ "target-lexicon", ] [[package]] name = "pyo3-ffi" -version = "0.28.2" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "491aa5fc66d8059dd44a75f4580a2962c1862a1c2945359db36f6c2818b748dc" +checksum = "7f29e10af80b1f7ccaf7f69eace800a03ecd13e883acfacc1e5d0988605f651e" dependencies = [ "libc", "pyo3-build-config", @@ -3579,9 +3579,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.28.2" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5d671734e9d7a43449f8480f8b38115df67bef8d21f76837fa75ee7aaa5e52e" +checksum = "df6e520eff47c45997d2fc7dd8214b25dd1310918bbb2642156ef66a67f29813" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -3591,9 +3591,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.28.2" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22faaa1ce6c430a1f71658760497291065e6450d7b5dc2bcf254d49f66ee700a" +checksum = "c4cdc218d835738f81c2338f822078af45b4afdf8b2e33cbb5916f108b813acb" dependencies = [ "heck", "proc-macro2", diff --git a/Cargo.toml 
b/Cargo.toml index 3d490fc..eb4a596 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "mdfr" -version = "0.6.5" +version = "0.6.6" description = "A package for reading and writing MDF files" authors = ["ratal "] edition = "2024" @@ -52,7 +52,7 @@ arrow = { version = "58.1.0", features = [ "ffi", ] } # for efficient data storing in memory env_logger = "0.11.10" -libc = "0.2.183" # for the C api +libc = "0.2.184" # for the C api numpy = { version = "0.28", optional = true } # to export in numpy polars = { version = "0.53", features = [ "dtype-full", @@ -66,7 +66,7 @@ hdf5 = { version = "0.8", optional = true, features = [ ndarray = { version = "0.17", optional = true } # to convert arraw data into ndarray, needed for hdf5 [dependencies.pyo3] -version = "0.28.2" +version = "0.28.3" features = ["extension-module", "num-complex", "anyhow"] optional = true