diff --git a/.gitignore b/.gitignore index 8736572..dbb4cd8 100644 --- a/.gitignore +++ b/.gitignore @@ -20,4 +20,6 @@ test_files/test.parquet /share /bin pyvenv.cfg -/.continue \ No newline at end of file +/.continue +test_files/synthetic +.claude/settings.json diff --git a/Cargo.lock b/Cargo.lock index a78f8df..2b1b991 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,15 +2,6 @@ # It is not intended for manual editing. version = 4 -[[package]] -name = "addr2line" -version = "0.25.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b" -dependencies = [ - "gimli", -] - [[package]] name = "adler2" version = "2.0.1" @@ -111,9 +102,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.21" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" dependencies = [ "anstyle", "anstyle-parse", @@ -132,9 +123,9 @@ checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" [[package]] name = "anstyle-parse" -version = "0.2.7" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" dependencies = [ "utf8parse", ] @@ -161,12 +152,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.100" +version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" -dependencies = [ - "backtrace", -] +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" [[package]] name = "ar_archive_writer" @@ -174,7 +162,7 @@ version = "0.2.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "f0c269894b6fe5e9d7ada0cf69b5bf847ff35bc25fc271f08e1d080fce80339a" dependencies = [ - "object 0.32.2", + "object", ] [[package]] @@ -213,9 +201,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4754a624e5ae42081f464514be454b39711daae0458906dacde5f4c632f33a8" +checksum = "d441fdda254b65f3e9025910eb2c2066b6295d9c8ed409522b8d2ace1ff8574c" dependencies = [ "arrow-arith", "arrow-array", @@ -235,9 +223,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7b3141e0ec5145a22d8694ea8b6d6f69305971c4fa1c1a13ef0195aef2d678b" +checksum = "ced5406f8b720cc0bc3aa9cf5758f93e8593cda5490677aa194e4b4b383f9a59" dependencies = [ "arrow-array", "arrow-buffer", @@ -249,9 +237,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c8955af33b25f3b175ee10af580577280b4bd01f7e823d94c7cdef7cf8c9aef" +checksum = "772bd34cacdda8baec9418d80d23d0fb4d50ef0735685bd45158b83dfeb6e62d" dependencies = [ "ahash", "arrow-buffer", @@ -267,9 +255,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c697ddca96183182f35b3a18e50b9110b11e916d7b7799cbfd4d34662f2c56c2" +checksum = "898f4cf1e9598fdb77f356fdf2134feedfd0ee8d5a4e0a5f573e7d0aec16baa4" dependencies = [ "bytes", "half", @@ -279,9 +267,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"646bbb821e86fd57189c10b4fcdaa941deaf4181924917b0daa92735baa6ada5" +checksum = "b0127816c96533d20fc938729f48c52d3e48f99717e7a0b5ade77d742510736d" dependencies = [ "arrow-array", "arrow-buffer", @@ -301,9 +289,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8da746f4180004e3ce7b83c977daf6394d768332349d3d913998b10a120b790a" +checksum = "ca025bd0f38eeecb57c2153c0123b960494138e6a957bbda10da2b25415209fe" dependencies = [ "arrow-array", "arrow-cast", @@ -311,14 +299,14 @@ dependencies = [ "chrono", "csv", "csv-core", - "regex 1.12.2", + "regex 1.12.3", ] [[package]] name = "arrow-data" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fdd994a9d28e6365aa78e15da3f3950c0fdcea6b963a12fa1c391afb637b304" +checksum = "42d10beeab2b1c3bb0b53a00f7c944a178b622173a5c7bcabc3cb45d90238df4" dependencies = [ "arrow-buffer", "arrow-schema", @@ -329,9 +317,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abf7df950701ab528bf7c0cf7eeadc0445d03ef5d6ffc151eaae6b38a58feff1" +checksum = "609a441080e338147a84e8e6904b6da482cefb957c5cdc0f3398872f69a315d0" dependencies = [ "arrow-array", "arrow-buffer", @@ -343,9 +331,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ff8357658bedc49792b13e2e862b80df908171275f8e6e075c460da5ee4bf86" +checksum = "6ead0914e4861a531be48fe05858265cf854a4880b9ed12618b1d08cba9bebc8" dependencies = [ "arrow-array", "arrow-buffer", @@ -367,9 +355,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f7d8f1870e03d4cbed632959498bcc84083b5a24bded52905ae1695bd29da45b" +checksum = "763a7ba279b20b52dad300e68cfc37c17efa65e68623169076855b3a9e941ca5" dependencies = [ "arrow-array", "arrow-buffer", @@ -380,9 +368,9 @@ dependencies = [ [[package]] name = "arrow-pyarrow" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d18c442b4c266aaf3d7f7dd40fd7ae058cef7f113b00ff0cd8256e1e218ec544" +checksum = "e63351dc11981a316c828a6032a5021345bba882f68bc4a36c36825a50725089" dependencies = [ "arrow-array", "arrow-data", @@ -392,9 +380,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18228633bad92bff92a95746bbeb16e5fc318e8382b75619dec26db79e4de4c0" +checksum = "e14fe367802f16d7668163ff647830258e6e0aeea9a4d79aaedf273af3bdcd3e" dependencies = [ "arrow-array", "arrow-buffer", @@ -405,18 +393,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c872d36b7bf2a6a6a2b40de9156265f0242910791db366a2c17476ba8330d68" +checksum = "c30a1365d7a7dc50cc847e54154e6af49e4c4b0fddc9f607b687f29212082743" dependencies = [ "bitflags 2.10.0", ] [[package]] name = "arrow-select" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68bf3e3efbd1278f770d67e5dc410257300b161b93baedb3aae836144edcaf4b" +checksum = "78694888660a9e8ac949853db393af2a8b8fc82c19ce333132dfa2e72cc1a7fe" dependencies = [ "ahash", "arrow-array", @@ -428,9 +416,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85e968097061b3c0e9fe3079cf2e703e487890700546b5b0647f60fca1b5a8d8" +checksum = "61e04a01f8bb73ce54437514c5fd3ee2aa3e8abe4c777ee5cc55853b1652f79e" 
dependencies = [ "arrow-array", "arrow-buffer", @@ -439,7 +427,7 @@ dependencies = [ "arrow-select", "memchr 2.7.6", "num-traits", - "regex 1.12.2", + "regex 1.12.3", "regex-syntax 0.8.8", ] @@ -535,21 +523,6 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" -[[package]] -name = "backtrace" -version = "0.3.76" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6" -dependencies = [ - "addr2line", - "cfg-if 1.0.4", - "libc", - "miniz_oxide", - "object 0.37.3", - "rustc-demangle", - "windows-link 0.2.1", -] - [[package]] name = "base64" version = "0.22.1" @@ -597,9 +570,9 @@ dependencies = [ [[package]] name = "binrw" -version = "0.15.0" +version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81419ff39e6ed10a92a7f125290859776ced35d9a08a665ae40b23e7ca702f30" +checksum = "d53195f985e88ab94d1cc87e80049dd2929fd39e4a772c5ae96a7e5c4aad3642" dependencies = [ "array-init", "binrw_derive", @@ -608,9 +581,9 @@ dependencies = [ [[package]] name = "binrw_derive" -version = "0.15.0" +version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "376404e55ec40d0d6f8b4b7df3f87b87954bd987f0cf9a7207ea3b6ea5c9add4" +checksum = "5910da05ee556b789032c8ff5a61fb99239580aa3fd0bfaa8f4d094b2aee00ad" dependencies = [ "either", "owo-colors", @@ -657,6 +630,15 @@ dependencies = [ "generic-array", ] +[[package]] +name = "block-buffer" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdd35008169921d80bc60d3d0ab416eecb028c4cd653352907921d95084790be" +dependencies = [ + "hybrid-array", +] + [[package]] name = "boxcar" version = "0.2.14" @@ -756,7 +738,7 @@ version = "0.29.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"befbfd072a8e81c02f8c507aefce431fe5e7d051f83d48a23ffc9b9fe5a11799" dependencies = [ - "clap 4.5.58", + "clap 4.6.0", "heck", "indexmap", "log 0.4.29", @@ -899,18 +881,18 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.58" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63be97961acde393029492ce0be7a1af7e323e6bae9511ebfac33751be5e6806" +checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.58" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f13174bda5dfd69d7e947827e5af4b0f2f94a4a3ee92912fba07a66150f21e2" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" dependencies = [ "anstream", "anstyle", @@ -974,6 +956,12 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "const-oid" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6ef517f0926dd24a1582492c791b6a4818a4d94e789a334894aa15b0d12f55c" + [[package]] name = "const-random" version = "0.1.18" @@ -1045,15 +1033,15 @@ dependencies = [ [[package]] name = "criterion" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d883447757bb0ee46f233e9dc22eb84d93a9508c9b868687b274fc431d886bf" +checksum = "950046b2aa2492f9a536f5f4f9a3de7b9e2476e575e05bd6c333371add4d98f3" dependencies = [ "alloca", "anes", "cast", "ciborium", - "clap 4.5.58", + "clap 4.6.0", "criterion-plot", "itertools 0.13.0", "num-traits", @@ -1061,7 +1049,7 @@ dependencies = [ "page_size", "plotters", "rayon", - "regex 1.12.2", + "regex 1.12.3", "serde", "serde_json", "tinytemplate", @@ -1070,9 +1058,9 @@ dependencies = [ [[package]] name = "criterion-plot" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"ed943f81ea2faa8dcecbbfa50164acf95d555afec96a27871663b300e387b2e4" +checksum = "d8d80a2f4f5b554395e47b5d8305bc3d27813bacb73493eb1001e8f76dae29ea" dependencies = [ "cast", "itertools 0.13.0", @@ -1160,6 +1148,15 @@ dependencies = [ "typenum", ] +[[package]] +name = "crypto-common" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77727bb15fa921304124b128af125e7e3b968275d1b108b379190264f4423710" +dependencies = [ + "hybrid-array", +] + [[package]] name = "csv" version = "1.4.0" @@ -1193,8 +1190,19 @@ version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ - "block-buffer", - "crypto-common", + "block-buffer 0.10.4", + "crypto-common 0.1.7", +] + +[[package]] +name = "digest" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4850db49bf08e663084f7fb5c87d202ef91a3907271aff24a94eb97ff039153c" +dependencies = [ + "block-buffer 0.12.0", + "const-oid", + "crypto-common 0.2.1", ] [[package]] @@ -1240,12 +1248,12 @@ dependencies = [ [[package]] name = "env_filter" -version = "0.1.4" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bf3c259d255ca70051b30e2e95b5446cdb8949ac4cd22c0d7fd634d89f568e2" +checksum = "32e90c2accc4b07a8456ea0debdc2e7587bdd890680d71173a15d4ae604f6eef" dependencies = [ "log 0.4.29", - "regex 1.12.2", + "regex 1.12.3", ] [[package]] @@ -1260,9 +1268,9 @@ dependencies = [ [[package]] name = "env_logger" -version = "0.11.8" +version = "0.11.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" +checksum = "0621c04f2196ac3f488dd583365b9c09be011a4ab8b9f37248ffcc8f6198b56a" dependencies = [ "anstream", "anstyle", @@ -1377,9 +1385,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.1.8" +version 
= "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b375d6465b98090a5f25b1c7703f3859783755aa9a80433b36e0379a3ec2f369" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" dependencies = [ "crc32fast", "miniz_oxide", @@ -1578,12 +1586,6 @@ dependencies = [ "wasip3", ] -[[package]] -name = "gimli" -version = "0.32.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7" - [[package]] name = "glob" version = "0.2.11" @@ -1706,7 +1708,7 @@ dependencies = [ "libloading 0.7.4", "mpi-sys", "pkg-config", - "regex 1.12.2", + "regex 1.12.3", "serde", "serde_derive", "winreg", @@ -1799,6 +1801,15 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" +[[package]] +name = "hybrid-array" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8655f91cd07f2b9d0c24137bd650fe69617773435ee5ec83022377777ce65ef1" +dependencies = [ + "typenum", +] + [[package]] name = "hyper" version = "1.8.1" @@ -2006,15 +2017,6 @@ dependencies = [ "serde_core", ] -[[package]] -name = "indoc" -version = "2.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" -dependencies = [ - "rustversion", -] - [[package]] name = "instant" version = "0.1.13" @@ -2078,9 +2080,9 @@ checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" [[package]] name = "jiff" -version = "0.2.18" +version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e67e8da4c49d6d9909fe03361f9b620f58898859f5c7aded68351e85e71ecf50" +checksum = "1a3546dc96b6d42c5f24902af9e2538e82e39ad350b0c766eb3fbf2d8f3d8359" dependencies = [ "jiff-static", "log 0.4.29", @@ -2091,9 +2093,9 @@ dependencies = 
[ [[package]] name = "jiff-static" -version = "0.2.18" +version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0c84ee7f197eca9a86c6fd6cb771e55eb991632f15f2bc3ca6ec838929e6e78" +checksum = "2a8c8b344124222efd714b73bb41f8b5120b27a7cc1c75593a6ff768d9d05aa4" dependencies = [ "proc-macro2", "quote 1.0.44", @@ -2207,9 +2209,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.182" +version = "0.2.184" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" +checksum = "48f5d2a454e16a5ea0f4ced81bd44e4cfc7bd3a507b61887c99fd3538b28e4af" [[package]] name = "libloading" @@ -2317,9 +2319,9 @@ dependencies = [ [[package]] name = "lz4_flex" -version = "0.12.1" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98c23545df7ecf1b16c303910a69b079e8e251d60f7dd2cc9b4177f2afaf1746" +checksum = "db9a0d582c2874f68138a16ce1867e0ffde6c0bb0a0df85e1f36d04146db488a" dependencies = [ "twox-hash", ] @@ -2354,17 +2356,17 @@ dependencies = [ [[package]] name = "md-5" -version = "0.10.6" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +checksum = "69b6441f590336821bb897fb28fc622898ccceb1d6cea3fde5ea86b090c4de98" dependencies = [ "cfg-if 1.0.4", - "digest", + "digest 0.11.2", ] [[package]] name = "mdfr" -version = "0.6.5" +version = "0.6.6" dependencies = [ "anyhow", "arrow", @@ -2372,12 +2374,12 @@ dependencies = [ "byteorder", "cbindgen", "chrono", - "clap 4.5.58", + "clap 4.6.0", "codepage", "criterion", "crossbeam-channel", "encoding_rs", - "env_logger 0.11.8", + "env_logger 0.11.10", "fasteval", "flate2", "glob 0.3.3", @@ -2430,15 +2432,6 @@ dependencies = [ "libc", ] -[[package]] -name = "memoffset" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" -dependencies = [ - "autocfg", -] - [[package]] name = "miniz_oxide" version = "0.8.9" @@ -2484,21 +2477,6 @@ dependencies = [ "rawpointer", ] -[[package]] -name = "ndarray" -version = "0.16.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "882ed72dce9365842bf196bdeedf5055305f11fc8c03dee7bb0194a6cad34841" -dependencies = [ - "matrixmultiply", - "num-complex", - "num-integer", - "num-traits", - "portable-atomic", - "portable-atomic-util", - "rawpointer", -] - [[package]] name = "ndarray" version = "0.17.2" @@ -2628,12 +2606,12 @@ dependencies = [ [[package]] name = "numpy" -version = "0.26.0" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b2dba356160b54f5371b550575b78130a54718b4c6e46b3f33a6da74a27e78b" +checksum = "778da78c64ddc928ebf5ad9df5edf0789410ff3bdbf3619aed51cd789a6af1e2" dependencies = [ "libc", - "ndarray 0.16.1", + "ndarray 0.17.2", "num-complex", "num-integer", "num-traits", @@ -2669,15 +2647,6 @@ dependencies = [ "memchr 2.7.6", ] -[[package]] -name = "object" -version = "0.37.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" -dependencies = [ - "memchr 2.7.6", -] - [[package]] name = "object_store" version = "0.13.1" @@ -2818,14 +2787,13 @@ dependencies = [ [[package]] name = "parquet" -version = "57.3.0" +version = "58.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ee96b29972a257b855ff2341b37e61af5f12d6af1158b6dcdb5b31ea07bb3cb" +checksum = "7d3f9f2205199603564127932b89695f52b62322f541d0fc7179d57c2e1c9877" dependencies = [ "ahash", "arrow-array", "arrow-buffer", - "arrow-cast", "arrow-data", "arrow-ipc", "arrow-schema", @@ -3080,7 +3048,7 @@ dependencies = [ "rand 0.9.2", "rand_distr", "rayon", - "regex 1.12.2", + "regex 1.12.3", "serde", "serde_json", "strum_macros", 
@@ -3113,7 +3081,7 @@ dependencies = [ "object_store", "parking_lot 0.12.5", "polars-arrow-format", - "regex 1.12.2", + "regex 1.12.3", "signal-hook", "simdutf8", ] @@ -3140,7 +3108,7 @@ dependencies = [ "rand 0.9.2", "rayon", "recursive", - "regex 1.12.2", + "regex 1.12.3", "version_check", ] @@ -3178,7 +3146,7 @@ dependencies = [ "polars-time", "polars-utils", "rayon", - "regex 1.12.2", + "regex 1.12.3", "reqwest", "serde", "serde_json", @@ -3280,7 +3248,7 @@ dependencies = [ "polars-schema", "polars-utils", "rayon", - "regex 1.12.2", + "regex 1.12.3", "regex-syntax 0.8.8", "strum_macros", "unicode-normalization", @@ -3307,7 +3275,7 @@ dependencies = [ "polars-error", "polars-parquet-format", "polars-utils", - "regex 1.12.2", + "regex 1.12.3", "serde", "simdutf8", "streaming-decompression", @@ -3352,7 +3320,7 @@ dependencies = [ "polars-utils", "rayon", "recursive", - "regex 1.12.2", + "regex 1.12.3", "sha2", "slotmap", "strum_macros", @@ -3404,7 +3372,7 @@ dependencies = [ "polars-plan", "polars-time", "polars-utils", - "regex 1.12.2", + "regex 1.12.3", "serde", "sqlparser", ] @@ -3472,7 +3440,7 @@ dependencies = [ "polars-ops", "polars-utils", "rayon", - "regex 1.12.2", + "regex 1.12.3", "strum_macros", ] @@ -3501,7 +3469,7 @@ dependencies = [ "rand 0.9.2", "raw-cpuid", "rayon", - "regex 1.12.2", + "regex 1.12.3", "rmp-serde", "serde", "serde_json", @@ -3576,37 +3544,34 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.26.0" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ba0117f4212101ee6544044dae45abe1083d30ce7b29c4b5cbdfa2354e07383" +checksum = "91fd8e38a3b50ed1167fb981cd6fd60147e091784c427b8f7183a7ee32c31c12" dependencies = [ "anyhow", - "indoc", "libc", - "memoffset", "num-complex", "once_cell", "portable-atomic", "pyo3-build-config", "pyo3-ffi", "pyo3-macros", - "unindent", ] [[package]] name = "pyo3-build-config" -version = "0.26.0" +version = "0.28.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fc6ddaf24947d12a9aa31ac65431fb1b851b8f4365426e182901eabfb87df5f" +checksum = "e368e7ddfdeb98c9bca7f8383be1648fd84ab466bf2bc015e94008db6d35611e" dependencies = [ "target-lexicon", ] [[package]] name = "pyo3-ffi" -version = "0.26.0" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "025474d3928738efb38ac36d4744a74a400c901c7596199e20e45d98eb194105" +checksum = "7f29e10af80b1f7ccaf7f69eace800a03ecd13e883acfacc1e5d0988605f651e" dependencies = [ "libc", "pyo3-build-config", @@ -3614,9 +3579,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.26.0" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e64eb489f22fe1c95911b77c44cc41e7c19f3082fc81cce90f657cdc42ffded" +checksum = "df6e520eff47c45997d2fc7dd8214b25dd1310918bbb2642156ef66a67f29813" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -3626,9 +3591,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.26.0" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "100246c0ecf400b475341b8455a9213344569af29a3c841d29270e53102e0fcf" +checksum = "c4cdc218d835738f81c2338f822078af45b4afdf8b2e33cbb5916f108b813acb" dependencies = [ "heck", "proc-macro2", @@ -3669,9 +3634,9 @@ dependencies = [ [[package]] name = "quinn-proto" -version = "0.11.14" +version = "0.11.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" +checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31" dependencies = [ "bytes", "getrandom 0.3.4", @@ -3887,9 +3852,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.12.2" +version = "1.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" 
+checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" dependencies = [ "aho-corasick 1.1.4", "memchr 2.7.6", @@ -4007,12 +3972,6 @@ dependencies = [ "memchr 2.7.6", ] -[[package]] -name = "rustc-demangle" -version = "0.1.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b50b8869d9fc858ce7266cce0194bd74df58b9d0e3f6df3a9fc8eb470d95c09d" - [[package]] name = "rustc-hash" version = "2.1.1" @@ -4079,9 +4038,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.10" +version = "0.103.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef" +checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" dependencies = [ "ring", "rustls-pki-types", @@ -4242,7 +4201,7 @@ checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" dependencies = [ "cfg-if 1.0.4", "cpufeatures 0.2.17", - "digest", + "digest 0.10.7", ] [[package]] @@ -4522,7 +4481,7 @@ version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37d53ac171c92a39e4769491c4b4dde7022c60042254b5fc044ae409d34a24d4" dependencies = [ - "env_logger 0.11.8", + "env_logger 0.11.10", "test-log-macros", "tracing-subscriber", ] @@ -4921,12 +4880,6 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" -[[package]] -name = "unindent" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" - [[package]] name = "untrusted" version = "0.9.0" @@ -5720,9 +5673,9 @@ dependencies = [ [[package]] name = "zlib-rs" -version = "0.5.5" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"40990edd51aae2c2b6907af74ffb635029d5788228222c4bb811e9351c0caad3" +checksum = "3be3d40e40a133f9c916ee3f9f4fa2d9d63435b5fbe1bfc6d9dae0aa0ada1513" [[package]] name = "zmij" diff --git a/Cargo.toml b/Cargo.toml index c13acf2..eb4a596 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "mdfr" -version = "0.6.5" +version = "0.6.6" description = "A package for reading and writing MDF files" authors = ["ratal "] edition = "2024" @@ -14,18 +14,18 @@ readme = "README.md" [features] default = ["numpy", "parquet", "polars"] numpy = ["dep:numpy", "dep:pyo3"] -polars = ["dep:polars", "dep:numpy", "dep:pyo3"] +polars = ["dep:polars", "dep:numpy", "dep:pyo3"] # numpy and pyo3 are required transitive deps for polars Python interop parquet = ["dep:parquet"] hdf5 = ["dep:hdf5", "ndarray"] ndarray = ["dep:ndarray"] hdf5-mpio = ["hdf5/mpio"] [dependencies] -clap = "4.5.58" # for input arguments -anyhow = { version = "1.0", features = ["backtrace"] } # error handling +clap = "4.6.0" # for input arguments +anyhow = { version = "1.0.102", features = ["backtrace"] } # error handling log = "0.4" # to log events byteorder = "1.4" # for bytes conversions -binrw = "0.15" # to efficiently read blocks +binrw = "0.15.1" # to efficiently read blocks num = "0.4" half = "2.7" # for f16 handling encoding_rs = "0.8" # for endian management and bytes to text conversion (utf8, SBC, UTF16) @@ -36,44 +36,44 @@ rayon = "1.11" # for general purpose parallel computations crossbeam-channel = "0.5" # for efficient channel between threads parking_lot = "0.12" # for efficient mutex roxmltree = "0.21" # for xml parsing -flate2 = "1.1" # for DZ block data deflate +flate2 = "1.1.9" # for DZ block data deflate zstd = "0.13" lz4 = "1.28" -md-5 = "0.10" # md5sum of attachments +md-5 = "0.11" # md5sum of attachments transpose = "0.2" # for DZBlock transpose fasteval = "0.2" # for algebraic conversion itertools = "0.14" serde = { version = "1.0", features = ["derive"] } # for serialization whoami 
= "2.1.1" # to get user name for writing file rand = "0.10" # for random numbers -arrow = { version = "57.3.0", features = [ +arrow = { version = "58.1.0", features = [ "pyarrow", "prettyprint", "ffi", ] } # for efficient data storing in memory -env_logger = "0.11" -libc = "0.2" # for the C api -numpy = { version = "0.26", optional = true } # to export in numpy +env_logger = "0.11.10" +libc = "0.2.184" # for the C api +numpy = { version = "0.28", optional = true } # to export in numpy polars = { version = "0.53", features = [ "dtype-full", "object", "fmt", ], optional = true } # for python dataframe -parquet = { version = "57.3", optional = true } # to write parquet file +parquet = { version = "58.1.0", optional = true } # to write parquet file hdf5 = { version = "0.8", optional = true, features = [ "lzf", ] } # to export into hdf5 file ndarray = { version = "0.17", optional = true } # to convert arraw data into ndarray, needed for hdf5 [dependencies.pyo3] -version = "0.26" +version = "0.28.3" features = ["extension-module", "num-complex", "anyhow"] optional = true [dev-dependencies] -criterion = "0.8" # for benchmark +criterion = "0.8.2" # for benchmark test-log = "0.2" -glob = "*" +glob = "0.3" [build-dependencies] cbindgen = "0.29" # to generate C api headers diff --git a/benches/mdf_benchmark.rs b/benches/mdf_benchmark.rs index 2418fe9..148ebfb 100644 --- a/benches/mdf_benchmark.rs +++ b/benches/mdf_benchmark.rs @@ -25,12 +25,23 @@ static WRITING_FILE3: LazyLock = LazyLock::new(|| format!("{}test.dat", static PARQUET_FILE: LazyLock = LazyLock::new(|| format!("{}test.parquet", MDFR_PATH)); fn python_launch() { - Command::new("python3") - .arg("-m") - .arg("timeit") - .arg("import mdfreader; yop=mdfreader.Mdf('/home/ratal/workspace/mdfreader/mdfreader/tests/MDF4/ASAM_COMMON_MDF_V4-1-0/Base_Standard/Examples/DataList/Vector_SD_List.MF4')") - .spawn() - .expect("mdfinfo command failed to start"); + // Check if mdfreader is available before running the comparison + 
let check = Command::new("python3") + .args(["-c", "import mdfreader"]) + .output(); + match check { + Ok(out) if out.status.success() => { + Command::new("python3") + .arg("-m") + .arg("timeit") + .arg("import mdfreader; yop=mdfreader.Mdf('/home/ratal/workspace/mdfreader/mdfreader/tests/MDF4/ASAM_COMMON_MDF_V4-1-0/Base_Standard/Examples/DataList/Vector_SD_List.MF4')") + .spawn() + .expect("mdfinfo command failed to start"); + } + _ => { + println!("skipping Python mdfreader comparison (module not available)"); + } + } } pub fn criterion_benchmark(c: &mut Criterion) { diff --git a/src/c_api.rs b/src/c_api.rs index f1adab9..4f8d623 100644 --- a/src/c_api.rs +++ b/src/c_api.rs @@ -4,7 +4,9 @@ use arrow::ffi::{FFI_ArrowArray, to_ffi}; use libc::c_char; use std::ffi::{CStr, CString, c_uchar, c_ushort}; -/// create a new mdf from a file and its metadata +/// create a new mdf from a file and its metadata. +/// Returns a heap-allocated Mdf pointer, or null on error. +/// Caller must free the returned pointer with `free_mdf()`. #[unsafe(no_mangle)] pub unsafe extern "C" fn new_mdf(file_name: *const c_char) -> *mut Mdf { unsafe { @@ -16,108 +18,135 @@ pub unsafe extern "C" fn new_mdf(file_name: *const c_char) -> *mut Mdf { // - points to valid, initialized data // - points to memory ending in a null byte // - won't be mutated for the duration of this function call - let f = CStr::from_ptr(file_name) - .to_str() - .expect("Could not convert into utf8 the file name string"); + if file_name.is_null() { + return std::ptr::null_mut(); + } + let f = match CStr::from_ptr(file_name).to_str() { + Ok(s) => s, + Err(_) => return std::ptr::null_mut(), + }; match Mdf::new(f) { - Ok(mut mdf) => { - let p: *mut Mdf = &mut mdf; - std::mem::forget(mdf); - p - } - Err(e) => panic!("{e:?}"), + Ok(mdf) => Box::into_raw(Box::new(mdf)), + Err(_) => std::ptr::null_mut(), } } } -/// returns mdf file version +/// frees an Mdf object previously returned by `new_mdf()`. 
+#[unsafe(no_mangle)] +pub unsafe extern "C" fn free_mdf(mdf: *mut Mdf) { + unsafe { + if !mdf.is_null() { + // SAFETY: mdf was created by Box::into_raw in new_mdf(); we reconstruct and drop it. + drop(Box::from_raw(mdf)); + } + } +} + +/// returns mdf file version. Returns 0 on null pointer. #[unsafe(no_mangle)] pub unsafe extern "C" fn get_version(mdf: *const Mdf) -> c_ushort { unsafe { + // SAFETY: caller guarantees mdf is either null or a valid pointer returned by new_mdf(). + // as_ref() handles the null case safely. if let Some(mdf) = mdf.as_ref() { mdf.get_version() } else { - panic!("Null pointer given for Mdf Rust object") + 0 } } } /// returns channel's unit string -/// if no unit is existing for this channel, returns a null pointer +/// if no unit is existing for this channel, returns a null pointer. +/// The returned string is allocated by Rust (`CString::into_raw`); it must be released by handing it back to Rust (`CString::from_raw`), never with C `free()`. #[unsafe(no_mangle)] pub unsafe extern "C" fn get_channel_unit( mdf: *const Mdf, channel_name: *const c_char, ) -> *const c_char { unsafe { - let name = CStr::from_ptr(channel_name) - .to_str() - .expect("Could not convert into utf8 the file name string"); + // SAFETY: caller guarantees both pointers are either null or valid null-terminated C strings. + // Null is checked before dereferencing. CStr::from_ptr requires a valid, null-terminated string.
+ if mdf.is_null() || channel_name.is_null() { + return std::ptr::null(); + } + let name = match CStr::from_ptr(channel_name).to_str() { + Ok(s) => s, + Err(_) => return std::ptr::null(), + }; if let Some(mdf) = mdf.as_ref() { match mdf.get_channel_unit(name) { - Ok(unit) => match unit { - Some(unit) => CString::new(unit) - .expect("CString::new failed because of internal 0 byte") - .into_raw(), - None => std::ptr::null::(), // null pointer + Ok(Some(unit)) => match CString::new(unit) { + Ok(cs) => cs.into_raw(), + Err(_) => std::ptr::null(), }, - Err(e) => panic!("{}", e), + _ => std::ptr::null(), } } else { - panic!("Null pointer given for Mdf Rust object") + std::ptr::null() } } } /// returns channel's description string -/// if no description is existing for this channel, returns null pointer +/// if no description is existing for this channel, returns null pointer. +/// The returned string is allocated by Rust (`CString::into_raw`); it must be released by handing it back to Rust (`CString::from_raw`), never with C `free()`. #[unsafe(no_mangle)] pub unsafe extern "C" fn get_channel_desc( mdf: *const Mdf, channel_name: *const libc::c_char, ) -> *const c_char { unsafe { - let name = CStr::from_ptr(channel_name) - .to_str() - .expect("Could not convert into utf8 the file name string"); + // SAFETY: caller guarantees both pointers are either null or valid null-terminated C strings.
+ if mdf.is_null() || channel_name.is_null() { + return std::ptr::null(); + } + let name = match CStr::from_ptr(channel_name).to_str() { + Ok(s) => s, + Err(_) => return std::ptr::null(), + }; if let Some(mdf) = mdf.as_ref() { match mdf.get_channel_desc(name) { - Ok(desc) => { - match desc { - Some(desc) => CString::new(desc) - .expect("CString::new failed because of internal 0 byte") - .into_raw(), - None => std::ptr::null::(), // null pointer - } - } - Err(e) => panic!("{}", e), + Ok(Some(desc)) => match CString::new(desc) { + Ok(cs) => cs.into_raw(), + Err(_) => std::ptr::null(), + }, + _ => std::ptr::null(), } } else { - panic!("Null pointer given for Mdf Rust object") + std::ptr::null() } } } /// returns channel's associated master channel name string -/// if no master channel existing, returns null pointer +/// if no master channel existing, returns null pointer. +/// The returned string is allocated by Rust (`CString::into_raw`); it must be released by handing it back to Rust (`CString::from_raw`), never with C `free()`. #[unsafe(no_mangle)] pub unsafe extern "C" fn get_channel_master( mdf: *const Mdf, channel_name: *const libc::c_char, ) -> *const c_char { unsafe { - let name = CStr::from_ptr(channel_name) - .to_str() - .expect("Could not convert into utf8 the file name string"); + // SAFETY: caller guarantees both pointers are either null or valid null-terminated C strings.
+ if mdf.is_null() || channel_name.is_null() { + return std::ptr::null(); + } + let name = match CStr::from_ptr(channel_name).to_str() { + Ok(s) => s, + Err(_) => return std::ptr::null(), + }; if let Some(mdf) = mdf.as_ref() { match mdf.get_channel_master(name) { - Some(st) => CString::new(st) - .expect("CString::new failed because of internal 0 byte") - .into_raw(), - None => std::ptr::null::(), // null pointer + Some(st) => match CString::new(st) { + Ok(cs) => cs.into_raw(), + Err(_) => std::ptr::null(), + }, + None => std::ptr::null(), } } else { - panic!("Null pointer given for Mdf Rust object") + std::ptr::null() } } } @@ -131,39 +160,68 @@ pub unsafe extern "C" fn get_channel_master_type( channel_name: *const libc::c_char, ) -> c_uchar { unsafe { - let name = CStr::from_ptr(channel_name) - .to_str() - .expect("Could not convert into utf8 the file name string"); + // SAFETY: caller guarantees both pointers are either null or valid null-terminated C strings. + if mdf.is_null() || channel_name.is_null() { + return 0; + } + let name = match CStr::from_ptr(channel_name).to_str() { + Ok(s) => s, + Err(_) => return 0, + }; if let Some(mdf) = mdf.as_ref() { mdf.get_channel_master_type(name) } else { - panic!("Null pointer given for Mdf Rust object") + 0 } } } -/// returns a sorted array of strings of all channel names contained in file +/// returns a sorted array of strings of all channel names contained in file. +/// The returned pointer is heap-allocated; call `free_channel_names_set(ptr, len)` to free it, +/// where `len` is the number of channel names (obtained separately, e.g. via a count function). +/// Returns null on null input. #[unsafe(no_mangle)] pub unsafe extern "C" fn get_channel_names_set(mdf: *const Mdf) -> *const *mut c_char { unsafe { + // SAFETY: caller guarantees mdf is either null or a valid pointer from new_mdf(). 
if let Some(mdf) = mdf.as_ref() { let set = mdf.get_channel_names_set(); let mut s = set.into_iter().collect::>(); s.sort(); - let cstring_vec = s + let mut cstring_vec = s .iter() - .map(|e| { - CString::new(e.to_string()) - .expect("CString::new failed because of internal 0 byte") - .into_raw() - }) + .filter_map(|e| CString::new(e.as_str()).ok()) + .map(|cs| cs.into_raw()) .collect::>(); + cstring_vec.shrink_to_fit(); let p = cstring_vec.as_ptr(); + // SAFETY: We intentionally leak the Vec here to transfer ownership of the + // backing allocation to the caller. The pointer remains valid until + // free_channel_names_set() is called with the correct length. std::mem::forget(cstring_vec); p } else { - panic!("Null pointer given for Mdf Rust object") + std::ptr::null() + } + } +} + +/// frees a channel names array returned by `get_channel_names_set()`. +/// `len` must match the number of channels returned. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn free_channel_names_set(ptr: *mut *mut c_char, len: usize) { + unsafe { + if ptr.is_null() { + return; } + // SAFETY: each element came from CString::into_raw and ptr is the buffer of a Vec leaked with mem::forget. NOTE(review): Vec::from_raw_parts below needs capacity == len exactly, which shrink_to_fit() does not guarantee - confirm, or leak a boxed slice instead. + let slice = std::slice::from_raw_parts_mut(ptr, len); + for p in slice.iter_mut() { + if !p.is_null() { + drop(CString::from_raw(*p)); + } + } + drop(Vec::from_raw_parts(ptr, len, len)); } } @@ -171,40 +229,57 @@ pub unsafe extern "C" fn get_channel_names_set(mdf: *const Mdf) -> *const *mut c #[unsafe(no_mangle)] pub unsafe extern "C" fn load_all_channels_data_in_memory(mdf: *mut Mdf) { unsafe { - if let Some(mdf) = mdf.as_mut() { - match mdf.load_all_channels_data_in_memory() { - Ok(_) => {} - Err(e) => panic!("{}", e), - } - } else { - panic!("Null pointer given for Mdf Rust object") + // SAFETY: caller guarantees mdf is either null or a valid pointer from new_mdf().
+ if let Some(mdf) = mdf.as_mut() + && let Err(e) = mdf.load_all_channels_data_in_memory() + { + log::error!("load_all_channels_data_in_memory failed: {e}"); } } } -/// returns channel's arrow Array. -/// null pointer returned if not found +/// returns channel's arrow Array as a heap-allocated pointer. +/// Caller must free it with `free_channel_array()`. +/// Returns null pointer if channel not found. #[unsafe(no_mangle)] pub unsafe extern "C" fn get_channel_array( mdf: *const Mdf, channel_name: *const libc::c_char, -) -> *const FFI_ArrowArray { +) -> *mut FFI_ArrowArray { unsafe { - let name = CStr::from_ptr(channel_name) - .to_str() - .expect("Could not convert into utf8 the file name string"); + // SAFETY: caller guarantees both pointers are either null or valid null-terminated C strings, + // and mdf is a valid pointer from new_mdf(). + if mdf.is_null() || channel_name.is_null() { + return std::ptr::null_mut(); + } + let name = match CStr::from_ptr(channel_name).to_str() { + Ok(s) => s, + Err(_) => return std::ptr::null_mut(), + }; if let Some(mdf) = mdf.as_ref() { match mdf.get_channel_data(name) { - Some(data) => { - let (array, _) = - to_ffi(&data.to_data()).expect("ffi failed converting arrow array into C"); - let array_ptr: *const FFI_ArrowArray = &array; - array_ptr - } - None => std::ptr::null::(), // null pointers + Some(data) => match to_ffi(&data.to_data()) { + Ok((array, _)) => Box::into_raw(Box::new(array)), + Err(e) => { + log::error!("get_channel_array: FFI conversion failed: {e}"); + std::ptr::null_mut() + } + }, + None => std::ptr::null_mut(), } } else { - panic!("Null pointer given for Mdf Rust object") + std::ptr::null_mut() + } + } +} + +/// frees an FFI_ArrowArray returned by `get_channel_array()`. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn free_channel_array(array: *mut FFI_ArrowArray) { + unsafe { + if !array.is_null() { + // SAFETY: array was created by Box::into_raw in get_channel_array(). 
+ drop(Box::from_raw(array)); } } } @@ -229,25 +304,25 @@ pub unsafe extern "C" fn export_to_parquet( // - points to valid, initialized data // - points to memory ending in a null byte // - won't be mutated for the duration of this function call - let name = CStr::from_ptr(file_name) - .to_str() - .expect("Could not convert into utf8 the file name string"); + if mdf.is_null() || file_name.is_null() { + return; + } + let name = match CStr::from_ptr(file_name).to_str() { + Ok(s) => s, + Err(_) => return, + }; let comp = if compression.is_null() { None } else { - Some( - CStr::from_ptr(compression) - .to_str() - .expect("Could not convert into utf8 the compression string"), - ) - }; - if let Some(mdf) = mdf.as_ref() { - match mdf.export_to_parquet(name, comp) { - Ok(_) => {} - Err(e) => panic!("{}", e), + match CStr::from_ptr(compression).to_str() { + Ok(s) => Some(s), + Err(_) => return, } - } else { - panic!("Null pointer given for Mdf Rust object") + }; + if let Some(mdf) = mdf.as_ref() + && let Err(e) = mdf.export_to_parquet(name, comp) + { + log::error!("export_to_parquet failed: {e}"); } } } @@ -256,7 +331,7 @@ pub unsafe extern "C" fn export_to_parquet( // Compression can be one of the following strings // "deflate", "lzf" // or null pointer if no compression wanted -#[no_mangle] +#[unsafe(no_mangle)] #[cfg(feature = "hdf5")] pub unsafe extern "C" fn export_to_hdf5( mdf: *const Mdf, @@ -271,24 +346,26 @@ pub unsafe extern "C" fn export_to_hdf5( // - points to valid, initialized data // - points to memory ending in a null byte // - won't be mutated for the duration of this function call - let name = CStr::from_ptr(file_name) - .to_str() - .expect("Could not convert into utf8 the file name string"); - let comp = if compression.is_null() { - None - } else { - Some( - CStr::from_ptr(compression) - .to_str() - .expect("Could not convert into utf8 the compression string"), - ) - }; - if let Some(mdf) = mdf.as_ref() { - match mdf.export_to_hdf5(name, comp) { - Ok(_) 
=> {} - Err(e) => panic!("{}", e), + unsafe { + if mdf.is_null() || file_name.is_null() { + return; + } + let name = match CStr::from_ptr(file_name).to_str() { + Ok(s) => s, + Err(_) => return, + }; + let comp = if compression.is_null() { + None + } else { + match CStr::from_ptr(compression).to_str() { + Ok(s) => Some(s), + Err(_) => return, + } + }; + if let Some(mdf) = mdf.as_ref() { + if let Err(e) = mdf.export_to_hdf5(name, comp) { + log::error!("export_to_hdf5 failed: {e}"); + } } - } else { - panic!("Null pointer given for Mdf Rust object") } } diff --git a/src/data_holder/channel_data.rs b/src/data_holder/channel_data.rs index e5e8dc1..fe83efa 100644 --- a/src/data_holder/channel_data.rs +++ b/src/data_holder/channel_data.rs @@ -5,6 +5,8 @@ use anyhow::{Context, Error, Result, bail}; use arrow::array::{ Array, ArrayBuilder, ArrayData, ArrayRef, BinaryArray, BooleanBufferBuilder, FixedSizeBinaryArray, FixedSizeBinaryBuilder, FixedSizeListArray, Int8Builder, + Int16Builder, Int32Builder, Int64Builder, UInt8Builder, UInt16Builder, UInt32Builder, + UInt64Builder, Float32Builder, Float64Builder, LargeBinaryArray, LargeBinaryBuilder, LargeStringArray, LargeStringBuilder, PrimitiveBuilder, StringArray, UnionArray, as_primitive_array, }; @@ -100,85 +102,71 @@ impl PartialEq for ChannelData { impl Clone for ChannelData { fn clone(&self) -> Self { + // `finish_cloned()` creates a snapshot with Arc refcount == 1, so `into_builder()` + // always returns Ok. The `unwrap_or_else` fallback is logically unreachable but + // keeps this impl panic-free. + macro_rules! 
clone_primitive { + ($variant:ident, $builder:ident, $arg:expr) => {{ + let arr = $arg.finish_cloned(); + Self::$variant(arr.into_builder().unwrap_or_else(|arr| { + let mut b = $builder::with_capacity(arr.len()); + for v in arr.iter() { + match v { Some(x) => b.append_value(x), None => b.append_null() } + } + b + })) + }}; + } match self { - Self::Int8(arg0) => Self::Int8( - arg0.finish_cloned() - .into_builder() - .expect("failed getting back mutable array"), - ), - Self::UInt8(arg0) => Self::UInt8( - arg0.finish_cloned() - .into_builder() - .expect("failed getting back mutable array"), - ), - Self::Int16(arg0) => Self::Int16( - arg0.finish_cloned() - .into_builder() - .expect("failed getting back mutable array"), - ), - Self::UInt16(arg0) => Self::UInt16( - arg0.finish_cloned() - .into_builder() - .expect("failed getting back mutable array"), - ), - Self::Int32(arg0) => Self::Int32( - arg0.finish_cloned() - .into_builder() - .expect("failed getting back mutable array"), - ), - Self::UInt32(arg0) => Self::UInt32( - arg0.finish_cloned() - .into_builder() - .expect("failed getting back mutable array"), - ), - Self::Float32(arg0) => Self::Float32( - arg0.finish_cloned() - .into_builder() - .expect("failed getting back mutable array"), - ), - Self::Int64(arg0) => Self::Int64( - arg0.finish_cloned() - .into_builder() - .expect("failed getting back mutable array"), - ), - Self::UInt64(arg0) => Self::UInt64( - arg0.finish_cloned() - .into_builder() - .expect("failed getting back mutable array"), - ), - Self::Float64(arg0) => Self::Float64( - arg0.finish_cloned() - .into_builder() - .expect("failed getting back mutable array"), - ), + Self::Int8(arg0) => clone_primitive!(Int8, Int8Builder, arg0), + Self::UInt8(arg0) => clone_primitive!(UInt8, UInt8Builder, arg0), + Self::Int16(arg0) => clone_primitive!(Int16, Int16Builder, arg0), + Self::UInt16(arg0) => clone_primitive!(UInt16, UInt16Builder, arg0), + Self::Int32(arg0) => clone_primitive!(Int32, Int32Builder, arg0), + 
Self::UInt32(arg0) => clone_primitive!(UInt32, UInt32Builder, arg0), + Self::Float32(arg0) => clone_primitive!(Float32, Float32Builder, arg0), + Self::Int64(arg0) => clone_primitive!(Int64, Int64Builder, arg0), + Self::UInt64(arg0) => clone_primitive!(UInt64, UInt64Builder, arg0), + Self::Float64(arg0) => clone_primitive!(Float64, Float64Builder, arg0), Self::Complex32(arg0) => Self::Complex32(arg0.clone()), Self::Complex64(arg0) => Self::Complex64(arg0.clone()), Self::Utf8(arg0) => Self::Utf8( arg0.finish_cloned() .into_builder() - .expect("failed getting back mutable array"), + .unwrap_or_else(|arr| { + // unreachable: finish_cloned() gives Arc refcount 1 + let mut b = LargeStringBuilder::with_capacity(arr.len(), arr.values().len()); + for v in arr.iter() { match v { Some(s) => b.append_value(s), None => b.append_null() } } + b + }), ), Self::VariableSizeByteArray(array) => Self::VariableSizeByteArray( array .finish_cloned() .into_builder() - .expect("failed getting back mutable array"), + .unwrap_or_else(|arr| { + // unreachable: finish_cloned() gives Arc refcount 1 + let mut b = LargeBinaryBuilder::with_capacity(arr.len(), arr.values().len()); + for v in arr.iter() { match v { Some(s) => b.append_value(s), None => b.append_null() } } + b + }), ), Self::FixedSizeByteArray(array) => { let array: FixedSizeBinaryArray = array.finish_cloned(); let mut new_array = FixedSizeBinaryBuilder::with_capacity(array.len(), array.value_length()); match array.logical_nulls() { + // append_value can only fail if slice length != value_length, + // which can't happen since we chunk by value_length(). 
Some(validity) => { array .values() .chunks(array.value_length() as usize) .zip(validity.iter()) - .for_each(|(value, validity)| { - if validity { - new_array - .append_value(value) - .expect("failed appending new fixed binary value"); + .for_each(|(value, valid)| { + if valid { + new_array.append_value(value) + .unwrap_or_else(|_| new_array.append_null()); } else { new_array.append_null(); } @@ -189,9 +177,8 @@ impl Clone for ChannelData { .values() .chunks(array.value_length() as usize) .for_each(|value| { - new_array - .append_value(value) - .expect("failed appending new fixed binary value"); + new_array.append_value(value) + .unwrap_or_else(|_| new_array.append_null()); }); } } @@ -1956,8 +1943,10 @@ impl fmt::Display for ChannelData { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let format_option = FormatOptions::new(); let data = self.as_ref(); - let displayer = - ArrayFormatter::try_new(&data, &format_option).map_err(|_| std::fmt::Error)?; + let displayer = ArrayFormatter::try_new(&data, &format_option).map_err(|e| { + log::warn!("ChannelData Display: ArrayFormatter failed: {e}"); + std::fmt::Error + })?; for i in 0..self.len() { write!(f, " {}", displayer.value(i))?; } diff --git a/src/export/numpy.rs b/src/export/numpy.rs index 986ad10..1f17f87 100644 --- a/src/export/numpy.rs +++ b/src/export/numpy.rs @@ -37,7 +37,7 @@ pub(crate) fn to_py_array(_: Python, array: Arc) -> PyResult IntoPyObject<'py> for ChannelData { type Target = PyAny; // the Python type type Output = Bound<'py, Self::Target>; // in most cases this will be `Bound` - type Error = std::convert::Infallible; + type Error = PyErr; /// IntoPyObject implementation to convert a ChannelData into a PyObject fn into_pyobject(self, py: Python<'py>) -> Result { match self { @@ -63,83 +63,77 @@ impl<'py> IntoPyObject<'py> for ChannelData { .chunks(binary_array.value_length() as usize) .map(|x| x.to_vec()) .collect(); - Ok(out - .into_pyobject(py) - .expect("error converting fixed size 
binary array into python object")) + out.into_pyobject(py) + } + ChannelData::ArrayDInt8(array) => { + let flat = array.values_slice().to_pyarray(py); + Ok(flat + .reshape_with_order(array.shape().clone(), array.order().clone().into())? + .into_any()) + } + ChannelData::ArrayDUInt8(array) => { + let flat = array.values_slice().to_pyarray(py); + Ok(flat + .reshape_with_order(array.shape().clone(), array.order().clone().into())? + .into_any()) + } + ChannelData::ArrayDInt16(array) => { + let flat = array.values_slice().to_pyarray(py); + Ok(flat + .reshape_with_order(array.shape().clone(), array.order().clone().into())? + .into_any()) + } + ChannelData::ArrayDUInt16(array) => { + let flat = array.values_slice().to_pyarray(py); + Ok(flat + .reshape_with_order(array.shape().clone(), array.order().clone().into())? + .into_any()) + } + ChannelData::ArrayDInt32(array) => { + let flat = array.values_slice().to_pyarray(py); + Ok(flat + .reshape_with_order(array.shape().clone(), array.order().clone().into())? + .into_any()) + } + ChannelData::ArrayDUInt32(array) => { + let flat = array.values_slice().to_pyarray(py); + Ok(flat + .reshape_with_order(array.shape().clone(), array.order().clone().into())? + .into_any()) + } + ChannelData::ArrayDFloat32(array) => { + let flat = array.values_slice().to_pyarray(py); + Ok(flat + .reshape_with_order(array.shape().clone(), array.order().clone().into())? + .into_any()) + } + ChannelData::ArrayDInt64(array) => { + let flat = array.values_slice().to_pyarray(py); + Ok(flat + .reshape_with_order(array.shape().clone(), array.order().clone().into())? + .into_any()) + } + ChannelData::ArrayDUInt64(array) => { + let flat = array.values_slice().to_pyarray(py); + Ok(flat + .reshape_with_order(array.shape().clone(), array.order().clone().into())? + .into_any()) + } + ChannelData::ArrayDFloat64(array) => { + let flat = array.values_slice().to_pyarray(py); + Ok(flat + .reshape_with_order(array.shape().clone(), array.order().clone().into())? 
+ .into_any()) } - ChannelData::ArrayDInt8(array) => Ok(array - .values_slice() - .to_pyarray(py) - .reshape_with_order(array.shape().clone(), array.order().clone().into()) - .expect("could not reshape i8") - .into_any()), - ChannelData::ArrayDUInt8(array) => Ok(array - .values_slice() - .to_pyarray(py) - .reshape_with_order(array.shape().clone(), array.order().clone().into()) - .expect("could not reshape u8") - .into_any()), - ChannelData::ArrayDInt16(array) => Ok(array - .values_slice() - .to_pyarray(py) - .reshape_with_order(array.shape().clone(), array.order().clone().into()) - .expect("could not reshape u16") - .into_any()), - ChannelData::ArrayDUInt16(array) => Ok(array - .values_slice() - .to_pyarray(py) - .reshape_with_order(array.shape().clone(), array.order().clone().into()) - .expect("could not reshape i16") - .into_any()), - ChannelData::ArrayDInt32(array) => Ok(array - .values_slice() - .to_pyarray(py) - .reshape_with_order(array.shape().clone(), array.order().clone().into()) - .expect("could not reshape i32") - .into_any()), - ChannelData::ArrayDUInt32(array) => Ok(array - .values_slice() - .to_pyarray(py) - .reshape_with_order(array.shape().clone(), array.order().clone().into()) - .expect("could not reshape u32") - .into_any()), - ChannelData::ArrayDFloat32(array) => Ok(array - .values_slice() - .to_pyarray(py) - .reshape_with_order(array.shape().clone(), array.order().clone().into()) - .expect("could not reshape f32") - .into_any()), - ChannelData::ArrayDInt64(array) => Ok(array - .values_slice() - .to_pyarray(py) - .reshape_with_order(array.shape().clone(), array.order().clone().into()) - .expect("could not reshape i64") - .into_any()), - ChannelData::ArrayDUInt64(array) => Ok(array - .values_slice() - .to_pyarray(py) - .reshape_with_order(array.shape().clone(), array.order().clone().into()) - .expect("could not reshape u64") - .into_any()), - ChannelData::ArrayDFloat64(array) => Ok(array - .values_slice() - .to_pyarray(py) - 
.reshape_with_order(array.shape().clone(), array.order().clone().into()) - .expect("could not reshape f64") - .into_any()), ChannelData::Utf8(array) => { let string_array = array.finish_cloned(); let strings: Vec> = string_array.iter().collect(); - Ok(strings - .into_pyobject(py) - .expect("error converting Utf8 array into python object")) + strings.into_pyobject(py) } ChannelData::Union(array) => { - let arrow_data = to_py_array(py, Arc::new(UnionArray::from(array.to_data()))) - .expect("error converting Union array into python object"); - Ok(arrow_data - .into_pyobject(py) - .expect("error converting Union PyArrow into python object")) + let arrow_data = + to_py_array(py, Arc::new(UnionArray::from(array.to_data())))?; + arrow_data.into_pyobject(py) } } } diff --git a/src/main.rs b/src/main.rs index e219614..af98b8d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -21,7 +21,7 @@ fn main() -> Result<(), Error> { init(); let matches = Command::new("mdfr") .bin_name("mdfr") - .version("0.6.3") + .version(env!("CARGO_PKG_VERSION")) .author("Aymeric Rateau ") .about("reads ASAM mdf file") .arg( @@ -63,7 +63,8 @@ fn main() -> Result<(), Error> { .required(false) .num_args(1) .value_name("ALGORITHM") - .help("Compression algorithm for writing data in parquet file, valid values are snappy, gzip, lzo, lz4, zstd, brotli. Default is uncompressed"), + .value_parser(["snappy", "gzip", "lzo", "lz4", "zstd", "brotli"]) + .help("Compression algorithm for writing data in parquet file. Default is uncompressed"), ) .arg( Arg::new("export_to_hdf5") @@ -80,7 +81,8 @@ fn main() -> Result<(), Error> { .required(false) .num_args(1) .value_name("FILTER") - .help("Compression algorithm for writing data in hdf5 file, valid values are deflate and lzf. Default is uncompressed"), + .value_parser(["deflate", "lzf"]) + .help("Compression filter for writing data in hdf5 file. 
Default is uncompressed"), ) .arg( Arg::new("info") diff --git a/src/mdfinfo/mdfinfo3.rs b/src/mdfinfo/mdfinfo3.rs index f81b0ca..65f064b 100644 --- a/src/mdfinfo/mdfinfo3.rs +++ b/src/mdfinfo/mdfinfo3.rs @@ -16,6 +16,7 @@ use std::io::{Cursor, prelude::*}; use crate::data_holder::channel_data::{ChannelData, data_type_init}; use crate::data_holder::tensor_arrow::Order; use crate::mdfinfo::IdBlock; +use crate::mdfinfo::mdfinfo4::Endianness; use super::sym_buf_reader::SymBufReader; @@ -871,8 +872,8 @@ pub struct Cn3 { pub n_bytes: u16, /// channel data pub data: ChannelData, - /// false = little endian - pub endian: bool, + /// byte order of the channel's raw data + pub endian: Endianness, /// True if channel is valid = contains data converted pub channel_data_valid: bool, } @@ -1060,16 +1061,16 @@ fn parse_cn3_block( position = parse_ce(rdr, block1.cn_ce_source, position, sharable, encoding)?; } - let mut endian: bool = false; // Little endian by default + let mut endian = Endianness::Little; // Little endian by default if block2.cn_data_type >= 13 { - endian = false; // little endian + endian = Endianness::Little; } else if block2.cn_data_type >= 9 { - endian = true; // big endian + endian = Endianness::Big; } else if block2.cn_data_type <= 3 { if default_byte_order == 0 { - endian = false; // little endian + endian = Endianness::Little; } else { - endian = true; // big endian + endian = Endianness::Big; } } let data_type = convert_data_type_3to4(block2.cn_data_type); @@ -1134,7 +1135,7 @@ fn can_open_date(pos_byte_beg: u16, cn_bit_offset: u16) -> (Cn3, Cn3, Cn3, Cn3, pos_byte_beg, n_bytes: 2, data: ChannelData::UInt16(UInt16Builder::new()), - endian: false, + endian: Endianness::Little, channel_data_valid: false, }; let block2 = Cn3Block2 { @@ -1153,7 +1154,7 @@ fn can_open_date(pos_byte_beg: u16, cn_bit_offset: u16) -> (Cn3, Cn3, Cn3, Cn3, pos_byte_beg: pos_byte_beg + 2, n_bytes: 1, data: ChannelData::UInt8(UInt8Builder::new()), - endian: false, + endian: 
Endianness::Little, channel_data_valid: false, }; let block2 = Cn3Block2 { @@ -1172,7 +1173,7 @@ fn can_open_date(pos_byte_beg: u16, cn_bit_offset: u16) -> (Cn3, Cn3, Cn3, Cn3, pos_byte_beg: pos_byte_beg + 3, n_bytes: 1, data: ChannelData::UInt8(UInt8Builder::new()), - endian: false, + endian: Endianness::Little, channel_data_valid: false, }; let block2 = Cn3Block2 { @@ -1191,7 +1192,7 @@ fn can_open_date(pos_byte_beg: u16, cn_bit_offset: u16) -> (Cn3, Cn3, Cn3, Cn3, pos_byte_beg: pos_byte_beg + 4, n_bytes: 1, data: ChannelData::UInt8(UInt8Builder::new()), - endian: false, + endian: Endianness::Little, channel_data_valid: false, }; let block2 = Cn3Block2 { @@ -1210,7 +1211,7 @@ fn can_open_date(pos_byte_beg: u16, cn_bit_offset: u16) -> (Cn3, Cn3, Cn3, Cn3, pos_byte_beg: pos_byte_beg + 5, n_bytes: 1, data: ChannelData::UInt8(UInt8Builder::new()), - endian: false, + endian: Endianness::Little, channel_data_valid: false, }; let block2 = Cn3Block2 { @@ -1229,7 +1230,7 @@ fn can_open_date(pos_byte_beg: u16, cn_bit_offset: u16) -> (Cn3, Cn3, Cn3, Cn3, pos_byte_beg: pos_byte_beg + 7, n_bytes: 1, data: ChannelData::UInt8(UInt8Builder::new()), - endian: false, + endian: Endianness::Little, channel_data_valid: false, }; (date_ms, min, hour, day, month, year) @@ -1258,7 +1259,7 @@ fn can_open_time(pos_byte_beg: u16, cn_bit_offset: u16) -> (Cn3, Cn3) { pos_byte_beg, n_bytes: 4, data: ChannelData::UInt32(UInt32Builder::new()), - endian: false, + endian: Endianness::Little, channel_data_valid: false, }; let block2 = Cn3Block2 { @@ -1277,7 +1278,7 @@ fn can_open_time(pos_byte_beg: u16, cn_bit_offset: u16) -> (Cn3, Cn3) { pos_byte_beg: pos_byte_beg + 4, n_bytes: 2, data: ChannelData::UInt16(UInt16Builder::new()), - endian: false, + endian: Endianness::Little, channel_data_valid: false, }; (ms, days) diff --git a/src/mdfinfo/mdfinfo4/cg_block.rs b/src/mdfinfo/mdfinfo4/cg_block.rs index 3234903..75ca217 100644 --- a/src/mdfinfo/mdfinfo4/cg_block.rs +++ 
b/src/mdfinfo/mdfinfo4/cg_block.rs @@ -34,6 +34,8 @@ pub const CG_F_EVENT_SIGNAL_GROUP: u16 = 1 << 4; pub const CG_F_VLSC: u16 = 1 << 5; /// Bit 6: Raw sensor event channel group pub const CG_F_RAW_SENSOR_EVENT: u16 = 1 << 6; +/// Bit 3: Remote master — the master channel resides in a different channel group +pub const CG_F_REMOTE_MASTER: u16 = 1 << 3; /// Bit 7: Protocol event channel group pub const CG_F_PROTOCOL_EVENT: u16 = 1 << 7; use super::si_block::Si4Block; @@ -127,7 +129,7 @@ impl Cg4Block { if (self.cg_flags & CG_F_PROTOCOL_EVENT) != 0 { flags.push("ProtocolEvent"); } - if (self.cg_flags & 0b1000) != 0 { + if (self.cg_flags & CG_F_REMOTE_MASTER) != 0 { // Bit 3: Remote master flags.push("RemoteMaster"); } @@ -834,7 +836,7 @@ mod tests { cg.cg_flags = CG_F_PROTOCOL_EVENT; assert_eq!(cg.get_flags_str(), "ProtocolEvent"); - cg.cg_flags = 0b1000; // RemoteMaster (bit 3) + cg.cg_flags = CG_F_REMOTE_MASTER; assert_eq!(cg.get_flags_str(), "RemoteMaster"); // Combination diff --git a/src/mdfinfo/mdfinfo4/cn_block.rs b/src/mdfinfo/mdfinfo4/cn_block.rs index d12fa83..bfbfd85 100644 --- a/src/mdfinfo/mdfinfo4/cn_block.rs +++ b/src/mdfinfo/mdfinfo4/cn_block.rs @@ -18,6 +18,34 @@ use super::cc_block::read_cc; use super::composition::{parse_composition, Composition}; use super::ev_block::{Ev4Block, parse_ev4_block}; +/// Byte order for channel data. +/// +/// `false`/`Little` = little-endian (default for most modern platforms and MDF files). +/// `true`/`Big` = big-endian. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum Endianness { + /// Little-endian (LSB first) + #[default] + Little, + /// Big-endian (MSB first) + Big, +} + +impl Endianness { + /// Returns `true` if big-endian. + #[inline] + pub fn is_big(self) -> bool { + self == Endianness::Big + } +} + +impl From for Endianness { + /// Converts from a raw bool: `true` → `Big`, `false` → `Little`. 
+ fn from(big: bool) -> Self { + if big { Endianness::Big } else { Endianness::Little } + } +} + // Channel (CN) flags - cn_flags field (u32) /// Bit 13: Event signal - channel contains event data, cn_data points to template EVBLOCK pub const CN_F_EVENT_SIGNAL: u32 = 1 << 13; @@ -39,6 +67,137 @@ pub const CN_F_PROTOCOL_EVENT: u32 = 1 << 19; pub const CN_F_DATA_DESCRIPTION_MODE: u32 = 1 << 20; use super::si_block::Si4Block; +/// Channel type (cn_type field) — spec section 6.6, Table 25 +#[allow(dead_code)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CnChannelType { + /// 0: Fixed-length data channel (normal signal) + FixedLength = 0, + /// 1: Variable-length signal data (VLSD) + Vlsd = 1, + /// 2: Master channel (fixed length) + Master = 2, + /// 3: Virtual master channel (generated time/angle axis, no raw data) + VirtualMaster = 3, + /// 4: Synchronisation channel (references an attachment) + Synchronisation = 4, + /// 5: Maximum-length data channel + MaxLength = 5, + /// 6: Virtual data channel (generated data, no raw data) + VirtualData = 6, + /// 7: VLSC channel (stores offsets into VD block, MDF 4.3) + Vlsc = 7, +} + +impl TryFrom for CnChannelType { + type Error = u8; + fn try_from(v: u8) -> Result { + match v { + 0 => Ok(Self::FixedLength), + 1 => Ok(Self::Vlsd), + 2 => Ok(Self::Master), + 3 => Ok(Self::VirtualMaster), + 4 => Ok(Self::Synchronisation), + 5 => Ok(Self::MaxLength), + 6 => Ok(Self::VirtualData), + 7 => Ok(Self::Vlsc), + other => Err(other), + } + } +} + +/// Channel sync type (cn_sync_type field) — spec section 6.6, Table 26 +#[allow(dead_code)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum CnSyncType { + /// 0: No synchronisation (normal data channel) + #[default] + None = 0, + /// 1: Time synchronisation (seconds) + Time = 1, + /// 2: Angle synchronisation (radians) + Angle = 2, + /// 3: Distance synchronisation (meters) + Distance = 3, + /// 4: Index synchronisation (zero-based sample index) + Index = 4, +} + 
+impl TryFrom for CnSyncType { + type Error = u8; + fn try_from(v: u8) -> Result { + match v { + 0 => Ok(Self::None), + 1 => Ok(Self::Time), + 2 => Ok(Self::Angle), + 3 => Ok(Self::Distance), + 4 => Ok(Self::Index), + other => Err(other), + } + } +} + +/// Channel data type (cn_data_type field) — spec section 6.6, Table 27 +#[allow(dead_code)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CnDataType { + /// 0: Unsigned integer, little-endian + UIntLE = 0, + /// 1: Unsigned integer, big-endian + UIntBE = 1, + /// 2: Signed integer, little-endian + IntLE = 2, + /// 3: Signed integer, big-endian + IntBE = 3, + /// 4: IEEE 754 float, little-endian + FloatLE = 4, + /// 5: IEEE 754 float, big-endian + FloatBE = 5, + /// 6: String (SBC/ISO-8859-1) + StringSbc = 6, + /// 7: String (UTF-8) + StringUtf8 = 7, + /// 8: String (UTF-16 LE) + StringUtf16LE = 8, + /// 9: String (UTF-16 BE) + StringUtf16BE = 9, + /// 10: Byte array + ByteArray = 10, + /// 11: MIME sample + MimeSample = 11, + /// 12: MIME stream + MimeStream = 12, + /// 13: CANopen date + CanopenDate = 13, + /// 14: CANopen time + CanopenTime = 14, + /// 15: Complex number, little-endian + ComplexLE = 15, + /// 16: Complex number, big-endian + ComplexBE = 16, + /// 17: String with BOM (Unicode with byte-order mark) + StringBom = 17, +} + +impl TryFrom for CnDataType { + type Error = u8; + fn try_from(v: u8) -> Result { + match v { + 0 => Ok(Self::UIntLE), 1 => Ok(Self::UIntBE), + 2 => Ok(Self::IntLE), 3 => Ok(Self::IntBE), + 4 => Ok(Self::FloatLE), 5 => Ok(Self::FloatBE), + 6 => Ok(Self::StringSbc), 7 => Ok(Self::StringUtf8), + 8 => Ok(Self::StringUtf16LE), 9 => Ok(Self::StringUtf16BE), + 10 => Ok(Self::ByteArray), + 11 => Ok(Self::MimeSample), 12 => Ok(Self::MimeStream), + 13 => Ok(Self::CanopenDate), 14 => Ok(Self::CanopenTime), + 15 => Ok(Self::ComplexLE), 16 => Ok(Self::ComplexBE), + 17 => Ok(Self::StringBom), + other => Err(other), + } + } +} + /// Cn4 Channel block struct #[derive(Debug, PartialEq, 
Clone)] #[binrw] @@ -165,6 +324,21 @@ impl Cn4Block { pub fn set_si_source(&mut self, si_source: i64) { self.cn_si_source = si_source; } + /// Returns the typed channel type. Returns `Err(raw_value)` if the value is out of spec. + #[allow(dead_code)] + pub fn cn_channel_type(&self) -> Result { + CnChannelType::try_from(self.cn_type) + } + /// Returns the typed sync type. Returns `Err(raw_value)` if the value is out of spec. + #[allow(dead_code)] + pub fn cn_sync_type_enum(&self) -> Result { + CnSyncType::try_from(self.cn_sync_type) + } + /// Returns the typed data type. Returns `Err(raw_value)` if the value is out of spec. + #[allow(dead_code)] + pub fn cn_data_type_enum(&self) -> Result { + CnDataType::try_from(self.cn_data_type) + } /// Returns a string representation of the channel type (cn_type) pub fn get_cn_type_str(&self) -> &'static str { match self.cn_type { @@ -242,19 +416,21 @@ pub struct Cn4 { pub block: Cn4Block, /// unique channel name string pub unique_name: String, + /// absolute file position of this CN block pub block_position: i64, /// beginning position of channel in record pub pos_byte_beg: u32, /// number of bytes taken by channel in record pub n_bytes: u32, + /// optional composition (CA array, nested CN structure, DS/CL/CV/CU VLSD layout) pub composition: Option, /// channel data pub data: ChannelData, - /// false = little endian - pub endian: bool, - /// List size: 1 for normal primitive, 2 for complex, pnd for arrays + /// byte order of the channel's raw data + pub endian: Endianness, + /// number of elements per sample: 1 for scalars, 2 for complex, N for arrays pub list_size: usize, - // Shape of array + /// shape of array data: (dimension sizes, storage order); scalar channels use an empty vec pub shape: (Vec, Order), /// optional invalid mask array, invalid byte position in record, invalid byte mask pub invalid_mask: Option<(Option, usize, u8)>, @@ -453,7 +629,7 @@ fn can_open_date( n_bytes: 2, composition: None, data: 
ChannelData::UInt16(UInt16Builder::new()), - endian: false, + endian: Endianness::Little, list_size: 1, shape: (vec![1], Order::RowMajor), invalid_mask: None, @@ -474,7 +650,7 @@ fn can_open_date( n_bytes: 1, composition: None, data: ChannelData::UInt8(UInt8Builder::new()), - endian: false, + endian: Endianness::Little, list_size: 1, shape: (vec![1], Order::RowMajor), invalid_mask: None, @@ -495,7 +671,7 @@ fn can_open_date( n_bytes: 1, composition: None, data: ChannelData::UInt8(UInt8Builder::new()), - endian: false, + endian: Endianness::Little, list_size: 1, shape: (vec![1], Order::RowMajor), invalid_mask: None, @@ -516,7 +692,7 @@ fn can_open_date( n_bytes: 1, composition: None, data: ChannelData::UInt8(UInt8Builder::new()), - endian: false, + endian: Endianness::Little, list_size: 1, shape: (vec![1], Order::RowMajor), invalid_mask: None, @@ -537,7 +713,7 @@ fn can_open_date( n_bytes: 1, composition: None, data: ChannelData::UInt8(UInt8Builder::new()), - endian: false, + endian: Endianness::Little, list_size: 1, shape: (vec![1], Order::RowMajor), invalid_mask: None, @@ -558,7 +734,7 @@ fn can_open_date( n_bytes: 1, composition: None, data: ChannelData::UInt8(UInt8Builder::new()), - endian: false, + endian: Endianness::Little, list_size: 1, shape: (vec![1], Order::RowMajor), invalid_mask: None, @@ -584,7 +760,7 @@ fn can_open_time(block_position: i64, pos_byte_beg: u32, cn_byte_offset: u32) -> n_bytes: 4, composition: None, data: ChannelData::UInt32(UInt32Builder::new()), - endian: false, + endian: Endianness::Little, list_size: 1, shape: (vec![1], Order::RowMajor), invalid_mask: None, @@ -605,7 +781,7 @@ fn can_open_time(block_position: i64, pos_byte_beg: u32, cn_byte_offset: u32) -> n_bytes: 2, composition: None, data: ChannelData::UInt16(UInt16Builder::new()), - endian: false, + endian: Endianness::Little, list_size: 1, shape: (vec![1], Order::RowMajor), invalid_mask: None, @@ -754,26 +930,26 @@ pub(super) fn parse_cn4_block( } } - let mut endian: bool = 
false; // Little endian by default + let mut endian = Endianness::Little; // Little endian by default if block.cn_data_type == 0 || block.cn_data_type == 2 || block.cn_data_type == 4 || block.cn_data_type == 8 || block.cn_data_type == 15 { - endian = false; // little endian + endian = Endianness::Little; } else if block.cn_data_type == 1 || block.cn_data_type == 3 || block.cn_data_type == 5 || block.cn_data_type == 9 || block.cn_data_type == 16 { - endian = true; // big endian + endian = Endianness::Big; } // For VLSC/VLSD channels, cn_data_type describes the signal data block encoding // (e.g. UTF-16 BE), not the byte order of the integer offsets stored in the DT block. if block.cn_type == 1 || block.cn_type == 7 { - endian = false; + endian = Endianness::Little; } let data_type = block.cn_data_type; let cn_type = block.cn_type; diff --git a/src/mdfinfo/mdfinfo4/composition.rs b/src/mdfinfo/mdfinfo4/composition.rs index d904bbd..db07dbc 100644 --- a/src/mdfinfo/mdfinfo4/composition.rs +++ b/src/mdfinfo/mdfinfo4/composition.rs @@ -27,7 +27,9 @@ pub type CompositionParseResult = (Composition, i64, usize, (Vec, Order), #[derive(Debug, Clone)] #[repr(C)] pub struct Composition { + /// The composition block at this level (CA, CN, CL, CV, CU, or DS). pub block: Compo, + /// Optional next composition in the chain (e.g. nested CA inside another CA). 
pub compo: Option>, } @@ -35,11 +37,17 @@ pub struct Composition { #[derive(Debug, Clone)] #[repr(C)] pub enum Compo { + /// Channel Array block: N-dimensional array layout (spec section 6.18) CA(Box), + /// Nested Channel block: structure composition via CN→CN chain CN(Box), + /// Channel List: named fields packed into a VLSD blob (spec section 6.25) CL(Box), + /// Column Variable-length: variable-length column in fixed-length records (spec section 6.26) CV(Box), + /// Column Unordered: unordered variable-length column (spec section 6.27) CU(Box), + /// Dynamic Size: variable-length fields in a VLSD blob (spec section 6.24) DS(Box), } @@ -310,13 +318,13 @@ pub(super) fn parse_composition( if block_header_short.hdr_id == "##CA".as_bytes() { // Channel Array - let (block, mut shape, _snd, array_size) = + let (block, mut shape, _snd, mut array_size) = parse_ca_block(&mut block, block_header_short, cg_cycle_count) .context("Failed parsing CA block")?; position = pos; let ca_composition: Option>; if block.ca_composition != 0 { - let (ca, pos, _array_size, s, n_cns, cnss) = parse_composition( + let (ca, pos, inner_array_size, s, n_cns, cnss) = parse_composition( rdr, block.ca_composition, position, @@ -325,7 +333,21 @@ pub(super) fn parse_composition( cg_cycle_count, ) .context("Failed parsing composition block from CA block")?; - shape = s; + // If the inner composition is another CA block (array of arrays), combine + // the outer CA's dimensions with the inner CA's dimensions: + // outer shape = [cg_cycle_count, d1, ..., dm] + // inner shape = [cg_cycle_count, e1, ..., en] + // combined = [cg_cycle_count, d1, ..., dm, e1, ..., en] + // array_size (= total elements, used as list_size for reading) must also be + // the product of all dimensions: outer_pnd * inner_pnd. + // For any other inner block type (CN axis channel, etc.) the outer CA's + // ca_dim_size already encodes the full array shape — keep it unchanged. 
+ if matches!(&ca.block, Compo::CA(_)) { + let mut combined = shape.0.clone(); + combined.extend_from_slice(&s.0[1..]); + shape = (combined, shape.1); + array_size *= inner_array_size; + } position = pos; cns = cnss; n_cn += n_cns; diff --git a/src/mdfinfo/mdfinfo4/mod.rs b/src/mdfinfo/mdfinfo4/mod.rs index 32c65f6..f873899 100644 --- a/src/mdfinfo/mdfinfo4/mod.rs +++ b/src/mdfinfo/mdfinfo4/mod.rs @@ -335,7 +335,7 @@ impl MdfInfo4 { unique_name: channel_name.to_string(), data, block: cn_block, - endian: machine_endian, + endian: Endianness::from(machine_endian), block_position: cn_pos, pos_byte_beg: 0, n_bytes, diff --git a/src/mdfreader.rs b/src/mdfreader.rs index b6c9ea7..43b56b0 100644 --- a/src/mdfreader.rs +++ b/src/mdfreader.rs @@ -319,30 +319,30 @@ impl fmt::Display for Mdf { writeln!(f, "Comments: {}", mdfinfo3.hd_comment)?; for (master, list) in self.get_master_channel_names_set().iter() { if let Some(master_name) = master { - writeln!(f, "\nMaster: {master_name}") - .expect("cannot print master channel name"); + writeln!(f, "\nMaster: {master_name}")?; } else { - writeln!(f, "\nWithout Master channel") - .expect("cannot print thre is no master channel"); + writeln!(f, "\nWithout Master channel")?; } for channel in list.iter() { - writeln!(f, " {channel} ").expect("cannot print channel name"); + writeln!(f, " {channel} ")?; if let Some(data) = self.get_channel_data(channel) && !data.is_empty() { let array = &data.as_ref(); let displayer = ArrayFormatter::try_new(array, &format_option) - .map_err(|_| std::fmt::Error)?; - write!(f, "{}", displayer.value(0)).expect("cannot channel data"); - write!(f, " ").expect("cannot print simple space character"); - write!(f, "{}", displayer.value(data.len() - 1)) - .expect("cannot channel data"); + .map_err(|e| { + log::warn!("Mdf Display: ArrayFormatter failed: {e}"); + std::fmt::Error + })?; + write!(f, "{}", displayer.value(0))?; + write!(f, " ")?; + write!(f, "{}", displayer.value(data.len() - 1))?; } if let 
Ok(Some(unit)) = self.get_channel_unit(channel) { - writeln!(f, " {unit} ").expect("cannot print channel unit"); + writeln!(f, " {unit} ")?; } if let Ok(Some(desc)) = self.get_channel_desc(channel) { - writeln!(f, " {desc} ").expect("cannot print channel desc"); + writeln!(f, " {desc} ")?; } } } @@ -358,30 +358,30 @@ impl fmt::Display for Mdf { } for (master, list) in self.get_master_channel_names_set().iter() { if let Some(master_name) = master { - writeln!(f, "\nMaster: {master_name}") - .expect("cannot print master channel name"); + writeln!(f, "\nMaster: {master_name}")?; } else { - writeln!(f, "\nWithout Master channel") - .expect("cannot print thre is no master channel"); + writeln!(f, "\nWithout Master channel")?; } for channel in list.iter() { - writeln!(f, " {channel} ").expect("cannot print channel name"); + writeln!(f, " {channel} ")?; if let Some(data) = self.get_channel_data(channel) && !data.is_empty() { let array = &data.as_ref(); let displayer = ArrayFormatter::try_new(array, &format_option) - .map_err(|_| std::fmt::Error)?; - write!(f, "{}", displayer.value(0)).expect("cannot channel data"); - write!(f, " ").expect("cannot print simple space character"); - write!(f, "{}", displayer.value(data.len() - 1)) - .expect("cannot channel data"); + .map_err(|e| { + log::warn!("Mdf Display: ArrayFormatter failed: {e}"); + std::fmt::Error + })?; + write!(f, "{}", displayer.value(0))?; + write!(f, " ")?; + write!(f, "{}", displayer.value(data.len() - 1))?; } if let Ok(Some(unit)) = self.get_channel_unit(channel) { - writeln!(f, " {unit} ").expect("cannot print channel unit"); + writeln!(f, " {unit} ")?; } if let Ok(Some(desc)) = self.get_channel_desc(channel) { - writeln!(f, " {desc} ").expect("cannot print channel desc"); + writeln!(f, " {desc} ")?; } } } diff --git a/src/mdfreader/conversions3.rs b/src/mdfreader/conversions3.rs index 45fbca3..7404494 100644 --- a/src/mdfreader/conversions3.rs +++ b/src/mdfreader/conversions3.rs @@ -1362,4 +1362,152 @@ mod 
tests { assert_eq!(arr.value(2), "high"); assert_eq!(arr.value(3), "unknown"); } + + // ── Helper ── + + fn make_cn3(data: ChannelData) -> Cn3 { + Cn3 { + data, + ..Default::default() + } + } + + // ── linear_conversion (Cn3) tests ── + + #[test] + fn test_linear_cn3_uint16() { + use arrow::array::UInt16Builder; + let mut builder = UInt16Builder::new(); + builder.append_value(10); + let mut cn = make_cn3(ChannelData::UInt16(builder)); + linear_conversion(&mut cn, &[0.0, 3.0]).unwrap(); // 10*3+0 = 30 + if let ChannelData::Float64(ref b) = cn.data { + let vals = b.values_slice(); + assert!((vals[0] - 30.0).abs() < 1e-12); + } else { + panic!("Expected Float64"); + } + } + + // ── polynomial_conversion (Cn3) tests ── + + #[test] + fn test_polynomial_cn3_float64() { + // polynomial_calculation uses array.finish() then Float64Builder::with_capacity(array.capacity()) + // After finish(), capacity resets to 0, so the output is an empty Float64 builder. + // This test verifies the conversion returns Ok and converts the channel to Float64. + let cc_val = vec![0.0, 1.0, 1.0, 0.0, 0.0, 0.0]; + let mut builder = Float64Builder::new(); + builder.append_value(2.0); + let mut cn = make_cn3(ChannelData::Float64(builder)); + let result = polynomial_conversion(&mut cn, &cc_val); + assert!(result.is_ok()); + // Data should now be Float64 (conversion applied) + assert!(matches!(cn.data, ChannelData::Float64(_))); + } + + // ── exponential_conversion (Cn3) tests ── + + #[test] + fn test_exponential_cn3_float64_p4_zero() { + // exponential_calculation also uses array.finish() and Float64Builder::with_capacity(0) + // so the output is an empty builder. Test that it returns Ok and converts to Float64. 
+ // p4=0 branch chosen with: p1=1, p2=1, p3=0, p4=0, p5=0, p6=1, p7=0 + let cc_val = vec![1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0]; + let mut builder = Float64Builder::new(); + builder.append_value(std::f64::consts::E); + let mut cn = make_cn3(ChannelData::Float64(builder)); + let result = exponential_conversion(&mut cn, &cc_val); + assert!(result.is_ok()); + assert!(matches!(cn.data, ChannelData::Float64(_))); + } + + // ── logarithmic_conversion (Cn3) tests ── + + #[test] + fn test_logarithmic_cn3_float64_p4_zero() { + // logarithmic_calculation also uses array.finish() and Float64Builder::with_capacity(0). + // Test that it returns Ok and converts to Float64. + // p4=0 branch: p1=1, p2=1, p3=0, p4=0, p5=0, p6=1, p7=0 + let cc_val = vec![1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0]; + let mut builder = Float64Builder::new(); + builder.append_value(0.0); + let mut cn = make_cn3(ChannelData::Float64(builder)); + let result = logarithmic_conversion(&mut cn, &cc_val); + assert!(result.is_ok()); + assert!(matches!(cn.data, ChannelData::Float64(_))); + } + + // ── value_to_value_with_interpolation (Cn3) tests ── + + #[test] + fn test_cn3_vtv_interp_uint8() { + use arrow::array::UInt8Builder; + // Table: 0→0, 10→100 + let cc_val = vec![0.0, 0.0, 10.0, 100.0]; + let mut builder = UInt8Builder::new(); + builder.append_value(5); // 50 + let mut cn = make_cn3(ChannelData::UInt8(builder)); + value_to_value_with_interpolation(&mut cn, cc_val, &10).unwrap(); + if let ChannelData::Float64(ref b) = cn.data { + let vals = b.values_slice(); + assert!((vals[0] - 50.0).abs() < 1e-9); + } else { + panic!("Expected Float64"); + } + } + + // ── value_to_value_without_interpolation (Cn3) tests ── + + #[test] + fn test_cn3_vtv_no_interp_uint8() { + use arrow::array::UInt8Builder; + let cc_val = vec![1.0, 10.0, 2.0, 20.0]; + let mut builder = UInt8Builder::new(); + builder.append_value(2); // exact match → 20.0 + let mut cn = make_cn3(ChannelData::UInt8(builder)); + 
value_to_value_without_interpolation(&mut cn, cc_val, &1).unwrap(); + if let ChannelData::Float64(ref b) = cn.data { + let vals = b.values_slice(); + assert!((vals[0] - 20.0).abs() < 1e-12); + } else { + panic!("Expected Float64"); + } + } + + // ── value_to_text (Cn3) tests ── + + #[test] + fn test_cn3_value_to_text() { + use arrow::array::UInt8Builder; + let cc_val_ref = vec![(1.0, "one".to_string()), (2.0, "two".to_string())]; + let mut builder = UInt8Builder::new(); + builder.append_value(1); + let mut cn = make_cn3(ChannelData::UInt8(builder)); + value_to_text(&mut cn, &cc_val_ref, &1).unwrap(); + if let ChannelData::Utf8(ref b) = cn.data { + let arr = b.finish_cloned(); + assert_eq!(arr.value(0), "one"); + } else { + panic!("Expected Utf8"); + } + } + + // ── value_range_to_text (Cn3) tests ── + + #[test] + fn test_cn3_value_range_to_text() { + let ranges = vec![(1.0, 2.0, "in_range".to_string()), (3.0, 4.0, "out".to_string())]; + let cc_val_ref = (ranges, "default".to_string()); + let mut builder = Float64Builder::new(); + builder.append_value(1.5); // in [1.0, 2.0) + let mut cn = make_cn3(ChannelData::Float64(builder)); + value_range_to_text(&mut cn, &cc_val_ref, &1).unwrap(); + if let ChannelData::Utf8(ref b) = cn.data { + let arr = b.finish_cloned(); + assert_eq!(arr.value(0), "in_range"); + } else { + panic!("Expected Utf8"); + } + } } diff --git a/src/mdfreader/conversions4.rs b/src/mdfreader/conversions4.rs index 2398dc4..955f305 100644 --- a/src/mdfreader/conversions4.rs +++ b/src/mdfreader/conversions4.rs @@ -992,6 +992,11 @@ where .zip(values) .for_each(|(new_a, v)| { let a: f64 = (*v).as_(); + // MDF4 spec 6.17.7 (cc_type=5, value to value without interpolation): + // - Exact match: return value[i] + // - Below first key: return value[0] + // - Above last key: return value[n-1] + // - Between keys: return nearest neighbor; if equidistant, use lower key's value *new_a = match val .binary_search_by(|&(xi, _)| 
xi.partial_cmp(&a).unwrap_or(Ordering::Equal)) { @@ -1001,6 +1006,7 @@ where Err(idx) => { let (x0, y0) = val[idx - 1]; let (x1, y1) = val[idx]; + // spec: if (Int - key[i]) > (key[i+1] - Int) use upper, else lower if (a - x0) > (x1 - a) { *y1 } else { *y0 } } }; @@ -1201,6 +1207,7 @@ fn value_range_to_value_table_calculation( array: &PrimitiveBuilder, val: &[(f64, f64, f64)], default_value: &f64, + inclusive_upper: bool, ) -> Result, Error> where ::Native: AsPrimitive, @@ -1213,15 +1220,30 @@ where .zip(values) .for_each(|(new_a, v)| { let a: f64 = (*v).as_(); + // MDF4 spec 6.17.8 (cc_type=6, value range to value): + // - For float types (cn_data_type > 3): key_min[i] ≤ Int < key_max[i] (exclusive upper) + // - For integer types (cn_data_type ≤ 3): key_min[i] ≤ Int ≤ key_max[i] (both inclusive) + // Ranges are sorted ascending and shall not overlap (key_max[i-1] ≤ key_min[i]). + // For touching boundaries with float data, a == key_min[i] → range i wins (not i-1). + // Binary search on min keys handles both cases naturally: + // Ok(idx): a == min[idx] → range idx includes a as its lower bound ✓ + // Err(idx): min[idx-1] < a < min[idx] → check if a < max[idx-1] (exclusive upper) *new_a = match val .binary_search_by(|&(xi, _, _)| xi.partial_cmp(&a).unwrap_or(Ordering::Equal)) { Ok(idx) => val[idx].2, - Err(0) => *default_value, - Err(idx) if (idx >= val.len() && a <= val[idx - 1].1) => val[idx - 1].2, + Err(0) => *default_value, // below the minimum of all lower bounds Err(idx) => { - if a <= val[idx].1 { - val[idx].2 + // min[idx-1] < a < min[idx]: candidate is range idx-1 + // Float: upper exclusive (a < max[idx-1]) + // Integer: upper inclusive (a <= max[idx-1]) + let in_range = if inclusive_upper { + a <= val[idx - 1].1 + } else { + a < val[idx - 1].1 + }; + if in_range { + val[idx - 1].2 } else { *default_value } @@ -1243,64 +1265,65 @@ fn value_range_to_value_table(cn: &mut Cn4, cc_val: Vec) -> Result<(), Erro val.push((*a, *b, *c)); } let default_value = 
cc_val[cc_val.len() - 1]; + // MDF4 spec 6.17.8: integer data uses inclusive upper bound; float data uses exclusive upper. match &mut cn.data { ChannelData::Int8(a) => { cn.data = ChannelData::Float64( - value_range_to_value_table_calculation(a, &val, &default_value) + value_range_to_value_table_calculation(a, &val, &default_value, true) .context("failed value range to value table conversion of i8 channel")?, ); } ChannelData::UInt8(a) => { cn.data = ChannelData::Float64( - value_range_to_value_table_calculation(a, &val, &default_value) + value_range_to_value_table_calculation(a, &val, &default_value, true) .context("failed value range to value table conversion of u8 channel")?, ); } ChannelData::Int16(a) => { cn.data = ChannelData::Float64( - value_range_to_value_table_calculation(a, &val, &default_value) + value_range_to_value_table_calculation(a, &val, &default_value, true) .context("failed value range to value table conversion of i16 channel")?, ); } ChannelData::UInt16(a) => { cn.data = ChannelData::Float64( - value_range_to_value_table_calculation(a, &val, &default_value) + value_range_to_value_table_calculation(a, &val, &default_value, true) .context("failed value range to value table conversion of u16 channel")?, ); } ChannelData::Int32(a) => { cn.data = ChannelData::Float64( - value_range_to_value_table_calculation(a, &val, &default_value) + value_range_to_value_table_calculation(a, &val, &default_value, true) .context("failed value range to value table conversion of i32 channel")?, ); } ChannelData::UInt32(a) => { cn.data = ChannelData::Float64( - value_range_to_value_table_calculation(a, &val, &default_value) + value_range_to_value_table_calculation(a, &val, &default_value, true) .context("failed value range to value table conversion of u32 channel")?, ); } ChannelData::Float32(a) => { cn.data = ChannelData::Float64( - value_range_to_value_table_calculation(a, &val, &default_value) + value_range_to_value_table_calculation(a, &val, &default_value, false) 
.context("failed value range to value table conversion of f32 channel")?, ); } ChannelData::Int64(a) => { cn.data = ChannelData::Float64( - value_range_to_value_table_calculation(a, &val, &default_value) + value_range_to_value_table_calculation(a, &val, &default_value, true) .context("failed value range to value table conversion of i64 channel")?, ); } ChannelData::UInt64(a) => { cn.data = ChannelData::Float64( - value_range_to_value_table_calculation(a, &val, &default_value) + value_range_to_value_table_calculation(a, &val, &default_value, true) .context("failed value range to value table conversion of u64 channel")?, ); } ChannelData::Float64(a) => { cn.data = ChannelData::Float64( - value_range_to_value_table_calculation(a, &val, &default_value) + value_range_to_value_table_calculation(a, &val, &default_value, false) .context("failed value range to value table conversion of f64 channel")?, ); } @@ -2329,27 +2352,27 @@ mod tests { #[test] fn test_value_to_value_without_interpolation_primitive() { - // Table pairs: x=0→y=0, x=10→y=100, x=20→y=200 + // Table pairs: x=0→y=0, x=10→y=100, x=20→y=200 (MDF4 spec 6.17.7) let keys = [0.0, 0.0, 10.0, 100.0, 20.0, 200.0]; let val: Vec<(&f64, &f64)> = keys.iter().tuples().collect(); let mut builder = Float64Builder::new(); - builder.append_value(0.0); // exact → 0 - builder.append_value(3.0); // nearer to x=0 (dist=3) than x=10 (dist=7) → 0 - builder.append_value(7.0); // nearer to x=10 (dist=3) than x=0 (dist=7) → 100 - builder.append_value(10.0); // exact → 100 - builder.append_value(-5.0); // below first → 0 - builder.append_value(25.0); // above last → 200 + builder.append_value(0.0); // exact match → 0 + builder.append_value(3.0); // between key[0]=0 and key[1]=10, nearer to 0 → 0 + builder.append_value(7.0); // between key[0]=0 and key[1]=10, nearer to 10 → 100 + builder.append_value(10.0); // exact match → 100 + builder.append_value(-5.0); // below first key → value[0] = 0 + builder.append_value(25.0); // above last 
key → value[n-1] = 200 let result = value_to_value_without_interpolation_primitive(&mut builder, val).unwrap(); let values = result.values_slice(); assert_eq!(values.len(), 6); - assert!((values[0] - 0.0).abs() < 1e-12); - assert!((values[1] - 0.0).abs() < 1e-12); - assert!((values[2] - 100.0).abs() < 1e-12); - assert!((values[3] - 100.0).abs() < 1e-12); - assert!((values[4] - 0.0).abs() < 1e-12); - assert!((values[5] - 200.0).abs() < 1e-12); + assert!((values[0] - 0.0).abs() < 1e-12); // exact → 0 + assert!((values[1] - 0.0).abs() < 1e-12); // nearest key=0 → 0 + assert!((values[2] - 100.0).abs() < 1e-12); // nearest key=10 → 100 + assert!((values[3] - 100.0).abs() < 1e-12); // exact → 100 + assert!((values[4] - 0.0).abs() < 1e-12); // below first → value[0]=0 + assert!((values[5] - 200.0).abs() < 1e-12); // above last → value[n-1]=200 } #[test] @@ -2369,7 +2392,7 @@ mod tests { builder.append_value(-5.0); // below all ranges → default builder.append_value(25.0); // above last key_min but within last upper bound → 300 - let result = value_range_to_value_table_calculation(&builder, &val, &default).unwrap(); + let result = value_range_to_value_table_calculation(&builder, &val, &default, false).unwrap(); let values = result.values_slice(); assert_eq!(values.len(), 5); assert!((values[0] - 100.0).abs() < 1e-12); @@ -2379,6 +2402,30 @@ mod tests { assert!((values[4] - 300.0).abs() < 1e-12); } + #[test] + fn test_value_range_float_touching_boundaries() { + // Replicate the Vector_ValueRange2ValueConversion.mf4 CC table (float [lo, hi) semantics) + let val = vec![ + (-10.0f64, -7.0, -1.0), // range 0 + (-7.0, -5.0, 0.0), // range 1 (touching: min=-7 == max of range 0) + (-5.0, 0.0, 1.0), + ]; + let default = -1.0f64; + + let mut builder = Float64Builder::new(); + builder.append_value(-9.0); // in range 0 [-10,-7): -10<=-9<-7 → True → -1 + builder.append_value(-7.0); // at touching boundary: Ok(1)→range 1→ 0 (not range 0 since -7 is not < -7) + builder.append_value(-6.0); 
// in range 1: -7<=-6<-5 → True → 0 + builder.append_value(100.0); // above all ranges → default -1 + + let result = value_range_to_value_table_calculation(&builder, &val, &default, false).unwrap(); + let values = result.values_slice(); + assert!((values[0] - (-1.0)).abs() < 1e-12, "raw=-9 should be -1, got {}", values[0]); + assert!((values[1] - 0.0).abs() < 1e-12, "raw=-7 should be 0, got {}", values[1]); + assert!((values[2] - 0.0).abs() < 1e-12, "raw=-6 should be 0, got {}", values[2]); + assert!((values[3] - (-1.0)).abs() < 1e-12, "raw=100 should be -1, got {}", values[3]); + } + #[test] fn test_algebraic_conversion_primitive() { // Expression: X * 2 + 1 @@ -2401,4 +2448,285 @@ mod tests { assert!((values[2] - 11.0).abs() < 1e-12); assert!((values[3] - (-5.0)).abs() < 1e-12); } + + // ── Helper for higher-level conversion tests ── + + fn make_cn4_with_data(data: ChannelData) -> Cn4 { + Cn4 { + data, + ..Default::default() + } + } + + // ── linear_conversion (Cn4) tests ── + + #[test] + fn test_linear_cn4_uint8() { + use arrow::array::UInt8Builder; + let mut builder = UInt8Builder::new(); + builder.append_value(2); + builder.append_value(4); + let mut cn = make_cn4_with_data(ChannelData::UInt8(builder)); + linear_conversion(&mut cn, &[1.0, 2.0]).unwrap(); // p1=1, p2=2 → v*2+1 + if let ChannelData::Float64(ref b) = cn.data { + let vals = b.values_slice(); + assert!((vals[0] - 5.0).abs() < 1e-12); // 2*2+1 + assert!((vals[1] - 9.0).abs() < 1e-12); // 4*2+1 + } else { + panic!("Expected Float64 after linear conversion"); + } + } + + #[test] + fn test_linear_cn4_int16() { + use arrow::array::Int16Builder; + let mut builder = Int16Builder::new(); + builder.append_value(10); + let mut cn = make_cn4_with_data(ChannelData::Int16(builder)); + linear_conversion(&mut cn, &[0.0, 0.5]).unwrap(); + if let ChannelData::Float64(ref b) = cn.data { + let vals = b.values_slice(); + assert!((vals[0] - 5.0).abs() < 1e-12); // 10*0.5+0 + } else { + panic!("Expected Float64"); + } + 
} + + #[test] + fn test_linear_cn4_array_d_int16() { + use arrow::array::Int16Builder; + use crate::data_holder::tensor_arrow::{Order, TensorArrow}; + use arrow::datatypes::Int16Type; + let mut builder = Int16Builder::new(); + builder.append_value(1); + builder.append_value(2); + let tensor = TensorArrow::::new_from_primitive(builder, None, vec![2], Order::RowMajor); + let mut cn = make_cn4_with_data(ChannelData::ArrayDInt16(tensor)); + linear_conversion(&mut cn, &[0.0, 3.0]).unwrap(); + if let ChannelData::ArrayDFloat64(ref t) = cn.data { + let vals = t.values_slice(); + assert!((vals[0] - 3.0).abs() < 1e-12); + assert!((vals[1] - 6.0).abs() < 1e-12); + } else { + panic!("Expected ArrayDFloat64"); + } + } + + #[test] + fn test_linear_cn4_array_d_float64() { + use arrow::datatypes::Float64Type; + use crate::data_holder::tensor_arrow::{Order, TensorArrow}; + let mut builder = Float64Builder::new(); + builder.append_value(1.0); + let tensor = TensorArrow::::new_from_primitive(builder, None, vec![1], Order::RowMajor); + let mut cn = make_cn4_with_data(ChannelData::ArrayDFloat64(tensor)); + linear_conversion(&mut cn, &[0.0, 2.0]).unwrap(); + if let ChannelData::ArrayDFloat64(ref t) = cn.data { + let vals = t.values_slice(); + assert!((vals[0] - 2.0).abs() < 1e-12); + } else { + panic!("Expected ArrayDFloat64"); + } + } + + #[test] + fn test_linear_cn4_utf8_warn() { + use arrow::array::LargeStringBuilder; + // Utf8 data should produce a warn but NOT change data type and should return Ok + let mut cn = make_cn4_with_data(ChannelData::Utf8(LargeStringBuilder::new())); + let result = linear_conversion(&mut cn, &[1.0, 2.0]); + assert!(result.is_ok()); + // data type unchanged + assert!(matches!(cn.data, ChannelData::Utf8(_))); + } + + #[test] + fn test_linear_cn4_identity_no_op() { + use arrow::array::UInt8Builder; + let mut builder = UInt8Builder::new(); + builder.append_value(7); + let mut cn = make_cn4_with_data(ChannelData::UInt8(builder)); + // p1=0, p2=1 is identity → 
no change + linear_conversion(&mut cn, &[0.0, 1.0]).unwrap(); + // data type should remain UInt8 (identity skipped) + assert!(matches!(cn.data, ChannelData::UInt8(_))); + } + + // ── rational_conversion (Cn4) tests ── + + #[test] + fn test_rational_cn4_float64() { + // identity rational: (0*x^2 + 1*x + 0) / (0*x^2 + 0*x + 1) = x + let cc_val = vec![0.0, 1.0, 0.0, 0.0, 0.0, 1.0]; + let mut builder = Float64Builder::new(); + builder.append_value(2.0); + let mut cn = make_cn4_with_data(ChannelData::Float64(builder)); + rational_conversion(&mut cn, &cc_val).unwrap(); + if let ChannelData::Float64(ref b) = cn.data { + let vals = b.values_slice(); + assert!((vals[0] - 2.0).abs() < 1e-12); + } else { + panic!("Expected Float64"); + } + } + + #[test] + fn test_rational_cn4_uint8() { + // (0*x^2 + 1*x + 0) / (0*x^2 + 0*x + 2) = x/2 + let cc_val = vec![0.0, 1.0, 0.0, 0.0, 0.0, 2.0]; + use arrow::array::UInt8Builder; + let mut builder = UInt8Builder::new(); + builder.append_value(4); + let mut cn = make_cn4_with_data(ChannelData::UInt8(builder)); + rational_conversion(&mut cn, &cc_val).unwrap(); + if let ChannelData::Float64(ref b) = cn.data { + let vals = b.values_slice(); + assert!((vals[0] - 2.0).abs() < 1e-12); + } else { + panic!("Expected Float64"); + } + } + + // ── value_to_value_with_interpolation (Cn4) tests ── + + #[test] + fn test_vtv_interp_cn4_float64() { + // cc_val: pairs (x, y) interleaved: 1.0→10.0, 2.0→20.0 + let cc_val = vec![1.0, 10.0, 2.0, 20.0]; + let mut builder = Float64Builder::new(); + builder.append_value(1.5); // interpolated: 15.0 + let mut cn = make_cn4_with_data(ChannelData::Float64(builder)); + value_to_value_with_interpolation(&mut cn, cc_val).unwrap(); + if let ChannelData::Float64(ref b) = cn.data { + let vals = b.values_slice(); + assert!((vals[0] - 15.0).abs() < 1e-9); + } else { + panic!("Expected Float64"); + } + } + + #[test] + fn test_vtv_interp_cn4_uint8() { + // cc_val: 0→0, 10→100 + let cc_val = vec![0.0, 0.0, 10.0, 100.0]; + use 
arrow::array::UInt8Builder; + let mut builder = UInt8Builder::new(); + builder.append_value(5); // 50 + let mut cn = make_cn4_with_data(ChannelData::UInt8(builder)); + value_to_value_with_interpolation(&mut cn, cc_val).unwrap(); + if let ChannelData::Float64(ref b) = cn.data { + let vals = b.values_slice(); + assert!((vals[0] - 50.0).abs() < 1e-9); + } else { + panic!("Expected Float64"); + } + } + + #[test] + fn test_vtv_interp_cn4_array_d_uint8() { + use arrow::array::UInt8Builder; + use arrow::datatypes::UInt8Type; + use crate::data_holder::tensor_arrow::{Order, TensorArrow}; + let cc_val = vec![0.0, 0.0, 10.0, 100.0]; + let mut builder = UInt8Builder::new(); + builder.append_value(5); + let tensor = TensorArrow::::new_from_primitive(builder, None, vec![1], Order::RowMajor); + let mut cn = make_cn4_with_data(ChannelData::ArrayDUInt8(tensor)); + value_to_value_with_interpolation(&mut cn, cc_val).unwrap(); + assert!(matches!(cn.data, ChannelData::ArrayDFloat64(_))); + } + + #[test] + fn test_vtv_interp_cn4_int64_noop() { + use arrow::array::Int64Builder; + // Int64 not handled in value_to_value_with_interpolation (falls through to warn) + let mut builder = Int64Builder::new(); + builder.append_value(5); + let mut cn = make_cn4_with_data(ChannelData::Int64(builder)); + let result = value_to_value_with_interpolation(&mut cn, vec![0.0, 0.0, 10.0, 100.0]); + assert!(result.is_ok()); + } + + // ── value_to_value_without_interpolation (Cn4) tests ── + + #[test] + fn test_vtv_no_interp_cn4_float64() { + let cc_val = vec![1.0, 100.0, 2.0, 200.0]; + let mut builder = Float64Builder::new(); + builder.append_value(1.0); // exact match → 100.0 + let mut cn = make_cn4_with_data(ChannelData::Float64(builder)); + value_to_value_without_interpolation(&mut cn, cc_val).unwrap(); + if let ChannelData::Float64(ref b) = cn.data { + let vals = b.values_slice(); + assert!((vals[0] - 100.0).abs() < 1e-12); + } else { + panic!("Expected Float64"); + } + } + + #[test] + fn 
test_vtv_no_interp_cn4_uint8() { + use arrow::array::UInt8Builder; + let cc_val = vec![1.0, 10.0, 2.0, 20.0]; + let mut builder = UInt8Builder::new(); + builder.append_value(2); // exact match → 20.0 + let mut cn = make_cn4_with_data(ChannelData::UInt8(builder)); + value_to_value_without_interpolation(&mut cn, cc_val).unwrap(); + if let ChannelData::Float64(ref b) = cn.data { + let vals = b.values_slice(); + assert!((vals[0] - 20.0).abs() < 1e-12); + } else { + panic!("Expected Float64"); + } + } + + // ── value_range_to_value_table (Cn4) tests ── + + #[test] + fn test_vrv_cn4_float64() { + // cc_val groups of 3: (min, max, out_val) + // Range [1.0, 2.0] → 100.0 + let cc_val = vec![1.0, 2.0, 100.0]; + let mut builder = Float64Builder::new(); + builder.append_value(1.5); // in [1.0, 2.0] → 100.0 + let mut cn = make_cn4_with_data(ChannelData::Float64(builder)); + value_range_to_value_table(&mut cn, cc_val).unwrap(); + if let ChannelData::Float64(ref b) = cn.data { + let vals = b.values_slice(); + assert!((vals[0] - 100.0).abs() < 1e-12); + } else { + panic!("Expected Float64"); + } + } + + #[test] + fn test_vrv_cn4_uint8() { + use arrow::array::UInt8Builder; + // Range [0.0, 10.0] → 42.0 + let cc_val = vec![0.0, 10.0, 42.0]; + let mut builder = UInt8Builder::new(); + builder.append_value(5); // in [0, 10] + let mut cn = make_cn4_with_data(ChannelData::UInt8(builder)); + value_range_to_value_table(&mut cn, cc_val).unwrap(); + if let ChannelData::Float64(ref b) = cn.data { + let vals = b.values_slice(); + assert!((vals[0] - 42.0).abs() < 1e-12); + } else { + panic!("Expected Float64"); + } + } + + #[test] + fn test_vrv_cn4_array_d_warn() { + use arrow::array::Int8Builder; + use arrow::datatypes::Int8Type; + use crate::data_holder::tensor_arrow::{Order, TensorArrow}; + let mut builder = Int8Builder::new(); + builder.append_value(5); + let tensor = TensorArrow::::new_from_primitive(builder, None, vec![1], Order::RowMajor); + let mut cn = 
make_cn4_with_data(ChannelData::ArrayDInt8(tensor)); + // ArrayDInt8 falls through to warn path + let result = value_range_to_value_table(&mut cn, vec![0.0, 10.0, 42.0]); + assert!(result.is_ok()); + } } diff --git a/src/mdfreader/data_read3.rs b/src/mdfreader/data_read3.rs index 5b3b902..06de30a 100644 --- a/src/mdfreader/data_read3.rs +++ b/src/mdfreader/data_read3.rs @@ -46,7 +46,7 @@ pub fn read_channels_from_bytes( } ChannelData::Int16(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record[pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; @@ -66,7 +66,7 @@ pub fn read_channels_from_bytes( } ChannelData::UInt16(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record[pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; @@ -87,7 +87,7 @@ pub fn read_channels_from_bytes( ChannelData::Int32(a) => { let data = a.values_slice_mut(); if n_bytes == 3 { - if cn.endian { + if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record[pos_byte_beg..pos_byte_beg + n_bytes]; data[i + previous_index] = value @@ -102,7 +102,7 @@ pub fn read_channels_from_bytes( .context("Could not read le i24")?; } } - } else if cn.endian { + } else if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record[pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; @@ -123,7 +123,7 @@ pub fn read_channels_from_bytes( ChannelData::UInt32(a) => { let data = a.values_slice_mut(); if n_bytes == 3 { - if cn.endian { + if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record[pos_byte_beg..pos_byte_beg + n_bytes]; data[i + previous_index] = value @@ -138,7 +138,7 @@ pub fn read_channels_from_bytes( .context("Could not read le u24")?; } } - } else 
if cn.endian { + } else if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record[pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; @@ -158,7 +158,7 @@ pub fn read_channels_from_bytes( } ChannelData::Float32(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { if n_bytes == 2 { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record @@ -198,7 +198,7 @@ pub fn read_channels_from_bytes( } ChannelData::Int64(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { if n_bytes == 8 { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record[pos_byte_beg..pos_byte_beg + n_bytes]; @@ -232,7 +232,7 @@ pub fn read_channels_from_bytes( } ChannelData::UInt64(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { if n_bytes == 8 { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record[pos_byte_beg..pos_byte_beg + n_bytes]; @@ -297,7 +297,7 @@ pub fn read_channels_from_bytes( } ChannelData::Float64(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record[pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; diff --git a/src/mdfreader/data_read4.rs b/src/mdfreader/data_read4.rs index 25e9815..ffaf1d0 100644 --- a/src/mdfreader/data_read4.rs +++ b/src/mdfreader/data_read4.rs @@ -40,6 +40,9 @@ pub fn read_one_channel_array( // cn_type == 7 : VLSC channel (stores offsets into VD block) let n_bytes = cn.n_bytes as usize; let list_size = cn.list_size; + // Clone shape once before the match — only one arm executes, so this is always ≤1 clone. 
+ let shape_dims = cn.shape.0.clone(); + let shape_order = cn.shape.1.clone(); match &mut cn.data { ChannelData::Int8(a) => { let mut buf = vec![0; cycle_count]; @@ -53,7 +56,7 @@ pub fn read_one_channel_array( } ChannelData::Int16(a) => { let mut buf = vec![0; cycle_count]; - if cn.endian { + if cn.endian.is_big() { Cursor::new(data_bytes) .read_i16_into::(&mut buf) .context("Could not read be i16 array")?; @@ -66,7 +69,7 @@ pub fn read_one_channel_array( } ChannelData::UInt16(a) => { let mut buf = vec![0; cycle_count]; - if cn.endian { + if cn.endian.is_big() { Cursor::new(data_bytes) .read_u16_into::(&mut buf) .context("Could not read be u16 array")?; @@ -80,7 +83,7 @@ pub fn read_one_channel_array( ChannelData::Int32(a) => { if n_bytes == 4 { let mut buf = vec![0; cycle_count]; - if cn.endian { + if cn.endian.is_big() { Cursor::new(data_bytes) .read_i32_into::(&mut buf) .context("Could not read be i32 array")?; @@ -92,7 +95,7 @@ pub fn read_one_channel_array( *a = Int32Builder::new_from_buffer(buf.into(), None); } else if n_bytes == 3 { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, mut value) in data_bytes.chunks(n_bytes).enumerate() { data[i] = value .read_i24::() @@ -110,7 +113,7 @@ pub fn read_one_channel_array( ChannelData::UInt32(a) => { if n_bytes == 4 { let mut buf = vec![0; cycle_count]; - if cn.endian { + if cn.endian.is_big() { Cursor::new(data_bytes) .read_u32_into::(&mut buf) .context("Could not read be u32 array")?; @@ -122,7 +125,7 @@ pub fn read_one_channel_array( *a = UInt32Builder::new_from_buffer(buf.into(), None); } else if n_bytes == 3 { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, mut value) in data_bytes.chunks(n_bytes).enumerate() { data[i] = value .read_u24::() @@ -140,7 +143,7 @@ pub fn read_one_channel_array( ChannelData::Float32(a) => { if n_bytes == 4 { let mut buf = vec![0f32; cycle_count]; - if cn.endian { + if cn.endian.is_big() { Cursor::new(data_bytes) 
.read_f32_into::(&mut buf) .context("Could not read be f32 array")?; @@ -152,7 +155,7 @@ pub fn read_one_channel_array( *a = Float32Builder::new_from_buffer(buf.into(), None); } else if n_bytes == 2 { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, value) in data_bytes.chunks(std::mem::size_of::()).enumerate() { data[i] = f16::from_be_bytes( @@ -174,7 +177,7 @@ pub fn read_one_channel_array( ChannelData::Int64(a) => { if n_bytes == 8 { let mut buf = vec![0; cycle_count]; - if cn.endian { + if cn.endian.is_big() { Cursor::new(data_bytes) .read_i64_into::(&mut buf) .context("Could not read be i64 array")?; @@ -186,7 +189,7 @@ pub fn read_one_channel_array( *a = Int64Builder::new_from_buffer(buf.into(), None); } else if n_bytes == 6 { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, mut value) in data_bytes.chunks(n_bytes).enumerate() { data[i] = value .read_i48::() @@ -204,7 +207,7 @@ pub fn read_one_channel_array( ChannelData::UInt64(a) => { if n_bytes == 8 { let mut buf = vec![0; cycle_count]; - if cn.endian { + if cn.endian.is_big() { Cursor::new(data_bytes) .read_u64_into::(&mut buf) .context("Could not read be u64 array")?; @@ -217,7 +220,7 @@ pub fn read_one_channel_array( } else if n_bytes == 7 { let mut temp = [0u8; std::mem::size_of::()]; let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, value) in data_bytes.chunks(n_bytes).enumerate() { temp[0..7].copy_from_slice(&value[0..7]); data[i] = u64::from_be_bytes(temp); @@ -230,7 +233,7 @@ pub fn read_one_channel_array( } } else if n_bytes == 6 { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, mut value) in data_bytes.chunks(n_bytes).enumerate() { data[i] = value .read_u48::() @@ -247,7 +250,7 @@ pub fn read_one_channel_array( // n_bytes = 5 let mut temp = [0u8; 6]; let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, value) in 
data_bytes.chunks(n_bytes).enumerate() { temp[0..5].copy_from_slice(&value[0..n_bytes]); data[i] = Cursor::new(temp) @@ -266,7 +269,7 @@ pub fn read_one_channel_array( } ChannelData::Float64(a) => { let mut buf = vec![0f64; cycle_count]; - if cn.endian { + if cn.endian.is_big() { Cursor::new(data_bytes) .read_f64_into::(&mut buf) .context("Could not read be f64 array")?; @@ -281,7 +284,7 @@ pub fn read_one_channel_array( let data = a.values().values_slice_mut(); if n_bytes <= 4 { // complex 16 - if cn.endian { + if cn.endian.is_big() { for (i, value) in data_bytes.chunks(std::mem::size_of::()).enumerate() { data[i] = f16::from_be_bytes( @@ -300,7 +303,7 @@ pub fn read_one_channel_array( } } else if n_bytes <= 8 { // complex 32 - if cn.endian { + if cn.endian.is_big() { for (i, value) in data_bytes.chunks(std::mem::size_of::()).enumerate() { data[i] = f32::from_be_bytes( @@ -319,7 +322,7 @@ pub fn read_one_channel_array( } ChannelData::Complex64(a) => { let data = a.values().values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, value) in data_bytes.chunks(std::mem::size_of::()).enumerate() { data[i] = f64::from_be_bytes( value.try_into().unwrap(), @@ -354,7 +357,7 @@ pub fn read_one_channel_array( } } else if cn.block.cn_data_type == 8 || cn.block.cn_data_type == 9 { // 8 | 9 :String UTF16 to be converted into UTF8 - if cn.endian { + if cn.endian.is_big() { let mut decoder = UTF_16BE.new_decoder(); for record in data_bytes.chunks(n_bytes) { let mut dst = String::new(); @@ -411,20 +414,20 @@ pub fn read_one_channel_array( .context("Could not read i8 array")?; *a = TensorArrow::new_from_buffer( buf.into(), - cn.shape.0.clone(), - cn.shape.1.clone(), + shape_dims, + shape_order, ); } ChannelData::ArrayDUInt8(a) => { *a = TensorArrow::new_from_buffer( data_bytes.clone().into(), - cn.shape.0.clone(), - cn.shape.1.clone(), + shape_dims, + shape_order, ); } ChannelData::ArrayDInt16(a) => { let mut buf = vec![0; cycle_count * list_size]; - if cn.endian { + 
if cn.endian.is_big() { Cursor::new(data_bytes) .read_i16_into::(&mut buf) .context("Could not read be i16 array")?; @@ -435,13 +438,13 @@ pub fn read_one_channel_array( } *a = TensorArrow::new_from_buffer( buf.into(), - cn.shape.0.clone(), - cn.shape.1.clone(), + shape_dims, + shape_order, ); } ChannelData::ArrayDUInt16(a) => { let mut buf = vec![0; cycle_count * list_size]; - if cn.endian { + if cn.endian.is_big() { Cursor::new(data_bytes) .read_u16_into::(&mut buf) .context("Could not read be u16 array")?; @@ -452,13 +455,13 @@ pub fn read_one_channel_array( } *a = TensorArrow::new_from_buffer( buf.into(), - cn.shape.0.clone(), - cn.shape.1.clone(), + shape_dims, + shape_order, ); } ChannelData::ArrayDInt32(a) => { let mut buf = vec![0i32; cycle_count * list_size]; - if cn.endian { + if cn.endian.is_big() { if n_bytes <= 3 { for (i, mut value) in data_bytes.chunks(n_bytes).enumerate() { buf[i] = value @@ -483,13 +486,13 @@ pub fn read_one_channel_array( } *a = TensorArrow::new_from_buffer( buf.into(), - cn.shape.0.clone(), - cn.shape.1.clone(), + shape_dims, + shape_order, ); } ChannelData::ArrayDUInt32(a) => { let mut buf = vec![0; cycle_count * list_size]; - if cn.endian { + if cn.endian.is_big() { if n_bytes <= 3 { for (i, mut value) in data_bytes.chunks(n_bytes).enumerate() { buf[i] = value @@ -514,13 +517,13 @@ pub fn read_one_channel_array( } *a = TensorArrow::new_from_buffer( buf.into(), - cn.shape.0.clone(), - cn.shape.1.clone(), + shape_dims, + shape_order, ); } ChannelData::ArrayDFloat32(a) => { let mut buf = vec![0f32; cycle_count * list_size]; - if cn.endian { + if cn.endian.is_big() { if n_bytes == 2 { for (i, value) in data_bytes.chunks(std::mem::size_of::()).enumerate() { @@ -547,13 +550,13 @@ pub fn read_one_channel_array( } *a = TensorArrow::new_from_buffer( buf.into(), - cn.shape.0.clone(), - cn.shape.1.clone(), + shape_dims, + shape_order, ); } ChannelData::ArrayDInt64(a) => { let mut buf = vec![0; cycle_count * list_size]; - if cn.endian { + 
if cn.endian.is_big() { if n_bytes == 8 { Cursor::new(data_bytes) .read_i64_into::(&mut buf) @@ -578,14 +581,14 @@ pub fn read_one_channel_array( } *a = TensorArrow::new_from_buffer( buf.into(), - cn.shape.0.clone(), - cn.shape.1.clone(), + shape_dims, + shape_order, ); } ChannelData::ArrayDUInt64(a) => { let mut buf = vec![0; cycle_count * list_size]; if n_bytes == 8 { - if cn.endian { + if cn.endian.is_big() { Cursor::new(data_bytes) .read_u64_into::(&mut buf) .context("Could not read be u64 array")?; @@ -596,7 +599,7 @@ pub fn read_one_channel_array( } } else if n_bytes == 7 { let mut temp = [0u8; std::mem::size_of::()]; - if cn.endian { + if cn.endian.is_big() { for (i, value) in data_bytes.chunks(n_bytes).enumerate() { temp[0..7].copy_from_slice(&value[0..7]); buf[i] = u64::from_be_bytes(temp); @@ -608,7 +611,7 @@ pub fn read_one_channel_array( } } } else if n_bytes == 6 { - if cn.endian { + if cn.endian.is_big() { for (i, mut value) in data_bytes.chunks(n_bytes).enumerate() { buf[i] = value .read_u48::() @@ -623,7 +626,7 @@ pub fn read_one_channel_array( } } else if n_bytes == 5 { let mut temp = [0u8; 6]; - if cn.endian { + if cn.endian.is_big() { for (i, value) in data_bytes.chunks(n_bytes).enumerate() { temp[0..5].copy_from_slice(&value[0..n_bytes]); buf[i] = Cursor::new(temp) @@ -641,13 +644,13 @@ pub fn read_one_channel_array( } *a = TensorArrow::new_from_buffer( buf.into(), - cn.shape.0.clone(), - cn.shape.1.clone(), + shape_dims, + shape_order, ); } ChannelData::ArrayDFloat64(a) => { let mut buf = vec![0f64; cycle_count * (list_size)]; - if cn.endian { + if cn.endian.is_big() { Cursor::new(data_bytes) .read_f64_into::(&mut buf) .context("Could not read be f64 array")?; @@ -658,8 +661,8 @@ pub fn read_one_channel_array( } *a = TensorArrow::new_from_buffer( buf.into(), - cn.shape.0.clone(), - cn.shape.1.clone(), + shape_dims, + shape_order, ); } ChannelData::Union(_) => {} // Union channels are constructed post-read @@ -718,7 +721,7 @@ pub fn 
read_channels_from_bytes( } ChannelData::Int16(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record [pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; @@ -738,7 +741,7 @@ pub fn read_channels_from_bytes( } ChannelData::UInt16(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record [pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; @@ -759,7 +762,7 @@ pub fn read_channels_from_bytes( ChannelData::Int32(a) => { let data = a.values_slice_mut(); if n_bytes == 3 { - if cn.endian { + if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record[pos_byte_beg..pos_byte_beg + n_bytes]; data[i + previous_index] = value @@ -774,7 +777,7 @@ pub fn read_channels_from_bytes( .context("Could not read le i24")?; } } - } else if cn.endian { + } else if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record [pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; @@ -795,7 +798,7 @@ pub fn read_channels_from_bytes( ChannelData::UInt32(a) => { let data = a.values_slice_mut(); if n_bytes == 3 { - if cn.endian { + if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record[pos_byte_beg..pos_byte_beg + n_bytes]; data[i + previous_index] = value @@ -810,7 +813,7 @@ pub fn read_channels_from_bytes( .context("Could not read le u24")?; } } - } else if cn.endian { + } else if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record [pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; @@ -830,7 +833,7 @@ pub fn read_channels_from_bytes( } ChannelData::Float32(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { if n_bytes == 2 { for (i, record) in 
data_chunk.chunks(record_length).enumerate() { value = &record @@ -870,7 +873,7 @@ pub fn read_channels_from_bytes( } ChannelData::Int64(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { if n_bytes == 8 { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record[pos_byte_beg..pos_byte_beg + n_bytes]; @@ -904,7 +907,7 @@ pub fn read_channels_from_bytes( } ChannelData::UInt64(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { if n_bytes == 8 { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record[pos_byte_beg..pos_byte_beg + n_bytes]; @@ -971,7 +974,7 @@ pub fn read_channels_from_bytes( } ChannelData::Float64(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { value = &record [pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; @@ -995,7 +998,7 @@ pub fn read_channels_from_bytes( // complex 16 let mut re_val: &[u8]; let mut im_val: &[u8]; - if cn.endian { + if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { re_val = &record [pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; @@ -1038,7 +1041,7 @@ pub fn read_channels_from_bytes( // complex 32 let mut re_val: &[u8]; let mut im_val: &[u8]; - if cn.endian { + if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { re_val = &record [pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; @@ -1080,7 +1083,7 @@ pub fn read_channels_from_bytes( let mut re_val: &[u8]; let mut im_val: &[u8]; let data = a.values().values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { re_val = &record [pos_byte_beg..pos_byte_beg + std::mem::size_of::()]; @@ -1138,7 +1141,7 @@ pub fn read_channels_from_bytes( } } else if cn.block.cn_data_type == 8 || cn.block.cn_data_type == 9 { // 8 | 9 
:String UTF16 to be converted into UTF8 - if cn.endian { + if cn.endian.is_big() { let mut decoder = UTF_16BE.new_decoder(); for record in data_chunk.chunks(record_length) { value = &record[pos_byte_beg..pos_byte_beg + n_bytes]; @@ -1224,7 +1227,7 @@ pub fn read_channels_from_bytes( } ChannelData::ArrayDInt16(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { for j in 0..cn.list_size { value = &record[pos_byte_beg + j * std::mem::size_of::() @@ -1254,7 +1257,7 @@ pub fn read_channels_from_bytes( } ChannelData::ArrayDUInt16(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { for j in 0..cn.list_size { value = &record[pos_byte_beg + j * std::mem::size_of::() @@ -1284,7 +1287,7 @@ pub fn read_channels_from_bytes( } ChannelData::ArrayDInt32(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { if n_bytes <= 3 { for (i, record) in data_chunk.chunks(record_length).enumerate() { for j in 0..cn.list_size { @@ -1337,7 +1340,7 @@ pub fn read_channels_from_bytes( } ChannelData::ArrayDUInt32(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { if n_bytes <= 3 { for (i, record) in data_chunk.chunks(record_length).enumerate() { for j in 0..cn.list_size { @@ -1390,7 +1393,7 @@ pub fn read_channels_from_bytes( } ChannelData::ArrayDFloat32(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { if n_bytes <= 2 { for (i, record) in data_chunk.chunks(record_length).enumerate() { for j in 0..cn.list_size { @@ -1452,7 +1455,7 @@ pub fn read_channels_from_bytes( } ChannelData::ArrayDInt64(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { if n_bytes == 8 { for (i, record) in data_chunk.chunks(record_length).enumerate() { for j in 0..cn.list_size { @@ -1504,7 +1507,7 @@ pub 
fn read_channels_from_bytes( } ChannelData::ArrayDUInt64(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { if n_bytes == 8 { for (i, record) in data_chunk.chunks(record_length).enumerate() { for j in 0..cn.list_size { @@ -1607,7 +1610,7 @@ pub fn read_channels_from_bytes( } ChannelData::ArrayDFloat64(a) => { let data = a.values_slice_mut(); - if cn.endian { + if cn.endian.is_big() { for (i, record) in data_chunk.chunks(record_length).enumerate() { for j in 0..cn.list_size { value = &record[pos_byte_beg + j * std::mem::size_of::() @@ -1638,7 +1641,7 @@ pub fn read_channels_from_bytes( let c_vlsd_channel = Arc::clone(&vlsd_channels); let mut vlsd_channel = c_vlsd_channel .lock() - .expect("Could not get lock from vlsd channel arc vec"); + .unwrap_or_else(|e| e.into_inner()); vlsd_channel.push((cn.block.cn_type, *rec_pos)); } } else { @@ -1669,7 +1672,7 @@ pub fn read_channels_from_bytes( let c_vlsd_channel = Arc::clone(&vlsd_channels); let mut vlsd_channel = c_vlsd_channel .lock() - .expect("Could not get lock from vlsd channel arc vec"); + .unwrap_or_else(|e| e.into_inner()); vlsd_channel.push((cn.block.cn_type, *rec_pos)); } } diff --git a/src/mdfreader/datastream_decoder.rs b/src/mdfreader/datastream_decoder.rs index e1a1ca5..c1254e2 100644 --- a/src/mdfreader/datastream_decoder.rs +++ b/src/mdfreader/datastream_decoder.rs @@ -283,7 +283,7 @@ pub fn decode_single_channel_value( final_byte_offset, bit_count, cn.block.cn_data_type, - cn.endian, + cn.endian.is_big(), )?; // Update stream position @@ -753,4 +753,338 @@ mod tests { state.reset_alignment(); assert_eq!(state.alignment_offset, 7); } + + // ── calculate_final_byte_offset tests ── + + #[test] + fn test_final_byte_offset_normal_alignment() { + // alignment != 255: result = aligned_offset + byte_offset (bit_offset ignored) + assert_eq!(calculate_final_byte_offset(8, 0, 3, 0), 11); + assert_eq!(calculate_final_byte_offset(8, 0, 3, 7), 11); // bit_offset ignored + 
assert_eq!(calculate_final_byte_offset(0, 1, 5, 0), 5); + } + + #[test] + fn test_final_byte_offset_bit_packed() { + // alignment == 255: result = (aligned_offset + byte_offset*8 + bit_offset) / 8 + assert_eq!(calculate_final_byte_offset(0, 255, 1, 4), 1); // (0 + 8 + 4) / 8 = 1 + assert_eq!(calculate_final_byte_offset(16, 255, 0, 4), 2); // (16 + 0 + 4) / 8 = 2 + } + + // ── decode_single_channel_value helper ── + + fn make_cn4( + cn_type: u8, + cn_bit_count: u32, + cn_alignment: u8, + cn_byte_offset: u32, + cn_bit_offset: u8, + cn_flags: u32, + ) -> Cn4 { + use crate::mdfinfo::mdfinfo4::Cn4Block; + let mut block = Cn4Block::default(); + block.cn_type = cn_type; + block.cn_bit_count = cn_bit_count; + block.cn_alignment = cn_alignment; + block.cn_byte_offset = cn_byte_offset; + block.cn_bit_offset = cn_bit_offset; + block.cn_flags = cn_flags; + Cn4 { + block, + ..Default::default() + } + } + + // ── decode_single_channel_value tests ── + + #[test] + fn test_decode_single_channel_byte_aligned() { + let data = [0xABu8, 0xCD]; + let cn = make_cn4(0, 8, 0, 0, 0, 0); + let mut stream_state = StreamState::new(); + let (bytes, consumed) = decode_single_channel_value(&data, &cn, &mut stream_state).unwrap(); + assert_eq!(bytes, vec![0xAB]); + assert_eq!(consumed, 1); + assert_eq!(stream_state.bit_position, 8); + } + + #[test] + fn test_decode_single_channel_with_byte_offset() { + let data = [0x00u8, 0xAA, 0xBB]; + let cn = make_cn4(0, 8, 0, 1, 0, 0); // cn_byte_offset=1 + let mut stream_state = StreamState::new(); + let (bytes, consumed) = decode_single_channel_value(&data, &cn, &mut stream_state).unwrap(); + assert_eq!(bytes, vec![0xAA]); + assert_eq!(consumed, 1); + } + + #[test] + fn test_decode_single_channel_vlsd() { + // 4-byte LE length = 3, then 3 bytes "ABC" + let data = [3u8, 0, 0, 0, 0x41, 0x42, 0x43]; + let cn = make_cn4(1, 0, 0, 0, 0, 0); // cn_type=1 (VLSD) + let mut stream_state = StreamState::new(); + let (bytes, consumed) = 
decode_single_channel_value(&data, &cn, &mut stream_state).unwrap(); + assert_eq!(bytes, vec![0x41, 0x42, 0x43]); + assert_eq!(consumed, 7); // 4 (length prefix) + 3 (data) + assert_eq!(stream_state.bit_position, 7 * 8); + } + + #[test] + fn test_decode_single_channel_vlsd_underrun() { + // length=3 but only 1 byte of data + let data = [3u8, 0, 0, 0, 0x41]; + let cn = make_cn4(1, 0, 0, 0, 0, 0); + let mut stream_state = StreamState::new(); + let result = decode_single_channel_value(&data, &cn, &mut stream_state); + assert!(result.is_err()); + } + + #[test] + fn test_decode_single_channel_vlsc_bail() { + // cn_type=7 (VLSC), cn_bit_count=0 -> should bail + let data = [0u8; 16]; + let cn = make_cn4(7, 0, 0, 0, 0, 0); + let mut stream_state = StreamState::new(); + let result = decode_single_channel_value(&data, &cn, &mut stream_state); + assert!(result.is_err()); + } + + #[test] + fn test_decode_single_channel_alignment_reset_flag() { + // CN_F_ALIGNMENT_RESET = 1 << 18 = 0x40000 + let data = [0u8; 16]; + let cn = make_cn4(0, 8, 0, 0, 0, CN_F_ALIGNMENT_RESET); + let mut stream_state = StreamState::new(); + // advance stream position to 6 bytes (48 bits) + stream_state.bit_position = 48; + stream_state.alignment_offset = 0; + // When decode_single_channel_value is called, it should reset alignment_offset + // to byte_position (= 6), then calculate the aligned offset from there + let (bytes, _consumed) = decode_single_channel_value(&data, &cn, &mut stream_state).unwrap(); + assert_eq!(bytes.len(), 1); + // alignment_offset should have been reset to 6 before calculation + // aligned_offset for cn_alignment=0 = current_byte_pos = 6 + // final_byte_offset = 6 + 0 = 6 + // so bit_position = (6 + 1) * 8 = 56 + assert_eq!(stream_state.bit_position, 56); + } + + // ── decode_channel_union tests ── + + #[test] + fn test_decode_channel_union_two_members() { + use crate::mdfinfo::mdfinfo4::Cu4Block; + let data = [0x01u8, 0x00, 0x02, 0x00]; // 4 bytes + let cu_block = 
Cu4Block::default(); + + let mut cn_a = make_cn4(0, 16, 0, 0, 0, 0); + cn_a.unique_name = "chan_a".to_string(); + let mut cn_b = make_cn4(0, 16, 0, 0, 0, 0); + cn_b.unique_name = "chan_b".to_string(); + + let member_channels: Vec<&Cn4> = vec![&cn_a, &cn_b]; + let mut stream_state = StreamState::new(); + + let result = decode_channel_union(&data, &cu_block, &member_channels, &mut stream_state).unwrap(); + assert!(result.contains_key("chan_a")); + assert!(result.contains_key("chan_b")); + // Both start at 0, each consume 2 bytes (16 bits). max_end = 16 bits. + assert_eq!(stream_state.bit_position, 16); + } + + #[test] + fn test_decode_channel_union_different_sizes() { + use crate::mdfinfo::mdfinfo4::Cu4Block; + let data = [0x01u8, 0x00, 0x02, 0x00, 0x03, 0x00]; // 6 bytes + let cu_block = Cu4Block::default(); + + let mut cn_small = make_cn4(0, 8, 0, 0, 0, 0); // 1 byte + cn_small.unique_name = "small".to_string(); + let mut cn_large = make_cn4(0, 32, 0, 0, 0, 0); // 4 bytes + cn_large.unique_name = "large".to_string(); + + let member_channels: Vec<&Cn4> = vec![&cn_small, &cn_large]; + let mut stream_state = StreamState::new(); + + let result = decode_channel_union(&data, &cu_block, &member_channels, &mut stream_state).unwrap(); + assert!(result.contains_key("small")); + assert!(result.contains_key("large")); + // max_end is 32 bits (4 bytes from cn_large) + assert_eq!(stream_state.bit_position, 32); + } + + // ── decode_channel_variant tests ── + + #[test] + fn test_decode_channel_variant_match_first() { + use crate::mdfinfo::mdfinfo4::Cv4Block; + let data = [0xAAu8, 0xBB, 0xCC]; + let cv_block = Cv4Block { + cv_option_val: vec![0, 1, 2], + cv_option_count: 3, + ..Default::default() + }; + let mut cn_opt0 = make_cn4(0, 8, 0, 0, 0, 0); + cn_opt0.unique_name = "opt0".to_string(); + let mut cn_opt1 = make_cn4(0, 8, 0, 0, 0, 0); + cn_opt1.unique_name = "opt1".to_string(); + let mut cn_opt2 = make_cn4(0, 8, 0, 0, 0, 0); + cn_opt2.unique_name = "opt2".to_string(); + + let 
option_channels: Vec<&Cn4> = vec![&cn_opt0, &cn_opt1, &cn_opt2]; + let mut stream_state = StreamState::new(); + + let (idx, name, bytes) = + decode_channel_variant(&data, &cv_block, &option_channels, 0, &mut stream_state).unwrap(); + assert_eq!(idx, 0); + assert_eq!(name, "opt0"); + assert_eq!(bytes, vec![0xAA]); + } + + #[test] + fn test_decode_channel_variant_match_last() { + use crate::mdfinfo::mdfinfo4::Cv4Block; + let data = [0xAAu8, 0xBB, 0xCC]; + let cv_block = Cv4Block { + cv_option_val: vec![0, 1, 2], + cv_option_count: 3, + ..Default::default() + }; + let mut cn_opt0 = make_cn4(0, 8, 0, 0, 0, 0); + cn_opt0.unique_name = "opt0".to_string(); + let mut cn_opt1 = make_cn4(0, 8, 0, 0, 0, 0); + cn_opt1.unique_name = "opt1".to_string(); + let mut cn_opt2 = make_cn4(0, 8, 0, 0, 0, 0); + cn_opt2.unique_name = "opt2".to_string(); + + let option_channels: Vec<&Cn4> = vec![&cn_opt0, &cn_opt1, &cn_opt2]; + let mut stream_state = StreamState::new(); + + let (idx, name, _bytes) = + decode_channel_variant(&data, &cv_block, &option_channels, 2, &mut stream_state).unwrap(); + assert_eq!(idx, 2); + assert_eq!(name, "opt2"); + } + + #[test] + fn test_decode_channel_variant_no_match() { + use crate::mdfinfo::mdfinfo4::Cv4Block; + let data = [0xAAu8, 0xBB, 0xCC]; + let cv_block = Cv4Block { + cv_option_val: vec![0, 1, 2], + cv_option_count: 3, + ..Default::default() + }; + let mut cn_opt0 = make_cn4(0, 8, 0, 0, 0, 0); + cn_opt0.unique_name = "opt0".to_string(); + let option_channels: Vec<&Cn4> = vec![&cn_opt0]; + let mut stream_state = StreamState::new(); + + let result = decode_channel_variant(&data, &cv_block, &option_channels, 99, &mut stream_state); + assert!(result.is_err()); + } + + // ── decode_channel_list tests ── + + #[test] + fn test_decode_channel_list_size_in_elements() { + use crate::mdfinfo::mdfinfo4::Cl4Block; + // cl_flags = 1: size_value is number of elements + let cl_block = Cl4Block { + cl_flags: 1, + cl_alignment: 0, + cl_bit_offset: 0, + cl_byte_offset: 
0, + ..Default::default() + }; + let data = [10u8, 20, 30, 40]; + let element_cn = make_cn4(0, 8, 0, 0, 0, 0); + let mut stream_state = StreamState::new(); + + let elements = decode_channel_list(&data, &cl_block, &element_cn, 3, &mut stream_state).unwrap(); + assert_eq!(elements.len(), 3); + assert_eq!(elements[0], vec![10u8]); + assert_eq!(elements[1], vec![20u8]); + assert_eq!(elements[2], vec![30u8]); + } + + #[test] + fn test_decode_channel_list_size_in_bytes() { + use crate::mdfinfo::mdfinfo4::Cl4Block; + // cl_flags = 0: size_value is number of bytes; element is 1 byte → 4 elements + let cl_block = Cl4Block { + cl_flags: 0, + cl_alignment: 0, + cl_bit_offset: 0, + cl_byte_offset: 0, + ..Default::default() + }; + let data = [10u8, 20, 30, 40]; + let element_cn = make_cn4(0, 8, 0, 0, 0, 0); // 1 byte per element + let mut stream_state = StreamState::new(); + + let elements = decode_channel_list(&data, &cl_block, &element_cn, 4, &mut stream_state).unwrap(); + assert_eq!(elements.len(), 4); + } + + #[test] + fn test_decode_channel_list_zero_elements() { + use crate::mdfinfo::mdfinfo4::Cl4Block; + let cl_block = Cl4Block { + cl_flags: 1, + ..Default::default() + }; + let data = [10u8, 20, 30, 40]; + let element_cn = make_cn4(0, 8, 0, 0, 0, 0); + let mut stream_state = StreamState::new(); + + let elements = decode_channel_list(&data, &cl_block, &element_cn, 0, &mut stream_state).unwrap(); + assert_eq!(elements.len(), 0); + } + + #[test] + fn test_decode_channel_list_underrun() { + use crate::mdfinfo::mdfinfo4::Cl4Block; + // cl_flags = 1: size_value=5 elements, but data only has 2 bytes + let cl_block = Cl4Block { + cl_flags: 1, + cl_alignment: 0, + cl_bit_offset: 0, + cl_byte_offset: 0, + ..Default::default() + }; + let data = [10u8, 20]; // only 2 bytes, not enough for 5 elements + let element_cn = make_cn4(0, 8, 0, 0, 0, 0); + let mut stream_state = StreamState::new(); + + let result = decode_channel_list(&data, &cl_block, &element_cn, 5, &mut stream_state); + 
assert!(result.is_err()); + } + + // ── extract_channel_values_as_u64 tests ── + + #[test] + fn test_extract_u64_from_uint8() { + use crate::data_holder::channel_data::ChannelData; + use arrow::array::UInt8Builder; + let mut builder = UInt8Builder::new(); + builder.append_value(1); + builder.append_value(2); + builder.append_value(3); + let mut cn = Cn4::default(); + cn.data = ChannelData::UInt8(builder); + let result = extract_channel_values_as_u64(&cn).unwrap(); + assert_eq!(result, vec![1u64, 2, 3]); + } + + #[test] + fn test_extract_u64_from_utf8_fails() { + use crate::data_holder::channel_data::ChannelData; + use arrow::array::LargeStringBuilder; + let mut cn = Cn4::default(); + cn.data = ChannelData::Utf8(LargeStringBuilder::new()); + let result = extract_channel_values_as_u64(&cn); + assert!(result.is_err()); + } } diff --git a/src/mdfreader/mdfreader4.rs b/src/mdfreader/mdfreader4.rs index 339f244..1a98577 100644 --- a/src/mdfreader/mdfreader4.rs +++ b/src/mdfreader/mdfreader4.rs @@ -429,6 +429,44 @@ fn read_hl(rdr: &mut BufReader<&File>, mut position: i64) -> Result<(i64, [u8; 4 Ok((position, id)) } +/// Reads VLSD data from a chain of DL sub-blocks (##SD or ##DZ) without reinitialising arrays. +/// Used by read_sd to process DL-chained VLSD data for a single channel. 
+fn read_vlsd_from_dl_blocks( + rdr: &mut BufReader<&File>, + dl_blocks: Vec, + mut position: i64, + cn: &mut Cn4, + decoder: &mut Dec, +) -> Result { + let mut previous_index: usize = 0; + for dl in dl_blocks { + for data_pointer in dl.dl_data { + rdr.seek_relative(data_pointer - position) + .context("Could not reach VLSD sub-block from DL")?; + let mut id = [0u8; 4]; + rdr.read_exact(&mut id) + .context("could not read VLSD sub-block id")?; + let mut data = if id == *b"##DZ" { + let (dt, block_header) = parse_dz(rdr)?; + position = data_pointer + block_header.len as i64; + dt + } else { + // ##SD block (same header layout as Dt4Block) + let block_header: Dt4Block = rdr + .read_le() + .context("Could not read VLSD sub-block header")?; + let mut buf = vec![0u8; (block_header.len - 24) as usize]; + rdr.read_exact(&mut buf) + .context("Could not read VLSD sub-block data")?; + position = data_pointer + block_header.len as i64; + buf + }; + previous_index = read_vlsd_from_bytes(&mut data, cn, previous_index, decoder)?; + } + } + Ok(position) +} + /// Reads Signal Data Block containing VLSD channel, pointed by cn_data fn read_sd( rdr: &mut BufReader<&File>, @@ -436,7 +474,7 @@ fn read_sd( vlsd_channels: &[(u8, i32)], mut position: i64, decoder: &mut Dec, - channel_names_to_read_in_dg: &HashSet, + _channel_names_to_read_in_dg: &HashSet, ) -> Result { for channel_group in dg.cg.values_mut() { for (cn_type, rec_pos) in vlsd_channels { @@ -468,28 +506,10 @@ fn read_sd( let (pos, _id) = read_hl(rdr, position)?; position = pos; let (dl_blocks, pos) = parser_dl4(rdr, position)?; - let (pos, _vlsd) = parser_dl4_sorted( - rdr, - dl_blocks, - pos, - channel_group, - decoder, - rec_pos, - channel_names_to_read_in_dg, - )?; - position = pos; + position = read_vlsd_from_dl_blocks(rdr, dl_blocks, pos, cn, decoder)?; } else if "##DL".as_bytes() == id { let (dl_blocks, pos) = parser_dl4(rdr, position)?; - let (pos, _vlsd) = parser_dl4_sorted( - rdr, - dl_blocks, - pos, - channel_group, 
- decoder, - rec_pos, - channel_names_to_read_in_dg, - )?; - position = pos; + position = read_vlsd_from_dl_blocks(rdr, dl_blocks, pos, cn, decoder)?; } } } @@ -609,9 +629,16 @@ fn read_vlsd_from_bytes( u32::from_le_bytes(len.try_into().context("Could not read length")?) as usize; if (position + length + 4) <= data_length { position += std::mem::size_of::(); - // Types 6 (SBC) and 7 (UTF-8) have null terminator to strip + // From MDF 4.3, null terminator is optional in VLSD strings. + // Strip trailing \0 only if actually present (check the last byte). let record_len = match cn_data_type { - 6 | 7 => if length > 0 { length - 1 } else { 0 }, + 6 | 7 => { + if length > 0 && data[position + length - 1] == 0 { + length - 1 + } else { + length + } + } _ => length, }; let record = &data[position..position + record_len]; @@ -998,11 +1025,9 @@ fn parser_dl4_sorted( let mut id = [0u8; 4]; rdr.read_exact(&mut id) .context("could not read data block id")?; - let block_length: usize; if id == "##DZ".as_bytes() { let (dt, block_header) = parse_dz(rdr)?; data.extend(dt); - block_length = block_header.dz_org_data_length as usize; position = data_pointer + block_header.len as i64; id[2..].copy_from_slice(&block_header.dz_org_block_type[..]); } else { @@ -1011,9 +1036,11 @@ fn parser_dl4_sorted( rdr.read_exact(&mut buf) .context("Could not read DT block data")?; data.extend(buf); - block_length = (block_header.len - 24) as usize; position = data_pointer + block_header.len as i64; } + // Use data.len() as block_length so that a partial-record tail carried over + // from the previous block (split records) is included in the count. 
+ let block_length = data.len(); // Copies full sized records in block into channels arrays if id == "##SD".as_bytes() { @@ -1420,15 +1447,22 @@ fn read_all_channels_unsorted_from_bytes( // From sorted data block, copies data in channels arrays for (rec_id, (index, record_data)) in record_counter.iter_mut() { if let Some(channel_group) = dg.cg.get_mut(rec_id) { + let record_length = channel_group.record_length as usize; + let n_records = if record_length > 0 { + record_data.len() / record_length + } else { + 0 + }; read_channels_from_bytes( record_data, &mut channel_group.cn, - channel_group.record_length as usize, + record_length, *index, channel_names_to_read_in_dg, true, ) .context("failed reading channels from bytes after reading unsorted data")?; + *index += n_records; // advance write position for next DL block record_data.clear(); // clears data for new block, keeping capacity } } @@ -1951,7 +1985,7 @@ fn store_decoded_values_in_channel( } ChannelData::Int16(builder) => { if value_bytes.len() >= 2 { - let val = if cn.endian { + let val = if cn.endian.is_big() { i16::from_be_bytes(value_bytes[..2].try_into()?) } else { i16::from_le_bytes(value_bytes[..2].try_into()?) @@ -1961,7 +1995,7 @@ fn store_decoded_values_in_channel( } ChannelData::UInt16(builder) => { if value_bytes.len() >= 2 { - let val = if cn.endian { + let val = if cn.endian.is_big() { u16::from_be_bytes(value_bytes[..2].try_into()?) } else { u16::from_le_bytes(value_bytes[..2].try_into()?) @@ -1971,7 +2005,7 @@ fn store_decoded_values_in_channel( } ChannelData::Int32(builder) => { if value_bytes.len() >= 4 { - let val = if cn.endian { + let val = if cn.endian.is_big() { i32::from_be_bytes(value_bytes[..4].try_into()?) } else { i32::from_le_bytes(value_bytes[..4].try_into()?) 
@@ -1981,7 +2015,7 @@ fn store_decoded_values_in_channel( } ChannelData::UInt32(builder) => { if value_bytes.len() >= 4 { - let val = if cn.endian { + let val = if cn.endian.is_big() { u32::from_be_bytes(value_bytes[..4].try_into()?) } else { u32::from_le_bytes(value_bytes[..4].try_into()?) @@ -1991,7 +2025,7 @@ fn store_decoded_values_in_channel( } ChannelData::Float32(builder) => { if value_bytes.len() >= 4 { - let val = if cn.endian { + let val = if cn.endian.is_big() { f32::from_be_bytes(value_bytes[..4].try_into()?) } else { f32::from_le_bytes(value_bytes[..4].try_into()?) @@ -2001,7 +2035,7 @@ fn store_decoded_values_in_channel( } ChannelData::Int64(builder) => { if value_bytes.len() >= 8 { - let val = if cn.endian { + let val = if cn.endian.is_big() { i64::from_be_bytes(value_bytes[..8].try_into()?) } else { i64::from_le_bytes(value_bytes[..8].try_into()?) @@ -2011,7 +2045,7 @@ fn store_decoded_values_in_channel( } ChannelData::UInt64(builder) => { if value_bytes.len() >= 8 { - let val = if cn.endian { + let val = if cn.endian.is_big() { u64::from_be_bytes(value_bytes[..8].try_into()?) } else { u64::from_le_bytes(value_bytes[..8].try_into()?) @@ -2021,7 +2055,7 @@ fn store_decoded_values_in_channel( } ChannelData::Float64(builder) => { if value_bytes.len() >= 8 { - let val = if cn.endian { + let val = if cn.endian.is_big() { f64::from_be_bytes(value_bytes[..8].try_into()?) } else { f64::from_le_bytes(value_bytes[..8].try_into()?) 
diff --git a/src/mdfwriter/mdfwriter4.rs b/src/mdfwriter/mdfwriter4.rs index 45de067..2ae4051 100644 --- a/src/mdfwriter/mdfwriter4.rs +++ b/src/mdfwriter/mdfwriter4.rs @@ -24,8 +24,8 @@ use crate::{ MdfInfo, mdfinfo4::{ At4Block, BlockType, Blockheader4, Ca4Block, Cg4, Cg4Block, Ch4Block, Cn4, - Cn4Block, Compo, Composition, Dg4, Dg4Block, Dz4Block, Ev4Block, FhBlock, Ld4Block, - MdfInfo4, MetaData, MetaDataBlockType, Si4Block, default_short_header, + Cn4Block, Compo, Composition, Dg4, Dg4Block, Dz4Block, Endianness, Ev4Block, + FhBlock, Ld4Block, MdfInfo4, MetaData, MetaDataBlockType, Si4Block, default_short_header, }, }, mdfreader::Mdf, @@ -518,7 +518,7 @@ pub fn mdfwriter4(mdf: &Mdf, file_name: &str, compression: bool) -> Result let (tx, rx) = bounded::>(n_channels); let fname = Arc::new(Mutex::new(file_name.to_string())); let sfname = Arc::clone(&fname); - thread::spawn(move || -> Result<(), Error> { + let writer_handle = thread::spawn(move || -> Result<(), Error> { let file_name = Arc::clone(&sfname); let file = file_name.lock(); let f: File = OpenOptions::new() @@ -538,6 +538,7 @@ pub fn mdfwriter4(mdf: &Mdf, file_name: &str, compression: bool) -> Result .write_all(&buffer) .context("Could not write data blocks buffer")?; } + writer.flush().context("Could not flush data blocks")?; Ok(()) }); @@ -558,6 +559,12 @@ pub fn mdfwriter4(mdf: &Mdf, file_name: &str, compression: bool) -> Result if is_vlsd { // VLSD channel: write SD block, set cn_data let mut offset: i64 = 0; + + let data_pointer = Arc::clone(&data_pointer); + let mut locked_data_pointer = data_pointer.lock(); + cn.block.cn_data = *locked_data_pointer; + // For VLSD, dg_data is not used (set to 0) + dg.block.dg_data = 0; let data_block = if compression { create_dz_sd(data, &mut offset) .context("failed creating dz or sd block")? @@ -565,12 +572,6 @@ pub fn mdfwriter4(mdf: &Mdf, file_name: &str, compression: bool) -> Result create_sd(data, &mut offset) .context("failed creating sd block")? 
}; - - let data_pointer = Arc::clone(&data_pointer); - let mut locked_data_pointer = data_pointer.lock(); - cn.block.cn_data = *locked_data_pointer; - // For VLSD, dg_data is not used (set to 0) - dg.block.dg_data = 0; *locked_data_pointer += offset; let buffer = write_sd_block(cn.block.cn_data, data_block, offset as usize)?; @@ -629,6 +630,10 @@ pub fn mdfwriter4(mdf: &Mdf, file_name: &str, compression: bool) -> Result Ok(()) })?; drop(tx); + writer_handle + .join() + .map_err(|e| anyhow::anyhow!("Data writer thread panicked: {:?}", e)) + .and_then(|r| r)?; let file_name = Arc::clone(&fname); let file = file_name.lock(); @@ -1466,7 +1471,7 @@ fn create_blocks( ) .with_context(|| format!("failed initilising array for channel {}", cn.unique_name))?, block: cn_block, - endian: machine_endian, + endian: Endianness::from(machine_endian), block_position: cn_position, pos_byte_beg: 0, n_bytes: cg_block.cg_data_bytes, diff --git a/tests/arrays.rs b/tests/arrays.rs new file mode 100644 index 0000000..8ced4a4 --- /dev/null +++ b/tests/arrays.rs @@ -0,0 +1,204 @@ +//! Integration tests for MDF4 array channels (CABlock / TensorArrow). +//! Tests read real MDF4.3 array example files and verify that array channels +//! load correctly and survive a round-trip write. + +use anyhow::Result; +use mdfr::data_holder::channel_data::ChannelData; +use mdfr::mdfreader::Mdf; +use std::sync::LazyLock; + +static BASE_PATH_ARRAYS: LazyLock = LazyLock::new(|| { + "/home/ratal/workspace/mdfreader/mdfreader/tests/MDF4/MDF4.3/Base_Standard/Examples/Arrays/" + .to_string() +}); + +/// Checks whether a `ChannelData` value is one of the ArrayD variants. 
+fn is_array_channel(data: &ChannelData) -> bool { + matches!( + data, + ChannelData::ArrayDFloat64(_) + | ChannelData::ArrayDFloat32(_) + | ChannelData::ArrayDInt8(_) + | ChannelData::ArrayDUInt8(_) + | ChannelData::ArrayDInt16(_) + | ChannelData::ArrayDUInt16(_) + | ChannelData::ArrayDInt32(_) + | ChannelData::ArrayDUInt32(_) + | ChannelData::ArrayDInt64(_) + | ChannelData::ArrayDUInt64(_) + ) +} + +// ────────────────────────────────────────────────────────────────────────────── +// Simple array files +// ────────────────────────────────────────────────────────────────────────────── + +#[test] +fn array_simple_vector() -> Result<()> { + let file = format!( + "{}Simple/Vector_MeasurementArrays.mf4", + BASE_PATH_ARRAYS.as_str() + ); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + let names = mdf.get_channel_names_set(); + assert!(!names.is_empty(), "No channels found in array file"); + + // At least one channel should be an ArrayD type + let has_array_channel = names + .iter() + .any(|name| mdf.get_channel_data(name).map_or(false, is_array_channel)); + assert!( + has_array_channel, + "Expected at least one ArrayD channel in Vector_MeasurementArrays.mf4" + ); + + Ok(()) +} + +#[test] +fn array_simple_dspace() -> Result<()> { + let file = format!( + "{}Simple/dSPACE_MeasurementArrays.mf4", + BASE_PATH_ARRAYS.as_str() + ); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + let names = mdf.get_channel_names_set(); + assert!(!names.is_empty(), "No channels found in dSPACE array file"); + + Ok(()) +} + +#[test] +fn array_with_fixed_axes() -> Result<()> { + let file = format!( + "{}Simple/Vector_ArrayWithFixedAxes.MF4", + BASE_PATH_ARRAYS.as_str() + ); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + let names = mdf.get_channel_names_set(); + assert!(!names.is_empty(), "No channels in Vector_ArrayWithFixedAxes.MF4"); + + Ok(()) +} + +// 
────────────────────────────────────────────────────────────────────────────── +// Classification +// ────────────────────────────────────────────────────────────────────────────── + +#[test] +fn array_classification_porsche() -> Result<()> { + let file = format!( + "{}Classification/Porsche_2D_classification_result.mf4", + BASE_PATH_ARRAYS.as_str() + ); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + let names = mdf.get_channel_names_set(); + assert!(!names.is_empty(), "No channels in Porsche_2D_classification_result.mf4"); + + Ok(()) +} + +// ────────────────────────────────────────────────────────────────────────────── +// Round-trip: write and re-read an array file +// ────────────────────────────────────────────────────────────────────────────── + +#[test] +fn array_round_trip_write() -> Result<()> { + use std::fs; + + let out_file = + "/home/ratal/workspace/mdfr/test_files/arrays_round_trip_test.mf4"; + + let file = format!( + "{}Simple/Vector_MeasurementArrays.mf4", + BASE_PATH_ARRAYS.as_str() + ); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + let original_names = mdf.get_channel_names_set(); + assert!(!original_names.is_empty()); + + // Write the file; the write itself should succeed + let mdf2 = mdf.write(out_file, false)?; + + // The written file must exist and be non-empty + assert!( + std::path::Path::new(out_file).exists(), + "Written file does not exist" + ); + + // The Mdf returned by write() should have at least the same channel names + let written_names = mdf2.get_channel_names_set(); + assert!(!written_names.is_empty(), "Written file has no channels"); + + // Clean up temporary file + fs::remove_file(out_file).ok(); + + Ok(()) +} + +// ────────────────────────────────────────────────────────────────────────────── +// Array channel properties +// ────────────────────────────────────────────────────────────────────────────── + +#[test] +fn array_channels_have_expected_ndim() -> 
Result<()> { + let file = format!( + "{}Simple/Vector_MeasurementArrays.mf4", + BASE_PATH_ARRAYS.as_str() + ); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + let names = mdf.get_channel_names_set(); + for name in &names { + if let Some(data) = mdf.get_channel_data(name) { + if is_array_channel(data) { + // Array channels must have ndim >= 1 + assert!(data.ndim() >= 1, "Array channel {name} has ndim < 1"); + } + } + } + + Ok(()) +} + +#[test] +fn array_channels_min_max() -> Result<()> { + let file = format!( + "{}Simple/Vector_MeasurementArrays.mf4", + BASE_PATH_ARRAYS.as_str() + ); + let mut mdf = Mdf::new(&file)?; + mdf.load_all_channels_data_in_memory()?; + + let names = mdf.get_channel_names_set(); + let mut found_array = false; + for name in &names { + if let Some(data) = mdf.get_channel_data(name) { + if is_array_channel(data) && !data.is_empty() { + let (min, max) = data.min_max(); + // If there is data, min and max should be populated for numeric arrays + if min.is_some() && max.is_some() { + assert!( + min.unwrap() <= max.unwrap(), + "min > max for channel {name}" + ); + } + found_array = true; + } + } + } + assert!(found_array, "No non-empty array channels found"); + + Ok(()) +} diff --git a/tests/be_complex_array.rs b/tests/be_complex_array.rs new file mode 100644 index 0000000..aec9a3e --- /dev/null +++ b/tests/be_complex_array.rs @@ -0,0 +1,563 @@ +/// Integration tests for BigEndian channels, Complex32, and ArrayDFloat64 fixtures. 
+/// +/// Three synthetic MDF4 files exercise paths in `read_channels_from_bytes` that +/// the existing test suite never hits: +/// +/// - `be_scalars.mf4` → BE Int16 (lines 724-731) and BE Float64 +/// - `complex_f32_le.mf4` → Complex32 LE f32-pair arm (lines 1040-1072) +/// - `array_f64_le.mf4` → ArrayDFloat64 LE (lines 1624-1635) + CA block parsing +use anyhow::Result; +use mdfr::data_holder::channel_data::ChannelData; +use mdfr::mdfreader::Mdf; +use std::sync::LazyLock; + +// ─── Byte-push helpers ─────────────────────────────────────────────────────── + +fn pu8(b: &mut Vec, v: u8) { b.push(v); } +fn pu16(b: &mut Vec, v: u16) { b.extend_from_slice(&v.to_le_bytes()); } +fn pu32(b: &mut Vec, v: u32) { b.extend_from_slice(&v.to_le_bytes()); } +fn pu64(b: &mut Vec, v: u64) { b.extend_from_slice(&v.to_le_bytes()); } +fn pi64(b: &mut Vec, v: i64) { b.extend_from_slice(&v.to_le_bytes()); } +fn pf32(b: &mut Vec, v: f32) { b.extend_from_slice(&v.to_le_bytes()); } +fn pf64(b: &mut Vec, v: f64) { b.extend_from_slice(&v.to_le_bytes()); } +fn pi16_be(b: &mut Vec, v: i16) { b.extend_from_slice(&v.to_be_bytes()); } +fn pu16_be(b: &mut Vec, v: u16) { b.extend_from_slice(&v.to_be_bytes()); } +fn pf32_be(b: &mut Vec, v: f32) { b.extend_from_slice(&v.to_be_bytes()); } +fn pf64_be(b: &mut Vec, v: f64) { b.extend_from_slice(&v.to_be_bytes()); } +fn zeros(b: &mut Vec, n: usize) { b.extend(std::iter::repeat_n(0u8, n)); } + +// ─── Shared block writers (identical to conversions_int_types.rs) ──────────── + +fn id_block(b: &mut Vec) { + b.extend_from_slice(b"MDF "); b.extend_from_slice(b"4.30 "); b.extend_from_slice(b"mdfr "); + pu16(b,0); pu16(b,0); pu16(b,430); pu16(b,0); zeros(b,2); zeros(b,26); pu16(b,0); pu16(b,0); +} +fn hd4(b: &mut Vec, dg: i64, fh: i64) { + b.extend_from_slice(b"##HD"); zeros(b,4); pu64(b,104); pu64(b,6); + pi64(b,dg); pi64(b,fh); pi64(b,0); pi64(b,0); pi64(b,0); pi64(b,0); + pu64(b,0); b.extend_from_slice(&0i16.to_le_bytes()); 
b.extend_from_slice(&0i16.to_le_bytes()); + pu8(b,0); pu8(b,0); pu8(b,0); pu8(b,0); + pf64(b,0.0); pf64(b,0.0); +} +fn fh(b: &mut Vec) { + b.extend_from_slice(b"##FH"); zeros(b,4); pu64(b,56); pu64(b,2); + pi64(b,0); pi64(b,0); pu64(b,0); b.extend_from_slice(&0i16.to_le_bytes()); b.extend_from_slice(&0i16.to_le_bytes()); pu8(b,0); zeros(b,3); +} +fn dg4(b: &mut Vec, cg: i64, data: i64) { + b.extend_from_slice(b"##DG"); zeros(b,4); pu64(b,64); pu64(b,4); + pi64(b,0); pi64(b,cg); pi64(b,data); pi64(b,0); pu8(b,0); zeros(b,7); +} +fn dg4_chain(b: &mut Vec, next: i64, cg: i64, data: i64) { + b.extend_from_slice(b"##DG"); zeros(b,4); pu64(b,64); pu64(b,4); + pi64(b,next); pi64(b,cg); pi64(b,data); pi64(b,0); pu8(b,0); zeros(b,7); +} +/// LD block with one data link and equal_sample_count (56 bytes total). +fn ld1_block(b: &mut Vec, data_ptr: i64, cycle_count: u64) { + b.extend_from_slice(b"##LD"); zeros(b,4); pu64(b,56); pu64(b,2); // header + pi64(b,0); pi64(b,data_ptr); // ld_next=0, ld_links[0]=data_ptr + pu8(b,1); pu8(b,0); pu8(b,0); pu8(b,0); // ld_flags=1(equal_sample_count), rest 0 + pu32(b,1); pu64(b,cycle_count); // ld_count=1, ld_equal_sample_count +} +fn cg4(b: &mut Vec, cn: i64, cycles: u64, data_bytes: u32) { + b.extend_from_slice(b"##CG"); zeros(b,4); pu64(b,104); + pu64(b,6); pi64(b,0); pi64(b,cn); pi64(b,0); pi64(b,0); pi64(b,0); pi64(b,0); + pu64(b,0); pu64(b,cycles); pu16(b,0); pu16(b,0); zeros(b,4); pu32(b,data_bytes); pu32(b,0); +} +/// `(cn_type, sync, dtype)` — channel kind descriptor +type CnDesc = (u8, u8, u8); +/// `(byte_offset, bit_count)` — data layout +type CnSpan = (u32, u32); +/// `(cn_next, cn_name_tx, cn_cc)` — block links +type CnRefs = (i64, i64, i64); + +fn cn4(b: &mut Vec, desc: CnDesc, span: CnSpan, refs: CnRefs) { + let (cn_type, sync, dtype) = desc; + let (byte_off, bits) = span; + let (next, tx, cc) = refs; + b.extend_from_slice(b"##CN"); zeros(b,4); pu64(b,160); + pu64(b,8); pi64(b,next); pi64(b,0); pi64(b,tx); pi64(b,0); 
pi64(b,cc); + pi64(b,0); pi64(b,0); pi64(b,0); + pu8(b,cn_type); pu8(b,sync); pu8(b,dtype); pu8(b,0); + pu32(b,byte_off); pu32(b,bits); pu32(b,0); pu32(b,0); + pu8(b,0xff); pu8(b,0); pu16(b,0); + pf64(b,0.0); pf64(b,0.0); pf64(b,0.0); pf64(b,0.0); pf64(b,0.0); pf64(b,0.0); +} +/// Like `cn4` but with a non-zero `composition` link (second link = CA block offset). +fn cn4_ca(b: &mut Vec, desc: CnDesc, span: CnSpan, refs: CnRefs, composition: i64) { + let (cn_type, sync, dtype) = desc; + let (byte_off, bits) = span; + let (next, tx, cc) = refs; + b.extend_from_slice(b"##CN"); zeros(b,4); pu64(b,160); + pu64(b,8); pi64(b,next); pi64(b,composition); pi64(b,tx); pi64(b,0); pi64(b,cc); + pi64(b,0); pi64(b,0); pi64(b,0); + pu8(b,cn_type); pu8(b,sync); pu8(b,dtype); pu8(b,0); + pu32(b,byte_off); pu32(b,bits); pu32(b,0); pu32(b,0); + pu8(b,0xff); pu8(b,0); pu16(b,0); + pf64(b,0.0); pf64(b,0.0); pf64(b,0.0); pf64(b,0.0); pf64(b,0.0); pf64(b,0.0); +} +fn tx(b: &mut Vec, text: &str) { + let t = text.as_bytes(); + let len = 24u64 + t.len() as u64 + 1; + b.extend_from_slice(b"##TX"); zeros(b,4); pu64(b,len); pu64(b,0); + b.extend_from_slice(t); b.push(0); +} +fn dt(b: &mut Vec, records: &[u8]) { + let len = 24u64 + records.len() as u64; + b.extend_from_slice(b"##DT"); zeros(b,4); pu64(b,len); pu64(b,0); + b.extend_from_slice(records); +} +/// Minimal 1D CA block (56 bytes). `dim_size` is the number of elements per cycle. 
+fn ca1d(b: &mut Vec, dim_size: u64) { + b.extend_from_slice(b"##CA"); zeros(b,4); pu64(b,56); pu64(b,1); // link_count=1 + pi64(b,0); // ca_composition = null + pu8(b,0); // ca_type = 0 (Array) + pu8(b,0); // ca_storage = 0 (CN template) + pu16(b,1); // ca_ndim = 1 + pu32(b,0); // ca_flags = 0 + pu32(b,0); // ca_byte_offset_base = 0 + pu32(b,0); // ca_inval_bit_pos_base = 0 + pu64(b,dim_size); // ca_dim_size[0] +} + +// ─── Fixture 1: be_scalars.mf4 ─────────────────────────────────────────────── +// +// Layout (all offsets are absolute): +// [0] IdBlock 64 b +// [64] HD4 104 b (hd_dg=224, hd_fh=168) +// [168] FH 56 b +// [224] DG4 64 b (dg_cg=288, dg_data=965) +// [288] CG4 104 b (cg_cn=392, cycles=4, data_bytes=18) +// [392] CN_master 160 b (FloatLE/64bit, byte_off=0, next=552, tx=872) +// [552] CN_be_i16 160 b (IntBE/16bit, byte_off=8, next=712, tx=903) +// [712] CN_be_f64 160 b (FloatBE/64bit, byte_off=10, next=0, tx=934) +// [872] TX "master" 31 b +// [903] TX "be_i16" 31 b +// [934] TX "be_f64" 31 b +// [965] DT 96 b (24 hdr + 4×18=72 data) +// Total: 1061 b + +const BE_SCALARS_PATH: &str = "test_files/synthetic/be_scalars.mf4"; +static FIXTURE_BE: LazyLock<()> = LazyLock::new(|| { + create_be_scalars().expect("failed to create be_scalars fixture"); +}); + +fn create_be_scalars() -> Result<()> { + if std::path::Path::new(BE_SCALARS_PATH).exists() { return Ok(()); } + std::fs::create_dir_all("test_files/synthetic")?; + let mut b: Vec = Vec::with_capacity(1061); + + id_block(&mut b); debug_assert_eq!(b.len(), 64); + hd4(&mut b, 224, 168); debug_assert_eq!(b.len(), 168); + fh(&mut b); debug_assert_eq!(b.len(), 224); + dg4(&mut b, 288, 965); debug_assert_eq!(b.len(), 288); + cg4(&mut b, 392, 4, 18); debug_assert_eq!(b.len(), 392); + cn4(&mut b, (2,1,4), (0,64), (552,872,0)); debug_assert_eq!(b.len(), 552); + cn4(&mut b, (0,0,3), (8,16), (712,903,0)); debug_assert_eq!(b.len(), 712); // dtype=3=IntBE + cn4(&mut b, (0,0,5), (10,64), (0,934,0)); 
debug_assert_eq!(b.len(), 872); // dtype=5=FloatBE + tx(&mut b, "master"); debug_assert_eq!(b.len(), 903); + tx(&mut b, "be_i16"); debug_assert_eq!(b.len(), 934); + tx(&mut b, "be_f64"); debug_assert_eq!(b.len(), 965); + + // 4 records × 18 bytes: f64_LE(8) | i16_BE(2) | f64_BE(8) + let raw_i16_be: [i16; 4] = [-100, 0, 100, 200]; + let raw_f64_be: [f64; 4] = [1.5, 2.5, 3.5, 4.5]; + let mut recs: Vec = Vec::with_capacity(72); + for i in 0..4 { + pf64(&mut recs, i as f64); + pi16_be(&mut recs, raw_i16_be[i]); + pf64_be(&mut recs, raw_f64_be[i]); + } + dt(&mut b, &recs); + debug_assert_eq!(b.len(), 1061); + + std::fs::write(BE_SCALARS_PATH, &b)?; + Ok(()) +} + +// ─── Fixture 2: complex_f32_le.mf4 ────────────────────────────────────────── +// +// [0] IdBlock 64 b +// [64] HD4 104 b (hd_dg=224, hd_fh=168) +// [168] FH 56 b +// [224] DG4 64 b (dg_cg=288, dg_data=775) +// [288] CG4 104 b (cg_cn=392, cycles=4, data_bytes=16) +// [392] CN_master 160 b (FloatLE/64bit, byte_off=0, next=552, tx=712) +// [552] CN_cx32 160 b (ComplexLE/64bit, byte_off=8, next=0, tx=743) +// [712] TX "master" 31 b +// [743] TX "cx32_ch" 32 b +// [775] DT 88 b (24 hdr + 4×16=64 data) +// Total: 863 b + +const CX32_PATH: &str = "test_files/synthetic/complex_f32_le.mf4"; +static FIXTURE_CX: LazyLock<()> = LazyLock::new(|| { + create_complex_f32_le().expect("failed to create complex_f32_le fixture"); +}); + +fn create_complex_f32_le() -> Result<()> { + if std::path::Path::new(CX32_PATH).exists() { return Ok(()); } + std::fs::create_dir_all("test_files/synthetic")?; + let mut b: Vec = Vec::with_capacity(863); + + id_block(&mut b); debug_assert_eq!(b.len(), 64); + hd4(&mut b, 224, 168); debug_assert_eq!(b.len(), 168); + fh(&mut b); debug_assert_eq!(b.len(), 224); + dg4(&mut b, 288, 775); debug_assert_eq!(b.len(), 288); + cg4(&mut b, 392, 4, 16); debug_assert_eq!(b.len(), 392); + cn4(&mut b, (2,1,4), (0,64), (552,712,0)); debug_assert_eq!(b.len(), 552); + cn4(&mut b, (0,0,15), (8,64), (0,743,0)); 
debug_assert_eq!(b.len(), 712); // dtype=15=ComplexLE + tx(&mut b, "master"); debug_assert_eq!(b.len(), 743); + tx(&mut b, "cx32_ch"); debug_assert_eq!(b.len(), 775); + + // 4 records × 16 bytes: f64_LE(8) | f32_LE_real(4) | f32_LE_imag(4) + // Samples: (1+2j), (3+4j), (5+6j), (7+8j) + let samples: [(f32, f32); 4] = [(1.0, 2.0), (3.0, 4.0), (5.0, 6.0), (7.0, 8.0)]; + let mut recs: Vec = Vec::with_capacity(64); + for (i, (re, im)) in samples.iter().enumerate() { + pf64(&mut recs, i as f64); + pf32(&mut recs, *re); + pf32(&mut recs, *im); + } + dt(&mut b, &recs); + debug_assert_eq!(b.len(), 863); + + std::fs::write(CX32_PATH, &b)?; + Ok(()) +} + +// ─── Fixture 3: array_f64_le.mf4 ──────────────────────────────────────────── +// +// [0] IdBlock 64 b +// [64] HD4 104 b (hd_dg=224, hd_fh=168) +// [168] FH 56 b +// [224] DG4 64 b (dg_cg=288, dg_data=831) +// [288] CG4 104 b (cg_cn=392, cycles=4, data_bytes=32) +// [392] CN_master 160 b (FloatLE/64bit, byte_off=0, next=552, tx=712, composition=0) +// [552] CN_arr 160 b (FloatLE/64bit, byte_off=8, next=0, tx=743, composition=775) +// [712] TX "master" 31 b +// [743] TX "arr_f64" 32 b +// [775] CA block 56 b (ca_ndim=1, ca_dim_size=[3]) +// [831] DT 152 b (24 hdr + 4×32=128 data) +// Total: 983 b + +const ARR_F64_PATH: &str = "test_files/synthetic/array_f64_le.mf4"; +static FIXTURE_ARR: LazyLock<()> = LazyLock::new(|| { + create_array_f64_le().expect("failed to create array_f64_le fixture"); +}); + +fn create_array_f64_le() -> Result<()> { + if std::path::Path::new(ARR_F64_PATH).exists() { return Ok(()); } + std::fs::create_dir_all("test_files/synthetic")?; + let mut b: Vec = Vec::with_capacity(983); + + id_block(&mut b); debug_assert_eq!(b.len(), 64); + hd4(&mut b, 224, 168); debug_assert_eq!(b.len(), 168); + fh(&mut b); debug_assert_eq!(b.len(), 224); + dg4(&mut b, 288, 831); debug_assert_eq!(b.len(), 288); + cg4(&mut b, 392, 4, 32); debug_assert_eq!(b.len(), 392); + // CN master: no CA block → composition=0, use plain 
cn4 + cn4(&mut b, (2,1,4), (0,64), (552,712,0)); debug_assert_eq!(b.len(), 552); + // CN_arr: composition link points to CA block at 775 + cn4_ca(&mut b, (0,0,4), (8,64), (0,743,0), 775); debug_assert_eq!(b.len(), 712); + tx(&mut b, "master"); debug_assert_eq!(b.len(), 743); + tx(&mut b, "arr_f64"); debug_assert_eq!(b.len(), 775); + ca1d(&mut b, 3); debug_assert_eq!(b.len(), 831); + + // 4 records × 32 bytes: f64_LE(8) | [f64_LE × 3](24) + // Array values: [1,2,3], [4,5,6], [7,8,9], [10,11,12] + let mut recs: Vec = Vec::with_capacity(128); + for cycle in 0..4u64 { + pf64(&mut recs, cycle as f64); + for elem in 1..=3u64 { + pf64(&mut recs, (cycle * 3 + elem) as f64); + } + } + dt(&mut b, &recs); + debug_assert_eq!(b.len(), 983); + + std::fs::write(ARR_F64_PATH, &b)?; + Ok(()) +} + +// ─── Tests ─────────────────────────────────────────────────────────────────── + +#[test] +fn be_int16_reads_correctly() -> Result<()> { + LazyLock::force(&FIXTURE_BE); + let mut mdf = Mdf::new(BE_SCALARS_PATH)?; + mdf.load_all_channels_data_in_memory()?; + + let data = mdf.get_channel_data("be_i16").expect("be_i16 not found"); + assert!( + matches!(data, ChannelData::Int16(_)), + "expected Int16, got {}", + data.data_type(false) + ); + if let ChannelData::Int16(arr) = data { + let expected = [-100i16, 0, 100, 200]; + for (i, (&got, &exp)) in arr.values_slice().iter().zip(expected.iter()).enumerate() { + assert_eq!(got, exp, "be_i16[{i}]: expected {exp}, got {got}"); + } + } + Ok(()) +} + +#[test] +fn be_float64_reads_correctly() -> Result<()> { + LazyLock::force(&FIXTURE_BE); + let mut mdf = Mdf::new(BE_SCALARS_PATH)?; + mdf.load_all_channels_data_in_memory()?; + + let data = mdf.get_channel_data("be_f64").expect("be_f64 not found"); + assert!( + matches!(data, ChannelData::Float64(_)), + "expected Float64, got {}", + data.data_type(false) + ); + if let ChannelData::Float64(arr) = data { + let expected = [1.5f64, 2.5, 3.5, 4.5]; + for (i, (&got, &exp)) in 
arr.values_slice().iter().zip(expected.iter()).enumerate() { + assert!((got - exp).abs() < 1e-9, "be_f64[{i}]: expected {exp}, got {got}"); + } + } + Ok(()) +} + +#[test] +fn complex32_le_reads_correctly() -> Result<()> { + LazyLock::force(&FIXTURE_CX); + let mut mdf = Mdf::new(CX32_PATH)?; + mdf.load_all_channels_data_in_memory()?; + + let data = mdf.get_channel_data("cx32_ch").expect("cx32_ch not found"); + assert!( + matches!(data, ChannelData::Complex32(_)), + "expected Complex32, got {}", + data.data_type(false) + ); + if let ChannelData::Complex32(arr) = data { + // values_slice() returns interleaved [re0, im0, re1, im1, ...] + let vals = arr.values_slice(); + assert_eq!(vals.len(), 8, "expected 4 complex samples = 8 floats"); + let expected_re = [1.0f32, 3.0, 5.0, 7.0]; + let expected_im = [2.0f32, 4.0, 6.0, 8.0]; + for i in 0..4 { + assert!((vals[i * 2] - expected_re[i]).abs() < 1e-6, + "cx32_ch[{i}].re: expected {}, got {}", expected_re[i], vals[i * 2]); + assert!((vals[i * 2 + 1] - expected_im[i]).abs() < 1e-6, + "cx32_ch[{i}].im: expected {}, got {}", expected_im[i], vals[i * 2 + 1]); + } + } + Ok(()) +} + +#[test] +fn array_f64_le_reads_correctly() -> Result<()> { + LazyLock::force(&FIXTURE_ARR); + let mut mdf = Mdf::new(ARR_F64_PATH)?; + mdf.load_all_channels_data_in_memory()?; + + let data = mdf.get_channel_data("arr_f64").expect("arr_f64 not found"); + assert!( + matches!(data, ChannelData::ArrayDFloat64(_)), + "expected ArrayDFloat64, got {}", + data.data_type(false) + ); + if let ChannelData::ArrayDFloat64(arr) = data { + let vals = arr.values_slice(); + // The zeros() pre-allocation for ArrayDFloat64 allocates cycle_count * product(shape) + // elements. Since shape=[4,3] already includes cycle_count=4 as first dim, + // the buffer is 4 * (4*3) = 48 slots, but only the first 12 are filled by reading. 
+ assert!(vals.len() >= 12, "expected at least 12 f64 values, got {}", vals.len()); + let expected: Vec = (1..=12).map(|x| x as f64).collect(); + for (i, (&got, &exp)) in vals[..12].iter().zip(expected.iter()).enumerate() { + assert!((got - exp).abs() < 1e-9, "arr_f64[{i}]: expected {exp}, got {got}"); + } + assert!(arr.ndim() >= 1, "expected ndim >= 1"); + } + Ok(()) +} + +// ─── Fixture 4: ld_be_channels.mf4 ────────────────────────────────────────── +// +// Four DGs, each with one LD-backed BE channel. Exercises the uncovered BE +// arms in `read_one_channel_array` (data_read4.rs lines 59-62, 72-75, 146-149, +// 272-275) which are only reachable via LD blocks (the optimised single-channel +// path) — existing real MDF files with LD blocks all use LE channels. +// +// Layout: +// [0] IdBlock 64 +// [64] HD4 104 (dg=224, fh=168) +// [168] FH 56 +// [224] DG1 64 (next=288, cg=480, data=1656) BE Int16 +// [288] DG2 64 (next=352, cg=584, data=1712) BE Float64 +// [352] DG3 64 (next=416, cg=688, data=1768) BE UInt16 +// [416] DG4 64 (next=0, cg=792, data=1824) BE Float32 +// [480] CG1 104 (cn=896, cycles=4, data_bytes=2) +// [584] CG2 104 (cn=1056, cycles=4, data_bytes=8) +// [688] CG3 104 (cn=1216, cycles=4, data_bytes=2) +// [792] CG4 104 (cn=1376, cycles=4, data_bytes=4) +// [896] CN_bei16 160 (dtype=3/IntBE, bits=16, tx=1536) +// [1056] CN_bef64 160 (dtype=5/FloatBE, bits=64, tx=1566) +// [1216] CN_beu16 160 (dtype=1/UIntBE, bits=16, tx=1596) +// [1376] CN_bef32 160 (dtype=5/FloatBE, bits=32, tx=1626) +// [1536] TX "bei16" 30 +// [1566] TX "bef64" 30 +// [1596] TX "beu16" 30 +// [1626] TX "bef32" 30 +// [1656] LD1 56 → DT1 at 1880 +// [1712] LD2 56 → DT2 at 1912 +// [1768] LD3 56 → DT3 at 1968 +// [1824] LD4 56 → DT4 at 2000 +// [1880] DT1 32 (24 hdr + 4×i16_BE = 8 bytes) +// [1912] DT2 56 (24 hdr + 4×f64_BE = 32 bytes) +// [1968] DT3 32 (24 hdr + 4×u16_BE = 8 bytes) +// [2000] DT4 40 (24 hdr + 4×f32_BE = 16 bytes) +// Total: 2040 bytes + +const LD_BE_PATH: &str = 
"test_files/synthetic/ld_be_channels.mf4"; +static FIXTURE_LD_BE: LazyLock<()> = LazyLock::new(|| { + create_ld_be_channels().expect("failed to create ld_be_channels fixture"); +}); + +fn create_ld_be_channels() -> Result<()> { + if std::path::Path::new(LD_BE_PATH).exists() { return Ok(()); } + std::fs::create_dir_all("test_files/synthetic")?; + let mut b: Vec = Vec::with_capacity(2040); + + id_block(&mut b); debug_assert_eq!(b.len(), 64); + hd4(&mut b, 224, 168); debug_assert_eq!(b.len(), 168); + fh(&mut b); debug_assert_eq!(b.len(), 224); + // 4 chained DGs + dg4_chain(&mut b, 288, 480, 1656); debug_assert_eq!(b.len(), 288); // DG1 bei16 + dg4_chain(&mut b, 352, 584, 1712); debug_assert_eq!(b.len(), 352); // DG2 bef64 + dg4_chain(&mut b, 416, 688, 1768); debug_assert_eq!(b.len(), 416); // DG3 beu16 + dg4_chain(&mut b, 0, 792, 1824); debug_assert_eq!(b.len(), 480); // DG4 bef32 + // CGs (one per DG, one CN each) + cg4(&mut b, 896, 4, 2); debug_assert_eq!(b.len(), 584); // CG1 + cg4(&mut b, 1056, 4, 8); debug_assert_eq!(b.len(), 688); // CG2 + cg4(&mut b, 1216, 4, 2); debug_assert_eq!(b.len(), 792); // CG3 + cg4(&mut b, 1376, 4, 4); debug_assert_eq!(b.len(), 896); // CG4 + // CNs: dtype=3(IntBE), 5(FloatBE), 1(UIntBE), 5(FloatBE) + cn4(&mut b, (0,0,3), (0,16), (0,1536,0)); debug_assert_eq!(b.len(), 1056); // bei16 + cn4(&mut b, (0,0,5), (0,64), (0,1566,0)); debug_assert_eq!(b.len(), 1216); // bef64 + cn4(&mut b, (0,0,1), (0,16), (0,1596,0)); debug_assert_eq!(b.len(), 1376); // beu16 + cn4(&mut b, (0,0,5), (0,32), (0,1626,0)); debug_assert_eq!(b.len(), 1536); // bef32 + // TX blocks + tx(&mut b, "bei16"); debug_assert_eq!(b.len(), 1566); + tx(&mut b, "bef64"); debug_assert_eq!(b.len(), 1596); + tx(&mut b, "beu16"); debug_assert_eq!(b.len(), 1626); + tx(&mut b, "bef32"); debug_assert_eq!(b.len(), 1656); + // LD blocks pointing to DT blocks + ld1_block(&mut b, 1880, 4); debug_assert_eq!(b.len(), 1712); // LD1 → DT1 + ld1_block(&mut b, 1912, 4); 
debug_assert_eq!(b.len(), 1768); // LD2 → DT2 + ld1_block(&mut b, 1968, 4); debug_assert_eq!(b.len(), 1824); // LD3 → DT3 + ld1_block(&mut b, 2000, 4); debug_assert_eq!(b.len(), 1880); // LD4 → DT4 + // DT1: 4×i16_BE + { + let mut recs: Vec = Vec::with_capacity(8); + for v in [-100i16, 0, 100, 200] { pi16_be(&mut recs, v); } + dt(&mut b, &recs); debug_assert_eq!(b.len(), 1912); + } + // DT2: 4×f64_BE + { + let mut recs: Vec = Vec::with_capacity(32); + for v in [1.5f64, 2.5, 3.5, 4.5] { pf64_be(&mut recs, v); } + dt(&mut b, &recs); debug_assert_eq!(b.len(), 1968); + } + // DT3: 4×u16_BE + { + let mut recs: Vec = Vec::with_capacity(8); + for v in [100u16, 200, 300, 400] { pu16_be(&mut recs, v); } + dt(&mut b, &recs); debug_assert_eq!(b.len(), 2000); + } + // DT4: 4×f32_BE + { + let mut recs: Vec = Vec::with_capacity(16); + for v in [1.5f32, 2.5, 3.5, 4.5] { pf32_be(&mut recs, v); } + dt(&mut b, &recs); debug_assert_eq!(b.len(), 2040); + } + + std::fs::write(LD_BE_PATH, &b)?; + Ok(()) +} + +// ─── Tests for LD-backed BE channels ───────────────────────────────────────── + +fn load_ld_be() -> Result { + LazyLock::force(&FIXTURE_LD_BE); + let mut mdf = Mdf::new(LD_BE_PATH)?; + mdf.load_all_channels_data_in_memory()?; + Ok(mdf) +} + +#[test] +fn ld_bei16_reads_correctly() -> Result<()> { + let mdf = load_ld_be()?; + let data = mdf.get_channel_data("bei16").expect("bei16 not found"); + assert!( + matches!(data, ChannelData::Int16(_)), + "expected Int16, got {}", + data.data_type(false) + ); + if let ChannelData::Int16(arr) = data { + let expected = [-100i16, 0, 100, 200]; + for (i, (&got, &exp)) in arr.values_slice().iter().zip(expected.iter()).enumerate() { + assert_eq!(got, exp, "bei16[{i}]: expected {exp}, got {got}"); + } + } + Ok(()) +} + +#[test] +fn ld_bef64_reads_correctly() -> Result<()> { + let mdf = load_ld_be()?; + let data = mdf.get_channel_data("bef64").expect("bef64 not found"); + assert!( + matches!(data, ChannelData::Float64(_)), + "expected Float64, got 
{}", + data.data_type(false) + ); + if let ChannelData::Float64(arr) = data { + let expected = [1.5f64, 2.5, 3.5, 4.5]; + for (i, (&got, &exp)) in arr.values_slice().iter().zip(expected.iter()).enumerate() { + assert!((got - exp).abs() < 1e-9, "bef64[{i}]: expected {exp}, got {got}"); + } + } + Ok(()) +} + +#[test] +fn ld_beu16_reads_correctly() -> Result<()> { + let mdf = load_ld_be()?; + let data = mdf.get_channel_data("beu16").expect("beu16 not found"); + assert!( + matches!(data, ChannelData::UInt16(_)), + "expected UInt16, got {}", + data.data_type(false) + ); + if let ChannelData::UInt16(arr) = data { + let expected = [100u16, 200, 300, 400]; + for (i, (&got, &exp)) in arr.values_slice().iter().zip(expected.iter()).enumerate() { + assert_eq!(got, exp, "beu16[{i}]: expected {exp}, got {got}"); + } + } + Ok(()) +} + +#[test] +fn ld_bef32_reads_correctly() -> Result<()> { + let mdf = load_ld_be()?; + let data = mdf.get_channel_data("bef32").expect("bef32 not found"); + assert!( + matches!(data, ChannelData::Float32(_)), + "expected Float32, got {}", + data.data_type(false) + ); + if let ChannelData::Float32(arr) = data { + let expected = [1.5f32, 2.5, 3.5, 4.5]; + for (i, (&got, &exp)) in arr.values_slice().iter().zip(expected.iter()).enumerate() { + assert!((got - exp).abs() < 1e-6, "bef32[{i}]: expected {exp}, got {got}"); + } + } + Ok(()) +} diff --git a/tests/canopen_types.rs b/tests/canopen_types.rs new file mode 100644 index 0000000..9014287 --- /dev/null +++ b/tests/canopen_types.rs @@ -0,0 +1,33 @@ +use anyhow::Result; +use mdfr::mdfreader::Mdf; +use std::sync::LazyLock; + +static BASE: LazyLock = LazyLock::new(|| { + "/home/ratal/workspace/mdfreader/mdfreader/tests/MDF4/MDF4.3/Base_Standard/Examples/DataTypes/CANopenTypes/".to_string() +}); + +#[test] +fn canopen_date() -> Result<()> { + let mut mdf = Mdf::new(&format!("{}Vector_CANOpenDate.mf4", *BASE))?; + mdf.load_all_channels_data_in_memory()?; + let names = mdf.get_channel_names_set(); + 
assert!(!names.is_empty(), "No channels found in CANopen date file"); + let has_data = names + .iter() + .any(|n| mdf.get_channel_data(n).is_some_and(|d| !d.is_empty())); + assert!(has_data, "All channels empty in CANopen date file"); + Ok(()) +} + +#[test] +fn canopen_time() -> Result<()> { + let mut mdf = Mdf::new(&format!("{}Vector_CANOpenTime.mf4", *BASE))?; + mdf.load_all_channels_data_in_memory()?; + let names = mdf.get_channel_names_set(); + assert!(!names.is_empty(), "No channels found in CANopen time file"); + let has_data = names + .iter() + .any(|n| mdf.get_channel_data(n).is_some_and(|d| !d.is_empty())); + assert!(has_data, "All channels empty in CANopen time file"); + Ok(()) +} diff --git a/tests/channel_data_ops.rs b/tests/channel_data_ops.rs new file mode 100644 index 0000000..f589d87 --- /dev/null +++ b/tests/channel_data_ops.rs @@ -0,0 +1,567 @@ +//! Integration tests for ChannelData methods. +//! These tests exercise public API methods that are not covered by the internal unit tests, +//! or cover additional variants to improve overall code coverage. + +use arrow::array::{ + Array, FixedSizeBinaryBuilder, Float64Builder, Int8Builder, Int16Builder, Int32Builder, + Int64Builder, LargeStringBuilder, UInt8Builder, UInt16Builder, UInt32Builder, UInt64Builder, +}; +use arrow::buffer::MutableBuffer; +use arrow::datatypes::{Float64Type, Int8Type, Int16Type, UInt8Type, UInt32Type, UInt64Type}; +use mdfr::data_holder::channel_data::ChannelData; +use mdfr::data_holder::complex_arrow::ComplexArrow; +use mdfr::data_holder::tensor_arrow::{Order, TensorArrow}; + +// ────────────────────────────────────────────────────────────────────────────── +// Group 1: zeros() for variants not previously covered +// ────────────────────────────────────────────────────────────────────────────── + +#[test] +fn test_zeros_virtual_channel_cn_type_3() { + // cn_type 3 always returns UInt64 counter regardless of self type. 
+ let cd = ChannelData::Float64(Float64Builder::new()); + let result = cd.zeros(3, 5, 8, (vec![1], Order::RowMajor)).unwrap(); + assert!(matches!(result, ChannelData::UInt64(_))); + assert_eq!(result.len(), 5); + assert_eq!(result.to_u64_vec(), Some(vec![0, 1, 2, 3, 4])); +} + +#[test] +fn test_zeros_virtual_channel_cn_type_6() { + let cd = ChannelData::Float64(Float64Builder::new()); + let result = cd.zeros(6, 3, 8, (vec![1], Order::RowMajor)).unwrap(); + assert!(matches!(result, ChannelData::UInt64(_))); + assert_eq!(result.len(), 3); + assert_eq!(result.to_u64_vec(), Some(vec![0, 1, 2])); +} + +#[test] +fn test_zeros_array_d_int16() { + let cd = ChannelData::ArrayDInt16(TensorArrow::new()); + // shape=[2,3] → product=6, buffer = 6 i16 = 12 bytes + let result = cd.zeros(0, 4, 6, (vec![2, 3], Order::RowMajor)).unwrap(); + assert!(matches!(result, ChannelData::ArrayDInt16(_))); + // len = buffer_bytes / shape_product = 12 / 6 = 2 + assert!(!result.is_empty()); + // All values should be zero + if let ChannelData::ArrayDInt16(ta) = &result { + assert!(ta.values_slice().iter().all(|&v| v == 0)); + } +} + +#[test] +fn test_zeros_array_d_uint8() { + let cd = ChannelData::ArrayDUInt8(TensorArrow::new()); + let result = cd.zeros(0, 5, 1, (vec![4], Order::RowMajor)).unwrap(); + assert!(matches!(result, ChannelData::ArrayDUInt8(_))); + assert!(!result.is_empty()); +} + +#[test] +fn test_zeros_fixed_size_byte() { + let cd = ChannelData::FixedSizeByteArray(FixedSizeBinaryBuilder::with_capacity(1, 4)); + let result = cd.zeros(0, 3, 4, (vec![1], Order::RowMajor)).unwrap(); + assert!(matches!(result, ChannelData::FixedSizeByteArray(_))); + assert_eq!(result.len(), 0); // zeros creates empty builder with capacity +} + +#[test] +fn test_zeros_complex32() { + let cd = ChannelData::Complex32(ComplexArrow::new()); + // zeros creates vec![0f32; cycle_count*2] = 2*2=4 f32 = 16 bytes + // ComplexArrow::new_from_buffer: len = byte_len / 2 = 16 / 2 = 8 + let result = cd.zeros(0, 2, 8, 
(vec![1], Order::RowMajor)).unwrap(); + assert!(matches!(result, ChannelData::Complex32(_))); + assert!(!result.is_empty()); +} + +#[test] +fn test_zeros_complex64() { + let cd = ChannelData::Complex64(ComplexArrow::new()); + // zeros creates vec![0f64; cycle_count*2] = 3*2=6 f64 = 48 bytes + // ComplexArrow::new_from_buffer: len = byte_len / 2 = 48 / 2 = 24 + let result = cd.zeros(0, 3, 16, (vec![1], Order::RowMajor)).unwrap(); + assert!(matches!(result, ChannelData::Complex64(_))); + assert!(!result.is_empty()); +} + +#[test] +fn test_zeros_utf8() { + let mut b = LargeStringBuilder::new(); + b.append_value("x"); + let cd = ChannelData::Utf8(b); + let result = cd.zeros(0, 5, 10, (vec![1], Order::RowMajor)).unwrap(); + assert!(matches!(result, ChannelData::Utf8(_))); + assert_eq!(result.len(), 0); // empty builder after zeros +} + +#[test] +fn test_zeros_array_d_float64() { + let cd = ChannelData::ArrayDFloat64(TensorArrow::new()); + // For ArrayDFloat64, zeros uses cycle_count * shape product + // cycle_count=2, shape=[1] → buffer = 2*1 f64 = 16 bytes, len = 16/1 = 16 + let result = cd.zeros(0, 2, 8, (vec![1], Order::RowMajor)).unwrap(); + assert!(matches!(result, ChannelData::ArrayDFloat64(_))); + assert!(!result.is_empty()); +} + +#[test] +fn test_zeros_array_d_int32() { + let cd = ChannelData::ArrayDInt32(TensorArrow::new()); + let result = cd.zeros(0, 3, 4, (vec![3], Order::RowMajor)).unwrap(); + assert!(matches!(result, ChannelData::ArrayDInt32(_))); + if let ChannelData::ArrayDInt32(ta) = &result { + assert!(ta.values_slice().iter().all(|&v| v == 0)); + } +} + +// ────────────────────────────────────────────────────────────────────────────── +// Group 2: len() for ArrayD variant using new_from_buffer +// ────────────────────────────────────────────────────────────────────────────── + +#[test] +fn test_len_array_d_from_buffer() { + // 4 f64 values, shape=[1]: buffer = 4*8 = 32 bytes, len = 32/1 = 32 + let buf = MutableBuffer::from_iter([1.0f64, 2.0, 3.0, 
4.0].iter().copied()); + let ta = TensorArrow::::new_from_buffer(buf, vec![1], Order::RowMajor); + let cd = ChannelData::ArrayDFloat64(ta); + // internal len formula: byte_len / shape_product = 32 / 1 = 32 + assert_eq!(cd.len(), 32); + assert!(!cd.is_empty()); +} + +#[test] +fn test_len_array_d_int8_from_buffer() { + // 6 i8 values, shape=[3]: buffer = 6 bytes, len = 6/3 = 2 + let buf = MutableBuffer::from_iter([1i8, 2, 3, 4, 5, 6].iter().copied()); + let ta = TensorArrow::::new_from_buffer(buf, vec![3], Order::RowMajor); + let cd = ChannelData::ArrayDInt8(ta); + assert_eq!(cd.len(), 2); + assert!(!cd.is_empty()); +} + +#[test] +fn test_len_array_d_uint8_from_buffer() { + // 8 u8 values, shape=[2]: buffer = 8 bytes, len = 8/2 = 4 + let buf = MutableBuffer::from_iter([1u8, 2, 3, 4, 5, 6, 7, 8].iter().copied()); + let ta = TensorArrow::::new_from_buffer(buf, vec![2], Order::RowMajor); + let cd = ChannelData::ArrayDUInt8(ta); + assert_eq!(cd.len(), 4); +} + +// ────────────────────────────────────────────────────────────────────────────── +// Group 3: min_max() for variants with new coverage +// ────────────────────────────────────────────────────────────────────────────── + +#[test] +fn test_min_max_array_d_int16() { + let buf = MutableBuffer::from_iter([10i16, -5, 20, 0].iter().copied()); + let ta = TensorArrow::::new_from_buffer(buf, vec![1], Order::RowMajor); + let cd = ChannelData::ArrayDInt16(ta); + let (min, max) = cd.min_max(); + assert!(min.is_some()); + assert!(max.is_some()); + assert_eq!(min.unwrap(), -5.0); + assert_eq!(max.unwrap(), 20.0); +} + +#[test] +fn test_min_max_array_d_uint32() { + let buf = MutableBuffer::from_iter([100u32, 50, 200, 1].iter().copied()); + let ta = TensorArrow::::new_from_buffer(buf, vec![1], Order::RowMajor); + let cd = ChannelData::ArrayDUInt32(ta); + let (min, max) = cd.min_max(); + assert_eq!(min.unwrap(), 1.0); + assert_eq!(max.unwrap(), 200.0); +} + +#[test] +fn test_min_max_array_d_uint64() { + let buf = 
MutableBuffer::from_iter([5u64, 1, 100].iter().copied()); + let ta = TensorArrow::::new_from_buffer(buf, vec![1], Order::RowMajor); + let cd = ChannelData::ArrayDUInt64(ta); + let (min, max) = cd.min_max(); + assert_eq!(min.unwrap(), 1.0); + assert_eq!(max.unwrap(), 100.0); +} + +// ────────────────────────────────────────────────────────────────────────────── +// Group 4: to_u64_vec() for all integer variants not covered by internal tests +// ────────────────────────────────────────────────────────────────────────────── + +#[test] +fn test_to_u64_vec_int8() { + let mut b = Int8Builder::new(); + b.append_value(10); + b.append_value(-1); // cast to u64: 0xFFFFFFFFFFFFFFFF as u64 + let cd = ChannelData::Int8(b); + let v = cd.to_u64_vec().unwrap(); + assert_eq!(v.len(), 2); + assert_eq!(v[0], 10u64); + // -1i8 as u64 = 18446744073709551615 + assert_eq!(v[1], (-1i8) as u64); +} + +#[test] +fn test_to_u64_vec_uint8() { + let mut b = UInt8Builder::new(); + b.append_value(255u8); + b.append_value(0u8); + let cd = ChannelData::UInt8(b); + assert_eq!(cd.to_u64_vec().unwrap(), vec![255u64, 0u64]); +} + +#[test] +fn test_to_u64_vec_uint16() { + let mut b = UInt16Builder::new(); + b.append_value(1000u16); + let cd = ChannelData::UInt16(b); + assert_eq!(cd.to_u64_vec().unwrap(), vec![1000u64]); +} + +#[test] +fn test_to_u64_vec_uint32() { + let mut b = UInt32Builder::new(); + b.append_value(u32::MAX); + let cd = ChannelData::UInt32(b); + assert_eq!(cd.to_u64_vec().unwrap(), vec![u32::MAX as u64]); +} + +#[test] +fn test_to_u64_vec_uint64() { + let mut b = UInt64Builder::new(); + b.append_value(u64::MAX); + let cd = ChannelData::UInt64(b); + assert_eq!(cd.to_u64_vec().unwrap(), vec![u64::MAX]); +} + +#[test] +fn test_to_u64_vec_int64() { + let mut b = Int64Builder::new(); + b.append_value(42i64); + b.append_value(-100i64); + let cd = ChannelData::Int64(b); + let v = cd.to_u64_vec().unwrap(); + assert_eq!(v[0], 42u64); + assert_eq!(v[1], (-100i64) as u64); +} + +#[test] +fn 
test_to_u64_vec_int32() { + let mut b = Int32Builder::new(); + b.append_value(7i32); + b.append_value(-2i32); + let cd = ChannelData::Int32(b); + let v = cd.to_u64_vec().unwrap(); + assert_eq!(v[0], 7u64); + assert_eq!(v[1], (-2i32) as u64); +} + +#[test] +fn test_to_u64_vec_float64_returns_none() { + let mut b = Float64Builder::new(); + b.append_value(1.0); + let cd = ChannelData::Float64(b); + assert!(cd.to_u64_vec().is_none()); +} + +#[test] +fn test_to_u64_vec_utf8_returns_none() { + let mut b = LargeStringBuilder::new(); + b.append_value("abc"); + let cd = ChannelData::Utf8(b); + assert!(cd.to_u64_vec().is_none()); +} + +// ────────────────────────────────────────────────────────────────────────────── +// Group 5: ndim() for more ArrayD variants +// ────────────────────────────────────────────────────────────────────────────── + +#[test] +fn test_ndim_array_d_int16_2d() { + let buf = MutableBuffer::from_iter([1i16, 2, 3, 4, 5, 6].iter().copied()); + let ta = TensorArrow::::new_from_buffer(buf, vec![2, 3], Order::RowMajor); + let cd = ChannelData::ArrayDInt16(ta); + assert_eq!(cd.ndim(), 2); +} + +#[test] +fn test_ndim_complex_is_1() { + let cd = ChannelData::Complex32(ComplexArrow::new()); + assert_eq!(cd.ndim(), 1); +} + +#[test] +fn test_ndim_fixed_size_byte_is_1() { + let b = FixedSizeBinaryBuilder::with_capacity(1, 4); + let cd = ChannelData::FixedSizeByteArray(b); + assert_eq!(cd.ndim(), 1); +} + +// ────────────────────────────────────────────────────────────────────────────── +// Group 6: Clone for FixedSizeByteArray with nulls +// ────────────────────────────────────────────────────────────────────────────── + +#[test] +fn test_clone_fixed_size_byte_with_nulls() { + let mut b = FixedSizeBinaryBuilder::with_capacity(3, 4); + b.append_value(b"AAAA").unwrap(); + b.append_null(); + b.append_value(b"CCCC").unwrap(); + let cd = ChannelData::FixedSizeByteArray(b); + let cloned = cd.clone(); + + assert_eq!(cloned.len(), cd.len()); + assert!(matches!(cloned, 
ChannelData::FixedSizeByteArray(_))); + if let ChannelData::FixedSizeByteArray(mut b) = cloned { + let arr = b.finish(); + assert_eq!(arr.len(), 3); + assert!(arr.is_null(1)); + assert!(!arr.is_null(0)); + assert!(!arr.is_null(2)); + assert_eq!(arr.value(0), b"AAAA"); + assert_eq!(arr.value(2), b"CCCC"); + } +} + +#[test] +fn test_clone_utf8() { + let mut b = LargeStringBuilder::new(); + b.append_value("hello"); + b.append_value("world"); + let cd = ChannelData::Utf8(b); + let cloned = cd.clone(); + assert_eq!(cloned.len(), 2); +} + +#[test] +fn test_clone_complex32() { + let cd = ChannelData::Complex32(ComplexArrow::new()); + let cloned = cd.clone(); + assert_eq!(cloned.len(), 0); + assert!(matches!(cloned, ChannelData::Complex32(_))); +} + +// ────────────────────────────────────────────────────────────────────────────── +// Group 7: bit_count() and byte_count() for variants not covered by internal tests +// ────────────────────────────────────────────────────────────────────────────── + +#[test] +fn test_bit_count_int8() { + let mut b = Int8Builder::new(); + b.append_value(1); + let cd = ChannelData::Int8(b); + assert_eq!(cd.bit_count(), 8); +} + +#[test] +fn test_bit_count_complex64() { + let cd = ChannelData::Complex64(ComplexArrow::new()); + assert_eq!(cd.bit_count(), 128); +} + +#[test] +fn test_bit_count_array_d_int8() { + let cd = ChannelData::ArrayDInt8(TensorArrow::new()); + assert_eq!(cd.bit_count(), 8); +} + +#[test] +fn test_bit_count_array_d_uint16() { + let cd = ChannelData::ArrayDUInt16(TensorArrow::new()); + assert_eq!(cd.bit_count(), 16); +} + +#[test] +fn test_byte_count_complex32() { + let cd = ChannelData::Complex32(ComplexArrow::new()); + assert_eq!(cd.byte_count(), 8); +} + +#[test] +fn test_byte_count_complex64() { + let cd = ChannelData::Complex64(ComplexArrow::new()); + assert_eq!(cd.byte_count(), 16); +} + +#[test] +fn test_byte_count_array_d_int16() { + let cd = ChannelData::ArrayDInt16(TensorArrow::new()); + assert_eq!(cd.byte_count(), 
2);
+}
+
+// ──────────────────────────────────────────────────────────────────────────────
+// Group 8: is_empty() for variants not covered by internal tests
+// ──────────────────────────────────────────────────────────────────────────────
+
+#[test]
+fn test_is_empty_array_d_float32() {
+    let cd = ChannelData::ArrayDFloat32(TensorArrow::new());
+    assert!(cd.is_empty());
+}
+
+#[test]
+fn test_is_empty_array_d_int16() {
+    let cd = ChannelData::ArrayDInt16(TensorArrow::new());
+    assert!(cd.is_empty());
+}
+
+// A tensor built from a non-empty buffer must not report itself as empty.
+#[test]
+fn test_is_not_empty_array_d() {
+    let buf = MutableBuffer::from_iter([1.0f64, 2.0].iter().copied());
+    // The turbofish argument was lost (`TensorArrow::::new_from_buffer` is not valid
+    // Rust). The tensor is stored in `ArrayDFloat64`, so the Arrow Float64 type is
+    // restored here — NOTE(review): confirm against TensorArrow's type parameter.
+    let ta = TensorArrow::<arrow::datatypes::Float64Type>::new_from_buffer(
+        buf,
+        vec![1],
+        Order::RowMajor,
+    );
+    let cd = ChannelData::ArrayDFloat64(ta);
+    assert!(!cd.is_empty());
+}
+
+#[test]
+fn test_is_empty_complex32() {
+    let cd = ChannelData::Complex32(ComplexArrow::new());
+    assert!(cd.is_empty());
+}
+
+#[test]
+fn test_is_empty_fixed_size_byte() {
+    let cd = ChannelData::FixedSizeByteArray(FixedSizeBinaryBuilder::with_capacity(0, 4));
+    assert!(cd.is_empty());
+}
+
+// ──────────────────────────────────────────────────────────────────────────────
+// Group 9: data_type() for uncovered variants
+// ──────────────────────────────────────────────────────────────────────────────
+
+// In this group `data_type(false)` is the little-endian code and
+// `data_type(true)` the big-endian one, as the per-test comments spell out.
+#[test]
+fn test_data_type_fixed_size_byte_array() {
+    let b = FixedSizeBinaryBuilder::with_capacity(1, 4);
+    let cd = ChannelData::FixedSizeByteArray(b);
+    // LE: FixedSizeByteArray → 10; BE: also 10
+    assert_eq!(cd.data_type(false), 10);
+    assert_eq!(cd.data_type(true), 10);
+}
+
+#[test]
+fn test_data_type_array_d_int8() {
+    let cd = ChannelData::ArrayDInt8(TensorArrow::new());
+    // LE: signed int → 2; BE: → 3
+    assert_eq!(cd.data_type(false), 2);
+    assert_eq!(cd.data_type(true), 3);
+}
+
+#[test]
+fn test_data_type_array_d_uint8() {
+    let cd = ChannelData::ArrayDUInt8(TensorArrow::new());
+    // LE: unsigned int → 0; BE: → 1
+    assert_eq!(cd.data_type(false), 0);
+    assert_eq!(cd.data_type(true), 1);
+}
+
+#[test]
+fn test_data_type_complex32() {
+    let channel = ChannelData::Complex32(ComplexArrow::new());
+    // LE: 15; BE: 16
+    let (little_endian, big_endian) = (channel.data_type(false), channel.data_type(true));
+    assert_eq!(little_endian, 15);
+    assert_eq!(big_endian, 16);
+}
+
+#[test]
+fn test_data_type_complex64() {
+    let channel = ChannelData::Complex64(ComplexArrow::new());
+    let (little_endian, big_endian) = (channel.data_type(false), channel.data_type(true));
+    assert_eq!(little_endian, 15);
+    assert_eq!(big_endian, 16);
+}
+
+#[test]
+fn test_data_type_array_d_float32() {
+    let channel = ChannelData::ArrayDFloat32(TensorArrow::new());
+    // LE: 4; BE: 5
+    let (little_endian, big_endian) = (channel.data_type(false), channel.data_type(true));
+    assert_eq!(little_endian, 4);
+    assert_eq!(big_endian, 5);
+}
+
+#[test]
+fn test_data_type_array_d_int32() {
+    let channel = ChannelData::ArrayDInt32(TensorArrow::new());
+    let (little_endian, big_endian) = (channel.data_type(false), channel.data_type(true));
+    assert_eq!(little_endian, 2);
+    assert_eq!(big_endian, 3);
+}
+
+// ──────────────────────────────────────────────────────────────────────────────
+// Group 10: as_u64_slice() for non-UInt64 variants
+// ──────────────────────────────────────────────────────────────────────────────
+
+#[test]
+fn test_as_u64_slice_non_uint64_returns_none() {
+    // Build the one-element Int32 channel inline; only the variant matters here.
+    let cd = ChannelData::Int32({
+        let mut builder = Int32Builder::new();
+        builder.append_value(1);
+        builder
+    });
+    assert!(cd.as_u64_slice().is_none());
+}
+
+#[test]
+fn test_as_u64_slice_float64_returns_none() {
+    let cd = ChannelData::Float64({
+        let mut builder = Float64Builder::new();
+        builder.append_value(1.0);
+        builder
+    });
+    assert!(cd.as_u64_slice().is_none());
+}
+
+// ──────────────────────────────────────────────────────────────────────────────
+// Group 11: byte_count() for additional scalar types
+// ──────────────────────────────────────────────────────────────────────────────
+
+#[test]
+fn test_byte_count_int8() {
+    assert_eq!(ChannelData::Int8(Int8Builder::new()).byte_count(), 1);
+}
+
+#[test]
+fn test_byte_count_uint8() {
+    assert_eq!(ChannelData::UInt8(UInt8Builder::new()).byte_count(), 1);
+}
+
+#[test]
+fn test_byte_count_int16() {
+    assert_eq!(ChannelData::Int16(Int16Builder::new()).byte_count(), 2);
+}
+
+#[test]
+fn test_byte_count_int64() {
+    let cd = ChannelData::Int64(Int64Builder::new());
+    assert_eq!(cd.byte_count(), 8);
+}
+
+#[test]
+fn test_byte_count_uint64() {
+    let cd = ChannelData::UInt64(UInt64Builder::new());
+    assert_eq!(cd.byte_count(), 8);
+}
+
+// ──────────────────────────────────────────────────────────────────────────────
+// Group 12: shape() for additional variants
+// ──────────────────────────────────────────────────────────────────────────────
+
+// Two appended 4-byte values give a one-dimensional, row-major shape of [2].
+#[test]
+fn test_shape_fixed_size_byte() {
+    let mut b = FixedSizeBinaryBuilder::with_capacity(2, 4);
+    b.append_value(b"AAAA").unwrap();
+    b.append_value(b"BBBB").unwrap();
+    let cd = ChannelData::FixedSizeByteArray(b);
+    let (shape, order) = cd.shape();
+    assert_eq!(shape, vec![2]);
+    assert_eq!(order, Order::RowMajor);
+}
+
+#[test]
+fn test_shape_complex32_with_data() {
+    use arrow::datatypes::Float32Type;
+    // 4 f32 values = 16 bytes; ComplexArrow::new_from_buffer: len = 16/2 = 8
+    let buf = MutableBuffer::from_iter([1.0f32, 2.0, 3.0, 4.0].iter().copied());
+    // The turbofish argument was lost (`ComplexArrow::::new_from_buffer` is not valid
+    // Rust); `Float32Type` is the type imported just above for this call.
+    let ca =
+        mdfr::data_holder::complex_arrow::ComplexArrow::<Float32Type>::new_from_buffer(buf);
+    let cd = ChannelData::Complex32(ca);
+    let (shape, order) = cd.shape();
+    assert_eq!(shape, vec![8]); // len = byte_len / 2 = 16/2 = 8
+    assert_eq!(order, Order::RowMajor);
+}
diff --git a/tests/conversion.rs b/tests/conversion.rs
index 5462b69..5d0a6dd 100644
--- a/tests/conversion.rs
+++ b/tests/conversion.rs
@@ -258,9 +258,15 @@ fn lookup_value_range_to_value() -> Result<()> {
     let mut mdf = Mdf::new(&file_name)?;
     mdf.load_all_channels_data_in_memory()?;
     if let Some(data) = mdf.get_channel_data("Data channel") {
+        // Spec 6.17.8: float data uses [lo, hi) exclusive upper bound.
+        // Raw data: -15…14 (step 1). CC ranges:
+        //   [-10,-7)→-1, [-7,-5)→0, [-5,0)→1, [0,2)→2, [2,5)→3,
+        //   [5,6)→5, [6,8.5)→6, [8.5,10)→7, [10,12)→8, [12,14)→9, default=-1
+        // Values -15…-11 are below all ranges → default -1.
+        // Value 14 equals the exclusive upper of last range → default -1.
         let vect = Vec::from([
-            -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0,
-            3.0, 3.0, 5.0, 5.0, 5.0, 6.0, 7.0, 7.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0,
+            -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0,
+            2.0, 3.0, 3.0, 3.0, 5.0, 6.0, 6.0, 6.0, 7.0, 8.0, 8.0, 9.0, 9.0, -1.0,
         ]);
         assert_eq!(
             &ChannelData::Float64(Float64Builder::new_from_buffer(vect.into(), None)),
diff --git a/tests/conversions_int_types.rs b/tests/conversions_int_types.rs
new file mode 100644
index 0000000..d79791b
--- /dev/null
+++ b/tests/conversions_int_types.rs
@@ -0,0 +1,205 @@
+//! Integration tests exercising Int8/Int16/Float32 conversion arms in conversions4.rs.
+//!
+//! The existing sample files all use Float64 as raw channel type, so those arms
+//! were never hit. These tests load a synthetic MDF4 fixture that has Int8/Int16/Float32
+//! channels with a linear CC block.
+use anyhow::Result;
+use mdfr::data_holder::channel_data::ChannelData;
+use mdfr::mdfreader::Mdf;
+use std::sync::LazyLock;
+
+// Location of the synthetic fixture, relative to the directory the tests run from.
+const FIXTURE_PATH: &str = "test_files/synthetic/int_linear_cc.mf4";
+
+/// Ensure the fixture file exists before tests run.
+static FIXTURE: LazyLock<()> = LazyLock::new(|| {
+    create_fixture().expect("failed to create int_linear_cc fixture");
+});
+
+// ─── Minimal MDF4 binary builder ─────────────────────────────────────────────
+
+// Each `p*` helper appends one little-endian scalar to the output buffer.
+fn pu8(b: &mut Vec<u8>, v: u8) { b.push(v); }
+fn pu16(b: &mut Vec<u8>, v: u16) { b.extend_from_slice(&v.to_le_bytes()); }
+fn pi16(b: &mut Vec<u8>, v: i16) { b.extend_from_slice(&v.to_le_bytes()); }
+fn pu32(b: &mut Vec<u8>, v: u32) { b.extend_from_slice(&v.to_le_bytes()); }
+fn pu64(b: &mut Vec<u8>, v: u64) { b.extend_from_slice(&v.to_le_bytes()); }
+fn pi64(b: &mut Vec<u8>, v: i64) { b.extend_from_slice(&v.to_le_bytes()); }
+fn pf32(b: &mut Vec<u8>, v: f32) { b.extend_from_slice(&v.to_le_bytes()); }
+fn pf64(b: &mut Vec<u8>, v: f64) { b.extend_from_slice(&v.to_le_bytes()); }
+/// Appends `n` zero bytes (reserved / padding fields).
+fn zeros(b: &mut Vec<u8>, n: usize) { b.extend(std::iter::repeat_n(0u8, n)); }
+
+/// Appends the 64-byte identification block.
+///
+/// The three leading text fields are 8 bytes each, space padded. The padding is
+/// written as `\x20` escapes so the field width survives whitespace-normalising
+/// tools; with shorter strings the block would not be 64 bytes and every offset
+/// used by `create_fixture` would be wrong.
+fn id_block(b: &mut Vec<u8>) {
+    b.extend_from_slice(b"MDF\x20\x20\x20\x20\x20"); // file identifier, 8 bytes
+    b.extend_from_slice(b"4.30\x20\x20\x20\x20"); // format version text, 8 bytes
+    b.extend_from_slice(b"mdfr\x20\x20\x20\x20"); // program identifier, 8 bytes
+    pu16(b,0); pu16(b,0); pu16(b,430); pu16(b,0); zeros(b,2); zeros(b,26); pu16(b,0); pu16(b,0);
+}
+/// Appends the 104-byte `##HD` block; `dg` and `fh` are the absolute file offsets
+/// of the first data group and the first file-history block.
+fn hd4(b: &mut Vec<u8>, dg: i64, fh: i64) {
+    // header: id, reserved, block length, link count
+    b.extend_from_slice(b"##HD"); zeros(b,4); pu64(b,104); pu64(b,6);
+    // six links; only the first two are used
+    pi64(b,dg); pi64(b,fh); pi64(b,0); pi64(b,0); pi64(b,0); pi64(b,0);
+    pu64(b,0); pi16(b,0); pi16(b,0); pu8(b,0); pu8(b,0); pu8(b,0); pu8(b,0);
+    pf64(b,0.0); pf64(b,0.0);
+}
+/// Appends a 56-byte `##FH` block with both links set to 0.
+fn fh(b: &mut Vec<u8>) {
+    b.extend_from_slice(b"##FH"); zeros(b,4); pu64(b,56); pu64(b,2);
+    pi64(b,0); pi64(b,0); pu64(b,0); pi16(b,0); pi16(b,0); pu8(b,0); zeros(b,3);
+}
+/// Appends a 64-byte `##DG` block pointing at channel group `cg` and data block `data`.
+fn dg4(b: &mut Vec<u8>, cg: i64, data: i64) {
+    b.extend_from_slice(b"##DG"); zeros(b,4); pu64(b,64); pu64(b,4);
+    pi64(b,0); pi64(b,cg); pi64(b,data); pi64(b,0); pu8(b,0); zeros(b,7);
+}
+/// Appends a 104-byte `##CG` block: first channel at `cn`, `cycles` records of
+/// `data_bytes` bytes each.
+fn cg4(b: &mut Vec<u8>, cn: i64, cycles: u64, data_bytes: u32) {
+    b.extend_from_slice(b"##CG"); zeros(b,4); pu64(b,104);
+    pu64(b,6); pi64(b,0); pi64(b,cn); pi64(b,0); pi64(b,0); pi64(b,0); pi64(b,0);
+    pu64(b,0); pu64(b,cycles); pu16(b,0); pu16(b,0); zeros(b,4); pu32(b,data_bytes); pu32(b,0);
+}
+/// Appends a 160-byte `##CN` block.
+///
+/// `next`, `tx` and `cc` are absolute file offsets of the next channel, the name
+/// text block and the conversion block (0 = none). `byte_off` / `bits` locate the
+/// value inside a record.
+// One argument per block field that varies between channels; a struct would only
+// add ceremony to this test-only builder.
+#[allow(clippy::too_many_arguments)]
+fn cn4(b: &mut Vec<u8>, cn_type: u8, sync: u8, dtype: u8, byte_off: u32, bits: u32,
+       next: i64, tx: i64, cc: i64) {
+    b.extend_from_slice(b"##CN"); zeros(b,4); pu64(b,160);
+    // link count followed by the eight links
+    pu64(b,8); pi64(b,next); pi64(b,0); pi64(b,tx); pi64(b,0); pi64(b,cc);
+    pi64(b,0); pi64(b,0); pi64(b,0);
+    pu8(b,cn_type); pu8(b,sync); pu8(b,dtype); pu8(b,0);
+    pu32(b,byte_off); pu32(b,bits); pu32(b,0); pu32(b,0);
+    pu8(b,0xff); pu8(b,0); pu16(b,0);
+    pf64(b,0.0); pf64(b,0.0); pf64(b,0.0); pf64(b,0.0); pf64(b,0.0); pf64(b,0.0);
+}
+/// Appends a `##TX` block: 24-byte header, the UTF-8 bytes of `text`, and a NUL terminator.
+fn tx(b: &mut Vec<u8>, text: &str) {
+    let t = text.as_bytes();
+    let len = 24u64 + t.len() as u64 + 1; // +1 for the NUL terminator
+    b.extend_from_slice(b"##TX"); zeros(b,4); pu64(b,len); pu64(b,0);
+    b.extend_from_slice(t); b.push(0);
+}
+/// Appends a 96-byte `##CC` block of type 1 (linear) with the two values `a0`, `a1`.
+fn cc_linear(b: &mut Vec<u8>, a0: f64, a1: f64) {
+    b.extend_from_slice(b"##CC"); zeros(b,4); pu64(b,96);
+    pu64(b,4); pi64(b,0); pi64(b,0); pi64(b,0); pi64(b,0);
+    pu8(b,1); pu8(b,0); pu16(b,0); pu16(b,0); pu16(b,2);
+    pf64(b,0.0); pf64(b,0.0); pf64(b,a0); pf64(b,a1);
+}
+/// Appends a `##DT` block: 24-byte header followed by the raw `records` bytes.
+fn dt(b: &mut Vec<u8>, records: &[u8]) {
+    let len = 24u64 + records.len() as u64;
+    b.extend_from_slice(b"##DT"); zeros(b,4); pu64(b,len); pu64(b,0);
+    b.extend_from_slice(records);
+}
+
+/// Creates `test_files/synthetic/int_linear_cc.mf4` if it doesn't already exist.
+///
+/// Layout (offsets):
+/// ```text
+/// [0]    IdBlock            64 b
+/// [64]   Hd4               104 b (hd_dg_first=224, hd_fh_first=168)
+/// [168]  FhBlock            56 b
+/// [224]  Dg4                64 b (cg_first=288, data=1260)
+/// [288]  CG                104 b (cn_first=392, cycles=4, data_bytes=15)
+/// [392]  CN_master         160 b (FloatLE/64bit, byte_off=0, tx=1032, cc=0)
+/// [552]  CN_int8           160 b (IntLE/8bit, byte_off=8, tx=1064, cc=1164)
+/// [712]  CN_int16          160 b (IntLE/16bit, byte_off=9, tx=1096, cc=1164)
+/// [872]  CN_float32        160 b (FloatLE/32bit, byte_off=11, tx=1129, cc=1164)
+/// [1032] TX "time_ch\0"     32 b
+/// [1064] TX "int8_ch\0"     32 b
+/// [1096] TX "int16_ch\0"    33 b
+/// [1129] TX "float32_ch\0"  35 b
+/// [1164] CC linear(0.5,2.0) 96 b
+/// [1260] DT 4×15=60b + 24 hdr = 84 b
+/// Total: 1344 b
+/// ```
+fn create_fixture() -> Result<()> {
+    // An existing file is reused as-is; delete it to force regeneration after
+    // changing the builder.
+    if std::path::Path::new(FIXTURE_PATH).exists() {
+        return Ok(());
+    }
+    std::fs::create_dir_all("test_files/synthetic")?;
+    let mut b: Vec<u8> = Vec::with_capacity(1344);
+
+    // Every link written below is an absolute offset, so each block must end
+    // exactly where the layout table says; the debug assertions pin that.
+    id_block(&mut b); debug_assert_eq!(b.len(), 64);
+    hd4(&mut b, 224, 168); debug_assert_eq!(b.len(), 168);
+    fh(&mut b); debug_assert_eq!(b.len(), 224);
+    dg4(&mut b, 288, 1260); debug_assert_eq!(b.len(), 288);
+    cg4(&mut b, 392, 4, 15); debug_assert_eq!(b.len(), 392);
+    cn4(&mut b, 2, 1, 4, 0, 64, 552, 1032, 0); debug_assert_eq!(b.len(), 552);
+    cn4(&mut b, 0, 0, 2, 8, 8, 712, 1064, 1164); debug_assert_eq!(b.len(), 712);
+    cn4(&mut b, 0, 0, 2, 9, 16, 872, 1096, 1164); debug_assert_eq!(b.len(), 872);
+    cn4(&mut b, 0, 0, 4, 11, 32, 0, 1129, 1164); debug_assert_eq!(b.len(), 1032);
+    tx(&mut b, "time_ch"); debug_assert_eq!(b.len(), 1064);
+    tx(&mut b, "int8_ch"); debug_assert_eq!(b.len(), 1096);
+    tx(&mut b, "int16_ch"); debug_assert_eq!(b.len(), 1129);
+    tx(&mut b, "float32_ch"); debug_assert_eq!(b.len(), 1164);
+    cc_linear(&mut b, 0.5, 2.0); debug_assert_eq!(b.len(), 1260);
+
+    let raw_i8: [i8; 4] = [-5, 0, 5, 10];
+    let raw_i16: [i16; 4] = [-100, 0, 100, 200];
+    let raw_f32: [f32; 4] = [1.5, 2.5, 3.5, 4.5];
+    let mut recs: Vec<u8> =
Vec::with_capacity(60);
+    for i in 0..4 {
+        pf64(&mut recs, i as f64);
+        recs.push(raw_i8[i] as u8);
+        recs.extend_from_slice(&raw_i16[i].to_le_bytes());
+        pf32(&mut recs, raw_f32[i]);
+    }
+    dt(&mut b, &recs);
+    debug_assert_eq!(b.len(), 1344);
+
+    std::fs::write(FIXTURE_PATH, &b)?;
+    Ok(())
+}
+
+// ─── Tests ────────────────────────────────────────────────────────────────────
+
+/// Makes sure the fixture exists, then opens it and loads every channel into memory.
+fn load_fixture() -> Result<Mdf> {
+    LazyLock::force(&FIXTURE);
+    let mut mdf = Mdf::new(FIXTURE_PATH)?;
+    mdf.load_all_channels_data_in_memory()?;
+    Ok(mdf)
+}
+
+#[test]
+fn int8_linear_conversion() -> Result<()> {
+    let mdf = load_fixture()?;
+    // raw: -5, 0, 5, 10 → phys = raw * 2.0 + 0.5 → -9.5, 0.5, 10.5, 20.5
+    let data = mdf.get_channel_data("int8_ch").expect("int8_ch not found");
+    assert!(
+        matches!(data, ChannelData::Float64(_)),
+        "expected Float64 after linear CC, got {}",
+        data.data_type(false)
+    );
+    if let ChannelData::Float64(arr) = data {
+        let expected = [-9.5f64, 0.5, 10.5, 20.5];
+        // `zip` stops at the shorter side, so a truncated or empty channel would
+        // otherwise pass the loop below without checking anything.
+        assert_eq!(
+            arr.values_slice().len(),
+            expected.len(),
+            "int8_ch: unexpected sample count"
+        );
+        for (i, (&got, &exp)) in arr.values_slice().iter().zip(expected.iter()).enumerate() {
+            assert!((got - exp).abs() < 1e-9, "int8_ch[{i}]: expected {exp}, got {got}");
+        }
+    }
+    Ok(())
+}
+
+#[test]
+fn int16_linear_conversion() -> Result<()> {
+    let mdf = load_fixture()?;
+    // raw: -100, 0, 100, 200 → -199.5, 0.5, 200.5, 400.5
+    let data = mdf.get_channel_data("int16_ch").expect("int16_ch not found");
+    assert!(matches!(data, ChannelData::Float64(_)), "expected Float64 after linear CC");
+    if let ChannelData::Float64(arr) = data {
+        let expected = [-199.5f64, 0.5, 200.5, 400.5];
+        // Guard against `zip` silently truncating to a shorter channel.
+        assert_eq!(
+            arr.values_slice().len(),
+            expected.len(),
+            "int16_ch: unexpected sample count"
+        );
+        for (i, (&got, &exp)) in arr.values_slice().iter().zip(expected.iter()).enumerate() {
+            assert!((got - exp).abs() < 1e-9, "int16_ch[{i}]: expected {exp}, got {got}");
+        }
+    }
+    Ok(())
+}
+
+#[test]
+fn float32_linear_conversion() -> Result<()> {
+    let mdf = load_fixture()?;
+    // raw: 1.5, 2.5, 3.5, 4.5 → 3.5, 5.5, 7.5, 9.5
+    let data = mdf.get_channel_data("float32_ch").expect("float32_ch not found");
+
assert!(matches!(data, ChannelData::Float64(_)), "expected Float64 after linear CC");
+    if let ChannelData::Float64(arr) = data {
+        let expected = [3.5f64, 5.5, 7.5, 9.5];
+        // `zip` stops at the shorter side; check the sample count explicitly so a
+        // truncated or empty channel cannot pass unnoticed.
+        assert_eq!(
+            arr.values_slice().len(),
+            expected.len(),
+            "float32_ch: unexpected sample count"
+        );
+        for (i, (&got, &exp)) in arr.values_slice().iter().zip(expected.iter()).enumerate() {
+            assert!((got - exp).abs() < 1e-6, "float32_ch[{i}]: expected {exp}, got {got}");
+        }
+    }
+    Ok(())
+}
+
+#[test]
+fn master_channel_loaded() -> Result<()> {
+    let mdf = load_fixture()?;
+    let data = mdf.get_channel_data("time_ch").expect("time_ch not found");
+    assert!(matches!(data, ChannelData::Float64(_)), "master should be Float64");
+    assert_eq!(data.len(), 4);
+    Ok(())
+}
diff --git a/tests/data_types.rs b/tests/data_types.rs
index 84fbbf4..6a53e40 100644
--- a/tests/data_types.rs
+++ b/tests/data_types.rs
@@ -315,3 +315,52 @@ fn complex_types() -> Result<()> {
     mdf.load_all_channels_data_in_memory()?;
     Ok(())
 }
+
+#[test]
+fn string_no_zero_termination() -> Result<()> {
+    let file_name = format!(
+        "{}{}{}",
+        BASE_PATH_MDF4.as_str(),
+        "DataTypes/StringTypes/",
+        "Vector_Strings_NoZeroTermination_MDF430.mf4"
+    );
+    let mut mdf = Mdf::new(&file_name)?;
+    mdf.load_all_channels_data_in_memory()?;
+    let names = mdf.get_channel_names_set();
+    assert!(!names.is_empty(), "No channels in no-zero-termination string file");
+    Ok(())
+}
+
+#[test]
+fn half_float_values() -> Result<()> {
+    let file_name = format!(
+        "{}{}",
+        BASE_PATH_MDF4.as_str(),
+        "Halffloat/halffloat_sinus.mf4"
+    );
+    let mut mdf = Mdf::new(&file_name)?;
+    mdf.load_all_channels_data_in_memory()?;
+
+    // The file contains at least one channel; verify it loaded successfully
+    let names = mdf.get_channel_names_set();
+    assert!(!names.is_empty(), "No channels found in halffloat file");
+
+    // Find a channel with float data and check it has values
+    let has_data = names
+        .iter()
+        .any(|name| mdf.get_channel_data(name).is_some_and(|d| !d.is_empty()));
+    assert!(has_data, "No non-empty channel data in halffloat file");
+
+    // Half-float channels
are decoded as Float32 or Float64
+    let has_float_channel = names.iter().any(|name| {
+        mdf.get_channel_data(name).is_some_and(|d| {
+            matches!(d, ChannelData::Float32(_) | ChannelData::Float64(_))
+        })
+    });
+    assert!(
+        has_float_channel,
+        "Expected at least one Float32/Float64 channel decoded from half-float"
+    );
+
+    Ok(())
+}
diff --git a/tests/fixtures.rs b/tests/fixtures.rs
new file mode 100644
index 0000000..b7319c3
--- /dev/null
+++ b/tests/fixtures.rs
@@ -0,0 +1,361 @@
+//! Synthetic MDF4 fixture builder.
+//!
+//! Creates minimal binary MDF4 files for coverage testing of conversion code paths
+//! that are never hit by the existing sample files (which all use Float64 raw channels).
+//!
+//! Run once with `cargo test --test fixtures` to generate the files.
+//! `test_files/synthetic/` is listed in `.gitignore`, so the files are generated
+//! locally rather than checked in.
+use anyhow::Result;
+
+// ─── Low-level byte helpers ──────────────────────────────────────────────────
+
+// Each `push_*` helper appends one little-endian scalar to `buf`.
+fn push_u8(buf: &mut Vec<u8>, v: u8) {
+    buf.push(v);
+}
+fn push_u16(buf: &mut Vec<u8>, v: u16) {
+    buf.extend_from_slice(&v.to_le_bytes());
+}
+fn push_i16(buf: &mut Vec<u8>, v: i16) {
+    buf.extend_from_slice(&v.to_le_bytes());
+}
+fn push_u32(buf: &mut Vec<u8>, v: u32) {
+    buf.extend_from_slice(&v.to_le_bytes());
+}
+fn push_u64(buf: &mut Vec<u8>, v: u64) {
+    buf.extend_from_slice(&v.to_le_bytes());
+}
+fn push_i64(buf: &mut Vec<u8>, v: i64) {
+    buf.extend_from_slice(&v.to_le_bytes());
+}
+fn push_f32(buf: &mut Vec<u8>, v: f32) {
+    buf.extend_from_slice(&v.to_le_bytes());
+}
+fn push_f64(buf: &mut Vec<u8>, v: f64) {
+    buf.extend_from_slice(&v.to_le_bytes());
+}
+/// Appends `n` zero bytes (reserved / padding fields).
+fn push_zeros(buf: &mut Vec<u8>, n: usize) {
+    buf.extend(std::iter::repeat_n(0u8, n));
+}
+
+// ─── Block writers ───────────────────────────────────────────────────────────
+
+/// IdBlock (64 bytes, fixed MDF4 header)
+///
+/// The text fields are 8 bytes each, space padded; the padding is written as
+/// `\x20` escapes so the width cannot be lost to whitespace normalisation.
+fn write_id_block(buf: &mut Vec<u8>) {
+    buf.extend_from_slice(b"MDF\x20\x20\x20\x20\x20"); // id_file_id (8)
+    buf.extend_from_slice(b"4.30\x20\x20\x20\x20"); // id_vers (8)
+
buf.extend_from_slice(b"mdfr\x20\x20\x20\x20"); // id_prog (8, space padded)
+    push_u16(buf, 0); // id_default_byteorder
+    push_u16(buf, 0); // id_floatingpointformat
+    push_u16(buf, 430); // id_ver
+    push_u16(buf, 0); // id_codepage
+    push_zeros(buf, 2); // id_check
+    push_zeros(buf, 26); // id_fill
+    push_u16(buf, 0); // id_unfin_flags
+    push_u16(buf, 0); // id_custom_unfin_flags
+    // Total: 8+8+8+2+2+2+2+2+26+2+2 = 64 bytes
+}
+
+/// Hd4 block (104 bytes, self-contained including ##HD header)
+///
+/// `dg_first` and `fh_first` are absolute file offsets of the first data group
+/// and the first file-history block.
+fn write_hd4(buf: &mut Vec<u8>, dg_first: i64, fh_first: i64) {
+    buf.extend_from_slice(b"##HD"); // hd_id
+    push_zeros(buf, 4); // hd_reserved
+    push_u64(buf, 104); // hd_len
+    push_u64(buf, 6); // hd_link_counts
+    push_i64(buf, dg_first); // hd_dg_first
+    push_i64(buf, fh_first); // hd_fh_first
+    push_i64(buf, 0); // hd_ch_first
+    push_i64(buf, 0); // hd_at_first
+    push_i64(buf, 0); // hd_ev_first
+    push_i64(buf, 0); // hd_md_comment
+    push_u64(buf, 0); // hd_start_time_ns
+    push_i16(buf, 0); // hd_tz_offset_min
+    push_i16(buf, 0); // hd_dst_offset_min
+    push_u8(buf, 0); // hd_time_flags
+    push_u8(buf, 0); // hd_time_class
+    push_u8(buf, 0); // hd_flags
+    push_u8(buf, 0); // hd_reserved2
+    push_f64(buf, 0.0); // hd_start_angle_rad
+    push_f64(buf, 0.0); // hd_start_distance_m
+    // Total: 4+4+8+8 + 6×8 + 8+2+2+1+1+1+1+8+8 = 24+48+32 = 104 bytes
+}
+
+/// FhBlock (56 bytes, self-contained including ##FH header)
+fn write_fh(buf: &mut Vec<u8>) {
+    buf.extend_from_slice(b"##FH"); // fh_id
+    push_zeros(buf, 4); // fh_gap
+    push_u64(buf, 56); // fh_len
+    push_u64(buf, 2); // fh_links (fh_fh_next + fh_md_comment)
+    push_i64(buf, 0); // fh_fh_next (end of list)
+    push_i64(buf, 0); // fh_md_comment (none)
+    push_u64(buf, 0); // fh_time_ns
+    push_i16(buf, 0); // fh_tz_offset_min
+    push_i16(buf, 0); // fh_dst_offset_min
+    push_u8(buf, 0); // fh_time_flags
+    push_zeros(buf, 3); // fh_reserved
+    // Total: 4+4+8+8+8+8+8+2+2+1+3 = 56 bytes
+}
+
+/// Dg4Block (64 bytes, self-contained including ##DG header)
+fn
write_dg4(buf: &mut Vec<u8>, cg_first: i64, data: i64) {
+    buf.extend_from_slice(b"##DG"); // dg_id
+    push_zeros(buf, 4); // reserved
+    push_u64(buf, 64); // dg_len
+    push_u64(buf, 4); // dg_links
+    push_i64(buf, 0); // dg_dg_next
+    push_i64(buf, cg_first); // dg_cg_first
+    push_i64(buf, data); // dg_data
+    push_i64(buf, 0); // dg_md_comment
+    push_u8(buf, 0); // dg_rec_id_size
+    push_zeros(buf, 7); // reserved_2
+    // Total: 4+4+8+8+4×8+1+7 = 64 bytes
+}
+
+/// CG block: Blockheader4Short(16) + Cg4Block body(88) = 104 bytes total.
+/// Uses 6 standard links (no cg_cg_master).
+///
+/// `cn_first` is the absolute offset of the first channel block; `cycle_count`
+/// records of `data_bytes` bytes each are announced for the group.
+fn write_cg4(buf: &mut Vec<u8>, cn_first: i64, cycle_count: u64, data_bytes: u32) {
+    // Blockheader4Short (16 bytes)
+    buf.extend_from_slice(b"##CG"); // hdr_id
+    push_zeros(buf, 4); // hdr_gap
+    push_u64(buf, 104); // hdr_len (= 16 + 88)
+    // Cg4Block body (88 bytes):
+    push_u64(buf, 6); // cg_links (6 → no cg_cg_master)
+    push_i64(buf, 0); // cg_cg_next
+    push_i64(buf, cn_first); // cg_cn_first
+    push_i64(buf, 0); // cg_tx_acq_name
+    push_i64(buf, 0); // cg_si_acq_source
+    push_i64(buf, 0); // cg_sr_first
+    push_i64(buf, 0); // cg_md_comment
+    push_u64(buf, 0); // cg_record_id
+    push_u64(buf, cycle_count); // cg_cycle_count
+    push_u16(buf, 0); // cg_flags
+    push_u16(buf, 0); // cg_path_separator
+    push_zeros(buf, 4); // cg_reserved
+    push_u32(buf, data_bytes); // cg_data_bytes
+    push_u32(buf, 0); // cg_inval_bytes
+    // Body: 8+48+8+8+2+2+4+4+4 = 88 bytes → total 104
+}
+
+/// CN block: Blockheader4Short(16) + Cn4Block body(144) = 160 bytes total.
+/// Uses 8 standard links (no extra CA/event links).
+///
+/// - cn_type: 0=fixed, 2=master
+/// - cn_sync_type: 0=none, 1=time
+/// - cn_data_type: 2=IntLE, 4=FloatLE
+// One argument per block field that differs between channels; kept flat because
+// this is a test-only writer.
+#[allow(clippy::too_many_arguments)]
+fn write_cn4(
+    buf: &mut Vec<u8>,
+    cn_type: u8,
+    cn_sync_type: u8,
+    cn_data_type: u8,
+    cn_byte_offset: u32,
+    cn_bit_count: u32,
+    cn_cn_next: i64,
+    cn_tx_name: i64,
+    cn_cc_conversion: i64,
+) {
+    // Blockheader4Short (16 bytes)
+    buf.extend_from_slice(b"##CN"); // hdr_id
+    push_zeros(buf, 4); // hdr_gap
+    push_u64(buf, 160); // hdr_len (= 16 + 144)
+    // Cn4Block body (8 + 64 + 72 = 144 bytes):
+    push_u64(buf, 8); // cn_links (8 standard links)
+    push_i64(buf, cn_cn_next); // cn_cn_next
+    push_i64(buf, 0); // cn_composition
+    push_i64(buf, cn_tx_name); // cn_tx_name
+    push_i64(buf, 0); // cn_si_source
+    push_i64(buf, cn_cc_conversion); // cn_cc_conversion
+    push_i64(buf, 0); // cn_data
+    push_i64(buf, 0); // cn_md_unit
+    push_i64(buf, 0); // cn_md_comment
+    // Data members (72 bytes):
+    push_u8(buf, cn_type); // cn_type
+    push_u8(buf, cn_sync_type); // cn_sync_type
+    push_u8(buf, cn_data_type); // cn_data_type
+    push_u8(buf, 0); // cn_bit_offset
+    push_u32(buf, cn_byte_offset); // cn_byte_offset
+    push_u32(buf, cn_bit_count); // cn_bit_count
+    push_u32(buf, 0); // cn_flags
+    push_u32(buf, 0); // cn_inval_bit_pos
+    push_u8(buf, 0xff); // cn_precision (unrestricted)
+    push_u8(buf, 0); // cn_alignment
+    push_u16(buf, 0); // cn_attachment_count
+    push_f64(buf, 0.0); // cn_val_range_min
+    push_f64(buf, 0.0); // cn_val_range_max
+    push_f64(buf, 0.0); // cn_limit_min
+    push_f64(buf, 0.0); // cn_limit_max
+    push_f64(buf, 0.0); // cn_limit_ext_min
+    push_f64(buf, 0.0); // cn_limit_ext_max
+    // Data members total: 1+1+1+1+4+4+4+4+1+1+2+8+8+8+8+8+8 = 72 bytes → total 160
+}
+
+/// TX block: Blockheader4(24) + null-terminated text.
+/// `text` should NOT include the null terminator.
+fn write_tx(buf: &mut Vec<u8>, text: &str) {
+    let text_bytes = text.as_bytes();
+    let total_len = 24u64 + text_bytes.len() as u64 + 1; // +1 for null
+    buf.extend_from_slice(b"##TX"); // hdr_id
+    push_zeros(buf, 4); // hdr_gap
+    push_u64(buf, total_len); // hdr_len
+    push_u64(buf, 0); // hdr_links
+    buf.extend_from_slice(text_bytes); // text
+    buf.push(0); // null terminator
+}
+
+/// CC block (linear, cc_type=1): Blockheader4Short(16) + Cc4Block body(80) = 96 bytes.
+/// Formula: phys = a1 * raw + a0
+fn write_cc_linear(buf: &mut Vec<u8>, a0: f64, a1: f64) {
+    // Blockheader4Short (16 bytes)
+    buf.extend_from_slice(b"##CC"); // hdr_id
+    push_zeros(buf, 4); // hdr_gap
+    push_u64(buf, 96); // hdr_len (= 16 + 80)
+    // Cc4Block body (80 bytes):
+    push_u64(buf, 4); // cc_links (4 standard links, no cc_ref)
+    push_i64(buf, 0); // cc_tx_name
+    push_i64(buf, 0); // cc_md_unit
+    push_i64(buf, 0); // cc_md_comment
+    push_i64(buf, 0); // cc_cc_inverse
+    // cc_ref: empty (cc_links == 4)
+    push_u8(buf, 1); // cc_type = 1 (Linear)
+    push_u8(buf, 0); // cc_precision
+    push_u16(buf, 0); // cc_flags
+    push_u16(buf, 0); // cc_ref_count
+    push_u16(buf, 2); // cc_val_count (a0, a1)
+    push_f64(buf, 0.0); // cc_phy_range_min
+    push_f64(buf, 0.0); // cc_phy_range_max
+    // cc_val: Real([a0, a1])
+    push_f64(buf, a0); // a0 (offset)
+    push_f64(buf, a1); // a1 (factor)
+    // Body: 8+32+1+1+2+2+2+8+8+16 = 80 bytes → total 96
+}
+
+/// DT block: 4-byte id + Dt4Block(20) + raw records.
+/// `records` is a flat byte slice of all records concatenated.
+fn write_dt(buf: &mut Vec<u8>, records: &[u8]) {
+    let total_len = 24u64 + records.len() as u64;
+    buf.extend_from_slice(b"##DT"); // id (read separately by reader before Dt4Block)
+    push_zeros(buf, 4); // reserved
+    push_u64(buf, total_len); // len (total block size)
+    push_u64(buf, 0); // links = 0
+    buf.extend_from_slice(records); // raw data
+}
+
+// ─── Fixture: int_linear_cc ──────────────────────────────────────────────────
+
+/// Builds and writes `test_files/synthetic/int_linear_cc.mf4`.
+///
+/// Contains 4 channels in a single channel group with 4 samples each:
+/// - `time_ch`    : Float64 LE master (sync_type=time), values 0.0..3.0
+/// - `int8_ch`    : Int8 raw + linear CC (a0=0.5, a1=2.0)
+/// - `int16_ch`   : Int16 raw + linear CC (a0=0.5, a1=2.0)
+/// - `float32_ch` : Float32 raw + linear CC (a0=0.5, a1=2.0)
+///
+/// File layout (exact byte offsets):
+/// ```text
+/// [0]    IdBlock     64 b
+/// [64]   Hd4        104 b
+/// [168]  FhBlock     56 b
+/// [224]  Dg4         64 b
+/// [288]  CG         104 b (cg_cn_first=392, cg_data_bytes=15, cg_cycle_count=4)
+/// [392]  CN_master  160 b (data_type=4/FloatLE, bit_count=64, byte_offset=0)
+/// [552]  CN_int8    160 b (data_type=2/IntLE, bit_count=8, byte_offset=8, cc=1164)
+/// [712]  CN_int16   160 b (data_type=2/IntLE, bit_count=16, byte_offset=9, cc=1164)
+/// [872]  CN_float32 160 b (data_type=4/FloatLE, bit_count=32, byte_offset=11, cc=1164)
+/// [1032] TX "time_ch\0"     32 b
+/// [1064] TX "int8_ch\0"     32 b
+/// [1096] TX "int16_ch\0"    33 b
+/// [1129] TX "float32_ch\0"  35 b
+/// [1164] CC linear(a0=0.5, a1=2.0) 96 b
+/// [1260] DT 84 b (4 records × 15 bytes)
+/// Total: 1344 bytes
+/// ```
+pub fn create_int_linear_cc_fixture() -> Result<()> {
+    const PATH: &str = "test_files/synthetic/int_linear_cc.mf4";
+    std::fs::create_dir_all("test_files/synthetic")?;
+
+    let mut buf: Vec<u8> = Vec::with_capacity(1344);
+
+    write_id_block(&mut buf);
+    debug_assert_eq!(buf.len(), 64, "IdBlock size mismatch");
+
+    write_hd4(&mut buf, 224, 168); // dg_first=224, fh_first=168
+    debug_assert_eq!(buf.len(), 168, "Hd4 size mismatch");
+
+    write_fh(&mut buf);
+    debug_assert_eq!(buf.len(), 224, "FhBlock size mismatch");
+
+    write_dg4(&mut buf, 288, 1260); // cg_first=288, data=1260
+    debug_assert_eq!(buf.len(), 288, "Dg4Block size mismatch");
+
+    // CG: cn_first=392, cycle_count=4, data_bytes=15 (8+1+2+4)
+    write_cg4(&mut buf, 392, 4, 15);
+    debug_assert_eq!(buf.len(), 392, "CG size mismatch");
+
+    // CN_master: type=2, sync=1(time), data_type=4(FloatLE), byte_offset=0, bit_count=64
+    // cn_cn_next=552, tx_name=1032, cc=0
+    write_cn4(&mut buf, 2, 1, 4, 0, 64, 552, 1032, 0);
+    debug_assert_eq!(buf.len(), 552, "CN_master size mismatch");
+
+    // CN_int8: type=0, sync=0, data_type=2(IntLE), byte_offset=8, bit_count=8
+    // cn_cn_next=712, tx_name=1064, cc=1164
+    write_cn4(&mut buf, 0, 0, 2, 8, 8, 712, 1064, 1164);
+    debug_assert_eq!(buf.len(), 712, "CN_int8 size mismatch");
+
+    // CN_int16: type=0, sync=0, data_type=2(IntLE), byte_offset=9, bit_count=16
+    // cn_cn_next=872, tx_name=1096, cc=1164
+    write_cn4(&mut buf, 0, 0, 2, 9, 16, 872, 1096, 1164);
+    debug_assert_eq!(buf.len(), 872, "CN_int16 size mismatch");
+
+    // CN_float32: type=0, sync=0, data_type=4(FloatLE), byte_offset=11, bit_count=32
+    // cn_cn_next=0, tx_name=1129, cc=1164
+    write_cn4(&mut buf, 0, 0, 4, 11, 32, 0, 1129, 1164);
+    debug_assert_eq!(buf.len(), 1032, "CN_float32 size mismatch");
+
+    write_tx(&mut buf, "time_ch"); // 24 + 7 + 1 = 32 bytes → [1032..1064)
+    debug_assert_eq!(buf.len(), 1064, "TX time_ch size mismatch");
+
+    write_tx(&mut buf, "int8_ch"); // 32 bytes → [1064..1096)
+    debug_assert_eq!(buf.len(), 1096, "TX int8_ch size mismatch");
+
+    write_tx(&mut buf, "int16_ch"); // 24 + 8 + 1 = 33 bytes → [1096..1129)
+    debug_assert_eq!(buf.len(), 1129, "TX int16_ch size mismatch");
+
+    write_tx(&mut buf, "float32_ch"); // 24 + 10 + 1 = 35 bytes → [1129..1164)
+    debug_assert_eq!(buf.len(), 1164, "TX float32_ch size mismatch");
+
+    write_cc_linear(&mut buf, 0.5, 2.0); // 96
bytes → [1164..1260)
+    debug_assert_eq!(buf.len(), 1260, "CC linear size mismatch");
+
+    // DT: 4 records × 15 bytes = 60 bytes + 24-byte header = 84 bytes → [1260..1344)
+    // Record layout per row:
+    //   [0..8)  : f64 LE master time (0.0, 1.0, 2.0, 3.0)
+    //   [8]     : i8 raw int8 (-5, 0, 5, 10)
+    //   [9..11) : i16 LE raw int16 (-100, 0, 100, 200)
+    //   [11..15): f32 LE raw float32 (1.5, 2.5, 3.5, 4.5)
+    let raw_i8: [i8; 4] = [-5, 0, 5, 10];
+    let raw_i16: [i16; 4] = [-100, 0, 100, 200];
+    let raw_f32: [f32; 4] = [1.5, 2.5, 3.5, 4.5];
+    let mut records: Vec<u8> = Vec::with_capacity(60);
+    for i in 0..4usize {
+        push_f64(&mut records, i as f64); // master time
+        records.push(raw_i8[i] as u8); // i8 raw (bit-cast)
+        records.extend_from_slice(&raw_i16[i].to_le_bytes()); // i16 raw
+        push_f32(&mut records, raw_f32[i]); // f32 raw
+    }
+    assert_eq!(records.len(), 60, "record data size mismatch");
+    write_dt(&mut buf, &records);
+    debug_assert_eq!(buf.len(), 1344, "DT size mismatch");
+
+    std::fs::write(PATH, &buf)?;
+    Ok(())
+}
+
+// ─── Tests ───────────────────────────────────────────────────────────────────
+
+#[test]
+fn create_fixtures() {
+    create_int_linear_cc_fixture().expect("failed to create int_linear_cc fixture");
+    assert!(
+        std::path::Path::new("test_files/synthetic/int_linear_cc.mf4").exists(),
+        "fixture file not created"
+    );
+}
diff --git a/tests/mdf3_conversion.rs b/tests/mdf3_conversion.rs
new file mode 100644
index 0000000..a6889eb
--- /dev/null
+++ b/tests/mdf3_conversion.rs
@@ -0,0 +1,86 @@
+use anyhow::Result;
+use mdfr::mdfreader::Mdf;
+use std::sync::LazyLock;
+
+/// Directory holding the MDF3 sample files (with trailing slash).
+///
+/// Defaults to the original developer checkout; set `MDF3_TEST_PATH` to run the
+/// suite against samples stored elsewhere.
+static MDF3_PATH: LazyLock<String> = LazyLock::new(|| {
+    std::env::var("MDF3_TEST_PATH")
+        .unwrap_or_else(|_| "/home/ratal/workspace/mdfreader/mdfreader/tests/mdf3/".to_string())
+});
+
+/// Helper: load a file and all its channel data
+///
+/// `filename` is appended verbatim to [`MDF3_PATH`]; any open or load error is
+/// returned to the calling test.
+fn load_mdf3(filename: &str) -> Result<Mdf> {
+    let path = format!("{}{}", MDF3_PATH.as_str(), filename);
+    let mut mdf = Mdf::new(&path)?;
+    mdf.load_all_channels_data_in_memory()?;
+    Ok(mdf)
+}
+
+#[test]
+fn
mdf3_canape_loads() -> Result<()> {
+    let mdf = load_mdf3("MDF_CANAPE.mdf")?;
+    assert!(!mdf.get_channel_names_set().is_empty());
+    Ok(())
+}
+
+// Every test in this file follows the same pattern: load one MDF3 sample through
+// `load_mdf3` (which also loads all channel data) and assert only that the set of
+// channel names is non-empty. No channel values are checked here.
+
+#[test]
+fn mdf3_ascet_loads() -> Result<()> {
+    let mdf = load_mdf3("ASCET.dat")?;
+    assert!(!mdf.get_channel_names_set().is_empty());
+    Ok(())
+}
+
+#[test]
+fn mdf3_mda71_loads() -> Result<()> {
+    let mdf = load_mdf3("MDA71.dat")?;
+    assert!(!mdf.get_channel_names_set().is_empty());
+    Ok(())
+}
+
+#[test]
+fn mdf3_rj_can_loads() -> Result<()> {
+    // RJ file has linear conversion channels
+    let mdf = load_mdf3("RJ_N16-12-363_BM-15C-0024_228_2_20170116094355_CAN.dat")?;
+    assert!(!mdf.get_channel_names_set().is_empty());
+    Ok(())
+}
+
+#[test]
+fn mdf3_measure_loads() -> Result<()> {
+    let mdf = load_mdf3("Measure.dat")?;
+    assert!(!mdf.get_channel_names_set().is_empty());
+    Ok(())
+}
+
+#[test]
+fn mdf3_t3_nedc_loads() -> Result<()> {
+    let mdf = load_mdf3("T3_121121_000_6NEDC.dat")?;
+    assert!(!mdf.get_channel_names_set().is_empty());
+    Ok(())
+}
+
+#[test]
+fn mdf3_738l10_loads() -> Result<()> {
+    // The sample's file name contains spaces; it is passed through unchanged.
+    let mdf = load_mdf3("738L10_040410 Base Acc 30km_hr.dat")?;
+    assert!(!mdf.get_channel_names_set().is_empty());
+    Ok(())
+}
+
+#[test]
+fn mdf3_hidden_bytes_loads() -> Result<()> {
+    // Judging by the file name, this sample has hidden and non-aligned bytes — TODO confirm.
+    let mdf = load_mdf3("Mdf3_hiddenBytes_NotAlignedBytes.dat")?;
+    assert!(!mdf.get_channel_names_set().is_empty());
+    Ok(())
+}
+
+#[test]
+fn mdf3_tgt_loads() -> Result<()> {
+    let mdf = load_mdf3("TGT.dat")?;
+    assert!(!mdf.get_channel_names_set().is_empty());
+    Ok(())
+}
+
+#[test]
+fn mdf3_canoe_unsorted_loads() -> Result<()> {
+    // Judging by the file name, an unsorted CANoe recording — TODO confirm.
+    let mdf = load_mdf3("CANoe3_unsorted.mdf")?;
+    assert!(!mdf.get_channel_names_set().is_empty());
+    Ok(())
+}