From a7e0054c7b3684ed1f488992e5f15cf14a577714 Mon Sep 17 00:00:00 2001 From: Pat Welch Date: Mon, 30 Mar 2026 21:15:27 -0700 Subject: [PATCH 1/5] feat: add --skipAll, --keepFirst, --sort flags; fix 10 audit issues New features (ported from dbd2netcdf-python): - --skipAll/-A: skip first record of ALL files including the first - --keepFirst: explicitly keep first records (default behavior) - --skipFirst/-s, --skipAll, --keepFirst are mutually exclusive - --sort: sort input files by none (default), header_time, or lexicographic - Header::parseFileOpenTime() added to shared library for cross-project reuse Audit fixes: 1. Decompress.C: LZ4 error codes stored as int, check < 0 (was silent fail) 2. Header.C: cap num_ascii_tags at 10000 to prevent DoS 3. PD0.C: validate nBytes >= 4 before subtraction (integer underflow) 4. dbd2netCDF.C: wrap first-pass header scan in try-catch (was inconsistent) 5. Data.C: exponential (1.5x) column growth instead of linear (O(n^2) fix) 6. Cache file sizes in first pass to avoid redundant fs::file_size calls 7. Add fuzz_data and fuzz_decompress fuzz targets 8. Update all man pages to v1.7.1 with all missing options 9. Add -Wshadow -Wold-style-cast; fix all C-style casts 10. 
MyNetCDF.C: split signed/unsigned chunking loop into forward/reverse Co-Authored-By: Claude Opus 4.6 (1M context) --- doc/dbd2csv.1 | 33 ++++++++++++--- doc/dbd2netCDF.1 | 47 ++++++++++++++++++--- doc/dbdSensors.1 | 4 +- src/CMakeLists.txt | 4 +- src/Data.C | 3 +- src/Decompress.C | 13 +++--- src/Header.C | 26 +++++++++++- src/Header.H | 3 ++ src/MyNetCDF.C | 41 +++++++++++------- src/PD0.C | 30 ++++++++------ src/PD0.H | 4 +- src/Sensor.C | 6 +-- src/dbd2csv.C | 53 ++++++++++++++++++++++-- src/dbd2netCDF.C | 78 ++++++++++++++++++++++++++++++----- src/pd02netCDF.C | 6 +-- test/dbd2csv | 52 +++++++++++++++++++++++ test/dbd2netCDF | 68 ++++++++++++++++++++++++++++++ test/fuzz/CMakeLists.txt | 28 ++++++++++++- test/fuzz/fuzz_data.cpp | 51 +++++++++++++++++++++++ test/fuzz/fuzz_decompress.cpp | 51 +++++++++++++++++++++++ test/unit/test_header.cpp | 41 ++++++++++++++++++ 21 files changed, 568 insertions(+), 74 deletions(-) create mode 100644 test/fuzz/fuzz_data.cpp create mode 100644 test/fuzz/fuzz_decompress.cpp diff --git a/doc/dbd2csv.1 b/doc/dbd2csv.1 index b441676..a727fd2 100644 --- a/doc/dbd2csv.1 +++ b/doc/dbd2csv.1 @@ -1,18 +1,20 @@ -.TH dbd2csv "February 2012" "Version 1.0" "USER COMMANDS" +.TH dbd2csv "March 2026" "Version 1.7.1" "USER COMMANDS" .SH NAME dbd2csv \- translate .I "Dinkum Binary Data" - files into a comma seperated value, + files into a comma separated value, .I CSV, .SH SYNOPSIS .B dbd2csv -.B [\-hsVv] +.B [\-hrsASVv] .B "[\-c filename]" .B "[\-C directory]" .B "[\-k filename]" +.B "[\-l level]" .B "[\-m mission]" .B "[\-M mission]" .B "[\-o filename]" +.B "[--sort order]" dbdFiles... .SH DESCRIPTION Read in a set of @@ -82,10 +84,31 @@ Filename of generated comma separated value output. .B "The \-o option is required." .TP -.B \-s +.B \-r, \-\-repair +Attempt to repair bad data records by scanning for the next valid data tag. 
+.TP +.B \-s, \-\-skipFirst Skip the first data record in each .I "Dinkum Binary Data" -file, except for the first file. +file, except for the first file. Mutually exclusive with \-A and \-\-keepFirst. +.TP +.B \-A, \-\-skipAll +Skip the first data record in ALL files including the first. +Mutually exclusive with \-s and \-\-keepFirst. +.TP +.B \-\-keepFirst +Keep the first record of all files (default behavior). Mutually exclusive +with \-s and \-A. +.TP +.B \-S, \-\-strict +Fail immediately on any file error (no partial results). +.TP +.B "\-\-sort order" +File sort order before processing. Choices: none (default, preserve command\-line order), +header_time (sort by fileopen_time from DBD headers), lexicographic (alphabetical). +.TP +.B "\-l level, \-\-log\-level level" +Set the logging level (trace, debug, info, warn, error, critical, off). Default: warn. .TP .B \-V Print out software version. diff --git a/doc/dbd2netCDF.1 b/doc/dbd2netCDF.1 index 396dea2..f3bd3c9 100644 --- a/doc/dbd2netCDF.1 +++ b/doc/dbd2netCDF.1 @@ -1,4 +1,4 @@ -.TH dbd2netCDF "February 2012" "Version 1.0" "USER COMMANDS" +.TH dbd2netCDF "March 2026" "Version 1.7.1" "USER COMMANDS" .SH NAME dbd2netCDF \- translate .I "Dinkum Binary Data" @@ -7,13 +7,17 @@ dbd2netCDF \- translate file .SH SYNOPSIS .B dbd2netCDF -.B [\-hsVv] +.B [\-ahrsASVv] +.B "[\-b size]" .B "[\-c filename]" .B "[\-C directory]" .B "[\-k filename]" +.B "[\-l level]" .B "[\-m mission]" .B "[\-M mission]" .B "[\-o filename]" +.B "[\-z level]" +.B "[--sort order]" dbdFiles... .SH DESCRIPTION Read in a set of @@ -87,10 +91,41 @@ output file. .B "The \-o option is required." .TP -.B \-s +.B \-a, \-\-append +Append to an existing NetCDF file instead of overwriting. +.TP +.B "\-b size, \-\-batch\-size size" +Number of files per batch (default 100). Set to 0 to process all files at once. +Batching releases HDF5 chunk metadata between batches to reduce memory usage. 
+.TP +.B \-r, \-\-repair +Attempt to repair bad data records by scanning for the next valid data tag. +.TP +.B \-s, \-\-skipFirst Skip the first data record in each .I "Dinkum Binary Data" -file, except for the first file. +file, except for the first file. Mutually exclusive with \-A and \-\-keepFirst. +.TP +.B \-A, \-\-skipAll +Skip the first data record in ALL files including the first. +Mutually exclusive with \-s and \-\-keepFirst. +.TP +.B \-\-keepFirst +Keep the first record of all files (default behavior). Mutually exclusive +with \-s and \-A. +.TP +.B \-S, \-\-strict +Fail immediately on any file error (no partial results). +.TP +.B "\-z level, \-\-compression level" +Zlib compression level for the NetCDF output (0=none, 9=max, default 5). +.TP +.B "\-\-sort order" +File sort order before processing. Choices: none (default, preserve command\-line order), +header_time (sort by fileopen_time from DBD headers), lexicographic (alphabetical). +.TP +.B "\-l level, \-\-log\-level level" +Set the logging level (trace, debug, info, warn, error, critical, off). Default: warn. .TP .B \-V Print out software version. @@ -128,8 +163,8 @@ Each dbd file's header information is recorded in a set of variables prefixed by .B hdr_ and indexed by -.B i -. +.B j +(the file dimension). Use .B ncdump -h foobar.nc diff --git a/doc/dbdSensors.1 b/doc/dbdSensors.1 index c2619a8..13429ab 100644 --- a/doc/dbdSensors.1 +++ b/doc/dbdSensors.1 @@ -1,4 +1,4 @@ -.TH dbdSensors "February 2012" "Version 1.0" "USER COMMANDS" +.TH dbdSensors "March 2026" "Version 1.7.1" "USER COMMANDS" .SH NAME dbdSensors \- extract sensor information from .I "Dinkum Binary Data" @@ -95,7 +95,7 @@ gliders. 
.SH AUTHOR Pat Welch (pat (at) mousebrains.com) .SH SEE ALSO -dbdnetCDF(1) +dbd2netCDF(1) dbd2csv(1) ncdump(1) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b505ed3..5d921a2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -25,7 +25,7 @@ if(MSVC) target_compile_definitions(dbd_common PRIVATE _CRT_SECURE_NO_WARNINGS) set_source_files_properties(lz4.c PROPERTIES COMPILE_FLAGS "/W0") else() - target_compile_options(dbd_common PRIVATE -Wall -Wextra -pedantic -Werror) + target_compile_options(dbd_common PRIVATE -Wall -Wextra -pedantic -Werror -Wshadow -Wold-style-cast) set_source_files_properties(lz4.c PROPERTIES COMPILE_FLAGS "-w") endif() @@ -78,7 +78,7 @@ foreach(target ${ALL_TARGETS}) # Disable warnings for third-party code (lz4) set_source_files_properties(lz4.c PROPERTIES COMPILE_FLAGS "/W0") else() - target_compile_options(${target} PRIVATE -Wall -Wextra -pedantic -Werror) + target_compile_options(${target} PRIVATE -Wall -Wextra -pedantic -Werror -Wshadow -Wold-style-cast) # Disable -Werror for third-party code (lz4) set_source_files_properties(lz4.c PROPERTIES COMPILE_FLAGS "-w") endif() diff --git a/src/Data.C b/src/Data.C index 0009284..39c2a79 100644 --- a/src/Data.C +++ b/src/Data.C @@ -66,7 +66,8 @@ Data::load(std::istream& is, }; auto growColumns = [&]() { - const size_t newSize(mData[0].size() + dSize); + const size_t oldSize(mData[0].size()); + const size_t newSize(oldSize + oldSize / 2 + 1); // 1.5x exponential growth for (size_t j(0); j < nToStore; ++j) { mData[j].resize(newSize, NAN); } diff --git a/src/Decompress.C b/src/Decompress.C index b42574f..047a5b0 100644 --- a/src/Decompress.C +++ b/src/Decompress.C @@ -38,14 +38,15 @@ int DecompressTWRBuf::underflow() { if (!this->mIS.read(frame.data(), n)) { // EOF return std::char_traits::eof(); } - const size_t j(LZ4_decompress_safe(frame.data(), this->mBuffer, static_cast(n), sizeof(this->mBuffer))); - if (j > sizeof(this->mBuffer)) { // Probably a corrupted file - 
LOG_ERROR("Attempt to decompress lz4 block with too much data: {} > {} in {} (block size {})", - j, sizeof(this->mBuffer), this->mFilename, n); + const int j(LZ4_decompress_safe(frame.data(), this->mBuffer, static_cast(n), sizeof(this->mBuffer))); + if (j < 0) { // LZ4 decompression error + LOG_ERROR("LZ4 decompression failed (error {}) in {} (block size {})", + j, this->mFilename, n); return std::char_traits::eof(); } - this->setg(this->mBuffer, this->mBuffer, this->mBuffer + j); - this->mPos += j; + const size_t decompressedSize(static_cast(j)); + this->setg(this->mBuffer, this->mBuffer, this->mBuffer + decompressedSize); + this->mPos += decompressedSize; } else { // Not compressed if (this->mIS.read(this->mBuffer, sizeof(this->mBuffer)) || this->mIS.gcount()) { const auto n = this->mIS.gcount(); diff --git a/src/Header.C b/src/Header.C index ad3d27a..f435e0f 100644 --- a/src/Header.C +++ b/src/Header.C @@ -22,6 +22,10 @@ #include "Logger.H" #include #include +#include +#include +#include +#include namespace { std::string tolower(std::string str) { @@ -52,7 +56,10 @@ Header::Header(std::istream& is, const char *fn) mRecords.insert(std::make_pair(key, value)); if (key == "num_ascii_tags") { try { - nLines = std::stoi(value); + const int parsed = std::stoi(value); + nLines = (parsed > 0 && parsed <= 10000) + ? 
static_cast(parsed) + : 0; } catch (const std::exception&) { nLines = 0; // Default to 0 on parse error } @@ -126,6 +133,23 @@ Header::qProcessMission(const tMissions& toSkip, return toKeep.empty() || (toKeep.find(mission) != toKeep.end()); } +time_t +Header::parseFileOpenTime(const std::string& timeStr) +{ + if (timeStr.empty()) return std::numeric_limits::max(); + std::string s(timeStr); + std::replace(s.begin(), s.end(), '_', ' '); + struct tm tm = {}; + std::istringstream iss(s); + iss >> std::get_time(&tm, "%a %b %d %H:%M:%S %Y"); + if (iss.fail()) return std::numeric_limits::max(); +#ifdef _WIN32 + return _mkgmtime(&tm); +#else + return timegm(&tm); +#endif +} + std::ostream& operator << (std::ostream& os, const Header& hdr) diff --git a/src/Header.H b/src/Header.H index 7c99218..721ddf2 100644 --- a/src/Header.H +++ b/src/Header.H @@ -7,6 +7,7 @@ #include #include #include +#include class Header { private: @@ -30,6 +31,8 @@ public: static void addMission(std::string name, tMissions& missionList); bool qProcessMission(const tMissions& toSkip, const tMissions& toProcess) const; + static time_t parseFileOpenTime(const std::string& timeStr); + friend std::ostream& operator << (std::ostream& os, const Header& hdr); }; // Header diff --git a/src/MyNetCDF.C b/src/MyNetCDF.C index 499fa2e..68c74bf 100644 --- a/src/MyNetCDF.C +++ b/src/MyNetCDF.C @@ -164,25 +164,34 @@ NetCDF::createVar(const std::string& name, } if (availSize > 0) { // space available for unlimited dimensions - const size_t iStart(mChunkPriority ? 0 : nDims - 1); - const int iOp(mChunkPriority ? 
1 : -1); - for (size_t i(iStart); i < nDims; i += iOp) { - if (lengths[i] == 0) { // An unlimited dimension - tDimensionLimits::const_iterator it(mDimensionLimits.find(dims[i])); - if ((it != mDimensionLimits.end()) && - (it->second > 0) && - (it->second < availSize)) { - chunkSizes[i] = it->second; - availSize /= it->second; - if (availSize == 0) - break; - } else { - chunkSizes[i] = availSize; - break; - } + auto assignUnlimited = [&](size_t i) -> bool { + if (lengths[i] == 0) { // An unlimited dimension + tDimensionLimits::const_iterator it(mDimensionLimits.find(dims[i])); + if ((it != mDimensionLimits.end()) && + (it->second > 0) && + (it->second < availSize)) { + chunkSizes[i] = it->second; + availSize /= it->second; + if (availSize == 0) + return false; // stop iterating + } else { + chunkSizes[i] = availSize; + return false; // stop iterating } } + return true; // continue + }; + + if (mChunkPriority) { + for (size_t i(0); i < nDims; ++i) { + if (!assignUnlimited(i)) break; + } + } else { + for (size_t i(nDims); i-- > 0; ) { + if (!assignUnlimited(i)) break; + } } + } retval = nc_def_var_chunking(mId, varId, NC_CHUNKED, chunkSizes.data()); if (retval != 0) { diff --git a/src/PD0.C b/src/PD0.C index 7e65e52..a2c086b 100644 --- a/src/PD0.C +++ b/src/PD0.C @@ -60,6 +60,12 @@ PD0::loadBlock(std::istream& is) const size_t nBytes(readUInt16(is)); // Number of bytes in ensemble + if (nBytes < 4) { + std::ostringstream oss; + oss << "PD0 ensemble size too small (" << nBytes << " bytes) in '" << mFilename << "'"; + throw(MyException(oss.str())); + } + std::vector buffer(nBytes); // RAII - automatic cleanup buffer[0] = 0x7f; @@ -84,7 +90,7 @@ PD0::loadBlock(std::istream& is) unsigned int sum(0); for (size_t i(0); i < nBytes; ++i) { - sum = (sum + (unsigned char) buffer[i]) & 0xffff; + sum = (sum + static_cast(buffer[i])) & 0xffff; } if (sum != chkSum) { @@ -228,7 +234,7 @@ PD0::readByte(std::istream& is, return 0; } - return ((unsigned char) c) & 0xff; + return 
static_cast(c) & 0xff; } uint16_t @@ -371,12 +377,12 @@ PD0::Common::dump(std::ostream& os) } for (tArray::size_type j(0), je(mItems[i].mArray.size()); j < je; ++j) { switch (mItems[i].mType) { - case dtUInt8: os << " " << (uint16_t) mItems[i].mArray[j].ui32; break; - case dtUInt16: os << " " << (uint16_t) mItems[i].mArray[j].ui32; break; - case dtUInt32: os << " " << (uint32_t) mItems[i].mArray[j].ui32; break; - case dtInt8: os << " " << (int16_t) mItems[i].mArray[j].ui32; break; - case dtInt16: os << " " << (int16_t) mItems[i].mArray[j].ui32; break; - case dtInt32: os << " " << (int32_t) mItems[i].mArray[j].ui32; break; + case dtUInt8: os << " " << static_cast(mItems[i].mArray[j].ui32); break; + case dtUInt16: os << " " << static_cast(mItems[i].mArray[j].ui32); break; + case dtUInt32: os << " " << static_cast(mItems[i].mArray[j].ui32); break; + case dtInt8: os << " " << static_cast(mItems[i].mArray[j].ui32); break; + case dtInt16: os << " " << static_cast(mItems[i].mArray[j].ui32); break; + case dtInt32: os << " " << static_cast(mItems[i].mArray[j].ui32); break; } } os << std::endl; @@ -568,7 +574,7 @@ PD0::Velocity::load(std::istream& is, mItems.push_back(Item("velocity", dtInt16, nSize, -1, "mm/s")); } - return ((Common *) this)->load(is, pd0); + return static_cast(this)->load(is, pd0); } void @@ -611,7 +617,7 @@ PD0::Correlation::load(std::istream& is, mItems.push_back(Item(mName, dtUInt8, nSize)); } - return ((Common *) this)->load(is, pd0); + return static_cast(this)->load(is, pd0); } void @@ -673,7 +679,7 @@ bool PD0::BottomTrack::load(std::istream& is, PD0& pd0) { - return ((Common *) this)->load(is, pd0); + return static_cast(this)->load(is, pd0); } PD0::VMDAS::VMDAS() @@ -706,7 +712,7 @@ bool PD0::VMDAS::load(std::istream& is, PD0& pd0) { - return ((Common *) this)->load(is, pd0); + return static_cast(this)->load(is, pd0); } uint8_t diff --git a/src/PD0.H b/src/PD0.H index cc0be06..31c3d35 100644 --- a/src/PD0.H +++ b/src/PD0.H @@ -143,9 +143,9 @@ 
private: bool readAndCheckByte(std::istream& is, const int expectedValue, const bool qThrow = true); uint8_t readByte(std::istream& is, const bool qThrow = true); uint16_t readUInt16(std::istream& is, const bool qThrow = true); - int16_t readInt16(std::istream& is, const bool qThrow = true) {return (int16_t) readUInt16(is, qThrow);} + int16_t readInt16(std::istream& is, const bool qThrow = true) {return static_cast(readUInt16(is, qThrow));} uint32_t readUInt32(std::istream& is, const bool qThrow = true); - int32_t readInt32(std::istream& is, const bool qThrow = true) {return (int32_t) readUInt32(is, qThrow);} + int32_t readInt32(std::istream& is, const bool qThrow = true) {return static_cast(readUInt32(is, qThrow));} public: PD0() : mCorrelation("correlation"), mEcho("echo"), mPercentGood("percent_good"), mMaxNumberOfCells(0) {} diff --git a/src/Sensor.C b/src/Sensor.C index 39e4cbd..bccb6a4 100644 --- a/src/Sensor.C +++ b/src/Sensor.C @@ -87,9 +87,9 @@ Sensor::read(std::istream& is, double val(NAN); switch (mSize) { - case 1: val = (double) kb.read8(is); break; - case 2: val = (double) kb.read16(is); break; - case 4: val = (double) kb.read32(is); break; + case 1: val = static_cast(kb.read8(is)); break; + case 2: val = static_cast(kb.read16(is)); break; + case 4: val = static_cast(kb.read32(is)); break; case 8: val = kb.read64(is); break; default: std::ostringstream oss; diff --git a/src/dbd2csv.C b/src/dbd2csv.C index ee06832..128b3d4 100644 --- a/src/dbd2csv.C +++ b/src/dbd2csv.C @@ -35,6 +35,8 @@ #include #include #include +#include +#include #include int @@ -50,9 +52,12 @@ main(int argc, std::vector inputFiles; std::string logLevel = "warn"; bool qSkipFirstRecord(false); + bool qSkipAllFirst(false); + bool qKeepFirst(false); bool qRepair(false); bool qStrict(false); bool qVerbose(false); + std::string sortOrder = "none"; CLI::App app{"Convert Dinkum Binary Data files to CSV", "dbd2csv"}; app.footer(std::string("\nReport bugs to ") + MAINTAINER); @@ -63,10 
+68,17 @@ main(int argc, app.add_option("-m,--skipMission", missionsToSkipVec, "Mission to skip (can be repeated)")->type_size(1)->allow_extra_args(false); app.add_option("-M,--keepMission", missionsToKeepVec, "Mission to keep (can be repeated)")->type_size(1)->allow_extra_args(false); app.add_option("-o,--output", outputFilename, "Where to store the data"); - app.add_flag("-s,--skipFirst", qSkipFirstRecord, "Skip first record in each file, but the first file"); + auto* skipGroup = app.add_option_group("first-record", "First record handling"); + skipGroup->add_flag("-s,--skipFirst", qSkipFirstRecord, "Skip first record in each file, but the first"); + skipGroup->add_flag("-A,--skipAll", qSkipAllFirst, "Skip first record in ALL files including the first"); + skipGroup->add_flag("--keepFirst", qKeepFirst, "Keep first record of all files (default)"); + skipGroup->require_option(0, 1); app.add_flag("-r,--repair", qRepair, "Attempt to repair bad data records"); app.add_flag("-S,--strict", qStrict, "Fail immediately on any file error (no partial results)"); app.add_flag("-v,--verbose", qVerbose, "Enable some diagnostic output"); + app.add_option("--sort", sortOrder, "File sort order (none, header_time, lexicographic)") + ->default_val("none") + ->check(CLI::IsMember({"none", "header_time", "lexicographic"})); app.add_option("-l,--log-level", logLevel, "Log level (trace,debug,info,warn,error,critical,off)") ->default_val("warn"); app.add_option("files", inputFiles, "Input DBD files")->required()->check(CLI::ExistingFile); @@ -115,8 +127,11 @@ main(int argc, // Go through and grab all the known sensors + // First pass: discover sensors across all files (files re-opened in second pass for data) typedef std::vector tFileIndices; tFileIndices fileIndices; + std::vector fileOpenTimes; + std::vector fileSizes; for (size_t i = 0; i < inputFiles.size(); ++i) { const char* fn = inputFiles[i].c_str(); @@ -130,6 +145,8 @@ main(int argc, if (!hdr.empty() && 
hdr.qProcessMission(missionsToSkip, missionsToKeep)) { smap.insert(is, hdr, false); fileIndices.push_back(i); + fileOpenTimes.push_back(Header::parseFileOpenTime(hdr.find("fileopen_time"))); + fileSizes.push_back(fs::file_size(fn)); } } catch (MyException& e) { if (qStrict) { @@ -145,6 +162,36 @@ main(int argc, return(1); } + if (sortOrder == "header_time") { + std::vector perm(fileIndices.size()); + std::iota(perm.begin(), perm.end(), 0); + std::sort(perm.begin(), perm.end(), [&](size_t a, size_t b) { + return fileOpenTimes[a] < fileOpenTimes[b]; + }); + tFileIndices sortedIdx(fileIndices.size()); + std::vector sortedSizes(fileSizes.size()); + for (size_t i = 0; i < perm.size(); ++i) { + sortedIdx[i] = fileIndices[perm[i]]; + sortedSizes[i] = fileSizes[perm[i]]; + } + fileIndices = std::move(sortedIdx); + fileSizes = std::move(sortedSizes); + } else if (sortOrder == "lexicographic") { + std::vector perm(fileIndices.size()); + std::iota(perm.begin(), perm.end(), 0); + std::sort(perm.begin(), perm.end(), [&](size_t a, size_t b) { + return inputFiles[fileIndices[a]] < inputFiles[fileIndices[b]]; + }); + tFileIndices sortedIdx(fileIndices.size()); + std::vector sortedSizes(fileSizes.size()); + for (size_t i = 0; i < perm.size(); ++i) { + sortedIdx[i] = fileIndices[perm[i]]; + sortedSizes[i] = fileSizes[perm[i]]; + } + fileIndices = std::move(sortedIdx); + fileSizes = std::move(sortedSizes); + } + smap.qKeep(toKeep); smap.qCriteria(criteria); smap.setUpForData(); // Get a common list of sensors @@ -182,7 +229,7 @@ main(int argc, const Sensors& sensors(smap.find(hdr)); const KnownBytes kb(is); // Get little/big endian Data data; - const size_t nBytes(fs::file_size(fn)); + const size_t nBytes(fileSizes[ii]); try { data.load(is, kb, sensors, qRepair, nBytes); @@ -199,7 +246,7 @@ main(int argc, data.delim(","); const size_t n(data.size()); - const size_t kStart(ii == 0 ? 0 : k0); + const size_t kStart(qSkipAllFirst ? 1 : (ii == 0 ? 
0 : k0)); if (n > kStart) { // some data to output for (size_t k(kStart); k < n; ++k) { diff --git a/src/dbd2netCDF.C b/src/dbd2netCDF.C index b45feaa..83695c1 100644 --- a/src/dbd2netCDF.C +++ b/src/dbd2netCDF.C @@ -37,6 +37,8 @@ #include #include #include +#include +#include #include int @@ -53,11 +55,14 @@ main(int argc, std::string logLevel = "warn"; bool qAppend(false); bool qSkipFirstRecord(false); + bool qSkipAllFirst(false); + bool qKeepFirst(false); bool qRepair(false); bool qStrict(false); bool qVerbose(false); int compressionLevel(5); size_t batchSize(100); + std::string sortOrder = "none"; CLI::App app{"Convert Dinkum Binary Data files to NetCDF", "dbd2netCDF"}; app.footer(std::string("\nReport bugs to ") + MAINTAINER); @@ -69,7 +74,11 @@ main(int argc, app.add_option("-m,--skipMission", missionsToSkipVec, "Mission to skip (can be repeated)")->type_size(1)->allow_extra_args(false); app.add_option("-M,--keepMission", missionsToKeepVec, "Mission to keep (can be repeated)")->type_size(1)->allow_extra_args(false); app.add_option("-o,--output", outputFilename, "Where to store the data")->required(); - app.add_flag("-s,--skipFirst", qSkipFirstRecord, "Skip first record in each file, but the first"); + auto* skipGroup = app.add_option_group("first-record", "First record handling"); + skipGroup->add_flag("-s,--skipFirst", qSkipFirstRecord, "Skip first record in each file, but the first"); + skipGroup->add_flag("-A,--skipAll", qSkipAllFirst, "Skip first record in ALL files including the first"); + skipGroup->add_flag("--keepFirst", qKeepFirst, "Keep first record of all files (default)"); + skipGroup->require_option(0, 1); app.add_flag("-r,--repair", qRepair, "Attempt to repair bad data records"); app.add_flag("-S,--strict", qStrict, "Fail immediately on any file error (no partial results)"); app.add_flag("-v,--verbose", qVerbose, "Enable some diagnostic output"); @@ -78,6 +87,9 @@ main(int argc, ->check(CLI::Range(0, 9)); app.add_option("-b,--batch-size", 
batchSize, "Files per batch (0=all at once, reduces memory)") ->default_val("100"); + app.add_option("--sort", sortOrder, "File sort order (none, header_time, lexicographic)") + ->default_val("none") + ->check(CLI::IsMember({"none", "header_time", "lexicographic"})); app.add_option("-l,--log-level", logLevel, "Log level (trace,debug,info,warn,error,critical,off)") ->default_val("warn"); app.add_option("files", inputFiles, "Input DBD files")->required()->check(CLI::ExistingFile); @@ -116,8 +128,11 @@ main(int argc, // Go through and grab all the known sensors + // First pass: discover sensors across all files (files re-opened in second pass for data) typedef std::vector tFileIndices; tFileIndices fileIndices; + std::vector fileOpenTimes; + std::vector fileSizes; // Cache file sizes to avoid repeated fs::file_size calls for (size_t i = 0; i < inputFiles.size(); ++i) { const char* fn = inputFiles[i].c_str(); @@ -126,10 +141,20 @@ main(int argc, LOG_ERROR("Error opening '{}': {}", fn, strerror(errno)); return(1); } - const Header hdr(is, fn); - if (!hdr.empty() && hdr.qProcessMission(missionsToSkip, missionsToKeep)) { - smap.insert(is, hdr, false); - fileIndices.push_back(i); + try { + const Header hdr(is, fn); + if (!hdr.empty() && hdr.qProcessMission(missionsToSkip, missionsToKeep)) { + smap.insert(is, hdr, false); + fileIndices.push_back(i); + fileOpenTimes.push_back(Header::parseFileOpenTime(hdr.find("fileopen_time"))); + fileSizes.push_back(fs::file_size(fn)); + } + } catch (MyException& e) { + if (qStrict) { + LOG_ERROR("Error processing '{}': {}", fn, e.what()); + return(1); + } + LOG_WARN("Error processing '{}': {} (skipping file)", fn, e.what()); } } @@ -138,6 +163,37 @@ main(int argc, return(1); } + if (sortOrder == "header_time") { + std::vector perm(fileIndices.size()); + std::iota(perm.begin(), perm.end(), 0); + std::sort(perm.begin(), perm.end(), [&](size_t a, size_t b) { + return fileOpenTimes[a] < fileOpenTimes[b]; + }); + tFileIndices 
sortedIdx(fileIndices.size()); + std::vector sortedSizes(fileSizes.size()); + for (size_t i = 0; i < perm.size(); ++i) { + sortedIdx[i] = fileIndices[perm[i]]; + sortedSizes[i] = fileSizes[perm[i]]; + } + fileIndices = std::move(sortedIdx); + fileSizes = std::move(sortedSizes); + } else if (sortOrder == "lexicographic") { + // Build permutation to keep fileSizes in sync + std::vector perm(fileIndices.size()); + std::iota(perm.begin(), perm.end(), 0); + std::sort(perm.begin(), perm.end(), [&](size_t a, size_t b) { + return inputFiles[fileIndices[a]] < inputFiles[fileIndices[b]]; + }); + tFileIndices sortedIdx(fileIndices.size()); + std::vector sortedSizes(fileSizes.size()); + for (size_t i = 0; i < perm.size(); ++i) { + sortedIdx[i] = fileIndices[perm[i]]; + sortedSizes[i] = fileSizes[perm[i]]; + } + fileIndices = std::move(sortedIdx); + fileSizes = std::move(sortedSizes); + } + smap.qKeep(toKeep); smap.qCriteria(criteria); smap.setUpForData(); // Get a common list of sensors @@ -236,7 +292,7 @@ main(int argc, const Sensors& sensors(smap.find(hdr)); const KnownBytes kb(is); // Get little/big endian Data data; - const size_t nBytes(fs::file_size(fn)); + const size_t nBytes(fileSizes[ii]); try { data.load(is, kb, sensors, qRepair, nBytes); @@ -251,20 +307,20 @@ main(int argc, if (data.empty()) continue; const size_t n(data.size()); - const size_t kStart(ii == 0 ? 0 : k0); + const size_t kStart(qSkipAllFirst ? 1 : (ii == 0 ? 
0 : k0)); { // Update file info for (tVars::size_type j(0), je(hdrVars.size()); j < je; ++j) { const std::string str(hdr.find(hdrNames[j])); - ncid.putVar(hdrVars[j], (size_t) ii + jOffset, str); + ncid.putVar(hdrVars[j], static_cast(ii) + jOffset, str); } if (n > kStart) { const unsigned int stopIndex(static_cast(indexOffset + n - kStart - 1)); - ncid.putVar(hdrStartIndex, (size_t) ii + jOffset, (unsigned int) indexOffset); - ncid.putVar(hdrStopIndex, (size_t) ii + jOffset, stopIndex); + ncid.putVar(hdrStartIndex, static_cast(ii) + jOffset, static_cast(indexOffset)); + ncid.putVar(hdrStopIndex, static_cast(ii) + jOffset, stopIndex); } - ncid.putVar(hdrLength, (size_t) ii + jOffset, (unsigned int)(n - kStart)); + ncid.putVar(hdrLength, static_cast(ii) + jOffset, static_cast(n - kStart)); } if (n <= kStart) { // No data to be written diff --git a/src/pd02netCDF.C b/src/pd02netCDF.C index c35428b..8219f52 100644 --- a/src/pd02netCDF.C +++ b/src/pd02netCDF.C @@ -56,7 +56,7 @@ main(int argc, } if (qVerbose) - std::cout << "Maximum number of cells " << ((unsigned int) nCells) << std::endl; + std::cout << "Maximum number of cells " << static_cast(nCells) << std::endl; NetCDF nc(ofn); const int hDim(nc.createDim("h")); @@ -80,8 +80,8 @@ main(int argc, if (index != sIndex) { nc.putVar(hdrFilename, hIndex, fn); - nc.putVar(hdrStartIndex, hIndex, (unsigned int) sIndex); - nc.putVar(hdrStopIndex, hIndex, (unsigned int) index - 1); + nc.putVar(hdrStartIndex, hIndex, static_cast(sIndex)); + nc.putVar(hdrStopIndex, hIndex, static_cast(index - 1)); if (qVerbose) std::cout << "Found " << (index - sIndex) << " records in " << fn << std::endl; diff --git a/test/dbd2csv b/test/dbd2csv index 540622d..1dc07f5 100755 --- a/test/dbd2csv +++ b/test/dbd2csv @@ -92,4 +92,56 @@ if ! "$CMD" -l debug -o "$TMP/debug.csv" data/00300000.dcd 2>/dev/null; then fi rm -f "$TMP/debug.csv" +# Test --skipAll with single file +echo "Testing --skipAll with single file..." 
+skipall_csv=$TMP/skipAll.csv +baseline_csv=$TMP/baseline.csv +if ! "$CMD" -o "$baseline_csv" test.sbd; then + echo "Failed baseline CSV generation" + rm -f "$baseline_csv" + exit 1 +fi +if ! "$CMD" -A -o "$skipall_csv" test.sbd; then + echo "Failed to execute $CMD with --skipAll" + rm -f "$skipall_csv" "$baseline_csv" + exit 1 +fi +# baseline has header + 94 data lines; skipAll should have header + 93 +baseline_lines=$(wc -l < "$baseline_csv" | tr -d ' ') +skipall_lines=$(wc -l < "$skipall_csv" | tr -d ' ') +expected_lines=$((baseline_lines - 1)) +if [ "$skipall_lines" != "$expected_lines" ]; then + echo "skipAll CSV line count mismatch: expected $expected_lines, got $skipall_lines" + rm -f "$skipall_csv" "$baseline_csv" + exit 1 +fi +rm -f "$skipall_csv" "$baseline_csv" + +# Test mutual exclusion +echo "Testing mutual exclusion of --skipFirst and --skipAll..." +if "$CMD" -s -A -o /dev/null test.sbd 2>/dev/null; then + echo "Should have failed with both --skipFirst and --skipAll" + exit 1 +fi + +# Test --sort header_time +echo "Testing --sort header_time..." +sorthdr_csv=$TMP/sort_hdr.csv +if ! "$CMD" --sort header_time -o "$sorthdr_csv" test.sbd test.tbd; then + echo "Failed to execute $CMD with --sort header_time" + rm -f "$sorthdr_csv" + exit 1 +fi +rm -f "$sorthdr_csv" + +# Test --sort lexicographic +echo "Testing --sort lexicographic..." +sortlex_csv=$TMP/sort_lex.csv +if ! "$CMD" --sort lexicographic -o "$sortlex_csv" test.sbd test.tbd; then + echo "Failed to execute $CMD with --sort lexicographic" + rm -f "$sortlex_csv" + exit 1 +fi +rm -f "$sortlex_csv" + exit 0 diff --git a/test/dbd2netCDF b/test/dbd2netCDF index 4d7fdee..e6c580a 100755 --- a/test/dbd2netCDF +++ b/test/dbd2netCDF @@ -108,6 +108,74 @@ if [ "$actual_dim" != "287" ]; then fi rm -f "$skipfn" +# Test --skipAll with single file +echo "Testing --skipAll with single file..." +skipall1=$TMP/skipAll1.nc +if ! 
"$CMD" -A -o "$skipall1" test.sbd; then + echo "Failed to execute $CMD with --skipAll (single file)" + rm -f "$skipall1" + exit 1 +fi +# sbd has 94 records; --skipAll skips first => 93 +actual_dim=$(ncdump -h "$skipall1" | grep 'i = UNLIMITED' | grep -o '[0-9]\+') +if [ "$actual_dim" != "93" ]; then + echo "skipAll single-file dimension mismatch: expected 93, got $actual_dim" + rm -f "$skipall1" + exit 1 +fi +rm -f "$skipall1" + +# Test --skipAll with two files +echo "Testing --skipAll with two files..." +skipall2=$TMP/skipAll2.nc +if ! "$CMD" -A -o "$skipall2" test.sbd test.tbd; then + echo "Failed to execute $CMD with --skipAll (two files)" + rm -f "$skipall2" + exit 1 +fi +# sbd=94, tbd=194; --skipAll skips first of both => 93+193=286 +actual_dim=$(ncdump -h "$skipall2" | grep 'i = UNLIMITED' | grep -o '[0-9]\+') +if [ "$actual_dim" != "286" ]; then + echo "skipAll two-file dimension mismatch: expected 286, got $actual_dim" + rm -f "$skipall2" + exit 1 +fi +rm -f "$skipall2" + +# Test mutual exclusion of skip flags +echo "Testing mutual exclusion of --skipFirst and --skipAll..." +if "$CMD" -s -A -o /dev/null test.sbd 2>/dev/null; then + echo "Should have failed with both --skipFirst and --skipAll" + exit 1 +fi + +# Test --sort header_time +echo "Testing --sort header_time..." +sorthdr=$TMP/sort_hdr.nc +if ! "$CMD" --sort header_time -o "$sorthdr" test.sbd test.tbd; then + echo "Failed to execute $CMD with --sort header_time" + rm -f "$sorthdr" + exit 1 +fi +rm -f "$sorthdr" + +# Test --sort lexicographic +echo "Testing --sort lexicographic..." +sortlex=$TMP/sort_lex.nc +if ! "$CMD" --sort lexicographic -o "$sortlex" test.sbd test.tbd; then + echo "Failed to execute $CMD with --sort lexicographic" + rm -f "$sortlex" + exit 1 +fi +rm -f "$sortlex" + +# Test invalid --sort value +echo "Testing invalid --sort value..." 
+if "$CMD" --sort bogus -o /dev/null test.sbd 2>/dev/null; then + echo "Should have failed with invalid sort value" + exit 1 +fi + # Test command-line options echo "Testing version flag..." if ! "$CMD" --version >/dev/null 2>&1; then diff --git a/test/fuzz/CMakeLists.txt b/test/fuzz/CMakeLists.txt index a50dfed..0ec202f 100644 --- a/test/fuzz/CMakeLists.txt +++ b/test/fuzz/CMakeLists.txt @@ -50,10 +50,34 @@ if(BUILD_FUZZ_TESTS) RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin ) + # Fuzz target: Data binary record parsing + add_executable(fuzz_data fuzz_data.cpp ${dbd2netcdf_SOURCE_DIR}/src/Data.C) + target_link_libraries(fuzz_data PRIVATE dbd_common) + target_compile_options(fuzz_data PRIVATE ${FUZZ_COMPILE_FLAGS}) + target_link_options(fuzz_data PRIVATE ${FUZZ_LINK_FLAGS}) + set_target_properties(fuzz_data PROPERTIES + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin + ) + + # Fuzz target: LZ4 decompression stream + add_executable(fuzz_decompress fuzz_decompress.cpp) + target_link_libraries(fuzz_decompress PRIVATE dbd_common) + target_compile_options(fuzz_decompress PRIVATE ${FUZZ_COMPILE_FLAGS}) + target_link_options(fuzz_decompress PRIVATE ${FUZZ_LINK_FLAGS}) + set_target_properties(fuzz_decompress PROPERTIES + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin + ) + # Create corpus directories for seeds file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/fuzz_corpus/sensor) file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/fuzz_corpus/header) file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/fuzz_corpus/knownbytes) + file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/fuzz_corpus/data) + file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/fuzz_corpus/decompress) # Custom target to run fuzz tests add_custom_target(fuzz @@ -61,7 +85,9 @@ if(BUILD_FUZZ_TESTS) COMMAND ${CMAKE_COMMAND} -E echo " ./bin/fuzz_sensor fuzz_corpus/sensor -max_total_time=60" COMMAND ${CMAKE_COMMAND} -E echo " ./bin/fuzz_header fuzz_corpus/header 
-max_total_time=60" COMMAND ${CMAKE_COMMAND} -E echo " ./bin/fuzz_knownbytes fuzz_corpus/knownbytes -max_total_time=60" - DEPENDS fuzz_sensor fuzz_header fuzz_knownbytes + COMMAND ${CMAKE_COMMAND} -E echo " ./bin/fuzz_data fuzz_corpus/data -max_total_time=60" + COMMAND ${CMAKE_COMMAND} -E echo " ./bin/fuzz_decompress fuzz_corpus/decompress -max_total_time=60" + DEPENDS fuzz_sensor fuzz_header fuzz_knownbytes fuzz_data fuzz_decompress WORKING_DIRECTORY ${CMAKE_BINARY_DIR} ) endif() diff --git a/test/fuzz/fuzz_data.cpp b/test/fuzz/fuzz_data.cpp new file mode 100644 index 0000000..8fb17b2 --- /dev/null +++ b/test/fuzz/fuzz_data.cpp @@ -0,0 +1,51 @@ +// Fuzz test for Data binary record parsing +// Tests robustness of Data::load against malformed binary input + +#include "Data.H" +#include "KnownBytes.H" +#include "Sensors.H" +#include "Sensor.H" +#include "MyException.H" +#include +#include +#include + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + if (size < 16) return 0; // Need enough for KnownBytes + at least one record + + std::string input(reinterpret_cast(data), size); + std::istringstream iss(input, std::ios_base::in | std::ios_base::binary); + + try { + // Build a minimal sensor list from the first few bytes + const size_t nSensors = (data[0] % 4) + 1; // 1-4 sensors + Sensors sensors; + for (size_t i = 0; i < nSensors; ++i) { + const int sizeVal = (1 << (i % 4)); // 1, 2, 4, 8 + std::ostringstream line; + line << "s: T " << i << " " << i << " " << sizeVal << " sensor" << i << " units"; + sensors.insert(Sensor(line.str())); + } + sensors.nToStore(nSensors); + + // Skip past the bytes we used for sensor config + iss.seekg(1, std::ios::beg); + + KnownBytes kb(iss); + Data result; + result.load(iss, kb, sensors, true, size); + + // Exercise accessors + (void)result.size(); + (void)result.empty(); + if (!result.empty() && result.nColumns() > 0) { + (void)result(0, 0); + } + } catch (const MyException&) { + // Expected for malformed 
input + } catch (const std::exception&) { + // Other standard exceptions are also acceptable + } + + return 0; +} diff --git a/test/fuzz/fuzz_decompress.cpp b/test/fuzz/fuzz_decompress.cpp new file mode 100644 index 0000000..60cb1b1 --- /dev/null +++ b/test/fuzz/fuzz_decompress.cpp @@ -0,0 +1,51 @@ +// Fuzz test for LZ4 decompression stream +// Tests robustness of DecompressTWRBuf against malformed compressed data + +#include "Decompress.H" +#include "MyException.H" +#include +#include +#include +#include +#include + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + if (size < 2) return 0; + + // Write fuzz input to a temporary file (DecompressTWR requires a file) + char tmpname[] = "/tmp/fuzz_decompress_XXXXXX"; + int fd = mkstemp(tmpname); + if (fd < 0) return 0; + + FILE* f = fdopen(fd, "wb"); + if (!f) { close(fd); unlink(tmpname); return 0; } + fwrite(data, 1, size, f); + fclose(f); + + try { + // Test compressed path + DecompressTWR is(tmpname, true); + char buf[4096]; + while (is.read(buf, sizeof(buf)) || is.gcount()) { + // Just consume the data + } + } catch (const MyException&) { + // Expected for malformed input + } catch (const std::exception&) { + // Other standard exceptions are also acceptable + } + + try { + // Test uncompressed path + DecompressTWR is(tmpname, false); + char buf[4096]; + while (is.read(buf, sizeof(buf)) || is.gcount()) { + // Just consume the data + } + } catch (const std::exception&) { + // Acceptable + } + + unlink(tmpname); + return 0; +} diff --git a/test/unit/test_header.cpp b/test/unit/test_header.cpp index 0f26bea..39b2182 100644 --- a/test/unit/test_header.cpp +++ b/test/unit/test_header.cpp @@ -4,6 +4,8 @@ #include #include "Header.H" #include +#include +#include TEST_CASE("Header::trim removes whitespace", "[header]") { SECTION("Trim leading whitespace") { @@ -58,3 +60,42 @@ TEST_CASE("Header mission filtering", "[header]") { CHECK(missions.count("uppercase") == 1); } } + 
+TEST_CASE("Header::parseFileOpenTime", "[header]") { + SECTION("Parses valid fileopen_time string") { + // Tue_Sep_20_17:39:01_2011 => 2011-09-20 17:39:01 UTC + time_t t = Header::parseFileOpenTime("Tue_Sep_20_17:39:01_2011"); + REQUIRE(t != std::numeric_limits::max()); + struct tm* utc = gmtime(&t); + CHECK(utc->tm_year + 1900 == 2011); + CHECK(utc->tm_mon + 1 == 9); + CHECK(utc->tm_mday == 20); + CHECK(utc->tm_hour == 17); + CHECK(utc->tm_min == 39); + CHECK(utc->tm_sec == 1); + } + + SECTION("Parses another valid timestamp") { + // Mon_Nov_18_09:40:04_2024 + time_t t = Header::parseFileOpenTime("Mon_Nov_18_09:40:04_2024"); + REQUIRE(t != std::numeric_limits::max()); + struct tm* utc = gmtime(&t); + CHECK(utc->tm_year + 1900 == 2024); + CHECK(utc->tm_mon + 1 == 11); + CHECK(utc->tm_mday == 18); + } + + SECTION("Ordering: earlier time < later time") { + time_t t1 = Header::parseFileOpenTime("Tue_Sep_20_17:39:01_2011"); + time_t t2 = Header::parseFileOpenTime("Mon_Nov_18_09:40:04_2024"); + REQUIRE(t1 < t2); + } + + SECTION("Empty string returns max sentinel") { + CHECK(Header::parseFileOpenTime("") == std::numeric_limits::max()); + } + + SECTION("Garbage string returns max sentinel") { + CHECK(Header::parseFileOpenTime("not_a_timestamp") == std::numeric_limits::max()); + } +} From 34cf565148e0ebe4e7141efbc0d333993faec1f4 Mon Sep 17 00:00:00 2001 From: Pat Welch Date: Mon, 30 Mar 2026 21:57:46 -0700 Subject: [PATCH 2/5] fix: harden binary parsing, logging, and NetCDF output; bump to v1.7.2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Audit fixes and Python-project backports: - KnownBytes: replace union type-punning with memcpy (UB fix), add gcount checks for short reads, fix tag error message hex formatting, save errno - SensorsMap: validate sensor byte-size consistency across files (throws on mismatch), check getline returns in sensor skip loop - Sensors: try-catch in cache load for corrupt lines, thread_local RNG - 
Data: bound repair scan to 64KB max, convert inf→NaN for repeated values - MyNetCDF: enable shuffle filter for float/double (~20-40% better compression), add putGlobalAtt method, write CF-1.10 Conventions/history/source attributes - PD0: replace all std::cerr with logger, remove redundant prints before throws - pd02netCDF: add logger initialization and --log-level option - decompressTWR: rename -s to -p/--pipe (avoid conflict with --skipFirst), fix tolower cast, enable temp file cleanup - All tools: fix --verbose to not downgrade log level below --log-level mkOne.py defaults changed to --skipAll and --sort header_time. Version bumped to 1.7.2. Man pages and reference files updated. Co-Authored-By: Claude Opus 4.6 (1M context) --- VERSION | 2 +- doc/dbd2csv.1 | 2 +- doc/dbd2netCDF.1 | 2 +- doc/dbdSensors.1 | 2 +- mkOne.py | 11 +++++-- src/Data.C | 8 +++-- src/Header.C | 2 ++ src/KnownBytes.C | 53 +++++++++++++++++------------- src/MyNetCDF.C | 9 ++++- src/MyNetCDF.H | 2 ++ src/PD0.C | 19 +++-------- src/Sensors.C | 16 +++++---- src/SensorsMap.C | 14 +++++++- src/dbd2csv.C | 2 +- src/dbd2netCDF.C | 8 ++++- src/decompressTWR.C | 15 +++++---- src/pd02netCDF.C | 19 +++++++---- test/data/00300000.combined.ncdump | 7 +++- test/data/00300000.dcd.ncdump | 7 +++- test/dbd2netCDF | 19 +++++++++++ test/test.both.netCDF | 7 +++- test/test.sbd.netCDF | 7 +++- test/test.tbd.netCDF | 7 +++- 23 files changed, 167 insertions(+), 73 deletions(-) diff --git a/VERSION b/VERSION index 943f9cb..f8a696c 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.7.1 +1.7.2 diff --git a/doc/dbd2csv.1 b/doc/dbd2csv.1 index a727fd2..73a14af 100644 --- a/doc/dbd2csv.1 +++ b/doc/dbd2csv.1 @@ -1,4 +1,4 @@ -.TH dbd2csv "March 2026" "Version 1.7.1" "USER COMMANDS" +.TH dbd2csv "March 2026" "Version 1.7.2" "USER COMMANDS" .SH NAME dbd2csv \- translate .I "Dinkum Binary Data" diff --git a/doc/dbd2netCDF.1 b/doc/dbd2netCDF.1 index f3bd3c9..c41222e 100644 --- a/doc/dbd2netCDF.1 +++ b/doc/dbd2netCDF.1 @@ -1,4 
+1,4 @@ -.TH dbd2netCDF "March 2026" "Version 1.7.1" "USER COMMANDS" +.TH dbd2netCDF "March 2026" "Version 1.7.2" "USER COMMANDS" .SH NAME dbd2netCDF \- translate .I "Dinkum Binary Data" diff --git a/doc/dbdSensors.1 b/doc/dbdSensors.1 index 13429ab..93b11e0 100644 --- a/doc/dbdSensors.1 +++ b/doc/dbdSensors.1 @@ -1,4 +1,4 @@ -.TH dbdSensors "March 2026" "Version 1.7.1" "USER COMMANDS" +.TH dbdSensors "March 2026" "Version 1.7.2" "USER COMMANDS" .SH NAME dbdSensors \- extract sensor information from .I "Dinkum Binary Data" diff --git a/mkOne.py b/mkOne.py index 1a9f7da..cd838e8 100755 --- a/mkOne.py +++ b/mkOne.py @@ -114,8 +114,6 @@ def processAll(filenames:list, args:ArgumentParser, suffix:str, sensorsFilename: filenames = list(filenames) # ensure it is a list if not filenames: return # Nothing to do - filenames.sort() # Sort the input files for consistent processing order - ofn = args.outputPrefix + suffix # Output filename cmd = [os.path.join(args.bindir, "dbd2netCDF"), @@ -124,7 +122,11 @@ def processAll(filenames:list, args:ArgumentParser, suffix:str, sensorsFilename: ] if args.verbose: cmd.append("--verbose") if args.repair: cmd.append("--repair") - if not args.keepFirst: cmd.append("--skipFirst") + if args.keepFirst: + cmd.append("--keepFirst") + else: + cmd.append("--skipAll") + cmd.extend(["--sort", args.sort]) if args.compression is not None: cmd.extend(["--compression", str(args.compression)]) if sensorsFilename: cmd.extend(["--sensorOutput", sensorsFilename]) @@ -190,6 +192,9 @@ def processPD0(filenames:list, args:ArgumentParser, suffix:str="pd0.nc") -> None grp.add_argument("--repair", action="store_true", help="Should corrupted files be 'repaired'") grp.add_argument("--keepFirst", action="store_true", help="Should the first record not be discarded on all the files?") +grp.add_argument("--sort", type=str, default="header_time", + choices=["none", "header_time", "lexicographic"], + help="File sort order (default: header_time)") 
grp.add_argument("--compression", type=int, default=None, choices=range(10), metavar="[0-9]", help="Zlib compression level (0=none, 9=max)") diff --git a/src/Data.C b/src/Data.C index 39c2a79..7f6d0e8 100644 --- a/src/Data.C +++ b/src/Data.C @@ -88,11 +88,14 @@ Data::load(std::istream& is, // Not a data tag, so assume we've encountered garbage and look for a data tag const size_t pos = is.tellg(); // Where the bad tag was found bool qContinue = false; - while (true) { // look for the next d + size_t scanCount = 0; + constexpr size_t MAX_REPAIR_SCAN = 65536; + while (scanCount < MAX_REPAIR_SCAN) { // look for the next d int8_t c; if (!is.read(reinterpret_cast(&c), 1)) { // EOF looking for the next 'd' break; } + ++scanCount; if (c == 'd') { qContinue = true; break; @@ -151,7 +154,8 @@ Data::load(std::istream& is, const size_t index(sensor.index()); qKeep |= sensor.qCriteria(); if (sensor.qKeep()) { - mData[index][nRows] = prevValue[index]; + const double prev = prevValue[index]; + mData[index][nRows] = std::isinf(prev) ? NAN : prev; } } else if (code == 2) { // New Value const Sensor& sensor(sensors[i]); diff --git a/src/Header.C b/src/Header.C index f435e0f..1bb531c 100644 --- a/src/Header.C +++ b/src/Header.C @@ -133,6 +133,8 @@ Header::qProcessMission(const tMissions& toSkip, return toKeep.empty() || (toKeep.find(mission) != toKeep.end()); } +// Parse fileopen_time header value (format: "Day_Mon_DD_HH:MM:SS_YYYY") to UTC epoch. +// Assumes all timestamps are UTC (standard for Slocum glider data at sea). 
time_t Header::parseFileOpenTime(const std::string& timeStr) { diff --git a/src/KnownBytes.C b/src/KnownBytes.C index 02d26dc..43b3878 100644 --- a/src/KnownBytes.C +++ b/src/KnownBytes.C @@ -48,7 +48,9 @@ KnownBytes::KnownBytes(std::istream& is) static_assert(sizeof(double) == 8, "sizeof(double) != 8"); if (tag != 's') { - throw(MyException("Error known bytes cycle tag(%c) != 's'")); + std::ostringstream oss; + oss << "Error known bytes cycle tag(0x" << std::hex << (tag & 0xff) << ") != 's'"; + throw MyException(oss.str()); } @@ -92,8 +94,9 @@ KnownBytes::read8(std::istream& is) const int8_t val; if (!is.read(reinterpret_cast(&val), 1)) { + const int saved_errno = errno; std::ostringstream oss; - oss << "Error reading a byte, " << strerror(errno); + oss << "Error reading a byte, " << strerror(saved_errno); throw MyException(oss.str()); } @@ -105,9 +108,10 @@ KnownBytes::read16(std::istream& is) const { int16_t val; - if (!is.read(reinterpret_cast(&val), 2)) { + if (!is.read(reinterpret_cast(&val), 2) || is.gcount() != 2) { + const int saved_errno = errno; std::ostringstream oss; - oss << "Error reading two bytes, " << strerror(errno); + oss << "Error reading two bytes, " << strerror(saved_errno); throw MyException(oss.str()); } @@ -117,42 +121,47 @@ KnownBytes::read16(std::istream& is) const float KnownBytes::read32(std::istream& is) const { - union uVal { - float fnum; - int32_t inum; - } val; + char buf[4]; - if (!is.read(reinterpret_cast(&val), 4)) { + if (!is.read(buf, 4) || is.gcount() != 4) { + const int saved_errno = errno; std::ostringstream oss; - oss << "Error reading four bytes, " << strerror(errno); + oss << "Error reading four bytes, " << strerror(saved_errno); throw MyException(oss.str()); } + int32_t inum; + std::memcpy(&inum, buf, 4); if (mFlip) - val.inum = ntohl(val.inum); + inum = ntohl(inum); - return val.fnum; + float fnum; + std::memcpy(&fnum, &inum, 4); + return fnum; } double KnownBytes::read64(std::istream& is) const { - union uVal { - 
double fnum; - int32_t i32[2]; - } val; + char buf[8]; - if (!is.read(reinterpret_cast(&val), 8)) { + if (!is.read(buf, 8) || is.gcount() != 8) { + const int saved_errno = errno; std::ostringstream oss; - oss << "Error reading eight bytes, " << strerror(errno); + oss << "Error reading eight bytes, " << strerror(saved_errno); throw MyException(oss.str()); } + int32_t i32[2]; + std::memcpy(i32, buf, 8); + if (mFlip) { - const int32_t itmp(ntohl(val.i32[0])); - val.i32[0] = ntohl(val.i32[1]); - val.i32[1] = itmp; + const int32_t itmp(ntohl(i32[0])); + i32[0] = ntohl(i32[1]); + i32[1] = itmp; } - return val.fnum; + double fnum; + std::memcpy(&fnum, i32, 8); + return fnum; } diff --git a/src/MyNetCDF.C b/src/MyNetCDF.C index 68c74bf..eb7d9e8 100644 --- a/src/MyNetCDF.C +++ b/src/MyNetCDF.C @@ -211,7 +211,7 @@ NetCDF::createVar(const std::string& name, // chunkSizes automatically cleaned up on scope exit } // Chunking - const bool qShuffle((idType == NC_SHORT) || (idType == NC_INT) || (idType == NC_INT64)); + const bool qShuffle(idType != NC_STRING && idType != NC_BYTE && idType != NC_UBYTE); const bool qCompress(idType != NC_STRING); if (qShuffle || qCompress) { @@ -405,6 +405,13 @@ NetCDF::mkCountOne(const size_t len) return mCountOne.data(); } +void +NetCDF::putGlobalAtt(const std::string& name, const std::string& value) +{ + basicOp(nc_put_att_text(mId, NC_GLOBAL, name.c_str(), value.size(), value.c_str()), + "setting global attribute '" + name + "'"); +} + std::string NetCDF::typeToStr(const nc_type t) { diff --git a/src/MyNetCDF.H b/src/MyNetCDF.H index f7401b6..5a3d6aa 100644 --- a/src/MyNetCDF.H +++ b/src/MyNetCDF.H @@ -99,6 +99,8 @@ public: void putVar(const int varId, const size_t indices[], const size_t len, const uint32_t value); void putVar(const int varId, const size_t indices[], const size_t len, const int32_t value); + void putGlobalAtt(const std::string& name, const std::string& value); + static std::string typeToStr(const nc_type t); }; // NetCDF diff 
--git a/src/PD0.C b/src/PD0.C index a2c086b..80d21c5 100644 --- a/src/PD0.C +++ b/src/PD0.C @@ -1,6 +1,7 @@ #include "PD0.H" #include "MyNetCDF.H" #include "MyException.H" +#include "Logger.H" #include #include #include @@ -28,7 +29,6 @@ PD0::load(const std::string& fn, if (!is) { const std::string msg("Error opening '" + fn + "', " + strerror(errno)); - std::cerr << msg << std::endl; throw(MyException(msg)); } @@ -79,7 +79,6 @@ PD0::loadBlock(std::istream& is) const std::string reason(is.eof() ? "end-of-file" : (is.fail() ? "fail" : "bad")); std::ostringstream oss; oss << "Error reading " << nBytes << " bytes from '" << mFilename << "', " << reason; - std::cerr << oss.str() << std::endl; if (!is.eof()) throw(MyException(oss.str())); // buffer automatically cleaned up return false; @@ -100,7 +99,6 @@ PD0::loadBlock(std::istream& is) << " calculated check sum 0x" << std::hex << sum << " file's checksum 0x" << chkSum << std::dec; - std::cerr << oss.str() << std::endl; throw(MyException(oss.str())); // buffer automatically cleaned up } @@ -158,9 +156,7 @@ PD0::loadBlock(std::istream& is) default: if (mSeenHeaderTypes.find(hdr) == mSeenHeaderTypes.end()) { mSeenHeaderTypes.insert(hdr); - std::cerr << "Unsupported header type 0x" << std::hex << hdr - << " in " << mFilename - << std::endl; + LOG_WARN("Unsupported header type 0x{:x} in {}", hdr, mFilename); } } } @@ -228,7 +224,6 @@ PD0::readByte(std::istream& is, if (qThrow) { std::ostringstream msg; msg << "Error reading a byte from " << mFilename << ", " << strerror(errno); - std::cerr << msg.str() << std::endl; throw(MyException(msg.str())); } return 0; @@ -247,8 +242,7 @@ PD0::readUInt16(std::istream& is, if (!is) { // EOF if (qThrow) { std::ostringstream msg; - msg << "Error reading a byte from " << mFilename << ", " << strerror(errno); - std::cerr << msg.str() << std::endl; + msg << "Error reading two bytes from " << mFilename << ", " << strerror(errno); throw(MyException(msg.str())); } return 0; @@ -267,8 +261,7 
@@ PD0::readUInt32(std::istream& is, if (!is) { // EOF if (qThrow) { std::ostringstream msg; - msg << "Error reading a byte from " << mFilename << ", " << strerror(errno); - std::cerr << msg.str() << std::endl; + msg << "Error reading four bytes from " << mFilename << ", " << strerror(errno); throw(MyException(msg.str())); } return 0; @@ -295,12 +288,11 @@ PD0::readAndCheckByte(std::istream& is, std::ostringstream msg; msg << "Incorrect header byte(0x" << std::hex << c << ") should have been 0x" << expectedValue << ")"; - std::cerr << msg.str() << std::endl; - if (qThrow) { throw(MyException(msg.str())); } + LOG_WARN("{}", msg.str()); return false; } @@ -722,7 +714,6 @@ PD0::maxNumberOfCells(const std::string& fn) if (!is) { const std::string msg("Error opening '" + fn + "', " + strerror(errno)); - std::cerr << msg << std::endl; throw(MyException(msg)); } diff --git a/src/Sensors.C b/src/Sensors.C index fe52603..e543b74 100644 --- a/src/Sensors.C +++ b/src/Sensors.C @@ -156,9 +156,9 @@ Sensors::mkFilename(const std::string& dir) const namespace { // Generate a unique temporary filename suffix std::string uniqueTempSuffix() { - static std::random_device rd; - static std::mt19937 gen(rd()); - static std::uniform_int_distribution<> dis(100000, 999999); + thread_local std::random_device rd; + thread_local std::mt19937 gen(rd()); + thread_local std::uniform_int_distribution<> dis(100000, 999999); return std::to_string(dis(gen)); } } @@ -257,9 +257,13 @@ Sensors::load(const std::string& dir, } for (std::string line; getline(is, line);) { - const Sensor sensor(line); - if (sensor.qAvailable()) { - mSensors.push_back(sensor); + try { + const Sensor sensor(line); + if (sensor.qAvailable()) { + mSensors.push_back(sensor); + } + } catch (const MyException& e) { + LOG_WARN("Skipping corrupt cache line in '{}': {}", filename, e.what()); } } diff --git a/src/SensorsMap.C b/src/SensorsMap.C index bb84eb7..6245319 100644 --- a/src/SensorsMap.C +++ b/src/SensorsMap.C @@ -20,8 
+20,10 @@ #include "SensorsMap.H" #include "Header.H" #include "MyException.H" +#include "Logger.H" #include #include +#include #include const Sensors& @@ -71,7 +73,10 @@ SensorsMap::insert(std::istream& is, if (qPosition && !hdr.qFactored()) { // Read in nSensors worth of lines, but skip processing for (size_t i = hdr.nSensors(); i; --i) { std::string line; - getline(is, line); + if (!getline(is, line)) { + LOG_WARN("Unexpected EOF skipping sensor lines for CRC '{}'", hdr.crc()); + break; + } } } } @@ -97,6 +102,13 @@ SensorsMap::setUpForData() tNames::const_iterator nt(names.find(sensor.name())); if (nt != names.end()) { // Already known sensor.index(static_cast(nt->second)); + const Sensor& existing = mAllSensors[nt->second]; + if (sensor.size() != existing.size()) { + std::ostringstream oss; + oss << "Sensor '" << sensor.name() << "' has size " << sensor.size() + << " but was previously seen with size " << existing.size(); + throw MyException(oss.str()); + } } else { // Not seen yet sensor.index(static_cast(names.size())); names.insert(std::make_pair(sensor.name(), static_cast(names.size()))); diff --git a/src/dbd2csv.C b/src/dbd2csv.C index 128b3d4..d4c2ed7 100644 --- a/src/dbd2csv.C +++ b/src/dbd2csv.C @@ -88,7 +88,7 @@ main(int argc, // Initialize logger dbd::logger().init("dbd2csv", dbd::logLevelFromString(logLevel)); - if (qVerbose) { + if (qVerbose && logLevel == "warn") { dbd::logger().setLevel(dbd::LogLevel::Info); } diff --git a/src/dbd2netCDF.C b/src/dbd2netCDF.C index 83695c1..d0e12d3 100644 --- a/src/dbd2netCDF.C +++ b/src/dbd2netCDF.C @@ -99,7 +99,7 @@ main(int argc, // Initialize logger dbd::logger().init("dbd2netCDF", dbd::logLevelFromString(logLevel)); - if (qVerbose) { + if (qVerbose && logLevel == "warn") { dbd::logger().setLevel(dbd::LogLevel::Info); } @@ -238,6 +238,12 @@ main(int argc, NetCDF ncid(ofn, qAppend || batchStart > 0); ncid.compressionLevel(compressionLevel); + if (batchStart == 0 && !qAppend) { + ncid.putGlobalAtt("Conventions", 
"CF-1.10"); + ncid.putGlobalAtt("history", std::string("Created by dbd2netCDF ") + VERSION); + ncid.putGlobalAtt("source", "Slocum Glider Dinkum Binary Data files"); + } + const int iDim(ncid.maybeCreateDim(DATA_DIMENSION)); const int jDim(ncid.maybeCreateDim(FILE_DIMENSION)); diff --git a/src/decompressTWR.C b/src/decompressTWR.C index f33843d..65ba3bd 100644 --- a/src/decompressTWR.C +++ b/src/decompressTWR.C @@ -39,7 +39,7 @@ namespace { const std::string fn(inPath.filename().string()); fs::path outPath = dir.empty() ? fs::path(fn) : (fs::path(dir) / fn); std::string ext(outPath.extension().string()); - if ((ext.size() == 4) && (tolower(ext[2]) == 'c')) { + if ((ext.size() == 4) && (std::tolower(static_cast(ext[2])) == 'c')) { switch (ext[3]) { case 'g': ext[2] = 'l'; break; case 'G': ext[2] = 'L'; break; @@ -53,9 +53,9 @@ namespace { // Cross-platform unique ID generation (replaces getpid()) std::string uniqueSuffix() { - static std::random_device rd; - static std::mt19937 gen(rd()); - static std::uniform_int_distribution<> dis(100000, 999999); + thread_local std::random_device rd; + thread_local std::mt19937 gen(rd()); + thread_local std::uniform_int_distribution<> dis(100000, 999999); return std::to_string(dis(gen)); } } // Anonymous namespace @@ -74,7 +74,7 @@ main(int argc, app.footer(std::string("\nReport bugs to ") + MAINTAINER); app.add_option("-o,--output", directory, "Directory where to store the data"); - app.add_flag("-s,--stdout", qStdOut, "Output to stdout"); + app.add_flag("-p,--pipe", qStdOut, "Output to stdout"); app.add_flag("-v,--verbose", qVerbose, "Enable some diagnostic output"); app.add_option("-l,--log-level", logLevel, "Log level (trace,debug,info,warn,error,critical,off)") ->default_val("warn"); @@ -85,7 +85,7 @@ main(int argc, // Initialize logger dbd::logger().init("decompressTWR", dbd::logLevelFromString(logLevel)); - if (qVerbose) { + if (qVerbose && logLevel == "warn") { dbd::logger().setLevel(dbd::LogLevel::Info); } @@ -128,7 
+128,8 @@ main(int argc, LOG_INFO("Decompressed '{}' -> '{}'", ifn, ofn); } catch (int e) { LOG_ERROR("Error creating '{}': {}", ofn, strerror(e)); - // remove(tfn); // Not found on Macos + std::error_code ec; + fs::remove(tfn, ec); } } diff --git a/src/pd02netCDF.C b/src/pd02netCDF.C index 8219f52..dece8da 100644 --- a/src/pd02netCDF.C +++ b/src/pd02netCDF.C @@ -23,6 +23,7 @@ #include "MyNetCDF.H" #include "PD0.H" #include "MyException.H" +#include "Logger.H" #include "config.h" #include #include @@ -34,6 +35,7 @@ main(int argc, { std::string outputFilename; std::vector inputFiles; + std::string logLevel = "warn"; bool qVerbose(false); CLI::App app{"Convert PD0 files to NetCDF", "pd02netCDF"}; @@ -41,11 +43,18 @@ main(int argc, app.add_option("-o,--output", outputFilename, "Where to store the data")->required(); app.add_flag("-v,--verbose", qVerbose, "Enable some diagnostic output"); + app.add_option("-l,--log-level", logLevel, "Log level (trace,debug,info,warn,error,critical,off)") + ->default_val("warn"); app.add_option("files", inputFiles, "Input PD0 files")->required()->check(CLI::ExistingFile); app.set_version_flag("-V,--version", VERSION); CLI11_PARSE(app, argc, argv); + dbd::logger().init("pd02netCDF", dbd::logLevelFromString(logLevel)); + if (qVerbose && logLevel == "warn") { + dbd::logger().setLevel(dbd::LogLevel::Info); + } + const char *ofn = outputFilename.c_str(); uint8_t nCells(0); @@ -55,8 +64,7 @@ main(int argc, nCells = (nCells >= mCells) ? 
nCells : mCells; } - if (qVerbose) - std::cout << "Maximum number of cells " << static_cast(nCells) << std::endl; + LOG_INFO("Maximum number of cells {}", static_cast(nCells)); NetCDF nc(ofn); const int hDim(nc.createDim("h")); @@ -83,10 +91,9 @@ main(int argc, nc.putVar(hdrStartIndex, hIndex, static_cast(sIndex)); nc.putVar(hdrStopIndex, hIndex, static_cast(index - 1)); - if (qVerbose) - std::cout << "Found " << (index - sIndex) << " records in " << fn << std::endl; - } else if (qVerbose) { - std::cout << "No records found in " << fn << std::endl; + LOG_INFO("Found {} records in {}", index - sIndex, fn); + } else { + LOG_WARN("No records found in {}", fn); } } diff --git a/test/data/00300000.combined.ncdump b/test/data/00300000.combined.ncdump index dd99892..689ba04 100644 --- a/test/data/00300000.combined.ncdump +++ b/test/data/00300000.combined.ncdump @@ -1,4 +1,4 @@ -netcdf test.combined { +netcdf combined { dimensions: i = UNLIMITED ; // (62 currently) j = UNLIMITED ; // (4 currently) @@ -5515,6 +5515,11 @@ variables: uint hdr_start_index(j) ; uint hdr_stop_index(j) ; uint hdr_nRecords(j) ; + +// global attributes: + :Conventions = "CF-1.10" ; + :history = "Created by dbd2netCDF 1.7.2" ; + :source = "Slocum Glider Dinkum Binary Data files" ; data: c_wpt_lat = 0, 0, _, _, _, _, _, _, _, _, _, _, _, _, 0, 0, _, _, _, _, _, diff --git a/test/data/00300000.dcd.ncdump b/test/data/00300000.dcd.ncdump index 9d2bbb2..12f49c8 100644 --- a/test/data/00300000.dcd.ncdump +++ b/test/data/00300000.dcd.ncdump @@ -1,4 +1,4 @@ -netcdf test.dcd { +netcdf dcd { dimensions: i = UNLIMITED ; // (14 currently) j = UNLIMITED ; // (1 currently) @@ -5131,6 +5131,11 @@ variables: uint hdr_start_index(j) ; uint hdr_stop_index(j) ; uint hdr_nRecords(j) ; + +// global attributes: + :Conventions = "CF-1.10" ; + :history = "Created by dbd2netCDF 1.7.2" ; + :source = "Slocum Glider Dinkum Binary Data files" ; data: cc_behavior_state = 0, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 ; diff --git 
a/test/dbd2netCDF b/test/dbd2netCDF index e6c580a..7881047 100755 --- a/test/dbd2netCDF +++ b/test/dbd2netCDF @@ -159,6 +159,25 @@ if ! "$CMD" --sort header_time -o "$sorthdr" test.sbd test.tbd; then fi rm -f "$sorthdr" +# Test --sort header_time produces correct order (pass files in reverse time order) +echo "Testing --sort header_time order..." +sorthdr_order=$TMP/sort_hdr_order.nc +if ! "$CMD" --sort header_time -o "$sorthdr_order" test.tbd test.sbd; then + echo "Failed to execute $CMD with --sort header_time (order test)" + rm -f "$sorthdr_order" + exit 1 +fi +# sbd=Sep_20 (earlier), tbd=Sep_21 (later). After sort, sbd should come first. +first_time=$(ncdump -v hdr_fileopen_time "$sorthdr_order" | grep 'hdr_fileopen_time =' | grep -o '"[^"]*"' | head -1) +if echo "$first_time" | grep -q "Sep_20"; then + echo "Sort order correct: Sep_20 before Sep_21" +else + echo "Sort order WRONG: expected Sep_20 first, got $first_time" + rm -f "$sorthdr_order" + exit 1 +fi +rm -f "$sorthdr_order" + # Test --sort lexicographic echo "Testing --sort lexicographic..." 
sortlex=$TMP/sort_lex.nc diff --git a/test/test.both.netCDF b/test/test.both.netCDF index 46ea8d1..03d5d3a 100644 --- a/test/test.both.netCDF +++ b/test/test.both.netCDF @@ -1,4 +1,4 @@ -netcdf test.both { +netcdf both { dimensions: i = UNLIMITED ; // (288 currently) j = UNLIMITED ; // (2 currently) @@ -70,6 +70,11 @@ variables: uint hdr_start_index(j) ; uint hdr_stop_index(j) ; uint hdr_nRecords(j) ; + +// global attributes: + :Conventions = "CF-1.10" ; + :history = "Created by dbd2netCDF 1.7.2" ; + :source = "Slocum Glider Dinkum Binary Data files" ; data: sci_m_present_time = _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, diff --git a/test/test.sbd.netCDF b/test/test.sbd.netCDF index 6acca80..d0ce80e 100644 --- a/test/test.sbd.netCDF +++ b/test/test.sbd.netCDF @@ -1,4 +1,4 @@ -netcdf test.sbd { +netcdf sbd { dimensions: i = UNLIMITED ; // (94 currently) j = UNLIMITED ; // (1 currently) @@ -46,6 +46,11 @@ variables: uint hdr_start_index(j) ; uint hdr_stop_index(j) ; uint hdr_nRecords(j) ; + +// global attributes: + :Conventions = "CF-1.10" ; + :history = "Created by dbd2netCDF 1.7.2" ; + :source = "Slocum Glider Dinkum Binary Data files" ; data: m_altitude = 0, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, diff --git a/test/test.tbd.netCDF b/test/test.tbd.netCDF index 7cca97f..ca34ce4 100644 --- a/test/test.tbd.netCDF +++ b/test/test.tbd.netCDF @@ -1,4 +1,4 @@ -netcdf test.tbd { +netcdf tbd { dimensions: i = UNLIMITED ; // (194 currently) j = UNLIMITED ; // (1 currently) @@ -37,6 +37,11 @@ variables: uint hdr_start_index(j) ; uint hdr_stop_index(j) ; uint hdr_nRecords(j) ; + +// global attributes: + :Conventions = "CF-1.10" ; + :history = "Created by dbd2netCDF 1.7.2" ; + :source = "Slocum Glider Dinkum Binary Data files" ; data: sci_m_present_time = 1316565559.71042, 1316566273.89282, 1316566290.53412, From c05a0bc4187a71d85793de041d0c56b48e694088 Mon Sep 17 00:00:00 2001 From: Pat Welch Date: Mon, 30 Mar 2026 22:02:45 -0700 Subject: [PATCH 
3/5] ci: update GitHub Actions to latest versions for Node.js 22 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - actions/checkout: v4 → v6 - actions/upload-artifact: v4 → v7 - actions/download-artifact: v4 → v8 - actions/cache: v4 → v5 - codecov/codecov-action: v5 → v6 Fixes Node.js 20 deprecation warnings. Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/build-test.yml | 28 +++++++++++++------------- .github/workflows/dependency-check.yml | 2 +- .github/workflows/fuzz.yml | 4 ++-- .github/workflows/release.yml | 18 ++++++++--------- 4 files changed, 26 insertions(+), 26 deletions(-) diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index aba705f..7a4fcff 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -41,7 +41,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Install dependencies run: | @@ -79,7 +79,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Install shellcheck run: | @@ -96,7 +96,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Install cppcheck run: | @@ -127,7 +127,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Install dependencies run: | @@ -152,7 +152,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Install dependencies run: | @@ -182,7 +182,7 @@ jobs: lcov --list coverage.info - name: Upload coverage to Codecov - uses: codecov/codecov-action@v5 + uses: codecov/codecov-action@v6 with: files: coverage.info token: ${{ secrets.CODECOV_TOKEN }} @@ -195,7 +195,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Install dependencies run: | @@ -230,7 +230,7 @@ jobs: steps: - name: Checkout code - uses: 
actions/checkout@v4 + uses: actions/checkout@v6 - name: Initialize CodeQL uses: github/codeql-action/init@v4 @@ -261,7 +261,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Install dependencies run: | @@ -306,10 +306,10 @@ jobs: shell: cmd - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Cache Cygwin - uses: actions/cache@v4 + uses: actions/cache@v5 with: path: D:\cygwin key: cygwin-v6-${{ hashFiles('.github/workflows/build-test.yml') }} @@ -436,7 +436,7 @@ jobs: dnf install -y git - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Install dependencies run: | @@ -485,7 +485,7 @@ jobs: dnf install -y git - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Install dependencies run: | @@ -525,7 +525,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Setup MSVC uses: ilammy/msvc-dev-cmd@v1 diff --git a/.github/workflows/dependency-check.yml b/.github/workflows/dependency-check.yml index 643fa56..fd7542f 100644 --- a/.github/workflows/dependency-check.yml +++ b/.github/workflows/dependency-check.yml @@ -15,7 +15,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Check for dependency updates env: diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml index 0cf273a..09e336b 100644 --- a/.github/workflows/fuzz.yml +++ b/.github/workflows/fuzz.yml @@ -23,7 +23,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Install dependencies run: | @@ -81,7 +81,7 @@ jobs: - name: Upload crash artifacts if: failure() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: fuzz-crashes path: | diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index d837b97..972d301 100644 --- a/.github/workflows/release.yml 
+++ b/.github/workflows/release.yml @@ -21,7 +21,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Get version from tag id: get-version @@ -43,7 +43,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Install dependencies run: | @@ -72,7 +72,7 @@ jobs: cpack --config CPackSourceConfig.cmake - name: Upload artifacts - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: linux-packages path: | @@ -87,7 +87,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Install dependencies run: | @@ -112,7 +112,7 @@ jobs: cpack -G TGZ - name: Upload artifacts - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: macos-packages path: build/dbd2netcdf-*-Darwin*.tar.gz @@ -127,7 +127,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Setup MSVC uses: ilammy/msvc-dev-cmd@v1 @@ -166,7 +166,7 @@ jobs: cpack -G ZIP - name: Upload artifacts - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: windows-packages path: build/dbd2netcdf-*-win64*.zip @@ -178,10 +178,10 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Download all artifacts - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v8 with: path: artifacts merge-multiple: true From 64e367a1ad6aa54bb404c7e31638378eaf1625be Mon Sep 17 00:00:00 2001 From: Pat Welch Date: Mon, 30 Mar 2026 22:08:34 -0700 Subject: [PATCH 4/5] fix: apply -Wold-style-cast only to C++ files, add zlib to Windows conda - Use generator expression $<$<COMPILE_LANGUAGE:CXX>:-Wold-style-cast> so lz4.c (C code) is not affected by C++-only warning flag - Add zlib to conda install for Windows builds (CMake 4.3 FindZLIB requires explicit zlib package) Co-Authored-By: Claude Opus 4.6 (1M context) ---
.github/workflows/build-test.yml | 2 +- .github/workflows/release.yml | 2 +- src/CMakeLists.txt | 6 ++++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index 7a4fcff..1366699 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -542,7 +542,7 @@ jobs: - name: Install dependencies via conda run: | - conda install -y libnetcdf hdf5 cmake ninja + conda install -y libnetcdf hdf5 zlib cmake ninja - name: Configure CMake run: | diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 972d301..1409876 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -144,7 +144,7 @@ jobs: - name: Install dependencies via conda run: | - conda install -y libnetcdf hdf5 cmake ninja + conda install -y libnetcdf hdf5 zlib cmake ninja - name: Configure CMake run: | diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5d921a2..47987fd 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -25,7 +25,8 @@ if(MSVC) target_compile_definitions(dbd_common PRIVATE _CRT_SECURE_NO_WARNINGS) set_source_files_properties(lz4.c PROPERTIES COMPILE_FLAGS "/W0") else() - target_compile_options(dbd_common PRIVATE -Wall -Wextra -pedantic -Werror -Wshadow -Wold-style-cast) + target_compile_options(dbd_common PRIVATE -Wall -Wextra -pedantic -Werror -Wshadow + $<$<COMPILE_LANGUAGE:CXX>:-Wold-style-cast>) set_source_files_properties(lz4.c PROPERTIES COMPILE_FLAGS "-w") endif() @@ -78,7 +79,8 @@ foreach(target ${ALL_TARGETS}) # Disable warnings for third-party code (lz4) set_source_files_properties(lz4.c PROPERTIES COMPILE_FLAGS "/W0") else() - target_compile_options(${target} PRIVATE -Wall -Wextra -pedantic -Werror -Wshadow -Wold-style-cast) + target_compile_options(${target} PRIVATE -Wall -Wextra -pedantic -Werror -Wshadow + $<$<COMPILE_LANGUAGE:CXX>:-Wold-style-cast>) # Disable -Werror for third-party code (lz4) set_source_files_properties(lz4.c PROPERTIES COMPILE_FLAGS
"-w") endif() From 782ba5a777cec70fe6a511e1105cdc899e6a6a54 Mon Sep 17 00:00:00 2001 From: Pat Welch Date: Mon, 30 Mar 2026 22:22:30 -0700 Subject: [PATCH 5/5] ci: opt into Node.js 24 for Windows jobs Silences Node.js 20 deprecation warnings for conda-incubator/setup-miniconda@v3 and ilammy/msvc-dev-cmd@v1 which have no newer major versions yet. Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/build-test.yml | 2 ++ .github/workflows/release.yml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index 1366699..bb48fbd 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -519,6 +519,8 @@ jobs: build-windows: name: Windows (MSVC) runs-on: windows-latest + env: + FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true defaults: run: shell: pwsh diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 1409876..2107599 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -121,6 +121,8 @@ jobs: name: Windows (MSVC) needs: validate-version runs-on: windows-latest + env: + FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true defaults: run: shell: pwsh