From ad24b9c2d9c9828bcaa057d940a3fa2a8b4b7c0e Mon Sep 17 00:00:00 2001 From: Stuart Pearce Date: Thu, 12 Mar 2026 12:57:17 -0700 Subject: [PATCH 01/11] adds --riot-csv and --riot-positions to L2 command and riot_csv_writer --- .gitignore | 3 + src/glide/cli.py | 13 ++++ src/glide/riot_csv_writer.py | 135 +++++++++++++++++++++++++++++++++++ 3 files changed, 151 insertions(+) create mode 100644 src/glide/riot_csv_writer.py diff --git a/.gitignore b/.gitignore index d71e300..5c82f02 100644 --- a/.gitignore +++ b/.gitignore @@ -24,3 +24,6 @@ tests/data/slocum.gps.csv # Notebooks *.ipynb + +# Pycharm projects +*.idea \ No newline at end of file diff --git a/src/glide/cli.py b/src/glide/cli.py index eee8acd..578dd70 100644 --- a/src/glide/cli.py +++ b/src/glide/cli.py @@ -8,6 +8,7 @@ import netCDF4 as nc import typer +from typing import Union from typing_extensions import Annotated from . import ancillery, config, hotel, process_l1, process_l2, process_l3 @@ -112,6 +113,14 @@ def l2( "-d", help="Minimum distance between profiles in number of data points." ), ] = 20, + riot_csv: Annotated[str, typer.Option( + "-r", "--riot-csv", + help="File path to output a RIOT-compatible CSV file in addition " + "to netCDF.")] = + None, + riot_add_positions: Annotated[bool, typer.Option( + "--riot-positions", + help="Interpolate and add latitude and longitude into RIOT CSV output.")] = False, ) -> None: """ Generate L2 data from L1 data. @@ -144,6 +153,10 @@ def l2( out.to_netcdf(out_file) + if riot_csv: + from .riot_csv_writer import write_riot_csv + write_riot_csv(out, riot_add_positions, riot_csv) + @app.command() @log_args diff --git a/src/glide/riot_csv_writer.py b/src/glide/riot_csv_writer.py new file mode 100644 index 0000000..c3e197e --- /dev/null +++ b/src/glide/riot_csv_writer.py @@ -0,0 +1,135 @@ +import logging + +import pandas as pd +import xarray as xr +import numpy as np + +_log = logging.getLogger(__name__) + +# Timestamp difference threshold: +# The limit that the riot timestamp must be from the glider timestamp +# to be considered close enough to interpolate position variables. +TS_DIFF_THRESHOLD = 20 # seconds + + +def write_riot_csv( + ds: xr.Dataset, add_positions: bool, output_path: str) -> None: + """Write xarray Dataset to CSV format compatible with RIOT. + + RIOT expects a CSV with columns: timestamp, variable_name, value + """ + _log.debug(f'Gathering RIOT variables for CSV {output_path}') + riot_vars = [ + 'sr_ping_epoch_days', + 'sr_ping_secs', + 'sr_ping_msecs', + 'sr_ping_rt_msecs', + 'sr_ping_freq', + 'sr_ping_detection_level', + 'sr_ping_sequence_number', + 'sr_ping_platform_id', + 'sr_ping_slot', + 'sr_ping_group', + 'sr_platform_state', + ] + + # including depth in positions assumes that the thermodynamic + # calculations were added. + position_vars = ['depth', 'lat', 'lon', 'time'] + + # Check that all required RIOT variables are present in the dataset + if not set(riot_vars).issubset(set(ds.data_vars)): + _log.error("Dataset is missing required RIOT variables") + return + + # Optionally add position variables if they are present in the dataset + if add_positions and not set(position_vars).issubset(ds.variables): + missing_vars = set(position_vars).difference(ds.variables) + _log.warning( + f'Position variables {missing_vars} are missing from ' + 'dataset, positions cannot be added to RIOT CSV') + add_positions = False + + # Drop any variables that are not needed for RIOT output + vars_to_drop = set(ds.data_vars).difference(riot_vars) + riot_ds = ds.drop_vars(vars_to_drop) + + # ToDo: this drop zeros section should be moved to processing L2 + # for issue#32, but finish the riot_csv branch first + # Drop any records with all zeros or NaNs + temp_riot_array = riot_ds.to_array() + rows_to_keep = np.logical_not(np.all( + np.logical_or( + np.isnan(temp_riot_array), + temp_riot_array == 0), + axis=0 + )) + riot_ds = riot_ds.where(rows_to_keep, drop=True) + + # typecasting according to RIOT User data manual + epoch_days = riot_ds['sr_ping_epoch_days'].values.astype(np.int64) + secs = riot_ds['sr_ping_secs'].values.astype(np.int64) + msecs = riot_ds['sr_ping_msecs'].values.astype(np.int64) + # calculate the epoch time in milliseconds + epoch_msecs = np.empty_like(epoch_days, dtype=np.int64) + epoch_msecs[:] = epoch_days * 86400 * 1000 + secs * 1000 + msecs + + rt_msecs = riot_ds['sr_ping_rt_msecs'].values.astype(np.uint32) + freq = riot_ds['sr_ping_freq'].values.astype(np.uint32) + detection_level = riot_ds['sr_ping_detection_level'].values.astype(np.uint16) + sequence_number = riot_ds['sr_ping_sequence_number'].values.astype(np.uint32) + platform_id = riot_ds['sr_ping_platform_id'].values.astype(np.uint8) + slot = riot_ds['sr_ping_slot'].values.astype(np.uint8) + group = riot_ds['sr_ping_group'].values.astype(np.uint8) + platform_state = riot_ds['sr_platform_state'].values.astype(np.int32) + + riot_df = pd.DataFrame({ + 'riotData_prefix': np.full( + len(epoch_msecs), '$riotData'), + 'epoch_msecs': epoch_msecs, + 'rt_msecs': rt_msecs, + 'freq': freq, + 'detection_level': detection_level, + 'sequence_number': sequence_number, + 'platform_id': platform_id, + 'slot': slot, + 'group': group, + 'state': platform_state, + 'num_records': np.full(len(epoch_msecs), np.nan), + }) + + if add_positions: + _log.debug('Adding position variables to RIOT CSV') + vars_to_drop = set(ds.data_vars).difference(position_vars) + position_ds = ds.drop_vars(vars_to_drop) + position_ds = position_ds.where(rows_to_keep, drop=True) + riot_ts = epoch_msecs / 1000 + glider_ts = position_ds['time'] + # ToDo: potentially move this section to _interpolate ... + if np.all(abs(riot_ts - glider_ts) < TS_DIFF_THRESHOLD): + _log.debug('Interpolating position variables to RIOT timestamps') + riot_df['depth'] = np.interp( + riot_ts, glider_ts, position_ds['depth']) + riot_df['lat'] = np.interp( + riot_ts, glider_ts, position_ds['lat']) + riot_df['lon'] = np.interp( + riot_ts, glider_ts, position_ds['lon']) + else: + _log.warning( + f'RIOT timestamps greater than ' + f'Threshold:{TS_DIFF_THRESHOLD} from glider timestamps. ' + f"Using record's coordinates instead of interpolating.") + riot_df['depth'] = position_ds['depth'] + riot_df['latitude'] = position_ds['lat'] + riot_df['longitude'] = position_ds['lon'] + + # Write to CSV + _log.debug('Writing to RIOT CSV') + riot_df.to_csv( + output_path, index=False, header=False, + lineterminator='\n', mode='a') + + +def _interpolate_to_riot_timestamps(ds: xr.Dataset, config: dict) -> xr.Dataset: + """Interpolate dataset to RIOT timestamps if specified in config.""" + pass \ No newline at end of file From 02db51e677e874f30f4af5c218b760ed417b5463 Mon Sep 17 00:00:00 2001 From: Stuart Pearce Date: Thu, 12 Mar 2026 14:11:06 -0700 Subject: [PATCH 02/11] fixes failed test for None type as default for the --riot-csv option instead of string --- src/glide/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glide/cli.py b/src/glide/cli.py index 578dd70..bea5a01 100644 --- a/src/glide/cli.py +++ b/src/glide/cli.py @@ -117,7 +117,7 @@ def l2( "-r", "--riot-csv", help="File path to output a RIOT-compatible CSV file in addition " "to netCDF.")] = - None, + '', riot_add_positions: Annotated[bool, typer.Option( "--riot-positions", help="Interpolate and add latitude and longitude into RIOT CSV output.")] = False, From 48fbb799b86a1d8e88e96a73fdb679422b04bfaf Mon Sep 17 00:00:00 2001 From: Stuart Pearce Date: Thu, 12 Mar 2026 14:40:29 -0700 Subject: [PATCH 03/11] fixes failed test for an invalid return type from an unused function --- src/glide/cli.py | 8 ++++---- src/glide/riot_csv_writer.py | 38 ++++++++++++++++++++++++++---------- 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/src/glide/cli.py b/src/glide/cli.py index bea5a01..f1ab6bb 100644 --- a/src/glide/cli.py +++ b/src/glide/cli.py @@ -113,14 +113,14 @@ def l2( "-d", help="Minimum distance between profiles in number of data points." ), ] = 20, - riot_csv: Annotated[str, typer.Option( + riot_csv: Annotated[Union[str | None], typer.Option( "-r", "--riot-csv", help="File path to output a RIOT-compatible CSV file in addition " - "to netCDF.")] = - '', + "to netCDF.")] = None, riot_add_positions: Annotated[bool, typer.Option( "--riot-positions", - help="Interpolate and add latitude and longitude into RIOT CSV output.")] = False, + help="Interpolate and add depth, latitude, and longitude into RIOT CSV " + "output.")] = False, ) -> None: """ Generate L2 data from L1 data. diff --git a/src/glide/riot_csv_writer.py b/src/glide/riot_csv_writer.py index c3e197e..d476c40 100644 --- a/src/glide/riot_csv_writer.py +++ b/src/glide/riot_csv_writer.py @@ -14,9 +14,32 @@ def write_riot_csv( ds: xr.Dataset, add_positions: bool, output_path: str) -> None: - """Write xarray Dataset to CSV format compatible with RIOT. - - RIOT expects a CSV with columns: timestamp, variable_name, value + """Write xarray Dataset to a RIOT `$riotData`-style CSV file. + The output is a wide, record-oriented CSV (one row per ping) whose + columns correspond to the fixed RIOT variables expected in a + `$riotData` file format. + At a minimum, this function writes the following RIOT ping fields + as individual columns: + - ``sr_ping_epoch_days`` + - ``sr_ping_secs`` + - ``sr_ping_msecs`` + - ``sr_ping_rt_msecs`` + - ``sr_ping_freq`` + - ``sr_ping_detection_level`` + - ``sr_ping_sequence_number`` + - ``sr_ping_platform_id`` + - ``sr_ping_slot`` + - ``sr_ping_group`` + - ``sr_platform_state`` + - ``sr_num_records_in_file`` + If ``add_positions`` is True and the dataset contains them, the + following position variables are also included as additional + columns: + - ``depth`` + - ``lat`` + - ``lon`` + The resulting CSV, containing one record per ping with these + columns, is written to ``output_path``. """ _log.debug(f'Gathering RIOT variables for CSV {output_path}') riot_vars = [ @@ -120,16 +143,11 @@ def write_riot_csv( f'Threshold:{TS_DIFF_THRESHOLD} from glider timestamps. ' f"Using record's coordinates instead of interpolating.") riot_df['depth'] = position_ds['depth'] - riot_df['latitude'] = position_ds['lat'] - riot_df['longitude'] = position_ds['lon'] + riot_df['lat'] = position_ds['lat'] + riot_df['lon'] = position_ds['lon'] # Write to CSV _log.debug('Writing to RIOT CSV') riot_df.to_csv( output_path, index=False, header=False, lineterminator='\n', mode='a') - - -def _interpolate_to_riot_timestamps(ds: xr.Dataset, config: dict) -> xr.Dataset: - """Interpolate dataset to RIOT timestamps if specified in config.""" - pass \ No newline at end of file From f665b82b35947446168fa9d3588af52d82c59dfb Mon Sep 17 00:00:00 2001 From: Stuart Pearce Date: Fri, 13 Mar 2026 17:44:11 -0700 Subject: [PATCH 04/11] updated based on review suggestions and new formatting requirements --- src/glide/cli.py | 28 ++-- src/glide/riot_csv_writer.py | 239 ++++++++++++++++++++++------------- 2 files changed, 169 insertions(+), 98 deletions(-) diff --git a/src/glide/cli.py b/src/glide/cli.py index f1ab6bb..c202b6a 100644 --- a/src/glide/cli.py +++ b/src/glide/cli.py @@ -5,10 +5,10 @@ import logging from importlib.metadata import version from pathlib import Path +from typing import Union import netCDF4 as nc import typer -from typing import Union from typing_extensions import Annotated from . import ancillery, config, hotel, process_l1, process_l2, process_l3 @@ -113,14 +113,23 @@ def l2( "-d", help="Minimum distance between profiles in number of data points." ), ] = 20, - riot_csv: Annotated[Union[str | None], typer.Option( - "-r", "--riot-csv", - help="File path to output a RIOT-compatible CSV file in addition " - "to netCDF.")] = None, - riot_add_positions: Annotated[bool, typer.Option( - "--riot-positions", - help="Interpolate and add depth, latitude, and longitude into RIOT CSV " - "output.")] = False, + riot_csv: Annotated[ + Union[str | None], + typer.Option( + "-r", + "--riot-csv", + help="File path to output a RIOT-compatible CSV file in addition " + "to netCDF.", + ), + ] = None, + riot_add_positions: Annotated[ + bool, + typer.Option( + "--riot-positions", + help="Interpolate and add depth, latitude, and longitude into RIOT CSV " + "output.", + ), + ] = False, ) -> None: """ Generate L2 data from L1 data. @@ -155,6 +164,7 @@ def l2( if riot_csv: from .riot_csv_writer import write_riot_csv + write_riot_csv(out, riot_add_positions, riot_csv) diff --git a/src/glide/riot_csv_writer.py b/src/glide/riot_csv_writer.py index d476c40..5d80c3b 100644 --- a/src/glide/riot_csv_writer.py +++ b/src/glide/riot_csv_writer.py @@ -1,8 +1,8 @@ import logging +import os -import pandas as pd -import xarray as xr import numpy as np +import xarray as xr _log = logging.getLogger(__name__) @@ -12,12 +12,11 @@ TS_DIFF_THRESHOLD = 20 # seconds -def write_riot_csv( - ds: xr.Dataset, add_positions: bool, output_path: str) -> None: - """Write xarray Dataset to a RIOT `$riotData`-style CSV file. +def write_riot_csv(ds: xr.Dataset, add_positions: bool, output_path: str) -> None: + """Write xarray Dataset to a RIOT-formatted CSV file. The output is a wide, record-oriented CSV (one row per ping) whose columns correspond to the fixed RIOT variables expected in a - `$riotData` file format. + RIOT Data User manual file format. At a minimum, this function writes the following RIOT ping fields as individual columns: - ``sr_ping_epoch_days`` @@ -41,113 +40,175 @@ def write_riot_csv( The resulting CSV, containing one record per ping with these columns, is written to ``output_path``. """ - _log.debug(f'Gathering RIOT variables for CSV {output_path}') + _log.debug(f"Gathering RIOT variables for CSV {output_path}") riot_vars = [ - 'sr_ping_epoch_days', - 'sr_ping_secs', - 'sr_ping_msecs', - 'sr_ping_rt_msecs', - 'sr_ping_freq', - 'sr_ping_detection_level', - 'sr_ping_sequence_number', - 'sr_ping_platform_id', - 'sr_ping_slot', - 'sr_ping_group', - 'sr_platform_state', + "sr_ping_epoch_days", + "sr_ping_secs", + "sr_ping_msecs", + "sr_ping_rt_msecs", + "sr_ping_freq", + "sr_ping_detection_level", + "sr_ping_sequence_number", + "sr_ping_platform_id", + "sr_ping_slot", + "sr_ping_group", + "sr_platform_state", ] - # including depth in positions assumes that the thermodynamic - # calculations were added. - position_vars = ['depth', 'lat', 'lon', 'time'] - # Check that all required RIOT variables are present in the dataset if not set(riot_vars).issubset(set(ds.data_vars)): _log.error("Dataset is missing required RIOT variables") return - # Optionally add position variables if they are present in the dataset - if add_positions and not set(position_vars).issubset(ds.variables): - missing_vars = set(position_vars).difference(ds.variables) - _log.warning( - f'Position variables {missing_vars} are missing from ' - 'dataset, positions cannot be added to RIOT CSV') - add_positions = False - # Drop any variables that are not needed for RIOT output - vars_to_drop = set(ds.data_vars).difference(riot_vars) + vars_to_drop = set(ds.variables).difference(riot_vars) riot_ds = ds.drop_vars(vars_to_drop) + if riot_ds.sizes == 0: + _log.error("No RIOT data available to create the CSV") + return # ToDo: this drop zeros section should be moved to processing L2 # for issue#32, but finish the riot_csv branch first # Drop any records with all zeros or NaNs temp_riot_array = riot_ds.to_array() - rows_to_keep = np.logical_not(np.all( - np.logical_or( - np.isnan(temp_riot_array), - temp_riot_array == 0), - axis=0 - )) + rows_to_keep = np.logical_not( + np.all(np.logical_or(np.isnan(temp_riot_array), temp_riot_array == 0), axis=0) + ) riot_ds = riot_ds.where(rows_to_keep, drop=True) + if riot_ds.sizes["time"] == 0: + _log.error("No RIOT data available to create the CSV") + return # typecasting according to RIOT User data manual - epoch_days = riot_ds['sr_ping_epoch_days'].values.astype(np.int64) - secs = riot_ds['sr_ping_secs'].values.astype(np.int64) - msecs = riot_ds['sr_ping_msecs'].values.astype(np.int64) + epoch_days = riot_ds["sr_ping_epoch_days"].values.astype(np.int64) + secs = riot_ds["sr_ping_secs"].values.astype(np.int64) + msecs = riot_ds["sr_ping_msecs"].values.astype(np.int64) # calculate the epoch time in milliseconds epoch_msecs = np.empty_like(epoch_days, dtype=np.int64) epoch_msecs[:] = epoch_days * 86400 * 1000 + secs * 1000 + msecs - rt_msecs = riot_ds['sr_ping_rt_msecs'].values.astype(np.uint32) - freq = riot_ds['sr_ping_freq'].values.astype(np.uint32) - detection_level = riot_ds['sr_ping_detection_level'].values.astype(np.uint16) - sequence_number = riot_ds['sr_ping_sequence_number'].values.astype(np.uint32) - platform_id = riot_ds['sr_ping_platform_id'].values.astype(np.uint8) - slot = riot_ds['sr_ping_slot'].values.astype(np.uint8) - group = riot_ds['sr_ping_group'].values.astype(np.uint8) - platform_state = riot_ds['sr_platform_state'].values.astype(np.int32) - - riot_df = pd.DataFrame({ - 'riotData_prefix': np.full( - len(epoch_msecs), '$riotData'), - 'epoch_msecs': epoch_msecs, - 'rt_msecs': rt_msecs, - 'freq': freq, - 'detection_level': detection_level, - 'sequence_number': sequence_number, - 'platform_id': platform_id, - 'slot': slot, - 'group': group, - 'state': platform_state, - 'num_records': np.full(len(epoch_msecs), np.nan), - }) + # converting everything to Int64 type makes it all integers but with + # 'NA' as a missing value, which will fill in as blank in the CSV. + riot_df = riot_ds.to_pandas().astype("Int64") + + # drop the columns used to create epoch_msecs + riot_df = riot_df.drop( + ["sr_ping_epoch_days", "sr_ping_secs", "sr_ping_msecs"], axis=1 + ) + + # rename columns to match headers in RIOT Data User Manual + riot_df.columns = [ + "rtMsecs", + "freq", + "detectionLevel", + "sequenceNumber", + "platformId", + "slot", + "group", + "platformState", + ] + + # Add the additional columns + riot_df.insert(loc=0, column="epochMsecs", value=epoch_msecs) + riot_df.insert(loc=0, column="riotDataPrefix", value="$riotData") + # riot_df['recNumInFile'] = np.nan # unused record number in file. if add_positions: - _log.debug('Adding position variables to RIOT CSV') - vars_to_drop = set(ds.data_vars).difference(position_vars) - position_ds = ds.drop_vars(vars_to_drop) - position_ds = position_ds.where(rows_to_keep, drop=True) - riot_ts = epoch_msecs / 1000 - glider_ts = position_ds['time'] - # ToDo: potentially move this section to _interpolate ... - if np.all(abs(riot_ts - glider_ts) < TS_DIFF_THRESHOLD): - _log.debug('Interpolating position variables to RIOT timestamps') - riot_df['depth'] = np.interp( - riot_ts, glider_ts, position_ds['depth']) - riot_df['lat'] = np.interp( - riot_ts, glider_ts, position_ds['lat']) - riot_df['lon'] = np.interp( - riot_ts, glider_ts, position_ds['lon']) - else: - _log.warning( - f'RIOT timestamps greater than ' - f'Threshold:{TS_DIFF_THRESHOLD} from glider timestamps. ' - f"Using record's coordinates instead of interpolating.") - riot_df['depth'] = position_ds['depth'] - riot_df['lat'] = position_ds['lat'] - riot_df['lon'] = position_ds['lon'] + riot_df = _add_positions(ds, riot_df, rows_to_keep) # Write to CSV - _log.debug('Writing to RIOT CSV') + _log.debug("Writing to RIOT CSV") + # If the file exists already, it will append, so don't write + # the header. + if os.path.exists(output_path): + headerwrite = False + else: + headerwrite = True + riot_df.to_csv( - output_path, index=False, header=False, - lineterminator='\n', mode='a') + output_path, index=False, header=headerwrite, lineterminator="\n", mode="a" + ) + + +def _add_positions(ds, riot_df, rows_to_keep): + """Add position variables (depth, lat, lon) to the RIOT DataFrame by + interpolating from the glider position data to the RIOT + timestamps. Only RIOT timestamps that fall within the time + boundaries of the available glider position data will be + interpolated; others will be left as NaN (and thus blank in the + CSV). + """ + _log.debug("Adding position variables to RIOT CSV") + # including depth in positions assumes that the thermodynamic + # calculations were added. + position_vars = ["depth", "lat", "lon", "time"] + + if not set(position_vars).issubset(ds.variables): + missing_vars = set(position_vars).difference(ds.variables) + _log.warning( + f"Position variables {missing_vars} are missing from " + "dataset, positions cannot be added to RIOT CSV filling " + "with blanks" + ) + riot_df["depth"] = np.nan + riot_df["lat"] = np.nan + riot_df["lon"] = np.nan + return riot_df + + vars_to_drop = set(ds.variables).difference(position_vars) + position_ds = ds.drop_vars(vars_to_drop) + position_ds = position_ds.where(rows_to_keep, drop=True) + + # Gather the timestamps for checking if interpolation is possible + riot_ts = riot_df["epochMsecs"] / 1000 + glider_ts = position_ds["time"].values + + # pre-allocate arrays with NaNs + depth = np.full(riot_ts.shape, np.nan) + lat = np.full(riot_ts.shape, np.nan) + lon = np.full(riot_ts.shape, np.nan) + + q_depth = np.logical_and( + np.isfinite(position_ds["depth"]), position_ds["depth"] != 0 + ) + q_pos = np.logical_and( + np.isfinite(position_ds["lat"]), np.isfinite(position_ds["lon"]) + ) + + if np.sum(q_depth) == 0: + _log.warning("No valid depths found. adding blank positions") + riot_df["depth"] = depth + riot_df["lat"] = lat + riot_df["lon"] = lon + return riot_df + + _log.debug( + "Interpolating position variables to RIOT timestamps that fall " + "within the glider position time boundaries" + ) + + # Only interpolate to timestamps that fall within the time boundaries + # of the available glider position data. Any that fall outside + # will be NaNs and ultimately blanks in the CSV file. + qdepth_in_tbnds = np.logical_and( + riot_ts >= glider_ts[q_depth][0], riot_ts <= glider_ts[q_depth][-1] + ) + qpos_in_tbnds = np.logical_and( + riot_ts >= glider_ts[q_pos][0], riot_ts <= glider_ts[q_pos][-1] + ) + + depth[qdepth_in_tbnds] = np.interp( + riot_ts[qdepth_in_tbnds], glider_ts[q_depth], position_ds["depth"][q_depth] + ) + lat[qpos_in_tbnds] = np.interp( + riot_ts[qpos_in_tbnds], glider_ts[q_pos], position_ds["lat"][q_pos] + ) + lon[qpos_in_tbnds] = np.interp( + riot_ts[qpos_in_tbnds], glider_ts[q_pos], position_ds["lon"][q_pos] + ) + + riot_df["depth"] = depth + riot_df["lat"] = lat + riot_df["lon"] = lon + + return riot_df From 2c7b11ec419e11153be2585141a45b6dd49bb0e2 Mon Sep 17 00:00:00 2001 From: Stuart Pearce Date: Fri, 13 Mar 2026 18:17:28 -0700 Subject: [PATCH 05/11] fixed mypy typing errors --- src/glide/riot_csv_writer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/glide/riot_csv_writer.py b/src/glide/riot_csv_writer.py index 5d80c3b..d2db1d8 100644 --- a/src/glide/riot_csv_writer.py +++ b/src/glide/riot_csv_writer.py @@ -2,6 +2,7 @@ import os import numpy as np +import pandas as pd import xarray as xr _log = logging.getLogger(__name__) @@ -90,6 +91,7 @@ def write_riot_csv(ds: xr.Dataset, add_positions: bool, output_path: str) -> Non # converting everything to Int64 type makes it all integers but with # 'NA' as a missing value, which will fill in as blank in the CSV. riot_df = riot_ds.to_pandas().astype("Int64") + assert isinstance(riot_df, pd.DataFrame), "Expected DataFrame from multi-var Dataset" # drop the columns used to create epoch_msecs riot_df = riot_df.drop( From b821b882f03629d36806975305b44e33da10deb4 Mon Sep 17 00:00:00 2001 From: Stuart Pearce Date: Sun, 15 Mar 2026 11:38:03 -0700 Subject: [PATCH 06/11] Got local environment solved and able to pre-test. Should pass now --- src/glide/riot_csv_writer.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/glide/riot_csv_writer.py b/src/glide/riot_csv_writer.py index d2db1d8..21dcd34 100644 --- a/src/glide/riot_csv_writer.py +++ b/src/glide/riot_csv_writer.py @@ -90,8 +90,7 @@ def write_riot_csv(ds: xr.Dataset, add_positions: bool, output_path: str) -> Non # converting everything to Int64 type makes it all integers but with # 'NA' as a missing value, which will fill in as blank in the CSV. - riot_df = riot_ds.to_pandas().astype("Int64") - assert isinstance(riot_df, pd.DataFrame), "Expected DataFrame from multi-var Dataset" + riot_df = riot_ds.to_dataframe().astype("Int64") # drop the columns used to create epoch_msecs riot_df = riot_df.drop( @@ -99,7 +98,7 @@ def write_riot_csv(ds: xr.Dataset, add_positions: bool, output_path: str) -> Non ) # rename columns to match headers in RIOT Data User Manual - riot_df.columns = [ + riot_df.columns = pd.Index([ "rtMsecs", "freq", "detectionLevel", @@ -108,7 +107,7 @@ def write_riot_csv(ds: xr.Dataset, add_positions: bool, output_path: str) -> Non "slot", "group", "platformState", - ] + ]) # Add the additional columns riot_df.insert(loc=0, column="epochMsecs", value=epoch_msecs) From 7bd51304de9fe203a031c631e55ec8d8987077f7 Mon Sep 17 00:00:00 2001 From: Stuart Pearce Date: Sun, 15 Mar 2026 11:48:23 -0700 Subject: [PATCH 07/11] fixes ruff formatting changes --- src/glide/riot_csv_writer.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/glide/riot_csv_writer.py b/src/glide/riot_csv_writer.py index 21dcd34..4ea6daa 100644 --- a/src/glide/riot_csv_writer.py +++ b/src/glide/riot_csv_writer.py @@ -98,16 +98,18 @@ def write_riot_csv(ds: xr.Dataset, add_positions: bool, output_path: str) -> Non ) # rename columns to match headers in RIOT Data User Manual - riot_df.columns = pd.Index([ - "rtMsecs", - "freq", - "detectionLevel", - "sequenceNumber", - "platformId", - "slot", - "group", - "platformState", - ]) + riot_df.columns = pd.Index( + [ + "rtMsecs", + "freq", + "detectionLevel", + "sequenceNumber", + "platformId", + "slot", + "group", + "platformState", + ] + ) # Add the additional columns riot_df.insert(loc=0, column="epochMsecs", value=epoch_msecs) From a3597a41ef818550b90887e1fedaef7f7b986bc4 Mon Sep 17 00:00:00 2001 From: Stuart Pearce Date: Sun, 15 Mar 2026 13:12:01 -0700 Subject: [PATCH 08/11] Updated code based on Copilot review, allowed Claude to write tests --- src/glide/riot_csv_writer.py | 84 +++++++--------- tests/test_riot_csv_writer.py | 183 ++++++++++++++++++++++++++++++++++ 2 files changed, 220 insertions(+), 47 deletions(-) create mode 100644 tests/test_riot_csv_writer.py diff --git a/src/glide/riot_csv_writer.py b/src/glide/riot_csv_writer.py index 4ea6daa..a29813d 100644 --- a/src/glide/riot_csv_writer.py +++ b/src/glide/riot_csv_writer.py @@ -2,16 +2,10 @@ import os import numpy as np -import pandas as pd import xarray as xr _log = logging.getLogger(__name__) -# Timestamp difference threshold: -# The limit that the riot timestamp must be from the glider timestamp -# to be considered close enough to interpolate position variables. -TS_DIFF_THRESHOLD = 20 # seconds - def write_riot_csv(ds: xr.Dataset, add_positions: bool, output_path: str) -> None: """Write xarray Dataset to a RIOT-formatted CSV file. @@ -31,7 +25,6 @@ def write_riot_csv(ds: xr.Dataset, add_positions: bool, output_path: str) -> Non - ``sr_ping_slot`` - ``sr_ping_group`` - ``sr_platform_state`` - - ``sr_num_records_in_file`` If ``add_positions`` is True and the dataset contains them, the following position variables are also included as additional columns: @@ -64,7 +57,7 @@ def write_riot_csv(ds: xr.Dataset, add_positions: bool, output_path: str) -> Non # Drop any variables that are not needed for RIOT output vars_to_drop = set(ds.variables).difference(riot_vars) riot_ds = ds.drop_vars(vars_to_drop) - if riot_ds.sizes == 0: + if riot_ds.sizes.get("time", 0) == 0: _log.error("No RIOT data available to create the CSV") return @@ -98,18 +91,17 @@ def write_riot_csv(ds: xr.Dataset, add_positions: bool, output_path: str) -> Non ) # rename columns to match headers in RIOT Data User Manual - riot_df.columns = pd.Index( - [ - "rtMsecs", - "freq", - "detectionLevel", - "sequenceNumber", - "platformId", - "slot", - "group", - "platformState", - ] - ) + csv_columns_map = { + "sr_ping_rt_msecs": "rtMsecs", + "sr_ping_freq": "freq", + "sr_ping_detection_level": "detectionLevel", + "sr_ping_sequence_number": "sequenceNumber", + "sr_ping_platform_id": "platformId", + "sr_ping_slot": "slot", + "sr_ping_group": "group", + "sr_platform_state": "platformState", + } + riot_df = riot_df.rename(columns=csv_columns_map) # Add the additional columns riot_df.insert(loc=0, column="epochMsecs", value=epoch_msecs) @@ -178,37 +170,35 @@ def _add_positions(ds, riot_df, rows_to_keep): np.isfinite(position_ds["lat"]), np.isfinite(position_ds["lon"]) ) - if np.sum(q_depth) == 0: - _log.warning("No valid depths found. adding blank positions") - riot_df["depth"] = depth - riot_df["lat"] = lat - riot_df["lon"] = lon - return riot_df - - _log.debug( - "Interpolating position variables to RIOT timestamps that fall " - "within the glider position time boundaries" - ) - # Only interpolate to timestamps that fall within the time boundaries # of the available glider position data. Any that fall outside # will be NaNs and ultimately blanks in the CSV file. - qdepth_in_tbnds = np.logical_and( - riot_ts >= glider_ts[q_depth][0], riot_ts <= glider_ts[q_depth][-1] - ) - qpos_in_tbnds = np.logical_and( - riot_ts >= glider_ts[q_pos][0], riot_ts <= glider_ts[q_pos][-1] - ) + if not q_depth.values.any(): + _log.warning("No valid depths found. Adding blank depths") + else: + _log.debug( + "Interpolating depth variable to RIOT timestamps that fall " + "within the glider depth time boundaries" + ) + qdepth_in_tbnds = np.logical_and( + riot_ts >= glider_ts[q_depth][0], riot_ts <= glider_ts[q_depth][-1] + ) + depth[qdepth_in_tbnds] = np.interp( + riot_ts[qdepth_in_tbnds], glider_ts[q_depth], position_ds["depth"][q_depth] + ) - depth[qdepth_in_tbnds] = np.interp( - riot_ts[qdepth_in_tbnds], glider_ts[q_depth], position_ds["depth"][q_depth] - ) - lat[qpos_in_tbnds] = np.interp( - riot_ts[qpos_in_tbnds], glider_ts[q_pos], position_ds["lat"][q_pos] - ) - lon[qpos_in_tbnds] = np.interp( - riot_ts[qpos_in_tbnds], glider_ts[q_pos], position_ds["lon"][q_pos] - ) + if not q_pos.values.any(): + _log.warning("No valid positions found. Adding blank positions") + else: + qpos_in_tbnds = np.logical_and( + riot_ts >= glider_ts[q_pos][0], riot_ts <= glider_ts[q_pos][-1] + ) + lat[qpos_in_tbnds] = np.interp( + riot_ts[qpos_in_tbnds], glider_ts[q_pos], position_ds["lat"][q_pos] + ) + lon[qpos_in_tbnds] = np.interp( + riot_ts[qpos_in_tbnds], glider_ts[q_pos], position_ds["lon"][q_pos] + ) riot_df["depth"] = depth riot_df["lat"] = lat diff --git a/tests/test_riot_csv_writer.py b/tests/test_riot_csv_writer.py new file mode 100644 index 0000000..65aed2c --- /dev/null +++ b/tests/test_riot_csv_writer.py @@ -0,0 +1,183 @@ +"""Tests for the riot_csv_writer module and the --riot-csv / --riot-positions +CLI options on the ``l2`` command. + +This file is self-contained so it can be removed cleanly if the tests +are no longer wanted. +""" +# Author: Claude Opus 4.6 and Stuart Pearce + +import os +import textwrap + +import numpy as np +import pandas as pd +import pytest +import xarray as xr + +from glide.riot_csv_writer import write_riot_csv + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_riot_dataset(n: int = 5, include_positions: bool = False) -> xr.Dataset: + """Build a minimal xr.Dataset that satisfies write_riot_csv requirements. + + Parameters + ---------- + n : int + Number of time steps. + include_positions : bool + If True, add depth / lat / lon variables so that ``_add_positions`` + has something to interpolate. + """ + time = np.arange(n, dtype=np.float64) + 1.0 # non-zero epoch seconds + + ds = xr.Dataset( + { + "sr_ping_epoch_days": ("time", np.full(n, 19500, dtype=np.float64)), + "sr_ping_secs": ("time", np.arange(n, dtype=np.float64) * 10), + "sr_ping_msecs": ("time", np.arange(n, dtype=np.float64) * 100), + "sr_ping_rt_msecs": ("time", np.arange(n, dtype=np.float64) * 1000), + "sr_ping_freq": ("time", np.full(n, 69000, dtype=np.float64)), + "sr_ping_detection_level": ( + "time", + np.random.default_rng(0).integers(0, 100, n).astype(np.float64), + ), + "sr_ping_sequence_number": ("time", np.arange(n, dtype=np.float64)), + "sr_ping_platform_id": ("time", np.full(n, 42, dtype=np.float64)), + "sr_ping_slot": ("time", np.ones(n, dtype=np.float64)), + "sr_ping_group": ("time", np.ones(n, dtype=np.float64)), + "sr_platform_state": ("time", np.full(n, 3, dtype=np.float64)), + }, + coords={"time": time}, + ) + + if include_positions: + ds["depth"] = ("time", np.linspace(10, 50, n)) + ds["lat"] = ("time", np.linspace(44.0, 44.1, n)) + ds["lon"] = ("time", np.linspace(-124.0, -123.9, n)) + + return ds + + +# --------------------------------------------------------------------------- +# Unit tests – write_riot_csv +# --------------------------------------------------------------------------- + + +class TestWriteRiotCsv: + """Tests for write_riot_csv.""" + + def test_creates_csv(self, tmp_path: object) -> None: + """A valid dataset produces a CSV file.""" + out = str(tmp_path / "riot.csv") # type: ignore[operator] + ds = _make_riot_dataset() + write_riot_csv(ds, add_positions=False, output_path=out) + + assert os.path.exists(out) + + def test_csv_columns_without_positions(self, tmp_path: object) -> None: + """CSV should have the standard RIOT columns when positions are off.""" + out = str(tmp_path / "riot.csv") # type: ignore[operator] + write_riot_csv(_make_riot_dataset(), add_positions=False, output_path=out) + + df = pd.read_csv(out) + expected = [ + "riotDataPrefix", + "epochMsecs", + "rtMsecs", + "freq", + "detectionLevel", + "sequenceNumber", + "platformId", + "slot", + "group", + "platformState", + ] + assert list(df.columns) == expected + + def test_csv_columns_with_positions(self, tmp_path: object) -> None: + """When positions are enabled and available, depth/lat/lon columns appear.""" + out = str(tmp_path / "riot.csv") # type: ignore[operator] + ds = _make_riot_dataset(include_positions=True) + write_riot_csv(ds, add_positions=True, output_path=out) + + df = pd.read_csv(out) + for col in ("depth", "lat", "lon"): + assert col in df.columns, f"Missing column: {col}" + + def test_csv_columns_with_positions_missing_vars(self, tmp_path: object) -> None: + """When positions are requested but vars are missing, blank columns appear.""" + out = str(tmp_path / "riot.csv") # type: ignore[operator] + ds = _make_riot_dataset(include_positions=False) + write_riot_csv(ds, add_positions=True, output_path=out) + + df = pd.read_csv(out) + for col in ("depth", "lat", "lon"): + assert col in df.columns + assert df[col].isna().all(), f"{col} should be all NaN" + + def test_row_count(self, tmp_path: object) -> None: + """Output should have as many rows as valid (non-zero) time steps.""" + n = 8 + out = str(tmp_path / "riot.csv") # type: ignore[operator] + write_riot_csv(_make_riot_dataset(n=n), add_positions=False, output_path=out) + + df = pd.read_csv(out) + assert len(df) == n + + def test_epoch_msecs_calculation(self, tmp_path: object) -> None: + """epochMsecs should equal days*86400000 + secs*1000 + msecs.""" + out = str(tmp_path / "riot.csv") # type: ignore[operator] + ds = _make_riot_dataset(n=3) + write_riot_csv(ds, add_positions=False, output_path=out) + + df = pd.read_csv(out) + days = ds["sr_ping_epoch_days"].values.astype(np.int64) + secs = ds["sr_ping_secs"].values.astype(np.int64) + msecs = ds["sr_ping_msecs"].values.astype(np.int64) + expected = days * 86400 * 1000 + secs * 1000 + msecs + np.testing.assert_array_equal(df["epochMsecs"].values, expected) + + def test_append_mode(self, tmp_path: object) -> None: + """Calling write_riot_csv twice should append without repeating headers.""" + out = str(tmp_path / "riot.csv") # type: ignore[operator] + ds = _make_riot_dataset(n=3) + write_riot_csv(ds, add_positions=False, output_path=out) + write_riot_csv(ds, add_positions=False, output_path=out) + + with open(out) as f: + lines = f.readlines() + + # header once + 3 rows + 3 appended rows = 7 lines + assert len(lines) == 7 + + def test_riot_data_prefix(self, tmp_path: object) -> None: + """Every row should have '$riotData' in the riotDataPrefix column.""" + out = str(tmp_path / "riot.csv") # type: ignore[operator] + write_riot_csv(_make_riot_dataset(), add_positions=False, output_path=out) + + df = pd.read_csv(out) + assert (df["riotDataPrefix"] == "$riotData").all() + + def test_missing_variables_returns_early(self, tmp_path: object) -> None: + """If required RIOT vars are missing, no file is created.""" + out = str(tmp_path / "riot.csv") # type: ignore[operator] + ds = xr.Dataset({"dummy": ("time", [1, 2, 3])}) + write_riot_csv(ds, add_positions=False, output_path=out) + + assert not os.path.exists(out) + + def test_all_zeros_dropped(self, tmp_path: object) -> None: + """Rows where all RIOT variables are zero should be dropped.""" + out = str(tmp_path / "riot.csv") # type: ignore[operator] + ds = _make_riot_dataset(n=5) + # Set all RIOT vars in the first row to 0 + for var in ds.data_vars: + ds[var].values[0] = 0 + write_riot_csv(ds, add_positions=False, output_path=out) + + df = pd.read_csv(out) + assert len(df) == 4 # first row should have been dropped From dfef40f4293b493fdd55f7016ddd2e7041596077 Mon Sep 17 00:00:00 2001 From: Stuart Pearce Date: Mon, 16 Mar 2026 11:13:17 -0700 Subject: [PATCH 09/11] updated to include column in the CSV --- src/glide/riot_csv_writer.py | 2 +- tests/test_riot_csv_writer.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/glide/riot_csv_writer.py b/src/glide/riot_csv_writer.py index a29813d..15f69d5 100644 --- a/src/glide/riot_csv_writer.py +++ b/src/glide/riot_csv_writer.py @@ -106,7 +106,7 @@ def write_riot_csv(ds: xr.Dataset, add_positions: bool, output_path: str) -> Non # Add the additional columns riot_df.insert(loc=0, column="epochMsecs", value=epoch_msecs) riot_df.insert(loc=0, column="riotDataPrefix", value="$riotData") - # riot_df['recNumInFile'] = np.nan # unused record number in file. + riot_df["recNumInFile"] = 65535 # unused record number in file. if add_positions: riot_df = _add_positions(ds, riot_df, rows_to_keep) diff --git a/tests/test_riot_csv_writer.py b/tests/test_riot_csv_writer.py index 65aed2c..3d63a1f 100644 --- a/tests/test_riot_csv_writer.py +++ b/tests/test_riot_csv_writer.py @@ -95,6 +95,7 @@ def test_csv_columns_without_positions(self, tmp_path: object) -> None: "slot", "group", "platformState", + "recNumInFile", ] assert list(df.columns) == expected From ddb0017fa54d09ff12336ad8dec122840305a1a5 Mon Sep 17 00:00:00 2001 From: Jesse Cusack Date: Mon, 16 Mar 2026 20:47:47 -0700 Subject: [PATCH 10/11] fix order of csv file --- src/glide/cli.py | 3 +-- src/glide/riot_csv_writer.py | 34 ++++++++++++++++++++-------------- tests/test_riot_csv_writer.py | 25 +++---------------------- 3 files changed, 24 insertions(+), 38 deletions(-) diff --git a/src/glide/cli.py b/src/glide/cli.py index c202b6a..5798456 100644 --- a/src/glide/cli.py +++ b/src/glide/cli.py @@ -5,7 +5,6 @@ import logging from importlib.metadata import version from pathlib import Path -from typing import Union import netCDF4 as nc import typer @@ -114,7 +113,7 @@ def l2( ), ] = 20, riot_csv: Annotated[ - Union[str | None], + str | None, typer.Option( "-r", "--riot-csv", diff --git a/src/glide/riot_csv_writer.py b/src/glide/riot_csv_writer.py index 15f69d5..77793f8 100644 --- a/src/glide/riot_csv_writer.py +++ b/src/glide/riot_csv_writer.py @@ -1,5 +1,8 @@ +# Functions to parse the RIOT acoustics data and write to CSV + import logging import os +from collections import OrderedDict import numpy as np import xarray as xr @@ -34,7 +37,7 @@ def write_riot_csv(ds: xr.Dataset, add_positions: bool, output_path: str) -> Non The resulting CSV, containing one record per ping with these columns, is written to ``output_path``. """ - _log.debug(f"Gathering RIOT variables for CSV {output_path}") + riot_vars = [ "sr_ping_epoch_days", "sr_ping_secs", @@ -48,6 +51,18 @@ def write_riot_csv(ds: xr.Dataset, add_positions: bool, output_path: str) -> Non "sr_ping_group", "sr_platform_state", ] + csv_columns_map = OrderedDict( + { + "sr_ping_rt_msecs": "rtMsecs", + "sr_ping_freq": "freq", + "sr_ping_detection_level": "detectionLevel", + "sr_ping_sequence_number": "sequenceNumber", + "sr_ping_group": "group", + "sr_ping_slot": "slot", + "sr_ping_platform_id": "platformId", + "sr_platform_state": "platformState", + } + ) # Check that all required RIOT variables are present in the dataset if not set(riot_vars).issubset(set(ds.data_vars)): @@ -55,6 +70,7 @@ def write_riot_csv(ds: xr.Dataset, add_positions: bool, output_path: str) -> Non return # Drop any variables that are not needed for RIOT output + _log.debug(f"Gathering RIOT variables for CSV {output_path}") vars_to_drop = set(ds.variables).difference(riot_vars) riot_ds = ds.drop_vars(vars_to_drop) if riot_ds.sizes.get("time", 0) == 0: @@ -90,18 +106,8 @@ def write_riot_csv(ds: xr.Dataset, add_positions: bool, output_path: str) -> Non ["sr_ping_epoch_days", "sr_ping_secs", "sr_ping_msecs"], axis=1 ) - # rename columns to match headers in RIOT Data User Manual - csv_columns_map = { - "sr_ping_rt_msecs": "rtMsecs", - "sr_ping_freq": "freq", - "sr_ping_detection_level": "detectionLevel", - "sr_ping_sequence_number": "sequenceNumber", - "sr_ping_platform_id": "platformId", - "sr_ping_slot": "slot", - "sr_ping_group": "group", - "sr_platform_state": "platformState", - } - riot_df = riot_df.rename(columns=csv_columns_map) + # rename columns and reorder to match RIOT User data manual format + riot_df = riot_df.rename(columns=csv_columns_map)[csv_columns_map.values()] # Add the additional columns riot_df.insert(loc=0, column="epochMsecs", value=epoch_msecs) @@ -112,7 +118,7 @@ def write_riot_csv(ds: xr.Dataset, add_positions: bool, output_path: str) -> Non riot_df = _add_positions(ds, riot_df, rows_to_keep) # Write to CSV - _log.debug("Writing to RIOT CSV") + _log.debug(f"Writing to RIOT CSV: {output_path}") # If the file exists already, it will append, so don't write # the header. if os.path.exists(output_path): diff --git a/tests/test_riot_csv_writer.py b/tests/test_riot_csv_writer.py index 3d63a1f..33f5234 100644 --- a/tests/test_riot_csv_writer.py +++ b/tests/test_riot_csv_writer.py @@ -1,25 +1,11 @@ -"""Tests for the riot_csv_writer module and the --riot-csv / --riot-positions -CLI options on the ``l2`` command. - -This file is self-contained so it can be removed cleanly if the tests -are no longer wanted. -""" -# Author: Claude Opus 4.6 and Stuart Pearce - import os -import textwrap import numpy as np import pandas as pd -import pytest import xarray as xr from glide.riot_csv_writer import write_riot_csv -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - def _make_riot_dataset(n: int = 5, include_positions: bool = False) -> xr.Dataset: """Build a minimal xr.Dataset that satisfies write_riot_csv requirements. @@ -62,11 +48,6 @@ def _make_riot_dataset(n: int = 5, include_positions: bool = False) -> xr.Datase return ds -# --------------------------------------------------------------------------- -# Unit tests – write_riot_csv -# --------------------------------------------------------------------------- - - class TestWriteRiotCsv: """Tests for write_riot_csv.""" @@ -84,16 +65,16 @@ def test_csv_columns_without_positions(self, tmp_path: object) -> None: write_riot_csv(_make_riot_dataset(), add_positions=False, output_path=out) df = pd.read_csv(out) - expected = [ + expected = [ # This is the RIOT order specified in the docs "riotDataPrefix", "epochMsecs", "rtMsecs", "freq", "detectionLevel", "sequenceNumber", - "platformId", - "slot", "group", + "slot", + "platformId", "platformState", "recNumInFile", ] From 8f367240ade487d64c28fcaaff2e1d8d41ee4ec7 Mon Sep 17 00:00:00 2001 From: Jesse Cusack Date: Mon, 16 Mar 2026 21:08:20 -0700 Subject: [PATCH 11/11] more informative errors --- src/glide/riot_csv_writer.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/glide/riot_csv_writer.py b/src/glide/riot_csv_writer.py index 77793f8..4216a8f 100644 --- a/src/glide/riot_csv_writer.py +++ b/src/glide/riot_csv_writer.py @@ -66,15 +66,16 @@ def write_riot_csv(ds: xr.Dataset, add_positions: bool, output_path: str) -> Non # Check that all required RIOT variables are present in the dataset if not set(riot_vars).issubset(set(ds.data_vars)): - _log.error("Dataset is missing required RIOT variables") + missing_vars = set(riot_vars).difference(ds.data_vars) + _log.error(f"Dataset is missing required RIOT variables: {missing_vars}") return # Drop any variables that are not needed for RIOT output - _log.debug(f"Gathering RIOT variables for CSV {output_path}") + _log.debug(f"Gathering RIOT variables {output_path}") vars_to_drop = set(ds.variables).difference(riot_vars) riot_ds = ds.drop_vars(vars_to_drop) if riot_ds.sizes.get("time", 0) == 0: - _log.error("No RIOT data available to create the CSV") + _log.error("Time dimension of RIOT dataset is empty, no data to write to CSV") return # ToDo: this drop zeros section should be moved to processing L2 @@ -86,7 +87,7 @@ def write_riot_csv(ds: xr.Dataset, add_positions: bool, output_path: str) -> Non ) riot_ds = riot_ds.where(rows_to_keep, drop=True) if riot_ds.sizes["time"] == 0: - _log.error("No RIOT data available to create the CSV") + _log.error("All RIOT records are zeros or NaNs, no data to write to CSV") return # typecasting according to RIOT User data manual