Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
.DS_Store
gliderdac*

# Python-generated files
__pycache__/
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ build-backend = "hatchling.build"

[dependency-groups]
dev = [
"compliance-checker>=5.4.2",
"ipykernel>=6.29.5",
"matplotlib>=3.10.0",
"mypy>=1.15.0",
Expand Down
31 changes: 25 additions & 6 deletions scripts/netcdf2csv.example.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,28 @@

out = Path("../tests/data").resolve()

xr.open_dataset(Path(out, "osu684.sbd.nc")).drop_dims("j").to_pandas().to_csv(
Path(out, "osu684.sbd.csv")
)
xr.open_dataset(Path(out, "osu684.tbd.nc")).drop_dims("j").to_pandas().to_csv(
Path(out, "osu684.tbd.csv")
)
# Commented out because I didn't keep the input files...
# xr.open_dataset(Path(out, "osu684.sbd.nc")).drop_dims("j").to_pandas().to_csv(
# Path(out, "osu684.sbd.csv")
# )
# xr.open_dataset(Path(out, "osu684.tbd.nc")).drop_dims("j").to_pandas().to_csv(
# Path(out, "osu684.tbd.csv")
# )

# Segments 27-30 of the osu685 2025-056 deployment: convert both the flight
# (.sbd) and science (.tbd) netCDF files to CSV fixtures under `out`.
_stem = "osu685-2025-056-0"
input_sbd = [f"{_stem}-{seg}.sbd.nc" for seg in range(27, 31)]
input_tbd = [f"{_stem}-{seg}.tbd.nc" for seg in range(27, 31)]

for fname in input_sbd + input_tbd:
    ds = xr.open_dataset(Path("../", fname))
    # Drop the "j" dimension so the dataset flattens cleanly to a 2-D table.
    table = ds.drop_dims("j").to_pandas()
    table.to_csv(Path(out, fname.replace(".nc", ".csv")))
75 changes: 72 additions & 3 deletions src/glide/assets/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,18 @@

--- ### PART 1 GLOBAL CONFIGURATION

# This part contains global attributes.

# Edit the name for your deployment
trajectory:
name: YYYYMMDD_gliderSN
attributes: # Don't change the attributes, they are required by IOOS
cf_role: trajectory_id
comment: "A trajectory is a single deployment of a glider and may span multiple data files."
long_name: "Trajectory/Deployment Name"

# Add CF standard global attributes here
netcdf_attributes:
title: Example Slocum glider dataset generated by the python package glide.
comment: glide is developed at https://github.com/OSUGliders/glide
comment: glide is developed by the OSU glider group at https://github.com/OSUGliders/glide
conventions: "CF-1.6, Unidata Dataset Discovery v1.0"

--- ### PART 2 VARIABLE DEFINITIONS, QC CONTROLS, AND METADATA
Expand Down Expand Up @@ -128,6 +135,68 @@ m_gps_lon:
valid_min: -180.0
coordinate_reference_frame: urn:ogc:crs:EPSG::4326

u:
dtype: f4
CF:
comment: "The depth-averaged current is an estimate of the net current measured while the glider is underwater. The value is calculated over the entire underwater segment, which may consist of 1 or more dives."
long_name: "Depth-Averaged Eastward Sea Water Velocity"
observation_type: "calculated"
platform: "platform"
source_sensor: "m_water_vx"
standard_name: "eastward_sea_water_velocity"
units: "m s-1"
valid_max: 10.
valid_min: -10.

v:
dtype: f4
CF:
comment: "The depth-averaged current is an estimate of the net current measured while the glider is underwater. The value is calculated over the entire underwater segment, which may consist of 1 or more dives."
long_name: "Depth-Averaged Northward Sea Water Velocity"
observation_type: "calculated"
platform: "platform"
source_sensor: "m_water_vy"
standard_name: "northward_sea_water_velocity"
units: "m s-1"
valid_max: 10.
valid_min: -10.

time_uv:
CF:
calendar: "gregorian"
comment: "The depth-averaged current is an estimate of the net current measured while the glider is underwater. The value is calculated over the entire underwater segment, which may consist of 1 or more dives."
long_name: "Depth-Averaged Time"
observation_type: "calculated"
source_sensor: "m_present_time"
standard_name: "time"
units: "seconds since 1970-01-01T00:00:00Z"

lat_uv:
dtype: f4
CF:
comment: "The depth-averaged current is an estimate of the net current measured while the glider is underwater. The value is calculated over the entire underwater segment, which may consist of 1 or more dives."
long_name: "Depth-Averaged Latitude"
observation_type: "calculated"
platform: "platform"
source_sensor: "m_gps_lat"
standard_name: "latitude"
units: "degrees_north"
valid_max: 90.
valid_min: -90.

lon_uv:
dtype: f4
CF:
comment: "The depth-averaged current is an estimate of the net current measured while the glider is underwater. The value is calculated over the entire underwater segment, which may consist of 1 or more dives."
long_name: "Depth-Averaged Longitude"
observation_type: "calculated"
platform: "platform"
source_sensor: "m_gps_lon"
standard_name: "longitude"
units: "degrees_east"
valid_max: 180.
valid_min: -180.

heading:
source: m_heading
conversion: rad_to_deg
Expand Down
133 changes: 130 additions & 3 deletions src/glide/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,10 +118,9 @@ def l2(
"""
conf = config.load_config(config_file)

flt = process_l1.parse_l1(flt_file)
flt_raw = process_l1.parse_l1(flt_file) # Keep raw for velocity extraction
flt = process_l1.format_l1(flt_raw.copy(), conf)
sci = process_l1.parse_l1(sci_file)

flt = process_l1.format_l1(flt, conf)
sci = process_l1.format_l1(sci, conf)

flt = process_l1.apply_qc(flt, conf)
Expand All @@ -133,6 +132,10 @@ def l2(

out = process_l1.get_profiles(merged, shallowest_profile, profile_distance)

out = process_l1.assign_surface_state(out, flt=flt_raw)

out = process_l1.add_velocity(out, conf, flt=flt_raw)

out = process_l1.enforce_types(out, conf)

out.attrs = conf["globals"]["netcdf_attributes"]
Expand Down Expand Up @@ -295,6 +298,130 @@ def concat(
ds.to_netcdf(out_file)


def _strip_known_suffix(name: str, suffixes: tuple[str, ...]) -> str:
    """Return *name* with the first matching suffix removed.

    Falls back to dropping the final dot-extension when no suffix matches,
    so e.g. ``"base.l2.nc"`` -> ``"base"`` and ``"base.foo"`` -> ``"base"``.
    """
    for suffix in suffixes:
        if name.endswith(suffix):
            return name[: -len(suffix)]
    return name.rsplit(".", 1)[0]


@app.command()
@log_args
def backfill(
    l2_files: Annotated[
        list[str], typer.Argument(help="L2 files to check for missing velocity.")
    ],
    raw_dir: Annotated[
        str,
        typer.Option("-r", "--raw-dir", help="Directory containing raw sbd/dbd files."),
    ],
    extra_files: Annotated[
        int,
        typer.Option(
            "-n",
            "--extra",
            help="Number of extra raw files to load after last L2 file.",
        ),
    ] = 3,
) -> None:
    """
    Backfill depth-averaged velocity to L2 files.

    Uses the glider state variable in L2 files to identify dive cycles, then
    looks up velocity from the corresponding raw flight files. Updates velocity
    if missing or if the new estimate differs significantly from the existing one.

    File naming convention: L2 files should be named like 'basename.l2.nc'
    where 'basename.sbd.nc' or 'basename.dbd.nc' is the corresponding raw file.
    """
    raw_path = Path(raw_dir)

    # Sort by name so first/last entries bound the deployment chronologically
    # (file names encode the mission sequence).
    l2_files_sorted = sorted(Path(f) for f in l2_files)

    # Keep only files that already define the velocity time coordinate;
    # unreadable files are skipped with a warning rather than aborting.
    files_to_update = []
    for l2_file in l2_files_sorted:
        try:
            with nc.Dataset(str(l2_file), "r") as ds:
                if "time_uv" in ds.variables:
                    files_to_update.append(l2_file)
        except Exception as e:
            _log.warning("Could not read %s: %s", l2_file, e)

    if not files_to_update:
        typer.echo("No L2 files with time_uv variable found.")
        return

    typer.echo(f"Processing {len(files_to_update)} L2 files.")

    # L2 suffixes are checked longest/most-specific first so ".l2.nc" wins
    # over the bare ".nc" fallback.
    l2_suffixes = (".l2.nc", ".L2.nc", ".nc")
    first_base = _strip_known_suffix(files_to_update[0].name, l2_suffixes)
    last_base = _strip_known_suffix(files_to_update[-1].name, l2_suffixes)

    # Gather candidate raw flight files (netCDF or CSV, sbd or dbd), sorted
    # by name for the same chronological reason as above.
    raw_files = sorted(
        f
        for pattern in ("*.sbd.nc", "*.sbd.csv", "*.dbd.nc", "*.dbd.csv")
        for f in raw_path.glob(pattern)
    )

    if not raw_files:
        typer.echo(f"No raw flight files found in {raw_dir}")
        return

    raw_suffixes = (
        ".sbd.nc",
        ".sbd.csv",
        ".dbd.nc",
        ".dbd.csv",
        ".tbd.nc",
        ".tbd.csv",
    )
    raw_names = [_strip_known_suffix(f.name, raw_suffixes) for f in raw_files]

    # The first L2 file must have a matching raw file — without it there is
    # no anchor for the slice of raw files to load.
    try:
        first_idx = raw_names.index(first_base)
    except ValueError:
        typer.echo(f"Could not find raw file matching {first_base}")
        return

    # If the last L2 file has no raw match, fall back to the first index so
    # the slice below still covers at least one file (plus the extras).
    try:
        last_idx = raw_names.index(last_base)
    except ValueError:
        last_idx = first_idx

    # Load from first match through last match plus a few trailing files,
    # since a dive cycle may spill into the next raw segment.
    end_idx = min(last_idx + extra_files + 1, len(raw_files))
    raw_files_to_load = [str(f) for f in raw_files[first_idx:end_idx]]

    typer.echo(f"Loading {len(raw_files_to_load)} raw flight files...")

    # Update each L2 file in place; collect those that actually changed.
    updated = [
        str(l2_file)
        for l2_file in files_to_update
        if process_l1.backfill_velocity(str(l2_file), raw_files_to_load)
    ]

    if updated:
        typer.echo(f"\nUpdated {len(updated)} files:")
        for f in updated:
            typer.echo(f"  {f}")
    else:
        typer.echo("No files were updated.")


@app.command()
@log_args
def cfg(
Expand Down
Loading