Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions src/glide/ancillery.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Functions for reading non-glide files
import logging

import xarray as xr

_log = logging.getLogger(__name__)


# Public functions


def concat(file_list: list[str], concat_dim: str = "time") -> xr.Dataset:
    """Concatenate multiple netCDF files along ``concat_dim``.

    Files are combined with nested concatenation; only minimal
    coordinates/data variables are aligned and conflicts are resolved by
    overriding with the first dataset. The result is loaded into memory.
    """
    _log.debug("Loading files")
    combined = xr.open_mfdataset(
        file_list,
        concat_dim=concat_dim,
        combine="nested",
        compat="override",
        coords="minimal",
        decode_timedelta=False,
        data_vars="minimal",
    )
    return combined.load()


def parse_q(q_file: str) -> xr.Dataset:
    """Load the ``e_1``, ``e_2`` and ``pressure`` variables from q2netcdf output file(s)."""
    _log.debug("Loading Q files")
    wanted = ["e_1", "e_2", "pressure"]
    ds = xr.open_mfdataset(q_file, decode_timedelta=False)
    return ds[wanted].load()
82 changes: 56 additions & 26 deletions src/glide/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@
from importlib.metadata import version
from pathlib import Path

import netCDF4 as nc
import typer
from typing_extensions import Annotated

from . import config, hotel, process_l1, process_l2, process_l3
from . import ancillery, config, hotel, process_l1, process_l2, process_l3

_log = logging.getLogger(__name__)

Expand Down Expand Up @@ -86,7 +87,9 @@ def l1b(
"""
conf = config.load_config(config_file)

ds = process_l1.parse_l1(file, conf)
ds = process_l1.parse_l1(file)

ds = process_l1.format_l1(ds, conf)

ds = process_l1.apply_qc(ds, conf)

Expand Down Expand Up @@ -115,8 +118,11 @@ def l2(
"""
conf = config.load_config(config_file)

flt = process_l1.parse_l1(flt_file, conf)
sci = process_l1.parse_l1(sci_file, conf)
flt = process_l1.parse_l1(flt_file)
sci = process_l1.parse_l1(sci_file)

flt = process_l1.format_l1(flt, conf)
sci = process_l1.format_l1(sci, conf)

flt = process_l1.apply_qc(flt, conf)
sci = process_l1.apply_qc(sci, conf)
Expand Down Expand Up @@ -163,7 +169,7 @@ def l3(
if q_netcdf is not None:
conf = config.load_config(config_file)

q = process_l3.parse_q(q_netcdf)
q = ancillery.parse_q(q_netcdf)

out = process_l3.bin_q(out, q, bin_size, conf)

Expand All @@ -172,45 +178,69 @@ def l3(

@app.command()
@log_args
def merge(
    glide_file: Annotated[
        str, typer.Argument(help="A L2 or L3 dataset produced by glide.")
    ],
    input_file: Annotated[str, typer.Argument(help="Input file(s) of a given type.")],
    file_type: Annotated[
        str,
        typer.Argument(
            help="Choose 'q' for q2netcdf output file, 'p' for p2netcdf output file."
        ),
    ],
    out_file: _out_file_annotation = "slocum.merged.nc",
    config_file: _config_annotation = None,
    overwrite: Annotated[
        bool,
        typer.Option(
            "--overwrite",
            "-w",
            help="Overwrite the existing dataset if it exists.",
        ),
    ] = False,
) -> None:
    """
    Merge ancillary data into L2 or L3 data.

    The processing level of ``glide_file`` is inferred from its dimensions:
    a lone ``time`` dimension means L2, ``{profile_id, z}`` means L3.
    Currently only merging q2netcdf files into L3 data is implemented.
    """
    if file_type not in ["q", "p"]:
        raise typer.BadParameter(f"The file type {file_type} must be q or p.")

    if Path(out_file).exists() and not overwrite:
        raise typer.BadParameter(
            f"The output file {out_file} already exists. Use --overwrite to overwrite it."
        )

    # Figure out the processing level of the input. Using the context manager
    # guarantees the file is closed even if reading the dimensions raises.
    with nc.Dataset(glide_file) as ds:
        dataset_dims = set(ds.dimensions)

    if dataset_dims == {"time"}:
        input_file_level = 2
    elif dataset_dims == {"profile_id", "z"}:
        input_file_level = 3
    else:
        raise ValueError(
            f"Could not determine processing level of input file {glide_file} with dimensions {dataset_dims}"
        )

    conf = config.load_config(config_file)

    if file_type == "q":
        if input_file_level == 3:
            l3, bin_size = process_l3.parse_l3(glide_file)
            q = ancillery.parse_q(input_file)
            out = process_l3.bin_q(l3, q, bin_size, conf)
            out.to_netcdf(out_file)
        else:
            raise NotImplementedError(
                "Merging q files only supported for level 3 data."
            )
    elif file_type == "p":
        raise NotImplementedError("Merging of p files is not yet supported.")


@app.command()
Expand Down Expand Up @@ -260,7 +290,7 @@ def concat(
"""
Concatenate multiple netCDF files along a dimension.
"""
ds = process_l3.concat(files, concat_dim=concat_dim)
ds = ancillery.concat(files, concat_dim=concat_dim)

ds.to_netcdf(out_file)

Expand Down
36 changes: 17 additions & 19 deletions src/glide/process_l1.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,22 +17,6 @@
# Helper functions


def _load_l1_file(file: str | xr.Dataset) -> xr.Dataset:
    """Load raw L1 data from a path, or pass an existing Dataset through.

    A path is first tried as netCDF; if that raises ``ValueError`` it is
    read as CSV instead.

    Raises:
        ValueError: if ``file`` is neither a path string nor a Dataset.
    """
    if isinstance(file, str):
        _log.debug("Parsing L1 %s", file)
        try:
            # NOTE(review): the "j" dimension is dropped here — presumably
            # unused downstream; confirm before relying on it elsewhere.
            ds = xr.open_dataset(file, decode_timedelta=True).drop_dims("j").load()
            _log.debug("xarray.open_dataset opened %s", file)
        except ValueError:
            # Fall back to CSV parsing when the file is not valid netCDF.
            ds = pd.read_csv(file).to_xarray()
            _log.debug("pandas.read_csv opened %s", file)
    elif isinstance(file, xr.Dataset):  # Primarily for testing
        ds = file
    else:
        raise ValueError(f"Expected type str or xarray.Dataset but got {type(file)}")
    return ds


def _fix_time_varaiable_conflict(ds: xr.Dataset) -> xr.Dataset:
"""This fixes conflicting time variable names when parsing a combined flight/science data.
Generally, they should be parsed separately."""
Expand Down Expand Up @@ -87,10 +71,24 @@ def _format_variables(
# Public API functions


def parse_l1(file: str | xr.Dataset, config: dict) -> xr.Dataset:
"""Parses flight (sbd) or science (tbd) data processed by dbd2netcdf or dbd2csv."""
def parse_l1(file: str | xr.Dataset) -> xr.Dataset:
    """Load raw L1 data from a netCDF or CSV path, or pass a Dataset through.

    Raises:
        ValueError: if ``file`` is neither a path string nor a Dataset.
    """
    if isinstance(file, xr.Dataset):  # Primarily for testing
        return file
    if not isinstance(file, str):
        raise ValueError(f"Expected type str or xarray.Dataset but got {type(file)}")

    _log.debug("Parsing L1 %s", file)
    try:
        ds = xr.open_dataset(file, decode_timedelta=True).drop_dims("j").load()
        _log.debug("xarray.open_dataset opened %s", file)
    except ValueError:
        # Not valid netCDF — fall back to reading the file as CSV.
        ds = pd.read_csv(file).to_xarray()
        _log.debug("pandas.read_csv opened %s", file)
    return ds


ds = _load_l1_file(file)
def format_l1(ds: xr.Dataset, config: dict) -> xr.Dataset:
"""Parses flight (sbd) or science (tbd) data processed by dbd2netcdf or dbd2csv."""

ds = _fix_time_varaiable_conflict(ds)

Expand Down
6 changes: 3 additions & 3 deletions src/glide/process_l2.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Level 3 processing of the level 2 processed data
# Level 3 processing of the level 2 data
# Data are binned in depth

import logging
Expand Down Expand Up @@ -54,8 +54,8 @@ def _get_profile_indexes(ds: xr.Dataset) -> NDArray:
# Public functions


def parse_l2(file: str) -> xr.Dataset:
    """Read an L2 dataset and load it fully into memory."""
    dataset = xr.open_dataset(file, decode_timedelta=True)
    return dataset.load()


def bin_l2(
Expand Down
26 changes: 3 additions & 23 deletions src/glide/process_l3.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Additional processing of the l3 data, including the assimiation
# Additional processing of the l3 data, including the assimilation
# of other variables such as epsilon.
import logging

Expand All @@ -18,33 +18,13 @@ def _infer_bin_size(ds: xr.Dataset) -> float:
# Public functions


def concat(file_list: list[str], concat_dim: str = "time") -> xr.Dataset:
_log.debug("Loading files")
return xr.open_mfdataset(
file_list,
concat_dim=concat_dim,
combine="nested",
compat="override",
coords="minimal",
decode_timedelta=False,
data_vars="minimal",
).load()


def parse_l3(file: str) -> tuple[xr.Dataset, float]:
    """Read an L3 dataset into memory and infer its bin size.

    Returns:
        The loaded dataset and the inferred depth-bin size.
    """
    ds = xr.open_dataset(file, decode_timedelta=True).load()
    bin_size = _infer_bin_size(ds)
    # Release the file handle so the caller can overwrite the l3 file.
    ds.close()
    return ds, bin_size


def parse_q(q_file: str) -> xr.Dataset:
    """Load the ``e_1``, ``e_2`` and ``pressure`` variables from q2netcdf output file(s)."""
    _log.debug("Loading Q files")
    return xr.open_mfdataset(q_file, decode_timedelta=False)[
        ["e_1", "e_2", "pressure"]
    ].load()


def bin_q(
ds: xr.Dataset, ds_q: xr.Dataset, bin_size: float, config: dict
) -> xr.Dataset:
Expand Down
5 changes: 2 additions & 3 deletions tests/test_process_l1.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,5 @@ def test_format_variables() -> None:


def test_parse_l1() -> None:
    # Both flight (sbd) and science (tbd) files should parse without error.
    for extension in ("sbd", "tbd"):
        pl1.parse_l1(get_test_data("684", extension))
pl1.parse_l1(get_test_data("684", "tbd"))