diff --git a/src/glide/assets/config.yml b/src/glide/assets/config.yml index d953f4f..df44767 100644 --- a/src/glide/assets/config.yml +++ b/src/glide/assets/config.yml @@ -1,468 +1,92 @@ -# glide's default configuration is specified in this file. -# Variable formatting mostly follows national gdlier DAC netCDF format version 2 -# https://ioos.github.io/glider-dac/ngdac-netcdf-file-format-version-2 -# This file is self-documenting in the sense that it is liberally commented. -# The comments provide pointers and tips for defining the configuration. - -# Dashes (---) indicate separate yaml documents. They are important. -# The ordering of documents is important. +# glide user configuration +# +# This file configures your glider deployment. Core variables are defined +# in the package and cannot be removed, but their QC parameters can be +# adjusted here. +# +# For documentation, see: https://github.com/OSUGliders/glide ---- ### PART 1 GLOBAL CONFIGURATION +--- ### DEPLOYMENT INFO -# Edit the name for your deployment -trajectory: +# Edit the trajectory name for your deployment (required) +trajectory: name: YYYYMMDD_gliderSN - attributes: # Don't change the attributes, they are required by IOOS + attributes: cf_role: trajectory_id comment: "A trajectory is a single deployment of a glider and may span multiple data files." - long_name: "Trajectory/Deployment Name" + long_name: Trajectory/Deployment Name -# Add CF standard global attributes here +# Global netCDF attributes (CF standard) netcdf_attributes: title: Example Slocum glider dataset generated by the python package glide. comment: glide is developed by the OSU glider group at https://github.com/OSUGliders/glide conventions: "CF-1.6, Unidata Dataset Discovery v1.0" ---- ### PART 2 VARIABLE DEFINITIONS, QC CONTROLS, AND METADATA - -# This part defines variables. The first item in the hierarchy is the variable name as -# we want it outputted in the file. 
The next level in the heirarchy defines how we -# want to process the variable. Note that some of the CF attributes are used to tune QC. -# Specifically, the valid_min and valid_max attributes are used to threshold data when -# they are defined. -# -# variable_name: -# source: Slocum name from masterdata -# track_qc: True/False -# conversion: must match a function in convert.py -# interpolate_missing: True/False -# max_gap: Maximum interpolation gap in seconds -# drop_from_l2: True/False -# dtype: numpy type code, e.g. f4, f8 for float, double -# CF: -# standard_name: ... - -# TIME VARIABLES - -time: # This is the name the we want - # This list of sources can only be done for variables that do not appear in the same file - # because we are essentially mapping one variable name, time, to two other variable names. - source: ["m_present_time", "sci_m_present_time"] - track_qc: True - CF: # CF attributes here - long_name: Time - standard_name: time - calendar: gregorian - units: seconds since 1970-01-01T00:00:00Z - axis: T - observation_type: "measured" - valid_min: 2000-01-01T00:00:00 # pyyaml automatically converts timestamps to datetime.datetime objects - valid_max: 2040-01-01T00:00:00 - -# FLIGHT COMPUTER VARIABLES - -lat: # This is the name that we want - source: m_lat # This is the name that Slocum gliders use - conversion: dpm_to_dd # Function applied to convert data units - interpolate_missing: True # Interpolating nan values - max_gap: 600 # Max gap in seconds over which to interpolate nan values - track_qc: True # Whether to keep track of quality control - dtype: f4 - CF: - long_name: Latitude - standard_name: latitude - units: degrees_north - axis: Y - comment: "Estimated between surface fixes" - observation_type: measured - platform: platform - reference: WGS84 - valid_max: 90.0 - valid_min: -90.0 - coordinate_reference_frame: urn:ogc:crs:EPSG::4326 - -lon: - source: m_lon - conversion: dpm_to_dd - interpolate_missing: True - max_gap: 600 - track_qc: True - 
dtype: f4 - CF: - long_name: Longitude - standard_name: longitude - units: degrees_east - axis: X - comment: "Estimated between surface fixes" - observation_type: measured - platform: platform - reference: WGS84 - valid_max: 180.0 - valid_min: -180.0 - coordinate_reference_frame: urn:ogc:crs:EPSG::4326 - -m_gps_lat: - source: m_gps_lat - conversion: dpm_to_dd - drop_from_l2: True # Not included into l2 dataset if true - CF: - long_name: Latitude - standard_name: latitude - units: degrees_north - axis: Y - comment: "Surface fixes" - observation_type: measured - platform: platform - reference: WGS84 - valid_max: 90.0 - valid_min: -90.0 - coordinate_reference_frame: urn:ogc:crs:EPSG::4326 - -m_gps_lon: - source: m_gps_lon - conversion: dpm_to_dd - drop_from_l2: True - CF: - long_name: Longitude - standard_name: longitude - units: degrees_east - axis: X - comment: "Surface fixes" - observation_type: measured - platform: platform - reference: WGS84 - valid_max: 180.0 - valid_min: -180.0 - coordinate_reference_frame: urn:ogc:crs:EPSG::4326 - -u: - dtype: f4 - CF: - comment: "The depth-averaged current is an estimate of the net current measured while the glider is underwater. The value is calculated over the entire underwater segment, which may consist of 1 or more dives." - long_name: "Depth-Averaged Eastward Sea Water Velocity" - observation_type: "calculated" - platform: "platform" - source_sensor: "m_water_vx" - standard_name: "eastward_sea_water_velocity" - units: "m s-1" - valid_max: 10. - valid_min: -10. - -v: - dtype: f4 - CF: - comment: "The depth-averaged current is an estimate of the net current measured while the glider is underwater. The value is calculated over the entire underwater segment, which may consist of 1 or more dives." - long_name: "Depth-Averaged Northward Sea Water Velocity" - observation_type: "calculated" - platform: "platform" - source_sensor: "m_water_vy" - standard_name: "northward_sea_water_velocity" - units: "m s-1" - valid_max: 10. 
- valid_min: -10. - -time_uv: - CF: - calendar: "gregorian" - comment: "The depth-averaged current is an estimate of the net current measured while the glider is underwater. The value is calculated over the entire underwater segment, which may consist of 1 or more dives." - long_name: "Depth-Averaged Time" - observation_type: "calculated" - source_sensor: "m_present_time" - standard_name: "time" - units: "seconds since 1970-01-01T00:00:00Z" - -lat_uv: - dtype: f4 - CF: - comment: "The depth-averaged current is an estimate of the net current measured while the glider is underwater. The value is calculated over the entire underwater segment, which may consist of 1 or more dives." - long_name: "Depth-Averaged Latitude" - observation_type: "calculated" - platform: "platform" - source_sensor: "m_gps_lat" - standard_name: "latitude" - units: "degrees_north" - valid_max: 90. - valid_min: -90. - -lon_uv: - dtype: f4 - CF: - comment: "The depth-averaged current is an estimate of the net current measured while the glider is underwater. The value is calculated over the entire underwater segment, which may consist of 1 or more dives." - long_name: "Depth-Averaged Longitude" - observation_type: "calculated" - platform: "platform" - source_sensor: "m_gps_lon" - standard_name: "longitude" - units: "degrees_east" - valid_max: 180. - valid_min: -180. 
- -heading: - source: m_heading - conversion: rad_to_deg - interpolate_missing: True - dtype: f4 - CF: - long_name: Heading - standard_name: platform_orientation - units: degrees - observation_type: measured - valid_max: 360.0 - valid_min: 0.0 - -pitch: - source: m_pitch - conversion: rad_to_deg - interpolate_missing: True - dtype: f4 - CF: - long_name: Pitch - standard_name: platform_pitch_fore_up - units: degrees - observation_type: measured - valid_max: 180.0 - valid_min: -180.0 +--- ### OPTIONAL VARIABLE SUITES -roll: - source: m_roll - conversion: rad_to_deg - interpolate_missing: True - dtype: f4 - CF: - long_name: Roll - standard_name: platform_roll_starboard_down - units: degrees - observation_type: measured - valid_max: 180.0 - valid_min: -180.0 +# Toggle optional variable groups on/off +include: + flight: true # fin, battpos, heading, ballast_pumped + thermo: true # salinity, density, rho0, SA, CT, N2, depth, z -m_depth: - source: m_depth - drop_from_l2: True - CF: - long_name: Depth - standard_name: depth - units: m - valid_min: 0.0 - valid_max: 2000.0 - positive: "down" - reference_datum: "sea-surface" - instrument: "instrument_ctd" - observation_type: "measured" +--- ### QC PARAMETER OVERRIDES -# m_fin: -# source: m_fin -# conversion: rad_to_deg -# interpolate_missing: True -# dtype: f4 -# CF: -# long_name: Fin angle -# units: degrees - -# m_battpos: -# source: m_battpos -# interpolate_missing: True -# dtype: f4 -# CF: -# long_name: Battery position -# units: inches - -# SCIENCE COMPUTER VARIABLES - -pressure: - source: sci_water_pressure - conversion: bar_to_dbar - interpolate_missing: True - track_qc: True - dtype: f4 - CF: - long_name: Pressure - standard_name: sea_water_pressure - units: dbar - valid_min: 0.0 - valid_max: 2000.0 - positive: "down" - reference_datum: "sea-surface" - instrument: "instrument_ctd" - observation_type: "measured" - accuracy: 1.0 - precision: 2.0 - resolution: 0.02 - comment: "ctd pressure sensor" - -conductivity: - 
source: sci_water_cond - interpolate_missing: True - track_qc: True - dtype: f4 - CF: - long_name: Conductivity - standard_name: sea_water_electrical_conductivity - units: S m-1 # Not converted to mS cm-1 becase we need S m-1 for glider DAC - instrument: instrument_ctd - valid_min: 0.1 - valid_max: 10. - observation_type: "measured" - accuracy: 0.0003 - precision: 0.0001 - resolution: 0.00002 - -temperature: - source: sci_water_temp - interpolate_missing: True - track_qc: True - dtype: f4 - CF: - long_name: Temperature - standard_name: sea_water_temperature - units: celsius - instrument: instrument_ctd - valid_min: -5.0 - valid_max: 50.0 - observation_type: "measured" - accuracy: 0.002 - precision: 0.001 - resolution: 0.0002 - -# CALCULATED VARIABLES - -salinity: - track_qc: True - dtype: f4 - CF: - # accuracy: - instrument: instrument_ctd - long_name: Salinity - observation_type: calculated - platform: platform - # precision: - # resolution: - standard_name: sea_water_practical_salinity - units: 1 - valid_max: 40.0 - valid_min: 0.0 - -density: - track_qc: True - dtype: f4 - CF: - # accuracy: - instrument: instrument_ctd - long_name: Density - observation_type: calculated - platform: platform - # precision: - # resolution: - standard_name: sea_water_density - units: kg m-3 - valid_max: 1040.0 - valid_min: 1015.0 - -rho0: - track_qc: True - dtype: f4 - CF: - # accuracy: - instrument: instrument_ctd - long_name: Potential density - observation_type: calculated - platform: platform - reference_pressure: 0 - # precision: - # resolution: - standard_name: sea_water_potential_density - units: kg m-3 - valid_max: 1040.0 - valid_min: 1015.0 - -SA: - track_qc: True - dtype: f4 - CF: - # accuracy: - instrument: instrument_ctd - long_name: Absolute salinity - observation_type: calculated - platform: platform - # precision: - # resolution: - standard_name: sea_water_absolute_salinity - units: g kg-1 - valid_max: 40.0 - valid_min: 0.0 - -CT: - track_qc: True - dtype: f4 - CF: - 
long_name: Conservative temperature - standard_name: sea_water_conservative_temperature - units: celsius - instrument: instrument_ctd - valid_min: -5.0 - valid_max: 50.0 - observation_type: calculated - # accuracy: 0.002 - # precision: 0.001 - # resolution: 0.0002 - -N2: - track_qc: True - dtype: f4 - CF: - long_name: Buoyancy frequency squared - standard_name: square_of_brunt_vaisala_frequency_in_sea_water - units: s-2 - observations_type: calculated - -depth: - track_qc: True - dtype: f4 - CF: - long_name: Depth - standard_name: depth - units: m - valid_min: 0.0 - valid_max: 2000.0 - positive: "down" - reference_datum: "sea-surface" - instrument: "instrument_ctd" - observation_type: "calculated" - -z: - track_qc: True - dtype: f4 - CF: - long_name: Height - standard_name: height - units: m - valid_min: -2000.0 - valid_max: 0.0 - positive: "up" - reference_datum: "sea-surface" - instrument: "instrument_ctd" - observation_type: "calculated" - -# ASSIMILATED VARIABLES - -e_1: - CF: - long_name: TKE dissipation rate - standard_name: specific_turbulent_kinetic_energy_dissipation_in_sea_water - comment: Estimated using shear probe 1 - units: W kg-1 - valid_min: 0.0 - valid_max: 0.001 - instrument: "instrument_microrider" - observation_type: "calculated" - -e_2: - CF: - long_name: TKE dissipation rate - standard_name: specific_turbulent_kinetic_energy_dissipation_in_sea_water - comment: Estimated using shear probe 2 - units: W kg-1 - valid_min: 0.0 - valid_max: 0.001 - instrument: "instrument_microrider" - observation_type: "calculated" \ No newline at end of file +# Override QC parameters for core variables on a per-deployment basis. 
+# Only these parameters can be changed: valid_min, valid_max, max_gap, interpolate_missing +# Example: +# +# qc: +# temperature: +# valid_min: -2.0 +# valid_max: 15.0 +# lat: +# max_gap: 900 +# pressure: +# valid_max: 1200.0 + +qc: {} + +--- ### L1 EXTRA VARIABLES + +# Additional variables to extract from raw Slocum data during L1->L2 processing. +# These require a 'source' field matching the Slocum variable name. +# Example: +# +# l1_variables: +# fin: # Note that this variable is already included in the flight suite +# source: m_fin +# conversion: rad_to_deg +# dtype: f4 +# CF: +# long_name: Fin angle +# units: degrees + +l1_variables: {} + +--- ### MERGED VARIABLES + +# Variables to be assimilated from external processing (e.g., microstructure). +# These are used in higher-level processing and do not have a 'source' field. + +merged_variables: + e_1: + CF: + long_name: TKE dissipation rate + standard_name: specific_turbulent_kinetic_energy_dissipation_in_sea_water + comment: Estimated using shear probe 1 + units: W kg-1 + valid_min: 0.0 + valid_max: 0.001 + instrument: instrument_microrider + observation_type: calculated + + e_2: + CF: + long_name: TKE dissipation rate + standard_name: specific_turbulent_kinetic_energy_dissipation_in_sea_water + comment: Estimated using shear probe 2 + units: W kg-1 + valid_min: 0.0 + valid_max: 0.001 + instrument: instrument_microrider + observation_type: calculated \ No newline at end of file diff --git a/src/glide/assets/core.yml b/src/glide/assets/core.yml new file mode 100644 index 0000000..076ed94 --- /dev/null +++ b/src/glide/assets/core.yml @@ -0,0 +1,421 @@ +# Core variable definitions for glide +# This file is bundled with the package and should not be edited by users. +# Variable formatting follows IOOS national glider DAC netCDF format version 2 +# https://ioos.github.io/glider-dac/ngdac-netcdf-file-format-version-2 + +# Dashes (---) indicate separate yaml documents. 
+ +--- ### CORE VARIABLES (always extracted) + +time: + source: ["m_present_time", "sci_m_present_time"] + track_qc: True + CF: + long_name: Time + standard_name: time + calendar: gregorian + units: seconds since 1970-01-01T00:00:00Z + axis: T + observation_type: measured + valid_min: 2000-01-01T00:00:00 + valid_max: 2040-01-01T00:00:00 + +lat: + source: m_lat + conversion: dpm_to_dd + interpolate_missing: True + max_gap: 600 + track_qc: True + dtype: f4 + CF: + long_name: Latitude + standard_name: latitude + units: degrees_north + axis: Y + comment: "Estimated between surface fixes" + observation_type: measured + platform: platform + reference: WGS84 + valid_max: 90.0 + valid_min: -90.0 + coordinate_reference_frame: urn:ogc:crs:EPSG::4326 + +lon: + source: m_lon + conversion: dpm_to_dd + interpolate_missing: True + max_gap: 600 + track_qc: True + dtype: f4 + CF: + long_name: Longitude + standard_name: longitude + units: degrees_east + axis: X + comment: "Estimated between surface fixes" + observation_type: measured + platform: platform + reference: WGS84 + valid_max: 180.0 + valid_min: -180.0 + coordinate_reference_frame: urn:ogc:crs:EPSG::4326 + +m_gps_lat: + source: m_gps_lat + conversion: dpm_to_dd + drop_from_l2: True + CF: + long_name: Latitude + standard_name: latitude + units: degrees_north + axis: Y + comment: "Surface fixes" + observation_type: measured + platform: platform + reference: WGS84 + valid_max: 90.0 + valid_min: -90.0 + coordinate_reference_frame: urn:ogc:crs:EPSG::4326 + +m_gps_lon: + source: m_gps_lon + conversion: dpm_to_dd + drop_from_l2: True + CF: + long_name: Longitude + standard_name: longitude + units: degrees_east + axis: X + comment: "Surface fixes" + observation_type: measured + platform: platform + reference: WGS84 + valid_max: 180.0 + valid_min: -180.0 + coordinate_reference_frame: urn:ogc:crs:EPSG::4326 + +m_depth: + source: m_depth + drop_from_l2: True + CF: + long_name: Depth + standard_name: depth + units: m + valid_min: 0.0 + 
valid_max: 2000.0 + positive: down + reference_datum: sea-surface + instrument: instrument_ctd + observation_type: measured + +pressure: + source: sci_water_pressure + conversion: bar_to_dbar + interpolate_missing: True + track_qc: True + dtype: f4 + CF: + long_name: Pressure + standard_name: sea_water_pressure + units: dbar + valid_min: 0.0 + valid_max: 2000.0 + positive: down + reference_datum: sea-surface + instrument: instrument_ctd + observation_type: measured + accuracy: 1.0 + precision: 2.0 + resolution: 0.02 + comment: "ctd pressure sensor" + +conductivity: + source: sci_water_cond + interpolate_missing: True + track_qc: True + dtype: f4 + CF: + long_name: Conductivity + standard_name: sea_water_electrical_conductivity + units: S m-1 + instrument: instrument_ctd + valid_min: 0.1 + valid_max: 10.0 + observation_type: measured + accuracy: 0.0003 + precision: 0.0001 + resolution: 0.00002 + +temperature: + source: sci_water_temp + interpolate_missing: True + track_qc: True + dtype: f4 + CF: + long_name: Temperature + standard_name: sea_water_temperature + units: celsius + instrument: instrument_ctd + valid_min: -5.0 + valid_max: 50.0 + observation_type: measured + accuracy: 0.002 + precision: 0.001 + resolution: 0.0002 + +# Core flight + +pitch: + source: m_pitch + conversion: rad_to_deg + interpolate_missing: True + dtype: f4 + CF: + long_name: Pitch + standard_name: platform_pitch_fore_up + units: degrees + observation_type: measured + valid_max: 180.0 + valid_min: -180.0 + +roll: + source: m_roll + conversion: rad_to_deg + interpolate_missing: True + dtype: f4 + CF: + long_name: Roll + standard_name: platform_roll_starboard_down + units: degrees + observation_type: measured + valid_max: 180.0 + valid_min: -180.0 + +# Velocity variables (on time_uv dimension) + +u: + dtype: f4 + CF: + comment: "The depth-averaged current is an estimate of the net current measured while the glider is underwater. 
The value is calculated over the entire underwater segment, which may consist of 1 or more dives." + long_name: Depth-Averaged Eastward Sea Water Velocity + observation_type: calculated + platform: platform + source_sensor: m_water_vx + standard_name: eastward_sea_water_velocity + units: m s-1 + valid_max: 10.0 + valid_min: -10.0 + +v: + dtype: f4 + CF: + comment: "The depth-averaged current is an estimate of the net current measured while the glider is underwater. The value is calculated over the entire underwater segment, which may consist of 1 or more dives." + long_name: Depth-Averaged Northward Sea Water Velocity + observation_type: calculated + platform: platform + source_sensor: m_water_vy + standard_name: northward_sea_water_velocity + units: m s-1 + valid_max: 10.0 + valid_min: -10.0 + +time_uv: + CF: + calendar: gregorian + comment: "The depth-averaged current is an estimate of the net current measured while the glider is underwater. The value is calculated over the entire underwater segment, which may consist of 1 or more dives." + long_name: Depth-Averaged Time + observation_type: calculated + source_sensor: m_present_time + standard_name: time + units: seconds since 1970-01-01T00:00:00Z + +lat_uv: + dtype: f4 + CF: + comment: "The depth-averaged current is an estimate of the net current measured while the glider is underwater. The value is calculated over the entire underwater segment, which may consist of 1 or more dives." + long_name: Depth-Averaged Latitude + observation_type: calculated + platform: platform + source_sensor: m_gps_lat + standard_name: latitude + units: degrees_north + valid_max: 90.0 + valid_min: -90.0 + +lon_uv: + dtype: f4 + CF: + comment: "The depth-averaged current is an estimate of the net current measured while the glider is underwater. The value is calculated over the entire underwater segment, which may consist of 1 or more dives." 
+ long_name: Depth-Averaged Longitude + observation_type: calculated + platform: platform + source_sensor: m_gps_lon + standard_name: longitude + units: degrees_east + valid_max: 180.0 + valid_min: -180.0 + +# Profile identification variables + +dive_id: + dtype: i4 + CF: + long_name: Dive ID + comment: "Unique identifier for each dive profile, starting from 0. -1 indicates no dive." + +climb_id: + dtype: i4 + CF: + long_name: Climb ID + comment: "Unique identifier for each climb profile, starting from 0. -1 indicates no climb." + +state: + dtype: i1 + CF: + long_name: Glider State + comment: "Glider state: -1=unknown, 0=surface, 1=dive, 2=climb" + flag_values: [-1, 0, 1, 2] + flag_meanings: "unknown surface dive climb" + +--- ### OPTIONAL SUITE: flight + +heading: + source: m_heading + conversion: rad_to_deg + interpolate_missing: True + dtype: f4 + CF: + long_name: Heading + standard_name: platform_orientation + units: degrees + observation_type: measured + valid_max: 360.0 + valid_min: 0.0 + +battpos: + source: m_battpos + interpolate_missing: true + dtype: f4 + CF: + long_name: Battery position + units: inches + +fin: + source: m_fin + conversion: rad_to_deg + dtype: f4 + CF: + long_name: Fin angle + units: degrees + +ballast_pumped: + source: m_de_oil_vol + dtype: f4 + CF: + long_name: Ballast pumped + units: cm^3 + observation_type: measured + +--- ### OPTIONAL SUITE: thermo + +salinity: + track_qc: True + dtype: f4 + CF: + instrument: instrument_ctd + long_name: Salinity + observation_type: calculated + platform: platform + standard_name: sea_water_practical_salinity + units: "1" + valid_max: 40.0 + valid_min: 0.0 + +density: + track_qc: True + dtype: f4 + CF: + instrument: instrument_ctd + long_name: Density + observation_type: calculated + platform: platform + standard_name: sea_water_density + units: kg m-3 + valid_max: 1040.0 + valid_min: 1015.0 + +rho0: + track_qc: True + dtype: f4 + CF: + instrument: instrument_ctd + long_name: Potential density + 
observation_type: calculated
+    platform: platform
+    reference_pressure: 0
+    standard_name: sea_water_potential_density
+    units: kg m-3
+    valid_max: 1040.0
+    valid_min: 1015.0
+
+SA:
+  track_qc: True
+  dtype: f4
+  CF:
+    instrument: instrument_ctd
+    long_name: Absolute salinity
+    observation_type: calculated
+    platform: platform
+    standard_name: sea_water_absolute_salinity
+    units: g kg-1
+    valid_max: 40.0
+    valid_min: 0.0
+
+CT:
+  track_qc: True
+  dtype: f4
+  CF:
+    long_name: Conservative temperature
+    standard_name: sea_water_conservative_temperature
+    units: celsius
+    instrument: instrument_ctd
+    valid_min: -5.0
+    valid_max: 50.0
+    observation_type: calculated
+
+N2:
+  track_qc: True
+  dtype: f4
+  CF:
+    long_name: Buoyancy frequency squared
+    standard_name: square_of_brunt_vaisala_frequency_in_sea_water
+    units: s-2
+    observation_type: calculated
+
+depth:
+  track_qc: True
+  dtype: f4
+  CF:
+    long_name: Depth
+    standard_name: depth
+    units: m
+    valid_min: 0.0
+    valid_max: 2000.0
+    positive: down
+    reference_datum: sea-surface
+    instrument: instrument_ctd
+    observation_type: calculated
+
+z:
+  track_qc: True
+  dtype: f4
+  CF:
+    long_name: Height
+    standard_name: height
+    units: m
+    valid_min: -2000.0
+    valid_max: 0.0
+    positive: up
+    reference_datum: sea-surface
+    instrument: instrument_ctd
+    observation_type: calculated
diff --git a/src/glide/config.py b/src/glide/config.py
index 32bbe1d..2deaef0 100644
--- a/src/glide/config.py
+++ b/src/glide/config.py
@@ -1,5 +1,6 @@
 # Functions for handling the configuration file
 
+import copy
 import logging
 from datetime import datetime, timezone
 
@@ -14,11 +15,157 @@ def _ensure_utc(dt: datetime) -> datetime:
     return dt.replace(tzinfo=timezone.utc)
 
 
+def _deep_merge(base: dict, override: dict) -> dict:
+    """Deep merge override into base, returning a new dict.
+
+    Nested dicts present in both arguments are merged recursively; any other
+    value in ``override`` replaces the one in ``base``. Neither input is
+    mutated, and the result shares no mutable objects with them.
+    """
+    result = copy.deepcopy(base)
+    for key, value in override.items():
+        if key in result and isinstance(result[key], dict) and isinstance(value, dict):
+            result[key] = _deep_merge(result[key], value)
+        else:
+            result[key] = copy.deepcopy(value)
+    return result
+
+
+def _get_section(doc: object, key: str) -> dict:
+    """Return ``doc[key]`` if it is a dict, otherwise an empty dict.
+
+    A YAML document that is empty, or a key written with no value, loads as
+    ``None``; this keeps callers from calling dict methods on ``None``.
+    """
+    if not isinstance(doc, dict):
+        return {}
+    section = doc.get(key)
+    return section if isinstance(section, dict) else {}
+
+
+def _load_core() -> tuple[dict, dict, dict]:
+    """Load core variable definitions from bundled core.yml.
+
+    The YAML documents in core.yml are read by position: first the core
+    variables, then the "flight" suite, then the "thermo" suite.
+
+    Returns
+    -------
+    core_variables : dict
+        Core variables that are always included.
+    flight_attitude : dict
+        Optional "flight" suite (heading, battpos, fin, ballast_pumped).
+    derived_thermo : dict
+        Optional "thermo" suite of derived thermodynamic variables.
+    """
+    from importlib import resources
+
+    core_file = str(resources.files("glide").joinpath("assets/core.yml"))
+
+    with open(core_file, encoding="utf-8") as f:
+        # An empty YAML document loads as None; normalise it to an empty dict.
+        docs = [doc or {} for doc in safe_load_all(f)]
+
+    core_variables = docs[0] if docs else {}
+    flight_attitude = docs[1] if len(docs) > 1 else {}
+    derived_thermo = docs[2] if len(docs) > 2 else {}
+
+    return core_variables, flight_attitude, derived_thermo
+
+
+def _apply_qc_overrides(variables: dict, qc_overrides: dict) -> dict:
+    """Apply QC parameter overrides to variable definitions.
+
+    Only allows overriding: valid_min, valid_max, max_gap, interpolate_missing.
+    Overrides for unknown variables, malformed entries and disallowed keys
+    are logged as warnings and skipped.
+
+    Parameters
+    ----------
+    variables : dict
+        Variable definitions keyed by output name. Modified in place.
+    qc_overrides : dict
+        Mapping of variable name to a dict of override values.
+
+    Returns
+    -------
+    dict
+        The same ``variables`` object, for convenience.
+    """
+    allowed_keys = {"valid_min", "valid_max", "max_gap", "interpolate_missing"}
+
+    for var_name, overrides in qc_overrides.items():
+        if var_name not in variables:
+            _log.warning("QC override for unknown variable: %s", var_name)
+            continue
+        if not isinstance(overrides, dict):
+            # e.g. "temperature:" written with no keys underneath loads as None
+            _log.warning("Ignoring malformed QC override for %s", var_name)
+            continue
+
+        for key, value in overrides.items():
+            if key not in allowed_keys:
+                _log.warning(
+                    "Ignoring invalid QC override key '%s' for %s", key, var_name
+                )
+                continue
+
+            # valid_min/max go in CF attributes
+            if key in ("valid_min", "valid_max"):
+                variables[var_name].setdefault("CF", {})[key] = value
+                _log.debug("Override %s.CF.%s = %s", var_name, key, value)
+            else:
+                variables[var_name][key] = value
+                _log.debug("Override %s.%s = %s", var_name, key, value)
+
+    return variables
+
+
+def _build_slocum_name_map(variables: dict) -> dict:
+    """Build mapping from Slocum variable names to output variable names.
+
+    Variables without a ``source`` entry are skipped. ``source`` may be a
+    single name or a list of names, each of which maps to the variable.
+    """
+    slocum_name_map = {}
+    for variable_name, specs in variables.items():
+        # A variable declared with no body loads as None rather than a dict.
+        if not isinstance(specs, dict) or "source" not in specs:
+            continue
+        sources = specs["source"]
+        if not isinstance(sources, list):
+            sources = [sources]
+        for source in sources:
+            slocum_name_map[source] = variable_name
+
+    _log.debug("Slocum name mapping dict %s", slocum_name_map)
+    return slocum_name_map
+
+
 # Public functions
 
 
 def load_config(file: str | None = None) -> dict:
-    """Extract variable specifications from a yaml file."""
+    """Load and merge configuration from core and user files.
+
+    Parameters
+    ----------
+    file : str, optional
+        Path to user configuration file. If None, uses bundled default.
+
+    Returns
+    -------
+    dict
+        Merged configuration with keys:
+        - globals: trajectory and netcdf_attributes
+        - variables: all variable definitions (core + optional + user)
+        - slocum: mapping from Slocum names to output names
+        - merged_variables: variables for higher-level processing
+    """
+    # Load core definitions
+    core_variables, flight_attitude, derived_thermo = _load_core()
+
+    # Load user config
     if file is None:
         from importlib import resources
 
@@ -27,30 +174,64 @@ def load_config(file: str | None = None) -> dict:
     with open(file) as f:
         docs = [doc for doc in safe_load_all(f)]
 
-    global_config = docs[0]
-    variable_specs = docs[1]
-
-    slocum_name_map = {
-        source: variable_name
-        for variable_name, specs in variable_specs.items()
-        if "source" in specs
-        for source in (
-            specs["source"] if isinstance(specs["source"], list) else [specs["source"]]
-        )
-    }
-
-    _log.debug("Slocum name mapping dict %s", slocum_name_map)
-
-    # pyyaml loads datetime objects in local timezone as datetime.datetime objects.
-    # We need to ensure that all datetime objects are in UTC timestamps for processing to work.
-    for attr in ["valid_min", "valid_max"]:
-        if attr in variable_specs["time"]["CF"]:
-            variable_specs["time"]["CF"][attr] = _ensure_utc(
-                variable_specs["time"]["CF"][attr]
-            ).timestamp()
+    # Parse user config documents
+    global_config = (docs[0] if docs else None) or {}
+    include_config = docs[1] if len(docs) > 1 else {}
+    qc_config = docs[2] if len(docs) > 2 else {}
+    l1_variables = docs[3] if len(docs) > 3 else {}
+    merged_variables = docs[4] if len(docs) > 4 else {}
+
+    # Extract include toggles (default to True for backward compatibility).
+    # The user config names the suites "flight" and "thermo"; the
+    # "flight_attitude"/"derived_thermo" spellings are accepted as fallbacks.
+    include = _get_section(include_config, "include")
+    include_flight = include.get("flight", include.get("flight_attitude", True))
+    include_thermo = include.get("thermo", include.get("derived_thermo", True))
+
+    # Build variable set: start with core
+    variables = copy.deepcopy(core_variables)
+
+    # Add optional suites if enabled
+    if include_flight:
+        variables.update(copy.deepcopy(flight_attitude))
+        _log.debug("Including flight_attitude suite")
+    if include_thermo:
+        variables.update(copy.deepcopy(derived_thermo))
+        _log.debug("Including derived_thermo suite")
+
+    # Apply QC overrides
+    qc_overrides = _get_section(qc_config, "qc")
+    variables = _apply_qc_overrides(variables, qc_overrides)
+
+    # Add user L1 variables
+    l1_vars = _get_section(l1_variables, "l1_variables")
+    for var_name, var_spec in l1_vars.items():
+        if var_name in variables:
+            _log.warning("L1 variable '%s' conflicts with core variable", var_name)
+        else:
+            variables[var_name] = var_spec
+            _log.debug("Added L1 variable: %s", var_name)
+
+    # Build Slocum name mapping
+    slocum_name_map = _build_slocum_name_map(variables)
+
+    # Handle time valid_min/max UTC conversion
+    # (pyyaml yields datetime objects; they are stored as UTC epoch seconds)
+    if "time" in variables and "CF" in variables["time"]:
+        for attr in ["valid_min", "valid_max"]:
+            if attr in variables["time"]["CF"]:
+                val = variables["time"]["CF"][attr]
+                if isinstance(val, datetime):
+                    variables["time"]["CF"][attr] = _ensure_utc(val).timestamp()
+
+    # Extract merged variables
+    merged_vars = _get_section(merged_variables, "merged_variables")
 
     config = dict(
-        globals=global_config, variables=variable_specs, slocum=slocum_name_map
+        globals=global_config,
+        variables=variables,
+        slocum=slocum_name_map,
+        merged_variables=merged_vars,
     )
 
     return config