From 6efe857eeda003b1b315f12c080e0303294269e9 Mon Sep 17 00:00:00 2001 From: kevindougherty-noaa Date: Mon, 29 Sep 2025 19:59:43 +0000 Subject: [PATCH 1/5] update plots to handle nan values --- .../batch/base/diagnostics/contour_plot.py | 6 ++-- .../batch/base/diagnostics/density.py | 21 ++++++++--- .../base/diagnostics/filled_contour_plot.py | 6 ++-- .../batch/base/diagnostics/histogram.py | 25 +++++++++---- .../batch/base/diagnostics/line_plot.py | 36 ++++++++++++------- .../batch/base/diagnostics/scatter.py | 31 +++++++++------- 6 files changed, 82 insertions(+), 43 deletions(-) diff --git a/src/eva/plotting/batch/base/diagnostics/contour_plot.py b/src/eva/plotting/batch/base/diagnostics/contour_plot.py index ef9d5ce0..2dd3d20d 100644 --- a/src/eva/plotting/batch/base/diagnostics/contour_plot.py +++ b/src/eva/plotting/batch/base/diagnostics/contour_plot.py @@ -87,9 +87,9 @@ def data_prep(self): zdata = slice_var_from_str(self.config['z'], zdata, self.logger) # contour data should be flattened - xdata = xdata.flatten() - ydata = ydata.flatten() - zdata = zdata.flatten() + self.xdata = xdata.flatten() + self.ydata = ydata.flatten() + self.zdata = zdata.flatten() @abstractmethod def configure_plot(self): diff --git a/src/eva/plotting/batch/base/diagnostics/density.py b/src/eva/plotting/batch/base/diagnostics/density.py index 0c90ef65..b5a66bf2 100644 --- a/src/eva/plotting/batch/base/diagnostics/density.py +++ b/src/eva/plotting/batch/base/diagnostics/density.py @@ -73,11 +73,22 @@ def data_prep(self): data = slice_var_from_str(self.config['data'], data, self.logger) # Density data should be flattened - data = data.flatten() - - # Missing data should also be removed - mask = ~np.isnan(data) - self.data = data[mask] + data = np.ravel(np.asanyarray(data)) + + # If upstream gave us a masked array, turn masked to NaN for uniform handling + if ma.isMaskedArray(data): + data = data.filled(np.nan) + + # Optional knob: by default density plots *drop* NaNs (keeps current behavior) + # Set `drop_nan: false` in the layer config if you want to preserve length (masked in place) + drop_nan = bool(self.config.get('drop_nan', True)) + + if drop_nan: + # keep only finite values + self.data = data[np.isfinite(data)] + else: + # preserve length, mask non-finite in place; downstream can decide whether to compress + self.data = ma.masked_invalid(data) # -------------------------------------------------------------------------------------------------- diff --git a/src/eva/plotting/batch/base/diagnostics/filled_contour_plot.py b/src/eva/plotting/batch/base/diagnostics/filled_contour_plot.py index bbb6a46e..78b96a11 100644 --- a/src/eva/plotting/batch/base/diagnostics/filled_contour_plot.py +++ b/src/eva/plotting/batch/base/diagnostics/filled_contour_plot.py @@ -88,9 +88,9 @@ def data_prep(self): zdata = slice_var_from_str(self.config['z'], zdata, self.logger) # contour data should be flattened - xdata = xdata.flatten() - ydata = ydata.flatten() - zdata = zdata.flatten() + self.xdata = xdata.flatten() + self.ydata = ydata.flatten() + self.zdata = zdata.flatten() @abstractmethod def configure_plot(self): diff --git a/src/eva/plotting/batch/base/diagnostics/histogram.py b/src/eva/plotting/batch/base/diagnostics/histogram.py index 78dc9548..7b8c764d 100644 --- a/src/eva/plotting/batch/base/diagnostics/histogram.py +++ b/src/eva/plotting/batch/base/diagnostics/histogram.py @@ -72,12 +72,25 @@ def data_prep(self): # See if we need to slice data data = slice_var_from_str(self.config['data'], data, self.logger) - # Histogram data should be flattened - data = data.flatten() - - # Missing data should also be removed - mask = ~np.isnan(data) - self.data = data[mask] + # Flatten + arr = np.ravel(np.asanyarray(data)) + + # If masked, convert masked entries to NaN for uniform handling + if ma.isMaskedArray(arr): + arr = arr.filled(np.nan) + + # Read & strip the knob so it never leaks to backends + cfg = dict(self.config) + drop_nan = bool(cfg.get('drop_nan', True)) + cfg.pop('drop_nan', None) + self.config = cfg + + if drop_nan: + # Typical histogram path: use only finite values + self.data = arr[np.isfinite(arr)] + else: + # Preserve length and mask invalids in place (backend must accept masked arrays) + self.data = ma.masked_invalid(arr) # -------------------------------------------------------------------------------------------------- diff --git a/src/eva/plotting/batch/base/diagnostics/line_plot.py b/src/eva/plotting/batch/base/diagnostics/line_plot.py index b38de056..1e4a8b01 100644 --- a/src/eva/plotting/batch/base/diagnostics/line_plot.py +++ b/src/eva/plotting/batch/base/diagnostics/line_plot.py @@ -2,6 +2,7 @@ from eva.utilities.config import get from eva.utilities.utils import get_schema, update_object, slice_var_from_str import numpy as np +import numpy.ma as ma import pandas as pd from abc import ABC, abstractmethod @@ -96,19 +97,28 @@ def data_prep(self): xdata = slice_var_from_str(self.config['x'], xdata, self.logger) ydata = slice_var_from_str(self.config['y'], ydata, self.logger) - # line plot data should be flattened - self.xdata = xdata.flatten() - self.ydata = ydata.flatten() - - # Remove NaN values to enable regression - # -------------------------------------- - mask = pd.notna(xdata) - self.xdata = xdata[mask] - self.ydata = ydata[mask] - - mask = pd.notna(self.ydata) - self.xdata = self.xdata[mask] - self.ydata = self.ydata[mask] + # Flatten, build y with NaNs preserved (as you already added) + x_flat = np.ravel(xdata) + y_flat = ma.array(ydata).filled(np.nan).ravel() + + # Read and remove the config knob so it won't be forwarded to plt.plot + cfg = dict(getattr(self, "config", {}) or {}) + drop_nan = bool(cfg.pop("drop_nan", False)) + self.config = cfg + + if drop_nan: + y_is_finite = np.isfinite(y_flat) + y_plot = y_flat[y_is_finite] + try: + x_plot = x_flat[y_is_finite] + except Exception: + x_plot = np.array(x_flat, dtype=object)[y_is_finite] + else: + y_plot = y_flat + x_plot = x_flat + + self.xdata = x_plot + self.ydata = y_plot @abstractmethod def configure_plot(self): diff --git a/src/eva/plotting/batch/base/diagnostics/scatter.py b/src/eva/plotting/batch/base/diagnostics/scatter.py index 726913a1..4aa8bac4 100644 --- a/src/eva/plotting/batch/base/diagnostics/scatter.py +++ b/src/eva/plotting/batch/base/diagnostics/scatter.py @@ -79,19 +79,24 @@ def data_prep(self): xdata = slice_var_from_str(self.config['x'], xdata, self.logger) ydata = slice_var_from_str(self.config['y'], ydata, self.logger) - # scatter data should be flattened - self.xdata = xdata.flatten() - self.ydata = ydata.flatten() - - # Remove NaN values to enable regression - # -------------------------------------- - mask = pd.notna(xdata) - self.xdata = xdata[mask] - self.ydata = ydata[mask] - - mask = pd.notna(self.ydata) - self.xdata = self.xdata[mask] - self.ydata = self.ydata[mask] + # Read and remove the config knob so it won't be forwarded to plt.plot + cfg = dict(getattr(self, "config", {}) or {}) + drop_nan = bool(cfg.pop("drop_nan", False)) + self.config = cfg + + if drop_nan: + y_is_finite = np.isfinite(y_flat) + y_plot = y_flat[y_is_finite] + try: + x_plot = x_flat[y_is_finite] + except Exception: + x_plot = np.array(x_flat, dtype=object)[y_is_finite] + else: + y_plot = y_flat + x_plot = x_flat + + self.xdata = x_plot + self.ydata = y_plot @abstractmethod def configure_plot(self): From 5b233678266a6c9dccedfdddf8c4e8a0207940b0 Mon Sep 17 00:00:00 2001 From: kevindougherty-noaa Date: Mon, 29 Sep 2025 20:04:04 +0000 Subject: [PATCH 2/5] update emcpy hash --- requirements_emc.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements_emc.txt b/requirements_emc.txt index 651f4826..ea22910c 100644 --- a/requirements_emc.txt +++ b/requirements_emc.txt @@ -7,7 +7,7 @@ cartopy>=0.21.1 scipy>=1.9.3 xarray>=2022.3.0 pandas>=1.4.0 -numpy>=2.0.0 +numpy==1.26.4 # Additional packages -git+https://github.com/NOAA-EMC/emcpy.git@92aa62f34a1f413d8cb1646bca0e81f267b61365#egg=emcpy +git+https://github.com/NOAA-EMC/emcpy.git@7794574611e760475d61eb5d9458af2d3d2191d8#egg=emcpy From 162b25817f93469c82cf28e2765c10d0b6b7c2c9 Mon Sep 17 00:00:00 2001 From: kevindougherty-noaa Date: Mon, 29 Sep 2025 20:12:48 +0000 Subject: [PATCH 3/5] pycodestyle --- requirements_emc.txt | 2 +- src/eva/plotting/batch/base/diagnostics/density.py | 7 ++++--- src/eva/plotting/batch/base/diagnostics/histogram.py | 1 + src/eva/plotting/batch/base/diagnostics/line_plot.py | 6 +++--- src/eva/plotting/batch/base/diagnostics/scatter.py | 4 ++-- 5 files changed, 11 insertions(+), 9 deletions(-) diff --git a/requirements_emc.txt b/requirements_emc.txt index ea22910c..d142fc0d 100644 --- a/requirements_emc.txt +++ b/requirements_emc.txt @@ -7,7 +7,7 @@ cartopy>=0.21.1 scipy>=1.9.3 xarray>=2022.3.0 pandas>=1.4.0 -numpy==1.26.4 +numpy>=2.0.0 # Additional packages git+https://github.com/NOAA-EMC/emcpy.git@7794574611e760475d61eb5d9458af2d3d2191d8#egg=emcpy diff --git a/src/eva/plotting/batch/base/diagnostics/density.py b/src/eva/plotting/batch/base/diagnostics/density.py index b5a66bf2..01c49218 100644 --- a/src/eva/plotting/batch/base/diagnostics/density.py +++ b/src/eva/plotting/batch/base/diagnostics/density.py @@ -2,6 +2,7 @@ from eva.utilities.config import get from eva.utilities.utils import get_schema, update_object, slice_var_from_str import numpy as np +import numpy.ma as ma from abc import ABC, abstractmethod @@ -74,15 +75,15 @@ def data_prep(self): # Density data should be flattened data = np.ravel(np.asanyarray(data)) - + # If upstream gave us a masked array, turn masked to NaN for uniform handling if ma.isMaskedArray(data): data = data.filled(np.nan) - + # Optional knob: by default density plots *drop* NaNs (keeps current behavior) # Set `drop_nan: false` in the layer config if you want to preserve length (masked in place) drop_nan = bool(self.config.get('drop_nan', True)) - + if drop_nan: # keep only finite values self.data = data[np.isfinite(data)] diff --git a/src/eva/plotting/batch/base/diagnostics/histogram.py b/src/eva/plotting/batch/base/diagnostics/histogram.py index 7b8c764d..b0d2759d 100644 --- a/src/eva/plotting/batch/base/diagnostics/histogram.py +++ b/src/eva/plotting/batch/base/diagnostics/histogram.py @@ -2,6 +2,7 @@ from eva.utilities.config import get from eva.utilities.utils import get_schema, update_object, slice_var_from_str import numpy as np +import numpy.ma as ma from abc import ABC, abstractmethod diff --git a/src/eva/plotting/batch/base/diagnostics/line_plot.py b/src/eva/plotting/batch/base/diagnostics/line_plot.py index 1e4a8b01..91e0bb3a 100644 --- a/src/eva/plotting/batch/base/diagnostics/line_plot.py +++ b/src/eva/plotting/batch/base/diagnostics/line_plot.py @@ -100,12 +100,12 @@ def data_prep(self): # Flatten, build y with NaNs preserved (as you already added) x_flat = np.ravel(xdata) y_flat = ma.array(ydata).filled(np.nan).ravel() - + # Read and remove the config knob so it won't be forwarded to plt.plot cfg = dict(getattr(self, "config", {}) or {}) drop_nan = bool(cfg.pop("drop_nan", False)) self.config = cfg - + if drop_nan: y_is_finite = np.isfinite(y_flat) y_plot = y_flat[y_is_finite] @@ -116,7 +116,7 @@ def data_prep(self): else: y_plot = y_flat x_plot = x_flat - + self.xdata = x_plot self.ydata = y_plot diff --git a/src/eva/plotting/batch/base/diagnostics/scatter.py b/src/eva/plotting/batch/base/diagnostics/scatter.py index 4aa8bac4..bae021c4 100644 --- a/src/eva/plotting/batch/base/diagnostics/scatter.py +++ b/src/eva/plotting/batch/base/diagnostics/scatter.py @@ -83,7 +83,7 @@ def data_prep(self): cfg = dict(getattr(self, "config", {}) or {}) drop_nan = bool(cfg.pop("drop_nan", False)) self.config = cfg - + if drop_nan: y_is_finite = np.isfinite(y_flat) y_plot = y_flat[y_is_finite] @@ -94,7 +94,7 @@ def data_prep(self): else: y_plot = y_flat x_plot = x_flat - + self.xdata = x_plot self.ydata = y_plot From 31a8f62468f72b7e3b9ba868556514feccb9f761 Mon Sep 17 00:00:00 2001 From: kevindougherty-noaa Date: Mon, 29 Sep 2025 20:18:21 +0000 Subject: [PATCH 4/5] fix broken scatter --- .../batch/base/diagnostics/scatter.py | 38 ++++++++++--------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/src/eva/plotting/batch/base/diagnostics/scatter.py b/src/eva/plotting/batch/base/diagnostics/scatter.py index bae021c4..3c7e1d24 100644 --- a/src/eva/plotting/batch/base/diagnostics/scatter.py +++ b/src/eva/plotting/batch/base/diagnostics/scatter.py @@ -2,7 +2,7 @@ from eva.utilities.config import get from eva.utilities.utils import get_schema, update_object, slice_var_from_str import numpy as np -import pandas as pd +import numpy.ma as ma from abc import ABC, abstractmethod @@ -72,31 +72,35 @@ def data_prep(self): channel = self.config.get('channel') xdata = self.dataobj.get_variable_data(var0_cgv[0], var0_cgv[1], var0_cgv[2], channel) - xdata1 = self.dataobj.get_variable_data(var0_cgv[0], var0_cgv[1], var0_cgv[2]) ydata = self.dataobj.get_variable_data(var1_cgv[0], var1_cgv[1], var1_cgv[2], channel) - # see if we need to slice data + # Optional slicing xdata = slice_var_from_str(self.config['x'], xdata, self.logger) ydata = slice_var_from_str(self.config['y'], ydata, self.logger) - # Read and remove the config knob so it won't be forwarded to plt.plot - cfg = dict(getattr(self, "config", {}) or {}) - drop_nan = bool(cfg.pop("drop_nan", False)) + # Flatten and normalize (turn masked to NaN for uniform handling) + x = np.ravel(np.asanyarray(xdata)) + y = np.ravel(np.asanyarray(ydata)) + if ma.isMaskedArray(x): + x = x.filled(np.nan) + if ma.isMaskedArray(y): + y = y.filled(np.nan) + + # Read & remove knob so it won't propagate to matplotlib kwargs + cfg = dict(self.config) + drop_nan = bool(cfg.pop('drop_nan', True)) # default True for scatter self.config = cfg if drop_nan: - y_is_finite = np.isfinite(y_flat) - y_plot = y_flat[y_is_finite] - try: - x_plot = x_flat[y_is_finite] - except Exception: - x_plot = np.array(x_flat, dtype=object)[y_is_finite] + # Keep only pairs where both x and y are finite + mask = np.isfinite(x) & np.isfinite(y) + self.xdata = x[mask] + self.ydata = y[mask] else: - y_plot = y_flat - x_plot = x_flat - - self.xdata = x_plot - self.ydata = y_plot + # Preserve length; mask invalid pairs in-place (some backends honor masked arrays) + invalid = ~(np.isfinite(x) & np.isfinite(y)) + self.xdata = ma.array(x, mask=invalid) + self.ydata = ma.array(y, mask=invalid) @abstractmethod def configure_plot(self): From dad2b9bf1d950a54d5e2e56402dd97f4582c79ab Mon Sep 17 00:00:00 2001 From: kevindougherty-noaa Date: Mon, 29 Sep 2025 20:21:46 +0000 Subject: [PATCH 5/5] remove pandas library --- src/eva/plotting/batch/base/diagnostics/line_plot.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/eva/plotting/batch/base/diagnostics/line_plot.py b/src/eva/plotting/batch/base/diagnostics/line_plot.py index 91e0bb3a..b4beccfc 100644 --- a/src/eva/plotting/batch/base/diagnostics/line_plot.py +++ b/src/eva/plotting/batch/base/diagnostics/line_plot.py @@ -3,7 +3,6 @@ from eva.utilities.utils import get_schema, update_object, slice_var_from_str import numpy as np import numpy.ma as ma -import pandas as pd from abc import ABC, abstractmethod