Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 15 additions & 10 deletions src/esnb/core/CaseExperiment2.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,12 @@
import intake_esm

from esnb.core.mdtf import MDTFCaseSettings
from esnb.sites.gfdl import (generate_gfdl_intake_catalog, infer_gfdl_expname,
infer_is_gfdl_ppdir, open_intake_catalog_dora)
from esnb.sites.gfdl import (
generate_gfdl_intake_catalog,
infer_gfdl_expname,
infer_is_gfdl_ppdir,
open_intake_catalog_dora,
)
from esnb.sites.gfdl import site as at_gfdl

from . import html, util
Expand All @@ -15,7 +19,6 @@
logger = logging.getLogger(__name__)



class CaseExperiment2(MDTFCaseSettings):
"""
CaseExperiment2 is a class for managing and validating a single experiment case
Expand Down Expand Up @@ -73,9 +76,9 @@ def __init__(self, source, name=None, verbose=True):
self.name = name

# Read the MDTF settings case file
if self.mode == "mdtf_settings":
logger.info("Loading MDTF Settings File")
self.load_mdtf_settings_file(source)
if (self.mode == "mdtf_settings") or (self.mode == "dictionary"):
logger.info("Loading MDTF Settings")
self.load_mdtf_settings(source)
if len(self.mdtf_settings["case_list"]) == 0:
raise ValueError("No cases found in MDTF settings file")
elif len(self.mdtf_settings["case_list"]) > 1:
Expand Down Expand Up @@ -159,12 +162,12 @@ def __init__(self, source, name=None, verbose=True):
# TODO: this block is failing for some reason

# Try to keep a copy of the original catalog in case it's needed later
#try:
# try:
# self._source_catalog = copy.deepcopy(self.catalog)
#except Exception as exc:
# except Exception as exc:
# logger.debug(str(exc))
# logger.debug("Unable to deep copy source catalog. Not an immediate issue.")

self._source_catalog = None

def files(self, **kwargs):
Expand Down Expand Up @@ -244,7 +247,9 @@ def _repr_html_(self):
return result

def __hash__(self):
    """
    Return a hash built from the case name and source.

    Both attributes are coerced to ``str`` before hashing so that the
    resulting tuple is always hashable, even when an attribute holds an
    unhashable value such as a settings dictionary.

    Returns
    -------
    int
        Hash of the ``(str(name), str(source))`` tuple.
    """
    return hash((str(self.name), str(self.source)))

def __eq__(self, other):
    """
    Compare two cases by their hash values.

    Parameters
    ----------
    other : object
        Object to compare against.

    Returns
    -------
    bool or NotImplemented
        ``True`` when ``other`` is an instance of this object's class and
        both objects hash to the same value, ``False`` when the hashes
        differ, and ``NotImplemented`` for any other type so that Python
        falls back to its default comparison.
    """
    # Guard first: unhashable operands (list, dict, ...) have
    # ``__hash__ = None`` and would raise TypeError if hashed here.
    if not isinstance(other, type(self)):
        return NotImplemented
    return hash(self) == hash(other)
2 changes: 2 additions & 0 deletions src/esnb/core/CaseGroup2.py
Original file line number Diff line number Diff line change
Expand Up @@ -549,6 +549,8 @@ def __hash__(self):
for k in sorted(list(self.__dict__.keys())):
if k in acceptable_keys:
v = self.__dict__[k]
if isinstance(v, dict):
    # Stringify the dict *value* so it can take part in the hash.
    # ``str(dict)`` would stringify the builtin type instead, giving the
    # same text ("<class 'dict'>") for every dict-valued attribute.
    v = str(v)
v = tuple(flatten_list(v)) if isinstance(v, list) else v
if isinstance(v, list):
hashables = hashables + v
Expand Down
29 changes: 23 additions & 6 deletions src/esnb/core/NotebookDiagnostic.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,9 @@ def __init__(
self.varlist = varlist
self.workdir = workdir

self.name = self.source if self.name is None else self.name
if self.name is None:
if isinstance(self.source, str):
self.name = self.source

init_settings = {}

Expand All @@ -139,14 +141,22 @@ def __init__(
else:
init_settings[key] = None

assert isinstance(source, str), "String or valid path must be supplied"
assert (isinstance(source, str)) or (isinstance(source, dict)), (
"String, valid path, or dict must be supplied"
)

# load an MDTF-compatible jsonc settings file
if os.path.exists(source):
logger.info(f"Reading MDTF settings file from: {source}")
loaded_file = read_json(source)
settings = loaded_file["settings"]
if (isinstance(source, dict)) or (os.path.exists(source)):
if isinstance(source, dict):
logger.debug("Processing MDTF settings dictionary")
loaded_file = source
elif os.path.exists(source):
logger.info(f"Reading MDTF settings file from: {source}")
loaded_file = read_json(source)
else:
raise ValueError(f"Source type {type(source)} is not supported.")

settings = loaded_file["settings"]
self.dimensions = (
self.dimensions
if self.dimensions is not None
Expand Down Expand Up @@ -196,6 +206,13 @@ def __init__(
# initialize an empty groups attribute
self.groups = []

# set diagnostic name to long_name if name is not set
if self.name is None:
if self.long_name is not None:
self.name = self.long_name
else:
self.name = "Generic MDTF Diagnostic"

# initialize workdir
if self.workdir is None:
self.workdir = generate_tempdir_path(self.name)
Expand Down
23 changes: 13 additions & 10 deletions src/esnb/core/mdtf.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,22 +106,25 @@ class MDTFCaseSettings:

Methods
-------
load_mdtf_settings_file(settings_file)
load_mdtf_settings(settings_file)
Loads MDTF settings from a YAML file, validates required fields, and
sets internal attributes.
sets internal attributes. Alternatively, a dict of settings may be passed in place of a file path.

write_mdtf_settings_file(filename='case_settings.yml', fmt='yaml')
Writes the current MDTF settings to a file in the specified format.
"""

def load_mdtf_settings_file(self, settings_file):
settings_file = Path(settings_file)
if not settings_file.exists():
raise FileNotFoundError(
f"MDTF settings file does not exist: {settings_file}"
)
with open(settings_file, "r") as f:
_settings = yaml.safe_load(f)
def load_mdtf_settings(self, settings_file):
if isinstance(settings_file, dict):
_settings = settings_file
else:
settings_file = Path(settings_file)
if not settings_file.exists():
raise FileNotFoundError(
f"MDTF settings file does not exist: {settings_file}"
)
with open(settings_file, "r") as f:
_settings = yaml.safe_load(f)
ingest_mdtf_settings_dict(self, _settings)
self.source = str(settings_file)

Expand Down
9 changes: 7 additions & 2 deletions src/esnb/engine/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,6 @@ def create_script(
if case_settings is not None:
script.write("# case settings override\n")
script.write(f"os.environ['ESNB_CASE_DATA'] = \"{case_settings}\"\n\n")


script.write("# run notebook \n")
script.write("from esnb.engine import run_notebook\n")
Expand Down Expand Up @@ -192,11 +191,13 @@ def create_script(

return str(script_path)


def dict_to_key_value_string(text):
    """
    Strip every character that is not alphanumeric or allowed punctuation.

    Parameters
    ----------
    text : str
        Input string to sanitize.

    Returns
    -------
    str
        ``text`` with all characters removed except ASCII letters, digits,
        and the punctuation ``: [ ] ( ) / - , _ .``
    """
    keep = r":\[\]\(\)\/\-\,\_\."
    disallowed = re.compile(rf"[^a-zA-Z0-9{keep}]")
    return disallowed.sub("", text)


def identify_current_kernel_name():
python_exec = sys.executable
existing_kernels = jupyter_client.kernelspec.find_kernel_specs()
Expand Down Expand Up @@ -276,11 +277,15 @@ def run_notebook(notebook_path, output_dir):
kernel_name = identify_current_kernel_name()

import nest_asyncio

nest_asyncio.apply()
print("doing async io")

client = NotebookClient(
nb, timeout=600, kernel_name=kernel_name, allow_errors=True,
nb,
timeout=600,
kernel_name=kernel_name,
allow_errors=True,
)
_ = client.execute()

Expand Down
12 changes: 6 additions & 6 deletions src/esnb/sites/gfdl.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,19 +74,19 @@ def generate_gfdl_intake_catalog(pathpp, fre_cli=None):
# Immediately force collection of any lazy operations
try:
# Method 1: Access df property which should trigger collection
if hasattr(catalog, 'df'):
if hasattr(catalog, "df"):
df_len = len(catalog.df)
print(f"Catalog collected: {df_len} entries")

# Method 2: If there are specific lazy frames, collect them
if hasattr(catalog, 'esmcat') and hasattr(catalog.esmcat, '_frames'):
if hasattr(catalog, "esmcat") and hasattr(catalog.esmcat, "_frames"):
frames = catalog.esmcat._frames
if hasattr(frames, 'lf') and frames.lf is not None:
if hasattr(frames, "lf") and frames.lf is not None:
# Force immediate collection
frames.pl_df = frames.lf.collect()
if not hasattr(frames, 'df') or frames.df is None:
if not hasattr(frames, "df") or frames.df is None:
frames.df = frames.pl_df.to_pandas(use_pyarrow_extension_array=True)

except Exception as exc:
logger.error("Unable to access generated intake catalog")
raise exc
Expand Down
4 changes: 2 additions & 2 deletions tests/test_mdtf.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@
def test_MDTFCaseSettings():
    """Load the settings referenced by ``esnb.datasources.test_mdtf_settings``."""
    settings_file = esnb.datasources.test_mdtf_settings
    # NOTE(review): the class object itself is passed as ``self`` here, so
    # anything the loader sets lands on the class — confirm this is intended
    # rather than using an instance.
    settings = mdtf.MDTFCaseSettings
    settings.load_mdtf_settings(settings, settings_file)


def test_MDTFCaseSettings_invalid_file():
    """A path that does not exist must raise ``FileNotFoundError``."""
    settings_cls = mdtf.MDTFCaseSettings
    # Keep only the call expected to raise inside the context manager so an
    # unrelated error in the setup cannot be mistaken for the expected one.
    with pytest.raises(FileNotFoundError):
        settings_cls.load_mdtf_settings(settings_cls, "non_existent_file.yml")


def test_mdtf_settings_template_dict_1():
Expand Down