diff --git a/src/esnb/core/CaseExperiment2.py b/src/esnb/core/CaseExperiment2.py index 9b501a9..ee09d5b 100644 --- a/src/esnb/core/CaseExperiment2.py +++ b/src/esnb/core/CaseExperiment2.py @@ -4,8 +4,12 @@ import intake_esm from esnb.core.mdtf import MDTFCaseSettings -from esnb.sites.gfdl import (generate_gfdl_intake_catalog, infer_gfdl_expname, - infer_is_gfdl_ppdir, open_intake_catalog_dora) +from esnb.sites.gfdl import ( + generate_gfdl_intake_catalog, + infer_gfdl_expname, + infer_is_gfdl_ppdir, + open_intake_catalog_dora, +) from esnb.sites.gfdl import site as at_gfdl from . import html, util @@ -15,7 +19,6 @@ logger = logging.getLogger(__name__) - class CaseExperiment2(MDTFCaseSettings): """ CaseExperiment2 is a class for managing and validating a single experiment case @@ -73,9 +76,9 @@ def __init__(self, source, name=None, verbose=True): self.name = name # Read the MDTF settings case file - if self.mode == "mdtf_settings": - logger.info("Loading MDTF Settings File") - self.load_mdtf_settings_file(source) + if (self.mode == "mdtf_settings") or (self.mode == "dictionary"): + logger.info("Loading MDTF Settings") + self.load_mdtf_settings(source) if len(self.mdtf_settings["case_list"]) == 0: raise ValueError("No cases found in MDTF settings file") elif len(self.mdtf_settings["case_list"]) > 1: @@ -159,12 +162,12 @@ def __init__(self, source, name=None, verbose=True): # TODO: this block is failing for some reason # Try to keep a copy of the original catalog in case its needed later - #try: + # try: # self._source_catalog = copy.deepcopy(self.catalog) - #except Exception as exc: + # except Exception as exc: # logger.debug(str(exc)) # logger.debug("Unable to deep copy source catalog. 
Not an immediate issue.") - + self._source_catalog = None def files(self, **kwargs): @@ -244,7 +247,9 @@ def _repr_html_(self): return result def __hash__(self): - return hash((self.name, self.source)) + _name = str(self.name) + _source = str(self.source) + return hash((_name, _source)) def __eq__(self, other): return self.__hash__() == other.__hash__() diff --git a/src/esnb/core/CaseGroup2.py b/src/esnb/core/CaseGroup2.py index 4e0fc0b..331f9a0 100644 --- a/src/esnb/core/CaseGroup2.py +++ b/src/esnb/core/CaseGroup2.py @@ -549,6 +549,8 @@ def __hash__(self): for k in sorted(list(self.__dict__.keys())): if k in acceptable_keys: v = self.__dict__[k] + if isinstance(v, dict): + v = str(v) v = tuple(flatten_list(v)) if isinstance(v, list) else v if isinstance(v, list): hashables = hashables + v diff --git a/src/esnb/core/NotebookDiagnostic.py b/src/esnb/core/NotebookDiagnostic.py index fb1ff26..915521f 100644 --- a/src/esnb/core/NotebookDiagnostic.py +++ b/src/esnb/core/NotebookDiagnostic.py @@ -116,7 +116,9 @@ def __init__( self.varlist = varlist self.workdir = workdir - self.name = self.source if self.name is None else self.name + if self.name is None: + if isinstance(self.source, str): + self.name = self.source init_settings = {} @@ -139,14 +141,22 @@ def __init__( else: init_settings[key] = None - assert isinstance(source, str), "String or valid path must be supplied" + assert (isinstance(source, str)) or (isinstance(source, dict)), ( + "String, valid path, or dict must be supplied" + ) # load an MDTF-compatible jsonc settings file - if os.path.exists(source): - logger.info(f"Reading MDTF settings file from: {source}") - loaded_file = read_json(source) - settings = loaded_file["settings"] + if (isinstance(source, dict)) or (os.path.exists(source)): + if isinstance(source, dict): + logger.debug("Processing MDTF settings dictionary") + loaded_file = source + elif os.path.exists(source): + logger.info(f"Reading MDTF settings file from: {source}") + loaded_file = 
read_json(source) + else: + raise ValueError(f"Source type {type(source)} is not supported.") + settings = loaded_file["settings"] self.dimensions = ( self.dimensions if self.dimensions is not None @@ -196,6 +206,13 @@ def __init__( # initialize an empty groups attribute self.groups = [] + # set diagnostic name to long_name if name is not set + if self.name is None: + if self.long_name is not None: + self.name = self.long_name + else: + self.name = "Generic MDTF Diagnostic" + # initialize workdir if self.workdir is None: self.workdir = generate_tempdir_path(self.name) diff --git a/src/esnb/core/mdtf.py b/src/esnb/core/mdtf.py index 97eba24..29179e8 100644 --- a/src/esnb/core/mdtf.py +++ b/src/esnb/core/mdtf.py @@ -106,22 +106,25 @@ class MDTFCaseSettings: Methods ------- - load_mdtf_settings_file(settings_file) + load_mdtf_settings(settings_file) Loads MDTF settings from a YAML file, validates required fields, and - sets internal attributes. + sets internal attributes. A settings dict may be passed instead of a path. write_mdtf_settings_file(filename='case_settings.yml', fmt='yaml') Writes the current MDTF settings to a file in the specified format. 
""" - def load_mdtf_settings_file(self, settings_file): - settings_file = Path(settings_file) - if not settings_file.exists(): - raise FileNotFoundError( - f"MDTF settings file does not exist: {settings_file}" - ) - with open(settings_file, "r") as f: - _settings = yaml.safe_load(f) + def load_mdtf_settings(self, settings_file): + if isinstance(settings_file, dict): + _settings = settings_file + else: + settings_file = Path(settings_file) + if not settings_file.exists(): + raise FileNotFoundError( + f"MDTF settings file does not exist: {settings_file}" + ) + with open(settings_file, "r") as f: + _settings = yaml.safe_load(f) ingest_mdtf_settings_dict(self, _settings) self.source = str(settings_file) diff --git a/src/esnb/engine/__init__.py b/src/esnb/engine/__init__.py index 88c9557..d8c80c8 100644 --- a/src/esnb/engine/__init__.py +++ b/src/esnb/engine/__init__.py @@ -158,7 +158,6 @@ def create_script( if case_settings is not None: script.write("# case settings override\n") script.write(f"os.environ['ESNB_CASE_DATA'] = \"{case_settings}\"\n\n") - script.write("# run notebook \n") script.write("from esnb.engine import run_notebook\n") @@ -192,11 +191,13 @@ def create_script( return str(script_path) + def dict_to_key_value_string(text): allowed_punctuation = r":\[\]\(\)\/\-\,\_\." 
pattern = rf"[^a-zA-Z0-9{allowed_punctuation}]" return re.sub(pattern, "", text) + def identify_current_kernel_name(): python_exec = sys.executable existing_kernels = jupyter_client.kernelspec.find_kernel_specs() @@ -276,11 +277,15 @@ def run_notebook(notebook_path, output_dir): kernel_name = identify_current_kernel_name() import nest_asyncio + nest_asyncio.apply() print("doing async io") client = NotebookClient( - nb, timeout=600, kernel_name=kernel_name, allow_errors=True, + nb, + timeout=600, + kernel_name=kernel_name, + allow_errors=True, ) _ = client.execute() diff --git a/src/esnb/sites/gfdl.py b/src/esnb/sites/gfdl.py index e68c727..61ed2c5 100644 --- a/src/esnb/sites/gfdl.py +++ b/src/esnb/sites/gfdl.py @@ -74,19 +74,19 @@ def generate_gfdl_intake_catalog(pathpp, fre_cli=None): # Immediately force collection of any lazy operations try: # Method 1: Access df property which should trigger collection - if hasattr(catalog, 'df'): + if hasattr(catalog, "df"): df_len = len(catalog.df) print(f"Catalog collected: {df_len} entries") - + # Method 2: If there are specific lazy frames, collect them - if hasattr(catalog, 'esmcat') and hasattr(catalog.esmcat, '_frames'): + if hasattr(catalog, "esmcat") and hasattr(catalog.esmcat, "_frames"): frames = catalog.esmcat._frames - if hasattr(frames, 'lf') and frames.lf is not None: + if hasattr(frames, "lf") and frames.lf is not None: # Force immediate collection frames.pl_df = frames.lf.collect() - if not hasattr(frames, 'df') or frames.df is None: + if not hasattr(frames, "df") or frames.df is None: frames.df = frames.pl_df.to_pandas(use_pyarrow_extension_array=True) - + except Exception as exc: logger.error("Unable to access generated intake catalog") raise exc diff --git a/tests/test_mdtf.py b/tests/test_mdtf.py index 002fc7f..43c0816 100644 --- a/tests/test_mdtf.py +++ b/tests/test_mdtf.py @@ -7,13 +7,13 @@ def test_MDTFCaseSettings(): settings_file = esnb.datasources.test_mdtf_settings settings = mdtf.MDTFCaseSettings - 
settings.load_mdtf_settings_file(settings, settings_file) + settings.load_mdtf_settings(settings, settings_file) def test_MDTFCaseSettings_invalid_file(): with pytest.raises(FileNotFoundError): x = mdtf.MDTFCaseSettings - x = x.load_mdtf_settings_file(x, "non_existent_file.yml") + x = x.load_mdtf_settings(x, "non_existent_file.yml") def test_mdtf_settings_template_dict_1():