From 16381ad78240fa10efccb41bfd314edb2137c391 Mon Sep 17 00:00:00 2001
From: Jonas Bhend
Date: Thu, 15 Jan 2026 08:06:45 +0100
Subject: [PATCH 1/8] Add inclusion of optional rules to config

---
 config/forecasters-co1e.yaml      | 2 ++
 config/forecasters.yaml           | 2 ++
 config/interpolators.yaml         | 2 ++
 config/showcase.yaml              | 2 ++
 src/evalml/config.py              | 5 +++++
 workflow/tools/config.schema.json | 8 ++++++++
 6 files changed, 21 insertions(+)

diff --git a/config/forecasters-co1e.yaml b/config/forecasters-co1e.yaml
index 758e213..11f561e 100644
--- a/config/forecasters-co1e.yaml
+++ b/config/forecasters-co1e.yaml
@@ -58,3 +58,5 @@ profile:
   runtime: "1h"
   gpus: 0
   jobs: 50
+
+include_optional_rules: []
diff --git a/config/forecasters.yaml b/config/forecasters.yaml
index bd11485..683b5bf 100644
--- a/config/forecasters.yaml
+++ b/config/forecasters.yaml
@@ -54,3 +54,5 @@ profile:
   runtime: "1h"
   gpus: 0
   jobs: 50
+
+include_optional_rules: []
diff --git a/config/interpolators.yaml b/config/interpolators.yaml
index b29337b..1f5c9bd 100644
--- a/config/interpolators.yaml
+++ b/config/interpolators.yaml
@@ -78,3 +78,5 @@ profile:
   runtime: "1h"
   gpus: 0
   jobs: 50
+
+include_optional_rules: []
diff --git a/config/showcase.yaml b/config/showcase.yaml
index 29d02f2..f448a42 100644
--- a/config/showcase.yaml
+++ b/config/showcase.yaml
@@ -64,3 +64,5 @@ profile:
   jobs: 50
 batch_rules:
   plot_forecast_frame: 24
+
+include_optional_rules: []
diff --git a/src/evalml/config.py b/src/evalml/config.py
index 8faa614..7f5ad55 100644
--- a/src/evalml/config.py
+++ b/src/evalml/config.py
@@ -311,6 +311,11 @@ class ConfigModel(BaseModel):
         "populate_by_name": True,
     }
 
+    include_optional_rules: List[str] = Field(
+        default_factory=list,
+        description="List of optional rules to include in the workflow.",
+    )
+
 
 def generate_config_schema() -> str:
     """Generate the JSON schema for the ConfigModel."""
diff --git a/workflow/tools/config.schema.json b/workflow/tools/config.schema.json
index 666e35b..fe411c7 100644
--- a/workflow/tools/config.schema.json
+++ b/workflow/tools/config.schema.json
@@ -561,6 +561,14 @@
       },
       "profile": {
         "$ref": "#/$defs/Profile"
+      },
+      "include_optional_rules": {
+        "description": "List of optional rules to include in the workflow.",
+        "items": {
+          "type": "string"
+        },
+        "title": "Include Optional Rules",
+        "type": "array"
       }
     },
     "required": [

From 2a7f75c6cc2ebc6db055981df78aed37b139cabe Mon Sep 17 00:00:00 2001
From: Jonas Bhend
Date: Thu, 15 Jan 2026 08:09:48 +0100
Subject: [PATCH 2/8] Generalize baseline extraction to work with ICON and add
 rule

---
 workflow/rules/data.smk              |  35 +++++++-
 workflow/scripts/extract_baseline.py | 121 +++++++++++++++++++--------
 2 files changed, 118 insertions(+), 38 deletions(-)

diff --git a/workflow/rules/data.smk b/workflow/rules/data.smk
index bef818f..278454c 100644
--- a/workflow/rules/data.smk
+++ b/workflow/rules/data.smk
@@ -4,7 +4,7 @@ from pathlib import Path
 include: "common.smk"
 
 
-if "extract_cosmoe" in config.get("include-optional-rules", []):
+if "extract_cosmoe" in config.get("include_optional_rules", []):
 
     rule extract_cosmoe:
         input:
@@ -23,7 +23,7 @@ if "extract_cosmoe" in config.get("include-optional-rules", []):
             OUT_ROOT / "logs/extract-cosmoe-fcts-{year}.log",
         shell:
            """
-            python workflow/scripts/extract_baseline_fct.py \
+            python workflow/scripts/extract_baseline.py \
                 --archive_dir {input.archive}/{params.year_postfix} \
                 --output_store {output.fcts} \
                 --steps {params.steps} \
                 > {log} 2>&1
             """
 
 
-if "extract_cosmo1e" in config.get("include-optional-rules", []):
+if "extract_cosmo1e" in config.get("include_optional_rules", []):
 
     rule extract_cosmo1e:
         input:
@@ -50,7 +50,7 @@ if "extract_cosmo1e" in config.get("include-optional-rules", []):
             OUT_ROOT / "logs/extract-cosmo1e-fcts-{year}.log",
         shell:
             """
-            python workflow/scripts/extract_baseline_fct.py \
+            python workflow/scripts/extract_baseline.py \
                 --archive_dir {input.archive}/{params.year_postfix} \
                 --output_store {output.fcts} \
                 --steps {params.steps} \
                 > {log} 2>&1
             """
+
+
+if "extract_icon1" in config.get("include_optional_rules", []):
+
+    rule extract_icon1:
+        input:
+            archive=Path("/store_new/mch/msopr/osm/ICON-CH1-EPS"),
+        output:
+            fcts=protected(
+                directory(Path("/store_new/mch/msopr/ml/ICON-CH1-EPS/FCST{year}.zarr"))
+            ),
+        resources:
+            cpus_per_task=4,
+            runtime="24h",
+        params:
+            year_postfix=lambda wc: f"FCST{wc.year}",
+            steps="0/33/1",
+        log:
+            OUT_ROOT / "logs/extract-icon1-fcts-{year}.log",
+        shell:
+            """
+            python workflow/scripts/extract_baseline.py \
+                --archive_dir {input.archive}/{params.year_postfix} \
+                --output_store {output.fcts} \
+                --steps {params.steps} \
+                > {log} 2>&1
+            """
diff --git a/workflow/scripts/extract_baseline.py b/workflow/scripts/extract_baseline.py
index f450b82..b1e1214 100644
--- a/workflow/scripts/extract_baseline.py
+++ b/workflow/scripts/extract_baseline.py
@@ -20,21 +20,45 @@
 )
 
 
+def get_input(root: Path) -> list[Path]:
+    """Get list of tarfiles or directories in root directory."""
+    input_files = sorted(root.glob("*.tar"))
+    if not input_files:
+        gribfiles = sorted(root.glob("*_*/grib/i1eff00000000_000"))
+        input_files = [f.parent.parent for f in gribfiles]
+    if not input_files:
+        raise ValueError(f"No files found in {root}.")
+    return input_files
+
+
+def get_reftime(file: Path) -> datetime:
+    if ".tar" in file.suffixes:
+        return reftime_from_tarfile(file)
+    else:
+        return reftime_from_directory(file)
+
+
+def reftime_from_directory(directory: Path) -> datetime:
+    """Extract reftime from directory name."""
+    dir_stem = directory.name.rsplit("_", 1)[0]
+    return datetime.strptime(dir_stem, "%y%m%d%H")
+
+
 def reftime_from_tarfile(tarfile: Path, suffix: str | None = None) -> datetime:
     """Extract reftime from tarfile name."""
     suffix = tarfile.stem[-4:] if suffix is None else suffix
     return datetime.strptime(tarfile.stem.removesuffix(suffix), "%y%m%d%H")
 
 
-def check_reftime_consistency(tarfiles: list[Path], delta_h: int = 12):
+def check_reftime_consistency(input: list[Path], delta_h: int = 12):
     """Check that all reftimes are available and every delta_h hours."""
     # note the lower case y in the format string, it's for 2-digit years
-    first_reftime = reftime_from_tarfile(tarfiles[0])
+    first_reftime = get_reftime(input[0])
     expected_reftime = first_reftime
-    for file in tarfiles:
-        reftime = reftime_from_tarfile(file)
+    for file in input:
+        reftime = get_reftime(file)
         if reftime != expected_reftime:
             raise ValueError(f"Expected reftime {expected_reftime} but got {reftime}.")
         expected_reftime += timedelta(hours=delta_h)
@@ -42,31 +66,47 @@
 def extract(
-    tar: Path, lead_times: list[int], run_id: str, params: list[str]
+    file: Path, lead_times: list[int], run_id: str, params: list[str]
 ) -> xr.Dataset:
-    LOG.info(f"Extracting fields from {tar}.")
-    reftime = reftime_from_tarfile(tar)
-    if "COSMO-E" in tar.parts:
+    LOG.info(f"Extracting fields from {file}.")
+    reftime = reftime_from_tarfile(file)
+    if "COSMO-E" in file.parts:
         gribname = "ceffsurf"
-    elif "COSMO-1E" in tar.parts:
+    elif "COSMO-1E" in file.parts:
         gribname = "c1effsurf"
+    elif "ICON-CH1-EPS" in file.parts:
+        gribname = "i1eff"
+    elif "ICON-CH2-EPS" in file.parts:
+        gribname = "i2eff"
     else:
-        raise ValueError("Currently only COSMO-E and COSMO-1E are supported.")
-    tar_archive = tarfile.open(tar, mode="r:*")
+        raise ValueError("Currently only COSMO-E/1E and ICON-CH1/2-EPS are supported.")
     out = ekd.SimpleFieldList()
-    for lt in lead_times:
-        filename = f"{tar.stem}/grib/{gribname}{lt:03}_{run_id}"
-        LOG.info(f"Extracting {filename}.")
-        stream = tar_archive.extractfile(filename)
-
-        # LOG.info(f"Reading fields...")
-        streamfieldlist: StreamFieldList = ekd.from_source("stream", stream)
-        for field in streamfieldlist:
-            shortname = field.metadata("shortName")
-            if shortname in params:
-                out.append(field)
-        stream.close()
-    tar_archive.close()
+    if ".tar" in file.suffixes:
+        tar_archive = tarfile.open(file, mode="r:*")
+        for lt in lead_times:
+            filename = f"{file.stem}/grib/{gribname}{lt:03}_{run_id}"
+            LOG.info(f"Extracting {filename}.")
+            stream = tar_archive.extractfile(filename)
+
+            # LOG.info(f"Reading fields...")
+            streamfieldlist: StreamFieldList = ekd.from_source("stream", stream)
+            for field in streamfieldlist:
+                shortname = field.metadata("shortName")
+                if shortname in params:
+                    out.append(field)
+            stream.close()
+        tar_archive.close()
+    else:
+        for lt in lead_times:
+            lh = lt % 24
+            ld = lt // 24
+            filepath = file / "grib" / f"{gribname}{ld:02}{lh:02}0000_{run_id}"
+            LOG.info(f"Extracting {filepath}.")
+            fields = ekd.from_source("file", filepath)
+            for field in fields:
+                shortname = field.metadata("shortName")
+                if shortname in params:
+                    out.append(field)
 
     out = out.to_xarray(profile="grib")
     out = out.expand_dims(
@@ -95,16 +135,16 @@ def _parse_steps(steps: str) -> int:
 
 
 def main(cfg: ScriptConfig):
-    tarfiles = sorted(cfg.archive_dir.glob("*.tar"))
+    input = get_input(cfg.archive_dir)
     delta_h = 12
-    if "COSMO-1E" in tarfiles[0].parts:
+    if "COSMO-1E" in input[0].parts or "ICON-CH1-EPS" in input[0].parts:
         delta_h = 3
-    first_reftime, last_reftime = check_reftime_consistency(tarfiles, delta_h)
-    LOG.info(
-        f"Found {len(tarfiles)} tar archives from {first_reftime} to {last_reftime}."
-    )
+    if "ICON-CH2-EPS" in input[0].parts:
+        delta_h = 6
+    first_reftime, last_reftime = check_reftime_consistency(input, delta_h)
+    LOG.info(f"Found {len(input)} forecasts from {first_reftime} to {last_reftime}.")
 
-    reftimes = np.array([reftime_from_tarfile(f) for f in tarfiles], dtype="datetime64")
+    reftimes = np.array([get_reftime(f) for f in input], dtype="datetime64")
     missing = reftimes
     if not cfg.overwrite:  # only check dataset when we want to append as this is slow
         existing_reftimes = np.array([])
@@ -130,7 +170,7 @@ def main(cfg: ScriptConfig):
     _, indices, _ = np.intersect1d(reftimes, missing, return_indices=True)
 
     for i in indices:
-        file = tarfiles[i]
+        file = input[i]
         ds = extract(file, cfg.steps, cfg.run_id, cfg.params)
         LOG.info(f"Extracted: {ds}")
 
@@ -144,9 +184,12 @@ def main(cfg: ScriptConfig):
         zarr_encoding = {
             "forecast_reference_time": {"units": "nanoseconds since 1970-01-01"}
         }
+        cfg.output_store.parent.mkdir(parents=True, exist_ok=True)
         if i == 0:
+            LOG.info(f"Creating new zarr store at {cfg.output_store}.")
             ds.to_zarr(cfg.output_store, mode="w", encoding=zarr_encoding)
         else:
+            LOG.info(f"Appending to existing zarr store at {cfg.output_store}.")
             ds.to_zarr(cfg.output_store, mode="a", append_dim="forecast_reference_time")
 
 
@@ -186,13 +229,23 @@ def main(cfg: ScriptConfig):
 """
 Example usage:
 
-python workflow/scripts/extract_baseline_fct.py \
+python workflow/scripts/extract_baseline.py \
     --archive_dir /archive/mch/msopr/osm/COSMO-E/FCST20 \
     --output_store /store_new/mch/msopr/ml/COSMO-E/FCST20.zarr \
     --steps 0/120/6
 
-python workflow/scripts/extract_baseline_fct.py \
+python workflow/scripts/extract_baseline.py \
     --archive_dir /archive/mch/s83/osm/from_GPFS/COSMO-1E/FCST20 \
    --output_store /store_new/mch/msopr/ml/COSMO-1E/FCST20.zarr \
     --steps 0/33/1
+
+python workflow/scripts/extract_baseline.py \
+    --archive_dir /store_new/mch/msopr/osm/ICON-CH1-EPS/FCST24 \
+    --output_store /store_new/mch/msopr/ml/ICON-CH1-EPS/FCST24.zarr \
+    --steps 0/33/1
+
+python workflow/scripts/extract_baseline.py \
+    --archive_dir /store_new/mch/msopr/osm/ICON-CH1-EPS/FCST25 \
+    --output_store /store_new/mch/msopr/ml/ICON-CH1-EPS/FCST25.zarr \
+    --steps 0/33/1
 """

From 09cb70dc275962ca054d13453cf8951239ba7c30 Mon Sep 17 00:00:00 2001
From: Jonas Bhend
Date: Thu, 15 Jan 2026 15:24:47 +0100
Subject: [PATCH 3/8] adjust filename pattern to include ICON-CH2-EPS

---
 workflow/scripts/extract_baseline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflow/scripts/extract_baseline.py b/workflow/scripts/extract_baseline.py
index b1e1214..274c86f 100644
--- a/workflow/scripts/extract_baseline.py
+++ b/workflow/scripts/extract_baseline.py
@@ -24,7 +24,7 @@ def get_input(root: Path) -> list[Path]:
     """Get list of tarfiles or directories in root directory."""
     input_files = sorted(root.glob("*.tar"))
     if not input_files:
-        gribfiles = sorted(root.glob("*_*/grib/i1eff00000000_000"))
+        gribfiles = sorted(root.glob("*_*/grib/i?eff00000000_000"))
         input_files = [f.parent.parent for f in gribfiles]
     if not input_files:
         raise ValueError(f"No files found in {root}.")

From 5411743d15da6b5c4f34ff18cba5f581e2228077 Mon Sep 17 00:00:00 2001
From: Jonas Bhend
Date: Fri, 16 Jan 2026 10:05:38 +0100
Subject: [PATCH 4/8] make inclusion of optional rules optional

---
 config/forecasters-co1e.yaml      |  2 --
 config/forecasters.yaml           |  2 --
 config/interpolators.yaml         |  2 --
 config/showcase.yaml              |  2 --
 src/evalml/config.py              |  2 +-
 workflow/tools/config.schema.json | 17 ++++++++++++-----
 6 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/config/forecasters-co1e.yaml b/config/forecasters-co1e.yaml
index 11f561e..758e213 100644
--- a/config/forecasters-co1e.yaml
+++ b/config/forecasters-co1e.yaml
@@ -58,5 +58,3 @@ profile:
   runtime: "1h"
   gpus: 0
   jobs: 50
-
-include_optional_rules: []
diff --git a/config/forecasters.yaml b/config/forecasters.yaml
index 683b5bf..bd11485 100644
--- a/config/forecasters.yaml
+++ b/config/forecasters.yaml
@@ -54,5 +54,3 @@ profile:
   runtime: "1h"
   gpus: 0
   jobs: 50
-
-include_optional_rules: []
diff --git a/config/interpolators.yaml b/config/interpolators.yaml
index 1f5c9bd..b29337b 100644
--- a/config/interpolators.yaml
+++ b/config/interpolators.yaml
@@ -78,5 +78,3 @@ profile:
   runtime: "1h"
   gpus: 0
   jobs: 50
-
-include_optional_rules: []
diff --git a/config/showcase.yaml b/config/showcase.yaml
index f448a42..29d02f2 100644
--- a/config/showcase.yaml
+++ b/config/showcase.yaml
@@ -64,5 +64,3 @@ profile:
   jobs: 50
 batch_rules:
   plot_forecast_frame: 24
-
-include_optional_rules: []
diff --git a/src/evalml/config.py b/src/evalml/config.py
index 7f5ad55..ba4f3ab 100644
--- a/src/evalml/config.py
+++ b/src/evalml/config.py
@@ -311,7 +311,7 @@ class ConfigModel(BaseModel):
         "populate_by_name": True,
     }
 
-    include_optional_rules: List[str] = Field(
+    include_optional_rules: List[str] | None = Field(
         default_factory=list,
         description="List of optional rules to include in the workflow.",
     )
diff --git a/workflow/tools/config.schema.json b/workflow/tools/config.schema.json
index fe411c7..58afa1f 100644
--- a/workflow/tools/config.schema.json
+++ b/workflow/tools/config.schema.json
@@ -563,12 +563,19 @@
         "$ref": "#/$defs/Profile"
       },
       "include_optional_rules": {
+        "anyOf": [
+          {
+            "items": {
+              "type": "string"
+            },
+            "type": "array"
+          },
+          {
+            "type": "null"
+          }
+        ],
         "description": "List of optional rules to include in the workflow.",
-        "items": {
-          "type": "string"
-        },
-        "title": "Include Optional Rules",
-        "type": "array"
+        "title": "Include Optional Rules"
       }
     },
     "required": [

From 73a84efa82c8bdc835cc240e3e6aaf2d74fa6970 Mon Sep 17 00:00:00 2001
From: Jonas Bhend
Date: Fri, 16 Jan 2026 10:07:12 +0100
Subject: [PATCH 5/8] Update workflow/rules/data.smk

Co-authored-by: Francesco Zanetta <62377868+frazane@users.noreply.github.com>
---
 workflow/rules/data.smk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflow/rules/data.smk b/workflow/rules/data.smk
index 278454c..e29ac5b 100644
--- a/workflow/rules/data.smk
+++ b/workflow/rules/data.smk
@@ -74,7 +74,7 @@ if "extract_icon1" in config.get("include_optional_rules", []):
             year_postfix=lambda wc: f"FCST{wc.year}",
             steps="0/33/1",
         log:
-            OUT_ROOT / "logs/extract-icon1-fcts-{year}.log",
+            OUT_ROOT / "logs/extract-icon1/year={year}.log",
         shell:
             """
             python workflow/scripts/extract_baseline.py \

From 69a9096d9704876000d8111e8f97013f69987005 Mon Sep 17 00:00:00 2001
From: Jonas Bhend
Date: Fri, 23 Jan 2026 10:35:39 +0100
Subject: [PATCH 6/8] add script as input to rules

---
 workflow/rules/data.smk | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/workflow/rules/data.smk b/workflow/rules/data.smk
index e29ac5b..1289f98 100644
--- a/workflow/rules/data.smk
+++ b/workflow/rules/data.smk
@@ -9,6 +9,7 @@ if "extract_cosmoe" in config.get("include_optional_rules", []):
     rule extract_cosmoe:
         input:
             archive=Path("/archive/mch/msopr/osm/COSMO-E"),
+            script="workflow/scripts/extract_baseline.py",
         output:
             fcts=protected(
                 directory(Path("/store_new/mch/msopr/ml/COSMO-E/FCST{year}.zarr"))
@@ -23,7 +24,7 @@ if "extract_cosmoe" in config.get("include_optional_rules", []):
             OUT_ROOT / "logs/extract-cosmoe-fcts-{year}.log",
         shell:
             """
-            python workflow/scripts/extract_baseline.py \
+            python {input.script} \
                 --archive_dir {input.archive}/{params.year_postfix} \
                 --output_store {output.fcts} \
                 --steps {params.steps} \
@@ -36,6 +37,7 @@ if "extract_cosmo1e" in config.get("include_optional_rules", []):
     rule extract_cosmo1e:
         input:
             archive=Path("/archive/mch/s83/osm/from_GPFS/COSMO-1E"),
+            script="workflow/scripts/extract_baseline.py",
         output:
             fcts=protected(
                 directory(Path("/store_new/mch/msopr/ml/COSMO-1E/FCST{year}.zarr"))
@@ -50,7 +52,7 @@ if "extract_cosmo1e" in config.get("include_optional_rules", []):
             OUT_ROOT / "logs/extract-cosmo1e-fcts-{year}.log",
         shell:
             """
-            python workflow/scripts/extract_baseline.py \
+            python {input.script} \
                 --archive_dir {input.archive}/{params.year_postfix} \
                 --output_store {output.fcts} \
                 --steps {params.steps} \
@@ -63,6 +65,7 @@ if "extract_icon1" in config.get("include_optional_rules", []):
     rule extract_icon1:
         input:
            archive=Path("/store_new/mch/msopr/osm/ICON-CH1-EPS"),
+            script="workflow/scripts/extract_baseline.py",
        output:
            fcts=protected(
                directory(Path("/store_new/mch/msopr/ml/ICON-CH1-EPS/FCST{year}.zarr"))
@@ -77,7 +80,7 @@ if "extract_icon1" in config.get("include_optional_rules", []):
            OUT_ROOT / "logs/extract-icon1/year={year}.log",
        shell:
            """
-            python workflow/scripts/extract_baseline.py \
+            python {input.script} \
                --archive_dir {input.archive}/{params.year_postfix} \
                --output_store {output.fcts} \
                --steps {params.steps} \
                > {log} 2>&1
            """

From 91d169f58c10ed9bdce01a73718cacd37432a2ba Mon Sep 17 00:00:00 2001
From: Jonas Bhend
Date: Sun, 25 Jan 2026 22:26:19 +0100
Subject: [PATCH 7/8] Remove rule-based extraction of datasets (dead end)

---
 src/evalml/config.py              |  5 --
 workflow/rules/data.smk           | 88 -------------------------
 workflow/tools/config.schema.json | 15 ------
 3 files changed, 108 deletions(-)
 delete mode 100644 workflow/rules/data.smk

diff --git a/src/evalml/config.py b/src/evalml/config.py
index ba4f3ab..8faa614 100644
--- a/src/evalml/config.py
+++ b/src/evalml/config.py
@@ -311,11 +311,6 @@ class ConfigModel(BaseModel):
         "populate_by_name": True,
     }
 
-    include_optional_rules: List[str] | None = Field(
-        default_factory=list,
-        description="List of optional rules to include in the workflow.",
-    )
-
 
 def generate_config_schema() -> str:
     """Generate the JSON schema for the ConfigModel."""
diff --git a/workflow/rules/data.smk b/workflow/rules/data.smk
deleted file mode 100644
index 1289f98..0000000
--- a/workflow/rules/data.smk
+++ /dev/null
@@ -1,88 +0,0 @@
-from pathlib import Path
-
-
-include: "common.smk"
-
-
-if "extract_cosmoe" in config.get("include_optional_rules", []):
-
-    rule extract_cosmoe:
-        input:
-            archive=Path("/archive/mch/msopr/osm/COSMO-E"),
-            script="workflow/scripts/extract_baseline.py",
-        output:
-            fcts=protected(
-                directory(Path("/store_new/mch/msopr/ml/COSMO-E/FCST{year}.zarr"))
-            ),
-        resources:
-            cpus_per_task=4,
-            runtime="24h",
-        params:
-            year_postfix=lambda wc: f"FCST{wc.year}",
-            steps="0/120/6",
-        log:
-            OUT_ROOT / "logs/extract-cosmoe-fcts-{year}.log",
-        shell:
-            """
-            python {input.script} \
-                --archive_dir {input.archive}/{params.year_postfix} \
-                --output_store {output.fcts} \
-                --steps {params.steps} \
-                > {log} 2>&1
-            """
-
-
-if "extract_cosmo1e" in config.get("include_optional_rules", []):
-
-    rule extract_cosmo1e:
-        input:
-            archive=Path("/archive/mch/s83/osm/from_GPFS/COSMO-1E"),
-            script="workflow/scripts/extract_baseline.py",
-        output:
-            fcts=protected(
-                directory(Path("/store_new/mch/msopr/ml/COSMO-1E/FCST{year}.zarr"))
-            ),
-        resources:
-            cpus_per_task=4,
-            runtime="24h",
-        params:
-            year_postfix=lambda wc: f"FCST{wc.year}",
-            steps="0/33/1",
-        log:
-            OUT_ROOT / "logs/extract-cosmo1e-fcts-{year}.log",
-        shell:
-            """
-            python {input.script} \
-                --archive_dir {input.archive}/{params.year_postfix} \
-                --output_store {output.fcts} \
-                --steps {params.steps} \
-                > {log} 2>&1
-            """
-
-
-if "extract_icon1" in config.get("include_optional_rules", []):
-
-    rule extract_icon1:
-        input:
-            archive=Path("/store_new/mch/msopr/osm/ICON-CH1-EPS"),
-            script="workflow/scripts/extract_baseline.py",
-        output:
-            fcts=protected(
-                directory(Path("/store_new/mch/msopr/ml/ICON-CH1-EPS/FCST{year}.zarr"))
-            ),
-        resources:
-            cpus_per_task=4,
-            runtime="24h",
-        params:
-            year_postfix=lambda wc: f"FCST{wc.year}",
-            steps="0/33/1",
-        log:
-            OUT_ROOT / "logs/extract-icon1/year={year}.log",
-        shell:
-            """
-            python {input.script} \
-                --archive_dir {input.archive}/{params.year_postfix} \
-                --output_store {output.fcts} \
-                --steps {params.steps} \
-                > {log} 2>&1
-            """
diff --git a/workflow/tools/config.schema.json b/workflow/tools/config.schema.json
index 58afa1f..666e35b 100644
--- a/workflow/tools/config.schema.json
+++ b/workflow/tools/config.schema.json
@@ -561,21 +561,6 @@
       },
       "profile": {
         "$ref": "#/$defs/Profile"
-      },
-      "include_optional_rules": {
-        "anyOf": [
-          {
-            "items": {
-              "type": "string"
-            },
-            "type": "array"
-          },
-          {
-            "type": "null"
-          }
-        ],
-        "description": "List of optional rules to include in the workflow.",
-        "title": "Include Optional Rules"
       }
     },
     "required": [

From a2ff3fe13e83cad9c8aa46a1bb9156f923f3d1fa Mon Sep 17 00:00:00 2001
From: Jonas Bhend
Date: Thu, 29 Jan 2026 10:03:03 +0100
Subject: [PATCH 8/8] Document direct use of extract scripts with slurm

---
 workflow/scripts/extract_baseline.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/workflow/scripts/extract_baseline.py b/workflow/scripts/extract_baseline.py
index 274c86f..075ad1d 100644
--- a/workflow/scripts/extract_baseline.py
+++ b/workflow/scripts/extract_baseline.py
@@ -229,6 +229,10 @@ def main(cfg: ScriptConfig):
 """
 Example usage:
+
+To submit as a batch job on compute nodes
+sbatch --wrap "uv run python ..."
+
 python workflow/scripts/extract_baseline.py \
     --archive_dir /archive/mch/msopr/osm/COSMO-E/FCST20 \
     --output_store /store_new/mch/msopr/ml/COSMO-E/FCST20.zarr \
     --steps 0/120/6