From 0662f5b5c355cecbeddd2f662eb2895000eb04ad Mon Sep 17 00:00:00 2001
From: Jordan Laser <jlaser@lynker.com>
Date: Thu, 13 Nov 2025 08:51:25 -0700
Subject: [PATCH 1/7] add lstm bmi config gen feature with test

---
 configs/ngen/realization_rust_lstm.json |  44 +++++++++
 src/datastreamcli/ngen_configs_gen.py   | 118 +++++++++++++++++++++---
 src/datastreamcli/noahowp_pkl.py        |  37 +++++---
 tests/test_bmi_config_generation.py     |  37 ++++++--
 4 files changed, 197 insertions(+), 39 deletions(-)
 create mode 100644 configs/ngen/realization_rust_lstm.json

diff --git a/configs/ngen/realization_rust_lstm.json b/configs/ngen/realization_rust_lstm.json
new file mode 100644
index 0000000..e86f271
--- /dev/null
+++ b/configs/ngen/realization_rust_lstm.json
@@ -0,0 +1,44 @@
+{
+    "global": {
+        "formulations": [
+            {
+                "name": "bmi_multi",
+                "params": {
+                    "name": "bmi_multi",
+                    "model_type_name": "lstm",
+                    "forcing_file": "",
+                    "init_config": "",
+                    "allow_exceed_end_time": true,
+                    "main_output_variable": "land_surface_water__runoff_depth",
+                    "modules": [
+                        {
+                        "name": "bmi_c",
+                        "params": {
+                            "name": "bmi_c",
+                            "model_type_name": "bmi_rust",
+                            "init_config": "./config/cat_config/lstm/{{id}}.yml",
+                            "allow_exceed_end_time": true,
+                            "main_output_variable": "land_surface_water__runoff_depth",
+                            "uses_forcing_file": false,
+                            "registration_function": "register_bmi_lstm",
+                            "library_file": "/dmod/shared_libs/librust_lstm_1025.so"
+                        }
+                    }
+                    ]
+                }
+            }
+        ],
+        "forcing": {
+            "path": "./forcings/forcings.nc",
+            "provider": "NetCDF",
+            "enable_cache": false
+        }
+    },
+    "time": {
+        "start_time": "2010-01-01 00:00:00",
+        "end_time": "2010-01-02 00:00:00",
+        "output_interval": 3600
+    },
+    "remotes_enabled": false,
+    "output_root": "./outputs/ngen"
+}
\ No newline at end of file
diff --git a/src/datastreamcli/ngen_configs_gen.py b/src/datastreamcli/ngen_configs_gen.py
index 53d6533..2e8c7d1 100644
--- a/src/datastreamcli/ngen_configs_gen.py
+++ b/src/datastreamcli/ngen_configs_gen.py
@@ -5,6 +5,7 @@
 import pickle, copy
 from pathlib import Path
 import datetime
+import subprocess
 gpd.options.io_engine = "pyogrio"
 
 from ngen.config_gen.file_writer import DefaultFileWriter
@@ -17,6 +18,51 @@
 from ngen.config.realization import NgenRealization
 from ngen.config.configurations import Routing
 
+LSTM_TEMPLATE = data = {
+    "time_step": "",
+    "area_sqkm": 0,
+    "basin_id": "cat-1",
+    "basin_name": "cat-1",
+    "elev_mean": 0,
+    "initial_state": "zero",
+    "lat": None,  
+    "lon": None,  
+    "slope_mean": 0,
+    "train_cfg_file": [
+        "/ngen/ngen/extern/lstm/trained_neuralhydrology_models/nh_AORC_hourly_25yr_1210_112435_7/config.yml",
+        "/ngen/ngen/extern/lstm/trained_neuralhydrology_models/nh_AORC_hourly_25yr_1210_112435_8/config.yml",
+        "/ngen/ngen/extern/lstm/trained_neuralhydrology_models/nh_AORC_hourly_25yr_1210_112435_9/config.yml",
+        "/ngen/ngen/extern/lstm/trained_neuralhydrology_models/nh_AORC_hourly_25yr_seq999_seed101_0701_143442/config.yml",
+        "/ngen/ngen/extern/lstm/trained_neuralhydrology_models/nh_AORC_hourly_25yr_seq999_seed103_2701_171540/config.yml",
+        "/ngen/ngen/extern/lstm/trained_neuralhydrology_models/nh_AORC_hourly_25yr_slope_elev_precip_temp_seq999_seed101_2801_191806/config.yml"
+    ],
+    "verbose": 0
+}
+
+def get_hf(hf_file : str):
+    """
+    Parameters:
+        hf_file : path to hydrofabric file (*.gpkg)
+
+    Returns:
+        hf : divide layer of hydrofabric
+        layers :  all layers within the hydrofabric file
+        attrs : divide attributes (found under different layers)
+            v2.1 -> model-attributes
+            v2.2 -> divide-attributes
+    """
+
+    hf: gpd.GeoDataFrame = gpd.read_file(hf_file, layer="divides") 
+    layers = gpd.list_layers(hf_file)
+    if "model-attributes" in list(layers.name):
+        attrs: pd.DataFrame = gpd.read_file(hf_file,layer="model-attributes")
+    elif "divide-attributes" in list(layers.name):
+        attrs: pd.DataFrame = gpd.read_file(hf_file,layer="divide-attributes")
+    else:
+        raise Exception(f"Can't find attributes!")        
+
+    return hf, layers, attrs
+
 def gen_noah_owp_confs_from_pkl(pkl_file,out_dir,start,end):
 
     if not os.path.exists(out_dir):
@@ -101,22 +147,54 @@ def generate_troute_conf(out_dir,start,max_loop_size,geo_file_path):
     with open(Path(out_dir,"troute.yaml"),'w') as fp:
         fp.writelines(troute_conf_str)  
 
-def gen_petAORcfe(hf_file,out,include):
+def gen_lstm(hf,attrs,out,real):
+    lstm_config_dir = Path(out,'cat_config/LSTM')
+    if not Path.exists(lstm_config_dir):
+        os.system(f"mkdir -p {lstm_config_dir}")
+
+    lstm_config = copy.copy(LSTM_TEMPLATE)
+    interval = real.time.output_interval // 3600
+    lstm_config['time_step'] = f"{interval} hour"
+    cats = attrs['divide_id']
+    ncats = len(cats)
+    from pyproj import Transformer
+    import yaml
+    count = 0
+    for x, y in zip(hf.sort_values(by="divide_id").iterrows(),attrs.sort_values(by="divide_id").iterrows()) :    
+        count += 1
+        j, hf_row = x    
+        k, attrs_row =y
+        lstm_config_jcat = copy.copy(lstm_config)
+        jcat = attrs_row['divide_id']
+        source_crs = 'EPSG:5070' 
+        target_crs = 'EPSG:4326'
+        transformer = Transformer.from_crs(source_crs, target_crs, always_xy=True)
+        x_coord = attrs_row['centroid_x']
+        y_coord = attrs_row['centroid_y']
+        lon, lat = transformer.transform(x_coord,y_coord)        
+        lstm_config_jcat['area_sqkm'] = hf_row['areasqkm']
+        lstm_config_jcat['basin_id'] = jcat  # overwrite the template's "cat-1" placeholder (key was misspelled "basid_id")
+        lstm_config_jcat['basin_name'] = jcat  # same key family as LSTM_TEMPLATE; was misspelled "basid_name"
+        lstm_config_jcat['elev_mean'] = attrs_row['mean.elevation']    
+        lstm_config_jcat['lat'] = lat
+        lstm_config_jcat['lon'] = lon
+        lstm_config_jcat['slope_mean'] = attrs_row['mean.slope']   
+        filename = Path(lstm_config_dir, jcat + ".yml")
+        with open(filename,"w") as fp:
+            yaml.dump(lstm_config_jcat, fp, default_flow_style=False, sort_keys=False)
+        perc_comp = 100 * (count/ncats)
+        print(f"{perc_comp:.1f}% complete",end='\r')
+
+    return
+
+def gen_petAORcfe(hf,attrs,out,include):
     models = []
     if 'PET' in include:
         models.append(Pet)
     if 'CFE' in include:
         models.append(Cfe)        
     for j, jmodel in enumerate(include):
-        hf: gpd.GeoDataFrame = gpd.read_file(hf_file, layer="divides")
-        layers = gpd.list_layers(hf_file)
-        if "model-attributes" in list(layers.name):
-            hf_lnk_data: pd.DataFrame = gpd.read_file(hf_file,layer="model-attributes")
-        elif "divide-attributes" in list(layers.name):
-            hf_lnk_data: pd.DataFrame = gpd.read_file(hf_file,layer="divide-attributes")
-        else:
-            raise Exception(f"Can't find attributes!")
-        hook_provider = DefaultHookProvider(hf=hf, hf_lnk_data=hf_lnk_data)
+        hook_provider = DefaultHookProvider(hf=hf, hf_lnk_data=attrs)
         jmodel_out = Path(out,'cat_config',jmodel)
         os.system(f"mkdir -p {jmodel_out}")
         file_writer = DefaultFileWriter(jmodel_out)
@@ -135,7 +213,7 @@ def gen_petAORcfe(hf_file,out,include):
 #     file_writer: FileWriter,
 #     pool: cf.ProcessPoolExecutor,
 # ):
-#     def capture(divide_id: str, bv: BuilderVisitableFn):
+#     def capture(divide_id: str, bv: BuilderVisitableFn):
 #         bld_vbl = bv()
 #         bld_vbl.visit(hook_prov)
 #         model = bld_vbl.build()
@@ -202,14 +280,17 @@ def gen_petAORcfe(hf_file,out,include):
     dir_dict = {"CFE":"CFE",
                 "PET":"PET",
                 "NoahOWP":"NOAH-OWP-M",
-                "SLOTH":""}
+                "SLOTH":"",
+                "bmi_rust":"LSTM"}  # must match the cat_config/LSTM directory that gen_lstm writes to
 
     ignore = []
     for jmodel in model_names:
         config_path = Path(args.outdir,"cat_config",dir_dict[jmodel])
         if config_path.exists(): ignore.append(jmodel)
     routing_path = Path(args.outdir,"troute.yaml")
-    if routing_path.exists(): ignore.append("routing")        
+    if routing_path.exists(): ignore.append("routing")       
+
+    hf, layers, attrs = get_hf(args.hf_file)
 
     if "NoahOWP" in model_names:
         if "NoahOWP" in ignore:
@@ -230,14 +311,21 @@ def gen_petAORcfe(hf_file,out,include):
             print(f'ignoring CFE')
         else:
             print(f'Generating CFE configs from pydantic models',flush = True)
-            gen_petAORcfe(args.hf_file,args.outdir,["CFE"])
+            gen_petAORcfe(hf,attrs,args.outdir,["CFE"])
 
     if "PET" in model_names: 
         if "PET" in ignore:
             print(f'ignoring PET')
         else:
             print(f'Generating PET configs from pydantic models',flush = True)
-            gen_petAORcfe(args.hf_file,args.outdir,["PET"])
+            gen_petAORcfe(hf,attrs,args.outdir,["PET"])
+
+    if "bmi_rust" in model_names:
+        if "bmi_rust" in ignore:
+            print(f'ignoring LSTM')
+        else:
+            print(f'Generating LSTM configs from pydantic models',flush = True)
+            gen_lstm(hf,attrs,args.outdir,serialized_realization)        
 
     globals = [x[0] for x in serialized_realization]
     if serialized_realization.routing is not None:
diff --git a/src/datastreamcli/noahowp_pkl.py b/src/datastreamcli/noahowp_pkl.py
index e0c6334..0c6528f 100644
--- a/src/datastreamcli/noahowp_pkl.py
+++ b/src/datastreamcli/noahowp_pkl.py
@@ -4,13 +4,23 @@
 gpd.options.io_engine = "pyogrio"
 import concurrent.futures as cf
 
-def gen_noah_owp_pkl(gdf):    
+def gen_noah_owp_confs(gdf,hf_version):    
+    """
+    Create a json of noah owp config dicts
+
+    Parameters:
+        gdf : geopandas data frame of divides
+        hf_version : hydrofabric version
+
+    Returns:
+        all_confs : a dict of noah owp configs (json objects)
+    """
     template = Path(__file__).parent.parent.parent/"configs/ngen/noah-owp-modular-init.namelist.input"
     with open(template,'r') as fp:
         conf_template = fp.readlines()
 
     all_confs = {}
-    if HF_VERSION == "v2.2":
+    if hf_version == "v2.2":
         for row in gdf.itertuples():
             jcatch = row.divide_id
             lat = row.centroid_x
@@ -59,22 +69,20 @@ def gen_noah_owp_pkl(gdf):
             all_confs[jcatch] = jcatch_conf
     return all_confs
 
-def multiprocess_pkl(gpkg_path,outdir):
+def multiprocess_gen_pkl(gpkg_path,outdir,hf_version):
     print(f'Generating NoahOWP pkl',flush=True)
 
-    global HF_VERSION
-    try:
-        HF_VERSION = "v2.2"
+    if hf_version == "v2.2":
         gdf = gpd.read_file(gpkg_path,layer = 'divide-attributes').sort_values(by='divide_id')
-    except:
-        HF_VERSION = "v2.1"
+    elif hf_version == "v2.1":
         gdf = gpd.read_file(gpkg_path,layer = 'model-attributes').sort_values(by='divide_id')
-    
+    else:
+        raise Exception("This function supports v2.1 and v2.2 hydrofabrics")
+
     catchment_list = sorted(list(gdf['divide_id']))
 
     nprocs = max(os.cpu_count() - 1,1)
     ncatch = len(catchment_list)
-    catchment_list_list = []
     gdf_list = []
     nper = ncatch // nprocs
     nleft = ncatch - (nper * nprocs)   
@@ -89,8 +97,9 @@ def multiprocess_pkl(gpkg_path,outdir):
     all_proc_confs = {}
     with cf.ProcessPoolExecutor(max_workers=nprocs) as pool:
         for results in pool.map(
-        gen_noah_owp_pkl,
-        gdf_list
+        gen_noah_owp_confs,
+        gdf_list,
+        [hf_version for x in range(nprocs)]
         ):
             all_proc_confs.update(results)
 
@@ -127,7 +136,7 @@ def multiprocess_pkl(gpkg_path,outdir):
         hf_file = args.hf_file
 
     outdir = args.outdir
-    multiprocess_pkl(hf_file,outdir)   
+    multiprocess_gen_pkl(hf_file,outdir)   
     # gdf     = gpd.read_file(hf_file,layer = 'divide-attributes')
     # catchment_list = sorted(list(gdf['divide_id']))         
-    # gen_noah_owp_pkl(catchment_list,gdf)
\ No newline at end of file
+    # gen_noah_owp_confs(catchment_list,gdf)
\ No newline at end of file
diff --git a/tests/test_bmi_config_generation.py b/tests/test_bmi_config_generation.py
index a02d53f..fcdb585 100644
--- a/tests/test_bmi_config_generation.py
+++ b/tests/test_bmi_config_generation.py
@@ -1,14 +1,21 @@
 import pytest
-from datastreamcli.ngen_configs_gen import gen_noah_owp_confs_from_pkl, gen_petAORcfe, generate_troute_conf
-from datastreamcli.noahowp_pkl import multiprocess_pkl
+from datastreamcli.ngen_configs_gen import gen_noah_owp_confs_from_pkl, gen_petAORcfe, generate_troute_conf, gen_lstm, get_hf
+from datastreamcli.noahowp_pkl import multiprocess_gen_pkl
 import datetime as dt
 from pathlib import Path
 import shutil
 import subprocess
+from ngen.config.realization import NgenRealization
 
 TEST_DIR = Path(__file__).resolve().parent
+PROJECT_ROOT = TEST_DIR.parent
+CONFIG_DIR = PROJECT_ROOT / "configs"
+NGEN_CONFIG_DIR = CONFIG_DIR / "ngen"
 DATA_DIR = TEST_DIR / "data"
 
+LSTM_REALIZATION =  NGEN_CONFIG_DIR / "realization_rust_lstm.json"
+PKL_FILE = DATA_DIR / "noah-owp-modular-init.namelist.input.pkl"
+
 # Ensure DATA_DIR exists and is empty
 if DATA_DIR.exists():
     shutil.rmtree(DATA_DIR)
@@ -18,13 +25,13 @@
 NOAH_DIR = CONF_DIR / "NOAH-OWP-M"
 CFE_DIR  = CONF_DIR / "CFE"
 PET_DIR  = CONF_DIR / "PET"
+LSTM_DIR  = CONF_DIR / "LSTM"
 
 GEOPACKAGE_NAME_v21 = "palisade.gpkg"
 GEOPACKAGE_NAME_v22 = "vpu-09_subset.gpkg"
 GEOPACKAGE_PATH_v21 = DATA_DIR / GEOPACKAGE_NAME_v21
 GEOPACKAGE_PATH_v22 = DATA_DIR / GEOPACKAGE_NAME_v22
 
-# Download geopackages using subprocess (more portable than os.system)
 subprocess.run([
     "curl", "-L", "-o", str(GEOPACKAGE_PATH_v21),
     f"https://ngen-datastream.s3.us-east-2.amazonaws.com/{GEOPACKAGE_NAME_v21}"
@@ -35,10 +42,12 @@
     f"https://communityhydrofabric.s3.us-east-1.amazonaws.com/hydrofabrics/community/VPU/{GEOPACKAGE_NAME_v22}"
 ], check=True)
 
-PKL_FILE = DATA_DIR / "noah-owp-modular-init.namelist.input.pkl"
+
 START    = dt.datetime.strptime("202006200100", '%Y%m%d%H%M')
 END      = dt.datetime.strptime("202006200100", '%Y%m%d%H%M')
 
+hf_v21, layers_v21, attrs_v21 = get_hf(GEOPACKAGE_PATH_v21)
+hf_v22, layers_v22, attrs_v22 = get_hf(GEOPACKAGE_PATH_v22)
 
 @pytest.fixture(autouse=True)
 def clean_dir():
@@ -48,7 +57,7 @@ def clean_dir():
 
 
 def test_pkl_v21():
-    multiprocess_pkl(GEOPACKAGE_PATH_v21, DATA_DIR)
+    multiprocess_gen_pkl(GEOPACKAGE_PATH_v21, DATA_DIR, "v2.1")
     assert PKL_FILE.exists()
 
 
@@ -61,14 +70,14 @@ def test_noah_owp_m_v21():
 
 def test_cfe_v21():
     CFE_DIR.mkdir(parents=True, exist_ok=True)
-    gen_petAORcfe(GEOPACKAGE_PATH_v21, DATA_DIR, ["CFE"])
+    gen_petAORcfe(hf_v21, attrs_v21, DATA_DIR, ["CFE"])
     cfe_example = CFE_DIR / "CFE_cat-2586011.ini"
     assert cfe_example.exists()
 
 
 def test_pet_v21():
     PET_DIR.mkdir(parents=True, exist_ok=True)
-    gen_petAORcfe(GEOPACKAGE_PATH_v21, DATA_DIR, ["PET"])
+    gen_petAORcfe(hf_v21, attrs_v21, DATA_DIR, ["PET"])
     pet_example = PET_DIR / "PET_cat-2586011.ini"
     assert pet_example.exists()
 
@@ -81,7 +90,7 @@ def test_routing_v21():
 
 
 def test_pkl_v22():
-    multiprocess_pkl(GEOPACKAGE_PATH_v22, DATA_DIR)
+    multiprocess_gen_pkl(GEOPACKAGE_PATH_v22, DATA_DIR, "v2.2")
     assert PKL_FILE.exists()
 
 
@@ -94,18 +103,26 @@ def test_noah_owp_m_v22():
 
 def test_cfe_v22():
     CFE_DIR.mkdir(parents=True, exist_ok=True)
-    gen_petAORcfe(GEOPACKAGE_PATH_v22, DATA_DIR, ["CFE"])
+    gen_petAORcfe(hf_v22, attrs_v22, DATA_DIR, ["CFE"])
     cfe_example = CFE_DIR / "CFE_cat-1496145.ini"
     assert cfe_example.exists()
 
 
 def test_pet_v22():
     PET_DIR.mkdir(parents=True, exist_ok=True)
-    gen_petAORcfe(GEOPACKAGE_PATH_v22, DATA_DIR, ["PET"])
+    gen_petAORcfe(hf_v22, attrs_v22, DATA_DIR, ["PET"])
     pet_example = PET_DIR / "PET_cat-1496145.ini"
     assert pet_example.exists()
 
 
+def test_lstm_v22():
+    serialized_realization = NgenRealization.parse_file(LSTM_REALIZATION)
+    LSTM_DIR.mkdir(parents=True, exist_ok=True)
+    gen_lstm(hf_v22, attrs_v22, DATA_DIR,serialized_realization)
+    lstm_example = LSTM_DIR / "cat-1496145.yml"
+    assert lstm_example.exists()
+
+
 def test_routing_v22():
     max_loop_size = (END - START + dt.timedelta(hours=1)).total_seconds() / 3600
     generate_troute_conf(DATA_DIR, START, max_loop_size, GEOPACKAGE_PATH_v22)

From ea2f011903cc34e0330eaddccd3089a0a741be53 Mon Sep 17 00:00:00 2001
From: Jordan Laser <jlaser@lynker.com>
Date: Thu, 13 Nov 2025 13:46:39 -0700
Subject: [PATCH 2/7] update workflows

---
 .github/workflows/test_datastream_ngiab.yaml  | 30 +++++---------
 .../workflows/test_datastream_options.yaml    | 40 +++++++++----------
 2 files changed, 29 insertions(+), 41 deletions(-)

diff --git a/.github/workflows/test_datastream_ngiab.yaml b/.github/workflows/test_datastream_ngiab.yaml
index 0735a85..fa9caff 100644
--- a/.github/workflows/test_datastream_ngiab.yaml
+++ b/.github/workflows/test_datastream_ngiab.yaml
@@ -6,31 +6,23 @@ on:
     branches:
       - main
     paths:
+      - .github/workflows/test_datastream_ngiab.yaml
       - 'docker/**'
-      - 'forcingprocessor/**'
-      - 'scripts/**'  
-      - 'python_tools/**'      
-      - '!docs/**'
-      - '!workflows/**'
-      - '!scripts/README.md'
-      - '!docker/README.md'
-      - '!forcingprocessor/README.md'
-      - '!python_tools/README.md'
-     
+      - '!docker/README.md'      
+      - 'scripts/datastream'  
+      - 'src/datastreamcli/**'  
+      - '!src/datastreamcli/README.md'      
+
   pull_request:
     branches:
       - main  
     paths:
+      - .github/workflows/test_datastream_ngiab.yaml
       - 'docker/**'
-      - 'forcingprocessor/**'
-      - 'scripts/**'  
-      - 'python_tools/**'      
-      - '!docs/**'
-      - '!workflows/**'
-      - '!scripts/README.md'
-      - '!docker/README.md'
-      - '!forcingprocessor/README.md'
-      - '!python_tools/README.md'         
+      - '!docker/README.md'      
+      - 'scripts/datastream'  
+      - 'src/datastreamcli/**'  
+      - '!src/datastreamcli/README.md'            
 
 permissions:
   contents: read      
diff --git a/.github/workflows/test_datastream_options.yaml b/.github/workflows/test_datastream_options.yaml
index 3c0024a..95c2a86 100644
--- a/.github/workflows/test_datastream_options.yaml
+++ b/.github/workflows/test_datastream_options.yaml
@@ -6,33 +6,24 @@ on:
     branches:
       - main
     paths:
+      - .github/workflows/test_datastream_options.yaml
       - 'docker/**'
-      - 'forcingprocessor/**'
-      - 'scripts/**'  
-      - 'python_tools/**'      
-      - '!docs/**'
-      - '!workflows/**'
-      - '!scripts/README.md'
-      - '!docker/README.md'
-      - '!forcingprocessor/README.md'
-      - '!python_tools/README.md'
+      - '!docker/README.md'      
+      - 'scripts/datastream'  
+      - 'src/datastreamcli/**'  
+      - '!src/datastreamcli/README.md'      
      
   pull_request:
     branches:
       - main  
     paths:
+      - .github/workflows/test_datastream_options.yaml
       - 'docker/**'
-      - 'forcingprocessor/**'
-      - 'scripts/**'  
-      - 'python_tools/**'      
-      - '!docs/**'
-      - '!workflows/**'
-      - '!scripts/README.md'
-      - '!docker/README.md'
-      - '!forcingprocessor/README.md'
-      - '!python_tools/README.md'            
-
-      
+      - '!docker/README.md'      
+      - 'scripts/datastream'  
+      - 'src/datastreamcli/**'  
+      - '!src/datastreamcli/README.md'               
+
 permissions:
   contents: read      
 
@@ -106,6 +97,12 @@ jobs:
         sudo rm -rf $(pwd)/data/datastream_test
         ./scripts/datastream -r ./data/cache/datastream-resources -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test
 
+    - name: Rust LSTM test
+      if: always()
+      run: |
+        sudo rm -rf $(pwd)/data/datastream_test
+        ./scripts/datastream -r ./data/cache/datastream-resources -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test -R $(pwd)/configs/ngen/realization_rust_lstm.json
+
     - name: Forcings sources option test NWM_RETRO_V2
       if: always()
       run: |
@@ -168,5 +165,4 @@ jobs:
       if: always()
       run: |        
         sudo rm -rf $(pwd)/data/datastream_test
-        ./scripts/datastream -s DAILY -C NWM_ANALYSIS_ASSIM_EXTEND_16 -e $(date -d '-2 day' '+%Y%m%d0000') -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json      
-  
+        ./scripts/datastream -s DAILY -C NWM_ANALYSIS_ASSIM_EXTEND_16 -e $(date -d '-2 day' '+%Y%m%d0000') -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json      
\ No newline at end of file

From 286626d28115bdb7003c688feef943c735299765 Mon Sep 17 00:00:00 2001
From: Jordan Laser <jlaser@lynker.com>
Date: Thu, 13 Nov 2025 13:47:43 -0700
Subject: [PATCH 3/7] edits for type hinting and variable passing

---
 configs/ngen/realization_rust_lstm.json |  2 +-
 scripts/datastream                      |  2 +-
 src/datastreamcli/ngen_configs_gen.py   | 19 +++++++++++-
 src/datastreamcli/noahowp_pkl.py        | 21 +++++++++++--
 src/datastreamcli/run_validator.py      | 39 ++++++++++++++-----------
 5 files changed, 61 insertions(+), 22 deletions(-)

diff --git a/configs/ngen/realization_rust_lstm.json b/configs/ngen/realization_rust_lstm.json
index e86f271..1311ab7 100644
--- a/configs/ngen/realization_rust_lstm.json
+++ b/configs/ngen/realization_rust_lstm.json
@@ -16,7 +16,7 @@
                         "params": {
                             "name": "bmi_c",
                             "model_type_name": "bmi_rust",
-                            "init_config": "./config/cat_config/lstm/{{id}}.yml",
+                            "init_config": "./config/cat_config/LSTM/{{id}}.yml",
                             "allow_exceed_end_time": true,
                             "main_output_variable": "land_surface_water__runoff_depth",
                             "uses_forcing_file": false,
diff --git a/scripts/datastream b/scripts/datastream
index 09ec358..edb0742 100755
--- a/scripts/datastream
+++ b/scripts/datastream
@@ -1,5 +1,5 @@
 #!/bin/bash
-set -ex
+set -e
 
 # NextGen DataStream CLI: A batteries-included style workflow for running NextGen
 #
diff --git a/src/datastreamcli/ngen_configs_gen.py b/src/datastreamcli/ngen_configs_gen.py
index 2e8c7d1..37a655f 100644
--- a/src/datastreamcli/ngen_configs_gen.py
+++ b/src/datastreamcli/ngen_configs_gen.py
@@ -147,7 +147,24 @@ def generate_troute_conf(out_dir,start,max_loop_size,geo_file_path):
     with open(Path(out_dir,"troute.yaml"),'w') as fp:
         fp.writelines(troute_conf_str)  
 
-def gen_lstm(hf,attrs,out,real):
+def gen_lstm(
+        hf : gpd.GeoDataFrame,
+        attrs : gpd.GeoDataFrame,
+        out : str, 
+        real : NgenRealization
+        ):
+    """
+    Generate LSTM BMI configs from hydrofabric and NextGen realization files
+
+    Parameters
+        hf : divides layer of hydrofabric,
+        attrs : attributes of the divides,
+        out : path to write configs out to, 
+        real : NextGen realization
+
+    Returns
+        None
+    """
     lstm_config_dir = Path(out,'cat_config/LSTM')
     if not Path.exists(lstm_config_dir):
         os.system(f"mkdir -p {lstm_config_dir}")
diff --git a/src/datastreamcli/noahowp_pkl.py b/src/datastreamcli/noahowp_pkl.py
index 0c6528f..5a3d442 100644
--- a/src/datastreamcli/noahowp_pkl.py
+++ b/src/datastreamcli/noahowp_pkl.py
@@ -69,7 +69,22 @@ def gen_noah_owp_confs(gdf,hf_version):
             all_confs[jcatch] = jcatch_conf
     return all_confs
 
-def multiprocess_gen_pkl(gpkg_path,outdir,hf_version):
+def multiprocess_gen_pkl(gpkg_path : str,
+                         outdir : str,
+                         hf_version : str
+                         ):
+    """
+    Multiprocessing layer for gen_noah_owp_confs()
+
+    Parameters
+        gpkg_path : Path to geopackage,
+        outdir : Path to directory to store pickle file,
+        hf_version : hydrofabric version
+
+    Returns
+        None
+    
+    """
     print(f'Generating NoahOWP pkl',flush=True)
 
     if hf_version == "v2.2":
@@ -136,7 +151,9 @@ def multiprocess_gen_pkl(gpkg_path,outdir,hf_version):
         hf_file = args.hf_file
 
     outdir = args.outdir
-    multiprocess_gen_pkl(hf_file,outdir)   
+    hf_version = "v2.1"
+    if "divide-attributes" in list(gpd.list_layers(hf_file).name): hf_version = "v2.2"
+    multiprocess_gen_pkl(hf_file,outdir,hf_version)   
     # gdf     = gpd.read_file(hf_file,layer = 'divide-attributes')
     # catchment_list = sorted(list(gdf['divide_id']))         
     # gen_noah_owp_confs(catchment_list,gdf)
\ No newline at end of file
diff --git a/src/datastreamcli/run_validator.py b/src/datastreamcli/run_validator.py
index 33aa3fd..ff4e905 100644
--- a/src/datastreamcli/run_validator.py
+++ b/src/datastreamcli/run_validator.py
@@ -9,7 +9,7 @@
 from datetime import datetime, timezone
 import concurrent.futures as cf
 
-def check_forcings(forcings_start,forcings_end,n):
+def check_forcings(forcings_start,forcings_end,n,serialized_realization):
     start_time = serialized_realization.time.start_time
     end_time   = serialized_realization.time.end_time
     dt_s = serialized_realization.time.output_interval
@@ -42,11 +42,14 @@ def validate_realization(realization_file):
     
     return serialized_realization, relative_dir
 
-def validate_catchment_files(validations, catchments):
+def validate_catchment_files(validations : dict,
+                            catchments : list,
+                            forcing_dir : str,
+                            serialized_realization : NgenRealization):
     """
     General function to validate any files that need to be associated with a catchment
 
-    Inputs:
+    Parameters:
     validations: dictionary of list of patterns and files to match. Each list should be a 1:1 correspondence between a catchment and it's file.
     Multiple lists are allowed to allow for multiple file types (forcings, ngen configs like CFE)
     Validates 
@@ -59,7 +62,7 @@ def validate_catchment_files(validations, catchments):
         pattern     = validations[jval]['pattern']
         files       = validations[jval]['files']
         if len(files) == 0:
-            continue
+            raise Exception(f"No files found at {pattern}!")
         if jval == "forcing":
             if files[0].endswith(".nc"):
                 nc_file = files[0]
@@ -69,12 +72,12 @@ def validate_catchment_files(validations, catchments):
                     df = ngen_forcings['precip_rate']
                     forcings_start = datetime.fromtimestamp(ngen_forcings.Time.values[0,0],timezone.utc)
                     forcings_end   = datetime.fromtimestamp(ngen_forcings.Time.values[0,-1],timezone.utc)
-                    check_forcings(forcings_start,forcings_end,len(ngen_forcings.time.values))
+                    check_forcings(forcings_start,forcings_end,len(ngen_forcings.time.values),serialized_realization)
                     continue
 
         for j, jcatch in enumerate(catchments):    
             jcatch_pattern = pattern.replace('{{id}}',jcatch)
-            compiled       = re.compile(jcatch_pattern)      
+            compiled       = re.compile(jcatch_pattern)    
 
             jfile = files[j]     
             if not bool(compiled.match(jfile)):
@@ -86,7 +89,7 @@ def validate_catchment_files(validations, catchments):
                     df = pd.read_csv(full_path)
                     forcings_start = datetime.strptime(df['time'].iloc[0],'%Y-%m-%d %H:%M:%S')
                     forcings_end   = datetime.strptime(df['time'].iloc[-1],'%Y-%m-%d %H:%M:%S')
-                    check_forcings(forcings_start,forcings_end,len(df['time']))
+                    check_forcings(forcings_start,forcings_end,len(df['time']),serialized_realization)
 
 def validate_data_dir(data_dir):
 
@@ -116,11 +119,9 @@ def validate_data_dir(data_dir):
     catchments     = gpd.read_file(geopackage_file, layer='divides')
     catchment_list = sorted(list(catchments['divide_id']))
 
-    global serialized_realization
     serialized_realization, relative_dir = validate_realization(realization_file)    
 
     print(f'Done\nValidating required individual catchment paths',flush = True)
-    global forcing_dir, config_dir, validate_type_names
     forcing_dir    = os.path.join(relative_dir,serialized_realization.global_config.forcing.path)
     config_dir     = os.path.join(data_dir,"config","cat_config")
     if os.path.isdir(forcing_dir):
@@ -139,7 +140,9 @@ def validate_data_dir(data_dir):
 
     jdir_dict = {"CFE":"CFE",
                  "PET":"PET",
-                 "NoahOWP":"NOAH-OWP-M"}
+                 "NoahOWP":"NOAH-OWP-M",
+                 "bmi_rust":"LSTM"
+                 }
 
     validate_files = {"forcing":{"pattern":serialized_realization.global_config.forcing.file_pattern,"files": forcing_files}}
     serialized_realization = NgenRealization.parse_file(realization_file)
@@ -180,13 +183,15 @@ def validate_data_dir(data_dir):
         catchment_list_list.append(jcatchments)
         i = k
         
-    # validate_catchment_files(val_dict_list[0],catchment_list_list[0])
-    with cf.ProcessPoolExecutor() as pool:
-        for results in pool.map(
-            validate_catchment_files,
-            val_dict_list,
-            catchment_list_list):
-            pass    
+    validate_catchment_files(val_dict_list[0],catchment_list_list[0],forcing_dir, serialized_realization)
+    # with cf.ProcessPoolExecutor() as pool:
+    #     for results in pool.map(
+    #         validate_catchment_files,
+    #         val_dict_list,
+    #         catchment_list_list,
+    #         [forcing_dir for x in range(nprocs)],
+    #         [serialized_realization for x in range(nprocs)]):
+    #         pass    
 
     print(f'\nNGen run folder is valid\n',flush = True)        
 

From 932aa0d4da88e0f455a7b1d4d7a0afb69f931a8b Mon Sep 17 00:00:00 2001
From: Jordan Laser <jlaser@lynker.com>
Date: Thu, 13 Nov 2025 14:41:10 -0700
Subject: [PATCH 4/7] test with v2.2 hydrofabric

---
 .github/workflows/test_datastream_ngiab.yaml  |  2 +-
 .../workflows/test_datastream_options.yaml    | 20 +++++++++----------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/test_datastream_ngiab.yaml b/.github/workflows/test_datastream_ngiab.yaml
index fa9caff..37a4e74 100644
--- a/.github/workflows/test_datastream_ngiab.yaml
+++ b/.github/workflows/test_datastream_ngiab.yaml
@@ -55,4 +55,4 @@ jobs:
     - name: Base test and NWM_RETRO_V3
       run: |        
         sudo rm -rf $(pwd)/data/datastream_test        
-        ./scripts/datastream -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test -g https://ngen-datastream.s3.us-east-2.amazonaws.com/palisade.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json
\ No newline at end of file
+        ./scripts/datastream -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test -g https://ciroh-community-ngen-datastream.s3.amazonaws.com/v2.2_resources/VPU_09/config/nextgen_VPU_09.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json
\ No newline at end of file
diff --git a/.github/workflows/test_datastream_options.yaml b/.github/workflows/test_datastream_options.yaml
index 95c2a86..c9afa97 100644
--- a/.github/workflows/test_datastream_options.yaml
+++ b/.github/workflows/test_datastream_options.yaml
@@ -56,12 +56,12 @@ jobs:
 
     - name: Get geopackage 
       run: |
-        curl -L -O https://ngen-datastream.s3.us-east-2.amazonaws.com/palisade.gpkg
+        curl -L -O https://ciroh-community-ngen-datastream.s3.amazonaws.com/v2.2_resources/VPU_09/config/nextgen_VPU_09.gpkg
 
     - name: Base test and NWM_RETRO_V3
       run: |        
         sudo rm -rf $(pwd)/data/datastream_test        
-        ./scripts/datastream -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json
+        ./scripts/datastream -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test -g $(pwd)/nextgen_VPU_09.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json
 
     - name: Cache resource directory and CONF_FILE
       run: |
@@ -81,7 +81,7 @@ jobs:
       if: always()
       run: |
         sudo rm -rf $(pwd)/data/datastream_test
-        ./scripts/datastream -g $(pwd)/palisade.gpkg -R $(pwd)/data/cache/datastream-resources/config/realization_sloth_nom_cfe_pet.json -F $(pwd)/data/cache/datastream-resources/ngen-forcings/1_forcings.nc -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test        
+        ./scripts/datastream -g $(pwd)/nextgen_VPU_09.gpkg -R $(pwd)/data/cache/datastream-resources/config/realization_sloth_nom_cfe_pet.json -F $(pwd)/data/cache/datastream-resources/ngen-forcings/1_forcings.nc -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test        
 
     - name: Resource directory test missing all
       if: always()
@@ -89,7 +89,7 @@ jobs:
         sudo rm -rf $(pwd)/data/datastream_test
         sudo rm -rf ./data/cache/datastream-resources-missing/ngen-forcings 
         sudo rm -rf ./data/cache/datastream-resources-missing/config/*
-        ./scripts/datastream -r ./data/cache/datastream-resources-missing -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg
+        ./scripts/datastream -r ./data/cache/datastream-resources-missing -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test -g $(pwd)/nextgen_VPU_09.gpkg
 
     - name: Resource directory test
       if: always()
@@ -126,7 +126,7 @@ jobs:
       if: always()
       run: |        
         sudo rm -rf $(pwd)/data/datastream_test
-        ./scripts/datastream -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json -S ciroh-community-ngen-datastream -o git_actions_test
+        ./scripts/datastream -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test -g $(pwd)/nextgen_VPU_09.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json -S ciroh-community-ngen-datastream -o git_actions_test
         aws s3api wait object-exists --bucket ciroh-community-ngen-datastream --key git_actions_test/ngen-run.tar.gz   
         aws s3 rm s3://ciroh-community-ngen-datastream/git_actions_test --recursive
 
@@ -134,25 +134,25 @@ jobs:
       if: always()
       run: |        
         sudo rm -rf $(pwd)/data/datastream_test
-        ./scripts/datastream -s DAILY -C NWM_SHORT_RANGE_00 -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json
+        ./scripts/datastream -s DAILY -C NWM_SHORT_RANGE_00 -d $(pwd)/data/datastream_test -g $(pwd)/nextgen_VPU_09.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json
 
     - name: DAILY short_range 23 today test
       if: always()
       run: |        
         sudo rm -rf $(pwd)/data/datastream_test
-        ./scripts/datastream -s DAILY -C NWM_SHORT_RANGE_23 -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json
+        ./scripts/datastream -s DAILY -C NWM_SHORT_RANGE_23 -d $(pwd)/data/datastream_test -g $(pwd)/nextgen_VPU_09.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json
 
     - name: DAILY short_range 00 pick day test
       if: always()
       run: |        
         sudo rm -rf $(pwd)/data/datastream_test
-        ./scripts/datastream -s DAILY -e $(date -d '-15 day' '+%Y%m%d0000') -C NWM_SHORT_RANGE_00 -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json
+        ./scripts/datastream -s DAILY -e $(date -d '-15 day' '+%Y%m%d0000') -C NWM_SHORT_RANGE_00 -d $(pwd)/data/datastream_test -g $(pwd)/nextgen_VPU_09.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json
         
     - name: DAILY medium_range today test 00 0
       if: always()
       run: |        
         sudo rm -rf $(pwd)/data/datastream_test
-        ./scripts/datastream -s DAILY -C NWM_MEDIUM_RANGE_00_0 -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json
+        ./scripts/datastream -s DAILY -C NWM_MEDIUM_RANGE_00_0 -d $(pwd)/data/datastream_test -g $(pwd)/nextgen_VPU_09.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json
     
     - name: DAILY medium_range today test 00 3
       if: always()
@@ -165,4 +165,4 @@ jobs:
       if: always()
       run: |        
         sudo rm -rf $(pwd)/data/datastream_test
-        ./scripts/datastream -s DAILY -C NWM_ANALYSIS_ASSIM_EXTEND_16 -e $(date -d '-2 day' '+%Y%m%d0000') -d $(pwd)/data/datastream_test -g $(pwd)/palisade.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json      
\ No newline at end of file
+        ./scripts/datastream -s DAILY -C NWM_ANALYSIS_ASSIM_EXTEND_16 -e $(date -d '-2 day' '+%Y%m%d0000') -d $(pwd)/data/datastream_test -g $(pwd)/nextgen_VPU_09.gpkg -R $(pwd)/configs/ngen/realization_sloth_nom_cfe_pet.json      
\ No newline at end of file

From 7690d24a96c61ff4d9584a4cc8c2c5bcaa247475 Mon Sep 17 00:00:00 2001
From: Jordan Laser <jlaser@lynker.com>
Date: Tue, 18 Nov 2025 07:45:03 -0700
Subject: [PATCH 5/7] locate forcings file with find instead of a hard-coded path

---
 .github/workflows/test_datastream_options.yaml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/test_datastream_options.yaml b/.github/workflows/test_datastream_options.yaml
index c9afa97..44b190b 100644
--- a/.github/workflows/test_datastream_options.yaml
+++ b/.github/workflows/test_datastream_options.yaml
@@ -81,7 +81,8 @@ jobs:
       if: always()
       run: |
         sudo rm -rf $(pwd)/data/datastream_test
-        ./scripts/datastream -g $(pwd)/nextgen_VPU_09.gpkg -R $(pwd)/data/cache/datastream-resources/config/realization_sloth_nom_cfe_pet.json -F $(pwd)/data/cache/datastream-resources/ngen-forcings/1_forcings.nc -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test        
+        forcings_file=$(find "./data/datastream-resources/ngen-forcings/" -type f)
+        ./scripts/datastream -g $(pwd)/nextgen_VPU_09.gpkg -R $(pwd)/data/cache/datastream-resources/config/realization_sloth_nom_cfe_pet.json -F $forcings_file -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test        
 
     - name: Resource directory test missing all
       if: always()

From da4b7567b3d5d3511eac70debda3d6faefd74f0a Mon Sep 17 00:00:00 2001
From: Jordan Laser <jlaser@lynker.com>
Date: Tue, 18 Nov 2025 07:46:30 -0700
Subject: [PATCH 6/7] add ruamel for more type control in yaml

---
 setup.cfg                             |  1 +
 src/datastreamcli/ngen_configs_gen.py | 41 ++++++++++++++-------------
 2 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/setup.cfg b/setup.cfg
index 737bb32..13b1cff 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -35,6 +35,7 @@ install_requires =
     xarray
     scipy
     requests
+    ruamel.yaml
     ngen_config[all] @ git+https://github.com/ciroh-ua/ngen-cal@master#egg=ngen_config&subdirectory=python/ngen_conf
     ngen_init_config[all] @ git+https://github.com/ciroh-ua/ngen-cal@master#egg=ngen_init_config&subdirectory=python/ngen_init_config
     ngen_config_gen[all] @ git+https://github.com/ciroh-ua/ngen-cal@master#egg=ngen_config_gen&subdirectory=python/ngen_config_gen
diff --git a/src/datastreamcli/ngen_configs_gen.py b/src/datastreamcli/ngen_configs_gen.py
index c254885..d632594 100644
--- a/src/datastreamcli/ngen_configs_gen.py
+++ b/src/datastreamcli/ngen_configs_gen.py
@@ -8,6 +8,10 @@
 import subprocess
 gpd.options.io_engine = "pyogrio"
 
+import ruamel, io
+from ruamel.yaml import YAML
+from ruamel.yaml.scalarstring import DoubleQuotedScalarString
+
 from ngen.config_gen.file_writer import DefaultFileWriter
 from ngen.config_gen.hook_providers import DefaultHookProvider
 from ngen.config_gen.generate import generate_configs
@@ -34,7 +38,7 @@
         "/ngen/ngen/extern/lstm/trained_neuralhydrology_models/nh_AORC_hourly_25yr_1210_112435_9/config.yml",
         "/ngen/ngen/extern/lstm/trained_neuralhydrology_models/nh_AORC_hourly_25yr_seq999_seed101_0701_143442/config.yml",
         "/ngen/ngen/extern/lstm/trained_neuralhydrology_models/nh_AORC_hourly_25yr_seq999_seed103_2701_171540/config.yml",
-        "/ngen/ngen/extern/lstm/trained_neuralhydrology_models/nh_AORC_hourly_25yr_slope_elev_precip_temp_seq999_seed101_2801_191806/config.yml"
+        "/ngen/ngen/extern/lstm/trained_neuralhydrology_models/nh_AORC_hourly_slope_elev_precip_temp_seq999_seed101_2801_191806/config.yml"
     ],
     "verbose": 0
 }
@@ -171,34 +175,39 @@ def gen_lstm(
 
     lstm_config = copy.copy(LSTM_TEMPLATE)
     interval = real.time.output_interval // 3600
-    lstm_config['time_step'] = f"{interval} hour"
+    lstm_config['time_step'] = DoubleQuotedScalarString(f"{interval} hour")
     cats = attrs['divide_id']
     ncats = len(cats)
     from pyproj import Transformer
     import yaml
     count = 0
-    for x, y in zip(hf.sort_values(by="divide_id").iterrows(),attrs.sort_values(by="divide_id").iterrows()) :    
+    source_crs = 'EPSG:5070' 
+    target_crs = 'EPSG:4326'
+    transformer = Transformer.from_crs(source_crs, target_crs, always_xy=True)    
+    for x, y in zip(hf.sort_values(by="divide_id").iterrows(),attrs.sort_values(by="divide_id").iterrows()) :   
         count += 1
         j, hf_row = x    
         k, attrs_row =y
         lstm_config_jcat = copy.copy(lstm_config)
         jcat = attrs_row['divide_id']
-        source_crs = 'EPSG:5070' 
-        target_crs = 'EPSG:4326'
-        transformer = Transformer.from_crs(source_crs, target_crs, always_xy=True)
         x_coord = attrs_row['centroid_x']
-        y_coord = attrs_row['centroid_y']
-        lon, lat = transformer.transform(x_coord,y_coord)        
+        y_coord = attrs_row['centroid_y']      
+        lon, lat = transformer.transform(x_coord,y_coord)     
         lstm_config_jcat['area_sqkm'] = hf_row['areasqkm']
-        lstm_config_jcat['basid_id'] = jcat  
-        lstm_config_jcat['basid_name'] = jcat    
+        lstm_config_jcat['basin_id'] = jcat  
+        lstm_config_jcat['basin_name'] = jcat    
         lstm_config_jcat['elev_mean'] = attrs_row['mean.elevation']    
         lstm_config_jcat['lat'] = lat
         lstm_config_jcat['lon'] = lon
         lstm_config_jcat['slope_mean'] = attrs_row['mean.slope']   
         filename = Path(lstm_config_dir, jcat + ".yml")
-        with open(filename,"w") as fp:
-            yaml.dump(lstm_config_jcat, fp, default_flow_style=False, sort_keys=False)
+        yaml = ruamel.yaml.YAML()
+        yaml.indent(mapping=2, sequence=4, offset=2)
+        stream = io.StringIO()
+        yaml.dump(lstm_config_jcat, stream)
+        yaml_string = stream.getvalue()
+        with open(filename,'w') as fp:
+            fp.write(yaml_string)
         perc_comp = 100 * (count/ncats)
         print(f"{perc_comp:.1f}% complete",end='\r')
 
@@ -344,14 +353,6 @@ def gen_petAORcfe(hf,attrs,out,include):
             print(f'Generating LSTM configs from pydantic models',flush = True)
             gen_lstm(hf,attrs,args.outdir,serialized_realization)        
 
-    if "bmi_rust" in model_names:
-        if "bmi_rust" in ignore:
-            print(f'ignoring LSTM')
-        else:
-            print(f'Generating LSTM configs from pydantic models',flush = True)
-            gen_LSTM(args.hf_file,args.outdir)        
-
-
     globals = [x[0] for x in serialized_realization]
     if serialized_realization.routing is not None:
         if "routing" in ignore:

From dd608c7656dbeb779b7e8f1b09b0f11d51f6f175 Mon Sep 17 00:00:00 2001
From: Jordan Laser <jlaser@lynker.com>
Date: Thu, 20 Nov 2025 11:08:46 -0700
Subject: [PATCH 7/7] fix forcings file search path and pattern in workflow

---
 .github/workflows/test_datastream_options.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test_datastream_options.yaml b/.github/workflows/test_datastream_options.yaml
index 44b190b..7249b68 100644
--- a/.github/workflows/test_datastream_options.yaml
+++ b/.github/workflows/test_datastream_options.yaml
@@ -81,7 +81,7 @@ jobs:
       if: always()
       run: |
         sudo rm -rf $(pwd)/data/datastream_test
-        forcings_file=$(find "./data/datastream-resources/ngen-forcings/" -type f)
+        forcings_file=$(find "./data/cache/datastream-resources/ngen-forcings/" -name "*.nc")
         ./scripts/datastream -g $(pwd)/nextgen_VPU_09.gpkg -R $(pwd)/data/cache/datastream-resources/config/realization_sloth_nom_cfe_pet.json -F $forcings_file -s 202006200100 -e 202006200200 -C NWM_RETRO_V3 -d $(pwd)/data/datastream_test        
 
     - name: Resource directory test missing all