From 7d55c8d3ce1ba6fe80db753bb71903420d52b38b Mon Sep 17 00:00:00 2001
From: kobebryant432 <kobe.ande@gmail.com>
Date: Thu, 12 Feb 2026 12:12:58 +0100
Subject: [PATCH 1/2] Avoid Path-like object in esmcatalog to accommodate
 pyarrow issue

---
 pyproject.toml                            | 1 -
 src/valenspy/input/esm_catalog_builder.py | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 28d10270..50f102b2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -57,7 +57,6 @@ xesmf = "^0.8.8"
 xclim = ">0.55.0" #Previous versions not compatible with xarray >2025.03.0 as they rely on xarray.core.merge which was moved
 docstring-parser = "^0.16"
 intake-esm = ">v2023.10.27"
-pyarrow = "19.0.1" #pyarrow newer versions incompatible with intake-esm as they use polars (which can't handle Path objects)
 
 [tool.poetry.group.dev]
 # Development group
diff --git a/src/valenspy/input/esm_catalog_builder.py b/src/valenspy/input/esm_catalog_builder.py
index a68fd823..ced37fca 100644
--- a/src/valenspy/input/esm_catalog_builder.py
+++ b/src/valenspy/input/esm_catalog_builder.py
@@ -198,7 +198,7 @@ def _process_dataset_for_catalog(self, dataset_name, dataset_info):
                         continue
 
                     # Add the file path to the metadata
-                    file_metadata["path"] = Path(file_path)
+                    file_metadata["path"] = Path(file_path).as_posix()
 
                     # Add dataset level metadata
                     file_metadata = {**dataset_meta_data, **file_metadata}
@@ -245,4 +245,4 @@ def _process_dataset_for_catalog(self, dataset_name, dataset_info):
         if not files_with_metadata:
             warnings.warn(f"No valid files found for dataset {dataset_name}; \n Please check the dataset root {dataset_root} and pattern {dataset_info.get('pattern', None)}")
 
-        return files_with_metadata
\ No newline at end of file
+        return files_with_metadata

From 3e40bb373bb8639eb9fcce459b7042735692cd45 Mon Sep 17 00:00:00 2001
From: kobebryant432 <kobe.ande@gmail.com>
Date: Thu, 12 Feb 2026 13:43:05 +0100
Subject: [PATCH 2/2] update manager bugfix load_regrid script

---
 src/valenspy/input/manager.py | 50 ++++++++++++++++++++++++++---------
 1 file changed, 38 insertions(+), 12 deletions(-)

diff --git a/src/valenspy/input/manager.py b/src/valenspy/input/manager.py
index 4f699c3a..8c8bbbfb 100644
--- a/src/valenspy/input/manager.py
+++ b/src/valenspy/input/manager.py
@@ -239,21 +239,47 @@ def update_catalog_from_dataset_info(self, dataset_name, dataset_root_dir, datas
         self._update_catalog(dataset_name, dataset_info)
         self.catalog_builder._validate_dataset_info()
         self.esm_datastore.esmcat._df = self.catalog_builder.df
-
+    
     @property
     def preprocess(self):
         """
-        A preprocessor function to convert the input dataset to ValEnsPy compliant data.
-
-        This function applys the input convertor to the dataset if an input convertor exists (i.e. source_id is in this managers input convertors).
+        Preprocessor to convert datasets to ValEnsPy-compliant format.
+    
+        Applies the appropriate input converter based on the dataset's source_id.
         """
+    
+        # --- Build lookup once (critical for performance) ---
+        df = self.esm_datastore.df
+
+        # Map catalog path -> source_id; no normalisation happens here, the catalog is assumed to already store POSIX strings
+        path_to_source = dict(zip(df["path"], df["source_id"]))
+    
+        IC_dict = self.input_convertors
 
-        def process_IC(ds, IC_dict, df):
-            file_name = ds.encoding["source"]
-            source_id = df[df["path"] == Path(file_name)]["source_id"].values[0]
-            if source_id in IC_dict:
-                return IC_dict[source_id](ds)
-            else:
+        def process_IC(ds, path_to_source, IC_dict):
+            # Intake provides the file path here
+            file_name = ds.encoding.get("source")
+            if file_name is None:
                 return ds
-            
-        return partial(process_IC, IC_dict=self.input_convertors, df=self.esm_datastore.df)
\ No newline at end of file
+
+            # Normalize exactly like catalog
+            file_name = Path(file_name).as_posix()
+
+            source_id = path_to_source.get(file_name)
+            if source_id is None:
+                raise ValueError(
+                    f"Dataset path not found in catalog: {file_name}"
+                )
+
+            convertor = IC_dict.get(source_id)
+            if convertor is None:
+                return ds
+
+            return convertor(ds)
+
+        return partial(
+            process_IC,
+            path_to_source=path_to_source,
+            IC_dict=IC_dict,
+        )
+