diff --git a/cosmotech/coal/cosmotech_api/apis/dataset.py b/cosmotech/coal/cosmotech_api/apis/dataset.py index a137b2f4..680f0bca 100644 --- a/cosmotech/coal/cosmotech_api/apis/dataset.py +++ b/cosmotech/coal/cosmotech_api/apis/dataset.py @@ -79,7 +79,7 @@ def _download_part(self, dataset_id, dataset_part, destination): ) @staticmethod - def path_to_parts(_path, part_type) -> list[tuple[str, Path, DatasetPartTypeEnum]]: + def path_to_parts(_path, part_type) -> list[tuple[str, Path, DatasetPartTypeEnum]]: if (_path := Path(_path)).is_dir(): return list((str(_p.relative_to(_path)), _p, part_type) for _p in _path.rglob("*") if _p.is_file()) return list(((_path.name, _path, part_type),)) @@ -118,7 +118,7 @@ def upload_dataset( additional_data=additional_data, parts=list( DatasetPartCreateRequest( - name=_p_name, + name=Path(_p_name).stem, description=_p_name, sourceName=_p_name, type=_type, @@ -195,7 +195,7 @@ def upload_dataset_parts( # Create new part part_request = DatasetPartCreateRequest( - name=_p_name, + name=Path(_p_name).stem, description=_p_name, sourceName=_p_name, type=_type, diff --git a/cosmotech/coal/postgresql/runner.py b/cosmotech/coal/postgresql/runner.py index 34497bcd..9236f3e6 100644 --- a/cosmotech/coal/postgresql/runner.py +++ b/cosmotech/coal/postgresql/runner.py @@ -49,19 +49,25 @@ def send_runner_metadata_to_postgresql( CREATE TABLE IF NOT EXISTS {schema_table} ( id varchar(32) PRIMARY KEY, name varchar(256), - last_csm_run_id varchar(32), + last_csm_run_id varchar(32) UNIQUE, run_template_id varchar(32) ); """ LOGGER.info(T("coal.services.postgresql.creating_table").format(schema_table=schema_table)) curs.execute(sql_create_table) conn.commit() + + runner_id = runner.get("id") + sql_delete_from_metatable = f""" + DELETE FROM {schema_table} + WHERE id= $1; + """ + curs.execute(sql_delete_from_metatable, (runner_id,)) + conn.commit() + + sql_upsert = f""" INSERT INTO {schema_table} (id, name, last_csm_run_id, run_template_id) - VALUES ($1, 
$2, $3, $4) - ON CONFLICT (id) - DO - UPDATE SET name = EXCLUDED.name, last_csm_run_id = EXCLUDED.last_csm_run_id; + VALUES ($1, $2, $3, $4) """ LOGGER.debug(runner) curs.execute( diff --git a/cosmotech/coal/postgresql/store.py b/cosmotech/coal/postgresql/store.py index 8db8fb72..95d6c422 100644 --- a/cosmotech/coal/postgresql/store.py +++ b/cosmotech/coal/postgresql/store.py @@ -115,7 +115,7 @@ def dump_store_to_postgresql_from_conf( ) if fk_id and _psql.is_metadata_exists(): metadata_table = f"{_psql.metadata_table_name}" - _psql.add_fk_constraint(table_name, "csm_run_id", metadata_table, "last_csm_run_id") + _psql.add_fk_constraint(target_table_name, "csm_run_id", metadata_table, "last_csm_run_id") total_rows += rows _up_time = perf_counter() diff --git a/cosmotech/coal/postgresql/utils.py b/cosmotech/coal/postgresql/utils.py index 4d863608..a95e8d60 100644 --- a/cosmotech/coal/postgresql/utils.py +++ b/cosmotech/coal/postgresql/utils.py @@ -155,13 +155,20 @@ def add_fk_constraint( to_table: str, to_col: str, ) -> None: - # Connect to PostgreSQL and remove runner metadata row - with dbapi.connect(self.full_uri, autocommit=True) as conn: + # Connect to PostgreSQL and add a foreign key constraint + with dbapi.connect(self.full_uri, autocommit=False) as conn: with conn.cursor() as curs: + sql_drop_fk = f""" + ALTER TABLE {self.db_schema}.{from_table} + DROP CONSTRAINT IF EXISTS metadata; + """ sql_add_fk = f""" ALTER TABLE {self.db_schema}.{from_table} - CONSTRAINT metadata FOREIGN KEY ({from_col}) REFERENCES {to_table}({to_col}) + ADD CONSTRAINT metadata FOREIGN KEY ({from_col}) + REFERENCES {self.db_schema}.{to_table}({to_col}) + ON DELETE CASCADE; """ + curs.execute(sql_drop_fk) curs.execute(sql_add_fk) conn.commit() diff --git a/cosmotech/coal/store/parquet.py b/cosmotech/coal/store/parquet.py new file mode 100644 index 00000000..76f1283d --- /dev/null +++ b/cosmotech/coal/store/parquet.py @@ -0,0 +1,45 @@ +# Copyright (C) - 2023 - 2025 - Cosmo Tech +# This 
document and all information contained herein is the exclusive property - +# including all intellectual property rights pertaining thereto - of Cosmo Tech. +# Any use, reproduction, translation, broadcasting, transmission, distribution, +# etc., to any person is prohibited unless it has been previously and +# specifically authorized by written means by Cosmo Tech. + +import pathlib + +import pyarrow as pa +import pyarrow.parquet as pq + +from cosmotech.coal.store.store import Store + + +def store_parquet_file( + table_name: str, + parquet_path: pathlib.Path, + replace_existing_file: bool = False, + store=Store(), +): + if not parquet_path.exists(): + raise FileNotFoundError(f"File {parquet_path} does not exist") + + data: pa.Table = pq.ParquetFile(parquet_path).read() + _c = data.column_names + data = data.rename_columns([Store.sanitize_column(_column) for _column in _c]) + + store.add_table(table_name=table_name, data=data, replace=replace_existing_file) + + +def convert_store_table_to_parquet( + table_name: str, + parquet_path: pathlib.Path, + replace_existing_file: bool = False, + store=Store(), +): + if parquet_path.name.endswith(".parquet") and parquet_path.exists() and not replace_existing_file: + raise FileExistsError(f"File {parquet_path} already exists") + if not parquet_path.name.endswith(".parquet"): + parquet_path = parquet_path / f"{table_name}.parquet" + folder = parquet_path.parent + folder.mkdir(parents=True, exist_ok=True) + + pq.write_table(store.get_table(table_name), parquet_path) diff --git a/cosmotech/coal/utils/configuration.py b/cosmotech/coal/utils/configuration.py index 55a6d400..3bd88761 100644 --- a/cosmotech/coal/utils/configuration.py +++ b/cosmotech/coal/utils/configuration.py @@ -76,6 +76,8 @@ class Configuration(Dotdict): "api": {"url": "CSM_API_URL", "scope": "CSM_API_SCOPE"}, "dataset_absolute_path": "CSM_DATASET_ABSOLUTE_PATH", "parameters_absolute_path": "CSM_PARAMETERS_ABSOLUTE_PATH", + "output_absolute_path": 
"CSM_OUTPUT_ABSOLUTE_PATH", + "tmp_absolute_path": "CSM_TEMP_ABSOLUTE_PATH", "organization_id": "CSM_ORGANIZATION_ID", "workspace_id": "CSM_WORKSPACE_ID", "runner_id": "CSM_RUNNER_ID", diff --git a/cosmotech/coal/utils/input_collector.py b/cosmotech/coal/utils/input_collector.py new file mode 100644 index 00000000..25684960 --- /dev/null +++ b/cosmotech/coal/utils/input_collector.py @@ -0,0 +1,121 @@ +import json +import os +from pathlib import Path + +from cosmotech.coal.utils.configuration import ENVIRONMENT_CONFIGURATION as EC +from cosmotech.coal.utils.logger import LOGGER + + +class InputCollector: + def __init__(self): + self.dataset_collector = DatasetCollector() + self.parameter_collector = ParameterCollector() + self.workspace_collector = WorkspaceCollector() + + def fetch_dataset(self, dataset_name: str) -> Path: + return self.dataset_collector.fetch(dataset_name) + + def fetch_parameter(self, param_name: str) -> Path: + return self.parameter_collector.fetch(param_name) + + def fetch_workspace_file(self, file_name: str) -> Path: + return self.workspace_collector.fetch(file_name) + + def fetch(self, name: str) -> Path: + try: + return self.fetch_parameter(name) + except (KeyError, FileNotFoundError): + LOGGER.debug(f"Parameter {name} not found, trying workspace files.") + try: + return self.fetch_workspace_file(name) + except FileNotFoundError: + LOGGER.debug(f"Workspace file {name} not found, trying dataset files.") + return self.fetch_dataset(name) + + +class DatasetCollector: + def __init__(self): + self.paths: dict[str, Path] = {} + + def collect(self): + for dataset_id in os.listdir(EC.cosmotech.dataset_absolute_path): + for r, d, f in os.walk(Path(EC.cosmotech.dataset_absolute_path) / dataset_id): + for dataset_name in f: + path = Path(r) / dataset_name + self.paths[dataset_name] = path + self.paths[path.stem] = path + + def fetch(self, dataset_name: str) -> Path: + # lazy collection to avoid unnecessary os.walk calls + if not self.paths: + 
self.collect() + if dataset_name in self.paths: + return self.paths[dataset_name] + raise FileNotFoundError(f"File for {dataset_name} not found in {EC.cosmotech.dataset_absolute_path}.") + + +class WorkspaceCollector: + def __init__(self): + self.paths: dict[str, Path] = {} + + def collect(self): + workspace_path = Path(EC.cosmotech.dataset_absolute_path) / "workspace_files" + if workspace_path.exists(): + for r, d, f in os.walk(workspace_path): + for file_name in f: + path = Path(r) / file_name + self.paths[file_name] = path + self.paths[path.stem] = path + + def fetch(self, file_name: str) -> Path: + if not self.paths: + self.collect() + if file_name in self.paths: + return self.paths[file_name] + raise FileNotFoundError(f"File {file_name} not found in workspace_files.") + + +class ParameterCollector: + def __init__(self): + self.paths: dict[str, Path] = {} + self.parameters: dict[str, str] = {} + + def read_parameters_json(self): + parameter_file = Path(EC.cosmotech.parameters_absolute_path) / "parameters.json" + if parameter_file.exists(): + with open(parameter_file) as f: + parameters = json.load(f) + for parameter in parameters: + self.parameters[parameter["parameterId"]] = parameter["value"] + + def collect(self): + for dataset_id in os.listdir(EC.cosmotech.parameters_absolute_path): + for r, d, f in os.walk(Path(EC.cosmotech.parameters_absolute_path) / dataset_id): + for file_name in f: + path = Path(r) / file_name + param_name = path.parent.name + self.paths[param_name] = path + self.paths[path.stem] = path + + def fetch_parameter(self, param_name: str) -> str: + # lazy collection to avoid unnecessary json loading + if not self.parameters: + self.read_parameters_json() + return self.parameters[param_name] + + def fetch_file_path(self, param_name: str) -> Path: + # lazy collection to avoid unnecessary os.walk calls + if not self.paths: + self.collect() + if param_name in self.paths: + return self.paths[param_name] + raise FileNotFoundError(f"File for 
{param_name} not found in {EC.cosmotech.parameters_absolute_path}.") + + def fetch(self, param_name: str) -> Path: + try: + return self.fetch_parameter(param_name) + except KeyError: + return self.fetch_file_path(param_name) + + +ENVIRONMENT_INPUT_COLLECTOR = InputCollector() diff --git a/cosmotech/csm_data/commands/api/wsf_load_file.py b/cosmotech/csm_data/commands/api/wsf_load_file.py index 1f84d1e1..2f26eed0 100644 --- a/cosmotech/csm_data/commands/api/wsf_load_file.py +++ b/cosmotech/csm_data/commands/api/wsf_load_file.py @@ -35,7 +35,7 @@ "--workspace-path", help=T("csm_data.commands.api.wsf_load_file.parameters.workspace_path"), metavar="PATH", - default="/", + default="", type=str, ) @click.option( diff --git a/cosmotech/csm_data/commands/store/load_parquet_folder.py b/cosmotech/csm_data/commands/store/load_parquet_folder.py new file mode 100644 index 00000000..08cb19ab --- /dev/null +++ b/cosmotech/csm_data/commands/store/load_parquet_folder.py @@ -0,0 +1,49 @@ +# Copyright (C) - 2023 - 2025 - Cosmo Tech +# This document and all information contained herein is the exclusive property - +# including all intellectual property rights pertaining thereto - of Cosmo Tech. +# Any use, reproduction, translation, broadcasting, transmission, distribution, +# etc., to any person is prohibited unless it has been previously and +# specifically authorized by written means by Cosmo Tech. 
+from cosmotech.orchestrator.utils.translate import T + +from cosmotech.csm_data.utils.click import click +from cosmotech.csm_data.utils.decorators import translate_help, web_help + + +@click.command() +@web_help("csm-data/store/load-parquet-folder") +@translate_help("csm_data.commands.store.load_parquet_folder.description") +@click.option( + "--store-folder", + envvar="CSM_PARAMETERS_ABSOLUTE_PATH", + help=T("csm_data.commands.store.load_parquet_folder.parameters.store_folder"), + metavar="PATH", + type=str, + show_envvar=True, + required=True, +) +@click.option( + "--parquet-folder", + envvar="CSM_OUTPUT_ABSOLUTE_PATH", + help=T("csm_data.commands.store.load_parquet_folder.parameters.parquet_folder"), + metavar="PATH", + type=str, + show_envvar=True, + required=True, +) +def load_parquet_folder(store_folder, parquet_folder): + # Import the modules and functions at the start of the command + import pathlib + + from cosmotech.coal.store.parquet import store_parquet_file + from cosmotech.coal.store.store import Store + from cosmotech.coal.utils.configuration import Configuration + from cosmotech.coal.utils.logger import LOGGER + + _conf = Configuration() + + _conf.coal.store = store_folder + + for parquet_path in pathlib.Path(parquet_folder).glob("*.parquet"): + LOGGER.info(T("coal.services.azure_storage.found_file").format(file=parquet_path.name)) + store_parquet_file(parquet_path.name[:-8], parquet_path, store=Store(False, _conf)) diff --git a/cosmotech/csm_data/commands/store/store.py b/cosmotech/csm_data/commands/store/store.py index 47be8957..e17b2623 100644 --- a/cosmotech/csm_data/commands/store/store.py +++ b/cosmotech/csm_data/commands/store/store.py @@ -14,6 +14,7 @@ from cosmotech.csm_data.commands.store.load_from_singlestore import ( load_from_singlestore_command, ) +from cosmotech.csm_data.commands.store.load_parquet_folder import load_parquet_folder from cosmotech.csm_data.commands.store.output import output from cosmotech.csm_data.commands.store.reset 
import reset from cosmotech.csm_data.utils.click import click @@ -30,6 +31,7 @@ def store(): store.add_command(reset, "reset") store.add_command(list_tables, "list-tables") store.add_command(load_csv_folder, "load-csv-folder") +store.add_command(load_parquet_folder, "load-parquet-folder") store.add_command(load_from_singlestore_command, "load-from-singlestore") store.add_command(dump_to_postgresql, "dump-to-postgresql") store.add_command(dump_to_s3, "dump-to-s3") diff --git a/cosmotech/translation/csm_data/en-US/csm_data/commands/store/load_parquet_folder.yml b/cosmotech/translation/csm_data/en-US/csm_data/commands/store/load_parquet_folder.yml new file mode 100644 index 00000000..4e869074 --- /dev/null +++ b/cosmotech/translation/csm_data/en-US/csm_data/commands/store/load_parquet_folder.yml @@ -0,0 +1,5 @@ +description: | + Running this command will find all parquet files in the given folder and put them in the store +parameters: + store_folder: The folder containing the store files + parquet_folder: The folder containing the parquet files to store diff --git a/tests/unit/coal/test_cosmotech_api/test_apis/test_dataset.py b/tests/unit/coal/test_cosmotech_api/test_apis/test_dataset.py index 8ac14f48..9d493e16 100644 --- a/tests/unit/coal/test_cosmotech_api/test_apis/test_dataset.py +++ b/tests/unit/coal/test_cosmotech_api/test_apis/test_dataset.py @@ -543,13 +543,13 @@ def test_update_dataset_mixed_files(self, mock_cosmotech_config, mock_api_client assert len(args_list) == 2 # check first call used to create csv part dpcr = args_list[0].kwargs.get("dataset_part_create_request") - assert dpcr.name == "data.csv" + assert dpcr.name == "data" assert dpcr.source_name == "data.csv" assert dpcr.description == "data.csv" assert dpcr.type == DatasetPartTypeEnum.FILE # check second call used to create db part dpcr = args_list[1].kwargs.get("dataset_part_create_request") - assert dpcr.name == "data.db" + assert dpcr.name == "data" assert dpcr.source_name == "data.db" assert 
dpcr.description == "data.db" assert dpcr.type == DatasetPartTypeEnum.DB diff --git a/tests/unit/coal/test_postgresql/test_postgresql_runner.py b/tests/unit/coal/test_postgresql/test_postgresql_runner.py index 870a1f89..33c3f4a8 100644 --- a/tests/unit/coal/test_postgresql/test_postgresql_runner.py +++ b/tests/unit/coal/test_postgresql/test_postgresql_runner.py @@ -72,14 +72,19 @@ def test_send_runner_metadata_to_postgresql(self, mock_connect, mock_postgres_ut mock_connect.assert_called_once_with("postgresql://user:password@localhost:5432/testdb", autocommit=True) # Check that SQL statements were executed - assert mock_cursor.execute.call_count == 2 + assert mock_cursor.execute.call_count == 3 # Verify the SQL statements (partially, since the exact SQL is complex) create_table_call = mock_cursor.execute.call_args_list[0] assert "CREATE TABLE IF NOT EXISTS" in create_table_call[0][0] assert "public.test_runnermetadata" in create_table_call[0][0] - upsert_call = mock_cursor.execute.call_args_list[1] + delete_call = mock_cursor.execute.call_args_list[1] + assert "DELETE FROM" in delete_call[0][0] + assert "public.test_runnermetadata" in delete_call[0][0] + assert delete_call[0][1] == ("test-runner-id",) + + upsert_call = mock_cursor.execute.call_args_list[2] assert "INSERT INTO" in upsert_call[0][0] assert "public.test_runnermetadata" in upsert_call[0][0] assert upsert_call[0][1] == ( @@ -90,7 +95,7 @@ def test_send_runner_metadata_to_postgresql(self, mock_connect, mock_postgres_ut ) # Check that commits were called - assert mock_conn.commit.call_count == 2 + assert mock_conn.commit.call_count == 3 # Verify the function returns the lastRunId assert result == "test-run-id" diff --git a/tests/unit/coal/test_store/test_store_parquet.py b/tests/unit/coal/test_store/test_store_parquet.py new file mode 100644 index 00000000..e003fd5b --- /dev/null +++ b/tests/unit/coal/test_store/test_store_parquet.py @@ -0,0 +1,200 @@ +# Copyright (C) - 2023 - 2025 - Cosmo Tech +# This 
document and all information contained herein is the exclusive property - +# including all intellectual property rights pertaining thereto - of Cosmo Tech. +# Any use, reproduction, translation, broadcasting, transmission, distribution, +# etc., to any person is prohibited unless it has been previously and +# specifically authorized by written means by Cosmo Tech. + +import pathlib +from unittest.mock import MagicMock, patch + +import pyarrow as pa +import pytest + +from cosmotech.coal.store.parquet import ( + convert_store_table_to_parquet, + store_parquet_file, +) +from cosmotech.coal.store.store import Store + + +class TestParquetFunctions: + """Tests for top-level functions in the parquet module.""" + + @patch("pyarrow.parquet.ParquetFile") + @patch("pathlib.Path.exists") + def test_store_parquet_file_success(self, mock_exists, mock_parquet_file): + """Test the store_parquet_file function with a valid Parquet file.""" + # Arrange + table_name = "test_table" + parquet_path = pathlib.Path("/path/to/test.parquet") + mock_exists.return_value = True + + # Mock Parquet data + mock_data = pa.Table.from_arrays([pa.array([1, 2, 3]), pa.array(["a", "b", "c"])], names=["id", "name"]) + mock_parquet_file.return_value.read.return_value = mock_data + + # Mock store + mock_store = MagicMock(spec=Store) + + # Act + store_parquet_file(table_name, parquet_path, False, mock_store) + + # Assert + mock_exists.assert_called_once_with() + mock_parquet_file.assert_called_once_with(parquet_path) + mock_parquet_file.return_value.read.assert_called_once_with() + mock_store.add_table.assert_called_once() + args, kwargs = mock_store.add_table.call_args + assert kwargs["table_name"] == table_name + assert kwargs["replace"] is False + + @patch("pathlib.Path.exists") + def test_store_parquet_file_not_found(self, mock_exists): + """Test the store_parquet_file function with a non-existent Parquet file.""" + # Arrange + table_name = "test_table" + parquet_path = 
pathlib.Path("/path/to/nonexistent.parquet") + mock_exists.return_value = False + + # Mock store + mock_store = MagicMock(spec=Store) + + # Act & Assert + with pytest.raises(FileNotFoundError): + store_parquet_file(table_name, parquet_path, False, mock_store) + + mock_exists.assert_called_once_with() + + @patch("pyarrow.parquet.ParquetFile") + @patch("pathlib.Path.exists") + def test_store_parquet_file_with_column_sanitization(self, mock_exists, mock_parquet_file): + """Test the store_parquet_file function with column sanitization.""" + # Arrange + table_name = "test_table" + parquet_path = pathlib.Path("/path/to/test.parquet") + mock_exists.return_value = True + + # Mock Parquet data with columns that need sanitization + mock_data = pa.Table.from_arrays( + [pa.array([1, 2, 3]), pa.array(["a", "b", "c"])], names=["id with space", "name-with-dash"] + ) + mock_parquet_file.return_value.read.return_value = mock_data + + # Mock store and sanitize_column + mock_store = MagicMock(spec=Store) + Store.sanitize_column = MagicMock(side_effect=lambda x: x.replace(" ", "_")) + + # Act + store_parquet_file(table_name, parquet_path, False, mock_store) + + # Assert + assert Store.sanitize_column.call_count == 2 + Store.sanitize_column.assert_any_call("id with space") + Store.sanitize_column.assert_any_call("name-with-dash") + mock_store.add_table.assert_called_once() + + @patch("pyarrow.parquet.ParquetFile") + @patch("pathlib.Path.exists") + def test_store_parquet_file_with_replace(self, mock_exists, mock_parquet_file): + """Test the store_parquet_file function with replace_existing_file=True.""" + # Arrange + table_name = "test_table" + parquet_path = pathlib.Path("/path/to/test.parquet") + mock_exists.return_value = True + + mock_data = pa.Table.from_arrays([pa.array([1, 2, 3])], names=["id"]) + mock_parquet_file.return_value.read.return_value = mock_data + + mock_store = MagicMock(spec=Store) + + # Act + store_parquet_file(table_name, parquet_path, True, mock_store) + + # 
Assert + args, kwargs = mock_store.add_table.call_args + assert kwargs["replace"] is True + + @patch("pyarrow.parquet.write_table") + @patch("pathlib.Path.exists") + def test_convert_store_table_to_parquet_success(self, mock_exists, mock_write_table): + """Test the convert_store_table_to_parquet function with a valid table.""" + # Arrange + table_name = "test_table" + parquet_path = pathlib.Path("/path/to/output.parquet") + mock_exists.return_value = False + + mock_store = MagicMock(spec=Store) + mock_table = pa.Table.from_arrays([pa.array([1, 2, 3]), pa.array(["a", "b", "c"])], names=["id", "name"]) + mock_store.get_table.return_value = mock_table + + with patch.object(pathlib.Path, "mkdir") as mock_mkdir: + # Act + convert_store_table_to_parquet(table_name, parquet_path, False, mock_store) + + # Assert + mock_store.get_table.assert_called_once_with(table_name) + mock_mkdir.assert_called_once_with(parents=True, exist_ok=True) + mock_write_table.assert_called_once_with(mock_table, parquet_path) + + @patch("pathlib.Path.exists") + def test_convert_store_table_to_parquet_file_exists(self, mock_exists): + """Test the convert_store_table_to_parquet function when the output file already exists.""" + # Arrange + table_name = "test_table" + parquet_path = pathlib.Path("/path/to/output.parquet") + mock_exists.return_value = True + + mock_store = MagicMock(spec=Store) + + # Act & Assert + with pytest.raises(FileExistsError): + convert_store_table_to_parquet(table_name, parquet_path, False, mock_store) + + mock_exists.assert_called_once_with() + mock_store.get_table.assert_not_called() + + @patch("pyarrow.parquet.write_table") + @patch("pathlib.Path.exists") + def test_convert_store_table_to_parquet_replace_existing(self, mock_exists, mock_write_table): + """Test the convert_store_table_to_parquet function with replace_existing_file=True.""" + # Arrange + table_name = "test_table" + parquet_path = pathlib.Path("/path/to/output.parquet") + mock_exists.return_value = True + + 
mock_store = MagicMock(spec=Store) + mock_table = pa.Table.from_arrays([pa.array([1, 2, 3]), pa.array(["a", "b", "c"])], names=["id", "name"]) + mock_store.get_table.return_value = mock_table + + with patch.object(pathlib.Path, "mkdir") as mock_mkdir: + # Act + convert_store_table_to_parquet(table_name, parquet_path, True, mock_store) + + # Assert + mock_store.get_table.assert_called_once_with(table_name) + mock_mkdir.assert_called_once_with(parents=True, exist_ok=True) + mock_write_table.assert_called_once_with(mock_table, parquet_path) + + @patch("pyarrow.parquet.write_table") + @patch("pathlib.Path.exists") + def test_convert_store_table_to_parquet_directory_path(self, mock_exists, mock_write_table): + """Test the convert_store_table_to_parquet function with a directory path.""" + # Arrange + table_name = "test_table" + parquet_path = pathlib.Path("/path/to/directory") # Not ending with .parquet + mock_exists.return_value = False + + mock_store = MagicMock(spec=Store) + mock_table = pa.Table.from_arrays([pa.array([1, 2, 3]), pa.array(["a", "b", "c"])], names=["id", "name"]) + mock_store.get_table.return_value = mock_table + + with patch.object(pathlib.Path, "mkdir") as mock_mkdir: + # Act + convert_store_table_to_parquet(table_name, parquet_path, False, mock_store) + + # Assert + mock_store.get_table.assert_called_once_with(table_name) + mock_mkdir.assert_called_once_with(parents=True, exist_ok=True) + expected_path = parquet_path / f"{table_name}.parquet" + mock_write_table.assert_called_once_with(mock_table, expected_path) diff --git a/tests/unit/coal/test_utils/test_utils_input_collector.py b/tests/unit/coal/test_utils/test_utils_input_collector.py new file mode 100644 index 00000000..12116c9e --- /dev/null +++ b/tests/unit/coal/test_utils/test_utils_input_collector.py @@ -0,0 +1,243 @@ +# Copyright (C) - 2023 - 2025 - Cosmo Tech +# This document and all information contained herein is the exclusive property - +# including all intellectual property rights 
pertaining thereto - of Cosmo Tech. +# Any use, reproduction, translation, broadcasting, transmission, distribution, +# etc., to any person is prohibited unless it has been previously and +# specifically authorized by written means by Cosmo Tech. + +import json +from unittest.mock import MagicMock, patch + +import pytest + +from cosmotech.coal.utils.input_collector import ( + DatasetCollector, + InputCollector, + ParameterCollector, +) + + +@pytest.fixture +def mock_ec(tmp_path): + """Fixture that patches EC with a MagicMock whose dataset and parameters paths + both point to tmp_path. Override the attributes in individual tests as needed.""" + ec = MagicMock() + ec.cosmotech.dataset_absolute_path = str(tmp_path) + ec.cosmotech.parameters_absolute_path = str(tmp_path) + with patch("cosmotech.coal.utils.input_collector.EC", ec): + yield ec + + +class TestDatasetCollector: + def test_fetch_existing_file(self, tmp_path, mock_ec): + dataset_dir = tmp_path / "ds1" + dataset_dir.mkdir() + file = dataset_dir / "mydata.csv" + file.write_text("a,b\n1,2") + + mock_ec.cosmotech.dataset_absolute_path = str(tmp_path) + + collector = DatasetCollector() + result = collector.fetch("mydata.csv") + + assert result == file + + def test_fetch_triggers_lazy_collection(self, tmp_path, mock_ec): + dataset_dir = tmp_path / "ds1" + dataset_dir.mkdir() + (dataset_dir / "file.csv").write_text("") + + mock_ec.cosmotech.dataset_absolute_path = str(tmp_path) + + collector = DatasetCollector() + assert collector.paths == {} + collector.fetch("file.csv") + assert "file.csv" in collector.paths + + def test_fetch_missing_file_raises(self, tmp_path, mock_ec): + dataset_dir = tmp_path / "ds1" + dataset_dir.mkdir() + + mock_ec.cosmotech.dataset_absolute_path = str(tmp_path) + + collector = DatasetCollector() + with pytest.raises(FileNotFoundError): + collector.fetch("nonexistent.csv") + + def test_collect_indexes_nested_files(self, tmp_path, mock_ec): + sub = tmp_path / "ds1" / "sub" + 
sub.mkdir(parents=True) + f = sub / "nested.csv" + f.write_text("") + + mock_ec.cosmotech.dataset_absolute_path = str(tmp_path) + + collector = DatasetCollector() + collector.collect() + + assert "nested.csv" in collector.paths + assert collector.paths["nested.csv"] == f + + +class TestParameterCollector: + def test_init_starts_with_empty_dicts(self, tmp_path, mock_ec): + """parameters.json is no longer read at init — loading is now lazy.""" + params = [{"parameterId": "alpha", "value": "42"}] + (tmp_path / "parameters.json").write_text(json.dumps(params)) + + collector = ParameterCollector() + + assert collector.parameters == {} + assert collector.paths == {} + + def test_read_parameters_json_populates_parameters(self, tmp_path, mock_ec): + params = [{"parameterId": "alpha", "value": "42"}, {"parameterId": "beta", "value": "hello"}] + (tmp_path / "parameters.json").write_text(json.dumps(params)) + + collector = ParameterCollector() + collector.read_parameters_json() + + assert collector.parameters["alpha"] == "42" + assert collector.parameters["beta"] == "hello" + + def test_read_parameters_json_no_file_keeps_empty(self, tmp_path, mock_ec): + collector = ParameterCollector() + collector.read_parameters_json() + + assert collector.parameters == {} + + def test_fetch_parameter_triggers_lazy_load(self, tmp_path, mock_ec): + params = [{"parameterId": "myparam", "value": "myvalue"}] + (tmp_path / "parameters.json").write_text(json.dumps(params)) + + collector = ParameterCollector() + assert collector.parameters == {} + + result = collector.fetch_parameter("myparam") + + assert result == "myvalue" + assert "myparam" in collector.parameters + + def test_fetch_parameter_does_not_reload_if_already_loaded(self, tmp_path, mock_ec): + params = [{"parameterId": "key", "value": "first"}] + (tmp_path / "parameters.json").write_text(json.dumps(params)) + + collector = ParameterCollector() + collector.read_parameters_json() + # Mutate to confirm no second load overwrites it + 
collector.parameters["key"] = "modified" + + result = collector.fetch_parameter("key") + + assert result == "modified" + + def test_fetch_parameter_raises_key_error_for_unknown(self, tmp_path, mock_ec): + (tmp_path / "parameters.json").write_text(json.dumps([])) + + collector = ParameterCollector() + with pytest.raises(KeyError): + collector.fetch_parameter("nonexistent") + + def test_fetch_file_path_returns_file(self, tmp_path, mock_ec): + param_dir = tmp_path / "myparam" + param_dir.mkdir() + f = param_dir / "data.csv" + f.write_text("") + + collector = ParameterCollector() + result = collector.fetch_file_path("myparam") + + assert result == f + + def test_fetch_file_path_missing_raises(self, tmp_path, mock_ec): + collector = ParameterCollector() + with pytest.raises(FileNotFoundError): + collector.fetch_file_path("nonexistent") + + def test_fetch_returns_value_if_already_in_parameters(self, tmp_path, mock_ec): + """fetch() checks self.parameters directly — no lazy load triggered.""" + collector = ParameterCollector() + collector.parameters["preloaded"] = "value" + + result = collector.fetch("preloaded") + + assert result == "value" + + def test_fetch_lazy_loads_parameters_json_first(self, tmp_path, mock_ec): + """fetch() lazily loads parameters.json via fetch_parameter, so the JSON value wins over the file on disk.""" + param_dir = tmp_path / "myparam" + param_dir.mkdir() + f = param_dir / "data.csv" + f.write_text("") + + # parameters.json exists, so fetch() lazily loads it and returns the JSON value + (tmp_path / "parameters.json").write_text(json.dumps([{"parameterId": "myparam", "value": "json_val"}])) + + collector = ParameterCollector() + result = collector.fetch("myparam") + + assert result == "json_val" + + +class TestInputCollector: + def test_fetch_parameter_returns_preloaded_value(self, tmp_path, mock_ec): + """InputCollector.fetch_parameter calls parameter_collector.fetch(), + which checks self.parameters directly without lazy-loading JSON.""" + collector = InputCollector() + 
collector.parameter_collector.parameters["key"] = "val" + + assert collector.fetch_parameter("key") == "val" + + def test_fetch_parameter_falls_back_to_file(self, tmp_path, mock_ec): + param_dir = tmp_path / "myparam" + param_dir.mkdir() + f = param_dir / "data.csv" + f.write_text("") + + collector = InputCollector() + result = collector.fetch_parameter("myparam") + + assert result == f + + def test_fetch_dataset_delegates_to_dataset_collector(self, tmp_path, mock_ec): + ds_dir = tmp_path / "ds" / "ds1" + ds_dir.mkdir(parents=True) + f = ds_dir / "myfile.csv" + f.write_text("") + + mock_ec.cosmotech.dataset_absolute_path = str(tmp_path / "ds") + + collector = InputCollector() + result = collector.fetch_dataset("myfile.csv") + + assert result == f + + def test_fetch_tries_parameter_first(self, tmp_path, mock_ec): + """fetch() catches KeyError/FileNotFoundError from fetch_parameter and falls back to dataset.""" + ds_dir = tmp_path / "ds" / "ds1" + ds_dir.mkdir(parents=True) + f = ds_dir / "fallback.csv" + f.write_text("") + + mock_ec.cosmotech.dataset_absolute_path = str(tmp_path / "ds") + + collector = InputCollector() + # Pre-load a parameter so fetch_parameter returns it without touching files + collector.parameter_collector.parameters["fallback.csv"] = "param_value" + + result = collector.fetch("fallback.csv") + + assert result == "param_value" + + def test_fetch_falls_back_to_dataset(self, tmp_path, mock_ec): + ds_dir = tmp_path / "ds" / "ds1" + ds_dir.mkdir(parents=True) + f = ds_dir / "fallback.csv" + f.write_text("") + + mock_ec.cosmotech.dataset_absolute_path = str(tmp_path / "ds") + + collector = InputCollector() + result = collector.fetch("fallback.csv") + + assert result == f