diff --git a/.vscode/settings.json b/.vscode/settings.json
index 2c67ab7..4b5a294 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,5 +1,4 @@
 {
-    "python-envs.defaultEnvManager": "ms-python.python:system",
-    "python-envs.defaultPackageManager": "ms-python.python:pip",
-    "python-envs.pythonProjects": []
+    "python-envs.defaultEnvManager": "ms-python.python:conda",
+    "python-envs.defaultPackageManager": "ms-python.python:conda"
 }
\ No newline at end of file
diff --git a/AutoREACTER/detectors/functional_groups_detector.py b/AutoREACTER/detectors/functional_groups_detector.py
index 1f612ac..f35f1e2 100644
--- a/AutoREACTER/detectors/functional_groups_detector.py
+++ b/AutoREACTER/detectors/functional_groups_detector.py
@@ -139,7 +139,7 @@ class MonomerEntry:
 # TODO: Add more functional groups as needed from literature/user requirements.
 
 
-@dataclass(slots=True, frozen=True)
+@dataclass(slots=True)
 class FunctionalGroupInfo:
     """
     Immutable dataclass storing information about a detected functional group.
diff --git a/AutoREACTER/detectors/reaction_detector.py b/AutoREACTER/detectors/reaction_detector.py
index 4ffbb72..5b16523 100644
--- a/AutoREACTER/detectors/reaction_detector.py
+++ b/AutoREACTER/detectors/reaction_detector.py
@@ -58,20 +58,8 @@ class EmptyReactionListError(Exception):
     This should be prevented by the reaction_selection method, but this error serves as a safeguard."""
     pass
 
-@dataclass(slots=True, frozen=True)
-class ReactionTemplate:
-    """
-    Defines a generic template for a polymerization reaction.
-    """
-    reaction_name: str
-    reactant_1: str                 # functional group name
-    reactant_2: Optional[str]       # functional group name or None
-    reaction_smarts: str
-    same_reactants: bool
-    delete_atom: bool
-    references: dict
 
-@dataclass(slots=True, frozen=True)
+@dataclass(slots=True)
 class ReactionInstance:
     """
     Represents a specific instance of a reaction between identified monomers.
@@ -90,6 +78,7 @@ class ReactionInstance:
     reaction_smarts: str
     delete_atom: bool
     references: dict
+    same_reactants: bool
     monomer_1: MonomerRole
     functional_group_1: FunctionalGroupInfo
     monomer_2: Optional[MonomerRole] = None
@@ -118,35 +107,27 @@ def _matching_fgs(self, monomer_entry: MonomerRole, target_group_name: str) -> L
         return [fg for fg in monomer_entry.functionalities if fg.fg_name == target_group_name]
 
     def _seen_pair_key(
-            self,
-            reaction_name: str,
-            monomer_role_1: MonomerRole,
-            fg_1: FunctionalGroupInfo,
-            monomer_role_2: Optional[MonomerRole] = None,
-            fg_2: Optional[FunctionalGroupInfo] = None,
-        ) -> Tuple:
-        """
-        Generates a unique, order-independent key for a reaction pair to avoid duplicates.
-        
-        Args:
-            reaction_name: Name of the reaction.
-            monomer_role_1: First monomer.
-            fg_1: First functional group.
-            monomer_role_2: Second monomer (optional).
-            fg_2: Second functional group (optional).
-            
-        Returns:
-            A tuple representing the unique state of this reaction instance.
-        """
+        self,
+        reaction_name: str,
+        monomer_role_1: MonomerRole,
+        fg_1: FunctionalGroupInfo,
+        monomer_role_2: Optional[MonomerRole] = None,
+        fg_2: Optional[FunctionalGroupInfo] = None,
+    ) -> Tuple:
+
         if monomer_role_2 is None or fg_2 is None:
-            return (reaction_name, monomer_role_1, fg_1)
+            return (
+                reaction_name,
+                monomer_role_1.smiles,
+                fg_1.fg_name
+            )
+
+        pair1 = (monomer_role_1.smiles, fg_1.fg_name)
+        pair2 = (monomer_role_2.smiles, fg_2.fg_name)
+
+        ordered = tuple(sorted([pair1, pair2]))
 
-        # Sort reactants by memory ID to ensure (A, B) and (B, A) produce the same key
-        ordered = tuple(sorted(
-            [(monomer_role_1, fg_1), (monomer_role_2, fg_2)],
-            key=lambda x: (id(x[0]), id(x[1]))
-        ))
-        return (reaction_name, ordered[0], ordered[1])
+        return (reaction_name, ordered)
 
     def reaction_detector(self, monomer_roles: List[MonomerRole]) -> List[ReactionInstance]:
         """
@@ -180,6 +161,7 @@ def reaction_detector(self, monomer_roles: List[MonomerRole]) -> List[ReactionIn
                                     reaction_smarts=reaction_info["reaction"],
                                     delete_atom=reaction_info["delete_atom"],
                                     references=reaction_info["reference"],
+                                    same_reactants=same_reactants,
                                     monomer_1=monomer_role,
                                     functional_group_1=fg
                                 )
@@ -210,6 +192,7 @@ def reaction_detector(self, monomer_roles: List[MonomerRole]) -> List[ReactionIn
                                             reaction_smarts=reaction_info["reaction"],
                                             delete_atom=reaction_info["delete_atom"],
                                             references=reaction_info["reference"],
+                                            same_reactants=same_reactants,
                                             monomer_1=monomer_role_i,
                                             functional_group_1=fg_i,
                                             monomer_2=monomer_role_j,
@@ -243,6 +226,7 @@ def reaction_detector(self, monomer_roles: List[MonomerRole]) -> List[ReactionIn
                                         reaction_smarts=reaction_info["reaction"],
                                         delete_atom=reaction_info["delete_atom"],
                                         references=reaction_info["reference"],
+                                        same_reactants=same_reactants,
                                         monomer_1=monomer_role,
                                         functional_group_1=fg_1,
                                         monomer_2=monomer_role,
diff --git a/AutoREACTER/detectors/reactions_library.py b/AutoREACTER/detectors/reactions_library.py
index f0abc47..cd970c7 100644
--- a/AutoREACTER/detectors/reactions_library.py
+++ b/AutoREACTER/detectors/reactions_library.py
@@ -51,8 +51,8 @@ def __init__(self):
             },
             "Hydroxy Acid Halides Hydroxy Acid Halides Polycondensation(Polyesterification)": {
                 "same_reactants": False,
-                "reactant_1": "hydroxy_acid_halides_monomer",
-                "reactant_2": "hydroxy_acid_halides_monomer",
+                "reactant_1": "hydroxy_acid_halide",
+                "reactant_2": "hydroxy_acid_halide",
                 "product": "polyester_chain",
                 "delete_atom": True,
                 "reaction": "[O;!$(OC=*):1]-[H:3].[CX3:2](=[O:5])[Cl,Br,I:4]>>[OX2:1]-[CX3:2](=[O:5]).[Cl,Br,I:4]-[H:3]",
@@ -62,7 +62,7 @@ def __init__(self):
             },
             "Hydroxy Acid Halides Polycondensation(Polyesterification)": {
                 "same_reactants": True,
-                "reactant_1": "hydroxy_acid_halides_monomer",
+                "reactant_1": "hydroxy_acid_halide",
                 "product": "polyester_chain",
                 "delete_atom": True,
                 "reaction": "[O;!$(OC=*):1]-[H:3].[CX3:2](=[O:5])[Cl,Br,I:4]>>[OX2:1]-[CX3:2](=[O:5]).[Cl,Br,I:4]-[H:3]",
diff --git a/AutoREACTER/reaction_template_builder/__init__.py b/AutoREACTER/reaction_preparation/__init__.py
similarity index 100%
rename from AutoREACTER/reaction_template_builder/__init__.py
rename to AutoREACTER/reaction_preparation/__init__.py
diff --git a/AutoREACTER/reaction_template_builder/run_reaction_template_pipeline.py b/AutoREACTER/reaction_preparation/build_reaction_system.py
similarity index 66%
rename from AutoREACTER/reaction_template_builder/run_reaction_template_pipeline.py
rename to AutoREACTER/reaction_preparation/build_reaction_system.py
index 7c8e4ac..3ebc26b 100644
--- a/AutoREACTER/reaction_template_builder/run_reaction_template_pipeline.py
+++ b/AutoREACTER/reaction_preparation/build_reaction_system.py
@@ -24,7 +24,7 @@
 """
 
 
-from typing import Literal
+
 # --- preferred: package-relative imports; fallback: absolute imports for script/notebook ---
 try:
     # Case 1 (correct when imported as part of the package)
@@ -54,204 +54,69 @@
     from lunar_client.lunar_api_wrapper import lunar_workflow
     from lunar_client.molecule_template_preparation import molecule_template_preparation
 
+from AutoREACTER.detectors.reaction_detector import ReactionInstance, ReactionTemplate
+from AutoREACTER.detectors.functional_groups_detector import FunctionalGroupInfo, MonomerRole
+from AutoREACTER.input_parser import SimulationSetup, MonomerEntry
 
 # standard libs / third-party
 import pandas as pd
 from pathlib import Path
 import os
+from dataclasses import dataclass, field
+from typing import List, Any, Optional, Dict
+from rdkit import Chem
 
 
-def is_continuous(d):
-    """
-    Checks if the integer keys of a dictionary form a continuous sequence starting from 1.
-
-    This is used to verify if reaction IDs are sequential after potential 
-    filtering or deduplication steps.
-
-    Args:
-        d (dict): The dictionary to check, where keys are expected to be integers.
 
-    Returns:
-        bool: True if keys are exactly [1, 2, ..., len(d)], False otherwise.
-    """
-    keys = sorted(d.keys())
-    if not keys: 
-        return True
-    # Compare sorted keys against a generated range of the same length
-    return keys == list(range(1, len(keys) + 1))
-
-
-def save_grid_image(mols, cache, key=None):
-    from pathlib import Path
-    from rdkit.Chem import Draw
-    import base64
-    """
-    TODO: Add full grid set of reactant1 + reactant2  → product image grid to ipynb or cache/ "grid_images".
-          With Template atoms highlighted in one color and Edge atoms highlighted in another color.
-          With Edge atoms hightlighted in a different color if possible. 
-          This will be useful for debugging and for users to visually inspect the detected reactions and templates.
-    Note: If you want to use the SVG output instead of PNG, you can set useSVG=True in Draw.MolsToGridImage and handle the SVG data accordingly.
+@dataclass (slots=True)
+class ReactionMetadata:
     """
-    out_dir = Path(cache) / "grid_images"
-    out_dir.mkdir(parents=True, exist_ok=True)
-
-    out_path = out_dir / (f"reaction_grid_{key}.png" if key is not None else "reaction_grid.png")
-
-    # Ask RDKit for a raster image (PNG-like)
-    img = Draw.MolsToGridImage(mols, useSVG=False)
-
-    # Case 1: PIL.Image.Image (has .save)
-    if hasattr(img, "save"):
-        img.save(str(out_path))
-        return out_path
-
-    # Case 2: IPython/RDKit display object (often has .data)
-    data = getattr(img, "data", None)
-    if data is None:
-        raise TypeError(f"Unexpected image type {type(img)}; cannot save.")
-
-    # data can be bytes OR base64 string depending on wrapper
-    if isinstance(data, bytes):
-        png_bytes = data
-    elif isinstance(data, str):
-        # try base64 decode; if it fails, treat as raw text
-        try:
-            png_bytes = base64.b64decode(data)
-        except Exception:
-            png_bytes = data.encode("utf-8")
-    else:
-        raise TypeError(f"Unexpected img.data type {type(data)}; cannot save.")
-
-    with open(out_path, "wb") as f:
-        f.write(png_bytes)
-
-    return out_path
-
-
-def add_dict_as_new_columns(df_existing, data_dict, titles=["template_reactant_idx", "template_product_idx"]):
-    """
-    Adds dictionary keys and values as new columns to an existing DataFrame.
-
-    This is specifically used to map reactant indices to product indices 
-    within the reaction template.
-
-    Args:
-        df_existing (pd.DataFrame): The DataFrame to modify.
-        data_dict (dict): Dictionary where keys and values will become column data.
-        titles (list): List of two strings for the new column names.
-
-    Returns:
-        pd.DataFrame: The modified DataFrame with new columns added.
-    """
-    # Convert dict keys and values to Series to ensure alignment with the DataFrame
-    # Use .astype("Int64") to allow for potential Null/NaN values while keeping integers
-    df_existing[titles[0]] = pd.Series(list(data_dict.keys())).astype("Int64")
-    df_existing[titles[1]] = pd.Series(list(data_dict.values())).astype("Int64")
-    
-    return df_existing
-
-
-def molecule_dict_csv_path_dict_rearrange(molecule_dict_csv_path_dict):
-    """
-    Rearranges the reaction dictionary keys to be continuous and renames 
-    associated CSV files on the filesystem to match the new keys.
-
-    This ensures that if reaction #2 is deleted, reaction #3 becomes the new #2,
-    maintaining a clean, gapless sequence for downstream processing.
-
-    Args:
-        molecule_dict_csv_path_dict (dict): Dictionary containing reaction metadata 
-                                        and 'csv_path' entries.
-
-    Returns:
-        dict: A new dictionary with normalized continuous keys (1 to N) and updated file paths.
+    A dataclass to hold all relevant metadata and file paths for a single reaction instance.
+    Attributes:
+    - reaction_id: Unique identifier for the reaction.
+    - reactant_combined_mol: RDKit molecule object representing the combined reactants.
+    - product_combined_mol: RDKit molecule object representing the combined products.
+    - reaction_smarts: Optional SMARTS string representing the reaction.
+    - reactant_smarts: Optional SMARTS string for the reactants.
+    - product_smiles: Optional SMILES string for the products.
+    - csv_path: Path to the CSV file containing processing reaction data.
+    - mol_3d_path: Optional path to the generated 3D molecule file.
+    - reaction_dataframe: Optional pandas DataFrame holding detailed reaction mapping and analysis data.
+    - reactant_to_product_mapping: Dictionary mapping reactant atom indices to product atom indices.
+    - template_reactant_to_product_mapping: Dictionary mapping reactant atom indices to product atom indices specifically for the reaction template.
+    - edge_atoms: List of atom indices that are considered 'edge atoms' connecting the reaction template to the rest of the molecule.
+    - delete_atom: Boolean flag indicating whether an atom is deleted in the reaction (e.g., for polycondensation).
+    - delete_atom_idx: Optional index of the atom that is deleted in the reaction, if applicable.
+    - activity_stats: Boolean flag indicating whether to calculate and include activity statistics for the reaction.
     """
-    # Skip if already continuous to save processing time
-    if is_continuous(molecule_dict_csv_path_dict):
-        return molecule_dict_csv_path_dict
-        
-    print("Rearranging molecule_dict_csv_path_dict keys to be continuous...")
-    new_dict = {}
-    
-    # Sort keys to ensure we process them in the original numerical order
-    sorted_old_keys = sorted(molecule_dict_csv_path_dict.keys())
+    reaction_id: int
+    reactant_combined_mol: Chem.Mol
+    product_combined_mol: Chem.Mol
+    reaction_smarts: Optional[str] = None
+    reactant_smarts: Optional[str] = None
+    product_smiles: Optional[str] = None
+    csv_path: Path = None
+    mol_3d_path: Optional[Path] = None
+    reaction_dataframe: Optional[pd.DataFrame] = None
+    reactant_to_product_mapping: Dict[int, int]
+    template_reactant_to_product_mapping: Dict[int, int] 
+    edge_atoms: List[int] 
+    delete_atom: bool = True
+    delete_atom_idx: Optional[int] = None
+    activity_stats: bool = True
     
-    for new_key_idx, old_key in enumerate(sorted_old_keys):
-        new_key = new_key_idx + 1  # Start new keys from 1
-        reaction_data = molecule_dict_csv_path_dict[old_key]
-        csv_save_path = reaction_data.get("csv_path")
-        
-        if new_key != old_key:
-            # Construct the new filename based on the new index (e.g., reaction_2.csv)
-            new_csv_save_path = os.path.join(
-                os.path.dirname(csv_save_path),
-                f"reaction_{new_key}.csv"
-            )
-            
-            # Check if destination exists to avoid crashing or accidental overwrites
-            if os.path.exists(new_csv_save_path):
-                os.remove(new_csv_save_path)
-                
-            # Rename the physical file on the disk to match the new key
-            os.rename(csv_save_path, new_csv_save_path)
-            
-            # Update the path in the metadata dictionary
-            reaction_data["csv_path"] = new_csv_save_path
-            
-        new_dict[new_key] = reaction_data
-
-    return new_dict
 
-
-def add_column_safe(df, list_data, column_name):
+ 
+@dataclass (slots=True)
+class PipelineOutput:
     """
-    Safely adds a list as a new column to a DataFrame, handling potential 
-    length mismatches by using Series alignment.
-
-    Args:
-        df (pd.DataFrame): Target DataFrame.
-        list_data (list): Data to be added to the column.
-        column_name (str): Name of the new column.
-
-    Returns:
-        pd.DataFrame: Modified DataFrame with the new column.
+    Final structured output of the ReactionTemplatePipeline.
+    Attributes:
+    - template_files: Dictionary mapping reaction identifiers to their corresponding molecule template file paths.
+    - all_reactions: Optional list of ReactionMetadata instances containing detailed information about each processed reaction
     """
-    # Creating a Series from the list ensures it starts from the top (index 0)
-    # and fills missing rows with NaN if the list is shorter than the DataFrame
-    df[column_name] = pd.Series(list_data).astype("Int64")
-    return df
-
-
-def extract_unique_references(detected_reactions: dict) -> list[str]:
-        """Collect reference URLs from detected_reactions[*]["reference"], dedupe, keep order."""
-        seen = set()
-        refs: list[str] = []
-
-        for rxn in detected_reactions.values():
-            ref = rxn.get("reference") or {}
-
-            # single URL fields
-            for v in ref.values():
-                if isinstance(v, str):
-                    if v not in seen:
-                        seen.add(v)
-                        refs.append(v)
-
-                # list-of-URLs fields
-                elif isinstance(v, (list, tuple)):
-                    for u in v:
-                        if isinstance(u, str) and u not in seen:
-                            seen.add(u)
-                            refs.append(u)
-
-                # (optional) nested dict handling, if you ever add that later
-                elif isinstance(v, dict):
-                    for u in v.values():
-                        if isinstance(u, str) and u not in seen:
-                            seen.add(u)
-                            refs.append(u)
-        return refs
-
+    template_files: Dict[str, Path]
+    all_reactions: Optional[List[ReactionMetadata]] = None
 
 # The main class encapsulating the reaction template pipeline
 class ReactionTemplatePipeline:
@@ -277,7 +142,46 @@ def __init__(self, detected_reactions_dict: dict, cache: str, non_monomer_molecu
         self.non_monomer_molecules_to_retain = non_monomer_molecules_to_retain
         self.formatted_dict, self.molecule_template_files, self.molecule_images = self.execute_pipeline(self.detected_reactions_dict, self.non_monomer_molecules_to_retain, self.cache)
 
+    def _add_column_safe(self, df, list_data, column_name):
+        """
+        Safely adds a list as a new column to a DataFrame, handling potential 
+        length mismatches by using Series alignment.
+
+        Args:
+            df (pd.DataFrame): Target DataFrame.
+            list_data (list): Data to be added to the column.
+            column_name (str): Name of the new column.
 
+        Returns:
+            pd.DataFrame: Modified DataFrame with the new column.
+        """
+        # Creating a Series from the list ensures it starts from the top (index 0)
+        # and fills missing rows with NaN if the list is shorter than the DataFrame
+        df[column_name] = pd.Series(list_data).astype("Int64")
+        return df
+    
+    def _add_dict_as_new_columns(self, df_existing, data_dict, titles=["template_reactant_idx", "template_product_idx"]):
+        """
+        Adds dictionary keys and values as new columns to an existing DataFrame.
+
+        This is specifically used to map reactant indices to product indices 
+        within the reaction template.
+
+        Args:
+            df_existing (pd.DataFrame): The DataFrame to modify.
+            data_dict (dict): Dictionary where keys and values will become column data.
+            titles (list): List of two strings for the new column names.
+
+        Returns:
+            pd.DataFrame: The modified DataFrame with new columns added.
+        """
+        # Convert dict keys and values to Series to ensure alignment with the DataFrame
+        # Use .astype("Int64") to allow for potential Null/NaN values while keeping integers
+        df_existing[titles[0]] = pd.Series(list(data_dict.keys())).astype("Int64")
+        df_existing[titles[1]] = pd.Series(list(data_dict.values())).astype("Int64")
+    
+        return df_existing
+    
     def run_reaction_template_pipeline(self, detected_reactions, cache):
         """
         Main execution pipeline for mapping reactions, identifying templates, 
@@ -324,14 +228,14 @@ def run_reaction_template_pipeline(self, detected_reactions, cache):
             # is_duplicate, processed_dict = compare_rdkit_fragments(...)
 
             # Update the dataframe with specific template mapping indices
-            reaction_dataframe = add_dict_as_new_columns(
+            reaction_dataframe = self._add_dict_as_new_columns(
                 reaction_dataframe,
                 template_mapped_dict,
                 titles = ["template_reactant_idx", "template_product_idx"]
             )
             
             # Append edge atoms information (crucial for building polymer chains)
-            reaction_dataframe = add_column_safe(
+            reaction_dataframe = self._add_column_safe(
                 reaction_dataframe,
                 edge_atoms,
                 "edge_atoms"
@@ -406,19 +310,11 @@ def execute_pipeline(self, detected_reactions, non_monomer_molecules_to_retain,
             print(f"Molecule Template File for {name}: {path}")
 
         # Extract and save unique references to a text file for user reference
-        references = extract_unique_references(detected_reactions)
-
-        # Print and save references to a text file
-        print("\nReferences used in detected reactions:")
-        for ref in references:
-            print(ref)
         
         with open(Path(cache) / "molecule_template_files.txt", "w") as f:
             for name, path in molecule_template_files.items():
                 f.write(f"{name}: {path}\n")
-            f.write("\nReferences:\n")
-            for ref in references:
-                f.write(f"{ref}\n")
+            
 
         return formatted_dict, molecule_template_files, molecule_images
         # TODO: If duplicates were found and skipped, re-index the dictionary and files to be continuous
diff --git a/AutoREACTER/reaction_template_builder/lunar_client/__init__.py b/AutoREACTER/reaction_preparation/lunar_client/__init__.py
similarity index 100%
rename from AutoREACTER/reaction_template_builder/lunar_client/__init__.py
rename to AutoREACTER/reaction_preparation/lunar_client/__init__.py
diff --git a/AutoREACTER/reaction_preparation/lunar_client/config.py b/AutoREACTER/reaction_preparation/lunar_client/config.py
new file mode 100644
index 0000000..e8b270f
--- /dev/null
+++ b/AutoREACTER/reaction_preparation/lunar_client/config.py
@@ -0,0 +1 @@
+LUNAR_ROOT_DIR = ''
diff --git a/AutoREACTER/reaction_template_builder/lunar_client/locate_lunar.py b/AutoREACTER/reaction_preparation/lunar_client/locate_lunar.py
similarity index 100%
rename from AutoREACTER/reaction_template_builder/lunar_client/locate_lunar.py
rename to AutoREACTER/reaction_preparation/lunar_client/locate_lunar.py
diff --git a/AutoREACTER/reaction_template_builder/lunar_client/lunar_api_wrapper.py b/AutoREACTER/reaction_preparation/lunar_client/lunar_api_wrapper.py
similarity index 100%
rename from AutoREACTER/reaction_template_builder/lunar_client/lunar_api_wrapper.py
rename to AutoREACTER/reaction_preparation/lunar_client/lunar_api_wrapper.py
diff --git a/AutoREACTER/reaction_template_builder/lunar_client/molecule_3d_preparation.py b/AutoREACTER/reaction_preparation/lunar_client/molecule_3d_preparation.py
similarity index 100%
rename from AutoREACTER/reaction_template_builder/lunar_client/molecule_3d_preparation.py
rename to AutoREACTER/reaction_preparation/lunar_client/molecule_3d_preparation.py
diff --git a/AutoREACTER/reaction_template_builder/lunar_client/molecule_template_preparation.py b/AutoREACTER/reaction_preparation/lunar_client/molecule_template_preparation.py
similarity index 100%
rename from AutoREACTER/reaction_template_builder/lunar_client/molecule_template_preparation.py
rename to AutoREACTER/reaction_preparation/lunar_client/molecule_template_preparation.py
diff --git a/AutoREACTER/reaction_template_builder/reaction_template_pipeline/__init__.py b/AutoREACTER/reaction_preparation/reaction_processor/__init__.py
similarity index 100%
rename from AutoREACTER/reaction_template_builder/reaction_template_pipeline/__init__.py
rename to AutoREACTER/reaction_preparation/reaction_processor/__init__.py
diff --git a/AutoREACTER/reaction_preparation/reaction_processor/atom_mapping.py b/AutoREACTER/reaction_preparation/reaction_processor/atom_mapping.py
new file mode 100644
index 0000000..cbcaa99
--- /dev/null
+++ b/AutoREACTER/reaction_preparation/reaction_processor/atom_mapping.py
@@ -0,0 +1,36 @@
+def smart_mapping(reactant, smarts_template, match_tuple):
+        """
+        Apply atom mapping from SMARTS template to reactant molecule based on match indices.
+        
+        This function transfers atom map numbers from a SMARTS template to corresponding
+        atoms in the reactant molecule using the provided match tuple that maps SMARTS
+        atom indices to reactant atom indices.
+        
+        Args:
+            reactant (Chem.Mol): The reactant molecule to apply mapping to
+            smarts_template (Chem.Mol): SMARTS pattern molecule with atom map numbers
+            match_tuple (tuple): Tuple mapping SMARTS atom indices to reactant atom indices
+            
+        Returns:
+            None: Modifies the reactant molecule in-place by setting atom map numbers
+            
+        Note:
+            Only atoms with non-zero map numbers in the SMARTS template are processed.
+            The function does nothing if match_tuple is empty.
+        """
+        if not match_tuple:
+            return
+        
+        # Create mapping from SMARTS atom indices to their map numbers
+        SMARTS_index_to_Map_Number = {}
+        for smarts_atom in smarts_template.GetAtoms():
+            map_num = smarts_atom.GetAtomMapNum()
+            if map_num != 0:  # Only process atoms with explicit mapping
+                SMARTS_index_to_Map_Number[smarts_atom.GetIdx()] = map_num
+        
+        # Apply mapping to corresponding atoms in reactant
+        for smarts_pos, map_num in SMARTS_index_to_Map_Number.items():
+            if smarts_pos < len(match_tuple):
+                atom_index_in_mol = match_tuple[smarts_pos]
+                atom = reactant.GetAtomWithIdx(atom_index_in_mol)
+                atom.SetAtomMapNum(map_num)
\ No newline at end of file
diff --git a/AutoREACTER/reaction_template_builder/reaction_template_pipeline/compare_rdkit_fragments.py b/AutoREACTER/reaction_preparation/reaction_processor/fragment_comparison.py
similarity index 100%
rename from AutoREACTER/reaction_template_builder/reaction_template_pipeline/compare_rdkit_fragments.py
rename to AutoREACTER/reaction_preparation/reaction_processor/fragment_comparison.py
diff --git a/AutoREACTER/reaction_preparation/reaction_processor/prepare_reactions.py b/AutoREACTER/reaction_preparation/reaction_processor/prepare_reactions.py
new file mode 100644
index 0000000..03d0c35
--- /dev/null
+++ b/AutoREACTER/reaction_preparation/reaction_processor/prepare_reactions.py
@@ -0,0 +1,711 @@
+
+"""
+Module for preparing chemical reactions for analysis, including atom mapping between reactants and products,
+reaction metadata extraction, and visualization utilities using RDKit.
+
+This module processes reaction SMARTS, applies atom mappings, identifies key reaction features (e.g., first shell,
+initiators, byproducts), and generates metadata and visualizations for downstream analysis. It also includes validation checks to ensure mapping 
+consistency and completeness.
+"""
+
+
+# WARNING:
+# When modifying this file for dataframe or any other indexing variables use idx and idxs, do not use index or indices or similar.
+
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Dict, List, Optional
+
+from rdkit import Chem
+from rdkit.Chem import AllChem, Draw, rdmolops
+from PIL.Image import Image
+import pandas as pd
+
+from AutoREACTER.detectors.reaction_detector import ReactionInstance
+from AutoREACTER.reaction_preparation.reaction_processor.utils import (
+    add_dict_as_new_columns, add_column_safe, compare_set, prepare_paths
+)
+from AutoREACTER.reaction_preparation.reaction_processor.walker import reaction_atom_walker
+
+
+class MappingError(Exception):
+    """Custom exception raised when atom mapping between reactants and products fails or is inconsistent."""
+
+
+class SMARTSParsingError(Exception):
+    """Custom exception raised when parsing SMILES or reaction SMARTS fails."""
+
+
+@dataclass(slots=True)
+class ReactionMetadata:
+    """
+    Stores comprehensive metadata for a single reaction including molecular structures, 
+    atom mappings, and analysis results.
+    reaction_id: Unique identifier for the reaction instance
+    reactant_combined_mol: RDKit molecule object representing combined reactants
+    product_combined_mol: RDKit molecule object representing combined products
+    reactant_to_product_mapping: Dictionary mapping reactant atom indices to product atom indices
+    product_to_reactant_mapping: Dictionary mapping product atom indices back to reactant atom indices
+    template_reactant_to_product_mapping: Optional dictionary mapping reactant indices in the template to product indices
+    edge_atoms: Optional list of reactant atom indices that are at the edge of the local environment
+    first_shell: Optional list of reactant atom indices in the first coordination shell (reaction center)
+    initiators: Optional list of reactant atom indices that are initiators (map numbers 1 or 2)
+    byproduct_indices: Optional list of reactant atom indices corresponding to detected byproducts
+    reaction_smarts: Optional string of the reaction SMARTS pattern
+    reactant_smarts: Optional string of the combined reactant SMILES
+    product_smiles: Optional string of the combined product SMILES
+    csv_path: Optional Path to the CSV file containing atom mappings and analysis
+    mol_3d_path: Optional Path to a file containing 3D structures of the molecules
+    reaction_dataframe: Optional pandas DataFrame containing detailed mapping and analysis results
+    delete_atom: Boolean indicating whether the reaction involves a delete atom (byproduct)
+    delete_atom_idx: Optional integer index of the reactant atom that corresponds to the byproduct
+    activity_stats: Boolean indicating whether this reaction should be included in activity statistics (e.g., not a duplicate)
+    """
+    reaction_id: int
+    reactant_combined_mol: Chem.Mol
+    product_combined_mol: Chem.Mol
+    reactant_to_product_mapping: Dict[int, int]
+    product_to_reactant_mapping: Dict[int, int]
+    template_reactant_to_product_mapping: Optional[Dict[int, int]] = None
+    edge_atoms: Optional[List[int]] = None
+    first_shell: Optional[List[int]] = None
+    initiators: Optional[List[int]] = None
+    byproduct_indices: Optional[List[int]] = None
+    reaction_smarts: Optional[str] = None
+    reactant_smarts: Optional[str] = None
+    product_smiles: Optional[str] = None
+    csv_path: Optional[Path] = None
+    mol_3d_path: Optional[Path] = None
+    reaction_dataframe: Optional[pd.DataFrame] = None
+    delete_atom: bool = True
+    delete_atom_idx: Optional[int] = None
+    activity_stats: bool = True
+
+
+class PrepareReactions:
+    """Processes chemical reactions: builds atom mappings, identifies reaction centers, and detects byproducts."""
+
+    def __init__(self, cache: Path):
+        """Initialize with cache directory for storing reaction CSVs."""
+        self.cache = Path(cache)
+        self.csv_cache = prepare_paths(self.cache, "csv_cache")
+
+    # --- PUBLIC ---
+
+    def prepare_reactions(self, reaction_instances: list[ReactionInstance]) -> list[ReactionMetadata]:
+        """
+        Main pipeline: processes reaction instances, detects duplicates, and enriches metadata with template mappings.
+        
+        Args:
+            reaction_instances: List of detected reaction instances to process
+            
+        Returns:
+            List of processed ReactionMetadata objects with template mappings and edge atoms
+        """
+        reactions_metadata = self._process_reaction_instances(reaction_instances)
+        reaction_metadata = self._detect_duplicates(reactions_metadata)
+        
+        for reaction in reactions_metadata:
+            # Skip reactions marked as duplicates
+            if not reaction.activity_stats:
+                continue  # Skip reactions marked as duplicates
+            
+            combined_reactant_molecule_object = reaction.reactant_combined_mol
+            reaction_dataframe = reaction.reaction_dataframe
+            csv_save_path = reaction.csv_path
+            
+            # Build full atom mapping dictionary from dataframe
+            fully_mapped_dict = reaction_dataframe.set_index("reactant_idx")["product_idx"].to_dict()
+            first_shell = reaction_dataframe["first_shell"].dropna().tolist()
+            
+            # Generate template mapping by walking reaction graph
+            template_mapped_dict, edge_atoms = reaction_atom_walker(
+                combined_reactant_molecule_object,
+                first_shell,
+                fully_mapped_dict
+            )
+            
+            # Add template mapping and edge atoms to dataframe
+            reaction_dataframe = add_dict_as_new_columns(
+                reaction_dataframe,
+                template_mapped_dict,
+                titles=["template_reactant_idx", "template_product_idx"]
+            )
+
+            # Add edge atoms as a new column in the dataframe
+            reaction_dataframe = add_column_safe(
+                reaction_dataframe,
+                edge_atoms,
+                "edge_atoms"
+            )
+            
+            # Save updated dataframe back to CSV and update metadata
+            reaction.reaction_dataframe = reaction_dataframe.copy()
+            # Save the updated dataframe with template mappings and edge atoms to CSV
+            reaction_dataframe.to_csv(csv_save_path, index=False)
+            # Update metadata with template mapping and edge atoms
+            reaction.edge_atoms = edge_atoms
+            # Store the template mapping in the metadata for later use
+            reaction.template_reactant_to_product_mapping = template_mapped_dict
+
+        return reaction_metadata
+    
+    # --- PIPELINE STEPS (PRIVATE) ---
+    
+    def _process_reaction_instances(self, detected_reactions: list[ReactionInstance]) -> list[ReactionMetadata]:
+        """
+        Converts ReactionInstance objects into ReactionMetadata by building molecules and running reactions.
+        
+        Args:
+            detected_reactions: List of detected reaction instances
+            
+        Returns:
+            List of ReactionMetadata objects with atom mappings
+        """
+        csv_cache = self.csv_cache
+        reaction_metadata = []
+
+        for reaction in detected_reactions:
+            rxn_smarts = reaction.reaction_smarts
+            reactant_smiles_1 = reaction.monomer_1.smiles
+            same_reactants = reaction.same_reactants
+            
+            # Handle case where both reactants are identical
+            if same_reactants:
+                reactant_smiles_2 = reactant_smiles_1
+            else:
+                reactant_smiles_2 = reaction.monomer_2.smiles
+
+            delete_atoms = reaction.delete_atom
+
+            # Build reaction and reactant molecules
+            rxn = self._build_reaction(rxn_smarts)
+            # This function also runs the reaction and builds metadata for each product set, including atom mappings and byproduct detection
+            mol_reactant_1, mol_reactant_2 = self._build_reactants(reactant_smiles_1, reactant_smiles_2)
+            # Build reaction tuple based on whether reactants are the same or different. If same, only one ordering is needed. If different, both orderings are processed to account for reaction directionality.
+            reaction_tuple = self._build_reaction_tuple(same_reactants, mol_reactant_1, mol_reactant_2)
+
+            # Process products and build metadata
+            reaction_metadata = self._process_reaction_products(
+                rxn,
+                csv_cache,
+                reaction_tuple,
+                delete_atoms,
+                reaction_metadata
+            )
+        
+        return reaction_metadata
+
+    def _detect_duplicates(self, reaction_metadata_list: list[ReactionMetadata]) -> list[ReactionMetadata]:
+        """
+        Filters duplicate reactions based on reactant and product molecules.
+        
+        Args:
+            reaction_metadata_list: List of reaction metadata to filter
+            
+        Returns:
+            List of unique reactions; duplicates marked with activity_stats=False
+        """
+        unique_metadata: list[ReactionMetadata] = []
+        
+        for reaction in reaction_metadata_list:
+            # Compare current reaction's reactants and products against unique reactions collected so far
+            reactants = reaction.reactant_combined_mol
+            products = reaction.product_combined_mol
+
+            # Keep reaction if it's unique, otherwise mark as duplicate
+            if compare_set(unique_metadata, reactants, products):
+                unique_metadata.append(reaction)
+            else:
+                reaction.activity_stats = False
+                
+        return unique_metadata
+    
+    def _process_reaction_products(self, 
+                                   rxn: Chem.rdChemReactions.ChemicalReaction, 
+                                   csv_cache: Path, 
+                                   reaction_tuple: list, 
+                                   delete_atoms: bool = True,
+                                   reaction_metadata: Optional[list[ReactionMetadata]] = None
+                                   ) -> list[ReactionMetadata]:
+        """
+        Runs reactions on reactant pairs and builds metadata for each product set.
+        
+        Args:
+            rxn: RDKit ChemicalReaction object
+            csv_cache: Path to cache directory for saving CSVs
+            reaction_tuple: List of reactant pairs to process
+            delete_atoms: Whether to detect and track byproducts
+            reaction_metadata: Accumulator list for metadata objects
+            
+        Returns:
+            Updated list of ReactionMetadata objects
+        """
+        if reaction_metadata is None:
+            reaction_metadata = []
+        
+        for pair in reaction_tuple:
+            r1, r2 = Chem.Mol(pair[0]), Chem.Mol(pair[1])
+            
+            # Assign unique map numbers and isotopes to track atoms through reaction
+            self._assign_atom_map_numbers_and_set_isotopes(r1, r2)
+            # Run the reaction to get products
+            products = rxn.RunReactants((r1, r2))
+            
+            # If no products are generated, skip to the next reactant pair
+            if not products:
+                continue
+            
+            # Process each product set generated by the reaction
+            for product_set in products:
+                df = pd.DataFrame(columns=["reactant_idx", "product_idx"])
+
+                # Combine molecules for mapping
+                reactant_combined = Chem.CombineMols(r1, r2)
+                product_combined = Chem.CombineMols(*product_set)
+
+                # Restore atom map numbers from isotopes (which survive the reaction)
+                self._reassign_atom_map_numbers_by_isotope(product_combined)
+                
+                # Build bidirectional atom index mappings
+                mapping_dict, df = self._build_atom_index_mapping(reactant_combined, product_combined)
+                reverse_mapping = {v: k for k, v in mapping_dict.items()}
+
+                # Restore map numbers for visualization
+                self._reveal_template_map_numbers(product_combined)
+
+                # Validate mapping consistency
+                self._validate_mapping(df, reactant_combined, product_combined)
+
+                # Identify atoms involved in reaction center and initiators
+                first_shell, initiator_idxs = self._assign_first_shell_and_initiators(
+                    reactant_combined,
+                    product_combined,
+                    reverse_mapping
+                )
+
+                # Detect byproducts (smallest fragments)
+                byproduct_reactant_idxs = self._detect_byproducts(product_combined, reverse_mapping, delete_atoms)
+
+                # Combine all mapping data into single dataframe
+                df_combined = pd.concat([
+                    df,
+                    pd.Series(first_shell, name="first_shell"),
+                    pd.Series(initiator_idxs, name="initiators"),
+                    pd.Series(byproduct_reactant_idxs, name="byproduct_idx")
+                ], axis=1).astype(pd.Int64Dtype())
+
+                total_products = len(reaction_metadata) + 1
+
+                # Clear isotopes before saving to restore normal chemistry
+                self._clear_isotopes(reactant_combined, product_combined)
+
+                # Save mapping dataframe to CSV
+                df_combined.to_csv(csv_cache / f"reaction_{total_products}.csv", index=False)
+
+                # Create and store metadata object
+                reaction_metadata.append(
+                    ReactionMetadata(
+                        reaction_id=total_products,
+                        reactant_combined_mol=reactant_combined,
+                        product_combined_mol=product_combined,
+                        reactant_to_product_mapping=mapping_dict,
+                        product_to_reactant_mapping=reverse_mapping,
+                        first_shell=first_shell,
+                        initiators=initiator_idxs,
+                        byproduct_indices=byproduct_reactant_idxs,
+                        csv_path=csv_cache / f"reaction_{total_products}.csv",
+                        reaction_dataframe=df_combined,
+                        delete_atom=delete_atoms,
+                        delete_atom_idx=byproduct_reactant_idxs[0] if byproduct_reactant_idxs else None,
+                        activity_stats=True
+                    )
+                )
+
+        return reaction_metadata
+    
+    # --- CORE REACTION LOGIC ---
+    
+    def _assign_first_shell_and_initiators(self, 
+                                           reactant_combined: Chem.Mol, 
+                                           product_combined: Chem.Mol, 
+                                           reversed_mapping_dict: dict[int, int]) -> tuple[list[int], list[int]]:
+        """
+        Identifies atoms in the first coordination shell (atoms with map numbers < 999) and initiator atoms.
+        Initiators are atoms with map numbers 1 or 2 (typically the reactive centers).
+        
+        Args:
+            reactant_combined: Combined reactant molecule
+            product_combined: Combined product molecule
+            reversed_mapping_dict: Product idx -> Reactant idx mapping
+            
+        Returns:
+            Tuple of (first_shell atom indices, initiator atom indices)
+            
+        Raises:
+            ValueError: If exactly 2 initiators are not found
+        """
+        first_shell = []
+        initiator_idxs = []
+
+        for p_atom in product_combined.GetAtoms():
+            # Only process atoms with valid map numbers (< 999 indicates non-byproduct)
+            if p_atom.GetAtomMapNum() < 999:
+                p_idx = p_atom.GetIdx()
+
+                if p_idx not in reversed_mapping_dict:
+                    raise ValueError(f"Mapping error: product atom {p_idx} not found in mapping_dict")
+
+                r_idx = reversed_mapping_dict[p_idx]
+                atom = reactant_combined.GetAtomWithIdx(r_idx)
+                atom.SetAtomMapNum(p_atom.GetAtomMapNum())
+
+                first_shell.append(r_idx)
+
+                # Initiators are atoms with map numbers 1 or 2
+                if p_atom.GetAtomMapNum() in [1, 2]:
+                    initiator_idxs.append(r_idx)
+        
+        if len(initiator_idxs) != 2:
+            raise ValueError(f"Expected 2 initiators, got {len(initiator_idxs)}: {initiator_idxs}")
+
+        return first_shell, initiator_idxs
+    
+    def _detect_byproducts(self, 
+                          product_combined: Chem.Mol, 
+                          reversed_mapping_dict: dict[int, int], 
+                          delete_atoms: bool) -> list[int]:
+        """
+        Identifies byproduct atoms (smallest molecular fragment) and maps them back to reactant space.
+        
+        Args:
+            product_combined: Combined product molecule
+            reversed_mapping_dict: Product idx -> Reactant idx mapping
+            delete_atoms: Whether to perform byproduct detection
+            
+        Returns:
+            List of reactant indices corresponding to byproduct atoms
+        """
+        # If delete_atoms is False, we skip byproduct detection and return an empty list
+        if not delete_atoms:
+            return []
+
+        # Fragment molecule and find smallest fragment
+        frags = rdmolops.GetMolFrags(product_combined, asMols=True)
+        smallest = min(frags, key=lambda m: m.GetNumAtoms())
+
+        byproduct_reactant_indices = []
+
+        # Map byproduct atoms back to reactant indices
+        for atom in smallest.GetAtoms():
+            p_idx = atom.GetIdx()
+            if p_idx in reversed_mapping_dict:
+                byproduct_reactant_indices.append(reversed_mapping_dict[p_idx])
+
+        return byproduct_reactant_indices
+    
+    def _validate_mapping(self, df: pd.DataFrame, reactant: Chem.Mol, product: Chem.Mol) -> None:
+        """
+        Validates atom mapping consistency: checks for required columns, duplicates, bounds, and completeness.
+        
+        Args:
+            df: Dataframe containing reactant_idx and product_idx columns
+            reactant: Reactant molecule
+            product: Product molecule
+            
+        Raises:
+            MappingError: If any validation check fails
+        """
+        # Ensure dataframe exists and has required columns
+        if df is None or df.empty:
+            raise MappingError("Mapping validation failed: empty dataframe")
+
+        # Check for required columns
+        required_cols = {"reactant_idx", "product_idx"}
+        if not required_cols.issubset(df.columns):
+            raise MappingError(f"Mapping validation error: required columns {required_cols} not found in dataframe.")
+
+        # Extract indices and perform validation checks
+        r_idxs = df["reactant_idx"].dropna().tolist()
+        p_idxs = df["product_idx"].dropna().tolist()
+
+        # Atom counts must match
+        if len(r_idxs) != len(p_idxs):
+            raise MappingError(f"Mapping validation error: mismatch in atom counts between reactant and product.")
+
+        # No duplicate mappings (1-to-1 mapping required)
+        if len(set(r_idxs)) != len(r_idxs):
+            raise MappingError(f"Mapping validation error: duplicate indices found in reactant mapping.")
+        if len(set(p_idxs)) != len(p_idxs):
+            raise MappingError(f"Mapping validation error: duplicate indices found in product mapping.")
+
+        # Indices must be within molecule bounds
+        if any(idx >= reactant.GetNumAtoms() for idx in r_idxs):
+            raise MappingError(f"Mapping validation error: reactant index out of bounds.")
+        if any(idx >= product.GetNumAtoms() for idx in p_idxs):
+            raise MappingError(f"Mapping validation error: product index out of bounds.")
+
+        # All atoms must be mapped (complete mapping)
+        if len(r_idxs) != reactant.GetNumAtoms():
+            raise MappingError(f"Mapping validation error: incomplete mapping for reactant.")
+        if len(p_idxs) != product.GetNumAtoms():
+            raise MappingError(f"Mapping validation error: incomplete mapping for product.")
+
+    # --- ATOM MAPPING ---
+    
+    def _assign_atom_map_numbers_and_set_isotopes(self, r1: Chem.Mol, r2: Chem.Mol) -> None:
+        """
+        Assigns unique map numbers and isotopes to reactant atoms for tracking through reaction.
+        Isotopes survive RDKit's reaction engine, allowing atom identity recovery post-reaction.
+        
+        Args:
+            r1: First reactant molecule
+            r2: Second reactant molecule
+        """
+        # Assign map numbers 1001+ to first reactant atoms
+        for atom in r1.GetAtoms():
+            idx = 1001 + atom.GetIdx()
+            atom.SetAtomMapNum(idx)
+            atom.SetIsotope(idx)  # Isotope survives the reaction
+
+        # Assign map numbers 2001+ to second reactant atoms
+        for atom in r2.GetAtoms():
+            idx = 2001 + atom.GetIdx()
+            atom.SetAtomMapNum(idx)
+            atom.SetIsotope(idx) # Isotope survives the reaction
+			
+    def _reassign_atom_map_numbers_by_isotope(self, mol: Chem.Mol) -> None:
+        """
+        Restores atom map numbers from isotope values after reaction.
+        RDKit's reaction engine preserves isotopes, allowing recovery of original atom identities.
+        
+        Args:
+            mol: Product molecule with isotope information
+        """
+        for atom in mol.GetAtoms():
+            surviving_idx = atom.GetIsotope()
+            if surviving_idx != 0:
+                atom.SetAtomMapNum(surviving_idx)  # Restore original map number
+                atom.SetIsotope(0)                 # Clear isotope to restore normal chemistry
+
+    def _build_atom_index_mapping(self, 
+                                  reactant_combined: Chem.Mol, 
+                                  product_combined: Chem.Mol) -> tuple[dict[int, int], pd.DataFrame]:
+        """
+        Builds bidirectional atom index mapping between reactants and products using map numbers.
+        
+        Args:
+            reactant_combined: Combined reactant molecule
+            product_combined: Combined product molecule
+            
+        Returns:
+            Tuple of (mapping dict: reactant_idx -> product_idx, dataframe with mapping)
+        """
+        mapping_dict = {}
+        
+        # Pre-index product atoms by map number for O(1) lookup
+        product_map = {
+            atom.GetAtomMapNum(): atom.GetIdx()
+            for atom in product_combined.GetAtoms()
+            if atom.GetAtomMapNum() != 0
+        }
+        
+        rows = []
+        for r_atom in reactant_combined.GetAtoms():
+            r_map_num = r_atom.GetAtomMapNum()
+
+            # Match reactant atom to product atom via map number
+            if r_map_num != 0 and r_map_num in product_map:
+                r_idx = r_atom.GetIdx()
+                p_idx = product_map[r_map_num]
+
+                mapping_dict[r_idx] = p_idx
+                rows.append({
+                    "reactant_idx": r_idx,
+                    "product_idx": p_idx
+                })
+
+        df = pd.DataFrame(rows)
+        return mapping_dict, df
+
+    def _reveal_template_map_numbers(self, mol: Chem.Mol) -> None:
+        """
+        Restores map numbers from RDKit's internal 'old_mapno' property for visualization.
+        RDKit stores original map numbers in this property after reaction execution.
+        
+        Args:
+            mol: Product molecule
+        """
+        for atom in mol.GetAtoms():
+            if atom.HasProp('old_mapno'):
+                map_num = atom.GetIntProp('old_mapno')
+                atom.SetAtomMapNum(map_num)
+
+    def _clear_isotopes(self, mol_1: Chem.Mol, mol_2: Chem.Mol) -> None:
+        """
+        Clears isotope values from molecules to restore normal chemistry after using isotopes for atom tracking.
+        
+        Args:
+            mol_1: First molecule to clear
+            mol_2: Second molecule to clear
+        """
+        for atom in mol_1.GetAtoms():
+            atom.SetIsotope(0)
+        for atom in mol_2.GetAtoms():
+            atom.SetIsotope(0)
+
+    # --- BUILDERS ---
+    
+    def _build_reaction(self, rxn_smarts: str) -> Chem.rdChemReactions.ChemicalReaction:
+        """Builds RDKit ChemicalReaction object from SMARTS string."""
+        return AllChem.ReactionFromSmarts(rxn_smarts)
+    
+    def _build_reactants(self, reactant_smiles_1: str, reactant_smiles_2: str) -> tuple[Chem.Mol, Chem.Mol]:
+        """
+        Builds reactant molecules from SMILES strings with explicit hydrogens added.
+        
+        Args:
+            reactant_smiles_1: SMILES string for first reactant
+            reactant_smiles_2: SMILES string for second reactant
+            
+        Returns:
+            Tuple of (reactant1 molecule, reactant2 molecule) with explicit hydrogens
+        """
+        mol_reactant_1 = Chem.MolFromSmiles(reactant_smiles_1)
+        if mol_reactant_1 is None:
+            raise SMARTSParsingError(f"Failed to parse first reactant SMILES: {reactant_smiles_1!r}")
+        mol_reactant_1 = Chem.AddHs(mol_reactant_1)
+        
+        mol_reactant_2 = Chem.MolFromSmiles(reactant_smiles_2)
+        if mol_reactant_2 is None:
+            raise SMARTSParsingError(f"Failed to parse second reactant SMILES: {reactant_smiles_2!r}")
+        mol_reactant_2 = Chem.AddHs(mol_reactant_2)
+        
+        return mol_reactant_1, mol_reactant_2
+    
+    def _build_reaction_tuple(self, same_reactants: bool, mol_reactant_1: Chem.Mol, mol_reactant_2: Chem.Mol) -> list:
+        """
+        Builds list of reactant pairs to process. If reactants are identical, returns single pair.
+        Otherwise returns both orderings to account for reaction directionality.
+        
+        Args:
+            same_reactants: Whether both reactants are identical
+            mol_reactant_1: First reactant molecule
+            mol_reactant_2: Second reactant molecule
+            
+        Returns:
+            List of reactant pairs [[r1, r2], ...] to process
+        """
+        if same_reactants:
+            return [[mol_reactant_1, mol_reactant_1]]
+
+        return [[mol_reactant_1, mol_reactant_2], [mol_reactant_2, mol_reactant_1]]
+    
+    # --- HELPERS ---
+    
+    def _is_consecutive(self, num_list: list[int]) -> bool:
+        """
+        Checks if list contains consecutive integers with no duplicates.
+        
+        Args:
+            num_list: List of integers to check
+            
+        Returns:
+            True if list is consecutive and has no duplicates, False otherwise
+        """
+        if not num_list:
+            return False
+
+        return (
+            len(set(num_list)) == len(num_list)
+            and max(num_list) - min(num_list) + 1 == len(num_list)
+        )
+
+    # --- VISUALIZATION ---
+    
+    def reaction_templates_highlighted_image_grid(
+        self,
+        metadata_list: List[ReactionMetadata],
+        highlight_type: str = "template",
+    ) -> Image:
+        """
+        Generates grid image of reactions with highlighted atoms based on type.
+        
+        Args:
+            metadata_list: List of reaction metadata to visualize
+            highlight_type: Type of atoms to highlight - "template", "edge", "initiators", or "delete"
+            
+        Returns:
+            PIL Image containing 2-column grid of reactant-product pairs with highlighted atoms
+        """
+        mols = []
+        highlight_lists = []
+        highlight_colors = []
+
+        for metadata in metadata_list:
+            reactant = Chem.RWMol(metadata.reactant_combined_mol)
+            product = Chem.RWMol(metadata.product_combined_mol)
+
+            # Clear atom maps for clean visualization
+            for atom in reactant.GetAtoms():
+                atom.SetAtomMapNum(0)
+            for atom in product.GetAtoms():
+                atom.SetAtomMapNum(0)
+
+            df = metadata.reaction_dataframe
+            atoms: List[int] = []
+            color_map: Dict[int, tuple] = {}
+
+            # Select atoms to highlight based on type
+            if highlight_type == "template":
+                atoms = list((metadata.template_reactant_to_product_mapping or {}).keys())
+                for a in atoms:
+                    color_map[a] = (0.2, 0.6, 1.0)  # blue
+
+            elif highlight_type == "edge":
+                atoms = metadata.edge_atoms or []
+                for a in atoms:
+                    color_map[a] = (1.0, 0.4, 0.0)  # orange
+
+            elif highlight_type == "initiators":
+                atoms = df["initiators"].dropna().astype(int).tolist() if df is not None else []
+                for a in atoms:
+                    color_map[a] = (0.0, 0.8, 0.2)  # green
+
+            elif highlight_type == "delete":
+                if metadata.delete_atom and metadata.byproduct_indices:
+                    atoms = metadata.byproduct_indices
+                    for a in atoms:
+                        color_map[a] = (1.0, 0.0, 0.0)  # red
+
+            mols.extend([reactant, product])
+            
+            # Build atom mappings for highlighting
+            forward_map = metadata.reactant_to_product_mapping
+            reactant_atoms = atoms
+
+            # Map reactant atoms to product atoms
+            product_atoms = []
+            for r_idx in reactant_atoms:
+                if r_idx in forward_map:
+                    product_atoms.append(forward_map[r_idx])
+
+            # Build color maps for reactant and product
+            reactant_color_map = {a: color_map[a] for a in reactant_atoms}
+            product_color_map = {p: color_map[r] for r, p in forward_map.items() if r in reactant_atoms}
+
+            highlight_lists.append(reactant_atoms)
+            highlight_lists.append(product_atoms)
+            highlight_colors.append(reactant_color_map)
+            highlight_colors.append(product_color_map)
+
+        # Generate grid image with 2 molecules per row
+        img = Draw.MolsToGridImage(
+            mols,
+            molsPerRow=2,
+            highlightAtomLists=highlight_lists,
+            highlightAtomColors=highlight_colors,
+            subImgSize=(400, 400),
+            useSVG=False,
+        )
+        return img
diff --git a/AutoREACTER/reaction_preparation/reaction_processor/utils.py b/AutoREACTER/reaction_preparation/reaction_processor/utils.py
new file mode 100644
index 0000000..7f4a571
--- /dev/null
+++ b/AutoREACTER/reaction_preparation/reaction_processor/utils.py
@@ -0,0 +1,161 @@
+from rdkit import Chem
+import pandas as pd
+from typing import List
+from pathlib import Path
+import os
+
+from AutoREACTER.detectors.reaction_detector import ReactionInstance
+
+def prepare_paths( cache, subdir):
+    """
+    Create and prepare directory structure for CSV output files.
+        
+    Args:
+        cache (str or Path): Base cache directory path
+            
+    Returns:
+        Path: Path object pointing to the CSV cache directory
+            
+    Example:
+        >>> csv_dir = prepare_paths("/path/to/cache")
+        >>> print(csv_dir)  # /path/to/cache/csv
+    """
+    csv_cache = Path(cache) / subdir
+    os.makedirs(csv_cache, exist_ok=True)
+    return csv_cache
+
+def add_dict_as_new_columns(df_existing, data_dict, titles=("template_reactant_idx", "template_product_idx")):
+    df_existing[titles[0]] = pd.Series(list(data_dict.keys())).astype("Int64")
+    df_existing[titles[1]] = pd.Series(list(data_dict.values())).astype("Int64")
+    return df_existing
+
+
+def add_column_safe(df, list_data, column_name):
+    df[column_name] = pd.Series(list_data).astype("Int64")
+    return df
+
+
+def extract_unique_references(detected_reactions: List[ReactionInstance]) -> list[str]:
+    seen = set()
+    refs = []
+
+    for metadata in detected_reactions:
+        # Prefer the correct 'references' attribute on ReactionInstance, with
+        # a backward-compatible fallback to 'reference' if present.
+        ref = getattr(metadata, "references", None)
+        if ref is None:
+            ref = getattr(metadata, "reference", {}) or {}
+        else:
+            if ref is None:
+                ref = {}
+
+        for v in ref.values():
+            if isinstance(v, str):
+                if v not in seen:
+                    seen.add(v)
+                    refs.append(v)
+
+            elif isinstance(v, (list, tuple)):
+                for u in v:
+                    if isinstance(u, str) and u not in seen:
+                        seen.add(u)
+                        refs.append(u)
+
+            elif isinstance(v, dict):
+                for u in v.values():
+                    if isinstance(u, str) and u not in seen:
+                        seen.add(u)
+                        refs.append(u)
+
+    return refs
+
+def compare_set(reaction_metadata_list, _react2, _prod2):
+    """
+    Compare reactant + product pair against existing reactions.
+    Ignores atom mapping.
+
+    Returns False if duplicate pair found.
+    """
+
+    # normalize new pair
+    react2 = Chem.Mol(_react2)
+    prod2 = Chem.Mol(_prod2)
+
+    for atom in react2.GetAtoms():
+        atom.SetAtomMapNum(0)
+    for atom in prod2.GetAtoms():
+        atom.SetAtomMapNum(0)
+
+    smi_r2 = Chem.MolToSmiles(react2, canonical=True)
+    smi_p2 = Chem.MolToSmiles(prod2, canonical=True)
+
+    for metadata in reaction_metadata_list:
+        react1 = Chem.Mol(metadata.reactant_combined_mol)
+        prod1 = Chem.Mol(metadata.product_combined_mol)
+
+        for atom in react1.GetAtoms():
+            atom.SetAtomMapNum(0)
+        for atom in prod1.GetAtoms():
+            atom.SetAtomMapNum(0)
+
+        smi_r1 = Chem.MolToSmiles(react1, canonical=True)
+        smi_p1 = Chem.MolToSmiles(prod1, canonical=True)
+
+        if smi_r1 == smi_r2 and smi_p1 == smi_p2:
+            return False
+
+    return True
+
+
+def compare_rdkit_molecules_canonical(data_smiles_list, mol_smi_2):
+    """
+    Compares two RDKit molecule SMILES strings to determine if they
+    represent the same chemical structure using canonical SMILES.
+
+    Args:
+        data_smiles_list (list): List of SMILES strings to compare against.
+        mol_smi_2 (str): SMILES string of the second molecule.
+
+    Returns:
+        bool: True if molecules are chemically identical, False otherwise.
+    """
+    if not mol_smi_2:
+        return data_smiles_list, False
+    mol2 = Chem.MolFromSmiles(mol_smi_2)
+    if mol2 is None:
+        return data_smiles_list, False
+        
+    for mol_smi_1 in data_smiles_list:
+        mol1 = Chem.MolFromSmiles(mol_smi_1)
+        
+        # Handle cases where SMILES might be invalid
+        if mol1 is None or mol2 is None:
+            return data_smiles_list, False  # or raise a ValueError
+        # Generate canonical SMILES and compare them
+        canonical_smi_1 = Chem.MolToSmiles(mol1, canonical=True)
+        canonical_smi_2 = Chem.MolToSmiles(mol2, canonical=True)
+
+        if canonical_smi_1 == canonical_smi_2:
+            return data_smiles_list, True
+    
+    data_smiles_list.append(mol_smi_2)
+    return data_smiles_list, False
+
+
+
+def prep_for_3d_molecule_generation(data_smiles_list, molecule_dict_csv_path_dict):
+    molecule_dict = {}
+    for i, smiles in enumerate(data_smiles_list):
+        key = f"data_{i+1}"
+        molecule = Chem.MolFromSmiles(smiles)
+        if molecule is None:
+            raise ValueError(f"Invalid SMILES: {smiles}")
+        molecule = Chem.AddHs(molecule)
+        molecule_dict[key] = molecule
+    for i, (key, value) in enumerate(molecule_dict_csv_path_dict.items()):
+        reactant = value.get("reactant")
+        product = value.get("product")
+        if reactant and product:
+            molecule_dict[f"pre_{i+1}"] = reactant
+            molecule_dict[f"post_{i+1}"] = product
+    return molecule_dict
diff --git a/AutoREACTER/reaction_template_builder/reaction_template_pipeline/walker.py b/AutoREACTER/reaction_preparation/reaction_processor/walker.py
similarity index 98%
rename from AutoREACTER/reaction_template_builder/reaction_template_pipeline/walker.py
rename to AutoREACTER/reaction_preparation/reaction_processor/walker.py
index 36d939c..b6d7c97 100644
--- a/AutoREACTER/reaction_template_builder/reaction_template_pipeline/walker.py
+++ b/AutoREACTER/reaction_preparation/reaction_processor/walker.py
@@ -105,7 +105,8 @@ def product_atom_walker(template_indexes, MAP_dict):
     """
     template_mapped_dict = {}
     for i in template_indexes:
-        print(f"Mapping reactant atom index {i}")
+        # debug print to trace which reactant atom index is being processed
+        # print(f"Mapping reactant atom index {i}")
         # Check if the reactant atom index exists in our mapping dictionary
         atom = MAP_dict.get(i)
         if atom is not None:
diff --git a/AutoREACTER/reaction_template_builder/lunar_client/config.py b/AutoREACTER/reaction_template_builder/lunar_client/config.py
deleted file mode 100644
index 50954f3..0000000
--- a/AutoREACTER/reaction_template_builder/lunar_client/config.py
+++ /dev/null
@@ -1,5 +0,0 @@
-import os
-
-# Root directory for the LUNAR installation.
-# Configure via the LUNAR_ROOT_DIR environment variable; defaults to None if unset.
-LUNAR_ROOT_DIR = os.environ.get("LUNAR_ROOT_DIR") or None
diff --git a/AutoREACTER/reaction_template_builder/reaction_template_pipeline/map_reactant_atoms.py b/AutoREACTER/reaction_template_builder/reaction_template_pipeline/map_reactant_atoms.py
deleted file mode 100644
index c13dad4..0000000
--- a/AutoREACTER/reaction_template_builder/reaction_template_pipeline/map_reactant_atoms.py
+++ /dev/null
@@ -1,714 +0,0 @@
-"""
-Reaction Mapping and Analysis Module
-
-This module provides tools for processing chemical reactions, mapping atoms between
-reactants and products, and generating comprehensive analysis data for polymer chemistry.
-It uses RDKit for molecular operations and pandas for data management.
-
-Key features:
-- Atom mapping between reactants and products
-- Reaction validation and consistency checking
-- CSV export of mapping data
-- Visualization of reaction grids
-- Support for polymerization reactions
-
-Author: Janitha Mahanthe
-Date: 1/21/2026
-Version: 0.1.0
-# TODO: Return mols with mapped atoms for visualization for each reaction with highlighting
-"""
-
-from rdkit import Chem
-from rdkit.Chem import AllChem, rdmolops
-import pandas as pd
-from pathlib import Path
-import os
-import itertools
-try:
-    # Case 1: correct when reaction_template_pipeline is a proper package
-    from .util import compare_products
-
-except (ImportError, ModuleNotFoundError):
-    try:
-        # Case 2: fully-qualified import under your installed namespace
-        from reaction_lammps_mupt.reaction_template_builder.reaction_template_pipeline.util import (
-            compare_products,
-        )
-
-    except (ImportError, ModuleNotFoundError):
-        try:
-            # Case 3: if reaction_template_pipeline is installed as a top-level package
-            from reaction_template_pipeline.util import compare_products
-
-        except (ImportError, ModuleNotFoundError):
-            # Case 4: running as a loose script from the same directory
-            from util import compare_products
-
-
-def is_number_in_set(set_of_tuples, reactant):
-    """
-    Find a tuple in the set that contains atom indices matching specific atom map numbers.
-    
-    This function searches for atoms in the reactant molecule that have atom map numbers
-    101 or 102, then checks if those atom indices exist in any tuple within the provided set.
-    
-    Args:
-        set_of_tuples (set of tuples): Set of atom index tuples to search through
-        reactant (Chem.Mol): RDKit molecule object to search for mapped atoms
-        
-    Returns:
-        tuple: The first tuple found containing a matching atom index
-        
-    Raises:
-        ValueError: If no matching atom is found in the provided set of tuples
-        
-    Example:
-        >>> matches = {(0, 1, 2), (3, 4, 5)}
-        >>> result = is_number_in_set(matches, reactant_mol)
-    """
-    for atom in reactant.GetAtoms():
-        # Check for specific atom map numbers (101 or 102)
-        if atom.GetAtomMapNum() == 101 or atom.GetAtomMapNum() == 102:
-            x = atom.GetIdx()
-            for t in set_of_tuples:
-                if x in t:
-                    print(f"Found matching atom index {x} in tuple {t}")
-                    return t
-    raise ValueError("No matching atom found in the provided set of tuples.")
-
-
-def smart_mapping(reactant, smarts_template, match_tuple):
-    """
-    Apply atom mapping from SMARTS template to reactant molecule based on match indices.
-    
-    This function transfers atom map numbers from a SMARTS template to corresponding
-    atoms in the reactant molecule using the provided match tuple that maps SMARTS
-    atom indices to reactant atom indices.
-    
-    Args:
-        reactant (Chem.Mol): The reactant molecule to apply mapping to
-        smarts_template (Chem.Mol): SMARTS pattern molecule with atom map numbers
-        match_tuple (tuple): Tuple mapping SMARTS atom indices to reactant atom indices
-        
-    Returns:
-        None: Modifies the reactant molecule in-place by setting atom map numbers
-        
-    Note:
-        Only atoms with non-zero map numbers in the SMARTS template are processed.
-        The function does nothing if match_tuple is empty.
-    """
-    if not match_tuple:
-        return
-    
-    # Create mapping from SMARTS atom indices to their map numbers
-    SMARTS_index_to_Map_Number = {}
-    for smarts_atom in smarts_template.GetAtoms():
-        map_num = smarts_atom.GetAtomMapNum()
-        if map_num != 0:  # Only process atoms with explicit mapping
-            SMARTS_index_to_Map_Number[smarts_atom.GetIdx()] = map_num
-    
-    # Apply mapping to corresponding atoms in reactant
-    for smarts_pos, map_num in SMARTS_index_to_Map_Number.items():
-        if smarts_pos < len(match_tuple):
-            atom_index_in_mol = match_tuple[smarts_pos]
-            atom = reactant.GetAtomWithIdx(atom_index_in_mol)
-            atom.SetAtomMapNum(map_num)
-
-
-def is_consecutive(num_list):
-    """
-    Check if a list of numbers forms a consecutive sequence without gaps.
-    
-    Args:
-        num_list (list): List of integers to check
-        
-    Returns:
-        bool: True if the numbers are consecutive, False otherwise
-        
-    Examples:
-        >>> is_consecutive([1, 2, 3, 4])  # Returns True
-        >>> is_consecutive([1, 3, 4])     # Returns False
-        >>> is_consecutive([])            # Returns False
-    """
-    if not num_list:
-        return False
-    
-    # Check for consecutive sequence: unique numbers and max-min+1 equals length
-    return (len(set(num_list)) == len(num_list) and 
-            max(num_list) - min(num_list) + 1 == len(num_list))
-
-
-def prepare_paths(cache):
-    """
-    Create and prepare directory structure for CSV output files.
-    
-    Args:
-        cache (str or Path): Base cache directory path
-        
-    Returns:
-        Path: Path object pointing to the CSV cache directory
-        
-    Example:
-        >>> csv_dir = prepare_paths("/path/to/cache")
-        >>> print(csv_dir)  # /path/to/cache/csv
-    """
-    csv_cache = Path(cache) / "csv" / "reactant_product_mapping"
-    os.makedirs(csv_cache, exist_ok=True)
-    return csv_cache
-
-
-def build_reaction(rxn_smarts):
-    """
-    Create an RDKit reaction object from SMARTS string.
-    
-    Args:
-        rxn_smarts (str): Reaction SMARTS string
-        
-    Returns:
-        Chem.rdChemReactions.ChemicalReaction: RDKit reaction object
-        
-    Example:
-        >>> rxn = build_reaction("[C:1]=[O:2]>>[C:1]-[O:2]")
-    """
-    return AllChem.ReactionFromSmarts(rxn_smarts)
-
-
-def build_reactants(reactant_smiles_1, reactant_smiles_2):
-    """
-    Create RDKit molecule objects from SMILES strings with explicit hydrogens.
-    
-    Args:
-        reactant_smiles_1 (str): SMILES string for first reactant
-        reactant_smiles_2 (str): SMILES string for second reactant
-        
-    Returns:
-        tuple: (mol_reactant_1, mol_reactant_2) - Both molecules with explicit hydrogens
-        
-    Example:
-        >>> mol1, mol2 = build_reactants("CCO", "CC=O")
-    """
-    mol_reactant_1 = Chem.MolFromSmiles(reactant_smiles_1)
-    mol_reactant_1 = Chem.AddHs(mol_reactant_1)  # Add explicit hydrogens
-    mol_reactant_2 = Chem.MolFromSmiles(reactant_smiles_2)
-    mol_reactant_2 = Chem.AddHs(mol_reactant_2)  # Add explicit hydrogens
-    return mol_reactant_1, mol_reactant_2
-
-
-def process_reactions(rxn, csv_cache, reaction_tuple, key=None, 
-                      molecule_and_csv_path_dict=None, delete_atoms=False):
-    """
-    Process chemical reactions, map atoms between reactants and products, and save data.
-    
-    This is the main processing function that:
-    1. Runs reactions on reactant pairs
-    2. Maps atoms between reactants and products
-    3. Validates mapping consistency
-    4. Saves mapping data to CSV files
-    5. Identifies reaction features (first shell, initiators, byproducts)
-    
-    Args:
-        rxn (Chem.rdChemReactions.ChemicalReaction): RDKit reaction object
-        csv_cache (Path): Directory path for CSV output files
-        reaction_tuple (list): List of reactant pairs to process
-        key (str or int, optional): Identifier for the reaction set
-        molecule_and_csv_path_dict (dict, optional): Dictionary to store results
-        delete_atoms (bool, optional): Whether to identify byproduct atoms for deletion
-        
-    Returns:
-        tuple: (mols, output, molecule_and_csv_path_dict)
-            - mols: List of reactant and product molecules for visualization
-            - output: String output (currently empty, reserved for future use)
-            - molecule_and_csv_path_dict: Dictionary containing all processed data
-            
-    Raises:
-        ValueError: If mapping validation fails at any step
-        
-    Note:
-        This function performs extensive validation including:
-        - Atom count consistency between reactants and products
-        - Consecutive indexing requirements
-        - Initiator count validation (must be exactly 2)
-    """
-    if molecule_and_csv_path_dict is None:
-        molecule_and_csv_path_dict = {}
-    
-    total_products = 0
-    mols = []
-
-    # Process each reactant pair in the reaction tuple
-    for j, pair in enumerate(reaction_tuple):
-        r1, r2 = Chem.Mol(pair[0]), Chem.Mol(pair[1])
-        products = rxn.RunReactants((r1, r2))
-        
-        # Get substructure matches for each reactant against reaction templates
-        matches_1 = r1.GetSubstructMatches(rxn.GetReactants()[0])
-        matches_2 = r2.GetSubstructMatches(rxn.GetReactants()[1])
-
-        # Process each product set generated by the reaction
-        for product_set in products:
-            # Clean up any existing DataFrame
-            if "df" in locals():
-                del df
-            
-            
-            # Initialize data structures
-            df = pd.DataFrame(columns=["reactant_idx", "product_idx"])
-            first_shell = []      # Atoms in first coordination shell
-            initiator_idxs = []   # Initiator atom indices
-            mapping_dict = {}     # Additional mapping dictionary
-
-            # Apply atom mapping from reaction properties
-            num_total_atoms = 0
-            for i, product in enumerate(product_set):
-                if i > 0:
-                    num_total_atoms += product_set[i - 1].GetNumAtoms()
-                for atom in product.GetAtoms():
-                    if atom.HasProp("react_idx"):
-                        r_idx = atom.GetIntProp("react_idx")
-                        a_idx = atom.GetIntProp("react_atom_idx")
-                        r = (r1, r2)[r_idx]
-                        r_atom = r.GetAtomWithIdx(a_idx)
-                        map_num = atom.GetIdx() + 1 + num_total_atoms + 100
-                        r_atom.SetAtomMapNum(map_num)
-                        atom.SetAtomMapNum(map_num)
-
-            # Combine reactants and products for easier processing
-            reactant_combined = Chem.CombineMols(r1, r2)
-            product_combined = Chem.CombineMols(*product_set)
-
-            # Create mapping between reactant and product atoms based on map numbers
-            for r_atom in reactant_combined.GetAtoms():
-                for p_atom in product_combined.GetAtoms():
-                    if r_atom.GetAtomMapNum() == p_atom.GetAtomMapNum():
-                        new_row = pd.DataFrame([{"reactant_idx": r_atom.GetIdx(), 
-                                                "product_idx": p_atom.GetIdx()}])
-                        df = pd.concat([df, new_row], ignore_index=True)
-                        break
-
-            # Validate mapping completeness and consistency
-            num_reactant_atoms = reactant_combined.GetNumAtoms()
-            num_product_atoms = product_combined.GetNumAtoms()
-            reactant_mapped = df["reactant_idx"].notna().sum()
-            product_mapped = df["product_idx"].notna().sum()
-
-            # Validation 1: Mapped atom counts must match between columns
-            if reactant_mapped != product_mapped:
-                raise ValueError(
-                    "Mismatch in mapped atom counts between columns: "
-                    f"reactant_idx mapped={reactant_mapped}, product_idx mapped={product_mapped}"
-                )
-
-            # Validation 2: All reactant atoms must be mapped
-            if reactant_mapped != num_reactant_atoms:
-                df.to_csv(csv_cache / f"debug_mapping_{key}_{total_products}.csv", index=False)
-                raise ValueError(
-                    "Mapping does not cover all reactant atoms: "
-                    f"reactant atoms={num_reactant_atoms}, mapped={reactant_mapped}"
-                )
-
-            # Validation 3: All product atoms must be mapped
-            if product_mapped != num_product_atoms:
-                raise ValueError(
-                    "Mapping does not cover all product atoms: "
-                    f"product atoms={num_product_atoms}, mapped={product_mapped}"
-                )
-
-            # Validation 4: Mapping indices must be consecutive
-            if (is_consecutive(df["reactant_idx"].tolist()) is False or 
-                is_consecutive(df["product_idx"].tolist()) is False):
-                raise ValueError(
-                    "Mapping indices are not consecutive: "
-                    f"reactant indices={df['reactant_idx'].tolist()}, "
-                    f"product indices={df['product_idx'].tolist()}"
-                )
-
-            # Apply smart mapping to both reactants
-            try:
-                sub_set1 = is_number_in_set(matches_1, r1)
-            except ValueError:
-                sub_set1 = ()
-            smart_mapping(
-                reactant=r1,
-                smarts_template=rxn.GetReactants()[0],
-                match_tuple=sub_set1 if sub_set1 else (),
-            )
-            
-            try:
-                sub_set2 = is_number_in_set(matches_2, r2)
-            except ValueError:
-                sub_set2 = ()
-            smart_mapping(
-                reactant=r2,
-                smarts_template=rxn.GetReactants()[1],
-                match_tuple=sub_set2 if sub_set2 else (),
-            )
-
-            # Recombine reactants after mapping
-            reactant_combined = Chem.CombineMols(r1, r2)
-            
-            # Identify first shell atoms and initiators
-            for atom in reactant_combined.GetAtoms():
-                if atom.GetAtomMapNum() <= 99:
-                    first_shell.append(atom.GetIdx())
-                # we could get the initiators by their map numbers
-                if atom.GetAtomMapNum() in [1, 2]:
-                    initiator_idxs.append(atom.GetIdx())
-
-            # Identify byproduct atoms if deletion is requested
-            byproduct_product_idxs = []
-            byproduct_reactant_idxs = []
-
-            if delete_atoms:
-                frags = rdmolops.GetMolFrags(product_combined, asMols=True)
-                smallest_mol = min(frags, key=lambda m: m.GetNumAtoms())
-
-                # 1) get byproduct atom indices in PRODUCT space
-                for atom in smallest_mol.GetAtoms():
-                    byproduct_map_number = atom.GetAtomMapNum()
-                    for p_atom in product_combined.GetAtoms():
-                        if p_atom.GetAtomMapNum() == byproduct_map_number:
-                            byproduct_product_idxs.append(p_atom.GetIdx())
-                            break
-
-                # 2) convert PRODUCT indices -> REACTANT indices using df mapping
-                byproduct_reactant_idxs = (
-                    df.loc[df["product_idx"].isin(byproduct_product_idxs), "reactant_idx"]
-                    .astype(int)
-                    .tolist()
-                )
-
-            # if you want the column to store REACTANT indices now:
-            byproduct_indexs = byproduct_reactant_idxs
-
-            # Add any additional mappings from mapping_dict
-            for r_idx, p_idx in mapping_dict.items():
-                new_row = pd.DataFrame([{"reactant_idx": r_idx, "product_idx": p_idx}])
-                df = pd.concat([df, new_row], ignore_index=True)
-
-            # Create comprehensive DataFrame with all analysis columns
-            first_shell_column = pd.Series(first_shell, name="first_shell")
-            initiator_idxs_column = pd.Series(initiator_idxs, name="initiators")
-            
-            # Validation: Must have exactly 2 initiators
-            if len(initiator_idxs) != 2:
-                raise ValueError(f"Expected 2 initiators, got {len(initiator_idxs)}: {initiator_idxs}")
-            
-            by_product_indexs_column = pd.Series(byproduct_indexs, name="byproduct_indices")
-            
-            # Combine all data into final DataFrame
-            df_combined = pd.concat([df, first_shell_column, initiator_idxs_column,
-                                     by_product_indexs_column], axis=1).astype(pd.Int64Dtype())
-            
-            # Check for duplicate products
-            if not compare_products(molecule_and_csv_path_dict, product_combined):
-                continue
-
-            # Update counters and store results
-            total_products += 1
-            mols.append(reactant_combined)
-            mols.append(product_combined)
-
-            # Organize results in the output dictionary
-            for i in itertools.count(1):
-                if i not in molecule_and_csv_path_dict: # Check if i IS a key
-                    sub_dict = molecule_and_csv_path_dict[i] = {} # Initialize if not found
-                    dict_key = i
-                    break
-
-            # Save to CSV file
-            df_combined.to_csv(csv_cache / f"reaction_{dict_key}.csv", index=False)
-            print(f"Saved reaction {dict_key} to CSV")
-
-            sub_dict["reactant"] = reactant_combined
-            sub_dict["product"] = product_combined
-            sub_dict["csv_path"] = csv_cache / f"reaction_{dict_key}.csv"
-            sub_dict["delete_atoms"] = delete_atoms
-            sub_dict["reaction_dataframe"] = df_combined
-
-
-    return mols, molecule_and_csv_path_dict
-
-
-def save_grid_image(mols, cache, key=None):
-    from pathlib import Path
-    from rdkit.Chem import Draw
-    import base64
-
-    out_dir = Path(cache) / "grid_images"
-    out_dir.mkdir(parents=True, exist_ok=True)
-
-    out_path = out_dir / (f"reaction_grid_{key}.png" if key is not None else "reaction_grid.png")
-
-    # Ask RDKit for a raster image (PNG-like)
-    img = Draw.MolsToGridImage(mols, useSVG=False, subImgSize=(900, 900))
-
-    # Case 1: PIL.Image.Image (has .save)
-    if hasattr(img, "save"):
-        img.save(str(out_path))
-        return out_path
-
-    # Case 2: IPython/RDKit display object (often has .data)
-    data = getattr(img, "data", None)
-    if data is None:
-        raise TypeError(f"Unexpected image type {type(img)}; cannot save.")
-
-    # data can be bytes OR base64 string depending on wrapper
-    if isinstance(data, bytes):
-        png_bytes = data
-    elif isinstance(data, str):
-        # try base64 decode; if it fails, treat as raw text
-        try:
-            png_bytes = base64.b64decode(data)
-        except Exception:
-            png_bytes = data.encode("utf-8")
-    else:
-        raise TypeError(f"Unexpected img.data type {type(data)}; cannot save.")
-
-    with open(out_path, "wb") as f:
-        f.write(png_bytes)
-
-    return out_path
-
-
-
-def reaction_tuples(same_reactants, mol_reactant_1, mol_reactant_2):
-    """
-    Generate reactant pair orderings for reaction processing.
-
-    - For same reactants: only one unique ordering [A, A].
-    - For different reactants: return both [A, B] and [B, A] to avoid relying
-      on reactant ordering and to support order-dependent reaction definitions.
-    """
-    if same_reactants:
-        return [[mol_reactant_1, mol_reactant_1]]
-
-    return [[mol_reactant_1, mol_reactant_2], [mol_reactant_2, mol_reactant_1]]
-
-
-def process_reaction_dict(detected_reactions, cache):
-    """
-    Process a dictionary of detected reactions and generate mapping data.
-    
-    This is a high-level function that processes multiple reactions defined in
-    a dictionary structure, handling all steps from SMILES to final CSV output.
-    
-    Args:
-        detected_reactions (dict): Dictionary containing reaction definitions
-        cache (str or Path): Base cache directory for output files
-        
-    Returns:
-        tuple: (molecule_and_csv_path_dict, detected_reactions)
-            - molecule_and_csv_path_dict: Dictionary with all processed reaction data
-            - detected_reactions: The original input dictionary (unchanged)
-            
-    Example:
-        >>> reactions_dict = {
-        ...     1: {"reaction": "[C:1]=[O:2]>>[C:1]-[O:2]", 
-        ...         "monomer_1": {"smiles": "CCO"}, ...}
-        ... }
-        >>> results, original = process_reaction_dict(reactions_dict, "/path/to/cache")
-    """
-    molecule_and_csv_path_dict = {}
-    csv_cache = prepare_paths(cache)
-    molecule_images =  []
-
-    # TODO: Add function to compare products if products are identicals drop the duplicates
-    
-
-    # Process each reaction in the dictionary
-    for key in detected_reactions:
-        reaction_dict = detected_reactions[key]
-        rxn_smarts = reaction_dict["reaction"]
-        reactant_smiles_1 = reaction_dict["monomer_1"]["smiles"]
-        same_reactants = reaction_dict["same_reactants"]
-        
-        # Handle same vs different reactants
-        if same_reactants:
-            reactant_smiles_2 = reactant_smiles_1
-        else:
-            reactant_smiles_2 = reaction_dict["monomer_2"]["smiles"]
-        
-        delete_atoms = reaction_dict.get("delete_atom", False)
-        
-        # Build reaction and reactants
-        rxn = build_reaction(rxn_smarts)
-        mol_reactant_1, mol_reactant_2 = build_reactants(reactant_smiles_1, reactant_smiles_2)
-        reaction_tuple = reaction_tuples(same_reactants, mol_reactant_1, mol_reactant_2)
-
-        # Process the reaction
-        mols, molecule_and_csv_path_dict = process_reactions(
-            rxn, csv_cache, reaction_tuple, key, molecule_and_csv_path_dict, 
-            delete_atoms=delete_atoms
-        )
-        
-        # Save outputs
-        save_grid_image(mols, cache, key)
-        molecule_images.append(mols)
-
-    return molecule_and_csv_path_dict, detected_reactions, molecule_images
-
-
-def run_all(cache, rxn_smarts, reactant_smiles_1, reactant_smiles_2):
-    """
-    Run a complete reaction processing pipeline for a single reaction.
-    
-    This is a convenience function that handles all steps for processing
-    a single reaction from SMILES strings to final outputs.
-    
-    Args:
-        cache (str or Path): Base cache directory for output files
-        rxn_smarts (str): Reaction SMARTS string
-        reactant_smiles_1 (str): SMILES string for first reactant
-        reactant_smiles_2 (str): SMILES string for second reactant
-        
-    Returns:
-        dict: Dictionary containing all processed reaction data
-        
-    Example:
-        >>> results = run_all("/path/to/cache", 
-        ...                   "[C:1]=[O:2]>>[C:1]-[O:2]", 
-        ...                   "CCO", "CC=O")
-    """
-    molecule_and_csv_path_dict = {}
-    csv_cache = prepare_paths(cache)
-    
-    # Build reaction and reactants
-    rxn = build_reaction(rxn_smarts)
-    mol_reactant_1, mol_reactant_2 = build_reactants(reactant_smiles_1, reactant_smiles_2)
-    reaction_tuple = [[mol_reactant_1, mol_reactant_2]]
-    delete_atoms = True  # Default to identifying byproducts
-    
-    # Process the reaction
-    mols, molecule_and_csv_path_dict = process_reactions(
-        rxn, csv_cache, reaction_tuple, None, molecule_and_csv_path_dict, delete_atoms
-    )
-    
-    # Save outputs
-    save_grid_image(mols, cache, None)
-
-    return molecule_and_csv_path_dict
-
-
-if __name__ == "__main__":
-    """
-    Main execution block with example reaction data.
-    
-    This demonstrates the usage of the module with example polyesterification
-    reactions between hydroxy carboxylic acids.
-    """
-    
-    # Example reaction database for polyesterification reactions
-    detected_reactions = {
-        1: {
-            "reaction_name": "Hydroxy Carboxylic Acid Polycondensation(Polyesterification)",
-            "same_reactants": True,
-            "reactant_1": "hydroxy_carboxylic_acid",
-            "product": "polyester_chain",
-            "delete_atom": True,
-            "reaction": "[O;!$(OC=*):1]-[H:3].[CX3:2](=[O:5])[OX2H1:4]>>[OX2:1]-[CX3:2](=[O:5]).[O:4]-[H:3]",
-            "reference": {
-                "smarts": "https://pubs.acs.org/doi/10.1021/acs.jcim.3c00329",
-                "reaction_and_mechanism": [
-                    "https://pubs.acs.org/doi/10.1021/ed048pA734.1",
-                    "https://pubs.acs.org/doi/10.1021/ed073pA312",
-                ],
-            },
-            "monomer_1": {
-                "smiles": "O=C(O)c1cc(O)c(Cl)c(C(=O)O)c1",
-                "functionality_type": "di_different",
-                "functional_group_name": "hydroxy_carboxylic_acid",
-                "functional_group_smarts_1": "[OX2H1;!$(OC=*):1]",
-                "functional_count_1": 1,
-                "functional_group_smarts_2": "[CX3:2](=[O])[OX2H1]",
-                "functional_count_2": 2,
-            },
-        },
-        2: {
-            "reaction_name": "Hydroxy Carboxylic Acid Polycondensation(Polyesterification)",
-            "same_reactants": True,
-            "reactant_1": "hydroxy_carboxylic_acid",
-            "product": "polyester_chain",
-            "delete_atom": True,
-            "reaction": "[O;!$(OC=*):1]-[H:3].[CX3:2](=[O:5])[OX2H1:4]>>[OX2:1]-[CX3:2](=[O:5]).[O:4]-[H:3]",
-            "reference": {
-                "smarts": "https://pubs.acs.org/doi/10.1021/acs.jcim.3c00329",
-                "reaction_and_mechanism": [
-                    "https://pubs.acs.org/doi/10.1021/ed048pA734.1",
-                    "https://pubs.acs.org/doi/10.1021/ed073pA312",
-                ],
-            },
-            "monomer_1": {
-                "smiles": "O=C(O)CCCC(O)CCCO",
-                "functionality_type": "di_different",
-                "functional_group_name": "hydroxy_carboxylic_acid",
-                "functional_group_smarts_1": "[OX2H1;!$(OC=*):1]",
-                "functional_count_1": 2,
-                "functional_group_smarts_2": "[CX3:2](=[O])[OX2H1]",
-                "functional_count_2": 1,
-            },
-        },
-        3: {
-            "reaction_name": "Hydroxy Carboxylic and Hydroxy Carboxylic Polycondensation(Polyesterification)",
-            "same_reactants": False,
-            "reactant_1": "hydroxy_carboxylic_acid",
-            "reactant_2": "hydroxy_carboxylic_acid",
-            "product": "polyester_chain",
-            "delete_atom": True,
-            "reaction": "[O;!$(OC=*):1]-[H:3].[CX3:2](=[O:5])[OX2H1:4]>>[OX2:1]-[CX3:2](=[O:5]).[O:4]-[H:3]",
-            "reference": {
-                "smarts": "https://pubs.acs.org/doi/10.1021/acs.jcim.3c00329",
-                "reaction_and_mechanism": [
-                    "https://pubs.acs.org/doi/10.1021/ed048pA734.1",
-                    "https://pubs.acs.org/doi/10.1021/ed073pA312",
-                ],
-            },
-            "monomer_1": {
-                "smiles": "O=C(O)c1cc(O)c(Cl)c(C(=O)O)c1",
-                "functionality_type": "di_different",
-                "functional_group_name": "hydroxy_carboxylic_acid",
-                "functional_group_smarts_1": "[OX2H1;!$(OC=*):1]",
-                "functional_count_1": 1,
-                "functional_group_smarts_2": "[CX3:2](=[O])[OX2H1]",
-                "functional_count_2": 2,
-            },
-            "monomer_2": {
-                "smiles": "O=C(O)CCCC(O)CCCO",
-                "functionality_type": "di_different",
-                "functional_group_name": "hydroxy_carboxylic_acid",
-                "functional_group_smarts_1": "[OX2H1;!$(OC=*):1]",
-                "functional_count_1": 2,
-                "functional_group_smarts_2": "[CX3:2](=[O])[OX2H1]",
-                "functional_count_2": 1,
-            },
-        },
-    }
-
-    # Configuration parameters
-    cache = "C:\\Users\\Janitha\\Documents\\GitHub\\reaction_lammps_mupt\\cache\\00_cache"
-    rxn_smarts = "[O;!$(OC=*):1]-[H:3].[CX3:2](=[O:5])[OX2H1:4]>>[OX2:1]-[CX3:2](=[O:5]).[O:4]-[H:3]"
-    reactant_smiles_1 = "O=C(O)c1cc(O)c(Cl)c(C(=O)O)c1"
-    reactant_smiles_2 = "O=C(O)CCCC(O)CCCO"
-
-    # Execute the reaction processing pipeline
-    print("Starting reaction processing pipeline...")
-    
-    # Process a single reaction using run_all
-    # run_all(cache, rxn_smarts, reactant_smiles_1, reactant_smiles_2)
-    
-    # Process all reactions in the dictionary
-    molecule_dict_csv_path_dict, detected_reactions, molecule_images = process_reaction_dict(detected_reactions, cache)
-    
-    # Display results
-    import pprint
-    print("\n" + "="*80)
-    print("REACTION PROCESSING RESULTS")
-    print("="*80)
-    pprint.pprint(molecule_dict_csv_path_dict)
-    
-    print("\n" + "="*80)
-    print("SUMMARY")
-    print("="*80)
-    print(f"Total reactions processed: {len(molecule_dict_csv_path_dict)}")
-    
-    # Count total products across all reactions
-    print("\nProcessing complete! CSV files and images have been saved to the cache directory.")
diff --git a/AutoREACTER/reaction_template_builder/reaction_template_pipeline/util.py b/AutoREACTER/reaction_template_builder/reaction_template_pipeline/util.py
deleted file mode 100644
index 3ad4257..0000000
--- a/AutoREACTER/reaction_template_builder/reaction_template_pipeline/util.py
+++ /dev/null
@@ -1,167 +0,0 @@
-from rdkit import Chem
-
-def compare_products(reactions_dict, _prod2):
-    """
-    Compares a new product molecule against a collection of existing products 
-    to identify duplicates, ignoring atom mapping.
-
-    Args:
-        reactions_dict (dict): A dictionary where values are dictionaries 
-                               potentially containing a "product" RDKit molecule object.
-        _prod2 (rdkit.Chem.rdchem.Mol): The RDKit molecule object to check for duplicates.
-
-    Returns:
-        bool: Returns False if a duplicate product is found, True otherwise.
-    """
-    # Create a copy of the input molecule to avoid modifying the original object
-    prod2 = Chem.Mol(_prod2)
-    
-    for key, dict_2 in reactions_dict.items():
-        # Retrieve the product molecule from the nested dictionary
-        prod = dict_2.get("product")
-        
-        if prod is not None:
-            # Create a copy of the stored product for comparison
-            prod1 = Chem.Mol(prod)
-
-            # Remove atom map numbers from both molecules.
-            # Atom maps are often used in reactions to track atoms but should be 
-            # ignored when checking if two chemical structures are identical.
-            for atom in prod1.GetAtoms():
-                atom.SetAtomMapNum(0)
-            for atom in prod2.GetAtoms():
-                atom.SetAtomMapNum(0)
-
-            # Convert both molecules to canonical SMILES strings.
-            # Canonicalization ensures that the same molecule always results in the same string.
-            smi1 = Chem.MolToSmiles(prod1, canonical=True)
-            smi2 = Chem.MolToSmiles(prod2, canonical=True)
-
-            # Perform string comparison to detect duplicates
-            if smi1 == smi2:
-                print("DUPLICATE PRODUCT FOUND")
-                return False
-                
-    # No duplicates found after checking all entries
-    return True
-
-def compare_rdkit_molecules_canonical(data_smiles_list, mol_smi_2):
-    """
-    Compares two RDKit molecule SMILES strings to determine if they
-    represent the same chemical structure using canonical SMILES.
-
-    Args:
-        data_smiles_list (list): List of SMILES strings to compare against.
-        mol_smi_2 (str): SMILES string of the second molecule.
-
-    Returns:
-        bool: True if molecules are chemically identical, False otherwise.
-    """
-    if not mol_smi_2:
-        return data_smiles_list, False
-    mol2 = Chem.MolFromSmiles(mol_smi_2)
-    if mol2 is None:
-        return data_smiles_list, False
-        
-    for mol_smi_1 in data_smiles_list:
-        mol1 = Chem.MolFromSmiles(mol_smi_1)
-        
-        # Handle cases where SMILES might be invalid
-        if mol1 is None or mol2 is None:
-            return data_smiles_list, False  # or raise a ValueError
-        # Generate canonical SMILES and compare them
-        canonical_smi_1 = Chem.MolToSmiles(mol1, canonical=True)
-        canonical_smi_2 = Chem.MolToSmiles(mol2, canonical=True)
-
-        if canonical_smi_1 == canonical_smi_2:
-            return data_smiles_list, True
-    
-    data_smiles_list.append(mol_smi_2)
-    return data_smiles_list, False
-
-def format_detected_reactions_dict(detected_reactions, non_monomer_molecules_to_retain = None):
-    """
-    Aggregates and formats information from a dictionary of detected reactions 
-    into a single summary dictionary.
-
-    This function compiles unique reaction names, SMARTS references, and 
-    mechanism references into comma-separated strings.
-
-    Args:
-        detected_reactions (dict): A dictionary containing reaction data, 
-                                   where each value is a dictionary with keys 
-                                   like "reaction_name" and "reference".
-
-    Returns:
-        dict: A dictionary containing aggregated strings for:
-              - "reactions_names"
-              - "smart_references"
-              - "mechanism_references"
-    """
-    reactions_names = []
-    smart_references = []
-    mechanism_references = []
-
-    seen_reactions = set()
-    seen_smarts = set()
-    seen_mechanisms = set()
-    
-    formatted_dict = {}
-    data_smiles_list = []
-    
-    for key, reaction in detected_reactions.items():
-        # Deduplicate reaction names safely (no substring bugs)
-        reactions_name = reaction.get("reaction_name")
-        if reactions_name and reactions_name not in seen_reactions:
-            seen_reactions.add(reactions_name)
-            reactions_names.append(reactions_name)
-    
-        # Reference is optional; do not skip monomer processing if missing
-        reference = reaction.get("reference") or {}
-        smarts_ref = reference.get("smarts")
-        mech_refs = reference.get("reaction_and_mechanism")
-    
-        if smarts_ref and smarts_ref not in seen_smarts:
-            seen_smarts.add(smarts_ref)
-            smart_references.append(smarts_ref)
-    
-        if mech_refs:
-            mech_block = ", ".join(mech_refs)
-            if mech_block and mech_block not in seen_mechanisms:
-                seen_mechanisms.add(mech_block)
-                mechanism_references.append(mech_block)
-    
-        # Always process monomers (if present)
-        m1 = reaction.get("monomer_1")
-        m2 = reaction.get("monomer_2")
-        m1_smiles = m1.get("smiles") if isinstance(m1, dict) else None
-        m2_smiles = m2.get("smiles") if isinstance(m2, dict) else None
-    
-        if m1_smiles:
-            data_smiles_list, _ = compare_rdkit_molecules_canonical(data_smiles_list, m1_smiles)
-        if m2_smiles:
-            data_smiles_list, _ = compare_rdkit_molecules_canonical(data_smiles_list, m2_smiles)
-    
-    formatted_dict = {
-        "reactions_names": ", ".join(reactions_names),
-        "smart_references": ", ".join(smart_references),
-        "mechanism_references": ", ".join(mechanism_references),
-    }
-    return formatted_dict , data_smiles_list
-
-def prep_for_3d_molecule_generation(data_smiles_list, molecule_dict_csv_path_dict):
-    molecule_dict = {}
-    for i, smiles in enumerate(data_smiles_list):
-        key = f"data_{i+1}"
-        molecule = Chem.MolFromSmiles(smiles)
-        if molecule is None:
-            raise ValueError(f"Invalid SMILES: {smiles}")
-        molecule = Chem.AddHs(molecule)
-        molecule_dict[key] = molecule
-    for i, (key, value) in enumerate(molecule_dict_csv_path_dict.items()):
-        reactant = value.get("reactant")
-        product = value.get("product")
-        if reactant and product:
-            molecule_dict[f"pre_{i+1}"] = reactant
-            molecule_dict[f"post_{i+1}"] = product
-    return molecule_dict
diff --git a/examples/example_1.ipynb b/examples/example_1.ipynb
index 69c0c6c..e59d05f 100644
--- a/examples/example_1.ipynb
+++ b/examples/example_1.ipynb
@@ -12,9 +12,7 @@
     "- `Initialization()` sets up the AutoREACTER runtime environment.\n",
     "- `InputParser()` prepares the parser for validating and normalizing user input files.\n",
     "- `GetCacheDir()` retrieves the staging cache directory used for intermediate data.\n",
-    "- `RunDirectoryManager.make_dated_run_dir()` creates a timestamped run directory to store outputs for this session.\n",
-    "\n",
-    "This ensures each run is isolated and reproducible."
+    "- `RunDirectoryManager.make_dated_run_dir()` creates a timestamped run directory to store outputs for this session."
    ]
   },
   {
@@ -30,7 +28,8 @@
     "from AutoREACTER.detectors.functional_groups_detector import FunctionalGroupsDetector\n",
     "from AutoREACTER.detectors.reaction_detector import ReactionDetector\n",
     "from AutoREACTER.detectors.non_monomer_detector import NonReactantsDetector\n",
-    "from AutoREACTER.reaction_template_builder.run_reaction_template_pipeline import ReactionTemplatePipeline\n",
+    "from AutoREACTER.reaction_preparation.reaction_processor.prepare_reactions import PrepareReactions\n",
+    "# from AutoREACTER.reaction_template_builder.run_reaction_template_pipeline import ReactionTemplatePipeline\n",
     "from rdkit import Chem\n",
     "from rdkit.Chem import Draw\n",
     "import json\n",
@@ -41,6 +40,7 @@
     "dated_cache_dir = RunDirectoryManager.make_dated_run_dir(cache_dir, chdir_to=\"none\")\n",
     "# #future use\n",
     "# RunDirectoryManager.copy_into_run(cache_dir, dated_cache_dir)\n",
+    "print(f\"Cache directory: {cache_dir}\")\n",
     "\n",
     "\n"
    ]
@@ -52,12 +52,7 @@
    "source": [
     "### Cache Cleanup (Optional)\n",
     "\n",
-    "This cell can be used to clean up cached data generated by previous runs.\n",
-    "\n",
-    "- `RetentionCleanup.run()` removes cached directories from the AutoREACTER cache base directory.\n",
-    "- Useful for freeing disk space or resetting the cache state.\n",
-    "\n",
-    "Uncomment and run only if you want to remove cached run data."
+    "This cell can be used to clean up cached data generated by previous runs.\n"
    ]
   },
   {
@@ -67,6 +62,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "\n",
+    "# `RetentionCleanup.run()` removes cached directories from the AutoREACTER cache base directory.\n",
+    "#  Useful for freeing disk space or resetting the cache state.\n",
+    "\n",
     "# RetentionCleanup.run(GetCacheDir().cache_base_dir)"
    ]
   },
@@ -75,16 +74,10 @@
    "id": "b1dd989a",
    "metadata": {},
    "source": [
-    "### Load, Validate, and Visualize Monomers\n",
+    "### Load and Validate Monomers\n",
     "\n",
     "This cell loads the example input file, validates it using the `InputParser`, and visualizes the monomers.\n",
-    "\n",
-    "- The JSON input file is loaded into `input_data`.\n",
-    "- `validate_inputs()` checks the schema and normalizes the data.\n",
-    "- `molecule_representation_of_initial_molecules()` extracts RDKit molecule objects.\n",
-    "- `Draw.MolsToGridImage()` displays the monomers in a grid with their labels.\n",
-    "\n",
-    "This provides a quick visual check of the input molecules before running further analysis."
+    "\n"
    ]
   },
   {
@@ -94,10 +87,32 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "\n",
+    "# validate_inputs() checks the schema and normalizes the data.\n",
+    "# molecule_representation_of_initial_molecules() extracts RDKit molecule objects.\n",
+    "# Draw.MolsToGridImage() displays the monomers in a grid with their labels.\n",
+    "\n",
     "with open(\"example_1_inputs_count_mode.json\", \"r\") as f:\n",
     "    input_data = json.load(f)\n",
     "\n",
-    "validated_inputs = input_parser.validate_inputs(input_data)\n",
+    "validated_inputs = input_parser.validate_inputs(input_data)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a48c7aee",
+   "metadata": {},
+   "source": [
+    "### Visualize Monomers (Optional)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "63c6e279",
+   "metadata": {},
+   "outputs": [],
+   "source": [
     "initial_molecules = input_parser.initial_molecules_image_grid(validated_inputs)\n",
     "initial_molecules"
    ]
@@ -130,11 +145,7 @@
    "source": [
     "### Functional Group Detection\n",
     "\n",
-    "This cell runs the functional group detection step.\n",
-    "\n",
-    "- `FunctionalGroupsDetector()` initializes the detector.\n",
-    "- `functional_groups_detector()` analyzes the validated monomers.\n",
-    "- It identifies functional groups present in each molecule and generates corresponding visualization images."
+    "This cell runs the functional group detection step.\n"
    ]
   },
   {
@@ -144,6 +155,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "# FunctionalGroupsDetector() initializes the detector.\n",
+    "# functional_groups_detector() analyzes the validated monomers.\n",
+    "# It identifies functional groups present in each molecule and generates corresponding visualization images.\n",
+    "\n",
     "# Run the detector only after the inputs have been validated, as it consumes validated_inputs.monomers to execute the detection workflow.\n",
     "functional_groups_detector = FunctionalGroupsDetector()\n",
     "functional_groups, functional_groups_imgs = \\\n",
@@ -157,12 +172,9 @@
    "id": "76ad6db0",
    "metadata": {},
    "source": [
-    "### Functional Group Visualization\n",
+    "### Functional Group Visualization (Optional)\n",
     "\n",
-    "This cell visualizes the detected functional groups on the molecules.\n",
-    "\n",
-    "- `molecules_to_visualization()` highlights the functional groups identified in the previous step.\n",
-    "- The resulting image shows each molecule with the detected functional groups marked."
+    "This cell visualizes the detected functional groups on the molecules."
    ]
   },
   {
@@ -172,6 +184,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "# molecules_to_visualization() highlights the functional groups identified in the previous step.\n",
+    "# The resulting image shows each molecule with the detected functional groups marked.\n",
+    "\n",
     "img = functional_groups_detector.functional_group_highlighted_molecules_image_grid(functional_groups_imgs)\n",
     "img\n"
    ]
@@ -183,11 +198,7 @@
    "source": [
     "### Reaction Detection\n",
     "\n",
-    "This cell identifies possible reactions between the detected functional groups.\n",
-    "\n",
-    "- `ReactionDetector()` initializes the reaction detection module.\n",
-    "- `reaction_detector()` analyzes the detected functional groups and generates possible reaction instances.\n",
-    "- `create_reaction_image_grid()` visualizes the detected reactions in a grid format."
+    "This cell identifies possible reactions between the detected functional groups.\n"
    ]
   },
   {
@@ -197,9 +208,30 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "\n",
+    "# ReactionDetector() initializes the reaction detection module.\n",
+    "# reaction_detector() analyzes the detected functional groups and generates possible reaction instances.\n",
+    "# create_reaction_image_grid() visualizes the detected reactions in a grid format.\n",
+    "\n",
     "reaction_detector = ReactionDetector()\n",
-    "reaction_instances = reaction_detector.reaction_detector(functional_groups)\n",
-    "print(f\"Number of reaction instances detected: {len(reaction_instances)}\")\n",
+    "reaction_instances = reaction_detector.reaction_detector(functional_groups)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0149d1d3",
+   "metadata": {},
+   "source": [
+    "### Reaction visualization (Optional)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b443f92e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
     "img = reaction_detector.available_reaction_image_grid(reaction_instances)\n",
     "img"
    ]
@@ -211,10 +243,7 @@
    "source": [
     "### Reaction Selection\n",
     "\n",
-    "This cell filters and selects the relevant reactions from the detected reaction instances.\n",
-    "\n",
-    "- `reaction_selection()` processes the detected reactions.\n",
-    "- It removes invalid or redundant reaction candidates."
+    "This cell filters and selects the relevant reactions from the detected reaction instances.\n"
    ]
   },
   {
@@ -224,9 +253,22 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "# reaction_selection() processes the detected reactions.\n",
+    "# It removes user selected reactions from the list of detected reactions and returns the final set of reactions to be used for template generation.\n",
+    "\n",
     "selected_reactions = reaction_detector.reaction_selection(reaction_instances)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ee4e5c22",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(selected_reactions)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "09b1cb36",
@@ -235,12 +277,7 @@
     "### Non-Reactant (Non-Monomer) Detection\n",
     "\n",
     "This cell identifies molecules that do **not participate in any detected reactions**.\n",
-    "\n",
-    "- `NonReactantsDetector()` initializes the detector.\n",
-    "- `non_monomer_detector()` to determine which molecules are non-reactive.\n",
-    "- `non_reactants_to_visualization()` grid to non reactive molecules\n",
-    "\n",
-    "If no non-reactant molecules are detected, this stage is **automatically skipped**, and the workflow continues without generating any visualization."
+    "\n"
    ]
   },
   {
@@ -250,8 +287,33 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "# NonReactantsDetector() initializes the detector.\n",
+    "# non_monomer_detector() to determine which molecules are non-reactive.\n",
+    "\n",
+    "\n",
     "non_monomer_detector = NonReactantsDetector()\n",
-    "non_reactants_list = non_monomer_detector.non_monomer_detector(validated_inputs, selected_reactions)\n",
+    "non_reactants_list = non_monomer_detector.non_monomer_detector(validated_inputs, selected_reactions)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2e03f325",
+   "metadata": {},
+   "source": [
+    "### non_reactants_to_visualization"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5040c430",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# `()` grid to non reactive molecules\n",
+    "\n",
+    "# If no non-reactant molecules are detected, this stage is **automatically skipped**, and the workflow continues without generating any visualization.\n",
+    "\n",
     "img_non_reactants = non_monomer_detector.non_reactants_to_visualization(non_reactants_list)\n",
     "img_non_reactants"
    ]
@@ -263,12 +325,7 @@
    "source": [
     "### Update Inputs After Non-Reactant Filtering\n",
     "\n",
-    "This cell updates the validated inputs after identifying non-reactive molecules.\n",
-    "\n",
-    "- `non_reactant_selection()` removes or marks molecules that do not participate in any reactions.\n",
-    "- The resulting `updated_inputs` contains the filtered set of monomers that will be used in the next stages of the workflow.\n",
-    "\n",
-    "If no non-reactants were detected in the previous step, the inputs remain unchanged."
+    "This cell updates the validated inputs after identifying non-reactive molecules.\n"
    ]
   },
   {
@@ -278,6 +335,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "# non_reactant_selection() removes or marks molecules that do not participate in any reactions.\n",
+    "# The resulting `updated_inputs` contains the filtered set of monomers that will be used in the next stages of the workflow.\n",
+    "# If no non-reactants were detected in the previous step, the inputs remain unchanged.\n",
+    "\n",
     "updated_inputs = non_monomer_detector.non_reactant_selection(validated_inputs, non_reactants_list)"
    ]
   },
@@ -287,7 +348,21 @@
    "id": "d62efe2a",
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "prepare_reactions = PrepareReactions(cache_dir)\n",
+    "prepared_reactions = prepare_reactions.prepare_reactions(selected_reactions)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a7c134e5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "img = prepare_reactions.reaction_templates_highlighted_image_grid(prepared_reactions)\n",
+    "img"
+   ]
   },
   {
    "cell_type": "code",