diff --git a/.vscode/settings.json b/.vscode/settings.json index 2c67ab7..4b5a294 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,5 +1,4 @@ { - "python-envs.defaultEnvManager": "ms-python.python:system", - "python-envs.defaultPackageManager": "ms-python.python:pip", - "python-envs.pythonProjects": [] + "python-envs.defaultEnvManager": "ms-python.python:conda", + "python-envs.defaultPackageManager": "ms-python.python:conda" } \ No newline at end of file diff --git a/AutoREACTER/detectors/functional_groups_detector.py b/AutoREACTER/detectors/functional_groups_detector.py index 1f612ac..f35f1e2 100644 --- a/AutoREACTER/detectors/functional_groups_detector.py +++ b/AutoREACTER/detectors/functional_groups_detector.py @@ -139,7 +139,7 @@ class MonomerEntry: # TODO: Add more functional groups as needed from literature/user requirements. -@dataclass(slots=True, frozen=True) +@dataclass(slots=True) class FunctionalGroupInfo: """ Immutable dataclass storing information about a detected functional group. diff --git a/AutoREACTER/detectors/reaction_detector.py b/AutoREACTER/detectors/reaction_detector.py index 4ffbb72..5b16523 100644 --- a/AutoREACTER/detectors/reaction_detector.py +++ b/AutoREACTER/detectors/reaction_detector.py @@ -58,20 +58,8 @@ class EmptyReactionListError(Exception): This should be prevented by the reaction_selection method, but this error serves as a safeguard.""" pass -@dataclass(slots=True, frozen=True) -class ReactionTemplate: - """ - Defines a generic template for a polymerization reaction. - """ - reaction_name: str - reactant_1: str # functional group name - reactant_2: Optional[str] # functional group name or None - reaction_smarts: str - same_reactants: bool - delete_atom: bool - references: dict -@dataclass(slots=True, frozen=True) +@dataclass(slots=True) class ReactionInstance: """ Represents a specific instance of a reaction between identified monomers. @@ -90,6 +78,7 @@ class ReactionInstance: reaction_smarts: str delete_atom: bool references: dict + same_reactants: bool monomer_1: MonomerRole functional_group_1: FunctionalGroupInfo monomer_2: Optional[MonomerRole] = None @@ -118,35 +107,27 @@ def _matching_fgs(self, monomer_entry: MonomerRole, target_group_name: str) -> L return [fg for fg in monomer_entry.functionalities if fg.fg_name == target_group_name] def _seen_pair_key( - self, - reaction_name: str, - monomer_role_1: MonomerRole, - fg_1: FunctionalGroupInfo, - monomer_role_2: Optional[MonomerRole] = None, - fg_2: Optional[FunctionalGroupInfo] = None, - ) -> Tuple: - """ - Generates a unique, order-independent key for a reaction pair to avoid duplicates. - - Args: - reaction_name: Name of the reaction. - monomer_role_1: First monomer. - fg_1: First functional group. - monomer_role_2: Second monomer (optional). - fg_2: Second functional group (optional). - - Returns: - A tuple representing the unique state of this reaction instance. - """ + self, + reaction_name: str, + monomer_role_1: MonomerRole, + fg_1: FunctionalGroupInfo, + monomer_role_2: Optional[MonomerRole] = None, + fg_2: Optional[FunctionalGroupInfo] = None, + ) -> Tuple: + if monomer_role_2 is None or fg_2 is None: - return (reaction_name, monomer_role_1, fg_1) + return ( + reaction_name, + monomer_role_1.smiles, + fg_1.fg_name + ) + + pair1 = (monomer_role_1.smiles, fg_1.fg_name) + pair2 = (monomer_role_2.smiles, fg_2.fg_name) + + ordered = tuple(sorted([pair1, pair2])) - # Sort reactants by memory ID to ensure (A, B) and (B, A) produce the same key - ordered = tuple(sorted( - [(monomer_role_1, fg_1), (monomer_role_2, fg_2)], - key=lambda x: (id(x[0]), id(x[1])) - )) - return (reaction_name, ordered[0], ordered[1]) + return (reaction_name, ordered) def reaction_detector(self, monomer_roles: List[MonomerRole]) -> List[ReactionInstance]: """ @@ -180,6 +161,7 @@ def reaction_detector(self, monomer_roles: List[MonomerRole]) -> List[ReactionIn reaction_smarts=reaction_info["reaction"], delete_atom=reaction_info["delete_atom"], references=reaction_info["reference"], + same_reactants=same_reactants, monomer_1=monomer_role, functional_group_1=fg ) @@ -210,6 +192,7 @@ def reaction_detector(self, monomer_roles: List[MonomerRole]) -> List[ReactionIn reaction_smarts=reaction_info["reaction"], delete_atom=reaction_info["delete_atom"], references=reaction_info["reference"], + same_reactants=same_reactants, monomer_1=monomer_role_i, functional_group_1=fg_i, monomer_2=monomer_role_j, @@ -243,6 +226,7 @@ def reaction_detector(self, monomer_roles: List[MonomerRole]) -> List[ReactionIn reaction_smarts=reaction_info["reaction"], delete_atom=reaction_info["delete_atom"], references=reaction_info["reference"], + same_reactants=same_reactants, monomer_1=monomer_role, functional_group_1=fg_1, monomer_2=monomer_role, diff --git a/AutoREACTER/detectors/reactions_library.py b/AutoREACTER/detectors/reactions_library.py index f0abc47..cd970c7 100644 --- a/AutoREACTER/detectors/reactions_library.py +++ b/AutoREACTER/detectors/reactions_library.py @@ -51,8 +51,8 @@ def __init__(self): }, "Hydroxy Acid Halides Hydroxy Acid Halides Polycondensation(Polyesterification)": { "same_reactants": False, - "reactant_1": "hydroxy_acid_halides_monomer", - "reactant_2": "hydroxy_acid_halides_monomer", + "reactant_1": "hydroxy_acid_halide", + "reactant_2": "hydroxy_acid_halide", "product": "polyester_chain", "delete_atom": True, "reaction": "[O;!$(OC=*):1]-[H:3].[CX3:2](=[O:5])[Cl,Br,I:4]>>[OX2:1]-[CX3:2](=[O:5]).[Cl,Br,I:4]-[H:3]", @@ -62,7 +62,7 @@ def __init__(self): }, "Hydroxy Acid Halides Polycondensation(Polyesterification)": { "same_reactants": True, - "reactant_1": "hydroxy_acid_halides_monomer", + "reactant_1": "hydroxy_acid_halide", "product": "polyester_chain", "delete_atom": True, "reaction": "[O;!$(OC=*):1]-[H:3].[CX3:2](=[O:5])[Cl,Br,I:4]>>[OX2:1]-[CX3:2](=[O:5]).[Cl,Br,I:4]-[H:3]", diff --git a/AutoREACTER/reaction_template_builder/__init__.py b/AutoREACTER/reaction_preparation/__init__.py similarity index 100% rename from AutoREACTER/reaction_template_builder/__init__.py rename to AutoREACTER/reaction_preparation/__init__.py diff --git a/AutoREACTER/reaction_template_builder/run_reaction_template_pipeline.py b/AutoREACTER/reaction_preparation/build_reaction_system.py similarity index 66% rename from AutoREACTER/reaction_template_builder/run_reaction_template_pipeline.py rename to AutoREACTER/reaction_preparation/build_reaction_system.py index 7c8e4ac..3ebc26b 100644 --- a/AutoREACTER/reaction_template_builder/run_reaction_template_pipeline.py +++ b/AutoREACTER/reaction_preparation/build_reaction_system.py @@ -24,7 +24,7 @@ """ -from typing import Literal + # --- preferred: package-relative imports; fallback: absolute imports for script/notebook --- try: # Case 1 (correct when imported as part of the package) @@ -54,204 +54,69 @@ from lunar_client.lunar_api_wrapper import lunar_workflow from lunar_client.molecule_template_preparation import molecule_template_preparation +from AutoREACTER.detectors.reaction_detector import ReactionInstance, ReactionTemplate +from AutoREACTER.detectors.functional_groups_detector import FunctionalGroupInfo, MonomerRole +from AutoREACTER.input_parser import SimulationSetup, MonomerEntry # standard libs / third-party import pandas as pd from pathlib import Path import os +from dataclasses import dataclass, field +from typing import List, Any, Optional, Dict +from rdkit import Chem -def is_continuous(d): - """ - Checks if the integer keys of a dictionary form a continuous sequence starting from 1. - - This is used to verify if reaction IDs are sequential after potential - filtering or deduplication steps. - - Args: - d (dict): The dictionary to check, where keys are expected to be integers. - Returns: - bool: True if keys are exactly [1, 2, ..., len(d)], False otherwise. - """ - keys = sorted(d.keys()) - if not keys: - return True - # Compare sorted keys against a generated range of the same length - return keys == list(range(1, len(keys) + 1)) - - -def save_grid_image(mols, cache, key=None): - from pathlib import Path - from rdkit.Chem import Draw - import base64 - """ - TODO: Add full grid set of reactant1 + reactant2 → product image grid to ipynb or cache/ "grid_images". - With Template atoms highlighted in one color and Edge atoms highlighted in another color. - With Edge atoms hightlighted in a different color if possible. - This will be useful for debugging and for users to visually inspect the detected reactions and templates. - Note: If you want to use the SVG output instead of PNG, you can set useSVG=True in Draw.MolsToGridImage and handle the SVG data accordingly. +@dataclass (slots=True) +class ReactionMetadata: """ - out_dir = Path(cache) / "grid_images" - out_dir.mkdir(parents=True, exist_ok=True) - - out_path = out_dir / (f"reaction_grid_{key}.png" if key is not None else "reaction_grid.png") - - # Ask RDKit for a raster image (PNG-like) - img = Draw.MolsToGridImage(mols, useSVG=False) - - # Case 1: PIL.Image.Image (has .save) - if hasattr(img, "save"): - img.save(str(out_path)) - return out_path - - # Case 2: IPython/RDKit display object (often has .data) - data = getattr(img, "data", None) - if data is None: - raise TypeError(f"Unexpected image type {type(img)}; cannot save.") - - # data can be bytes OR base64 string depending on wrapper - if isinstance(data, bytes): - png_bytes = data - elif isinstance(data, str): - # try base64 decode; if it fails, treat as raw text - try: - png_bytes = base64.b64decode(data) - except Exception: - png_bytes = data.encode("utf-8") - else: - raise TypeError(f"Unexpected img.data type {type(data)}; cannot save.") - - with open(out_path, "wb") as f: - f.write(png_bytes) - - return out_path - - -def add_dict_as_new_columns(df_existing, data_dict, titles=["template_reactant_idx", "template_product_idx"]): - """ - Adds dictionary keys and values as new columns to an existing DataFrame. - - This is specifically used to map reactant indices to product indices - within the reaction template. - - Args: - df_existing (pd.DataFrame): The DataFrame to modify. - data_dict (dict): Dictionary where keys and values will become column data. - titles (list): List of two strings for the new column names. - - Returns: - pd.DataFrame: The modified DataFrame with new columns added. - """ - # Convert dict keys and values to Series to ensure alignment with the DataFrame - # Use .astype("Int64") to allow for potential Null/NaN values while keeping integers - df_existing[titles[0]] = pd.Series(list(data_dict.keys())).astype("Int64") - df_existing[titles[1]] = pd.Series(list(data_dict.values())).astype("Int64") - - return df_existing - - -def molecule_dict_csv_path_dict_rearrange(molecule_dict_csv_path_dict): - """ - Rearranges the reaction dictionary keys to be continuous and renames - associated CSV files on the filesystem to match the new keys. - - This ensures that if reaction #2 is deleted, reaction #3 becomes the new #2, - maintaining a clean, gapless sequence for downstream processing. - - Args: - molecule_dict_csv_path_dict (dict): Dictionary containing reaction metadata - and 'csv_path' entries. - - Returns: - dict: A new dictionary with normalized continuous keys (1 to N) and updated file paths. + A dataclass to hold all relevant metadata and file paths for a single reaction instance. + Attributes: + - reaction_id: Unique identifier for the reaction. + - reactant_combined_mol: RDKit molecule object representing the combined reactants. + - product_combined_mol: RDKit molecule object representing the combined products. + - reaction_smarts: Optional SMARTS string representing the reaction. + - reactant_smarts: Optional SMARTS string for the reactants. + - product_smiles: Optional SMILES string for the products. + - csv_path: Path to the CSV file containing processing reaction data. + - mol_3d_path: Optional path to the generated 3D molecule file. + - reaction_dataframe: Optional pandas DataFrame holding detailed reaction mapping and analysis data. + - reactant_to_product_mapping: Dictionary mapping reactant atom indices to product atom indices. + - template_reactant_to_product_mapping: Dictionary mapping reactant atom indices to product atom indices specifically for the reaction template. + - edge_atoms: List of atom indices that are considered 'edge atoms' connecting the reaction template to the rest of the molecule. + - delete_atom: Boolean flag indicating whether an atom is deleted in the reaction (e.g., for polycondensation). + - delete_atom_idx: Optional index of the atom that is deleted in the reaction, if applicable. + - activity_stats: Boolean flag indicating whether to calculate and include activity statistics for the reaction. """ - # Skip if already continuous to save processing time - if is_continuous(molecule_dict_csv_path_dict): - return molecule_dict_csv_path_dict - - print("Rearranging molecule_dict_csv_path_dict keys to be continuous...") - new_dict = {} - - # Sort keys to ensure we process them in the original numerical order - sorted_old_keys = sorted(molecule_dict_csv_path_dict.keys()) + reaction_id: int + reactant_combined_mol: Chem.Mol + product_combined_mol: Chem.Mol + reaction_smarts: Optional[str] = None + reactant_smarts: Optional[str] = None + product_smiles: Optional[str] = None + csv_path: Path = None + mol_3d_path: Optional[Path] = None + reaction_dataframe: Optional[pd.DataFrame] = None + reactant_to_product_mapping: Dict[int, int] + template_reactant_to_product_mapping: Dict[int, int] + edge_atoms: List[int] + delete_atom: bool = True + delete_atom_idx: Optional[int] = None + activity_stats: bool = True - for new_key_idx, old_key in enumerate(sorted_old_keys): - new_key = new_key_idx + 1 # Start new keys from 1 - reaction_data = molecule_dict_csv_path_dict[old_key] - csv_save_path = reaction_data.get("csv_path") - - if new_key != old_key: - # Construct the new filename based on the new index (e.g., reaction_2.csv) - new_csv_save_path = os.path.join( - os.path.dirname(csv_save_path), - f"reaction_{new_key}.csv" - ) - - # Check if destination exists to avoid crashing or accidental overwrites - if os.path.exists(new_csv_save_path): - os.remove(new_csv_save_path) - - # Rename the physical file on the disk to match the new key - os.rename(csv_save_path, new_csv_save_path) - - # Update the path in the metadata dictionary - reaction_data["csv_path"] = new_csv_save_path - - new_dict[new_key] = reaction_data - - return new_dict - -def add_column_safe(df, list_data, column_name): + +@dataclass (slots=True) +class PipelineOutput: """ - Safely adds a list as a new column to a DataFrame, handling potential - length mismatches by using Series alignment. - - Args: - df (pd.DataFrame): Target DataFrame. - list_data (list): Data to be added to the column. - column_name (str): Name of the new column. - - Returns: - pd.DataFrame: Modified DataFrame with the new column. + Final structured output of the ReactionTemplatePipeline. + Attributes: + - template_files: Dictionary mapping reaction identifiers to their corresponding molecule template file paths. + - all_reactions: Optional list of ReactionMetadata instances containing detailed information about each processed reaction """ - # Creating a Series from the list ensures it starts from the top (index 0) - # and fills missing rows with NaN if the list is shorter than the DataFrame - df[column_name] = pd.Series(list_data).astype("Int64") - return df - - -def extract_unique_references(detected_reactions: dict) -> list[str]: - """Collect reference URLs from detected_reactions[*]["reference"], dedupe, keep order.""" - seen = set() - refs: list[str] = [] - - for rxn in detected_reactions.values(): - ref = rxn.get("reference") or {} - - # single URL fields - for v in ref.values(): - if isinstance(v, str): - if v not in seen: - seen.add(v) - refs.append(v) - - # list-of-URLs fields - elif isinstance(v, (list, tuple)): - for u in v: - if isinstance(u, str) and u not in seen: - seen.add(u) - refs.append(u) - - # (optional) nested dict handling, if you ever add that later - elif isinstance(v, dict): - for u in v.values(): - if isinstance(u, str) and u not in seen: - seen.add(u) - refs.append(u) - return refs - + template_files: Dict[str, Path] + all_reactions: Optional[List[ReactionMetadata]] = None # The main class encapsulating the reaction template pipeline class ReactionTemplatePipeline: @@ -277,7 +142,46 @@ def __init__(self, detected_reactions_dict: dict, cache: str, non_monomer_molecu self.non_monomer_molecules_to_retain = non_monomer_molecules_to_retain self.formatted_dict, self.molecule_template_files, self.molecule_images = self.execute_pipeline(self.detected_reactions_dict, self.non_monomer_molecules_to_retain, self.cache) + def _add_column_safe(self, df, list_data, column_name): + """ + Safely adds a list as a new column to a DataFrame, handling potential + length mismatches by using Series alignment. + + Args: + df (pd.DataFrame): Target DataFrame. + list_data (list): Data to be added to the column. + column_name (str): Name of the new column. + Returns: + pd.DataFrame: Modified DataFrame with the new column. + """ + # Creating a Series from the list ensures it starts from the top (index 0) + # and fills missing rows with NaN if the list is shorter than the DataFrame + df[column_name] = pd.Series(list_data).astype("Int64") + return df + + def _add_dict_as_new_columns(self, df_existing, data_dict, titles=["template_reactant_idx", "template_product_idx"]): + """ + Adds dictionary keys and values as new columns to an existing DataFrame. + + This is specifically used to map reactant indices to product indices + within the reaction template. + + Args: + df_existing (pd.DataFrame): The DataFrame to modify. + data_dict (dict): Dictionary where keys and values will become column data. + titles (list): List of two strings for the new column names. + + Returns: + pd.DataFrame: The modified DataFrame with new columns added. + """ + # Convert dict keys and values to Series to ensure alignment with the DataFrame + # Use .astype("Int64") to allow for potential Null/NaN values while keeping integers + df_existing[titles[0]] = pd.Series(list(data_dict.keys())).astype("Int64") + df_existing[titles[1]] = pd.Series(list(data_dict.values())).astype("Int64") + + return df_existing + def run_reaction_template_pipeline(self, detected_reactions, cache): """ Main execution pipeline for mapping reactions, identifying templates, @@ -324,14 +228,14 @@ def run_reaction_template_pipeline(self, detected_reactions, cache): # is_duplicate, processed_dict = compare_rdkit_fragments(...) # Update the dataframe with specific template mapping indices - reaction_dataframe = add_dict_as_new_columns( + reaction_dataframe = self._add_dict_as_new_columns( reaction_dataframe, template_mapped_dict, titles = ["template_reactant_idx", "template_product_idx"] ) # Append edge atoms information (crucial for building polymer chains) - reaction_dataframe = add_column_safe( + reaction_dataframe = self._add_column_safe( reaction_dataframe, edge_atoms, "edge_atoms" @@ -406,19 +310,11 @@ def execute_pipeline(self, detected_reactions, non_monomer_molecules_to_retain, print(f"Molecule Template File for {name}: {path}") # Extract and save unique references to a text file for user reference - references = extract_unique_references(detected_reactions) - - # Print and save references to a text file - print("\nReferences used in detected reactions:") - for ref in references: - print(ref) with open(Path(cache) / "molecule_template_files.txt", "w") as f: for name, path in molecule_template_files.items(): f.write(f"{name}: {path}\n") - f.write("\nReferences:\n") - for ref in references: - f.write(f"{ref}\n") + return formatted_dict, molecule_template_files, molecule_images # TODO: If duplicates were found and skipped, re-index the dictionary and files to be continuous diff --git a/AutoREACTER/reaction_template_builder/lunar_client/__init__.py b/AutoREACTER/reaction_preparation/lunar_client/__init__.py similarity index 100% rename from AutoREACTER/reaction_template_builder/lunar_client/__init__.py rename to AutoREACTER/reaction_preparation/lunar_client/__init__.py diff --git a/AutoREACTER/reaction_preparation/lunar_client/config.py b/AutoREACTER/reaction_preparation/lunar_client/config.py new file mode 100644 index 0000000..e8b270f --- /dev/null +++ b/AutoREACTER/reaction_preparation/lunar_client/config.py @@ -0,0 +1 @@ +LUNAR_ROOT_DIR = '' diff --git a/AutoREACTER/reaction_template_builder/lunar_client/locate_lunar.py b/AutoREACTER/reaction_preparation/lunar_client/locate_lunar.py similarity index 100% rename from AutoREACTER/reaction_template_builder/lunar_client/locate_lunar.py rename to AutoREACTER/reaction_preparation/lunar_client/locate_lunar.py diff --git a/AutoREACTER/reaction_template_builder/lunar_client/lunar_api_wrapper.py b/AutoREACTER/reaction_preparation/lunar_client/lunar_api_wrapper.py similarity index 100% rename from AutoREACTER/reaction_template_builder/lunar_client/lunar_api_wrapper.py rename to AutoREACTER/reaction_preparation/lunar_client/lunar_api_wrapper.py diff --git a/AutoREACTER/reaction_template_builder/lunar_client/molecule_3d_preparation.py b/AutoREACTER/reaction_preparation/lunar_client/molecule_3d_preparation.py similarity index 100% rename from AutoREACTER/reaction_template_builder/lunar_client/molecule_3d_preparation.py rename to AutoREACTER/reaction_preparation/lunar_client/molecule_3d_preparation.py diff --git a/AutoREACTER/reaction_template_builder/lunar_client/molecule_template_preparation.py b/AutoREACTER/reaction_preparation/lunar_client/molecule_template_preparation.py similarity index 100% rename from AutoREACTER/reaction_template_builder/lunar_client/molecule_template_preparation.py rename to AutoREACTER/reaction_preparation/lunar_client/molecule_template_preparation.py diff --git a/AutoREACTER/reaction_template_builder/reaction_template_pipeline/__init__.py b/AutoREACTER/reaction_preparation/reaction_processor/__init__.py similarity index 100% rename from AutoREACTER/reaction_template_builder/reaction_template_pipeline/__init__.py rename to AutoREACTER/reaction_preparation/reaction_processor/__init__.py diff --git a/AutoREACTER/reaction_preparation/reaction_processor/atom_mapping.py b/AutoREACTER/reaction_preparation/reaction_processor/atom_mapping.py new file mode 100644 index 0000000..cbcaa99 --- /dev/null +++ b/AutoREACTER/reaction_preparation/reaction_processor/atom_mapping.py @@ -0,0 +1,36 @@ +def smart_mapping(reactant, smarts_template, match_tuple): + """ + Apply atom mapping from SMARTS template to reactant molecule based on match indices. + + This function transfers atom map numbers from a SMARTS template to corresponding + atoms in the reactant molecule using the provided match tuple that maps SMARTS + atom indices to reactant atom indices. + + Args: + reactant (Chem.Mol): The reactant molecule to apply mapping to + smarts_template (Chem.Mol): SMARTS pattern molecule with atom map numbers + match_tuple (tuple): Tuple mapping SMARTS atom indices to reactant atom indices + + Returns: + None: Modifies the reactant molecule in-place by setting atom map numbers + + Note: + Only atoms with non-zero map numbers in the SMARTS template are processed. + The function does nothing if match_tuple is empty. + """ + if not match_tuple: + return + + # Create mapping from SMARTS atom indices to their map numbers + SMARTS_index_to_Map_Number = {} + for smarts_atom in smarts_template.GetAtoms(): + map_num = smarts_atom.GetAtomMapNum() + if map_num != 0: # Only process atoms with explicit mapping + SMARTS_index_to_Map_Number[smarts_atom.GetIdx()] = map_num + + # Apply mapping to corresponding atoms in reactant + for smarts_pos, map_num in SMARTS_index_to_Map_Number.items(): + if smarts_pos < len(match_tuple): + atom_index_in_mol = match_tuple[smarts_pos] + atom = reactant.GetAtomWithIdx(atom_index_in_mol) + atom.SetAtomMapNum(map_num) \ No newline at end of file diff --git a/AutoREACTER/reaction_template_builder/reaction_template_pipeline/compare_rdkit_fragments.py b/AutoREACTER/reaction_preparation/reaction_processor/fragment_comparison.py similarity index 100% rename from AutoREACTER/reaction_template_builder/reaction_template_pipeline/compare_rdkit_fragments.py rename to AutoREACTER/reaction_preparation/reaction_processor/fragment_comparison.py diff --git a/AutoREACTER/reaction_preparation/reaction_processor/prepare_reactions.py b/AutoREACTER/reaction_preparation/reaction_processor/prepare_reactions.py new file mode 100644 index 0000000..03d0c35 --- /dev/null +++ b/AutoREACTER/reaction_preparation/reaction_processor/prepare_reactions.py @@ -0,0 +1,711 @@ + +""" +Module for preparing chemical reactions for analysis, including atom mapping between reactants and products, +reaction metadata extraction, and visualization utilities using RDKit. + +This module processes reaction SMARTS, applies atom mappings, identifies key reaction features (e.g., first shell, +initiators, byproducts), and generates metadata and visualizations for downstream analysis. It also includes validation checks to ensure mapping +consistency and completeness. +""" + + +# WARNING: +# When modifying this file for dataframe or any other indexing variables use idx and idxs, do not use index or indices or similar. + +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, List, Optional + +from rdkit import Chem +from rdkit.Chem import AllChem, Draw, rdmolops +from PIL.Image import Image +import pandas as pd + +from AutoREACTER.detectors.reaction_detector import ReactionInstance +from AutoREACTER.reaction_preparation.reaction_processor.utils import ( + add_dict_as_new_columns, add_column_safe, compare_set, prepare_paths +) +from AutoREACTER.reaction_preparation.reaction_processor.walker import reaction_atom_walker + + +class MappingError(Exception): + """Custom exception raised when atom mapping between reactants and products fails or is inconsistent.""" + + +class SMARTSParsingError(Exception): + """Custom exception raised when parsing SMILES or reaction SMARTS fails.""" + + +@dataclass(slots=True) +class ReactionMetadata: + """ + Stores comprehensive metadata for a single reaction including molecular structures, + atom mappings, and analysis results. + reaction_id: Unique identifier for the reaction instance + reactant_combined_mol: RDKit molecule object representing combined reactants + product_combined_mol: RDKit molecule object representing combined products + reactant_to_product_mapping: Dictionary mapping reactant atom indices to product atom indices + product_to_reactant_mapping: Dictionary mapping product atom indices back to reactant atom indices + template_reactant_to_product_mapping: Optional dictionary mapping reactant indices in the template to product indices + edge_atoms: Optional list of reactant atom indices that are at the edge of the local environment + first_shell: Optional list of reactant atom indices in the first coordination shell (reaction center) + initiators: Optional list of reactant atom indices that are initiators (map numbers 1 or 2) + byproduct_indices: Optional list of reactant atom indices corresponding to detected byproducts + reaction_smarts: Optional string of the reaction SMARTS pattern + reactant_smarts: Optional string of the combined reactant SMILES + product_smiles: Optional string of the combined product SMILES + csv_path: Optional Path to the CSV file containing atom mappings and analysis + mol_3d_path: Optional Path to a file containing 3D structures of the molecules + reaction_dataframe: Optional pandas DataFrame containing detailed mapping and analysis results + delete_atom: Boolean indicating whether the reaction involves a delete atom (byproduct) + delete_atom_idx: Optional integer index of the reactant atom that corresponds to the byproduct + activity_stats: Boolean indicating whether this reaction should be included in activity statistics (e.g., not a duplicate) + """ + reaction_id: int + reactant_combined_mol: Chem.Mol + product_combined_mol: Chem.Mol + reactant_to_product_mapping: Dict[int, int] + product_to_reactant_mapping: Dict[int, int] + template_reactant_to_product_mapping: Optional[Dict[int, int]] = None + edge_atoms: Optional[List[int]] = None + first_shell: Optional[List[int]] = None + initiators: Optional[List[int]] = None + byproduct_indices: Optional[List[int]] = None + reaction_smarts: Optional[str] = None + reactant_smarts: Optional[str] = None + product_smiles: Optional[str] = None + csv_path: Optional[Path] = None + mol_3d_path: Optional[Path] = None + reaction_dataframe: Optional[pd.DataFrame] = None + delete_atom: bool = True + delete_atom_idx: Optional[int] = None + activity_stats: bool = True + + +class PrepareReactions: + """Processes chemical reactions: builds atom mappings, identifies reaction centers, and detects byproducts.""" + + def __init__(self, cache: Path): + """Initialize with cache directory for storing reaction CSVs.""" + self.cache = Path(cache) + self.csv_cache = prepare_paths(self.cache, "csv_cache") + + # --- PUBLIC --- + + def prepare_reactions(self, reaction_instances: list[ReactionInstance]) -> list[ReactionMetadata]: + """ + Main pipeline: processes reaction instances, detects duplicates, and enriches metadata with template mappings. + + Args: + reaction_instances: List of detected reaction instances to process + + Returns: + List of processed ReactionMetadata objects with template mappings and edge atoms + """ + reactions_metadata = self._process_reaction_instances(reaction_instances) + reaction_metadata = self._detect_duplicates(reactions_metadata) + + for reaction in reactions_metadata: + # Skip reactions marked as duplicates + if not reaction.activity_stats: + continue # Skip reactions marked as duplicates + + combined_reactant_molecule_object = reaction.reactant_combined_mol + reaction_dataframe = reaction.reaction_dataframe + csv_save_path = reaction.csv_path + + # Build full atom mapping dictionary from dataframe + fully_mapped_dict = reaction_dataframe.set_index("reactant_idx")["product_idx"].to_dict() + first_shell = reaction_dataframe["first_shell"].dropna().tolist() + + # Generate template mapping by walking reaction graph + template_mapped_dict, edge_atoms = reaction_atom_walker( + combined_reactant_molecule_object, + first_shell, + fully_mapped_dict + ) + + # Add template mapping and edge atoms to dataframe + reaction_dataframe = add_dict_as_new_columns( + reaction_dataframe, + template_mapped_dict, + titles=["template_reactant_idx", "template_product_idx"] + ) + + # Add edge atoms as a new column in the dataframe + reaction_dataframe = add_column_safe( + reaction_dataframe, + edge_atoms, + "edge_atoms" + ) + + # Save updated dataframe back to CSV and update metadata + reaction.reaction_dataframe = reaction_dataframe.copy() + # Save the updated dataframe with template mappings and edge atoms to CSV + reaction_dataframe.to_csv(csv_save_path, index=False) + # Update metadata with template mapping and edge atoms + reaction.edge_atoms = edge_atoms + # Store the template mapping in the metadata for later use + reaction.template_reactant_to_product_mapping = template_mapped_dict + + return reaction_metadata + + # --- PIPELINE STEPS (PRIVATE) --- + + def _process_reaction_instances(self, detected_reactions: list[ReactionInstance]) -> list[ReactionMetadata]: + """ + Converts ReactionInstance objects into ReactionMetadata by building molecules and running reactions. + + Args: + detected_reactions: List of detected reaction instances + + Returns: + List of ReactionMetadata objects with atom mappings + """ + csv_cache = self.csv_cache + reaction_metadata = [] + + for reaction in detected_reactions: + rxn_smarts = reaction.reaction_smarts + reactant_smiles_1 = reaction.monomer_1.smiles + same_reactants = reaction.same_reactants + + # Handle case where both reactants are identical + if same_reactants: + reactant_smiles_2 = reactant_smiles_1 + else: + reactant_smiles_2 = reaction.monomer_2.smiles + + delete_atoms = reaction.delete_atom + + # Build reaction and reactant molecules + rxn = self._build_reaction(rxn_smarts) + # This function also runs the reaction and builds metadata for each product set, including atom mappings and byproduct detection + mol_reactant_1, mol_reactant_2 = self._build_reactants(reactant_smiles_1, reactant_smiles_2) + # Build reaction tuple based on whether reactants are the same or different. If same, only one ordering is needed. If different, both orderings are processed to account for reaction directionality. + reaction_tuple = self._build_reaction_tuple(same_reactants, mol_reactant_1, mol_reactant_2) + + # Process products and build metadata + reaction_metadata = self._process_reaction_products( + rxn, + csv_cache, + reaction_tuple, + delete_atoms, + reaction_metadata + ) + + return reaction_metadata + + def _detect_duplicates(self, reaction_metadata_list: list[ReactionMetadata]) -> list[ReactionMetadata]: + """ + Filters duplicate reactions based on reactant and product molecules. + + Args: + reaction_metadata_list: List of reaction metadata to filter + + Returns: + List of unique reactions; duplicates marked with activity_stats=False + """ + unique_metadata: list[ReactionMetadata] = [] + + for reaction in reaction_metadata_list: + # Compare current reaction's reactants and products against unique reactions collected so far + reactants = reaction.reactant_combined_mol + products = reaction.product_combined_mol + + # Keep reaction if it's unique, otherwise mark as duplicate + if compare_set(unique_metadata, reactants, products): + unique_metadata.append(reaction) + else: + reaction.activity_stats = False + + return unique_metadata + + def _process_reaction_products(self, + rxn: Chem.rdChemReactions.ChemicalReaction, + csv_cache: Path, + reaction_tuple: list, + delete_atoms: bool = True, + reaction_metadata: Optional[list[ReactionMetadata]] = None + ) -> list[ReactionMetadata]: + """ + Runs reactions on reactant pairs and builds metadata for each product set. + + Args: + rxn: RDKit ChemicalReaction object + csv_cache: Path to cache directory for saving CSVs + reaction_tuple: List of reactant pairs to process + delete_atoms: Whether to detect and track byproducts + reaction_metadata: Accumulator list for metadata objects + + Returns: + Updated list of ReactionMetadata objects + """ + if reaction_metadata is None: + reaction_metadata = [] + + for pair in reaction_tuple: + r1, r2 = Chem.Mol(pair[0]), Chem.Mol(pair[1]) + + # Assign unique map numbers and isotopes to track atoms through reaction + self._assign_atom_map_numbers_and_set_isotopes(r1, r2) + # Run the reaction to get products + products = rxn.RunReactants((r1, r2)) + + # If no products are generated, skip to the next reactant pair + if not products: + continue + + # Process each product set generated by the reaction + for product_set in products: + df = pd.DataFrame(columns=["reactant_idx", "product_idx"]) + + # Combine molecules for mapping + reactant_combined = Chem.CombineMols(r1, r2) + product_combined = Chem.CombineMols(*product_set) + + # Restore atom map numbers from isotopes (which survive the reaction) + self._reassign_atom_map_numbers_by_isotope(product_combined) + + # Build bidirectional atom index mappings + mapping_dict, df = self._build_atom_index_mapping(reactant_combined, product_combined) + reverse_mapping = {v: k for k, v in mapping_dict.items()} + + # Restore map numbers for visualization + self._reveal_template_map_numbers(product_combined) + + # Validate mapping consistency + self._validate_mapping(df, reactant_combined, product_combined) + + # Identify atoms involved in reaction center and initiators + first_shell, initiator_idxs = self._assign_first_shell_and_initiators( + reactant_combined, + product_combined, + reverse_mapping + ) + + # Detect byproducts (smallest fragments) + byproduct_reactant_idxs = self._detect_byproducts(product_combined, reverse_mapping, delete_atoms) + + # Combine all mapping data into single dataframe + df_combined = pd.concat([ + df, + pd.Series(first_shell, name="first_shell"), + pd.Series(initiator_idxs, name="initiators"), + pd.Series(byproduct_reactant_idxs, name="byproduct_idx") + ], axis=1).astype(pd.Int64Dtype()) + + total_products = len(reaction_metadata) + 1 + + # Clear isotopes before saving to restore normal chemistry + self._clear_isotopes(reactant_combined, product_combined) + + # Save mapping dataframe to CSV + df_combined.to_csv(csv_cache / f"reaction_{total_products}.csv", index=False) + + # Create and store metadata object + reaction_metadata.append( + ReactionMetadata( + reaction_id=total_products, + reactant_combined_mol=reactant_combined, + product_combined_mol=product_combined, + reactant_to_product_mapping=mapping_dict, + product_to_reactant_mapping=reverse_mapping, + first_shell=first_shell, + initiators=initiator_idxs, + byproduct_indices=byproduct_reactant_idxs, + csv_path=csv_cache / f"reaction_{total_products}.csv", + reaction_dataframe=df_combined, + delete_atom=delete_atoms, + delete_atom_idx=byproduct_reactant_idxs[0] if byproduct_reactant_idxs else None, + activity_stats=True + ) + ) + + return reaction_metadata + + # --- CORE REACTION LOGIC --- + + def _assign_first_shell_and_initiators(self, + reactant_combined: Chem.Mol, + product_combined: Chem.Mol, + reversed_mapping_dict: dict[int, int]) -> tuple[list[int], list[int]]: + """ + Identifies atoms in the first coordination shell (atoms with map numbers < 999) and initiator atoms. + Initiators are atoms with map numbers 1 or 2 (typically the reactive centers). + + Args: + reactant_combined: Combined reactant molecule + product_combined: Combined product molecule + reversed_mapping_dict: Product idx -> Reactant idx mapping + + Returns: + Tuple of (first_shell atom indices, initiator atom indices) + + Raises: + ValueError: If exactly 2 initiators are not found + """ + first_shell = [] + initiator_idxs = [] + + for p_atom in product_combined.GetAtoms(): + # Only process atoms with valid map numbers (< 999 indicates non-byproduct) + if p_atom.GetAtomMapNum() < 999: + p_idx = p_atom.GetIdx() + + if p_idx not in reversed_mapping_dict: + raise ValueError(f"Mapping error: product atom {p_idx} not found in mapping_dict") + + r_idx = reversed_mapping_dict[p_idx] + atom = reactant_combined.GetAtomWithIdx(r_idx) + atom.SetAtomMapNum(p_atom.GetAtomMapNum()) + + first_shell.append(r_idx) + + # Initiators are atoms with map numbers 1 or 2 + if p_atom.GetAtomMapNum() in [1, 2]: + initiator_idxs.append(r_idx) + + if len(initiator_idxs) != 2: + raise ValueError(f"Expected 2 initiators, got {len(initiator_idxs)}: {initiator_idxs}") + + return first_shell, initiator_idxs + + def _detect_byproducts(self, + product_combined: Chem.Mol, + reversed_mapping_dict: dict[int, int], + delete_atoms: bool) -> list[int]: + """ + Identifies byproduct atoms (smallest molecular fragment) and maps them back to reactant space. + + Args: + product_combined: Combined product molecule + reversed_mapping_dict: Product idx -> Reactant idx mapping + delete_atoms: Whether to perform byproduct detection + + Returns: + List of reactant indices corresponding to byproduct atoms + """ + # If delete_atoms is False, we skip byproduct detection and return an empty list + if not delete_atoms: + return [] + + # Fragment molecule and find smallest fragment + frags = rdmolops.GetMolFrags(product_combined, asMols=True) + smallest = min(frags, key=lambda m: m.GetNumAtoms()) + + byproduct_reactant_indices = [] + + # Map byproduct atoms back to reactant indices + for atom in smallest.GetAtoms(): + p_idx = atom.GetIdx() + if p_idx in reversed_mapping_dict: + byproduct_reactant_indices.append(reversed_mapping_dict[p_idx]) + + return byproduct_reactant_indices + + def _validate_mapping(self, df: pd.DataFrame, reactant: Chem.Mol, product: Chem.Mol) -> None: + """ + Validates atom mapping consistency: checks for required columns, duplicates, bounds, and completeness. + + Args: + df: Dataframe containing reactant_idx and product_idx columns + reactant: Reactant molecule + product: Product molecule + + Raises: + MappingError: If any validation check fails + """ + # Ensure dataframe exists and has required columns + if df is None or df.empty: + raise MappingError("Mapping validation failed: empty dataframe") + + # Check for required columns + required_cols = {"reactant_idx", "product_idx"} + if not required_cols.issubset(df.columns): + raise MappingError(f"Mapping validation error: required columns {required_cols} not found in dataframe.") + + # Extract indices and perform validation checks + r_idxs = df["reactant_idx"].dropna().tolist() + p_idxs = df["product_idx"].dropna().tolist() + + # Atom counts must match + if len(r_idxs) != len(p_idxs): + raise MappingError(f"Mapping validation error: mismatch in atom counts between reactant and product.") + + # No duplicate mappings (1-to-1 mapping required) + if len(set(r_idxs)) != len(r_idxs): + raise MappingError(f"Mapping validation error: duplicate indices found in reactant mapping.") + if len(set(p_idxs)) != len(p_idxs): + raise MappingError(f"Mapping validation error: duplicate indices found in product mapping.") + + # Indices must be within molecule bounds + if any(idx >= reactant.GetNumAtoms() for idx in r_idxs): + raise MappingError(f"Mapping validation error: reactant index out of bounds.") + if any(idx >= product.GetNumAtoms() for idx in p_idxs): + raise MappingError(f"Mapping validation error: product index out of bounds.") + + # All atoms must be mapped (complete mapping) + if len(r_idxs) != reactant.GetNumAtoms(): + raise MappingError(f"Mapping validation error: incomplete mapping for reactant.") + if len(p_idxs) != product.GetNumAtoms(): + raise MappingError(f"Mapping validation error: incomplete mapping for product.") + + # --- ATOM MAPPING --- + + def _assign_atom_map_numbers_and_set_isotopes(self, r1: Chem.Mol, r2: Chem.Mol) -> None: + """ + Assigns unique map numbers and isotopes to reactant atoms for tracking through reaction. + Isotopes survive RDKit's reaction engine, allowing atom identity recovery post-reaction. + + Args: + r1: First reactant molecule + r2: Second reactant molecule + """ + # Assign map numbers 1001+ to first reactant atoms + for atom in r1.GetAtoms(): + idx = 1001 + atom.GetIdx() + atom.SetAtomMapNum(idx) + atom.SetIsotope(idx) # Isotope survives the reaction + + # Assign map numbers 2001+ to second reactant atoms + for atom in r2.GetAtoms(): + idx = 2001 + atom.GetIdx() + atom.SetAtomMapNum(idx) + atom.SetIsotope(idx) # Isotope survives the reaction + + def _reassign_atom_map_numbers_by_isotope(self, mol: Chem.Mol) -> None: + """ + Restores atom map numbers from isotope values after reaction. + RDKit's reaction engine preserves isotopes, allowing recovery of original atom identities. + + Args: + mol: Product molecule with isotope information + """ + for atom in mol.GetAtoms(): + surviving_idx = atom.GetIsotope() + if surviving_idx != 0: + atom.SetAtomMapNum(surviving_idx) # Restore original map number + atom.SetIsotope(0) # Clear isotope to restore normal chemistry + + def _build_atom_index_mapping(self, + reactant_combined: Chem.Mol, + product_combined: Chem.Mol) -> tuple[dict[int, int], pd.DataFrame]: + """ + Builds bidirectional atom index mapping between reactants and products using map numbers. + + Args: + reactant_combined: Combined reactant molecule + product_combined: Combined product molecule + + Returns: + Tuple of (mapping dict: reactant_idx -> product_idx, dataframe with mapping) + """ + mapping_dict = {} + + # Pre-index product atoms by map number for O(1) lookup + product_map = { + atom.GetAtomMapNum(): atom.GetIdx() + for atom in product_combined.GetAtoms() + if atom.GetAtomMapNum() != 0 + } + + rows = [] + for r_atom in reactant_combined.GetAtoms(): + r_map_num = r_atom.GetAtomMapNum() + + # Match reactant atom to product atom via map number + if r_map_num != 0 and r_map_num in product_map: + r_idx = r_atom.GetIdx() + p_idx = product_map[r_map_num] + + mapping_dict[r_idx] = p_idx + rows.append({ + "reactant_idx": r_idx, + "product_idx": p_idx + }) + + df = pd.DataFrame(rows) + return mapping_dict, df + + def _reveal_template_map_numbers(self, mol: Chem.Mol) -> None: + """ + Restores map numbers from RDKit's internal 'old_mapno' property for visualization. + RDKit stores original map numbers in this property after reaction execution. + + Args: + mol: Product molecule + """ + for atom in mol.GetAtoms(): + if atom.HasProp('old_mapno'): + map_num = atom.GetIntProp('old_mapno') + atom.SetAtomMapNum(map_num) + + def _clear_isotopes(self, mol_1: Chem.Mol, mol_2: Chem.Mol) -> None: + """ + Clears isotope values from molecules to restore normal chemistry after using isotopes for atom tracking. + + Args: + mol_1: First molecule to clear + mol_2: Second molecule to clear + """ + for atom in mol_1.GetAtoms(): + atom.SetIsotope(0) + for atom in mol_2.GetAtoms(): + atom.SetIsotope(0) + + # --- BUILDERS --- + + def _build_reaction(self, rxn_smarts: str) -> Chem.rdChemReactions.ChemicalReaction: + """Builds RDKit ChemicalReaction object from SMARTS string.""" + return AllChem.ReactionFromSmarts(rxn_smarts) + + def _build_reactants(self, reactant_smiles_1: str, reactant_smiles_2: str) -> tuple[Chem.Mol, Chem.Mol]: + """ + Builds reactant molecules from SMILES strings with explicit hydrogens added. + + Args: + reactant_smiles_1: SMILES string for first reactant + reactant_smiles_2: SMILES string for second reactant + + Returns: + Tuple of (reactant1 molecule, reactant2 molecule) with explicit hydrogens + """ + mol_reactant_1 = Chem.MolFromSmiles(reactant_smiles_1) + if mol_reactant_1 is None: + raise SMARTSParsingError(f"Failed to parse first reactant SMILES: {reactant_smiles_1!r}") + mol_reactant_1 = Chem.AddHs(mol_reactant_1) + + mol_reactant_2 = Chem.MolFromSmiles(reactant_smiles_2) + if mol_reactant_2 is None: + raise SMARTSParsingError(f"Failed to parse second reactant SMILES: {reactant_smiles_2!r}") + mol_reactant_2 = Chem.AddHs(mol_reactant_2) + + return mol_reactant_1, mol_reactant_2 + + def _build_reaction_tuple(self, same_reactants: bool, mol_reactant_1: Chem.Mol, mol_reactant_2: Chem.Mol) -> list: + """ + Builds list of reactant pairs to process. If reactants are identical, returns single pair. + Otherwise returns both orderings to account for reaction directionality. + + Args: + same_reactants: Whether both reactants are identical + mol_reactant_1: First reactant molecule + mol_reactant_2: Second reactant molecule + + Returns: + List of reactant pairs [[r1, r2], ...] to process + """ + if same_reactants: + return [[mol_reactant_1, mol_reactant_1]] + + return [[mol_reactant_1, mol_reactant_2], [mol_reactant_2, mol_reactant_1]] + + # --- HELPERS --- + + def _is_consecutive(self, num_list: list[int]) -> bool: + """ + Checks if list contains consecutive integers with no duplicates. + + Args: + num_list: List of integers to check + + Returns: + True if list is consecutive and has no duplicates, False otherwise + """ + if not num_list: + return False + + return ( + len(set(num_list)) == len(num_list) + and max(num_list) - min(num_list) + 1 == len(num_list) + ) + + # --- VISUALIZATION --- + + def reaction_templates_highlighted_image_grid( + self, + metadata_list: List[ReactionMetadata], + highlight_type: str = "template", + ) -> Image: + """ + Generates grid image of reactions with highlighted atoms based on type. + + Args: + metadata_list: List of reaction metadata to visualize + highlight_type: Type of atoms to highlight - "template", "edge", "initiators", or "delete" + + Returns: + PIL Image containing 2-column grid of reactant-product pairs with highlighted atoms + """ + mols = [] + highlight_lists = [] + highlight_colors = [] + + for metadata in metadata_list: + reactant = Chem.RWMol(metadata.reactant_combined_mol) + product = Chem.RWMol(metadata.product_combined_mol) + + # Clear atom maps for clean visualization + for atom in reactant.GetAtoms(): + atom.SetAtomMapNum(0) + for atom in product.GetAtoms(): + atom.SetAtomMapNum(0) + + df = metadata.reaction_dataframe + atoms: List[int] = [] + color_map: Dict[int, tuple] = {} + + # Select atoms to highlight based on type + if highlight_type == "template": + atoms = list((metadata.template_reactant_to_product_mapping or {}).keys()) + for a in atoms: + color_map[a] = (0.2, 0.6, 1.0) # blue + + elif highlight_type == "edge": + atoms = metadata.edge_atoms or [] + for a in atoms: + color_map[a] = (1.0, 0.4, 0.0) # orange + + elif highlight_type == "initiators": + atoms = df["initiators"].dropna().astype(int).tolist() if df is not None else [] + for a in atoms: + color_map[a] = (0.0, 0.8, 0.2) # green + + elif highlight_type == "delete": + if metadata.delete_atom and metadata.byproduct_indices: + atoms = metadata.byproduct_indices + for a in atoms: + color_map[a] = (1.0, 0.0, 0.0) # red + + mols.extend([reactant, product]) + + # Build atom mappings for highlighting + forward_map = metadata.reactant_to_product_mapping + reactant_atoms = atoms + + # Map reactant atoms to product atoms + product_atoms = [] + for r_idx in reactant_atoms: + if r_idx in forward_map: + product_atoms.append(forward_map[r_idx]) + + # Build color maps for reactant and product + reactant_color_map = {a: color_map[a] for a in reactant_atoms} + product_color_map = {p: color_map[r] for r, p in forward_map.items() if r in reactant_atoms} + + highlight_lists.append(reactant_atoms) + highlight_lists.append(product_atoms) + highlight_colors.append(reactant_color_map) + highlight_colors.append(product_color_map) + + # Generate grid image with 2 molecules per row + img = Draw.MolsToGridImage( + mols, + molsPerRow=2, + highlightAtomLists=highlight_lists, + highlightAtomColors=highlight_colors, + subImgSize=(400, 400), + useSVG=False, + ) + return img diff --git a/AutoREACTER/reaction_preparation/reaction_processor/utils.py b/AutoREACTER/reaction_preparation/reaction_processor/utils.py new file mode 100644 index 0000000..7f4a571 --- /dev/null +++ b/AutoREACTER/reaction_preparation/reaction_processor/utils.py @@ -0,0 +1,161 @@ +from rdkit import Chem +import pandas as pd +from typing import List +from pathlib import Path +import os + +from AutoREACTER.detectors.reaction_detector import ReactionInstance + +def prepare_paths( cache, subdir): + """ + Create and prepare directory structure for CSV output files. + + Args: + cache (str or Path): Base cache directory path + + Returns: + Path: Path object pointing to the CSV cache directory + + Example: + >>> csv_dir = prepare_paths("/path/to/cache") + >>> print(csv_dir) # /path/to/cache/csv + """ + csv_cache = Path(cache) / subdir + os.makedirs(csv_cache, exist_ok=True) + return csv_cache + +def add_dict_as_new_columns(df_existing, data_dict, titles=("template_reactant_idx", "template_product_idx")): + df_existing[titles[0]] = pd.Series(list(data_dict.keys())).astype("Int64") + df_existing[titles[1]] = pd.Series(list(data_dict.values())).astype("Int64") + return df_existing + + +def add_column_safe(df, list_data, column_name): + df[column_name] = pd.Series(list_data).astype("Int64") + return df + + +def extract_unique_references(detected_reactions: List[ReactionInstance]) -> list[str]: + seen = set() + refs = [] + + for metadata in detected_reactions: + # Prefer the correct 'references' attribute on ReactionInstance, with + # a backward-compatible fallback to 'reference' if present. + ref = getattr(metadata, "references", None) + if ref is None: + ref = getattr(metadata, "reference", {}) or {} + else: + if ref is None: + ref = {} + + for v in ref.values(): + if isinstance(v, str): + if v not in seen: + seen.add(v) + refs.append(v) + + elif isinstance(v, (list, tuple)): + for u in v: + if isinstance(u, str) and u not in seen: + seen.add(u) + refs.append(u) + + elif isinstance(v, dict): + for u in v.values(): + if isinstance(u, str) and u not in seen: + seen.add(u) + refs.append(u) + + return refs + +def compare_set(reaction_metadata_list, _react2, _prod2): + """ + Compare reactant + product pair against existing reactions. + Ignores atom mapping. + + Returns False if duplicate pair found. + """ + + # normalize new pair + react2 = Chem.Mol(_react2) + prod2 = Chem.Mol(_prod2) + + for atom in react2.GetAtoms(): + atom.SetAtomMapNum(0) + for atom in prod2.GetAtoms(): + atom.SetAtomMapNum(0) + + smi_r2 = Chem.MolToSmiles(react2, canonical=True) + smi_p2 = Chem.MolToSmiles(prod2, canonical=True) + + for metadata in reaction_metadata_list: + react1 = Chem.Mol(metadata.reactant_combined_mol) + prod1 = Chem.Mol(metadata.product_combined_mol) + + for atom in react1.GetAtoms(): + atom.SetAtomMapNum(0) + for atom in prod1.GetAtoms(): + atom.SetAtomMapNum(0) + + smi_r1 = Chem.MolToSmiles(react1, canonical=True) + smi_p1 = Chem.MolToSmiles(prod1, canonical=True) + + if smi_r1 == smi_r2 and smi_p1 == smi_p2: + return False + + return True + + +def compare_rdkit_molecules_canonical(data_smiles_list, mol_smi_2): + """ + Compares two RDKit molecule SMILES strings to determine if they + represent the same chemical structure using canonical SMILES. + + Args: + data_smiles_list (list): List of SMILES strings to compare against. + mol_smi_2 (str): SMILES string of the second molecule. + + Returns: + bool: True if molecules are chemically identical, False otherwise. + """ + if not mol_smi_2: + return data_smiles_list, False + mol2 = Chem.MolFromSmiles(mol_smi_2) + if mol2 is None: + return data_smiles_list, False + + for mol_smi_1 in data_smiles_list: + mol1 = Chem.MolFromSmiles(mol_smi_1) + + # Handle cases where SMILES might be invalid + if mol1 is None or mol2 is None: + return data_smiles_list, False # or raise a ValueError + # Generate canonical SMILES and compare them + canonical_smi_1 = Chem.MolToSmiles(mol1, canonical=True) + canonical_smi_2 = Chem.MolToSmiles(mol2, canonical=True) + + if canonical_smi_1 == canonical_smi_2: + return data_smiles_list, True + + data_smiles_list.append(mol_smi_2) + return data_smiles_list, False + + + +def prep_for_3d_molecule_generation(data_smiles_list, molecule_dict_csv_path_dict): + molecule_dict = {} + for i, smiles in enumerate(data_smiles_list): + key = f"data_{i+1}" + molecule = Chem.MolFromSmiles(smiles) + if molecule is None: + raise ValueError(f"Invalid SMILES: {smiles}") + molecule = Chem.AddHs(molecule) + molecule_dict[key] = molecule + for i, (key, value) in enumerate(molecule_dict_csv_path_dict.items()): + reactant = value.get("reactant") + product = value.get("product") + if reactant and product: + molecule_dict[f"pre_{i+1}"] = reactant + molecule_dict[f"post_{i+1}"] = product + return molecule_dict diff --git a/AutoREACTER/reaction_template_builder/reaction_template_pipeline/walker.py b/AutoREACTER/reaction_preparation/reaction_processor/walker.py similarity index 98% rename from AutoREACTER/reaction_template_builder/reaction_template_pipeline/walker.py rename to AutoREACTER/reaction_preparation/reaction_processor/walker.py index 36d939c..b6d7c97 100644 --- a/AutoREACTER/reaction_template_builder/reaction_template_pipeline/walker.py +++ b/AutoREACTER/reaction_preparation/reaction_processor/walker.py @@ -105,7 +105,8 @@ def product_atom_walker(template_indexes, MAP_dict): """ template_mapped_dict = {} for i in template_indexes: - print(f"Mapping reactant atom index {i}") + # debug print to trace which reactant atom index is being processed + # print(f"Mapping reactant atom index {i}") # Check if the reactant atom index exists in our mapping dictionary atom = MAP_dict.get(i) if atom is not None: diff --git a/AutoREACTER/reaction_template_builder/lunar_client/config.py b/AutoREACTER/reaction_template_builder/lunar_client/config.py deleted file mode 100644 index 50954f3..0000000 --- a/AutoREACTER/reaction_template_builder/lunar_client/config.py +++ /dev/null @@ -1,5 +0,0 @@ -import os - -# Root directory for the LUNAR installation. -# Configure via the LUNAR_ROOT_DIR environment variable; defaults to None if unset. -LUNAR_ROOT_DIR = os.environ.get("LUNAR_ROOT_DIR") or None diff --git a/AutoREACTER/reaction_template_builder/reaction_template_pipeline/map_reactant_atoms.py b/AutoREACTER/reaction_template_builder/reaction_template_pipeline/map_reactant_atoms.py deleted file mode 100644 index c13dad4..0000000 --- a/AutoREACTER/reaction_template_builder/reaction_template_pipeline/map_reactant_atoms.py +++ /dev/null @@ -1,714 +0,0 @@ -""" -Reaction Mapping and Analysis Module - -This module provides tools for processing chemical reactions, mapping atoms between -reactants and products, and generating comprehensive analysis data for polymer chemistry. -It uses RDKit for molecular operations and pandas for data management. - -Key features: -- Atom mapping between reactants and products -- Reaction validation and consistency checking -- CSV export of mapping data -- Visualization of reaction grids -- Support for polymerization reactions - -Author: Janitha Mahanthe -Date: 1/21/2026 -Version: 0.1.0 -# TODO: Return mols with mapped atoms for visualization for each reaction with highlighting -""" - -from rdkit import Chem -from rdkit.Chem import AllChem, rdmolops -import pandas as pd -from pathlib import Path -import os -import itertools -try: - # Case 1: correct when reaction_template_pipeline is a proper package - from .util import compare_products - -except (ImportError, ModuleNotFoundError): - try: - # Case 2: fully-qualified import under your installed namespace - from reaction_lammps_mupt.reaction_template_builder.reaction_template_pipeline.util import ( - compare_products, - ) - - except (ImportError, ModuleNotFoundError): - try: - # Case 3: if reaction_template_pipeline is installed as a top-level package - from reaction_template_pipeline.util import compare_products - - except (ImportError, ModuleNotFoundError): - # Case 4: running as a loose script from the same directory - from util import compare_products - - -def is_number_in_set(set_of_tuples, reactant): - """ - Find a tuple in the set that contains atom indices matching specific atom map numbers. - - This function searches for atoms in the reactant molecule that have atom map numbers - 101 or 102, then checks if those atom indices exist in any tuple within the provided set. - - Args: - set_of_tuples (set of tuples): Set of atom index tuples to search through - reactant (Chem.Mol): RDKit molecule object to search for mapped atoms - - Returns: - tuple: The first tuple found containing a matching atom index - - Raises: - ValueError: If no matching atom is found in the provided set of tuples - - Example: - >>> matches = {(0, 1, 2), (3, 4, 5)} - >>> result = is_number_in_set(matches, reactant_mol) - """ - for atom in reactant.GetAtoms(): - # Check for specific atom map numbers (101 or 102) - if atom.GetAtomMapNum() == 101 or atom.GetAtomMapNum() == 102: - x = atom.GetIdx() - for t in set_of_tuples: - if x in t: - print(f"Found matching atom index {x} in tuple {t}") - return t - raise ValueError("No matching atom found in the provided set of tuples.") - - -def smart_mapping(reactant, smarts_template, match_tuple): - """ - Apply atom mapping from SMARTS template to reactant molecule based on match indices. - - This function transfers atom map numbers from a SMARTS template to corresponding - atoms in the reactant molecule using the provided match tuple that maps SMARTS - atom indices to reactant atom indices. - - Args: - reactant (Chem.Mol): The reactant molecule to apply mapping to - smarts_template (Chem.Mol): SMARTS pattern molecule with atom map numbers - match_tuple (tuple): Tuple mapping SMARTS atom indices to reactant atom indices - - Returns: - None: Modifies the reactant molecule in-place by setting atom map numbers - - Note: - Only atoms with non-zero map numbers in the SMARTS template are processed. - The function does nothing if match_tuple is empty. - """ - if not match_tuple: - return - - # Create mapping from SMARTS atom indices to their map numbers - SMARTS_index_to_Map_Number = {} - for smarts_atom in smarts_template.GetAtoms(): - map_num = smarts_atom.GetAtomMapNum() - if map_num != 0: # Only process atoms with explicit mapping - SMARTS_index_to_Map_Number[smarts_atom.GetIdx()] = map_num - - # Apply mapping to corresponding atoms in reactant - for smarts_pos, map_num in SMARTS_index_to_Map_Number.items(): - if smarts_pos < len(match_tuple): - atom_index_in_mol = match_tuple[smarts_pos] - atom = reactant.GetAtomWithIdx(atom_index_in_mol) - atom.SetAtomMapNum(map_num) - - -def is_consecutive(num_list): - """ - Check if a list of numbers forms a consecutive sequence without gaps. - - Args: - num_list (list): List of integers to check - - Returns: - bool: True if the numbers are consecutive, False otherwise - - Examples: - >>> is_consecutive([1, 2, 3, 4]) # Returns True - >>> is_consecutive([1, 3, 4]) # Returns False - >>> is_consecutive([]) # Returns False - """ - if not num_list: - return False - - # Check for consecutive sequence: unique numbers and max-min+1 equals length - return (len(set(num_list)) == len(num_list) and - max(num_list) - min(num_list) + 1 == len(num_list)) - - -def prepare_paths(cache): - """ - Create and prepare directory structure for CSV output files. - - Args: - cache (str or Path): Base cache directory path - - Returns: - Path: Path object pointing to the CSV cache directory - - Example: - >>> csv_dir = prepare_paths("/path/to/cache") - >>> print(csv_dir) # /path/to/cache/csv - """ - csv_cache = Path(cache) / "csv" / "reactant_product_mapping" - os.makedirs(csv_cache, exist_ok=True) - return csv_cache - - -def build_reaction(rxn_smarts): - """ - Create an RDKit reaction object from SMARTS string. - - Args: - rxn_smarts (str): Reaction SMARTS string - - Returns: - Chem.rdChemReactions.ChemicalReaction: RDKit reaction object - - Example: - >>> rxn = build_reaction("[C:1]=[O:2]>>[C:1]-[O:2]") - """ - return AllChem.ReactionFromSmarts(rxn_smarts) - - -def build_reactants(reactant_smiles_1, reactant_smiles_2): - """ - Create RDKit molecule objects from SMILES strings with explicit hydrogens. - - Args: - reactant_smiles_1 (str): SMILES string for first reactant - reactant_smiles_2 (str): SMILES string for second reactant - - Returns: - tuple: (mol_reactant_1, mol_reactant_2) - Both molecules with explicit hydrogens - - Example: - >>> mol1, mol2 = build_reactants("CCO", "CC=O") - """ - mol_reactant_1 = Chem.MolFromSmiles(reactant_smiles_1) - mol_reactant_1 = Chem.AddHs(mol_reactant_1) # Add explicit hydrogens - mol_reactant_2 = Chem.MolFromSmiles(reactant_smiles_2) - mol_reactant_2 = Chem.AddHs(mol_reactant_2) # Add explicit hydrogens - return mol_reactant_1, mol_reactant_2 - - -def process_reactions(rxn, csv_cache, reaction_tuple, key=None, - molecule_and_csv_path_dict=None, delete_atoms=False): - """ - Process chemical reactions, map atoms between reactants and products, and save data. - - This is the main processing function that: - 1. Runs reactions on reactant pairs - 2. Maps atoms between reactants and products - 3. Validates mapping consistency - 4. Saves mapping data to CSV files - 5. Identifies reaction features (first shell, initiators, byproducts) - - Args: - rxn (Chem.rdChemReactions.ChemicalReaction): RDKit reaction object - csv_cache (Path): Directory path for CSV output files - reaction_tuple (list): List of reactant pairs to process - key (str or int, optional): Identifier for the reaction set - molecule_and_csv_path_dict (dict, optional): Dictionary to store results - delete_atoms (bool, optional): Whether to identify byproduct atoms for deletion - - Returns: - tuple: (mols, output, molecule_and_csv_path_dict) - - mols: List of reactant and product molecules for visualization - - output: String output (currently empty, reserved for future use) - - molecule_and_csv_path_dict: Dictionary containing all processed data - - Raises: - ValueError: If mapping validation fails at any step - - Note: - This function performs extensive validation including: - - Atom count consistency between reactants and products - - Consecutive indexing requirements - - Initiator count validation (must be exactly 2) - """ - if molecule_and_csv_path_dict is None: - molecule_and_csv_path_dict = {} - - total_products = 0 - mols = [] - - # Process each reactant pair in the reaction tuple - for j, pair in enumerate(reaction_tuple): - r1, r2 = Chem.Mol(pair[0]), Chem.Mol(pair[1]) - products = rxn.RunReactants((r1, r2)) - - # Get substructure matches for each reactant against reaction templates - matches_1 = r1.GetSubstructMatches(rxn.GetReactants()[0]) - matches_2 = r2.GetSubstructMatches(rxn.GetReactants()[1]) - - # Process each product set generated by the reaction - for product_set in products: - # Clean up any existing DataFrame - if "df" in locals(): - del df - - - # Initialize data structures - df = pd.DataFrame(columns=["reactant_idx", "product_idx"]) - first_shell = [] # Atoms in first coordination shell - initiator_idxs = [] # Initiator atom indices - mapping_dict = {} # Additional mapping dictionary - - # Apply atom mapping from reaction properties - num_total_atoms = 0 - for i, product in enumerate(product_set): - if i > 0: - num_total_atoms += product_set[i - 1].GetNumAtoms() - for atom in product.GetAtoms(): - if atom.HasProp("react_idx"): - r_idx = atom.GetIntProp("react_idx") - a_idx = atom.GetIntProp("react_atom_idx") - r = (r1, r2)[r_idx] - r_atom = r.GetAtomWithIdx(a_idx) - map_num = atom.GetIdx() + 1 + num_total_atoms + 100 - r_atom.SetAtomMapNum(map_num) - atom.SetAtomMapNum(map_num) - - # Combine reactants and products for easier processing - reactant_combined = Chem.CombineMols(r1, r2) - product_combined = Chem.CombineMols(*product_set) - - # Create mapping between reactant and product atoms based on map numbers - for r_atom in reactant_combined.GetAtoms(): - for p_atom in product_combined.GetAtoms(): - if r_atom.GetAtomMapNum() == p_atom.GetAtomMapNum(): - new_row = pd.DataFrame([{"reactant_idx": r_atom.GetIdx(), - "product_idx": p_atom.GetIdx()}]) - df = pd.concat([df, new_row], ignore_index=True) - break - - # Validate mapping completeness and consistency - num_reactant_atoms = reactant_combined.GetNumAtoms() - num_product_atoms = product_combined.GetNumAtoms() - reactant_mapped = df["reactant_idx"].notna().sum() - product_mapped = df["product_idx"].notna().sum() - - # Validation 1: Mapped atom counts must match between columns - if reactant_mapped != product_mapped: - raise ValueError( - "Mismatch in mapped atom counts between columns: " - f"reactant_idx mapped={reactant_mapped}, product_idx mapped={product_mapped}" - ) - - # Validation 2: All reactant atoms must be mapped - if reactant_mapped != num_reactant_atoms: - df.to_csv(csv_cache / f"debug_mapping_{key}_{total_products}.csv", index=False) - raise ValueError( - "Mapping does not cover all reactant atoms: " - f"reactant atoms={num_reactant_atoms}, mapped={reactant_mapped}" - ) - - # Validation 3: All product atoms must be mapped - if product_mapped != num_product_atoms: - raise ValueError( - "Mapping does not cover all product atoms: " - f"product atoms={num_product_atoms}, mapped={product_mapped}" - ) - - # Validation 4: Mapping indices must be consecutive - if (is_consecutive(df["reactant_idx"].tolist()) is False or - is_consecutive(df["product_idx"].tolist()) is False): - raise ValueError( - "Mapping indices are not consecutive: " - f"reactant indices={df['reactant_idx'].tolist()}, " - f"product indices={df['product_idx'].tolist()}" - ) - - # Apply smart mapping to both reactants - try: - sub_set1 = is_number_in_set(matches_1, r1) - except ValueError: - sub_set1 = () - smart_mapping( - reactant=r1, - smarts_template=rxn.GetReactants()[0], - match_tuple=sub_set1 if sub_set1 else (), - ) - - try: - sub_set2 = is_number_in_set(matches_2, r2) - except ValueError: - sub_set2 = () - smart_mapping( - reactant=r2, - smarts_template=rxn.GetReactants()[1], - match_tuple=sub_set2 if sub_set2 else (), - ) - - # Recombine reactants after mapping - reactant_combined = Chem.CombineMols(r1, r2) - - # Identify first shell atoms and initiators - for atom in reactant_combined.GetAtoms(): - if atom.GetAtomMapNum() <= 99: - first_shell.append(atom.GetIdx()) - # we could get the initiators by their map numbers - if atom.GetAtomMapNum() in [1, 2]: - initiator_idxs.append(atom.GetIdx()) - - # Identify byproduct atoms if deletion is requested - byproduct_product_idxs = [] - byproduct_reactant_idxs = [] - - if delete_atoms: - frags = rdmolops.GetMolFrags(product_combined, asMols=True) - smallest_mol = min(frags, key=lambda m: m.GetNumAtoms()) - - # 1) get byproduct atom indices in PRODUCT space - for atom in smallest_mol.GetAtoms(): - byproduct_map_number = atom.GetAtomMapNum() - for p_atom in product_combined.GetAtoms(): - if p_atom.GetAtomMapNum() == byproduct_map_number: - byproduct_product_idxs.append(p_atom.GetIdx()) - break - - # 2) convert PRODUCT indices -> REACTANT indices using df mapping - byproduct_reactant_idxs = ( - df.loc[df["product_idx"].isin(byproduct_product_idxs), "reactant_idx"] - .astype(int) - .tolist() - ) - - # if you want the column to store REACTANT indices now: - byproduct_indexs = byproduct_reactant_idxs - - # Add any additional mappings from mapping_dict - for r_idx, p_idx in mapping_dict.items(): - new_row = pd.DataFrame([{"reactant_idx": r_idx, "product_idx": p_idx}]) - df = pd.concat([df, new_row], ignore_index=True) - - # Create comprehensive DataFrame with all analysis columns - first_shell_column = pd.Series(first_shell, name="first_shell") - initiator_idxs_column = pd.Series(initiator_idxs, name="initiators") - - # Validation: Must have exactly 2 initiators - if len(initiator_idxs) != 2: - raise ValueError(f"Expected 2 initiators, got {len(initiator_idxs)}: {initiator_idxs}") - - by_product_indexs_column = pd.Series(byproduct_indexs, name="byproduct_indices") - - # Combine all data into final DataFrame - df_combined = pd.concat([df, first_shell_column, initiator_idxs_column, - by_product_indexs_column], axis=1).astype(pd.Int64Dtype()) - - # Check for duplicate products - if not compare_products(molecule_and_csv_path_dict, product_combined): - continue - - # Update counters and store results - total_products += 1 - mols.append(reactant_combined) - mols.append(product_combined) - - # Organize results in the output dictionary - for i in itertools.count(1): - if i not in molecule_and_csv_path_dict: # Check if i IS a key - sub_dict = molecule_and_csv_path_dict[i] = {} # Initialize if not found - dict_key = i - break - - # Save to CSV file - df_combined.to_csv(csv_cache / f"reaction_{dict_key}.csv", index=False) - print(f"Saved reaction {dict_key} to CSV") - - sub_dict["reactant"] = reactant_combined - sub_dict["product"] = product_combined - sub_dict["csv_path"] = csv_cache / f"reaction_{dict_key}.csv" - sub_dict["delete_atoms"] = delete_atoms - sub_dict["reaction_dataframe"] = df_combined - - - return mols, molecule_and_csv_path_dict - - -def save_grid_image(mols, cache, key=None): - from pathlib import Path - from rdkit.Chem import Draw - import base64 - - out_dir = Path(cache) / "grid_images" - out_dir.mkdir(parents=True, exist_ok=True) - - out_path = out_dir / (f"reaction_grid_{key}.png" if key is not None else "reaction_grid.png") - - # Ask RDKit for a raster image (PNG-like) - img = Draw.MolsToGridImage(mols, useSVG=False, subImgSize=(900, 900)) - - # Case 1: PIL.Image.Image (has .save) - if hasattr(img, "save"): - img.save(str(out_path)) - return out_path - - # Case 2: IPython/RDKit display object (often has .data) - data = getattr(img, "data", None) - if data is None: - raise TypeError(f"Unexpected image type {type(img)}; cannot save.") - - # data can be bytes OR base64 string depending on wrapper - if isinstance(data, bytes): - png_bytes = data - elif isinstance(data, str): - # try base64 decode; if it fails, treat as raw text - try: - png_bytes = base64.b64decode(data) - except Exception: - png_bytes = data.encode("utf-8") - else: - raise TypeError(f"Unexpected img.data type {type(data)}; cannot save.") - - with open(out_path, "wb") as f: - f.write(png_bytes) - - return out_path - - - -def reaction_tuples(same_reactants, mol_reactant_1, mol_reactant_2): - """ - Generate reactant pair orderings for reaction processing. - - - For same reactants: only one unique ordering [A, A]. - - For different reactants: return both [A, B] and [B, A] to avoid relying - on reactant ordering and to support order-dependent reaction definitions. - """ - if same_reactants: - return [[mol_reactant_1, mol_reactant_1]] - - return [[mol_reactant_1, mol_reactant_2], [mol_reactant_2, mol_reactant_1]] - - -def process_reaction_dict(detected_reactions, cache): - """ - Process a dictionary of detected reactions and generate mapping data. - - This is a high-level function that processes multiple reactions defined in - a dictionary structure, handling all steps from SMILES to final CSV output. - - Args: - detected_reactions (dict): Dictionary containing reaction definitions - cache (str or Path): Base cache directory for output files - - Returns: - tuple: (molecule_and_csv_path_dict, detected_reactions) - - molecule_and_csv_path_dict: Dictionary with all processed reaction data - - detected_reactions: The original input dictionary (unchanged) - - Example: - >>> reactions_dict = { - ... 1: {"reaction": "[C:1]=[O:2]>>[C:1]-[O:2]", - ... "monomer_1": {"smiles": "CCO"}, ...} - ... } - >>> results, original = process_reaction_dict(reactions_dict, "/path/to/cache") - """ - molecule_and_csv_path_dict = {} - csv_cache = prepare_paths(cache) - molecule_images = [] - - # TODO: Add function to compare products if products are identicals drop the duplicates - - - # Process each reaction in the dictionary - for key in detected_reactions: - reaction_dict = detected_reactions[key] - rxn_smarts = reaction_dict["reaction"] - reactant_smiles_1 = reaction_dict["monomer_1"]["smiles"] - same_reactants = reaction_dict["same_reactants"] - - # Handle same vs different reactants - if same_reactants: - reactant_smiles_2 = reactant_smiles_1 - else: - reactant_smiles_2 = reaction_dict["monomer_2"]["smiles"] - - delete_atoms = reaction_dict.get("delete_atom", False) - - # Build reaction and reactants - rxn = build_reaction(rxn_smarts) - mol_reactant_1, mol_reactant_2 = build_reactants(reactant_smiles_1, reactant_smiles_2) - reaction_tuple = reaction_tuples(same_reactants, mol_reactant_1, mol_reactant_2) - - # Process the reaction - mols, molecule_and_csv_path_dict = process_reactions( - rxn, csv_cache, reaction_tuple, key, molecule_and_csv_path_dict, - delete_atoms=delete_atoms - ) - - # Save outputs - save_grid_image(mols, cache, key) - molecule_images.append(mols) - - return molecule_and_csv_path_dict, detected_reactions, molecule_images - - -def run_all(cache, rxn_smarts, reactant_smiles_1, reactant_smiles_2): - """ - Run a complete reaction processing pipeline for a single reaction. - - This is a convenience function that handles all steps for processing - a single reaction from SMILES strings to final outputs. - - Args: - cache (str or Path): Base cache directory for output files - rxn_smarts (str): Reaction SMARTS string - reactant_smiles_1 (str): SMILES string for first reactant - reactant_smiles_2 (str): SMILES string for second reactant - - Returns: - dict: Dictionary containing all processed reaction data - - Example: - >>> results = run_all("/path/to/cache", - ... "[C:1]=[O:2]>>[C:1]-[O:2]", - ... "CCO", "CC=O") - """ - molecule_and_csv_path_dict = {} - csv_cache = prepare_paths(cache) - - # Build reaction and reactants - rxn = build_reaction(rxn_smarts) - mol_reactant_1, mol_reactant_2 = build_reactants(reactant_smiles_1, reactant_smiles_2) - reaction_tuple = [[mol_reactant_1, mol_reactant_2]] - delete_atoms = True # Default to identifying byproducts - - # Process the reaction - mols, molecule_and_csv_path_dict = process_reactions( - rxn, csv_cache, reaction_tuple, None, molecule_and_csv_path_dict, delete_atoms - ) - - # Save outputs - save_grid_image(mols, cache, None) - - return molecule_and_csv_path_dict - - -if __name__ == "__main__": - """ - Main execution block with example reaction data. - - This demonstrates the usage of the module with example polyesterification - reactions between hydroxy carboxylic acids. - """ - - # Example reaction database for polyesterification reactions - detected_reactions = { - 1: { - "reaction_name": "Hydroxy Carboxylic Acid Polycondensation(Polyesterification)", - "same_reactants": True, - "reactant_1": "hydroxy_carboxylic_acid", - "product": "polyester_chain", - "delete_atom": True, - "reaction": "[O;!$(OC=*):1]-[H:3].[CX3:2](=[O:5])[OX2H1:4]>>[OX2:1]-[CX3:2](=[O:5]).[O:4]-[H:3]", - "reference": { - "smarts": "https://pubs.acs.org/doi/10.1021/acs.jcim.3c00329", - "reaction_and_mechanism": [ - "https://pubs.acs.org/doi/10.1021/ed048pA734.1", - "https://pubs.acs.org/doi/10.1021/ed073pA312", - ], - }, - "monomer_1": { - "smiles": "O=C(O)c1cc(O)c(Cl)c(C(=O)O)c1", - "functionality_type": "di_different", - "functional_group_name": "hydroxy_carboxylic_acid", - "functional_group_smarts_1": "[OX2H1;!$(OC=*):1]", - "functional_count_1": 1, - "functional_group_smarts_2": "[CX3:2](=[O])[OX2H1]", - "functional_count_2": 2, - }, - }, - 2: { - "reaction_name": "Hydroxy Carboxylic Acid Polycondensation(Polyesterification)", - "same_reactants": True, - "reactant_1": "hydroxy_carboxylic_acid", - "product": "polyester_chain", - "delete_atom": True, - "reaction": "[O;!$(OC=*):1]-[H:3].[CX3:2](=[O:5])[OX2H1:4]>>[OX2:1]-[CX3:2](=[O:5]).[O:4]-[H:3]", - "reference": { - "smarts": "https://pubs.acs.org/doi/10.1021/acs.jcim.3c00329", - "reaction_and_mechanism": [ - "https://pubs.acs.org/doi/10.1021/ed048pA734.1", - "https://pubs.acs.org/doi/10.1021/ed073pA312", - ], - }, - "monomer_1": { - "smiles": "O=C(O)CCCC(O)CCCO", - "functionality_type": "di_different", - "functional_group_name": "hydroxy_carboxylic_acid", - "functional_group_smarts_1": "[OX2H1;!$(OC=*):1]", - "functional_count_1": 2, - "functional_group_smarts_2": "[CX3:2](=[O])[OX2H1]", - "functional_count_2": 1, - }, - }, - 3: { - "reaction_name": "Hydroxy Carboxylic and Hydroxy Carboxylic Polycondensation(Polyesterification)", - "same_reactants": False, - "reactant_1": "hydroxy_carboxylic_acid", - "reactant_2": "hydroxy_carboxylic_acid", - "product": "polyester_chain", - "delete_atom": True, - "reaction": "[O;!$(OC=*):1]-[H:3].[CX3:2](=[O:5])[OX2H1:4]>>[OX2:1]-[CX3:2](=[O:5]).[O:4]-[H:3]", - "reference": { - "smarts": "https://pubs.acs.org/doi/10.1021/acs.jcim.3c00329", - "reaction_and_mechanism": [ - "https://pubs.acs.org/doi/10.1021/ed048pA734.1", - "https://pubs.acs.org/doi/10.1021/ed073pA312", - ], - }, - "monomer_1": { - "smiles": "O=C(O)c1cc(O)c(Cl)c(C(=O)O)c1", - "functionality_type": "di_different", - "functional_group_name": "hydroxy_carboxylic_acid", - "functional_group_smarts_1": "[OX2H1;!$(OC=*):1]", - "functional_count_1": 1, - "functional_group_smarts_2": "[CX3:2](=[O])[OX2H1]", - "functional_count_2": 2, - }, - "monomer_2": { - "smiles": "O=C(O)CCCC(O)CCCO", - "functionality_type": "di_different", - "functional_group_name": "hydroxy_carboxylic_acid", - "functional_group_smarts_1": "[OX2H1;!$(OC=*):1]", - "functional_count_1": 2, - "functional_group_smarts_2": "[CX3:2](=[O])[OX2H1]", - "functional_count_2": 1, - }, - }, - } - - # Configuration parameters - cache = "C:\\Users\\Janitha\\Documents\\GitHub\\reaction_lammps_mupt\\cache\\00_cache" - rxn_smarts = "[O;!$(OC=*):1]-[H:3].[CX3:2](=[O:5])[OX2H1:4]>>[OX2:1]-[CX3:2](=[O:5]).[O:4]-[H:3]" - reactant_smiles_1 = "O=C(O)c1cc(O)c(Cl)c(C(=O)O)c1" - reactant_smiles_2 = "O=C(O)CCCC(O)CCCO" - - # Execute the reaction processing pipeline - print("Starting reaction processing pipeline...") - - # Process a single reaction using run_all - # run_all(cache, rxn_smarts, reactant_smiles_1, reactant_smiles_2) - - # Process all reactions in the dictionary - molecule_dict_csv_path_dict, detected_reactions, molecule_images = process_reaction_dict(detected_reactions, cache) - - # Display results - import pprint - print("\n" + "="*80) - print("REACTION PROCESSING RESULTS") - print("="*80) - pprint.pprint(molecule_dict_csv_path_dict) - - print("\n" + "="*80) - print("SUMMARY") - print("="*80) - print(f"Total reactions processed: {len(molecule_dict_csv_path_dict)}") - - # Count total products across all reactions - print("\nProcessing complete! CSV files and images have been saved to the cache directory.") diff --git a/AutoREACTER/reaction_template_builder/reaction_template_pipeline/util.py b/AutoREACTER/reaction_template_builder/reaction_template_pipeline/util.py deleted file mode 100644 index 3ad4257..0000000 --- a/AutoREACTER/reaction_template_builder/reaction_template_pipeline/util.py +++ /dev/null @@ -1,167 +0,0 @@ -from rdkit import Chem - -def compare_products(reactions_dict, _prod2): - """ - Compares a new product molecule against a collection of existing products - to identify duplicates, ignoring atom mapping. - - Args: - reactions_dict (dict): A dictionary where values are dictionaries - potentially containing a "product" RDKit molecule object. - _prod2 (rdkit.Chem.rdchem.Mol): The RDKit molecule object to check for duplicates. - - Returns: - bool: Returns False if a duplicate product is found, True otherwise. - """ - # Create a copy of the input molecule to avoid modifying the original object - prod2 = Chem.Mol(_prod2) - - for key, dict_2 in reactions_dict.items(): - # Retrieve the product molecule from the nested dictionary - prod = dict_2.get("product") - - if prod is not None: - # Create a copy of the stored product for comparison - prod1 = Chem.Mol(prod) - - # Remove atom map numbers from both molecules. - # Atom maps are often used in reactions to track atoms but should be - # ignored when checking if two chemical structures are identical. - for atom in prod1.GetAtoms(): - atom.SetAtomMapNum(0) - for atom in prod2.GetAtoms(): - atom.SetAtomMapNum(0) - - # Convert both molecules to canonical SMILES strings. - # Canonicalization ensures that the same molecule always results in the same string. - smi1 = Chem.MolToSmiles(prod1, canonical=True) - smi2 = Chem.MolToSmiles(prod2, canonical=True) - - # Perform string comparison to detect duplicates - if smi1 == smi2: - print("DUPLICATE PRODUCT FOUND") - return False - - # No duplicates found after checking all entries - return True - -def compare_rdkit_molecules_canonical(data_smiles_list, mol_smi_2): - """ - Compares two RDKit molecule SMILES strings to determine if they - represent the same chemical structure using canonical SMILES. - - Args: - data_smiles_list (list): List of SMILES strings to compare against. - mol_smi_2 (str): SMILES string of the second molecule. - - Returns: - bool: True if molecules are chemically identical, False otherwise. - """ - if not mol_smi_2: - return data_smiles_list, False - mol2 = Chem.MolFromSmiles(mol_smi_2) - if mol2 is None: - return data_smiles_list, False - - for mol_smi_1 in data_smiles_list: - mol1 = Chem.MolFromSmiles(mol_smi_1) - - # Handle cases where SMILES might be invalid - if mol1 is None or mol2 is None: - return data_smiles_list, False # or raise a ValueError - # Generate canonical SMILES and compare them - canonical_smi_1 = Chem.MolToSmiles(mol1, canonical=True) - canonical_smi_2 = Chem.MolToSmiles(mol2, canonical=True) - - if canonical_smi_1 == canonical_smi_2: - return data_smiles_list, True - - data_smiles_list.append(mol_smi_2) - return data_smiles_list, False - -def format_detected_reactions_dict(detected_reactions, non_monomer_molecules_to_retain = None): - """ - Aggregates and formats information from a dictionary of detected reactions - into a single summary dictionary. - - This function compiles unique reaction names, SMARTS references, and - mechanism references into comma-separated strings. - - Args: - detected_reactions (dict): A dictionary containing reaction data, - where each value is a dictionary with keys - like "reaction_name" and "reference". - - Returns: - dict: A dictionary containing aggregated strings for: - - "reactions_names" - - "smart_references" - - "mechanism_references" - """ - reactions_names = [] - smart_references = [] - mechanism_references = [] - - seen_reactions = set() - seen_smarts = set() - seen_mechanisms = set() - - formatted_dict = {} - data_smiles_list = [] - - for key, reaction in detected_reactions.items(): - # Deduplicate reaction names safely (no substring bugs) - reactions_name = reaction.get("reaction_name") - if reactions_name and reactions_name not in seen_reactions: - seen_reactions.add(reactions_name) - reactions_names.append(reactions_name) - - # Reference is optional; do not skip monomer processing if missing - reference = reaction.get("reference") or {} - smarts_ref = reference.get("smarts") - mech_refs = reference.get("reaction_and_mechanism") - - if smarts_ref and smarts_ref not in seen_smarts: - seen_smarts.add(smarts_ref) - smart_references.append(smarts_ref) - - if mech_refs: - mech_block = ", ".join(mech_refs) - if mech_block and mech_block not in seen_mechanisms: - seen_mechanisms.add(mech_block) - mechanism_references.append(mech_block) - - # Always process monomers (if present) - m1 = reaction.get("monomer_1") - m2 = reaction.get("monomer_2") - m1_smiles = m1.get("smiles") if isinstance(m1, dict) else None - m2_smiles = m2.get("smiles") if isinstance(m2, dict) else None - - if m1_smiles: - data_smiles_list, _ = compare_rdkit_molecules_canonical(data_smiles_list, m1_smiles) - if m2_smiles: - data_smiles_list, _ = compare_rdkit_molecules_canonical(data_smiles_list, m2_smiles) - - formatted_dict = { - "reactions_names": ", ".join(reactions_names), - "smart_references": ", ".join(smart_references), - "mechanism_references": ", ".join(mechanism_references), - } - return formatted_dict , data_smiles_list - -def prep_for_3d_molecule_generation(data_smiles_list, molecule_dict_csv_path_dict): - molecule_dict = {} - for i, smiles in enumerate(data_smiles_list): - key = f"data_{i+1}" - molecule = Chem.MolFromSmiles(smiles) - if molecule is None: - raise ValueError(f"Invalid SMILES: {smiles}") - molecule = Chem.AddHs(molecule) - molecule_dict[key] = molecule - for i, (key, value) in enumerate(molecule_dict_csv_path_dict.items()): - reactant = value.get("reactant") - product = value.get("product") - if reactant and product: - molecule_dict[f"pre_{i+1}"] = reactant - molecule_dict[f"post_{i+1}"] = product - return molecule_dict diff --git a/examples/example_1.ipynb b/examples/example_1.ipynb index 69c0c6c..e59d05f 100644 --- a/examples/example_1.ipynb +++ b/examples/example_1.ipynb @@ -12,9 +12,7 @@ "- `Initialization()` sets up the AutoREACTER runtime environment.\n", "- `InputParser()` prepares the parser for validating and normalizing user input files.\n", "- `GetCacheDir()` retrieves the staging cache directory used for intermediate data.\n", - "- `RunDirectoryManager.make_dated_run_dir()` creates a timestamped run directory to store outputs for this session.\n", - "\n", - "This ensures each run is isolated and reproducible." + "- `RunDirectoryManager.make_dated_run_dir()` creates a timestamped run directory to store outputs for this session." ] }, { @@ -30,7 +28,8 @@ "from AutoREACTER.detectors.functional_groups_detector import FunctionalGroupsDetector\n", "from AutoREACTER.detectors.reaction_detector import ReactionDetector\n", "from AutoREACTER.detectors.non_monomer_detector import NonReactantsDetector\n", - "from AutoREACTER.reaction_template_builder.run_reaction_template_pipeline import ReactionTemplatePipeline\n", + "from AutoREACTER.reaction_preparation.reaction_processor.prepare_reactions import PrepareReactions\n", + "# from AutoREACTER.reaction_template_builder.run_reaction_template_pipeline import ReactionTemplatePipeline\n", "from rdkit import Chem\n", "from rdkit.Chem import Draw\n", "import json\n", @@ -41,6 +40,7 @@ "dated_cache_dir = RunDirectoryManager.make_dated_run_dir(cache_dir, chdir_to=\"none\")\n", "# #future use\n", "# RunDirectoryManager.copy_into_run(cache_dir, dated_cache_dir)\n", + "print(f\"Cache directory: {cache_dir}\")\n", "\n", "\n" ] @@ -52,12 +52,7 @@ "source": [ "### Cache Cleanup (Optional)\n", "\n", - "This cell can be used to clean up cached data generated by previous runs.\n", - "\n", - "- `RetentionCleanup.run()` removes cached directories from the AutoREACTER cache base directory.\n", - "- Useful for freeing disk space or resetting the cache state.\n", - "\n", - "Uncomment and run only if you want to remove cached run data." + "This cell can be used to clean up cached data generated by previous runs.\n" ] }, { @@ -67,6 +62,10 @@ "metadata": {}, "outputs": [], "source": [ + "\n", + "# `RetentionCleanup.run()` removes cached directories from the AutoREACTER cache base directory.\n", + "# Useful for freeing disk space or resetting the cache state.\n", + "\n", "# RetentionCleanup.run(GetCacheDir().cache_base_dir)" ] }, @@ -75,16 +74,10 @@ "id": "b1dd989a", "metadata": {}, "source": [ - "### Load, Validate, and Visualize Monomers\n", + "### Load and Validate Monomers\n", "\n", "This cell loads the example input file, validates it using the `InputParser`, and visualizes the monomers.\n", - "\n", - "- The JSON input file is loaded into `input_data`.\n", - "- `validate_inputs()` checks the schema and normalizes the data.\n", - "- `molecule_representation_of_initial_molecules()` extracts RDKit molecule objects.\n", - "- `Draw.MolsToGridImage()` displays the monomers in a grid with their labels.\n", - "\n", - "This provides a quick visual check of the input molecules before running further analysis." + "\n" ] }, { @@ -94,10 +87,32 @@ "metadata": {}, "outputs": [], "source": [ + "\n", + "# validate_inputs() checks the schema and normalizes the data.\n", + "# molecule_representation_of_initial_molecules() extracts RDKit molecule objects.\n", + "# Draw.MolsToGridImage() displays the monomers in a grid with their labels.\n", + "\n", "with open(\"example_1_inputs_count_mode.json\", \"r\") as f:\n", " input_data = json.load(f)\n", "\n", - "validated_inputs = input_parser.validate_inputs(input_data)\n", + "validated_inputs = input_parser.validate_inputs(input_data)\n" + ] + }, + { + "cell_type": "markdown", + "id": "a48c7aee", + "metadata": {}, + "source": [ + "### Visualize Monomers (Optional)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "63c6e279", + "metadata": {}, + "outputs": [], + "source": [ "initial_molecules = input_parser.initial_molecules_image_grid(validated_inputs)\n", "initial_molecules" ] @@ -130,11 +145,7 @@ "source": [ "### Functional Group Detection\n", "\n", - "This cell runs the functional group detection step.\n", - "\n", - "- `FunctionalGroupsDetector()` initializes the detector.\n", - "- `functional_groups_detector()` analyzes the validated monomers.\n", - "- It identifies functional groups present in each molecule and generates corresponding visualization images." + "This cell runs the functional group detection step.\n" ] }, { @@ -144,6 +155,10 @@ "metadata": {}, "outputs": [], "source": [ + "# FunctionalGroupsDetector() initializes the detector.\n", + "# functional_groups_detector() analyzes the validated monomers.\n", + "# It identifies functional groups present in each molecule and generates corresponding visualization images.\n", + "\n", "# Run the detector only after the inputs have been validated, as it consumes validated_inputs.monomers to execute the detection workflow.\n", "functional_groups_detector = FunctionalGroupsDetector()\n", "functional_groups, functional_groups_imgs = \\\n", @@ -157,12 +172,9 @@ "id": "76ad6db0", "metadata": {}, "source": [ - "### Functional Group Visualization\n", + "### Functional Group Visualization (Optional)\n", "\n", - "This cell visualizes the detected functional groups on the molecules.\n", - "\n", - "- `molecules_to_visualization()` highlights the functional groups identified in the previous step.\n", - "- The resulting image shows each molecule with the detected functional groups marked." + "This cell visualizes the detected functional groups on the molecules." ] }, { @@ -172,6 +184,9 @@ "metadata": {}, "outputs": [], "source": [ + "# molecules_to_visualization() highlights the functional groups identified in the previous step.\n", + "# The resulting image shows each molecule with the detected functional groups marked.\n", + "\n", "img = functional_groups_detector.functional_group_highlighted_molecules_image_grid(functional_groups_imgs)\n", "img\n" ] @@ -183,11 +198,7 @@ "source": [ "### Reaction Detection\n", "\n", - "This cell identifies possible reactions between the detected functional groups.\n", - "\n", - "- `ReactionDetector()` initializes the reaction detection module.\n", - "- `reaction_detector()` analyzes the detected functional groups and generates possible reaction instances.\n", - "- `create_reaction_image_grid()` visualizes the detected reactions in a grid format." + "This cell identifies possible reactions between the detected functional groups.\n" ] }, { @@ -197,9 +208,30 @@ "metadata": {}, "outputs": [], "source": [ + "\n", + "# ReactionDetector() initializes the reaction detection module.\n", + "# reaction_detector() analyzes the detected functional groups and generates possible reaction instances.\n", + "# create_reaction_image_grid() visualizes the detected reactions in a grid format.\n", + "\n", "reaction_detector = ReactionDetector()\n", - "reaction_instances = reaction_detector.reaction_detector(functional_groups)\n", - "print(f\"Number of reaction instances detected: {len(reaction_instances)}\")\n", + "reaction_instances = reaction_detector.reaction_detector(functional_groups)\n" + ] + }, + { + "cell_type": "markdown", + "id": "0149d1d3", + "metadata": {}, + "source": [ + "### Reaction visualization (Optional)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b443f92e", + "metadata": {}, + "outputs": [], + "source": [ "img = reaction_detector.available_reaction_image_grid(reaction_instances)\n", "img" ] @@ -211,10 +243,7 @@ "source": [ "### Reaction Selection\n", "\n", - "This cell filters and selects the relevant reactions from the detected reaction instances.\n", - "\n", - "- `reaction_selection()` processes the detected reactions.\n", - "- It removes invalid or redundant reaction candidates." + "This cell filters and selects the relevant reactions from the detected reaction instances.\n" ] }, { @@ -224,9 +253,22 @@ "metadata": {}, "outputs": [], "source": [ + "# reaction_selection() processes the detected reactions.\n", + "# It removes user selected reactions from the list of detected reactions and returns the final set of reactions to be used for template generation.\n", + "\n", "selected_reactions = reaction_detector.reaction_selection(reaction_instances)" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "ee4e5c22", + "metadata": {}, + "outputs": [], + "source": [ + "print(selected_reactions)" + ] + }, { "cell_type": "markdown", "id": "09b1cb36", @@ -235,12 +277,7 @@ "### Non-Reactant (Non-Monomer) Detection\n", "\n", "This cell identifies molecules that do **not participate in any detected reactions**.\n", - "\n", - "- `NonReactantsDetector()` initializes the detector.\n", - "- `non_monomer_detector()` to determine which molecules are non-reactive.\n", - "- `non_reactants_to_visualization()` grid to non reactive molecules\n", - "\n", - "If no non-reactant molecules are detected, this stage is **automatically skipped**, and the workflow continues without generating any visualization." + "\n" ] }, { @@ -250,8 +287,33 @@ "metadata": {}, "outputs": [], "source": [ + "# NonReactantsDetector() initializes the detector.\n", + "# non_monomer_detector() to determine which molecules are non-reactive.\n", + "\n", + "\n", "non_monomer_detector = NonReactantsDetector()\n", - "non_reactants_list = non_monomer_detector.non_monomer_detector(validated_inputs, selected_reactions)\n", + "non_reactants_list = non_monomer_detector.non_monomer_detector(validated_inputs, selected_reactions)\n" + ] + }, + { + "cell_type": "markdown", + "id": "2e03f325", + "metadata": {}, + "source": [ + "### non_reactants_to_visualization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5040c430", + "metadata": {}, + "outputs": [], + "source": [ + "# `()` grid to non reactive molecules\n", + "\n", + "# If no non-reactant molecules are detected, this stage is **automatically skipped**, and the workflow continues without generating any visualization.\n", + "\n", "img_non_reactants = non_monomer_detector.non_reactants_to_visualization(non_reactants_list)\n", "img_non_reactants" ] @@ -263,12 +325,7 @@ "source": [ "### Update Inputs After Non-Reactant Filtering\n", "\n", - "This cell updates the validated inputs after identifying non-reactive molecules.\n", - "\n", - "- `non_reactant_selection()` removes or marks molecules that do not participate in any reactions.\n", - "- The resulting `updated_inputs` contains the filtered set of monomers that will be used in the next stages of the workflow.\n", - "\n", - "If no non-reactants were detected in the previous step, the inputs remain unchanged." + "This cell updates the validated inputs after identifying non-reactive molecules.\n" ] }, { @@ -278,6 +335,10 @@ "metadata": {}, "outputs": [], "source": [ + "# non_reactant_selection() removes or marks molecules that do not participate in any reactions.\n", + "# The resulting `updated_inputs` contains the filtered set of monomers that will be used in the next stages of the workflow.\n", + "# If no non-reactants were detected in the previous step, the inputs remain unchanged.\n", + "\n", "updated_inputs = non_monomer_detector.non_reactant_selection(validated_inputs, non_reactants_list)" ] }, @@ -287,7 +348,21 @@ "id": "d62efe2a", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "prepare_reactions = PrepareReactions(cache_dir)\n", + "prepared_reactions = prepare_reactions.prepare_reactions(selected_reactions)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a7c134e5", + "metadata": {}, + "outputs": [], + "source": [ + "img = prepare_reactions.reaction_templates_highlighted_image_grid(prepared_reactions)\n", + "img" + ] }, { "cell_type": "code",