diff --git a/AutoREACTER/detectors/detector.py b/AutoREACTER/detectors/detector.py index a52932c..0ab06ad 100644 --- a/AutoREACTER/detectors/detector.py +++ b/AutoREACTER/detectors/detector.py @@ -8,6 +8,15 @@ the simulation. """ +import warnings + +warnings.warn( + """This script is deprecated and will be modified in future versions. Within v0.2, the whole package will primaraliy + support on jupyter notebook and the CLI is removed. Please use the notebook version for now and refer to the README for how to use the package.""", + DeprecationWarning, + stacklevel=2 +) + import json # Attempt to import detector modules. Handles different import paths depending on diff --git a/AutoREACTER/detectors/non_monomer_detector.py b/AutoREACTER/detectors/non_monomer_detector.py new file mode 100644 index 0000000..9fc915c --- /dev/null +++ b/AutoREACTER/detectors/non_monomer_detector.py @@ -0,0 +1,315 @@ +from typing import List +from rdkit import Chem +from rdkit.Chem import rdmolops, Draw + +# AutoREACTER internal imports for simulation configuration and reaction detection +from AutoREACTER.input_parser import SimulationSetup +from AutoREACTER.input_parser import MonomerEntry +from AutoREACTER.detectors.reaction_detector import ReactionInstance + + +class NonReactantsDetector: + """ + A detector class responsible for identifying monomers that do not participate + in any detected chemical reactions within a simulation. + + This class provides methods to: + - Detect non-reacting monomers by comparing reaction participants with monomer lists + - Visualize non-reacting monomers using RDKit grid image generation + - Handle user selection for retaining or discarding non-reacting monomers + + Attributes: + None: This class primarily operates on input parameters rather than storing state. + + Example: + >>> detector = NonReactantsDetector() + >>> updated_setup = detector.non_monomer_detector(setup, reactions) + >>> detector.non_reactant_selection(setup, non_reactants) + """ + + def _same_molecule(self, reactants_list: List[str], mol2_smi: str) -> List[str]: + """ + Checks if a molecule (given as SMILES string) exists in the reactants list. + + This method performs canonical SMILES comparison to ensure that molecules + with the same structure but different string representations are correctly + identified as identical. + + Args: + reactants_list (List[str]): A list of SMILES strings representing molecules + that participate in detected reactions. + mol2_smi (str): The SMILES string of the molecule to check for membership. + + Returns: + List[str]: The updated reactants_list with the new molecule appended + if it was not already present. If the input SMILES is invalid + (cannot be parsed), the original list is returned unchanged. + + Raises: + None: Invalid SMILES are silently handled by returning the original list. + + Note: + Canonical SMILES are used for comparison to ensure structural equivalence + regardless of string representation differences (e.g., branching order). + """ + # Parse the input SMILES string into an RDKit Mol object + mol2 = Chem.MolFromSmiles(mol2_smi) + + # Return original list if SMILES is invalid (None indicates parsing failure) + if mol2 is None: + return reactants_list # Or handle error: invalid SMILES + + # Convert to canonical SMILES for consistent comparison across different representations + can_smi2 = Chem.MolToSmiles(mol2, canonical=True) + detected = False + + # Iterate through existing reactants to check for duplicate molecules + for mol_smi in reactants_list: + mol1 = Chem.MolFromSmiles(mol_smi) + if mol1 is not None: + # Compare canonical SMILES to determine if molecules are identical + if Chem.MolToSmiles(mol1, canonical=True) == can_smi2: + detected = True + break + + # Append molecule to list if it wasn't found (not a duplicate) + if not detected: + reactants_list.append(mol2_smi) + + return reactants_list + + def _same_molecule_for_initaials(self, reactants_list: List[str], mol2: Chem.Mol) -> bool: + """ + Checks if an RDKit Mol object represents a molecule already in the reactants list. + + This method differs from _same_molecule in that it accepts an RDKit Mol object + directly rather than a SMILES string, and it also removes explicit hydrogens + before comparison to ensure fair structural comparison. + + Args: + reactants_list (List[str]): A list of SMILES strings representing molecules + that participate in detected reactions. + mol2 (Chem.Mol): The RDKit Mol object to check for membership. + + Returns: + bool: True if the molecule exists in the reactants list (structurally + equivalent), False otherwise. Returns False if the input mol is None. + + Note: + - Explicit hydrogens are removed from both the query molecule and list + molecules before comparison to ensure structural equivalence. + - This method handles the typo "initaials" in the function name (preserved + for backward compatibility). + """ + # Handle None input gracefully + if mol2 is None: + return False + + # Step 1: Remove Hs from query molecule and get its canonical SMILES + # Removing hydrogens ensures fair comparison regardless of how hydrogens were added + mol2_clean = rdmolops.RemoveHs(mol2) + can_smi2 = Chem.MolToSmiles(mol2_clean, canonical=True) + + # Iterate through the reactants list for comparison + for mol_smi in reactants_list: + mol1 = Chem.MolFromSmiles(mol_smi) + if mol1 is not None: + # Step 2: Also remove Hs from input list molecules for fair comparison + mol1 = rdmolops.RemoveHs(mol1) + can_smi1 = Chem.MolToSmiles(mol1, canonical=True) + + # Check for structural equivalence + if can_smi1 == can_smi2: + return True + + # Return False if no matching molecule was found in the list + return False + + def non_monomer_detector(self, + simulation_setup: SimulationSetup, + reaction_instances: List[ReactionInstance]) -> List[MonomerEntry]: + """ + Detects monomers that do not participate in any of the identified reactions. + + This method iterates through all monomers in the simulation setup and identifies + those that are not involved in any of the detected reaction instances. It builds + a list of reactants from the reaction instances and compares each monomer against + this list to determine non-reactivity. + + Args: + simulation_setup (SimulationSetup): The simulation setup containing monomers + and reaction configuration. + reaction_instances (List[ReactionInstance]): A list of identified and filtered + reactions by the user. + + Returns: + List[MonomerEntry]: A list of monomer entries that do not participate in any + detected reactions (non-reactant monomers). + + Example: + >>> setup = SimulationSetup(...) + >>> reactions = [ReactionInstance(...), ...] + >>> detector = NonReactantsDetector() + >>> non_reactants = detector.non_monomer_detector(setup, reactions) + >>> # non_reactants contains monomers not found in any reaction + """ + + # Initialize lists to track reactants and non-reactants + reactants_list = [] # Stores SMILES of all molecules participating in reactions + non_reactants_list = [] # Stores monomer entries that don't participate in reactions + + # Build the reactants list from all reaction instances + # Each reaction involves up to two monomers (monomer_1 and monomer_2) + for reaction in reaction_instances: + # Add the first monomer from each reaction + mol1 = reaction.monomer_1.smiles + reactants_list = self._same_molecule(reactants_list, mol1) + + # Add the second monomer if it exists (some reactions may have only one participant) + if reaction.monomer_2: + mol2 = reaction.monomer_2.smiles + reactants_list = self._same_molecule(reactants_list, mol2) + + # Check each monomer in the simulation setup against the reactants list + for monomer in simulation_setup.monomers: + # Use RDKit Mol object for comparison (more reliable than SMILES strings) + is_in_reactions = self._same_molecule_for_initaials(reactants_list, monomer.rdkit_mol) + if not is_in_reactions: + # Monomer is not found in any reaction - add to non-reactants list + non_reactants_list.append(monomer) + + # Return the list of non-reactant monomers + return non_reactants_list + + def non_reactants_to_visualization(self, non_reactants_list: List[MonomerEntry]) -> Draw.MolsToGridImage: + """ + Generates a grid image visualization of non-reacting monomers. + + This method creates a visual representation of all non-reacting monomers + using RDKit's MolsToGridImage function, allowing users to visually identify + which molecules are not participating in reactions. + + Args: + non_reactants_list (List[MonomerEntry]): A list of monomer entries that do not + participate in any reactions. + + Returns: + Draw.MolsToGridImage: An RDKit grid image object containing the molecular + structures of all non-reacting monomers. + + Note: + The grid image displays molecules with their names as legends and arranges + them in a grid with 3 molecules per row. Each molecule image is 400x400 pixels. + """ + # Print details of each non-reacting monomer to console + if not non_reactants_list: + print("No non-reacting monomers detected.") + return None # Or handle as appropriate (e.g., return an empty image) + + for monomer in non_reactants_list: + print(f"ID: {monomer.id}, Name: {monomer.name}, SMILES: {monomer.smiles}") + + # Generate grid image using RDKit + # - Extract RDKit Mol objects from monomer entries + # - Use monomer names as legend text + # - Configure grid layout: 3 molecules per row, 400x400 pixel images + img = Draw.MolsToGridImage([monomer.rdkit_mol for monomer in non_reactants_list], + legends=[monomer.name for monomer in non_reactants_list], + molsPerRow=3, + subImgSize=(400,400)) + return img + + def non_reactant_selection(self, simulation_setup: SimulationSetup, non_reactants_list: List[MonomerEntry]) -> SimulationSetup: + """ + Handles user selection of non-reactant monomers to retain or discard in the simulation. + + This method presents the user with options to either keep or remove non-reacting + monomers from the simulation. It supports three selection modes: + - N (No): Discard all non-reactant monomers + - A (All): Retain all non-reactant monomers + - S (Select): Allow selective retention based on monomer IDs + + The method updates the 'status' attribute of selected monomers to 'discarded' + for those that should be removed from the simulation. + + Args: + simulation_setup (SimulationSetup): The simulation setup object whose monomers + will be potentially modified. + non_reactants_list (List[MonomerEntry]): A list of monomer entries identified + as non-reacting. + + Returns: + SimulationSetup: The updated simulation setup with non-reactant monomers + either retained or marked as discarded based on user input. + + Note: + - This method modifies the status attribute of MonomerEntry objects in place. + - For single non-reactant monomers, only N and A options are presented. + - The status values follow the StatusType convention: 'active' or 'discarded'. + """ + if not non_reactants_list: + print("No non-reacting monomers to select from.") + return simulation_setup # No changes needed if there are no non-reactants + + # Display selection guide for user interaction with non-reactant monomers + print(""" +Selection Guide for Non-Reactant Monomers: +- N: Discard all non-reactant monomers from the simulation. +- A: Retain all non-reactant monomers in the simulation. +- S: Select specific non-reactant monomers to retain. You will be prompted +to enter the IDs of the monomers you wish to keep, separated by commas. +Example: If you want to keep monomers with IDs 1 and 3, you would enter: 1,3 + """) + # Proceed only if there are non-reactant monomers to process + if non_reactants_list: + # Display information about non-participating monomers + print("The following monomers do not participate in any detected reactions:") + for monomer in non_reactants_list: + print(f"ID: {monomer.id}, Name: {monomer.name}, SMILES: {monomer.smiles}") + + # Main selection loop - continues until valid input is received + while True: + # Handle special case: only one non-reactant monomer + if len(non_reactants_list) == 1: + print("Only one non-reactant monomer detected") + # Simplified prompt for single monomer scenario + user_input = input(f"Do you want to keep {non_reactants_list[0].name} in the simulation? (N/A): ").strip().upper() + if user_input not in ['N', 'A']: + print("Invalid input. Please enter N or A.") + continue + if user_input == 'N': + # Mark single monomer as discarded + non_reactants_list[0].status = 'discarded' + break + elif user_input == 'A': + # Keep the single monomer (retain) + break + else: + # Multi-monomer scenario - offer all three options + user_input = input("Do you want to retain these non-reactant monomers in the simulation? (N/A/S): ").strip().upper() + if user_input not in ['N', 'A', 'S']: + print("Invalid input. Please enter N, A, or S.") + continue + + if user_input == 'N': + # Discard all non-reactants: mark each as 'discarded' + for monomer in non_reactants_list: + monomer.status = 'discarded' + break + elif user_input == 'A': + # Retain all non-reactants: do nothing (keep 'active' status) + break + elif user_input == 'S': + # Selective retention: user specifies which IDs to keep + # Others will be marked as 'discarded' + selected_ids = input("Enter the IDs of the monomers you want to retain, separated by commas: ") + # Parse input into list of integers ( monomer IDs) + selected_ids = [int(id.strip()) for id in selected_ids.split(',')] + for monomer in non_reactants_list: + # Discard monomers NOT in the selected list + if monomer.id not in selected_ids: + monomer.status = 'discarded' + break + + # Return the modified simulation setup + return simulation_setup diff --git a/AutoREACTER/detectors/reaction_detector.py b/AutoREACTER/detectors/reaction_detector.py index b542474..3fcd5e7 100644 --- a/AutoREACTER/detectors/reaction_detector.py +++ b/AutoREACTER/detectors/reaction_detector.py @@ -1,8 +1,11 @@ +""" +Polymerization Reaction Detector and Visualizer. -# Dictionary defining various polymerization reactions. -# Each reaction includes metadata such as whether reactants are the same, -# the types of reactants, the product type, whether to delete atoms, -# and the SMARTS string for the reaction pattern using RDKit syntax. +This module provides tools to identify potential polymerization reactions between +monomers based on their functional groups using SMARTS patterns and RDKit. +It includes functionality to detect homo-polymerization and co-polymerization, +and generates visual grids of the resulting chemical reactions. +""" """ TODO: Missing Polymerization Mechanisms @@ -25,12 +28,14 @@ """ import pathlib -from typing import Dict, Any, Tuple, Optional +import os +from typing import Dict, Any, Tuple, Optional, List, Set from dataclasses import dataclass from PIL import Image, ImageDraw, ImageFont from rdkit import Chem from rdkit.Chem import Draw, rdChemReactions +# Attempt to import internal library components try: from reactions_library import ReactionLibrary except (ImportError, ModuleNotFoundError): @@ -41,14 +46,31 @@ except (ImportError, ModuleNotFoundError): from functional_groups_detector import FunctionalGroupInfo, MonomerRole -class SMARTSERROR(Exception): +class SMARTSerror(Exception): """Error from developer-defined SMARTS patterns not producing expected products. - Please contact the developer to resolve this issue. Raise a GitHub issue if you encounter this error. + Please contact the developer to resolve this issue. + Raise a GitHub issue if you encounter this error. at """ pass +class EmptyReactionListError(Exception): + """Raised when no reaction instances are detected, but the user attempts to select reactions. + This should be prevented by the reaction_selection method, but this error serves as a safeguard.""" + pass + @dataclass(slots=True, frozen=True) class FunctionalGroupInfo: + """ + Stores metadata about a specific functional group detected in a molecule. + + Attributes: + functionality_type: Category of the group (e.g., 'vinyl', 'diol'). + fg_name: Human-readable name of the functional group. + fg_smarts_1: Primary SMARTS pattern for detection. + fg_count_1: Number of occurrences of the primary pattern. + fg_smarts_2: Secondary SMARTS pattern (optional). + fg_count_2: Number of occurrences of the secondary pattern (optional). + """ functionality_type: str fg_name: str fg_smarts_1: str @@ -58,12 +80,23 @@ class FunctionalGroupInfo: @dataclass(slots=True, frozen=True) class MonomerRole: + """ + Represents a monomer and its associated functional groups. + + Attributes: + smiles: SMILES string of the monomer. + name: Identifier or name of the monomer. + functionalities: A tuple of FunctionalGroupInfo objects present in this monomer. + """ smiles: str name: str functionalities: Tuple[FunctionalGroupInfo, ...] @dataclass(slots=True, frozen=True) class ReactionTemplate: + """ + Defines a generic template for a polymerization reaction. + """ reaction_name: str reactant_1: str # functional group name reactant_2: Optional[str] # functional group name or None @@ -74,26 +107,49 @@ class ReactionTemplate: @dataclass(slots=True, frozen=True) class ReactionInstance: + """ + Represents a specific instance of a reaction between identified monomers. + + Attributes: + reaction_name: Name of the polymerization type. + reaction_smarts: The SMARTS string used to perform the reaction. + delete_atom: Flag indicating if specific atoms should be removed post-reaction. + references: Dictionary containing literature references. + monomer_1: The first monomer involved. + functional_group_1: The specific FG on monomer 1 participating. + monomer_2: The second monomer (None for homo-polymerization). + functional_group_2: The specific FG on monomer 2 participating. + """ reaction_name: str reaction_smarts: str delete_atom: bool references: dict - monomer_1: MonomerRole functional_group_1: FunctionalGroupInfo monomer_2: Optional[MonomerRole] = None functional_group_2: Optional[FunctionalGroupInfo] = None class ReactionDetector: + """ + Detects valid chemical reactions between a list of monomers based on a + predefined library of polymerization mechanisms. + """ def __init__(self): + """Initializes the detector by loading the reaction library.""" self.reactions = ReactionLibrary().reactions - def _matching_fgs(self, monomer_entry: MonomerRole, target_group_name: str) -> list: - fg_hits = [] - for fg in monomer_entry.functionalities: - if fg.fg_name == target_group_name: - fg_hits.append(fg) - return fg_hits + def _matching_fgs(self, monomer_entry: MonomerRole, target_group_name: str) -> List[FunctionalGroupInfo]: + """ + Filters functional groups in a monomer that match a target name. + + Args: + monomer_entry: The monomer to search. + target_group_name: The name of the FG to look for. + + Returns: + A list of matching FunctionalGroupInfo objects. + """ + return [fg for fg in monomer_entry.functionalities if fg.fg_name == target_group_name] def _seen_pair_key( self, @@ -103,100 +159,82 @@ def _seen_pair_key( monomer_role_2: Optional[MonomerRole] = None, fg_2: Optional[FunctionalGroupInfo] = None, ) -> Tuple: - + """ + Generates a unique, order-independent key for a reaction pair to avoid duplicates. + + Args: + reaction_name: Name of the reaction. + monomer_role_1: First monomer. + fg_1: First functional group. + monomer_role_2: Second monomer (optional). + fg_2: Second functional group (optional). + + Returns: + A tuple representing the unique state of this reaction instance. + """ if monomer_role_2 is None or fg_2 is None: return (reaction_name, monomer_role_1, fg_1) - # symmetric case + # Sort reactants by memory ID to ensure (A, B) and (B, A) produce the same key ordered = tuple(sorted( [(monomer_role_1, fg_1), (monomer_role_2, fg_2)], key=lambda x: (id(x[0]), id(x[1])) )) return (reaction_name, ordered[0], ordered[1]) - def reaction_detector(self, monomer_roles: list[MonomerRole]) -> list[ReactionInstance]: + def reaction_detector(self, monomer_roles: List[MonomerRole]) -> List[ReactionInstance]: """ - Detects possible polymerization reactions based on the functional groups in the provided monomer dictionary. - - This function iterates through predefined reactions and matches them against the functional groups - in the monomers. It supports homopolymerization (same reactants) and copolymerization (different reactants). - For each match, it records the monomer details and reaction index. - + Scans a list of monomers to find all possible polymerization reactions. + Args: - monomer_dictionary (dict): A dictionary where keys are monomer indices or IDs, - and values are dictionaries containing monomer details, - including 'smiles' and functional groups with 'functional_group_name'. - + monomer_roles: List of monomers with their detected functional groups. + Returns: - dict: A dictionary of detected reactions, structured as: - {reaction_name: {, rx_index: {monomer_1: {...}, monomer_2: {... (if applicable)}}}} - - Note: - This logic matches functional_group_name against reaction reactant types. - It does not validate the reaction SMARTS against the exact monomer structures (RDKit reaction execution is later). + A list of ReactionInstance objects representing valid matches. """ - - # create a new list to hold reaction instances reaction_instances = [] - seen_pairs = set() # to track unique reactant pairs and avoid duplicates + seen_pairs: Set[Tuple] = set() - # Iterate over each predefined reaction for reaction_name, reaction_info in self.reactions.items(): reactant_1_name = reaction_info.get("reactant_1") - reactant_2_name = reaction_info.get("reactant_2") # may be None + reactant_2_name = reaction_info.get("reactant_2") same_reactants = reaction_info.get("same_reactants", False) - # HOMO CASE + # CASE 1: HOMO-POLYMERIZATION (e.g., A + A) if same_reactants and reactant_2_name is None: for monomer_role in monomer_roles: fg_hits = self._matching_fgs(monomer_role, reactant_1_name) - if not fg_hits: - continue for fg in fg_hits: - pair_key = self._seen_pair_key( - reaction_name, - monomer_role, - fg - ) - - if pair_key in seen_pairs: - continue - - seen_pairs.add(pair_key) - reaction_instances.append( - ReactionInstance( - reaction_name=reaction_name, - reaction_smarts=reaction_info["reaction"], - delete_atom=reaction_info["delete_atom"], - references=reaction_info["reference"], - monomer_1=monomer_role, - functional_group_1=fg, - monomer_2=None, - functional_group_2=None + pair_key = self._seen_pair_key(reaction_name, monomer_role, fg) + if pair_key not in seen_pairs: + seen_pairs.add(pair_key) + reaction_instances.append( + ReactionInstance( + reaction_name=reaction_name, + reaction_smarts=reaction_info["reaction"], + delete_atom=reaction_info["delete_atom"], + references=reaction_info["reference"], + monomer_1=monomer_role, + functional_group_1=fg + ) ) - ) - # COMONOMER CASE + # CASE 2: CO-POLYMERIZATION (e.g., A + B) else: for monomer_role_i in monomer_roles: fg_hits_i = self._matching_fgs(monomer_role_i, reactant_1_name) - if not fg_hits_i: - continue + if not fg_hits_i: continue + for fg_i in fg_hits_i: for monomer_role_j in monomer_roles: + # Prevent a monomer reacting with itself in a co-monomer definition if monomer_role_i == monomer_role_j: - continue # skip same monomer + continue + fg_hits_j = self._matching_fgs(monomer_role_j, reactant_2_name) - if not fg_hits_j: - continue for fg_j in fg_hits_j: - # Create a unique pair identifier to avoid duplicates pair_key = self._seen_pair_key( - reaction_name, - monomer_role_i, - fg_i, - monomer_role_j, - fg_j - ) + reaction_name, monomer_role_i, fg_i, monomer_role_j, fg_j + ) if pair_key not in seen_pairs: seen_pairs.add(pair_key) reaction_instances.append( @@ -210,35 +248,46 @@ def reaction_detector(self, monomer_roles: list[MonomerRole]) -> list[ReactionIn monomer_2=monomer_role_j, functional_group_2=fg_j ) - ) - + ) return reaction_instances - def create_reaction_image(self, reactant_a_smiles, reactant_b_smiles, reaction_smarts, reaction_name): - - reactant_a = Chem.MolFromSmiles(reactant_a_smiles) - reactant_b = Chem.MolFromSmiles(reactant_b_smiles) - reactant_a = Chem.AddHs(reactant_a) - reactant_b = Chem.AddHs(reactant_b) - # debug prints - # print(Chem.MolToSmiles(reactant_a)) - # print(Chem.MolToSmiles(reactant_b)) + def create_reaction_image(self, reactant_a_smiles: str, reactant_b_smiles: str, + reaction_smarts: str, reaction_name: str) -> Image.Image: + """ + Simulates a reaction using RDKit and generates a 2D image of the transformation. + + Args: + reactant_a_smiles: SMILES for the first reactant. + reactant_b_smiles: SMILES for the second reactant. + reaction_smarts: SMARTS pattern defining the reaction. + reaction_name: Name of the reaction for metadata. + + Returns: + A PIL Image object of the reaction. + + Raises: + SMARTSerror: If the reaction SMARTS fails to produce products. + """ + reactant_a = Chem.AddHs(Chem.MolFromSmiles(reactant_a_smiles)) + reactant_b = Chem.AddHs(Chem.MolFromSmiles(reactant_b_smiles)) + rxn_engine = rdChemReactions.ReactionFromSmarts(reaction_smarts) + + # Try reacting A + B products_sets = rxn_engine.RunReactants((reactant_a, reactant_b)) - # debug print - # print(f"Products sets for {reaction_smarts}") + # Fallback: Try reacting B + A if the first order failed if not products_sets: products_sets = rxn_engine.RunReactants((reactant_b, reactant_a)) if not products_sets: - print(f"No products generated for {reaction_smarts} with either reactant order.") - raise SMARTSERROR(f"Reaction SMARTS '{reaction_smarts}' did not produce any products for reactants '{reactant_a_smiles}' and '{reactant_b_smiles}' in either order.") - + raise SMARTSerror(f"Reaction SMARTS '{reaction_smarts}' failed for {reactant_a_smiles} + {reactant_b_smiles}") + # Prepare the reaction for visualization display_rxn = rdChemReactions.ChemicalReaction() display_rxn.AddReactantTemplate(reactant_a) display_rxn.AddReactantTemplate(reactant_b) + # Sanitize and add the first set of products found for prod in products_sets[0]: try: Chem.SanitizeMol(prod) @@ -248,36 +297,37 @@ def create_reaction_image(self, reactant_a_smiles, reactant_b_smiles, reaction_s return Draw.ReactionToImage(display_rxn, subImgSize=(400, 400)) - - def create_reaction_image_grid(self, selected_reaction_instances): - + def create_reaction_image_grid(self, selected_reaction_instances: List[ReactionInstance]) -> Optional[Image.Image]: + """ + Creates a vertical grid image containing all selected reaction visualizations. + + Args: + selected_reaction_instances: List of reactions to visualize. + + Returns: + A combined PIL Image or None if no images were generated. + """ img_list = [] for rxn in selected_reaction_instances: - reactant_a = rxn.monomer_1.smiles + reactant_b = rxn.monomer_2.smiles if rxn.monomer_2 else rxn.monomer_1.smiles - if rxn.monomer_2: - reactant_b = rxn.monomer_2.smiles - else: - reactant_b = rxn.monomer_1.smiles - - img = self.create_reaction_image( - reactant_a, - reactant_b, - rxn.reaction_smarts, - rxn.reaction_name - ) - - if img: - img_list.append(img) + try: + img = self.create_reaction_image( + reactant_a, reactant_b, rxn.reaction_smarts, rxn.reaction_name + ) + if img: img_list.append(img) + except SMARTSerror as e: + print(f"Skipping visualization: {e}") if not img_list: return None + # Calculate dimensions for the vertical stack single_width, single_height = img_list[0].size total_height = single_height * len(img_list) - total_width = single_width + 80 + total_width = single_width + 80 # Extra space for numbering new_img = Image.new("RGB", (total_width, total_height), (255, 255, 255)) draw = ImageDraw.Draw(new_img) @@ -287,6 +337,7 @@ def create_reaction_image_grid(self, selected_reaction_instances): except: font = ImageFont.load_default() + # Paste each reaction image into the grid y_offset = 0 for i, img in enumerate(img_list, start=1): draw.text((10, y_offset + 10), f"{i}.", fill=(0, 0, 0), font=font) @@ -295,69 +346,61 @@ def create_reaction_image_grid(self, selected_reaction_instances): return new_img - def reaction_selection(self, reaction_instances: list[ReactionInstance]) -> list[ReactionInstance]: + def reaction_selection(self, reaction_instances: List[ReactionInstance]) -> List[ReactionInstance]: + """ + Interactive CLI to allow users to select specific reactions from the detected list. + + Args: + reaction_instances: List of all detected reactions. + + Returns: + A list of user-selected ReactionInstance objects. + """ + if not reaction_instances: + raise EmptyReactionListError("No reaction instances detected. Cannot proceed with selection.") + + if len(reaction_instances) == 1: + print("Only one reaction detected. Automatically selecting it.") + return reaction_instances + print("Detected Reactions:") for i, rx in enumerate(reaction_instances, start=1): print(f"{i}. {rx.reaction_name}") - print(f" Monomer 1: {rx.monomer_1.smiles}") - print(f" FG 1: {rx.functional_group_1.fg_name}") - + print(f" Monomer 1: {rx.monomer_1.smiles} ({rx.functional_group_1.fg_name})") if rx.monomer_2: - print(f" Monomer 2: {rx.monomer_2.smiles}") - print(f" FG 2: {rx.functional_group_2.fg_name}") - + print(f" Monomer 2: {rx.monomer_2.smiles} ({rx.functional_group_2.fg_name})") print() - if not reaction_instances: - print("No reaction instances available.") - return [] while True: - raw = input("Enter reaction numbers (comma-separated): ").strip() - + raw = input("Enter reaction numbers (comma-separated, e.g., 1,3): ").strip() if not raw: - print("Empty input. Try again.") - continue - - tokens = [t.strip() for t in raw.split(",")] - - if not all(t.isdigit() for t in tokens): - print("Invalid input. Use integers like: 1,2,3") continue - indices = sorted(set(int(t) for t in tokens)) - - if any(i < 1 or i > len(reaction_instances) for i in indices): - print("One or more indices out of range.") - continue + try: + tokens = [int(t.strip()) for t in raw.split(",")] + if any(i < 1 or i > len(reaction_instances) for i in tokens): + print("Out of range. Try again.") + continue + indices = sorted(set(tokens)) + break + except ValueError: + print("Invalid input. Please use numbers.") - break selected = [reaction_instances[i-1] for i in indices] print(f"Selected {len(selected)} reactions.") - for rx in selected: - print(f"{rx.reaction_name}: {rx.monomer_1.smiles} ({rx.functional_group_1.fg_name})", end="") - if rx.monomer_2: - print(f" + {rx.monomer_2.smiles} ({rx.functional_group_2.fg_name})") - else: - print() return selected if __name__ == "__main__": - detector = ReactionDetector() - # Example monomer dictionary for testing - def convert_dict_to_monomer_roles(monomer_dictionary: dict) -> list[MonomerRole]: + # --- Test Setup --- + def convert_dict_to_monomer_roles(monomer_dictionary: dict) -> List[MonomerRole]: + """Helper to convert raw dictionary data into MonomerRole objects.""" monomer_roles = [] - for _, entry in monomer_dictionary.items(): - smiles = entry["smiles"] - functionalities = [] - for key, value in entry.items(): - if not isinstance(key, int): - continue - + if not isinstance(key, int): continue functionalities.append( FunctionalGroupInfo( functionality_type=value["functionality_type"], @@ -368,17 +411,10 @@ def convert_dict_to_monomer_roles(monomer_dictionary: dict) -> list[MonomerRole] fg_count_2=value.get("functional_count_2", 0), ) ) - - monomer_roles.append( - MonomerRole( - smiles=smiles, - name=smiles, # or replace with actual name if you have one - functionalities=tuple(functionalities), - ) - ) - + monomer_roles.append(MonomerRole(smiles=smiles, name=smiles, functionalities=tuple(functionalities))) return monomer_roles + # Mock monomer data for testing detection logic monomer_dictionary = { 1: { "smiles": "C=CCO", @@ -636,27 +672,31 @@ def convert_dict_to_monomer_roles(monomer_dictionary: dict) -> list[MonomerRole] } } - detector = ReactionDetector() - # Convert dictionary to object model + detector = ReactionDetector() monomer_roles = convert_dict_to_monomer_roles(monomer_dictionary) - - # Run detector reaction_instances = detector.reaction_detector(monomer_roles) - import os - from pathlib import Path + # --- Save Results --- autoreacter_dir = pathlib.Path(__file__).parent.parent.parent.resolve() save_dir = autoreacter_dir / "cache" / "_cache_test" / "reaction_visualization" os.makedirs(save_dir, exist_ok=True) - file_path = save_dir / f"reaction_instances.png" + file_path = save_dir / "reaction_instances.png" img = detector.create_reaction_image_grid(reaction_instances) + if img: img.save(file_path) print(f"Reaction grid image saved to: {file_path}") - else: - print("No valid reaction images generated.") - # Optional: Allow user to select reactions - selected_reactions = detector.reaction_selection(reaction_instances) \ No newline at end of file + # Optional interactive selection + selected_reactions = detector.reaction_selection(reaction_instances) + + print(f"Total detected reactions: {len(selected_reactions)}") + for i, rx in enumerate(selected_reactions, start=1): + print(f"{i}. {rx.reaction_name} - {rx.monomer_1.smiles} ({rx.functional_group_1.fg_name})", end="") + if rx.monomer_2: + print(f" + {rx.monomer_2.smiles} ({rx.functional_group_2.fg_name})") + else: + print() + print(selected_reactions) \ No newline at end of file diff --git a/AutoREACTER/input_parser.py b/AutoREACTER/input_parser.py index 5742c63..c5fd07a 100644 --- a/AutoREACTER/input_parser.py +++ b/AutoREACTER/input_parser.py @@ -53,8 +53,12 @@ class DuplicateMonomerError(InputError): # Type aliases for clarity and validation StatusType = Literal["active", "discarded"] -CompositionMethodType = Literal["counts", "stoichiometric_ratio"] # Placeholder for future support. -ForceFieldType = Literal["PCFF-IFF", "PCFF", "Compass", "CVFF-IFF", "CVFF", "Clay-FF", "DRIEDING", "OPLS-AA"] +CompositionMethodType = Literal["counts", "stoichiometric_ratio"] +ForceFieldType = Literal[ + "PCFF-IFF", "PCFF", "Compass", + "CVFF-IFF", "CVFF", "Clay-FF", + "DRIEDING", "OPLS-AA" +] @dataclass(slots=True, frozen=True) @@ -103,25 +107,25 @@ class SimulationSetup: Attributes: simulation_name: A simple name for the simulation, used for output organization. temperature: List of temperature values (Kelvin). Normalized to list internally. - density: Overall target density for the system (g/cm^3). + density: List of density values in g/cm^3. force_field: Optional string specifying the force field to use (e.g., "PCFF", "OPLS-AA"). Default is "PCFF". monomers: List of MonomerEntry objects representing the system composition. - composition_method: The method used to define composition ("counts" or "stoichiometric_ratio"). + composition_method: The method used to define composition ("counts" or "ratio"). composition: Raw composition dictionary from inputs for flexible downstream use. stoichiometric_ratio: Mapping of monomer IDs to ratios. number_of_total_atoms: List of total atom counts per target (place holder to be calculated). box_estimates: Estimated box size based on density (placeholder). """ simulation_name: str - temperature: list[float] - density: float + temperature: list[float] + density: list[float] force_field: str | None - monomers: list[MonomerEntry] - composition_method: CompositionMethodType | None = None - composition : dict[str, Any] | None = None - stoichiometric_ratio: dict[int, float] | None = None - number_of_total_atoms: list[int] | None = None - box_estimates: float | None = None + monomers: list[MonomerEntry] + composition_method: CompositionMethodType | None = None + composition: dict[str, Any] | None = None + stoichiometric_ratio: dict[int, float] | None = None + number_of_total_atoms: list[int] | None = None + box_estimates: float | None = None class InputParser: @@ -147,25 +151,37 @@ def validate_inputs(self, inputs: dict) -> SimulationSetup: A validated SimulationSetup object ready for the simulation engine. """ # Check top-level structure and determine composition method. - self.validate_basic_format(inputs) + self.validate_basic_format(inputs) + simulation_name = inputs["simulation_name"] - temperature = self._validate_temperature(inputs["temperature"]) - density = self._validate_density(inputs["density"]) - composition_method = self._get_inputs_mode(inputs["composition"]) - composition_dict = self._validate_composition(inputs["composition"], composition_method) - monomers = self._validate_monomer_entry(inputs, composition_method, composition_dict) - force_field = self._validate_force_field(inputs.get("force_field", None)) - + + replicas_dict = inputs["replicas"] + composition_method = self._get_inputs_mode(replicas_dict) + + validated_replicas = self._validate_replicas( + replicas_dict, composition_method + ) + + monomers = self._validate_monomer_entry( + inputs, + composition_method, + validated_replicas["systems"], + ) + + force_field = self._validate_force_field( + inputs.get("force_field", None) + ) return SimulationSetup( simulation_name=simulation_name, - temperature=temperature, - density=density, + temperature=validated_replicas["temperatures"], + density=validated_replicas["density"], monomers=monomers, composition_method=composition_method, - composition=composition_dict, + composition=validated_replicas, force_field=force_field ) + def molecule_representation_of_initial_molecules(self, simulation_setup: SimulationSetup) -> list[Chem.Mol]: """ @@ -197,16 +213,20 @@ def validate_basic_format(self, inputs: dict) -> None: InputSchemaError: If the input is not a dict or is missing required keys. """ if not isinstance(inputs, dict): - raise InputSchemaError(f"Expected input to be a dictionary. Got {type(inputs).__name__} instead.") + raise InputSchemaError( + f"Expected input to be a dictionary. Got {type(inputs).__name__} instead." + ) - required_keys = ["simulation_name", "temperature", "density", "composition", "monomers"] + required_keys = ["simulation_name", "replicas", "monomers"] for key in required_keys: if key not in inputs: - raise InputSchemaError(f"Missing required key: {key!r} in inputs dictionary.") + raise InputSchemaError( + f"Missing required key: {key!r} in inputs dictionary." + ) return None - def _get_inputs_mode(self, composition_dict: dict) -> CompositionMethodType: + def _get_inputs_mode(self, replicas_dict: dict) -> CompositionMethodType: """ Determines the composition method from the composition dictionary. @@ -214,23 +234,25 @@ def _get_inputs_mode(self, composition_dict: dict) -> CompositionMethodType: composition_dict: The 'composition' sub-dictionary from the inputs. Returns: - The composition method string ("counts" or "stoichiometric_ratio"). + The composition method string ("counts" or "ratio"). Raises: InputSchemaError: If the composition dict is invalid or method is unsupported. """ - if not isinstance(composition_dict, dict): + if not isinstance(replicas_dict, dict): raise InputSchemaError( - f"'composition' must be a dictionary. Got {type(composition_dict).__name__} instead." + f"'replicas' must be a dictionary. Got {type(replicas_dict).__name__} instead." ) - - method = composition_dict.get("method") - allowed: set[CompositionMethodType] = {"counts", "stoichiometric_ratio"} + + method = replicas_dict.get("method") + allowed = {"counts", "ratio"} if method not in allowed: - raise InputSchemaError(f"Unsupported composition method: {method!r}") + raise InputSchemaError( + f"Unsupported replicas method: {method!r}" + ) - return method + return "counts" if method == "counts" else "stoichiometric_ratio" def _validate_temperature(self, temp: Any) -> list[float]: """ @@ -273,9 +295,24 @@ def _validate_density(self, density: Any) -> float: Raises: NumericFieldError: If density is not a positive number. """ - if not isinstance(density, (int, float)) or density <= 0: - raise NumericFieldError(f"'density' must be a positive number. Got: {density!r}") - return density + if isinstance(density, (int, float)): + density_list = [float(density)] + + elif isinstance(density, list) and all(isinstance(d, (int, float)) for d in density): + density_list = [float(d) for d in density] + + else: + raise NumericFieldError( + f"'density' must be a number or list of numbers. Got: {density!r}" + ) + + for d in density_list: + if d <= 0: + raise NumericFieldError( + f"Density values must be positive. Got: {d!r}" + ) + + return density_list def _validate_force_field(self, force_field: Any) -> str: """ @@ -372,8 +409,8 @@ def _validate_composition( def _validate_monomer_entry(self, inputs: dict, method: CompositionMethodType, - composition_dict: dict - ) -> list[MonomerEntry]: + systems: list[dict] + ) -> list[MonomerEntry]: """ Validates monomer entries and constructs MonomerEntry objects. @@ -385,6 +422,7 @@ def _validate_monomer_entry(self, inputs: The raw input dictionary. method: The composition method ("counts" or "stoichiometric_ratio"). composition_dict: Validated composition dictionary containing target tags. + systems: List of system dictionaries from the replicas section, used to validate counts/ratios. Returns: A list of validated MonomerEntry objects. @@ -397,103 +435,92 @@ def _validate_monomer_entry(self, NumericFieldError: If count/ratio values are invalid. """ validated_monomers: list[MonomerEntry] = [] - seen_monomer_list: list = [] + seen_smiles: list[str] = [] monomers = inputs.get("monomers") if not isinstance(monomers, list): - if isinstance(monomers, dict): - # Allow single monomer dict for convenience, but normalize to list internally. - monomers = [monomers] - else: - raise ValueError(f"'monomers' must be a list of monomer definitions. Got: {type(monomers).__name__} instead.") - + raise InputSchemaError("'monomers' must be a list.") + + # Collect all tags + system_tags = [system["tag"] for system in systems] + for monomer_id, monomer_dict in enumerate(monomers, start=1): - id = monomer_id - - # Handle naming - if "name" in monomer_dict: - name = monomer_dict["name"] - if not isinstance(name, str) or not name.strip(): - name = str("data_" + str(id)) - else: - name = str("data_" + str(id)) - - # Validate SMILES - smiles = monomer_dict.get("smiles", None) - if not isinstance(smiles, str) or not smiles.strip(): - raise SmilesValidationError(f"Monomer {id}: 'smiles' must be a non-empty string. Got: {smiles!r}") - else: - # Validate and canonicalize SMILES immediately. - smiles, mol = self._validate_smiles(smiles) - # Check for duplicates against previously validated monomers. - seen_monomer_list = self.validate_no_duplicate_smiles(smiles, seen_monomer_list=seen_monomer_list) - # Handle Counts Mode - if method == "counts": - count_info = monomer_dict.get("count") + name = monomer_dict.get("name") - if not isinstance(count_info, dict): - raise InputSchemaError( - f"Monomer {monomer_id}: 'count' must be a dictionary mapping target tags to counts." - ) + if not isinstance(name, str) or not name.strip(): + name = f"data_{monomer_id}" # Default name if not provided or invalid + + smiles_raw = monomer_dict.get("smiles") - # Extract valid tags from composition - valid_tags = {t["tag"] for t in composition_dict["targets"]} - count_tags = set(count_info.keys()) + smiles, mol = self._validate_smiles(smiles_raw) + seen_smiles = self.validate_no_duplicate_smiles( + smiles, seen_smiles + ) - # Check missing tags - missing = valid_tags - count_tags - if missing: - raise InputSchemaError( - f"Monomer {monomer_id}: missing count entries for targets: {missing}" - ) + if method == "counts": - # Check extra tags - extra = count_tags - valid_tags - if extra: - raise InputSchemaError( - f"Monomer {monomer_id}: unknown target tags in count: {extra}" - ) + count_map = {} + + for system in systems: + tag = system["tag"] + monomer_counts = system["monomer_counts"] - # Validate count values - for tag, value in count_info.items(): - if not isinstance(value, int) or value <= 0: + if name not in monomer_counts: + raise InputSchemaError( + f"Monomer {name!r} missing in system '{tag}'." + ) + + value = monomer_counts[name] + if not isinstance(value, int) or value < 0: raise NumericFieldError( - f"Monomer {monomer_id}, target '{tag}': count must be a positive integer. Got: {value!r}" + f"Invalid count for monomer {name!r} in system '{tag}'." ) - count = count_info + count_map[tag] = value + + count = count_map ratio = None - - # Handle Stoichiometric Ratio Mode - elif method == "stoichiometric_ratio": - ratio = monomer_dict.get("ratio", None) - if not isinstance(ratio, (int, float)) or ratio <= 0: + + else: + # ratio defined per system (must be consistent) + first_system = systems[0] + ratios = first_system["monomer_ratios"] + + if name not in ratios: + raise InputSchemaError( + f"Monomer {name!r} missing in ratio definition." + ) + + ratio_value = ratios[name] + + if not isinstance(ratio_value, (int, float)) or ratio_value < 0: raise NumericFieldError( - f"Monomer {monomer_id}: 'ratio' must be a positive number in stoichiometric mode. Got: {ratio!r}" + f"Invalid ratio for monomer {name!r}." ) + + ratio = float(ratio_value) count = None - - else: - raise InputSchemaError(f"Unsupported composition method: {method!r}") - - # Derive atom count and molar mass from RDKit - mol_with_h = Chem.AddHs(mol) # Ensure we count implicit hydrogens for accurate atom count and molar mass. - mw = Descriptors.MolWt(mol_with_h) + + # RDKit derived properties + mol_with_h = Chem.AddHs(mol) atom_count = mol_with_h.GetNumAtoms() - - validated_monomers.append(MonomerEntry( - id=id, - data_id=str("data_" + str(id)), - name=name, - smiles=smiles, - molar_mass=mw, - atom_count=atom_count, - count=count, - ratio=ratio, - rdkit_mol=mol - )) - + mw = Descriptors.MolWt(mol_with_h) + + validated_monomers.append( + MonomerEntry( + id=monomer_id, + data_id=f"data_{monomer_id}", + name=name, + smiles=smiles, + count=count, + ratio=ratio, + atom_count=atom_count, + molar_mass=mw, + rdkit_mol=mol, + ) + ) + return validated_monomers def _int_to_dict(self, integer: int) -> dict: @@ -593,88 +620,250 @@ def validate_no_duplicate_smiles(self, current_monomer: str, seen_monomer_list: ) seen_monomer_list.append(current_monomer) return seen_monomer_list + + def _validate_replicas(self, replicas_dict: dict, method: CompositionMethodType) -> dict: + """Validates the 'replicas' section of the input, ensuring correct structure and required fields based on the composition method. + Args: + replicas_dict: The 'replicas' sub-dictionary from the input. + method: The composition method being used ("counts" or "stoichiometric_ratio"). + Returns: + A validated dictionary containing the method, temperatures, density, and systems. + Raises: + InputSchemaError: If the structure is invalid, required fields are missing, or tags are duplicated. + NumericFieldError: If numeric fields like 'total_atoms' are invalid. + """ + + temperatures = self._validate_temperature( + replicas_dict.get("temperatures") + ) + + density = self._validate_density( + replicas_dict.get("density") + ) + + systems = replicas_dict.get("systems") + + if not isinstance(systems, list) or not systems: + raise InputSchemaError( + "'replicas.systems' must be a non-empty list." + ) + + seen_tags = set() + + for system in systems: + + if not isinstance(system, dict): + raise InputSchemaError( + "Each system must be a dictionary." + ) + + tag = system.get("tag") + + if not isinstance(tag, str) or not tag.strip(): + raise InputSchemaError( + "Each system must include a non-empty 'tag'." + ) + + if tag in seen_tags: + raise InputSchemaError( + f"Duplicate system tag detected: {tag!r}" + ) + + seen_tags.add(tag) + + if method == "stoichiometric_ratio": + + total_atoms = system.get("total_atoms") + + if not isinstance(total_atoms, int) or total_atoms <= 0: + raise NumericFieldError( + "'total_atoms' must be a positive integer in ratio mode." + ) + + ratios = system.get("monomer_ratios") + + if not isinstance(ratios, dict): + raise InputSchemaError( + "'monomer_ratios' must be provided in ratio mode." + ) + + if method == "counts": + + counts = system.get("monomer_counts") + + if not isinstance(counts, dict): + raise InputSchemaError( + "'monomer_counts' must be provided in counts mode." + ) + + if "total_atoms" in system: + raise InputSchemaError( + "'total_atoms' should not appear in counts mode." + ) + + return { + "method": method, + "temperatures": temperatures, + "density": density, + "systems": systems + } if __name__ == "__main__": # Sample input data for quick manual verification of the parser. - + # Example 1: Counts Mode inputs = { "simulation_name": "Example_Count_Mode", - "temperature": [300, 400, 500], - "density": 0.8, - "composition": { + "replicas": { + "temperatures": [300, 400, 500], + "density": 0.8, "method": "counts", - "targets": [ - {"tag": "10k"}, - {"tag": "100k"} + "systems": [ + { + "tag": "10k", + "monomer_counts": { + "tmc": 220, + "mpd": 220, + "ethanol": 110 + } + }, + { + "tag": "100k", + "monomer_counts": { + "tmc": 2200, + "mpd": 2200, + "ethanol": 1100 + } + } ] }, - - "monomers": [ + + "monomers": [ { - "name": "tmc", - "smiles": "ClC(=O)c1cc(cc(c1)C(Cl)=O)C(Cl)=O", - "count": { - "10k": 220, - "100k": 2200 - } + "name": "tmc", + "smiles": "ClC(=O)c1cc(cc(c1)C(Cl)=O)C(Cl)=O" }, { - "name": "mpd", - "smiles": "C1=CC(=CC(=C1)N)N", - "count": { - "10k": 220, - "100k": 2200 - } + "name": "mpd", + "smiles": "C1=CC(=CC(=C1)N)N" }, { - "name": "ethanol", - "smiles": "CCO", - "count": { - "10k": 110, - "100k": 1100 - } + "name": "ethanol", + "smiles": "CCO" } ] } - + # Example 2: Stoichiometric Mode - inputs_stoichiometric = { - "simulation_name": "Example_Stoichiometric_Mode", - "temperature": [300, 400, 500], - "density": 0.8, - - "composition": { - "method": "stoichiometric_ratio", - "targets": [ - {"tag": "10k", "total_atoms": 10000}, - {"tag": "100k", "total_atoms": 100000} + inputs_stoichiometric = { + + "simulation_name": "Example_Ratio_Mode", + + "replicas": { + "temperatures": [300, 400], + "density": [0.8], + "method": "ratio", + + "systems": [ + + { + "tag": "10k_base", + "total_atoms": 10000, + + "monomer_ratios": { + "tmc": 1.0, + "mpd": 1.0, + "ethanol": 0.5 + } + }, + + { + "tag": "100k_base", + "total_atoms": 100000, + + "monomer_ratios": { + "tmc": 1.0, + "mpd": 1.0, + "ethanol": 0.5 + } + } + + ] + }, + + "monomers": [ + + { + "name": "tmc", + "smiles": "ClC(=O)c1cc(cc(c1)C(Cl)=O)C(Cl)=O" + }, + + { + "name": "mpd", + "smiles": "C1=CC(=CC(=C1)N)N" + }, + + { + "name": "ethanol", + "smiles": "CCO" + } + + ] + } + input_ff = { + "simulation_name": "Example_Count_Mode", + + "replicas": { + "temperatures": [300, 400, 500], + "density": 0.8, + "method": "counts", + + "systems": [ + { + "tag": "10k", + "monomer_counts": { + "tmc": 220, + "mpd": 220, + "ethanol": 110 + } + }, + { + "tag": "100k", + "monomer_counts": { + "tmc": 2200, + "mpd": 2200, + "ethanol": 1100 + } + } ] }, + "force_field": "PCFF", + "monomers": [ { "name": "tmc", - "smiles": "ClC(=O)c1cc(cc(c1)C(Cl)=O)C(Cl)=O", - "ratio": 1.0 + "smiles": "ClC(=O)c1cc(cc(c1)C(Cl)=O)C(Cl)=O" }, { "name": "mpd", - "smiles": "C1=CC(=CC(=C1)N)N", - "ratio": 1.0 + "smiles": "C1=CC(=CC(=C1)N)N" }, { "name": "ethanol", - "smiles": "CCO", - "ratio": 0.5 + "smiles": "CCO" } ] } - parser = InputParser() print("Validating Counts Mode Input:") print(parser.validate_inputs(inputs)) - print("\nValidating Stoichiometric Mode Input:") + print("\nValidating Ratio Mode Input:") print(parser.validate_inputs(inputs_stoichiometric)) + + print("\nValidating Force Field Input:") + print(parser.validate_inputs(input_ff)) + + diff --git a/AutoREACTER/reaction_template_builder/run_reaction_template_pipeline.py b/AutoREACTER/reaction_template_builder/run_reaction_template_pipeline.py index 78c1a34..7c8e4ac 100644 --- a/AutoREACTER/reaction_template_builder/run_reaction_template_pipeline.py +++ b/AutoREACTER/reaction_template_builder/run_reaction_template_pipeline.py @@ -16,40 +16,6 @@ - Implement duplicate reaction detection and handling. """ - -"""TODO (Reaction Template Pipeline): -from dataclasses import dataclass -from typing import Literal - -@dataclass(slots=True, frozen=True) -class MonomerRole: - spec: MoleculeSpec - functionality_type: str - fg_name: str - fg_smarts_1: str - fg_count_1: int - fg_smarts_2: str | None = None - fg_count_2: int | None = None - -@dataclass(slots=True, frozen=True) -class ReactionDefinition: - reaction_name: str - reaction_smarts: str - same_reactants: bool - delete_atom: bool - references: dict - monomer_1: MonomerRole - monomer_2: MonomerRole | None = None -""" - - -""" -def withdraw(balance, amount): - if amount > balance: - raise InsufficientFundsError(amount - balance, f"Cannot withdraw {amount}, only {balance} available.") - return balance - amount -""" - """ TODO: Preserve stable reaction_id for each reaction (never renumber). Maintain a seq_id mapping for the current filtered set (continuous 1..N) for UI/export. diff --git a/examples/example_1.ipynb b/examples/example_1.ipynb index c5bcb7e..80a5804 100644 --- a/examples/example_1.ipynb +++ b/examples/example_1.ipynb @@ -23,6 +23,7 @@ "from AutoREACTER.cache import GetCacheDir, RunDirectoryManager, RetentionCleanup\n", "from AutoREACTER.detectors.functional_groups_detector import FunctionalGroupsDetector\n", "from AutoREACTER.detectors.reaction_detector import ReactionDetector\n", + "from AutoREACTER.detectors.non_monomer_detector import NonReactantsDetector\n", "from AutoREACTER.reaction_template_builder.run_reaction_template_pipeline import ReactionTemplatePipeline\n", "from rdkit import Chem\n", "from rdkit.Chem import Draw\n", @@ -31,9 +32,10 @@ "Initialization()\n", "input_parser = InputParser()\n", "\n", + "input_parser = InputParser()\n", "cache_dir = GetCacheDir().staging_dir\n", "dated_cache_dir = RunDirectoryManager.make_dated_run_dir(cache_dir, chdir_to=\"none\")\n", - "# future use\n", + "# #future use\n", "# RunDirectoryManager.copy_into_run(cache_dir, dated_cache_dir)\n", "\n", "\n" @@ -105,15 +107,12 @@ "metadata": {}, "outputs": [], "source": [ - "# Detector must be called after the inputs are validated, because it operates on validated MonomerEntry objects (validated_inputs.monomers)\n", + "# Run the detector only after the inputs have been validated, as it consumes validated_inputs.monomers to execute the detection workflow.\n", "functional_groups_detector = FunctionalGroupsDetector()\n", - "functional_groups, functional_groups_imgs = \n", + "functional_groups, functional_groups_imgs = \\\n", " functional_groups_detector.functional_groups_detector(\n", " validated_inputs.monomers\n", - " )\n", - "\n", - "for fg_img in functional_groups_imgs:\n", - " print(fg_img.indexes_to_highlight)" + " )" ] }, { @@ -135,6 +134,7 @@ "outputs": [], "source": [ "reaction_detector = ReactionDetector()\n", + "non_monomer_detector = NonReactantsDetector()\n", "reaction_instances = reaction_detector.reaction_detector(functional_groups)\n", "img = reaction_detector.create_reaction_image_grid(reaction_instances)\n", "img" @@ -150,6 +150,28 @@ "selected_reactions = reaction_detector.reaction_selection(reaction_instances)" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "c811c031", + "metadata": {}, + "outputs": [], + "source": [ + "non_reactants_list = non_monomer_detector.non_monomer_detector(validated_inputs, selected_reactions)\n", + "img_non_reactants = non_monomer_detector.non_reactants_to_visualization(non_reactants_list)\n", + "img_non_reactants" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "00d75b1a", + "metadata": {}, + "outputs": [], + "source": [ + "updated_inputs = non_monomer_detector.non_reactant_selection(validated_inputs, non_reactants_list)" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/examples/example_1_inputs.json b/examples/example_1_inputs.json deleted file mode 100644 index c910d54..0000000 --- a/examples/example_1_inputs.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "simulation_name": "MySimulation", - "temperature": [300, 400, 500], - "density": 0.8, - "monomers": { - "1": "O=C(O)CCCCO", - "2": "O=C(O)c1ccc(O)cc1", - "3": "CCO" - }, - "number_of_monomers": { - "1": 500, - "2": 400, - "3": 300 - } -} diff --git a/examples/example_1_inputs_count_mode.json b/examples/example_1_inputs_count_mode.json index 5713b43..b965947 100644 --- a/examples/example_1_inputs_count_mode.json +++ b/examples/example_1_inputs_count_mode.json @@ -1,39 +1,53 @@ { - "simulation_name": "Example_Count_Mode", - "temperature": [300, 400, 500], - "density": 0.8, - - "composition": { - "method": "counts", - "targets": [ - {"tag": "10k"}, - {"tag": "100k"} - ] - }, - - "monomers": [ - { - "name": "tmc", - "smiles": "ClC(=O)c1cc(cc(c1)C(Cl)=O)C(Cl)=O", - "count": { - "10k": 220, - "100k": 2200 - } - }, - { - "name": "mpd", - "smiles": "C1=CC(=CC(=C1)N)N", - "count": { - "10k": 220, - "100k": 2200 - } + + "simulation_name": "Example_Count_Mode", + + "replicas": { + "temperatures": [300, 400, 500], + "density": [0.8], + "method": "counts", + + "systems": [ + + { + "tag": "10k", + + "monomer_counts": { + "tmc": 220, + "mpd": 220, + "ethanol": 110 + } + }, + + { + "tag": "100k", + + "monomer_counts": { + "tmc": 2200, + "mpd": 2200, + "ethanol": 1100 + } + } + + ] }, - { - "smiles": "CCO", - "count": { - "10k": 110, - "100k": 1100 - } - } - ] + + "monomers": [ + + { + "name": "tmc", + "smiles": "ClC(=O)c1cc(cc(c1)C(Cl)=O)C(Cl)=O" + }, + + { + "name": "mpd", + "smiles": "C1=CC(=CC(=C1)N)N" + }, + + { + "name": "ethanol", + "smiles": "CCO" + } + + ] } diff --git a/examples/example_1_inputs_count_mode_FF.json b/examples/example_1_inputs_count_mode_FF.json index 62b74cd..095a1f3 100644 --- a/examples/example_1_inputs_count_mode_FF.json +++ b/examples/example_1_inputs_count_mode_FF.json @@ -1,39 +1,44 @@ { "simulation_name": "Example_Count_Mode", - "temperature": [300, 400, 500], - "density": 0.8, - "force_field": "PCFF", - "composition": { + + "replicas": { + "temperatures": [300, 400, 500], + "density": [0.8], "method": "counts", - "targets": [ - {"tag": "10k"}, - {"tag": "100k"} + + "systems": [ + { + "tag": "10k", + "monomer_counts": { + "tmc": 220, + "mpd": 220, + "data_3": 110 + } + }, + { + "tag": "100k", + "monomer_counts": { + "tmc": 2200, + "mpd": 2200, + "data_3": 1100 + } + } ] }, - - "monomers": [ + + "force_field": "PCFF", + + "monomers": [ { "name": "tmc", - "smiles": "ClC(=O)c1cc(cc(c1)C(Cl)=O)C(Cl)=O", - "count": { - "10k": 220, - "100k": 2200 - } + "smiles": "ClC(=O)c1cc(cc(c1)C(Cl)=O)C(Cl)=O" }, { "name": "mpd", - "smiles": "C1=CC(=CC(=C1)N)N", - "count": { - "10k": 220, - "100k": 2200 - } + "smiles": "C1=CC(=CC(=C1)N)N" }, { - "smiles": "CCO", - "count": { - "10k": 110, - "100k": 1100 - } + "smiles": "CCO" } ] -} +} \ No newline at end of file diff --git a/examples/example_1_inputs_count_mode_with_non_monomers.json b/examples/example_1_inputs_count_mode_with_non_monomers.json new file mode 100644 index 0000000..23643f2 --- /dev/null +++ b/examples/example_1_inputs_count_mode_with_non_monomers.json @@ -0,0 +1,46 @@ +{ + + "simulation_name": "Example_Count_Mode", + + "replicas": { + "temperatures": [300, 400, 500], + "density": [0.8], + "method": "counts", + + "systems": [ + + { + "tag": "10k", + + "monomer_counts": { + "tmc": 220, + "mpd": 220 + } + }, + + { + "tag": "100k", + + "monomer_counts": { + "tmc": 2200, + "mpd": 2200 + } + } + + ] + }, + + "monomers": [ + + { + "name": "tmc", + "smiles": "ClC(=O)c1cc(cc(c1)C(Cl)=O)C(Cl)=O" + }, + + { + "name": "mpd", + "smiles": "C1=CC(=CC(=C1)N)N" + } + + ] +} diff --git a/examples/example_1_inputs_ratio_mode.json b/examples/example_1_inputs_ratio_mode.json new file mode 100644 index 0000000..b46ede2 --- /dev/null +++ b/examples/example_1_inputs_ratio_mode.json @@ -0,0 +1,56 @@ +{ + + "simulation_name": "Example_Ratio_Mode", + + "replicas": { + "temperatures": [300, 400], + "density": [0.8], + "method": "ratio", + + "systems": [ + + { + "tag": "10k_base", + "total_atoms": 10000, + + "monomer_ratios": { + "tmc": 1.0, + "mpd": 1.0, + "ethanol": 0.5 + } + }, + + { + "tag": "100k_base", + "total_atoms": 100000, + + "monomer_ratios": { + "tmc": 1.0, + "mpd": 1.0, + "ethanol": 0.5 + } + } + + ] + }, + + "monomers": [ + + { + "name": "tmc", + "smiles": "ClC(=O)c1cc(cc(c1)C(Cl)=O)C(Cl)=O" + }, + + { + "name": "mpd", + "smiles": "C1=CC(=CC(=C1)N)N" + }, + + { + "name": "ethanol", + "smiles": "CCO" + } + + ] + } + diff --git a/examples/example_1_inputs_stoichiometry_mode.json b/examples/example_1_inputs_stoichiometry_mode.json deleted file mode 100644 index 36b9d42..0000000 --- a/examples/example_1_inputs_stoichiometry_mode.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "simulation_name": "Example_Stoichiometric_Mode", - "temperature": [300, 400, 500], - "density": 0.8, - - "composition": { - "method": "stoichiometric_ratio", - "targets": [ - {"tag": "10k", "total_atoms": 10000}, - {"tag": "100k", "total_atoms": 100000} - ] - }, - - "monomers": [ - { - "name": "tmc", - "smiles": "ClC(=O)c1cc(cc(c1)C(Cl)=O)C(Cl)=O", - "ratio": 1.0 - }, - { - "name": "mpd", - "smiles": "C1=CC(=CC(=C1)N)N", - "ratio": 1.0 - }, - { - "smiles": "CCO", - "ratio": 0.5 - } - ] -}