From 6f7a43cb7c1a8b4720db7808e4795036967833fd Mon Sep 17 00:00:00 2001 From: janitha1996 <119646255+janitha-mahanthe@users.noreply.github.com> Date: Thu, 26 Feb 2026 16:33:29 -0500 Subject: [PATCH 01/13] Refactor ReactionDetector: typing, docs, visuals Add typing, docstrings and refactor ReactionDetector and related dataclasses --- AutoREACTER/detectors/reaction_detector.py | 365 +++++++++++---------- 1 file changed, 194 insertions(+), 171 deletions(-) diff --git a/AutoREACTER/detectors/reaction_detector.py b/AutoREACTER/detectors/reaction_detector.py index b542474..11d6fdc 100644 --- a/AutoREACTER/detectors/reaction_detector.py +++ b/AutoREACTER/detectors/reaction_detector.py @@ -1,8 +1,11 @@ +""" +Polymerization Reaction Detector and Visualizer. -# Dictionary defining various polymerization reactions. -# Each reaction includes metadata such as whether reactants are the same, -# the types of reactants, the product type, whether to delete atoms, -# and the SMARTS string for the reaction pattern using RDKit syntax. +This module provides tools to identify potential polymerization reactions between +monomers based on their functional groups using SMARTS patterns and RDKit. +It includes functionality to detect homo-polymerization and co-polymerization, +and generates visual grids of the resulting chemical reactions. +""" """ TODO: Missing Polymerization Mechanisms @@ -25,12 +28,14 @@ """ import pathlib -from typing import Dict, Any, Tuple, Optional +import os +from typing import Dict, Any, Tuple, Optional, List, Set from dataclasses import dataclass from PIL import Image, ImageDraw, ImageFont from rdkit import Chem from rdkit.Chem import Draw, rdChemReactions +# Attempt to import internal library components try: from reactions_library import ReactionLibrary except (ImportError, ModuleNotFoundError): @@ -41,14 +46,26 @@ except (ImportError, ModuleNotFoundError): from functional_groups_detector import FunctionalGroupInfo, MonomerRole -class SMARTSERROR(Exception): +class SMARTSerror(Exception): """Error from developer-defined SMARTS patterns not producing expected products. - Please contact the developer to resolve this issue. Raise a GitHub issue if you encounter this error. + Please contact the developer to resolve this issue. + Raise a GitHub issue if you encounter this error. at """ pass @dataclass(slots=True, frozen=True) class FunctionalGroupInfo: + """ + Stores metadata about a specific functional group detected in a molecule. + + Attributes: + functionality_type: Category of the group (e.g., 'vinyl', 'diol'). + fg_name: Human-readable name of the functional group. + fg_smarts_1: Primary SMARTS pattern for detection. + fg_count_1: Number of occurrences of the primary pattern. + fg_smarts_2: Secondary SMARTS pattern (optional). + fg_count_2: Number of occurrences of the secondary pattern (optional). + """ functionality_type: str fg_name: str fg_smarts_1: str @@ -58,12 +75,23 @@ class FunctionalGroupInfo: @dataclass(slots=True, frozen=True) class MonomerRole: + """ + Represents a monomer and its associated functional groups. + + Attributes: + smiles: SMILES string of the monomer. + name: Identifier or name of the monomer. + functionalities: A tuple of FunctionalGroupInfo objects present in this monomer. + """ smiles: str name: str functionalities: Tuple[FunctionalGroupInfo, ...] @dataclass(slots=True, frozen=True) class ReactionTemplate: + """ + Defines a generic template for a polymerization reaction. + """ reaction_name: str reactant_1: str # functional group name reactant_2: Optional[str] # functional group name or None @@ -74,26 +102,49 @@ class ReactionTemplate: @dataclass(slots=True, frozen=True) class ReactionInstance: + """ + Represents a specific instance of a reaction between identified monomers. + + Attributes: + reaction_name: Name of the polymerization type. + reaction_smarts: The SMARTS string used to perform the reaction. + delete_atom: Flag indicating if specific atoms should be removed post-reaction. + references: Dictionary containing literature references. + monomer_1: The first monomer involved. + functional_group_1: The specific FG on monomer 1 participating. + monomer_2: The second monomer (None for homo-polymerization). + functional_group_2: The specific FG on monomer 2 participating. + """ reaction_name: str reaction_smarts: str delete_atom: bool references: dict - monomer_1: MonomerRole functional_group_1: FunctionalGroupInfo monomer_2: Optional[MonomerRole] = None functional_group_2: Optional[FunctionalGroupInfo] = None class ReactionDetector: + """ + Detects valid chemical reactions between a list of monomers based on a + predefined library of polymerization mechanisms. + """ def __init__(self): + """Initializes the detector by loading the reaction library.""" self.reactions = ReactionLibrary().reactions - def _matching_fgs(self, monomer_entry: MonomerRole, target_group_name: str) -> list: - fg_hits = [] - for fg in monomer_entry.functionalities: - if fg.fg_name == target_group_name: - fg_hits.append(fg) - return fg_hits + def _matching_fgs(self, monomer_entry: MonomerRole, target_group_name: str) -> List[FunctionalGroupInfo]: + """ + Filters functional groups in a monomer that match a target name. + + Args: + monomer_entry: The monomer to search. + target_group_name: The name of the FG to look for. + + Returns: + A list of matching FunctionalGroupInfo objects. + """ + return [fg for fg in monomer_entry.functionalities if fg.fg_name == target_group_name] def _seen_pair_key( self, @@ -103,100 +154,82 @@ def _seen_pair_key( monomer_role_2: Optional[MonomerRole] = None, fg_2: Optional[FunctionalGroupInfo] = None, ) -> Tuple: - + """ + Generates a unique, order-independent key for a reaction pair to avoid duplicates. + + Args: + reaction_name: Name of the reaction. + monomer_role_1: First monomer. + fg_1: First functional group. + monomer_role_2: Second monomer (optional). + fg_2: Second functional group (optional). + + Returns: + A tuple representing the unique state of this reaction instance. + """ if monomer_role_2 is None or fg_2 is None: return (reaction_name, monomer_role_1, fg_1) - # symmetric case + # Sort reactants by memory ID to ensure (A, B) and (B, A) produce the same key ordered = tuple(sorted( [(monomer_role_1, fg_1), (monomer_role_2, fg_2)], key=lambda x: (id(x[0]), id(x[1])) )) return (reaction_name, ordered[0], ordered[1]) - def reaction_detector(self, monomer_roles: list[MonomerRole]) -> list[ReactionInstance]: + def reaction_detector(self, monomer_roles: List[MonomerRole]) -> List[ReactionInstance]: """ - Detects possible polymerization reactions based on the functional groups in the provided monomer dictionary. - - This function iterates through predefined reactions and matches them against the functional groups - in the monomers. It supports homopolymerization (same reactants) and copolymerization (different reactants). - For each match, it records the monomer details and reaction index. - + Scans a list of monomers to find all possible polymerization reactions. + Args: - monomer_dictionary (dict): A dictionary where keys are monomer indices or IDs, - and values are dictionaries containing monomer details, - including 'smiles' and functional groups with 'functional_group_name'. - + monomer_roles: List of monomers with their detected functional groups. + Returns: - dict: A dictionary of detected reactions, structured as: - {reaction_name: {, rx_index: {monomer_1: {...}, monomer_2: {... (if applicable)}}}} - - Note: - This logic matches functional_group_name against reaction reactant types. - It does not validate the reaction SMARTS against the exact monomer structures (RDKit reaction execution is later). + A list of ReactionInstance objects representing valid matches. """ - - # create a new list to hold reaction instances reaction_instances = [] - seen_pairs = set() # to track unique reactant pairs and avoid duplicates + seen_pairs: Set[Tuple] = set() - # Iterate over each predefined reaction for reaction_name, reaction_info in self.reactions.items(): reactant_1_name = reaction_info.get("reactant_1") - reactant_2_name = reaction_info.get("reactant_2") # may be None + reactant_2_name = reaction_info.get("reactant_2") same_reactants = reaction_info.get("same_reactants", False) - # HOMO CASE + # CASE 1: HOMO-POLYMERIZATION (e.g., A + A) if same_reactants and reactant_2_name is None: for monomer_role in monomer_roles: fg_hits = self._matching_fgs(monomer_role, reactant_1_name) - if not fg_hits: - continue for fg in fg_hits: - pair_key = self._seen_pair_key( - reaction_name, - monomer_role, - fg - ) - - if pair_key in seen_pairs: - continue - - seen_pairs.add(pair_key) - reaction_instances.append( - ReactionInstance( - reaction_name=reaction_name, - reaction_smarts=reaction_info["reaction"], - delete_atom=reaction_info["delete_atom"], - references=reaction_info["reference"], - monomer_1=monomer_role, - functional_group_1=fg, - monomer_2=None, - functional_group_2=None + pair_key = self._seen_pair_key(reaction_name, monomer_role, fg) + if pair_key not in seen_pairs: + seen_pairs.add(pair_key) + reaction_instances.append( + ReactionInstance( + reaction_name=reaction_name, + reaction_smarts=reaction_info["reaction"], + delete_atom=reaction_info["delete_atom"], + references=reaction_info["reference"], + monomer_1=monomer_role, + functional_group_1=fg + ) ) - ) - # COMONOMER CASE + # CASE 2: CO-POLYMERIZATION (e.g., A + B) else: for monomer_role_i in monomer_roles: fg_hits_i = self._matching_fgs(monomer_role_i, reactant_1_name) - if not fg_hits_i: - continue + if not fg_hits_i: continue + for fg_i in fg_hits_i: for monomer_role_j in monomer_roles: + # Prevent a monomer reacting with itself in a co-monomer definition if monomer_role_i == monomer_role_j: - continue # skip same monomer + continue + fg_hits_j = self._matching_fgs(monomer_role_j, reactant_2_name) - if not fg_hits_j: - continue for fg_j in fg_hits_j: - # Create a unique pair identifier to avoid duplicates pair_key = self._seen_pair_key( - reaction_name, - monomer_role_i, - fg_i, - monomer_role_j, - fg_j - ) + reaction_name, monomer_role_i, fg_i, monomer_role_j, fg_j + ) if pair_key not in seen_pairs: seen_pairs.add(pair_key) reaction_instances.append( @@ -210,35 +243,46 @@ def reaction_detector(self, monomer_roles: list[MonomerRole]) -> list[ReactionIn monomer_2=monomer_role_j, functional_group_2=fg_j ) - ) - + ) return reaction_instances - def create_reaction_image(self, reactant_a_smiles, reactant_b_smiles, reaction_smarts, reaction_name): - - reactant_a = Chem.MolFromSmiles(reactant_a_smiles) - reactant_b = Chem.MolFromSmiles(reactant_b_smiles) - reactant_a = Chem.AddHs(reactant_a) - reactant_b = Chem.AddHs(reactant_b) - # debug prints - # print(Chem.MolToSmiles(reactant_a)) - # print(Chem.MolToSmiles(reactant_b)) + def create_reaction_image(self, reactant_a_smiles: str, reactant_b_smiles: str, + reaction_smarts: str, reaction_name: str) -> Image.Image: + """ + Simulates a reaction using RDKit and generates a 2D image of the transformation. + + Args: + reactant_a_smiles: SMILES for the first reactant. + reactant_b_smiles: SMILES for the second reactant. + reaction_smarts: SMARTS pattern defining the reaction. + reaction_name: Name of the reaction for metadata. + + Returns: + A PIL Image object of the reaction. + + Raises: + SMARTSerror: If the reaction SMARTS fails to produce products. + """ + reactant_a = Chem.AddHs(Chem.MolFromSmiles(reactant_a_smiles)) + reactant_b = Chem.AddHs(Chem.MolFromSmiles(reactant_b_smiles)) + rxn_engine = rdChemReactions.ReactionFromSmarts(reaction_smarts) + + # Try reacting A + B products_sets = rxn_engine.RunReactants((reactant_a, reactant_b)) - # debug print - # print(f"Products sets for {reaction_smarts}") + # Fallback: Try reacting B + A if the first order failed if not products_sets: products_sets = rxn_engine.RunReactants((reactant_b, reactant_a)) if not products_sets: - print(f"No products generated for {reaction_smarts} with either reactant order.") - raise SMARTSERROR(f"Reaction SMARTS '{reaction_smarts}' did not produce any products for reactants '{reactant_a_smiles}' and '{reactant_b_smiles}' in either order.") - + raise SMARTSerror(f"Reaction SMARTS '{reaction_smarts}' failed for {reactant_a_smiles} + {reactant_b_smiles}") + # Prepare the reaction for visualization display_rxn = rdChemReactions.ChemicalReaction() display_rxn.AddReactantTemplate(reactant_a) display_rxn.AddReactantTemplate(reactant_b) + # Sanitize and add the first set of products found for prod in products_sets[0]: try: Chem.SanitizeMol(prod) @@ -248,36 +292,37 @@ def create_reaction_image(self, reactant_a_smiles, reactant_b_smiles, reaction_s return Draw.ReactionToImage(display_rxn, subImgSize=(400, 400)) - - def create_reaction_image_grid(self, selected_reaction_instances): - + def create_reaction_image_grid(self, selected_reaction_instances: List[ReactionInstance]) -> Optional[Image.Image]: + """ + Creates a vertical grid image containing all selected reaction visualizations. + + Args: + selected_reaction_instances: List of reactions to visualize. + + Returns: + A combined PIL Image or None if no images were generated. + """ img_list = [] for rxn in selected_reaction_instances: - reactant_a = rxn.monomer_1.smiles + reactant_b = rxn.monomer_2.smiles if rxn.monomer_2 else rxn.monomer_1.smiles - if rxn.monomer_2: - reactant_b = rxn.monomer_2.smiles - else: - reactant_b = rxn.monomer_1.smiles - - img = self.create_reaction_image( - reactant_a, - reactant_b, - rxn.reaction_smarts, - rxn.reaction_name - ) - - if img: - img_list.append(img) + try: + img = self.create_reaction_image( + reactant_a, reactant_b, rxn.reaction_smarts, rxn.reaction_name + ) + if img: img_list.append(img) + except SMARTSerror as e: + print(f"Skipping visualization: {e}") if not img_list: return None + # Calculate dimensions for the vertical stack single_width, single_height = img_list[0].size total_height = single_height * len(img_list) - total_width = single_width + 80 + total_width = single_width + 80 # Extra space for numbering new_img = Image.new("RGB", (total_width, total_height), (255, 255, 255)) draw = ImageDraw.Draw(new_img) @@ -287,6 +332,7 @@ def create_reaction_image_grid(self, selected_reaction_instances): except: font = ImageFont.load_default() + # Paste each reaction image into the grid y_offset = 0 for i, img in enumerate(img_list, start=1): draw.text((10, y_offset + 10), f"{i}.", fill=(0, 0, 0), font=font) @@ -295,69 +341,58 @@ def create_reaction_image_grid(self, selected_reaction_instances): return new_img - def reaction_selection(self, reaction_instances: list[ReactionInstance]) -> list[ReactionInstance]: + def reaction_selection(self, reaction_instances: List[ReactionInstance]) -> List[ReactionInstance]: + """ + Interactive CLI to allow users to select specific reactions from the detected list. + + Args: + reaction_instances: List of all detected reactions. + + Returns: + A list of user-selected ReactionInstance objects. + """ + if not reaction_instances: + print("No reaction instances available.") + return [] + print("Detected Reactions:") for i, rx in enumerate(reaction_instances, start=1): print(f"{i}. {rx.reaction_name}") - print(f" Monomer 1: {rx.monomer_1.smiles}") - print(f" FG 1: {rx.functional_group_1.fg_name}") - + print(f" Monomer 1: {rx.monomer_1.smiles} ({rx.functional_group_1.fg_name})") if rx.monomer_2: - print(f" Monomer 2: {rx.monomer_2.smiles}") - print(f" FG 2: {rx.functional_group_2.fg_name}") - + print(f" Monomer 2: {rx.monomer_2.smiles} ({rx.functional_group_2.fg_name})") print() - if not reaction_instances: - print("No reaction instances available.") - return [] while True: - raw = input("Enter reaction numbers (comma-separated): ").strip() - + raw = input("Enter reaction numbers (comma-separated, e.g., 1,3): ").strip() if not raw: - print("Empty input. Try again.") - continue - - tokens = [t.strip() for t in raw.split(",")] - - if not all(t.isdigit() for t in tokens): - print("Invalid input. Use integers like: 1,2,3") continue - indices = sorted(set(int(t) for t in tokens)) - - if any(i < 1 or i > len(reaction_instances) for i in indices): - print("One or more indices out of range.") - continue + try: + tokens = [int(t.strip()) for t in raw.split(",")] + if any(i < 1 or i > len(reaction_instances) for i in tokens): + print("Out of range. Try again.") + continue + indices = sorted(set(tokens)) + break + except ValueError: + print("Invalid input. Please use numbers.") - break selected = [reaction_instances[i-1] for i in indices] print(f"Selected {len(selected)} reactions.") - for rx in selected: - print(f"{rx.reaction_name}: {rx.monomer_1.smiles} ({rx.functional_group_1.fg_name})", end="") - if rx.monomer_2: - print(f" + {rx.monomer_2.smiles} ({rx.functional_group_2.fg_name})") - else: - print() return selected if __name__ == "__main__": - detector = ReactionDetector() - # Example monomer dictionary for testing - def convert_dict_to_monomer_roles(monomer_dictionary: dict) -> list[MonomerRole]: + # --- Test Setup --- + def convert_dict_to_monomer_roles(monomer_dictionary: dict) -> List[MonomerRole]: + """Helper to convert raw dictionary data into MonomerRole objects.""" monomer_roles = [] - for _, entry in monomer_dictionary.items(): - smiles = entry["smiles"] - functionalities = [] - for key, value in entry.items(): - if not isinstance(key, int): - continue - + if not isinstance(key, int): continue functionalities.append( FunctionalGroupInfo( functionality_type=value["functionality_type"], @@ -368,17 +403,10 @@ def convert_dict_to_monomer_roles(monomer_dictionary: dict) -> list[MonomerRole] fg_count_2=value.get("functional_count_2", 0), ) ) - - monomer_roles.append( - MonomerRole( - smiles=smiles, - name=smiles, # or replace with actual name if you have one - functionalities=tuple(functionalities), - ) - ) - + monomer_roles.append(MonomerRole(smiles=smiles, name=smiles, functionalities=tuple(functionalities))) return monomer_roles + # Mock monomer data for testing detection logic monomer_dictionary = { 1: { "smiles": "C=CCO", @@ -636,27 +664,22 @@ def convert_dict_to_monomer_roles(monomer_dictionary: dict) -> list[MonomerRole] } } - detector = ReactionDetector() - # Convert dictionary to object model + detector = ReactionDetector() monomer_roles = convert_dict_to_monomer_roles(monomer_dictionary) - - # Run detector reaction_instances = detector.reaction_detector(monomer_roles) - import os - from pathlib import Path + # --- Save Results --- autoreacter_dir = pathlib.Path(__file__).parent.parent.parent.resolve() save_dir = autoreacter_dir / "cache" / "_cache_test" / "reaction_visualization" os.makedirs(save_dir, exist_ok=True) - file_path = save_dir / f"reaction_instances.png" + file_path = save_dir / "reaction_instances.png" img = detector.create_reaction_image_grid(reaction_instances) + if img: img.save(file_path) print(f"Reaction grid image saved to: {file_path}") - else: - print("No valid reaction images generated.") - # Optional: Allow user to select reactions - selected_reactions = detector.reaction_selection(reaction_instances) \ No newline at end of file + # Optional interactive selection + selected_reactions = detector.reaction_selection(reaction_instances) From 3baf3d037dff8c96962630b65844afbfe4ce0645 Mon Sep 17 00:00:00 2001 From: janitha1996 <119646255+janitha-mahanthe@users.noreply.github.com> Date: Thu, 26 Feb 2026 17:45:03 -0500 Subject: [PATCH 02/13] Add example simulation schema JSON files --- examples/new_schema - Copy.json | 54 +++++++++++++++++++++++++++++++++ examples/new_schema.json | 43 ++++++++++++++++++++++++++ 2 files changed, 97 insertions(+) create mode 100644 examples/new_schema - Copy.json create mode 100644 examples/new_schema.json diff --git a/examples/new_schema - Copy.json b/examples/new_schema - Copy.json new file mode 100644 index 0000000..3a9b444 --- /dev/null +++ b/examples/new_schema - Copy.json @@ -0,0 +1,54 @@ +{ + "simulation_name": "Example_Ratio_Mode", + + "replicas": { + "temperatures": [300, 400], + "density": 0.8, + "method": "ratio", + + "systems": [ + { + "tag": "10k_base", + "total_atoms": 10000, + "monomer_ratios": { + "tmc": 1.0, + "mpd": 1.0, + "CCO": 0.5 + } + }, + { + "tag": "100k_base", + "total_atoms": 100000, + "monomer_ratios": { + "tmc": 1.0, + "mpd": 1.0, + "CCO": 0.5 + } + }, + { + "tag": "100k_dry", + "total_atoms": 100000, + "monomer_ratios": { + "tmc": 1.0, + "mpd": 1.0, + "CCO": 0.0 + } + } + ] + }, + + "monomers": [ + { + "name": "tmc", + "smiles": "ClC(=O)c1cc(cc(c1)C(Cl)=O)C(Cl)=O" + }, + { + "name": "mpd", + "smiles": "C1=CC(=CC(=C1)N)N" + }, + { + "name": "CCO", + "smiles": "CCO" + } + ] +} \ No newline at end of file diff --git a/examples/new_schema.json b/examples/new_schema.json new file mode 100644 index 0000000..62a7af1 --- /dev/null +++ b/examples/new_schema.json @@ -0,0 +1,43 @@ +{ + "simulation_name": "Example_Count_Mode", + + "replicas": { + "temperatures": [300, 400], + "density": 0.8, + "method": "counts", + + "systems": [ + { + "tag": "10k", + "monomer_counts": { + "tmc": 220, + "mpd": 220, + "CCO": 110 + } + }, + { + "tag": "100k", + "monomer_counts": { + "tmc": 2200, + "mpd": 2200, + "CCO": 0 + } + } + ] + }, + + "monomers": [ + { + "name": "tmc", + "smiles": "ClC(=O)c1cc(cc(c1)C(Cl)=O)C(Cl)=O" + }, + { + "name": "mpd", + "smiles": "C1=CC(=CC(=C1)N)N" + }, + { + "name": "CCO", + "smiles": "CCO" + } + ] +} \ No newline at end of file From 8bb3c3e18f4e923c8fcfc9384027aa3747519e10 Mon Sep 17 00:00:00 2001 From: janitha1996 <119646255+janitha-mahanthe@users.noreply.github.com> Date: Tue, 3 Mar 2026 12:47:36 -0500 Subject: [PATCH 03/13] Add deprecation warning to detector module Insert a DeprecationWarning in AutoREACTER/detectors/detector.py to inform users the script is deprecated and will be changed in v0.2. --- AutoREACTER/detectors/detector.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/AutoREACTER/detectors/detector.py b/AutoREACTER/detectors/detector.py index a52932c..a99a718 100644 --- a/AutoREACTER/detectors/detector.py +++ b/AutoREACTER/detectors/detector.py @@ -8,6 +8,15 @@ the simulation. """ +import warnings + +warnings.warn( + """This script is deprecated and will be modified in future versions. Within v0.2, the whole package will primaraliy + support on jupyter notebook and the CLI will be removed. Please use the notebook version for now and refer to the README for how to use the package.""", + DeprecationWarning, + stacklevel=2 +) + import json # Attempt to import detector modules. Handles different import paths depending on From a60ffe2352005398997d9fb3cd8ef178f46bc32c Mon Sep 17 00:00:00 2001 From: janitha1996 <119646255+janitha-mahanthe@users.noreply.github.com> Date: Tue, 3 Mar 2026 12:49:46 -0500 Subject: [PATCH 04/13] Update reaction_detector.py --- AutoREACTER/detectors/reaction_detector.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/AutoREACTER/detectors/reaction_detector.py b/AutoREACTER/detectors/reaction_detector.py index 11d6fdc..594e2a7 100644 --- a/AutoREACTER/detectors/reaction_detector.py +++ b/AutoREACTER/detectors/reaction_detector.py @@ -683,3 +683,12 @@ def convert_dict_to_monomer_roles(monomer_dictionary: dict) -> List[MonomerRole] # Optional interactive selection selected_reactions = detector.reaction_selection(reaction_instances) + + print(f"Total detected reactions: {len(selected_reactions)}") + for i, rx in enumerate(selected_reactions, start=1): + print(f"{i}. {rx.reaction_name} - {rx.monomer_1.smiles} ({rx.functional_group_1.fg_name})", end="") + if rx.monomer_2: + print(f" + {rx.monomer_2.smiles} ({rx.functional_group_2.fg_name})") + else: + print() + print(selected_reactions) \ No newline at end of file From 11556bcddd18fbc4b603c7ab8a279d734449625b Mon Sep 17 00:00:00 2001 From: janitha1996 <119646255+janitha-mahanthe@users.noreply.github.com> Date: Tue, 3 Mar 2026 12:50:38 -0500 Subject: [PATCH 05/13] Remove commented-out TODO code block --- .../run_reaction_template_pipeline.py | 34 ------------------- 1 file changed, 34 deletions(-) diff --git a/AutoREACTER/reaction_template_builder/run_reaction_template_pipeline.py b/AutoREACTER/reaction_template_builder/run_reaction_template_pipeline.py index 78c1a34..7c8e4ac 100644 --- a/AutoREACTER/reaction_template_builder/run_reaction_template_pipeline.py +++ b/AutoREACTER/reaction_template_builder/run_reaction_template_pipeline.py @@ -16,40 +16,6 @@ - Implement duplicate reaction detection and handling. """ - -"""TODO (Reaction Template Pipeline): -from dataclasses import dataclass -from typing import Literal - -@dataclass(slots=True, frozen=True) -class MonomerRole: - spec: MoleculeSpec - functionality_type: str - fg_name: str - fg_smarts_1: str - fg_count_1: int - fg_smarts_2: str | None = None - fg_count_2: int | None = None - -@dataclass(slots=True, frozen=True) -class ReactionDefinition: - reaction_name: str - reaction_smarts: str - same_reactants: bool - delete_atom: bool - references: dict - monomer_1: MonomerRole - monomer_2: MonomerRole | None = None -""" - - -""" -def withdraw(balance, amount): - if amount > balance: - raise InsufficientFundsError(amount - balance, f"Cannot withdraw {amount}, only {balance} available.") - return balance - amount -""" - """ TODO: Preserve stable reaction_id for each reaction (never renumber). Maintain a seq_id mapping for the current filtered set (continuous 1..N) for UI/export. From 5996679dd59ca9d42749f5b9ea167a9cc2c7be15 Mon Sep 17 00:00:00 2001 From: janitha1996 <119646255+janitha-mahanthe@users.noreply.github.com> Date: Tue, 3 Mar 2026 13:14:29 -0500 Subject: [PATCH 06/13] Default force field to PCFF; update warning Update deprecation message in detectors/detector.py to clarify the CLI has been removed and clean up wording. Change SimulationSetup.force_field in input_parser.py from str | None to ForceFieldType | None and provide a sensible default value (PCFF) so simulations have an explicit default force field. --- AutoREACTER/detectors/detector.py | 2 +- AutoREACTER/input_parser.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/AutoREACTER/detectors/detector.py b/AutoREACTER/detectors/detector.py index a99a718..0ab06ad 100644 --- a/AutoREACTER/detectors/detector.py +++ b/AutoREACTER/detectors/detector.py @@ -12,7 +12,7 @@ warnings.warn( """This script is deprecated and will be modified in future versions. Within v0.2, the whole package will primaraliy - support on jupyter notebook and the CLI will be removed. Please use the notebook version for now and refer to the README for how to use the package.""", + support on jupyter notebook and the CLI is removed. Please use the notebook version for now and refer to the README for how to use the package.""", DeprecationWarning, stacklevel=2 ) diff --git a/AutoREACTER/input_parser.py b/AutoREACTER/input_parser.py index 5742c63..02e54fc 100644 --- a/AutoREACTER/input_parser.py +++ b/AutoREACTER/input_parser.py @@ -115,7 +115,7 @@ class SimulationSetup: simulation_name: str temperature: list[float] density: float - force_field: str | None + force_field: ForceFieldType | None = "PCFF" # Default to PCFF if not provided monomers: list[MonomerEntry] composition_method: CompositionMethodType | None = None composition : dict[str, Any] | None = None From 798ee40c6b2b497aa9754a461e2ca8d6f89e6b91 Mon Sep 17 00:00:00 2001 From: janitha1996 <119646255+janitha-mahanthe@users.noreply.github.com> Date: Tue, 3 Mar 2026 13:47:15 -0500 Subject: [PATCH 07/13] Add EmptyReactionListError and selection fix Introduce EmptyReactionListError in reaction_detector as a safeguard when no reaction instances are present. Update ReactionDetector.reaction_selection to raise this error if the list is empty and to automatically select the single detected reaction (with an informational print). In input_parser, remove the implicit default for SimulationSetup.force_field (previously defaulted to "PCFF") by setting force_field to None, requiring explicit specification. Also fix a line-wrapping/syntax issue in examples/example_1.ipynb for the functional_groups_detector call. --- AutoREACTER/detectors/reaction_detector.py | 12 ++++++++++-- AutoREACTER/input_parser.py | 2 +- examples/example_1.ipynb | 3 +-- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/AutoREACTER/detectors/reaction_detector.py b/AutoREACTER/detectors/reaction_detector.py index 594e2a7..3fcd5e7 100644 --- a/AutoREACTER/detectors/reaction_detector.py +++ b/AutoREACTER/detectors/reaction_detector.py @@ -53,6 +53,11 @@ class SMARTSerror(Exception): at """ pass +class EmptyReactionListError(Exception): + """Raised when no reaction instances are detected, but the user attempts to select reactions. + This should be prevented by the reaction_selection method, but this error serves as a safeguard.""" + pass + @dataclass(slots=True, frozen=True) class FunctionalGroupInfo: """ @@ -352,8 +357,11 @@ def reaction_selection(self, reaction_instances: List[ReactionInstance]) -> List A list of user-selected ReactionInstance objects. """ if not reaction_instances: - print("No reaction instances available.") - return [] + raise EmptyReactionListError("No reaction instances detected. Cannot proceed with selection.") + + if len(reaction_instances) == 1: + print("Only one reaction detected. Automatically selecting it.") + return reaction_instances print("Detected Reactions:") for i, rx in enumerate(reaction_instances, start=1): diff --git a/AutoREACTER/input_parser.py b/AutoREACTER/input_parser.py index 02e54fc..cb0d89c 100644 --- a/AutoREACTER/input_parser.py +++ b/AutoREACTER/input_parser.py @@ -115,7 +115,7 @@ class SimulationSetup: simulation_name: str temperature: list[float] density: float - force_field: ForceFieldType | None = "PCFF" # Default to PCFF if not provided + force_field: None monomers: list[MonomerEntry] composition_method: CompositionMethodType | None = None composition : dict[str, Any] | None = None diff --git a/examples/example_1.ipynb b/examples/example_1.ipynb index c5bcb7e..73e7749 100644 --- a/examples/example_1.ipynb +++ b/examples/example_1.ipynb @@ -107,8 +107,7 @@ "source": [ "# Detector must be called after the inputs are validated, because it operates on validated MonomerEntry objects (validated_inputs.monomers)\n", "functional_groups_detector = FunctionalGroupsDetector()\n", - "functional_groups, functional_groups_imgs = \n", - " functional_groups_detector.functional_groups_detector(\n", + "functional_groups, functional_groups_imgs = functional_groups_detector.functional_groups_detector(\n", " validated_inputs.monomers\n", " )\n", "\n", From 597efcc5c6068b0ec21da0952d39b5271d70c3f4 Mon Sep 17 00:00:00 2001 From: janitha1996 <119646255+janitha-mahanthe@users.noreply.github.com> Date: Tue, 3 Mar 2026 14:24:29 -0500 Subject: [PATCH 08/13] Add NonReactantsDetector and notebook integration Introduce NonReactantsDetector (AutoREACTER/detectors/non_monomer_detector.py) to identify monomers that do not participate in detected reactions, visualize them with RDKit, and provide an interactive selection flow to retain or discard non-reactant monomers in SimulationSetup. The detector includes helpers for canonical SMILES comparison, visualization (MolsToGridImage), and a user prompt to mark monomer.status as 'discarded'. Update examples/example_1.ipynb to import and call the new detector, adding cells to generate and display non-reactants and to update the validated inputs based on user selection. --- AutoREACTER/detectors/non_monomer_detector.py | 149 ++++++++++++++++++ examples/example_1.ipynb | 37 ++++- 2 files changed, 179 insertions(+), 7 deletions(-) create mode 100644 AutoREACTER/detectors/non_monomer_detector.py diff --git a/AutoREACTER/detectors/non_monomer_detector.py b/AutoREACTER/detectors/non_monomer_detector.py new file mode 100644 index 0000000..8cc8b7c --- /dev/null +++ b/AutoREACTER/detectors/non_monomer_detector.py @@ -0,0 +1,149 @@ + +from dataclasses import dataclass +from typing import Any, List, Literal, Optional, Tuple +from rdkit import Chem +from rdkit.Chem import rdmolops, Draw + +from AutoREACTER.input_parser import SimulationSetup +from AutoREACTER.input_parser import MonomerEntry +from AutoREACTER.detectors.reaction_detector import ReactionInstance +from AutoREACTER.detectors.reaction_detector import FunctionalGroupInfo +from AutoREACTER.detectors.reaction_detector import MonomerRole + + +class NonReactantsDetector: + + def _same_molecule(self, reactants_list: List[str], mol2_smi: str) -> List[str]: + """ + Checks if a molecule (SMILES) exists in the list, + if not, appends it and returns the updated list. + """ + mol2 = Chem.MolFromSmiles(mol2_smi) + if mol2 is None: + return reactants_list # Or handle error: invalid SMILES + + can_smi2 = Chem.MolToSmiles(mol2, canonical=True) + detected = False + + for mol_smi in reactants_list: + mol1 = Chem.MolFromSmiles(mol_smi) + if mol1 is not None: + if Chem.MolToSmiles(mol1, canonical=True) == can_smi2: + detected = True + break + + if not detected: + reactants_list.append(mol2_smi) + + return reactants_list + + def _same_molecule_for_initaials(self, reactants_list: List[str], mol2: Chem.Mol) -> bool: + if mol2 is None: + return False + + # 1. In case - Remove Hs from query molecule and get its canonical SMILES + mol2_clean = rdmolops.RemoveHs(mol2) + can_smi2 = Chem.MolToSmiles(mol2_clean, canonical=True) + + for mol_smi in reactants_list: + mol1 = Chem.MolFromSmiles(mol_smi) + if mol1 is not None: + # 2. Also remove Hs from input list molecules for fair comparison + mol1 = rdmolops.RemoveHs(mol1) + can_smi1 = Chem.MolToSmiles(mol1, canonical=True) + + if can_smi1 == can_smi2: + return True + + def non_monomer_detector(self, + simulation_setup: SimulationSetup, + reaction_instances: List[ReactionInstance]) -> SimulationSetup: + """ + Detects monomers that do not participate in any of the identified reactions. + Args: + simulation_setup: The simulation setup containing monomers and reactions. + reaction_instances: A list of identified and filterd reactions by the user + Returns: + SimulationSetup: The updated simulation setup with non-reactant monomers identified. + This function iterates through the monomers in the simulation setup and checks if they are involved in any of the detected reactions. + If a monomer is not found in any reaction, It will be added to a non-reactants list. + Then ask the used if they want to retain these non-reactant monomers in the simulation. + Inputs will be "N" for no and "A" for all "S" for selected. + SimulationSetup.monomers: list[MonomerEntry] . status: StatusType = "active" will be updated to "discarded" for non-reactant monomers if the user choose to discard them. + Finally, the function returns the updated SimulationSetup with non-reactant monomers either retained or discarded based on user input. + """ + print(""" + Selection Guide for Non-Reactant Monomers: + - N: Discard all non-reactant monomers from the simulation. + - A: Retain all non-reactant monomers in the simulation. + - S: Select specific non-reactant monomers to retain. You will be prompted to enter the IDs of the monomers you wish to keep, separated by commas.""") + reactants_list = [] + non_reactants_list = [] + for reaction in reaction_instances: + mol1 = reaction.monomer_1.smiles + reactants_list = self._same_molecule(reactants_list, mol1) + if reaction.monomer_2: + mol2 = reaction.monomer_2.smiles + reactants_list = self._same_molecule(reactants_list, mol2) + for monomer in simulation_setup.monomers: + is_in_reactions = self._same_molecule_for_initaials(reactants_list, monomer.rdkit_mol) + if not is_in_reactions: + non_reactants_list.append(monomer) + + return non_reactants_list + + def non_reactants_to_visualization(self, non_reactants_list: List[MonomerEntry]) -> Draw.MolsToGridImage: + for monomer in non_reactants_list: + print(f"ID: {monomer.id}, Name: {monomer.name}, SMILES: {monomer.smiles}") + + img = Draw.MolsToGridImage([monomer.rdkit_mol for monomer in non_reactants_list], + legends=[monomer.name for monomer in non_reactants_list], + molsPerRow=3, + subImgSize=(400,400)) + return img + + def non_reactant_selection(self, simulation_setup: SimulationSetup, non_reactants_list: List[MonomerEntry]) -> SimulationSetup: + """ + Handles user selection of non-reactant monomers to retain or discard in the simulation. + """ + if non_reactants_list: + print("The following monomers do not participate in any detected reactions:") + for monomer in non_reactants_list: + print(f"ID: {monomer.id}, Name: {monomer.name}, SMILES: {monomer.smiles}") + + while True: + if len(non_reactants_list) == 1: + print("Only one non-reactant monomer detected") + user_input = input(f"Do you want to keep {non_reactants_list[0].name} in the simulation? (N/A): ").strip().upper() + if user_input not in ['N', 'A']: + print("Invalid input. Please enter N or A.") + continue + if user_input == 'N': + non_reactants_list[0].status = 'discarded' + break + elif user_input == 'A': + break + else: + user_input = input("Do you want to retain these non-reactant monomers in the simulation? (N/A/S): ").strip().upper() + if user_input not in ['N', 'A', 'S']: + print("Invalid input. Please enter N, A, or S.") + continue + + if user_input == 'N': + # Discard all non-reactants + for monomer in non_reactants_list: + monomer.status = 'discarded' + break + elif user_input == 'A': + # Retain all non-reactants + break + elif user_input == 'S': + # Selectively retain + selected_ids = input("Enter the IDs of the monomers you want to retain, separated by commas: ") + selected_ids = [int(id.strip()) for id in selected_ids.split(',')] + for monomer in non_reactants_list: + if monomer.id not in selected_ids: + monomer.status = 'discarded' + break + + return simulation_setup \ No newline at end of file diff --git a/examples/example_1.ipynb b/examples/example_1.ipynb index 73e7749..5227037 100644 --- a/examples/example_1.ipynb +++ b/examples/example_1.ipynb @@ -23,6 +23,7 @@ "from AutoREACTER.cache import GetCacheDir, RunDirectoryManager, RetentionCleanup\n", "from AutoREACTER.detectors.functional_groups_detector import FunctionalGroupsDetector\n", "from AutoREACTER.detectors.reaction_detector import ReactionDetector\n", + "from AutoREACTER.detectors.non_monomer_detector import NonReactantsDetector\n", "from AutoREACTER.reaction_template_builder.run_reaction_template_pipeline import ReactionTemplatePipeline\n", "from rdkit import Chem\n", "from rdkit.Chem import Draw\n", @@ -31,9 +32,10 @@ "Initialization()\n", "input_parser = InputParser()\n", "\n", + "input_parser = InputParser()\n", "cache_dir = GetCacheDir().staging_dir\n", "dated_cache_dir = RunDirectoryManager.make_dated_run_dir(cache_dir, chdir_to=\"none\")\n", - "# future use\n", + "# #future use\n", "# RunDirectoryManager.copy_into_run(cache_dir, dated_cache_dir)\n", "\n", "\n" @@ -105,14 +107,12 @@ "metadata": {}, "outputs": [], "source": [ - "# Detector must be called after the inputs are validated, because it operates on validated MonomerEntry objects (validated_inputs.monomers)\n", + "# Detector Has to called Now after the inputs are validated, because it needs the input dict to run the detection workflow\n", "functional_groups_detector = FunctionalGroupsDetector()\n", - "functional_groups, functional_groups_imgs = functional_groups_detector.functional_groups_detector(\n", + "functional_groups, functional_groups_imgs = \\\n", + " functional_groups_detector.functional_groups_detector(\n", " validated_inputs.monomers\n", - " )\n", - "\n", - "for fg_img in functional_groups_imgs:\n", - " print(fg_img.indexes_to_highlight)" + " )" ] }, { @@ -134,6 +134,7 @@ "outputs": [], "source": [ "reaction_detector = ReactionDetector()\n", + "non_monomer_detector = NonReactantsDetector()\n", "reaction_instances = reaction_detector.reaction_detector(functional_groups)\n", "img = reaction_detector.create_reaction_image_grid(reaction_instances)\n", "img" @@ -149,6 +150,28 @@ "selected_reactions = reaction_detector.reaction_selection(reaction_instances)" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "c811c031", + "metadata": {}, + "outputs": [], + "source": [ + "non_reactants_list = non_monomer_detector.non_monomer_detector(validated_inputs, selected_reactions)\n", + "img_non_reactants = non_monomer_detector.non_reactants_to_visualization(non_reactants_list)\n", + "img_non_reactants" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "00d75b1a", + "metadata": {}, + "outputs": [], + "source": [ + "updated_inputs = non_monomer_detector.non_reactant_selection(validated_inputs, non_reactants_list)" + ] + }, { "cell_type": "code", "execution_count": null, From a6949493dbab13ade840f8b02bb25b3b146df2ab Mon Sep 17 00:00:00 2001 From: janitha1996 <119646255+janitha-mahanthe@users.noreply.github.com> Date: Tue, 3 Mar 2026 14:36:50 -0500 Subject: [PATCH 09/13] Detect and visualize non-reactant monomers Add NonReactantsDetector that identifies monomers not participating in detected reactions using RDKit (canonical SMILES comparisons and H removal), generates grid visualizations, and provides an interactive selection UI to mark non-reactant monomers as discarded. non_monomer_detector now returns a List[MonomerEntry] of non-reactants and helper methods (_same_molecule, _same_molecule_for_initaials) were added/refactored. Also tidy imports and type formatting in input_parser (ForceFieldType/CompositionMethodType). Update examples: remove a duplicated monomer from example_1_inputs_count_mode.json and add example_1_inputs_count_mode_with_non_monomers.json demonstrating a case with non-reactant monomers. --- AutoREACTER/detectors/non_monomer_detector.py | 242 +++++++++++++++--- AutoREACTER/input_parser.py | 8 +- examples/example_1_inputs_count_mode.json | 7 - ...1_inputs_count_mode_with_non_monomers.json | 39 +++ 4 files changed, 257 insertions(+), 39 deletions(-) create mode 100644 examples/example_1_inputs_count_mode_with_non_monomers.json diff --git a/AutoREACTER/detectors/non_monomer_detector.py b/AutoREACTER/detectors/non_monomer_detector.py index 8cc8b7c..14fdd7a 100644 --- a/AutoREACTER/detectors/non_monomer_detector.py +++ b/AutoREACTER/detectors/non_monomer_detector.py @@ -1,101 +1,234 @@ +""" +AutoREACTER - Non-Reactant Monomer Detection Module + +This module provides functionality to detect monomer molecules that do not participate +in any identified chemical reactions within a simulation setup. It allows users to +visualize these non-reacting monomers and decide whether to retain or discard them +from the simulation. + +Author: AutoREACTER Development Team +""" from dataclasses import dataclass from typing import Any, List, Literal, Optional, Tuple + +# RDKit chemistry library imports for molecular operations and visualization from rdkit import Chem from rdkit.Chem import rdmolops, Draw +# AutoREACTER internal imports for simulation configuration and reaction detection from AutoREACTER.input_parser import SimulationSetup from AutoREACTER.input_parser import MonomerEntry from AutoREACTER.detectors.reaction_detector import ReactionInstance -from AutoREACTER.detectors.reaction_detector import FunctionalGroupInfo -from AutoREACTER.detectors.reaction_detector import MonomerRole +from AutoREACTER.detectors.reaction_detector import FunctionalGroupInfo +from AutoREACTER.detectors.reaction_detector import MonomerRole class NonReactantsDetector: + """ + A detector class responsible for identifying monomers that do not participate + in any detected chemical reactions within a simulation. + + This class provides methods to: + - Detect non-reacting monomers by comparing reaction participants with monomer lists + - Visualize non-reacting monomers using RDKit grid image generation + - Handle user selection for retaining or discarding non-reacting monomers + + Attributes: + None: This class primarily operates on input parameters rather than storing state. + + Example: + >>> detector = NonReactantsDetector() + >>> updated_setup = detector.non_monomer_detector(setup, reactions) + >>> detector.non_reactant_selection(setup, non_reactants) + """ def _same_molecule(self, reactants_list: List[str], mol2_smi: str) -> List[str]: """ - Checks if a molecule (SMILES) exists in the list, - if not, appends it and returns the updated list. + Checks if a molecule (given as SMILES string) exists in the reactants list. + + This method performs canonical SMILES comparison to ensure that molecules + with the same structure but different string representations are correctly + identified as identical. + + Args: + reactants_list (List[str]): A list of SMILES strings representing molecules + that participate in detected reactions. + mol2_smi (str): The SMILES string of the molecule to check for membership. + + Returns: + List[str]: The updated reactants_list with the new molecule appended + if it was not already present. If the input SMILES is invalid + (cannot be parsed), the original list is returned unchanged. + + Raises: + None: Invalid SMILES are silently handled by returning the original list. + + Note: + Canonical SMILES are used for comparison to ensure structural equivalence + regardless of string representation differences (e.g., branching order). """ + # Parse the input SMILES string into an RDKit Mol object mol2 = Chem.MolFromSmiles(mol2_smi) + + # Return original list if SMILES is invalid (None indicates parsing failure) if mol2 is None: - return reactants_list # Or handle error: invalid SMILES + return reactants_list # Or handle error: invalid SMILES + # Convert to canonical SMILES for consistent comparison across different representations can_smi2 = Chem.MolToSmiles(mol2, canonical=True) detected = False + # Iterate through existing reactants to check for duplicate molecules for mol_smi in reactants_list: mol1 = Chem.MolFromSmiles(mol_smi) if mol1 is not None: + # Compare canonical SMILES to determine if molecules are identical if Chem.MolToSmiles(mol1, canonical=True) == can_smi2: detected = True break + # Append molecule to list if it wasn't found (not a duplicate) if not detected: reactants_list.append(mol2_smi) return reactants_list def _same_molecule_for_initaials(self, reactants_list: List[str], mol2: Chem.Mol) -> bool: + """ + Checks if an RDKit Mol object represents a molecule already in the reactants list. + + This method differs from _same_molecule in that it accepts an RDKit Mol object + directly rather than a SMILES string, and it also removes explicit hydrogens + before comparison to ensure fair structural comparison. + + Args: + reactants_list (List[str]): A list of SMILES strings representing molecules + that participate in detected reactions. + mol2 (Chem.Mol): The RDKit Mol object to check for membership. + + Returns: + bool: True if the molecule exists in the reactants list (structurally + equivalent), False otherwise. Returns False if the input mol is None. + + Note: + - Explicit hydrogens are removed from both the query molecule and list + molecules before comparison to ensure structural equivalence. + - This method handles the typo "initaials" in the function name (preserved + for backward compatibility). + """ + # Handle None input gracefully if mol2 is None: return False - # 1. In case - Remove Hs from query molecule and get its canonical SMILES + # Step 1: Remove Hs from query molecule and get its canonical SMILES + # Removing hydrogens ensures fair comparison regardless of how hydrogens were added mol2_clean = rdmolops.RemoveHs(mol2) can_smi2 = Chem.MolToSmiles(mol2_clean, canonical=True) + # Iterate through the reactants list for comparison for mol_smi in reactants_list: mol1 = Chem.MolFromSmiles(mol_smi) if mol1 is not None: - # 2. Also remove Hs from input list molecules for fair comparison + # Step 2: Also remove Hs from input list molecules for fair comparison mol1 = rdmolops.RemoveHs(mol1) can_smi1 = Chem.MolToSmiles(mol1, canonical=True) + # Check for structural equivalence if can_smi1 == can_smi2: return True + + # Return False if no matching molecule was found in the list + return False def non_monomer_detector(self, simulation_setup: SimulationSetup, - reaction_instances: List[ReactionInstance]) -> SimulationSetup: + reaction_instances: List[ReactionInstance]) -> List[MonomerEntry]: """ Detects monomers that do not participate in any of the identified reactions. + + This method iterates through all monomers in the simulation setup and identifies + those that are not involved in any of the detected reaction instances. It builds + a list of reactants from the reaction instances and compares each monomer against + this list to determine non-reactivity. + Args: - simulation_setup: The simulation setup containing monomers and reactions. - reaction_instances: A list of identified and filterd reactions by the user + simulation_setup (SimulationSetup): The simulation setup containing monomers + and reaction configuration. + reaction_instances (List[ReactionInstance]): A list of identified and filtered + reactions by the user. + Returns: - SimulationSetup: The updated simulation setup with non-reactant monomers identified. - This function iterates through the monomers in the simulation setup and checks if they are involved in any of the detected reactions. - If a monomer is not found in any reaction, It will be added to a non-reactants list. - Then ask the used if they want to retain these non-reactant monomers in the simulation. - Inputs will be "N" for no and "A" for all "S" for selected. - SimulationSetup.monomers: list[MonomerEntry] . status: StatusType = "active" will be updated to "discarded" for non-reactant monomers if the user choose to discard them. - Finally, the function returns the updated SimulationSetup with non-reactant monomers either retained or discarded based on user input. - """ - print(""" - Selection Guide for Non-Reactant Monomers: - - N: Discard all non-reactant monomers from the simulation. - - A: Retain all non-reactant monomers in the simulation. - - S: Select specific non-reactant monomers to retain. You will be prompted to enter the IDs of the monomers you wish to keep, separated by commas.""") - reactants_list = [] - non_reactants_list = [] + List[MonomerEntry]: A list of monomer entries that do not participate in any + detected reactions (non-reactant monomers). + + Example: + >>> setup = SimulationSetup(...) + >>> reactions = [ReactionInstance(...), ...] + >>> detector = NonReactantsDetector() + >>> non_reactants = detector.non_monomer_detector(setup, reactions) + >>> # non_reactants contains monomers not found in any reaction + """ + + # Initialize lists to track reactants and non-reactants + reactants_list = [] # Stores SMILES of all molecules participating in reactions + non_reactants_list = [] # Stores monomer entries that don't participate in reactions + + # Build the reactants list from all reaction instances + # Each reaction involves up to two monomers (monomer_1 and monomer_2) for reaction in reaction_instances: + # Add the first monomer from each reaction mol1 = reaction.monomer_1.smiles reactants_list = self._same_molecule(reactants_list, mol1) + + # Add the second monomer if it exists (some reactions may have only one participant) if reaction.monomer_2: mol2 = reaction.monomer_2.smiles reactants_list = self._same_molecule(reactants_list, mol2) + + # Check each monomer in the simulation setup against the reactants list for monomer in simulation_setup.monomers: + # Use RDKit Mol object for comparison (more reliable than SMILES strings) is_in_reactions = self._same_molecule_for_initaials(reactants_list, monomer.rdkit_mol) if not is_in_reactions: + # Monomer is not found in any reaction - add to non-reactants list non_reactants_list.append(monomer) + # Return the list of non-reactant monomers return non_reactants_list def non_reactants_to_visualization(self, non_reactants_list: List[MonomerEntry]) -> Draw.MolsToGridImage: + """ + Generates a grid image visualization of non-reacting monomers. + + This method creates a visual representation of all non-reacting monomers + using RDKit's MolsToGridImage function, allowing users to visually identify + which molecules are not participating in reactions. + + Args: + non_reactants_list (List[MonomerEntry]): A list of monomer entries that do not + participate in any reactions. + + Returns: + Draw.MolsToGridImage: An RDKit grid image object containing the molecular + structures of all non-reacting monomers. + + Note: + The grid image displays molecules with their names as legends and arranges + them in a grid with 3 molecules per row. Each molecule image is 400x400 pixels. + """ + # Print details of each non-reacting monomer to console + if not non_reactants_list: + print("No non-reacting monomers detected.") + return None # Or handle as appropriate (e.g., return an empty image) + for monomer in non_reactants_list: print(f"ID: {monomer.id}, Name: {monomer.name}, SMILES: {monomer.smiles}") + # Generate grid image using RDKit + # - Extract RDKit Mol objects from monomer entries + # - Use monomer names as legend text + # - Configure grid layout: 3 molecules per row, 400x400 pixel images img = Draw.MolsToGridImage([monomer.rdkit_mol for monomer in non_reactants_list], legends=[monomer.name for monomer in non_reactants_list], molsPerRow=3, @@ -105,45 +238,94 @@ def non_reactants_to_visualization(self, non_reactants_list: List[MonomerEntry]) def non_reactant_selection(self, simulation_setup: SimulationSetup, non_reactants_list: List[MonomerEntry]) -> SimulationSetup: """ Handles user selection of non-reactant monomers to retain or discard in the simulation. + + This method presents the user with options to either keep or remove non-reacting + monomers from the simulation. It supports three selection modes: + - N (No): Discard all non-reactant monomers + - A (All): Retain all non-reactant monomers + - S (Select): Allow selective retention based on monomer IDs + + The method updates the 'status' attribute of selected monomers to 'discarded' + for those that should be removed from the simulation. + + Args: + simulation_setup (SimulationSetup): The simulation setup object whose monomers + will be potentially modified. + non_reactants_list (List[MonomerEntry]): A list of monomer entries identified + as non-reacting. + + Returns: + SimulationSetup: The updated simulation setup with non-reactant monomers + either retained or marked as discarded based on user input. + + Note: + - This method modifies the status attribute of MonomerEntry objects in place. + - For single non-reactant monomers, only N and A options are presented. + - The status values follow the StatusType convention: 'active' or 'discarded'. """ + if not non_reactants_list: + print("No non-reacting monomers to select from.") + return simulation_setup # No changes needed if there are no non-reactants + + # Display selection guide for user interaction with non-reactant monomers + print(""" +Selection Guide for Non-Reactant Monomers: +- N: Discard all non-reactant monomers from the simulation. +- A: Retain all non-reactant monomers in the simulation. +- S: Select specific non-reactant monomers to retain. You will be prompted +to enter the IDs of the monomers you wish to keep, separated by commas. +Example: If you want to keep monomers with IDs 1 and 3, you would enter: 1,3 + """) + # Proceed only if there are non-reactant monomers to process if non_reactants_list: + # Display information about non-participating monomers print("The following monomers do not participate in any detected reactions:") for monomer in non_reactants_list: print(f"ID: {monomer.id}, Name: {monomer.name}, SMILES: {monomer.smiles}") + # Main selection loop - continues until valid input is received while True: + # Handle special case: only one non-reactant monomer if len(non_reactants_list) == 1: print("Only one non-reactant monomer detected") + # Simplified prompt for single monomer scenario user_input = input(f"Do you want to keep {non_reactants_list[0].name} in the simulation? (N/A): ").strip().upper() if user_input not in ['N', 'A']: print("Invalid input. Please enter N or A.") continue if user_input == 'N': + # Mark single monomer as discarded non_reactants_list[0].status = 'discarded' break elif user_input == 'A': + # Keep the single monomer (retain) break - else: + else: + # Multi-monomer scenario - offer all three options user_input = input("Do you want to retain these non-reactant monomers in the simulation? (N/A/S): ").strip().upper() if user_input not in ['N', 'A', 'S']: print("Invalid input. Please enter N, A, or S.") continue if user_input == 'N': - # Discard all non-reactants + # Discard all non-reactants: mark each as 'discarded' for monomer in non_reactants_list: monomer.status = 'discarded' break elif user_input == 'A': - # Retain all non-reactants + # Retain all non-reactants: do nothing (keep 'active' status) break elif user_input == 'S': - # Selectively retain + # Selective retention: user specifies which IDs to keep + # Others will be marked as 'discarded' selected_ids = input("Enter the IDs of the monomers you want to retain, separated by commas: ") + # Parse input into list of integers ( monomer IDs) selected_ids = [int(id.strip()) for id in selected_ids.split(',')] for monomer in non_reactants_list: + # Discard monomers NOT in the selected list if monomer.id not in selected_ids: monomer.status = 'discarded' break - return simulation_setup \ No newline at end of file + # Return the modified simulation setup + return simulation_setup diff --git a/AutoREACTER/input_parser.py b/AutoREACTER/input_parser.py index cb0d89c..64afc9a 100644 --- a/AutoREACTER/input_parser.py +++ b/AutoREACTER/input_parser.py @@ -53,8 +53,12 @@ class DuplicateMonomerError(InputError): # Type aliases for clarity and validation StatusType = Literal["active", "discarded"] -CompositionMethodType = Literal["counts", "stoichiometric_ratio"] # Placeholder for future support. -ForceFieldType = Literal["PCFF-IFF", "PCFF", "Compass", "CVFF-IFF", "CVFF", "Clay-FF", "DRIEDING", "OPLS-AA"] +CompositionMethodType = Literal["counts", "stoichiometric_ratio"] +ForceFieldType = Literal[ + "PCFF-IFF", "PCFF", "Compass", + "CVFF-IFF", "CVFF", "Clay-FF", + "DRIEDING", "OPLS-AA" +] @dataclass(slots=True, frozen=True) diff --git a/examples/example_1_inputs_count_mode.json b/examples/example_1_inputs_count_mode.json index 5713b43..883185b 100644 --- a/examples/example_1_inputs_count_mode.json +++ b/examples/example_1_inputs_count_mode.json @@ -27,13 +27,6 @@ "10k": 220, "100k": 2200 } - }, - { - "smiles": "CCO", - "count": { - "10k": 110, - "100k": 1100 - } } ] } diff --git a/examples/example_1_inputs_count_mode_with_non_monomers.json b/examples/example_1_inputs_count_mode_with_non_monomers.json new file mode 100644 index 0000000..d2c5857 --- /dev/null +++ b/examples/example_1_inputs_count_mode_with_non_monomers.json @@ -0,0 +1,39 @@ +{ + "simulation_name": "Example_Count_Mode", + "temperature": [300, 400, 500], + "density": 0.8, + + "composition": { + "method": "counts", + "targets": [ + {"tag": "10k"}, + {"tag": "100k"} + ] + }, + + "monomers": [ + { + "name": "tmc", + "smiles": "ClC(=O)c1cc(cc(c1)C(Cl)=O)C(Cl)=O", + "count": { + "10k": 220, + "100k": 2200 + } + }, + { + "name": "mpd", + "smiles": "C1=CC(=CC(=C1)N)N", + "count": { + "10k": 220, + "100k": 2200 + } + }, + { + "smiles": "CCO", + "count": { + "10k": 220, + "100k": 2200 + } + } + ] +} From 1efef05071fa9645e3c54b45caeacdfb6f81815f Mon Sep 17 00:00:00 2001 From: janitha1996 <119646255+janitha-mahanthe@users.noreply.github.com> Date: Tue, 3 Mar 2026 15:35:09 -0500 Subject: [PATCH 10/13] Update non_monomer_detector.py --- AutoREACTER/detectors/non_monomer_detector.py | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/AutoREACTER/detectors/non_monomer_detector.py b/AutoREACTER/detectors/non_monomer_detector.py index 14fdd7a..9fc915c 100644 --- a/AutoREACTER/detectors/non_monomer_detector.py +++ b/AutoREACTER/detectors/non_monomer_detector.py @@ -1,18 +1,4 @@ -""" -AutoREACTER - Non-Reactant Monomer Detection Module - -This module provides functionality to detect monomer molecules that do not participate -in any identified chemical reactions within a simulation setup. It allows users to -visualize these non-reacting monomers and decide whether to retain or discard them -from the simulation. - -Author: AutoREACTER Development Team -""" - -from dataclasses import dataclass -from typing import Any, List, Literal, Optional, Tuple - -# RDKit chemistry library imports for molecular operations and visualization +from typing import List from rdkit import Chem from rdkit.Chem import rdmolops, Draw @@ -20,8 +6,6 @@ from AutoREACTER.input_parser import SimulationSetup from AutoREACTER.input_parser import MonomerEntry from AutoREACTER.detectors.reaction_detector import ReactionInstance -from AutoREACTER.detectors.reaction_detector import FunctionalGroupInfo -from AutoREACTER.detectors.reaction_detector import MonomerRole class NonReactantsDetector: From 158e79207e89c84ccf05c05504d4fb841318eb56 Mon Sep 17 00:00:00 2001 From: janitha1996 <119646255+janitha-mahanthe@users.noreply.github.com> Date: Tue, 3 Mar 2026 16:10:28 -0500 Subject: [PATCH 11/13] Use replicas schema; add ratio/counts validation Refactor input parsing to use a new 'replicas' section (temperatures, density, systems) instead of the old top-level composition format. Added _validate_replicas to validate replica targets and moved composition mode detection to read 'method' from replicas (supports 'counts' and 'ratio' mapped internally to existing logic). Normalized temperature and density to lists, updated SimulationSetup typing accordingly, and adjusted monomer validation to consume per-system monomer_counts or monomer_ratios (including duplicate-SMILES checks and RDKit-derived properties). Also added force_field handling and updated example JSON files to the new schema (deleted several old examples and added a ratio-mode example). Overall this change unifies replicas/target handling and enables both counts and stoichiometric ratio workflows. --- AutoREACTER/input_parser.py | 515 ++++++++++++------ examples/example_1_inputs.json | 15 - examples/example_1_inputs_count_mode.json | 79 ++- examples/example_1_inputs_count_mode_FF.json | 55 +- ...1_inputs_count_mode_with_non_monomers.json | 79 +-- examples/example_1_inputs_ratio_mode.json | 56 ++ .../example_1_inputs_stoichiometry_mode.json | 30 - examples/new_schema - Copy.json | 54 -- examples/new_schema.json | 43 -- 9 files changed, 529 insertions(+), 397 deletions(-) delete mode 100644 examples/example_1_inputs.json create mode 100644 examples/example_1_inputs_ratio_mode.json delete mode 100644 examples/example_1_inputs_stoichiometry_mode.json delete mode 100644 examples/new_schema - Copy.json delete mode 100644 examples/new_schema.json diff --git a/AutoREACTER/input_parser.py b/AutoREACTER/input_parser.py index 64afc9a..9c1da5a 100644 --- a/AutoREACTER/input_parser.py +++ b/AutoREACTER/input_parser.py @@ -11,7 +11,7 @@ """ from dataclasses import dataclass, asdict -from typing import Any +from typing import Any, List from rdkit import Chem from rdkit.Chem import Descriptors import logging @@ -107,25 +107,25 @@ class SimulationSetup: Attributes: simulation_name: A simple name for the simulation, used for output organization. temperature: List of temperature values (Kelvin). Normalized to list internally. - density: Overall target density for the system (g/cm^3). + density: List of density values in g/cm^3. force_field: Optional string specifying the force field to use (e.g., "PCFF", "OPLS-AA"). Default is "PCFF". monomers: List of MonomerEntry objects representing the system composition. - composition_method: The method used to define composition ("counts" or "stoichiometric_ratio"). + composition_method: The method used to define composition ("counts" or "ratio"). composition: Raw composition dictionary from inputs for flexible downstream use. stoichiometric_ratio: Mapping of monomer IDs to ratios. number_of_total_atoms: List of total atom counts per target (place holder to be calculated). box_estimates: Estimated box size based on density (placeholder). """ simulation_name: str - temperature: list[float] - density: float - force_field: None - monomers: list[MonomerEntry] - composition_method: CompositionMethodType | None = None - composition : dict[str, Any] | None = None - stoichiometric_ratio: dict[int, float] | None = None - number_of_total_atoms: list[int] | None = None - box_estimates: float | None = None + temperature: list[float] + density: list[float] + force_field: str | None + monomers: list[MonomerEntry] + composition_method: CompositionMethodType | None = None + composition: dict[str, Any] | None = None + stoichiometric_ratio: dict[int, float] | None = None + number_of_total_atoms: list[int] | None = None + box_estimates: float | None = None class InputParser: @@ -151,25 +151,37 @@ def validate_inputs(self, inputs: dict) -> SimulationSetup: A validated SimulationSetup object ready for the simulation engine. """ # Check top-level structure and determine composition method. - self.validate_basic_format(inputs) + self.validate_basic_format(inputs) + simulation_name = inputs["simulation_name"] - temperature = self._validate_temperature(inputs["temperature"]) - density = self._validate_density(inputs["density"]) - composition_method = self._get_inputs_mode(inputs["composition"]) - composition_dict = self._validate_composition(inputs["composition"], composition_method) - monomers = self._validate_monomer_entry(inputs, composition_method, composition_dict) - force_field = self._validate_force_field(inputs.get("force_field", None)) - + + replicas_dict = inputs["replicas"] + composition_method = self._get_inputs_mode(replicas_dict) + + validated_replicas = self._validate_replicas( + replicas_dict, composition_method + ) + + monomers = self._validate_monomer_entry( + inputs, + composition_method, + validated_replicas["systems"], + ) + + force_field = self._validate_force_field( + inputs.get("force_field", None) + ) return SimulationSetup( simulation_name=simulation_name, - temperature=temperature, - density=density, + temperature=validated_replicas["temperatures"], + density=validated_replicas["density"], monomers=monomers, composition_method=composition_method, - composition=composition_dict, + composition=validated_replicas, force_field=force_field ) + def molecule_representation_of_initial_molecules(self, simulation_setup: SimulationSetup) -> list[Chem.Mol]: """ @@ -201,16 +213,20 @@ def validate_basic_format(self, inputs: dict) -> None: InputSchemaError: If the input is not a dict or is missing required keys. """ if not isinstance(inputs, dict): - raise InputSchemaError(f"Expected input to be a dictionary. Got {type(inputs).__name__} instead.") + raise InputSchemaError( + f"Expected input to be a dictionary. Got {type(inputs).__name__} instead." + ) - required_keys = ["simulation_name", "temperature", "density", "composition", "monomers"] + required_keys = ["simulation_name", "replicas", "monomers"] for key in required_keys: if key not in inputs: - raise InputSchemaError(f"Missing required key: {key!r} in inputs dictionary.") + raise InputSchemaError( + f"Missing required key: {key!r} in inputs dictionary." + ) return None - def _get_inputs_mode(self, composition_dict: dict) -> CompositionMethodType: + def _get_inputs_mode(self, replicas_dict: dict) -> CompositionMethodType: """ Determines the composition method from the composition dictionary. @@ -218,23 +234,25 @@ def _get_inputs_mode(self, composition_dict: dict) -> CompositionMethodType: composition_dict: The 'composition' sub-dictionary from the inputs. Returns: - The composition method string ("counts" or "stoichiometric_ratio"). + The composition method string ("counts" or "ratio"). Raises: InputSchemaError: If the composition dict is invalid or method is unsupported. """ - if not isinstance(composition_dict, dict): + if not isinstance(replicas_dict, dict): raise InputSchemaError( - f"'composition' must be a dictionary. Got {type(composition_dict).__name__} instead." + f"'replicas' must be a dictionary. Got {type(replicas_dict).__name__} instead." ) - - method = composition_dict.get("method") - allowed: set[CompositionMethodType] = {"counts", "stoichiometric_ratio"} + + method = replicas_dict.get("method") + allowed = {"counts", "ratio"} if method not in allowed: - raise InputSchemaError(f"Unsupported composition method: {method!r}") + raise InputSchemaError( + f"Unsupported replicas method: {method!r}" + ) - return method + return "counts" if method == "counts" else "stoichiometric_ratio" def _validate_temperature(self, temp: Any) -> list[float]: """ @@ -277,9 +295,24 @@ def _validate_density(self, density: Any) -> float: Raises: NumericFieldError: If density is not a positive number. """ - if not isinstance(density, (int, float)) or density <= 0: - raise NumericFieldError(f"'density' must be a positive number. Got: {density!r}") - return density + if isinstance(density, (int, float)): + density_list = [float(density)] + + elif isinstance(density, list) and all(isinstance(d, (int, float)) for d in density): + density_list = [float(d) for d in density] + + else: + raise NumericFieldError( + f"'density' must be a number or list of numbers. Got: {density!r}" + ) + + for d in density_list: + if d <= 0: + raise NumericFieldError( + f"Density values must be positive. Got: {d!r}" + ) + + return density_list def _validate_force_field(self, force_field: Any) -> str: """ @@ -376,8 +409,8 @@ def _validate_composition( def _validate_monomer_entry(self, inputs: dict, method: CompositionMethodType, - composition_dict: dict - ) -> list[MonomerEntry]: + systems: list[dict] + ) -> list[MonomerEntry]: """ Validates monomer entries and constructs MonomerEntry objects. @@ -389,6 +422,7 @@ def _validate_monomer_entry(self, inputs: The raw input dictionary. method: The composition method ("counts" or "stoichiometric_ratio"). composition_dict: Validated composition dictionary containing target tags. + systems: List of system dictionaries from the replicas section, used to validate counts/ratios. Returns: A list of validated MonomerEntry objects. @@ -401,103 +435,92 @@ def _validate_monomer_entry(self, NumericFieldError: If count/ratio values are invalid. """ validated_monomers: list[MonomerEntry] = [] - seen_monomer_list: list = [] + seen_smiles: list[str] = [] monomers = inputs.get("monomers") if not isinstance(monomers, list): - if isinstance(monomers, dict): - # Allow single monomer dict for convenience, but normalize to list internally. - monomers = [monomers] - else: - raise ValueError(f"'monomers' must be a list of monomer definitions. Got: {type(monomers).__name__} instead.") - + raise InputSchemaError("'monomers' must be a list.") + + # Collect all tags + system_tags = [system["tag"] for system in systems] + for monomer_id, monomer_dict in enumerate(monomers, start=1): - id = monomer_id - - # Handle naming - if "name" in monomer_dict: - name = monomer_dict["name"] - if not isinstance(name, str) or not name.strip(): - name = str("data_" + str(id)) - else: - name = str("data_" + str(id)) - - # Validate SMILES - smiles = monomer_dict.get("smiles", None) - if not isinstance(smiles, str) or not smiles.strip(): - raise SmilesValidationError(f"Monomer {id}: 'smiles' must be a non-empty string. Got: {smiles!r}") - else: - # Validate and canonicalize SMILES immediately. - smiles, mol = self._validate_smiles(smiles) - # Check for duplicates against previously validated monomers. - seen_monomer_list = self.validate_no_duplicate_smiles(smiles, seen_monomer_list=seen_monomer_list) - # Handle Counts Mode - if method == "counts": - count_info = monomer_dict.get("count") + name = monomer_dict.get("name") - if not isinstance(count_info, dict): - raise InputSchemaError( - f"Monomer {monomer_id}: 'count' must be a dictionary mapping target tags to counts." - ) + if not isinstance(name, str) or not name.strip(): + name = f"data_{monomer_id}" # Default name if not provided or invalid + + smiles_raw = monomer_dict.get("smiles") - # Extract valid tags from composition - valid_tags = {t["tag"] for t in composition_dict["targets"]} - count_tags = set(count_info.keys()) + smiles, mol = self._validate_smiles(smiles_raw) + seen_smiles = self.validate_no_duplicate_smiles( + smiles, seen_smiles + ) - # Check missing tags - missing = valid_tags - count_tags - if missing: - raise InputSchemaError( - f"Monomer {monomer_id}: missing count entries for targets: {missing}" - ) + if method == "counts": - # Check extra tags - extra = count_tags - valid_tags - if extra: - raise InputSchemaError( - f"Monomer {monomer_id}: unknown target tags in count: {extra}" - ) + count_map = {} + + for system in systems: + tag = system["tag"] + monomer_counts = system["monomer_counts"] - # Validate count values - for tag, value in count_info.items(): - if not isinstance(value, int) or value <= 0: + if name not in monomer_counts: + raise InputSchemaError( + f"Monomer {name!r} missing in system '{tag}'." + ) + + value = monomer_counts[name] + if not isinstance(value, int) or value < 0: raise NumericFieldError( - f"Monomer {monomer_id}, target '{tag}': count must be a positive integer. Got: {value!r}" + f"Invalid count for monomer {name!r} in system '{tag}'." ) - count = count_info + count_map[tag] = value + + count = count_map ratio = None - - # Handle Stoichiometric Ratio Mode - elif method == "stoichiometric_ratio": - ratio = monomer_dict.get("ratio", None) - if not isinstance(ratio, (int, float)) or ratio <= 0: + + else: + # ratio defined per system (must be consistent) + first_system = systems[0] + ratios = first_system["monomer_ratios"] + + if name not in ratios: + raise InputSchemaError( + f"Monomer {name!r} missing in ratio definition." + ) + + ratio_value = ratios[name] + + if not isinstance(ratio_value, (int, float)) or ratio_value < 0: raise NumericFieldError( - f"Monomer {monomer_id}: 'ratio' must be a positive number in stoichiometric mode. Got: {ratio!r}" + f"Invalid ratio for monomer {name!r}." ) + + ratio = float(ratio_value) count = None - - else: - raise InputSchemaError(f"Unsupported composition method: {method!r}") - - # Derive atom count and molar mass from RDKit - mol_with_h = Chem.AddHs(mol) # Ensure we count implicit hydrogens for accurate atom count and molar mass. - mw = Descriptors.MolWt(mol_with_h) + + # RDKit derived properties + mol_with_h = Chem.AddHs(mol) atom_count = mol_with_h.GetNumAtoms() - - validated_monomers.append(MonomerEntry( - id=id, - data_id=str("data_" + str(id)), - name=name, - smiles=smiles, - molar_mass=mw, - atom_count=atom_count, - count=count, - ratio=ratio, - rdkit_mol=mol - )) - + mw = Descriptors.MolWt(mol_with_h) + + validated_monomers.append( + MonomerEntry( + id=monomer_id, + data_id=f"data_{monomer_id}", + name=name, + smiles=smiles, + count=count, + ratio=ratio, + atom_count=atom_count, + molar_mass=mw, + rdkit_mol=mol, + ) + ) + return validated_monomers def _int_to_dict(self, integer: int) -> dict: @@ -597,88 +620,250 @@ def validate_no_duplicate_smiles(self, current_monomer: str, seen_monomer_list: ) seen_monomer_list.append(current_monomer) return seen_monomer_list + + def _validate_replicas(self, replicas_dict: dict, method: CompositionMethodType) -> dict: + """Validates the 'replicas' section of the input, ensuring correct structure and required fields based on the composition method. + Args: + replicas_dict: The 'replicas' sub-dictionary from the input. + method: The composition method being used ("counts" or "stoichiometric_ratio"). + Returns: + A validated dictionary containing the method, temperatures, density, and systems. + Raises: + InputSchemaError: If the structure is invalid, required fields are missing, or tags are duplicated. + NumericFieldError: If numeric fields like 'total_atoms' are invalid. + """ + + temperatures = self._validate_temperature( + replicas_dict.get("temperatures") + ) + + density = self._validate_density( + replicas_dict.get("density") + ) + + systems = replicas_dict.get("systems") + + if not isinstance(systems, list) or not systems: + raise InputSchemaError( + "'replicas.systems' must be a non-empty list." + ) + + seen_tags = set() + + for system in systems: + + if not isinstance(system, dict): + raise InputSchemaError( + "Each system must be a dictionary." + ) + + tag = system.get("tag") + + if not isinstance(tag, str) or not tag.strip(): + raise InputSchemaError( + "Each system must include a non-empty 'tag'." + ) + + if tag in seen_tags: + raise InputSchemaError( + f"Duplicate system tag detected: {tag!r}" + ) + + seen_tags.add(tag) + + if method == "stoichiometric_ratio": + + total_atoms = system.get("total_atoms") + + if not isinstance(total_atoms, int) or total_atoms <= 0: + raise NumericFieldError( + "'total_atoms' must be a positive integer in ratio mode." + ) + + ratios = system.get("monomer_ratios") + + if not isinstance(ratios, dict): + raise InputSchemaError( + "'monomer_ratios' must be provided in ratio mode." + ) + + if method == "counts": + + counts = system.get("monomer_counts") + + if not isinstance(counts, dict): + raise InputSchemaError( + "'monomer_counts' must be provided in counts mode." + ) + + if "total_atoms" in system: + raise InputSchemaError( + "'total_atoms' should not appear in counts mode." + ) + + return { + "method": method, + "temperatures": temperatures, + "density": density, + "systems": systems + } if __name__ == "__main__": # Sample input data for quick manual verification of the parser. - + # Example 1: Counts Mode inputs = { "simulation_name": "Example_Count_Mode", - "temperature": [300, 400, 500], - "density": 0.8, - "composition": { + "replicas": { + "temperatures": [300, 400, 500], + "density": 0.8, "method": "counts", - "targets": [ - {"tag": "10k"}, - {"tag": "100k"} + "systems": [ + { + "tag": "10k", + "monomer_counts": { + "tmc": 220, + "mpd": 220, + "ethanol": 110 + } + }, + { + "tag": "100k", + "monomer_counts": { + "tmc": 2200, + "mpd": 2200, + "ethanol": 1100 + } + } ] }, - - "monomers": [ + + "monomers": [ { - "name": "tmc", - "smiles": "ClC(=O)c1cc(cc(c1)C(Cl)=O)C(Cl)=O", - "count": { - "10k": 220, - "100k": 2200 - } + "name": "tmc", + "smiles": "ClC(=O)c1cc(cc(c1)C(Cl)=O)C(Cl)=O" }, { - "name": "mpd", - "smiles": "C1=CC(=CC(=C1)N)N", - "count": { - "10k": 220, - "100k": 2200 - } + "name": "mpd", + "smiles": "C1=CC(=CC(=C1)N)N" }, { - "name": "ethanol", - "smiles": "CCO", - "count": { - "10k": 110, - "100k": 1100 - } + "name": "ethanol", + "smiles": "CCO" } ] } - + # Example 2: Stoichiometric Mode - inputs_stoichiometric = { - "simulation_name": "Example_Stoichiometric_Mode", - "temperature": [300, 400, 500], - "density": 0.8, - - "composition": { - "method": "stoichiometric_ratio", - "targets": [ - {"tag": "10k", "total_atoms": 10000}, - {"tag": "100k", "total_atoms": 100000} + inputs_stoichiometric = { + + "simulation_name": "Example_Ratio_Mode", + + "replicas": { + "temperatures": [300, 400], + "density": [0.8], + "method": "ratio", + + "systems": [ + + { + "tag": "10k_base", + "total_atoms": 10000, + + "monomer_ratios": { + "tmc": 1.0, + "mpd": 1.0, + "ethanol": 0.5 + } + }, + + { + "tag": "100k_base", + "total_atoms": 100000, + + "monomer_ratios": { + "tmc": 1.0, + "mpd": 1.0, + "ethanol": 0.5 + } + } + + ] + }, + + "monomers": [ + + { + "name": "tmc", + "smiles": "ClC(=O)c1cc(cc(c1)C(Cl)=O)C(Cl)=O" + }, + + { + "name": "mpd", + "smiles": "C1=CC(=CC(=C1)N)N" + }, + + { + "name": "ethanol", + "smiles": "CCO" + } + + ] + } + input_ff = { + "simulation_name": "Example_Count_Mode", + + "replicas": { + "temperatures": [300, 400, 500], + "density": 0.8, + "method": "counts", + + "systems": [ + { + "tag": "10k", + "monomer_counts": { + "tmc": 220, + "mpd": 220, + "ethanol": 110 + } + }, + { + "tag": "100k", + "monomer_counts": { + "tmc": 2200, + "mpd": 2200, + "ethanol": 1100 + } + } ] }, + "force_field": "PCFF", + "monomers": [ { "name": "tmc", - "smiles": "ClC(=O)c1cc(cc(c1)C(Cl)=O)C(Cl)=O", - "ratio": 1.0 + "smiles": "ClC(=O)c1cc(cc(c1)C(Cl)=O)C(Cl)=O" }, { "name": "mpd", - "smiles": "C1=CC(=CC(=C1)N)N", - "ratio": 1.0 + "smiles": "C1=CC(=CC(=C1)N)N" }, { "name": "ethanol", - "smiles": "CCO", - "ratio": 0.5 + "smiles": "CCO" } ] } - parser = InputParser() print("Validating Counts Mode Input:") print(parser.validate_inputs(inputs)) - print("\nValidating Stoichiometric Mode Input:") + print("\nValidating Ratio Mode Input:") print(parser.validate_inputs(inputs_stoichiometric)) + + print("\nValidating Force Field Input:") + print(parser.validate_inputs(input_ff)) + + diff --git a/examples/example_1_inputs.json b/examples/example_1_inputs.json deleted file mode 100644 index c910d54..0000000 --- a/examples/example_1_inputs.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "simulation_name": "MySimulation", - "temperature": [300, 400, 500], - "density": 0.8, - "monomers": { - "1": "O=C(O)CCCCO", - "2": "O=C(O)c1ccc(O)cc1", - "3": "CCO" - }, - "number_of_monomers": { - "1": 500, - "2": 400, - "3": 300 - } -} diff --git a/examples/example_1_inputs_count_mode.json b/examples/example_1_inputs_count_mode.json index 883185b..b965947 100644 --- a/examples/example_1_inputs_count_mode.json +++ b/examples/example_1_inputs_count_mode.json @@ -1,32 +1,53 @@ { - "simulation_name": "Example_Count_Mode", - "temperature": [300, 400, 500], - "density": 0.8, - - "composition": { - "method": "counts", - "targets": [ - {"tag": "10k"}, - {"tag": "100k"} - ] - }, - - "monomers": [ - { - "name": "tmc", - "smiles": "ClC(=O)c1cc(cc(c1)C(Cl)=O)C(Cl)=O", - "count": { - "10k": 220, - "100k": 2200 - } + + "simulation_name": "Example_Count_Mode", + + "replicas": { + "temperatures": [300, 400, 500], + "density": [0.8], + "method": "counts", + + "systems": [ + + { + "tag": "10k", + + "monomer_counts": { + "tmc": 220, + "mpd": 220, + "ethanol": 110 + } + }, + + { + "tag": "100k", + + "monomer_counts": { + "tmc": 2200, + "mpd": 2200, + "ethanol": 1100 + } + } + + ] }, - { - "name": "mpd", - "smiles": "C1=CC(=CC(=C1)N)N", - "count": { - "10k": 220, - "100k": 2200 - } - } - ] + + "monomers": [ + + { + "name": "tmc", + "smiles": "ClC(=O)c1cc(cc(c1)C(Cl)=O)C(Cl)=O" + }, + + { + "name": "mpd", + "smiles": "C1=CC(=CC(=C1)N)N" + }, + + { + "name": "ethanol", + "smiles": "CCO" + } + + ] } diff --git a/examples/example_1_inputs_count_mode_FF.json b/examples/example_1_inputs_count_mode_FF.json index 62b74cd..095a1f3 100644 --- a/examples/example_1_inputs_count_mode_FF.json +++ b/examples/example_1_inputs_count_mode_FF.json @@ -1,39 +1,44 @@ { "simulation_name": "Example_Count_Mode", - "temperature": [300, 400, 500], - "density": 0.8, - "force_field": "PCFF", - "composition": { + + "replicas": { + "temperatures": [300, 400, 500], + "density": [0.8], "method": "counts", - "targets": [ - {"tag": "10k"}, - {"tag": "100k"} + + "systems": [ + { + "tag": "10k", + "monomer_counts": { + "tmc": 220, + "mpd": 220, + "data_3": 110 + } + }, + { + "tag": "100k", + "monomer_counts": { + "tmc": 2200, + "mpd": 2200, + "data_3": 1100 + } + } ] }, - - "monomers": [ + + "force_field": "PCFF", + + "monomers": [ { "name": "tmc", - "smiles": "ClC(=O)c1cc(cc(c1)C(Cl)=O)C(Cl)=O", - "count": { - "10k": 220, - "100k": 2200 - } + "smiles": "ClC(=O)c1cc(cc(c1)C(Cl)=O)C(Cl)=O" }, { "name": "mpd", - "smiles": "C1=CC(=CC(=C1)N)N", - "count": { - "10k": 220, - "100k": 2200 - } + "smiles": "C1=CC(=CC(=C1)N)N" }, { - "smiles": "CCO", - "count": { - "10k": 110, - "100k": 1100 - } + "smiles": "CCO" } ] -} +} \ No newline at end of file diff --git a/examples/example_1_inputs_count_mode_with_non_monomers.json b/examples/example_1_inputs_count_mode_with_non_monomers.json index d2c5857..23643f2 100644 --- a/examples/example_1_inputs_count_mode_with_non_monomers.json +++ b/examples/example_1_inputs_count_mode_with_non_monomers.json @@ -1,39 +1,46 @@ { - "simulation_name": "Example_Count_Mode", - "temperature": [300, 400, 500], - "density": 0.8, - - "composition": { - "method": "counts", - "targets": [ - {"tag": "10k"}, - {"tag": "100k"} - ] - }, - - "monomers": [ - { - "name": "tmc", - "smiles": "ClC(=O)c1cc(cc(c1)C(Cl)=O)C(Cl)=O", - "count": { - "10k": 220, - "100k": 2200 - } - }, - { - "name": "mpd", - "smiles": "C1=CC(=CC(=C1)N)N", - "count": { - "10k": 220, - "100k": 2200 - } + + "simulation_name": "Example_Count_Mode", + + "replicas": { + "temperatures": [300, 400, 500], + "density": [0.8], + "method": "counts", + + "systems": [ + + { + "tag": "10k", + + "monomer_counts": { + "tmc": 220, + "mpd": 220 + } + }, + + { + "tag": "100k", + + "monomer_counts": { + "tmc": 2200, + "mpd": 2200 + } + } + + ] }, - { - "smiles": "CCO", - "count": { - "10k": 220, - "100k": 2200 - } - } - ] + + "monomers": [ + + { + "name": "tmc", + "smiles": "ClC(=O)c1cc(cc(c1)C(Cl)=O)C(Cl)=O" + }, + + { + "name": "mpd", + "smiles": "C1=CC(=CC(=C1)N)N" + } + + ] } diff --git a/examples/example_1_inputs_ratio_mode.json b/examples/example_1_inputs_ratio_mode.json new file mode 100644 index 0000000..b46ede2 --- /dev/null +++ b/examples/example_1_inputs_ratio_mode.json @@ -0,0 +1,56 @@ +{ + + "simulation_name": "Example_Ratio_Mode", + + "replicas": { + "temperatures": [300, 400], + "density": [0.8], + "method": "ratio", + + "systems": [ + + { + "tag": "10k_base", + "total_atoms": 10000, + + "monomer_ratios": { + "tmc": 1.0, + "mpd": 1.0, + "ethanol": 0.5 + } + }, + + { + "tag": "100k_base", + "total_atoms": 100000, + + "monomer_ratios": { + "tmc": 1.0, + "mpd": 1.0, + "ethanol": 0.5 + } + } + + ] + }, + + "monomers": [ + + { + "name": "tmc", + "smiles": "ClC(=O)c1cc(cc(c1)C(Cl)=O)C(Cl)=O" + }, + + { + "name": "mpd", + "smiles": "C1=CC(=CC(=C1)N)N" + }, + + { + "name": "ethanol", + "smiles": "CCO" + } + + ] + } + diff --git a/examples/example_1_inputs_stoichiometry_mode.json b/examples/example_1_inputs_stoichiometry_mode.json deleted file mode 100644 index 36b9d42..0000000 --- a/examples/example_1_inputs_stoichiometry_mode.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "simulation_name": "Example_Stoichiometric_Mode", - "temperature": [300, 400, 500], - "density": 0.8, - - "composition": { - "method": "stoichiometric_ratio", - "targets": [ - {"tag": "10k", "total_atoms": 10000}, - {"tag": "100k", "total_atoms": 100000} - ] - }, - - "monomers": [ - { - "name": "tmc", - "smiles": "ClC(=O)c1cc(cc(c1)C(Cl)=O)C(Cl)=O", - "ratio": 1.0 - }, - { - "name": "mpd", - "smiles": "C1=CC(=CC(=C1)N)N", - "ratio": 1.0 - }, - { - "smiles": "CCO", - "ratio": 0.5 - } - ] -} diff --git a/examples/new_schema - Copy.json b/examples/new_schema - Copy.json deleted file mode 100644 index 3a9b444..0000000 --- a/examples/new_schema - Copy.json +++ /dev/null @@ -1,54 +0,0 @@ -{ - "simulation_name": "Example_Ratio_Mode", - - "replicas": { - "temperatures": [300, 400], - "density": 0.8, - "method": "ratio", - - "systems": [ - { - "tag": "10k_base", - "total_atoms": 10000, - "monomer_ratios": { - "tmc": 1.0, - "mpd": 1.0, - "CCO": 0.5 - } - }, - { - "tag": "100k_base", - "total_atoms": 100000, - "monomer_ratios": { - "tmc": 1.0, - "mpd": 1.0, - "CCO": 0.5 - } - }, - { - "tag": "100k_dry", - "total_atoms": 100000, - "monomer_ratios": { - "tmc": 1.0, - "mpd": 1.0, - "CCO": 0.0 - } - } - ] - }, - - "monomers": [ - { - "name": "tmc", - "smiles": "ClC(=O)c1cc(cc(c1)C(Cl)=O)C(Cl)=O" - }, - { - "name": "mpd", - "smiles": "C1=CC(=CC(=C1)N)N" - }, - { - "name": "CCO", - "smiles": "CCO" - } - ] -} \ No newline at end of file diff --git a/examples/new_schema.json b/examples/new_schema.json deleted file mode 100644 index 62a7af1..0000000 --- a/examples/new_schema.json +++ /dev/null @@ -1,43 +0,0 @@ -{ - "simulation_name": "Example_Count_Mode", - - "replicas": { - "temperatures": [300, 400], - "density": 0.8, - "method": "counts", - - "systems": [ - { - "tag": "10k", - "monomer_counts": { - "tmc": 220, - "mpd": 220, - "CCO": 110 - } - }, - { - "tag": "100k", - "monomer_counts": { - "tmc": 2200, - "mpd": 2200, - "CCO": 0 - } - } - ] - }, - - "monomers": [ - { - "name": "tmc", - "smiles": "ClC(=O)c1cc(cc(c1)C(Cl)=O)C(Cl)=O" - }, - { - "name": "mpd", - "smiles": "C1=CC(=CC(=C1)N)N" - }, - { - "name": "CCO", - "smiles": "CCO" - } - ] -} \ No newline at end of file From 6596d0f663ab0a2db3c775832f410488a445bf2d Mon Sep 17 00:00:00 2001 From: Janitha Mahanthe <119646255+janitha-mahanthe@users.noreply.github.com> Date: Tue, 3 Mar 2026 16:26:03 -0500 Subject: [PATCH 12/13] Update AutoREACTER/input_parser.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- AutoREACTER/input_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AutoREACTER/input_parser.py b/AutoREACTER/input_parser.py index 9c1da5a..c5fd07a 100644 --- a/AutoREACTER/input_parser.py +++ b/AutoREACTER/input_parser.py @@ -11,7 +11,7 @@ """ from dataclasses import dataclass, asdict -from typing import Any, List +from typing import Any from rdkit import Chem from rdkit.Chem import Descriptors import logging From 33fd90af950fc652a5ce8a7b3c159aea66bee33e Mon Sep 17 00:00:00 2001 From: Janitha Mahanthe <119646255+janitha-mahanthe@users.noreply.github.com> Date: Tue, 3 Mar 2026 16:26:57 -0500 Subject: [PATCH 13/13] Update examples/example_1.ipynb Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- examples/example_1.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/example_1.ipynb b/examples/example_1.ipynb index 5227037..80a5804 100644 --- a/examples/example_1.ipynb +++ b/examples/example_1.ipynb @@ -107,7 +107,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Detector Has to called Now after the inputs are validated, because it needs the input dict to run the detection workflow\n", + "# Run the detector only after the inputs have been validated, as it consumes validated_inputs.monomers to execute the detection workflow.\n", "functional_groups_detector = FunctionalGroupsDetector()\n", "functional_groups, functional_groups_imgs = \\\n", " functional_groups_detector.functional_groups_detector(\n",