diff --git a/README.md b/README.md index c56dc19..18d3e50 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,16 @@ M END """ std_molblock = standardizer.standardize_molblock(o_molblock) +``` +> **Note** +> Using SMILES: + +```python +from chembl_structure_pipeline import standardizer + +SMILES = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C" +std_smiles = standardizer.standardize_molblock_from_smiles(SMILES, get_smiles=True) + ``` ### Get the parent compound [(info)](https://github.com/chembl/ChEMBL_Structure_Pipeline/wiki/Work-done-by-each-step#get_parent_molblock) @@ -74,6 +84,17 @@ M END parent_molblock, _ = standardizer.get_parent_molblock(o_molblock) ``` +> **Note** +> Using SMILES: + +```python +from chembl_structure_pipeline import standardizer + +SMILES = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C" +parent_smiles = standardizer.get_parent_mol_from_smiles(SMILES, get_smiles=True) + +``` + ### Check a compound [(info)](https://github.com/chembl/ChEMBL_Structure_Pipeline/wiki/Work-done-by-each-step#checkmolecule) The checker assesses the quality of a structure. It highlights specific features or issues in the structure that may need to be revised. 
Together with the description of the issue, the checker process returns a penalty score (between 0-9) which reflects the seriousness of the issue (the higher the score, the more critical is the issue) @@ -96,6 +117,16 @@ M END """ issues = checker.check_molblock(o_molblock) +``` +> **Note** +> Using SMILES: + +```python +from chembl_structure_pipeline import checker + +SMILES = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C" +issues = checker.check_mol_from_smiles(SMILES) + ``` ## References diff --git a/chembl_structure_pipeline/__init__.py b/chembl_structure_pipeline/__init__.py index 17b81c5..2ee8d9d 100644 --- a/chembl_structure_pipeline/__init__.py +++ b/chembl_structure_pipeline/__init__.py @@ -101,7 +101,7 @@ from .standardizer import standardize_molblock, standardize_mol from .standardizer import get_parent_molblock, get_parent_mol -__version__ = "1.2.0" +__version__ = "1.2.1" # # Copyright (c) 2019 Greg Landrum diff --git a/chembl_structure_pipeline/checker.py b/chembl_structure_pipeline/checker.py index 4d0e68d..bb2ea3e 100644 --- a/chembl_structure_pipeline/checker.py +++ b/chembl_structure_pipeline/checker.py @@ -508,3 +508,17 @@ def check_molblock(mb): if tpl: res.append(tpl) return tuple(sorted(res, reverse=True)) + + +def check_mol_from_smiles(smiles: str, sanitize=False): + """ + Use the check_molblock function to determine + if the molecule has any issues based on + a given SMILES string. + Args (str): SMILES string. 
+ Returns (tuple): Issues with smiles + """ + mol = Chem.MolFromSmiles(smiles, sanitize) + if mol: + mol_block = Chem.MolToMolBlock(mol) + return check_molblock(mol_block) diff --git a/chembl_structure_pipeline/standardizer.py b/chembl_structure_pipeline/standardizer.py index a71429b..207de9e 100644 --- a/chembl_structure_pipeline/standardizer.py +++ b/chembl_structure_pipeline/standardizer.py @@ -453,6 +453,26 @@ def get_parent_molblock(ctab, neutralize=True, check_exclusion=True, verbose=Fal return Chem.MolToMolBlock(parent, kekulize=False), exclude +def get_parent_mol_from_smiles( + smiles: str, get_smiles: bool = False, sanitize: bool = False +): + """ + Use the get_parent_molblock function to retrieve the parent molblock + using the given SMILES string. + Args (str): SMILES string. + Returns: parent SMILES (str) if get_smiles is True, otherwise the (parent molblock, exclude flag) tuple from get_parent_molblock; None if no molecule could be built from the SMILES. + """ + mol = Chem.MolFromSmiles(smiles, sanitize) + if mol: + mol_block = Chem.MolToMolBlock(mol) + if get_smiles: + parent_molblock,_ = get_parent_molblock(mol_block) + rdkit_mol = Chem.MolFromMolBlock(parent_molblock) + parentsmiles = Chem.MolToSmiles(rdkit_mol) + return parentsmiles + return get_parent_molblock(mol_block) + + def standardize_mol(m, check_exclusion=True, sanitize=True): if check_exclusion: exclude = exclude_flag(m, includeRDKitSanitization=False) @@ -512,3 +532,23 @@ def standardize_molblock(ctab, check_exclusion=True): if exclude_flag(m, includeRDKitSanitization=False): return ctab return Chem.MolToMolBlock(standardize_mol(m, check_exclusion=False, sanitize=False)) + + +def standardize_molblock_from_smiles( + smiles: str, get_smiles: bool = False, sanitize: bool = False +): + """ + Use the standardize_molblock function to identify issues and fix them for + a molecule based on the given SMILES string. + Args (str): SMILES string. + Returns (str): standardized molblock, or its SMILES if get_smiles is True; None if no molecule could be built from the SMILES. 
+ """ + mol = Chem.MolFromSmiles(smiles, sanitize) + if mol: + mol_block = Chem.MolToMolBlock(mol) + if get_smiles: + standardized_mol = standardize_molblock(mol_block) + rdkit_mol = Chem.MolFromMolBlock(standardized_mol) + standardizedsmiles = Chem.MolToSmiles(rdkit_mol) + return standardizedsmiles + return standardize_molblock(mol_block)