From 3a5e23f7c09f600d41bb4948422f69bcec45bef8 Mon Sep 17 00:00:00 2001 From: Benjamin Brown Date: Fri, 17 Jun 2022 18:50:31 -0500 Subject: [PATCH 1/2] Added support for users to specify their own dummy element types for pseudo-reactions. Also added some usage info. --- .../bcl_chemistry_fragment_add_med_chem.h | 7 ++ .../bcl_chemistry_fragment_add_med_chem.cpp | 90 ++++++++++++++++++- 2 files changed, 93 insertions(+), 4 deletions(-) diff --git a/include/chemistry/bcl_chemistry_fragment_add_med_chem.h b/include/chemistry/bcl_chemistry_fragment_add_med_chem.h index 0283f3855..ac8754203 100644 --- a/include/chemistry/bcl_chemistry_fragment_add_med_chem.h +++ b/include/chemistry/bcl_chemistry_fragment_add_med_chem.h @@ -68,9 +68,16 @@ namespace bcl util::ShPtr< FragmentEnsemble> m_FragmentPool; std::string m_MedChemFilename; + //! alternate element types to Undefined to control directionality + std::string m_MedChemFragmentLinkElementType; + std::string m_TargetMoleculeLinkElementType; + //! restrict medchem additions to aromatic rings bool m_RestrictAdditionsToAroRings; + //! 
must use this flag if you want to specify an element type as a dummy atom + bool m_EnableDummyAtom; + ////////// // data // ////////// diff --git a/source/chemistry/bcl_chemistry_fragment_add_med_chem.cpp b/source/chemistry/bcl_chemistry_fragment_add_med_chem.cpp index 6bbb6a390..9657460a8 100644 --- a/source/chemistry/bcl_chemistry_fragment_add_med_chem.cpp +++ b/source/chemistry/bcl_chemistry_fragment_add_med_chem.cpp @@ -21,6 +21,7 @@ BCL_StaticInitializationFiascoFinder // includes from bcl - sorted alphabetically #include "chemistry/bcl_chemistry_atoms_complete_standardizer.h" +#include "chemistry/bcl_chemistry_element_types.h" #include "chemistry/bcl_chemistry_fragment_map_conformer.h" #include "chemistry/bcl_chemistry_fragment_track_mutable_atoms.h" #include "chemistry/bcl_chemistry_hydrogens_handler.h" @@ -240,6 +241,35 @@ namespace bcl // mutate label BCL_MessageStd( "AddMedChem!"); + // this will cause issues so it's banned + if( m_TargetMoleculeLinkElementType.empty() && !m_EnableDummyAtom) + { + BCL_MessageStd + ( + "Invalid combination of target molecule atom selection options. " + "To obtain pseudo-reaction-style control over the reaction without " + "specifying specific atom indices, the following options are available: " + "1. Set 'mutable_elements=X', do not specify any other atom selectors. " + "2. Choose an element type that is unique in the target molecule, such as " + "Rb, set 'mutable_elements=Rb', 'target_molecule_link_element=Rb', and " + "'enable_target_dummy_atom=true'. " + "For either option 1 or 2, you are free to change the link/dummy element type " + "of the medchem fragments via the 'medchem_fragment_link_element' flag depending " + "on how your library is constructed." + ); + return math::MutateResult< FragmentComplete>( util::ShPtr< FragmentComplete>(), *this); + } + + // get connecting element types + const ElementType medchem_fragment_link_element + ( + m_MedChemFragmentLinkElementType.empty() ? 
GetElementTypes().e_Undefined : GetElementTypes().ElementTypeLookup( m_MedChemFragmentLinkElementType) + ); + const ElementType target_molecule_link_element + ( + m_TargetMoleculeLinkElementType.empty() ? GetElementTypes().e_Undefined : GetElementTypes().ElementTypeLookup( m_TargetMoleculeLinkElementType) + ); + // redo the whole thing n-max times; increment can also be made in an inner while-loop during atom index selection size_t try_index( 0); for( ; try_index < m_NumberMaxAttempts; ++try_index) @@ -256,9 +286,10 @@ namespace bcl size_t undefined_index( util::GetUndefinedSize_t()); for( size_t i( 0), end_i( medchem_atom_v.GetSize()); i < end_i; ++i) { - if( medchem_atom_v( i).GetElementType() == GetElementTypes().e_Undefined) + if( medchem_atom_v( i).GetElementType() == medchem_fragment_link_element) { undefined_index = i; + break; } } if( undefined_index == util::GetUndefinedSize_t()) @@ -278,7 +309,12 @@ namespace bcl storage::Vector< size_t> defined_indices; for( size_t i( 0), end_i( medchem_atom_v.GetSize()); i < end_i; ++i) { - if( medchem_atom_v( i).GetElementType() != GetElementTypes().e_Undefined) + if + ( + // remove undefined elements anyway, even if we use a separate link element type + medchem_atom_v( i).GetElementType() != GetElementTypes().e_Undefined || + medchem_atom_v( i).GetElementType() != medchem_fragment_link_element + ) { defined_indices.PushBack( i); } @@ -309,7 +345,11 @@ namespace bcl // if the chosen atom is undefined then just grab a bonded atom // this is biased to the lower index bonded atom, but should not generally matter size_t undefined_base_index( util::GetUndefinedSize_t()); - if( picked_atom->GetElementType() == GetElementTypes().e_Undefined) + if + ( + picked_atom->GetElementType() == GetElementTypes().e_Undefined || + ( picked_atom->GetElementType() == target_molecule_link_element && m_EnableDummyAtom ) + ) { undefined_base_index = FRAGMENT.GetAtomVector().GetAtomIndex( *picked_atom); picked_atom = util::SiPtr< const 
AtomConformationalInterface>( picked_atom->GetBonds().Begin()->GetTargetAtom()); @@ -521,7 +561,19 @@ namespace bcl io::Serializer parameters( FragmentMutateInterface::GetSerializer()); parameters.SetClassDescription ( - "Appends a classic medicinal chemistry functional group to the current molecule" + "Appends a classic medicinal chemistry functional group to the current molecule. " + "By default, fragments passed with the 'medchem_library' flag will be appended " + "to the input " + "molecule; " + "To obtain pseudo-reaction-style control over the reaction without " + "specifying specific atom indices, the following options are available: " + "1. Set 'mutable_elements=X', do not specify any other atom selectors. " + "2. Choose an element type that is unique in the target molecule, such as " + "Rb, set 'mutable_elements=Rb', 'target_molecule_link_element=Rb', and " + "'enable_target_dummy_atom=true'. " + "For either option 1 or 2, you are free to change the link/dummy element type " + "of the medchem fragments via the 'medchem_fragment_link_element' flag depending " + "on how your library is constructed." 
); parameters.AddInitializer @@ -540,6 +592,36 @@ namespace bcl "false" ); + parameters.AddInitializer + ( + "medchem_fragment_link_element", + "alternative link element type for the medchem fragments; if unspecified, defaults to " + "the undefined element type (X).", + io::Serialization::GetAgent( &m_MedChemFragmentLinkElementType) + ); + + parameters.AddInitializer + ( + "target_molecule_link_element", + "alternative link element type for the input molecules; " + "if you are not using an undefined element (specific 'X' in SDF) to mark the attachment site " + "by specifying 'mutable_elements=X', then use this flag to change the element type; " + "requires that 'enable_target_dummy_atoms' is set; " + "be careful that this is applied appropriately with the mutable_elements atom selector", + io::Serialization::GetAgent( &m_TargetMoleculeLinkElementType) + ); + + parameters.AddInitializer + ( + "enable_target_dummy_atom", + "allows users to specify dummy element types other than undefined (X) for directed " + "pseudo-reaction-style attachment of medchem fragments; " + "by default if 'mutable_elements' is set to X and no other atom selectors are specified " + "then only X elements will 'react' with the link element type in the medchem library " + "fragments (default is also X). ", + io::Serialization::GetAgent( &m_EnableDummyAtom) + ); + return parameters; } From 1cb35a712482912d5da5d3bcdbd0e47f4247a65e Mon Sep 17 00:00:00 2001 From: Benjamin Brown Date: Tue, 21 Jun 2022 00:21:43 -0500 Subject: [PATCH 2/2] Fixed a bunch of atom selection bugs from the last commit. 
--- .../bcl_chemistry_fragment_add_med_chem.cpp | 116 ++++++++++++++++-- 1 file changed, 103 insertions(+), 13 deletions(-) diff --git a/source/chemistry/bcl_chemistry_fragment_add_med_chem.cpp b/source/chemistry/bcl_chemistry_fragment_add_med_chem.cpp index 9657460a8..fe8c777c0 100644 --- a/source/chemistry/bcl_chemistry_fragment_add_med_chem.cpp +++ b/source/chemistry/bcl_chemistry_fragment_add_med_chem.cpp @@ -242,20 +242,44 @@ namespace bcl BCL_MessageStd( "AddMedChem!"); // this will cause issues so it's banned - if( m_TargetMoleculeLinkElementType.empty() && !m_EnableDummyAtom) + if + ( + ( m_TargetMoleculeLinkElementType.empty() && m_EnableDummyAtom ) || + ( !m_TargetMoleculeLinkElementType.empty() && !m_EnableDummyAtom ) + ) { BCL_MessageStd ( + "\n" "Invalid combination of target molecule atom selection options. " "To obtain pseudo-reaction-style control over the reaction without " - "specifying specific atom indices, the following options are available: " - "1. Set 'mutable_elements=X', do not specify any other atom selectors. " + "specifying specific atom indices, the following options are available: \n" + "1. Set 'mutable_elements=X', do not specify any other atom selectors. \n" "2. Choose an element type that is unique in the target molecule, such as " "Rb, set 'mutable_elements=Rb', 'target_molecule_link_element=Rb', and " - "'enable_target_dummy_atom=true'. " + "'enable_target_dummy_atom=true'. \n" "For either option 1 or 2, you are free to change the link/dummy element type " "of the medchem fragments via the 'medchem_fragment_link_element' flag depending " "on how your library is constructed." 
+ "\n" + ); + return math::MutateResult< FragmentComplete>( util::ShPtr< FragmentComplete>(), *this); + } + else if + ( + !m_TargetMoleculeLinkElementType.empty() && + m_EnableDummyAtom && + m_MutableElements.Find( GetElementTypes().ElementTypeLookup( m_TargetMoleculeLinkElementType)) >= m_MutableElements.GetSize() + ) + { + BCL_MessageStd + ( + "\n" + "A custom target molecule link element type was specified and enabled, but " + "the input molecule does not contain any elements of the desired type. " + "Alternatively, the specified element type is mismatched with the allowed mutable " + "element types." + "\n" ); return math::MutateResult< FragmentComplete>( util::ShPtr< FragmentComplete>(), *this); } @@ -297,6 +321,23 @@ namespace bcl return math::MutateResult< FragmentComplete>( util::ShPtr< FragmentComplete>(), *this); } + // dummy atoms are allowed only one bonded partner, which must be a heavy atom + BCL_Assert + ( + medchem_atom_v( undefined_index).GetBonds().GetSize() == size_t( 1), + "Encountered a medchem fragment from the library whose target link atom " + "contains more than one bond! This is not allowed. Atoms designating " + "pseudo-reactions must be bonded to only one heavy atom. Exiting..." + ); + + BCL_Assert + ( + medchem_atom_v( undefined_index).GetBonds().Begin()->GetTargetAtom().GetElementType() != GetElementTypes().e_Hydrogen, + "Encountered a medchem fragment from the library whose target link atom " + "is bonded to a hydrogen atom. This is not allowed. Atoms designating " + "pseudo-reactions must be bonded to only one heavy atom. Exiting..." 
+ ); + // get the element to which the reactive atom is bonded auto &atom_bonded_to_undefined( medchem_atom_v( undefined_index).GetBonds().Begin()->GetTargetAtom()); size_t atom_bonded_to_undefined_index( medchem_atom_v.GetAtomIndex( atom_bonded_to_undefined)); @@ -312,8 +353,8 @@ namespace bcl if ( // remove undefined elements anyway, even if we use a separate link element type - medchem_atom_v( i).GetElementType() != GetElementTypes().e_Undefined || - medchem_atom_v( i).GetElementType() != medchem_fragment_link_element + medchem_atom_v( i).GetElementType() != GetElementTypes().e_Undefined && + i != undefined_index ) { defined_indices.PushBack( i); @@ -351,7 +392,27 @@ namespace bcl ( picked_atom->GetElementType() == target_molecule_link_element && m_EnableDummyAtom ) ) { + // this is the dummy atom undefined_base_index = FRAGMENT.GetAtomVector().GetAtomIndex( *picked_atom); + + // dummy atoms are allowed only one bonded partner, which must be a heavy atom + BCL_Assert + ( + FRAGMENT.GetAtomVector()( undefined_base_index).GetBonds().GetSize() == size_t( 1), + "The user atom selection specified a target atom dummy atom for linking " + "that contains more than one bond! This is not allowed. Atoms designating " + "pseudo-reactions must be bonded to only one heavy atom. Exiting..." + ); + + BCL_Assert + ( + FRAGMENT.GetAtomVector()( undefined_base_index).GetBonds().Begin()->GetTargetAtom().GetElementType() != GetElementTypes().e_Hydrogen, + "The user atom selection specified a target atom dummy atom for linking " + "that is bonded to a hydrogen atom! This is not allowed. Atoms designating " + "pseudo-reactions must be bonded to only one heavy atom. Exiting..." 
+ ); + + // reassign picked atom to the atom bonded to the dummy atom picked_atom = util::SiPtr< const AtomConformationalInterface>( picked_atom->GetBonds().Begin()->GetTargetAtom()); } @@ -393,9 +454,33 @@ namespace bcl picked_atom = this->PickAtom( FRAGMENT, true); } - if( picked_atom->GetElementType() == GetElementTypes().e_Undefined) + if + ( + picked_atom->GetElementType() == GetElementTypes().e_Undefined || + ( picked_atom->GetElementType() == target_molecule_link_element && m_EnableDummyAtom ) + ) { + // this is the dummy atom undefined_base_index = FRAGMENT.GetAtomVector().GetAtomIndex( *picked_atom); + + // dummy atoms are allowed only one bonded partner, which must be a heavy atom + BCL_Assert + ( + FRAGMENT.GetAtomVector()( undefined_base_index).GetBonds().GetSize() == size_t( 1), + "The user atom selection specified a target atom dummy atom for linking " + "that contains more than one bond! This is not allowed. Atoms designating " + "pseudo-reactions must be bonded to only one heavy atom. Exiting..." + ); + + BCL_Assert + ( + FRAGMENT.GetAtomVector()( undefined_base_index).GetBonds().Begin()->GetTargetAtom().GetElementType() != GetElementTypes().e_Hydrogen, + "The user atom selection specified a target atom dummy atom for linking " + "that is bonded to a hydrogen atom! This is not allowed. Atoms designating " + "pseudo-reactions must be bonded to only one heavy atom. Exiting..." 
+ ); + + // reassign picked atom to the atom bonded to the dummy atom picked_atom = util::SiPtr< const AtomConformationalInterface>( picked_atom->GetBonds().Begin()->GetTargetAtom()); } @@ -465,7 +550,7 @@ namespace bcl AtomVector< AtomComplete> all_defined_atom_v( FRAGMENT.GetAtomVector()); all_defined_atom_v.Reorder( keep_indices); AtomsCompleteStandardizer standardizer_2( all_defined_atom_v, "", true); - standardizer.SetConjugationOfBondTypes( all_defined_atom_v); + standardizer_2.SetConjugationOfBondTypes( all_defined_atom_v); // make new fragment FragmentComplete fragment( all_defined_atom_v, FRAGMENT.GetName()); @@ -596,8 +681,10 @@ namespace bcl ( "medchem_fragment_link_element", "alternative link element type for the medchem fragments; if unspecified, defaults to " - "the undefined element type (X).", - io::Serialization::GetAgent( &m_MedChemFragmentLinkElementType) + "the undefined element type (X). " + "Dummy/linker atoms are required to have only one bond to the target atom of interest. ", + io::Serialization::GetAgent( &m_MedChemFragmentLinkElementType), + "X" ); parameters.AddInitializer @@ -607,8 +694,10 @@ namespace bcl "if you are not using an undefined element (specific 'X' in SDF) to mark the attachment site " "by specifying 'mutable_elements=X', then use this flag to change the element type; " "requires that 'enable_target_dummy_atoms' is set; " - "be careful that this is applied appropriately with the mutable_elements atom selector", - io::Serialization::GetAgent( &m_TargetMoleculeLinkElementType) + "be careful that this is applied appropriately with the mutable_elements atom selector " + "Dummy/linker atoms are required to have only one bond to the target atom of interest. 
", + io::Serialization::GetAgent( &m_TargetMoleculeLinkElementType), + "" ); parameters.AddInitializer @@ -619,7 +708,8 @@ namespace bcl "by default if 'mutable_elements' is set to X and no other atom selectors are specified " "then only X elements will 'react' with the link element type in the medchem library " "fragments (default is also X). ", - io::Serialization::GetAgent( &m_EnableDummyAtom) + io::Serialization::GetAgent( &m_EnableDummyAtom), + "0" ); return parameters;