diff --git a/Code/GraphMol/AddHs.cpp b/Code/GraphMol/AddHs.cpp index 8afb8732c3e..c42115b781b 100644 --- a/Code/GraphMol/AddHs.cpp +++ b/Code/GraphMol/AddHs.cpp @@ -1095,7 +1095,7 @@ void mergeQueryHs(RWMol &mol, bool mergeUnmappedOnly) { // recurse if needed (was github isusue 544) if (atom->hasQuery()) { // std::cerr<<" q: "<getQuery()->getDescription()<getQuery()->getDescription() == "RecursiveStructure") { + if (atom->getQuery()->getDescription() == "RS") { auto *rqm = static_cast(const_cast( static_cast(atom->getQuery()) ->getQueryMol())); @@ -1109,7 +1109,7 @@ void mergeQueryHs(RWMol &mol, bool mergeUnmappedOnly) { QueryAtom::QUERYATOM_QUERY::CHILD_TYPE qry = childStack.front(); childStack.pop_front(); // std::cerr<<" child: "<getDescription()<getDescription() == "RecursiveStructure") { + if (qry->getDescription() == "RS") { // std::cerr<<" recurse"<(const_cast( static_cast(qry.get()) diff --git a/Code/GraphMol/Atom.cpp b/Code/GraphMol/Atom.cpp index 741c17644f5..651375eeb65 100644 --- a/Code/GraphMol/Atom.cpp +++ b/Code/GraphMol/Atom.cpp @@ -586,6 +586,21 @@ int Atom::getPerturbationOrder(const INT_LIST &probe) const { int nSwaps = static_cast(countSwapsToInterconvert(probe, ref)); return nSwaps; } +int Atom::getPerturbationOrder(const INT_VECT &probe) const { + PRECONDITION( + dp_mol, + "perturbation order not defined for atoms not associated with molecules") + INT_VECT ref; + ref.reserve(getOwningMol().getAtomDegree(this)); + ROMol::OEDGE_ITER beg, end; + boost::tie(beg, end) = getOwningMol().getAtomBonds(this); + while (beg != end) { + ref.push_back(getOwningMol()[*beg]->getIdx()); + ++beg; + } + int nSwaps = static_cast(countSwapsToInterconvert(probe, ref)); + return nSwaps; +} void Atom::invertChirality() { switch (getChiralTag()) { diff --git a/Code/GraphMol/Atom.h b/Code/GraphMol/Atom.h index 7c42bbb7c91..97b407187ca 100644 --- a/Code/GraphMol/Atom.h +++ b/Code/GraphMol/Atom.h @@ -332,6 +332,7 @@ class RDKIT_GRAPHMOL_EXPORT Atom : public RDProps { */ int getPerturbationOrder(const INT_LIST &probe) const; + int getPerturbationOrder(const INT_VECT &probe) const; //! calculates any of our lazy \c properties /*! diff --git a/Code/GraphMol/Bond.cpp b/Code/GraphMol/Bond.cpp index 20e777357ec..b152bbe1372 100644 --- a/Code/GraphMol/Bond.cpp +++ b/Code/GraphMol/Bond.cpp @@ -177,6 +177,60 @@ double Bond::getBondTypeAsDouble() const { } } +uint8_t Bond::getTwiceBondType() const { + switch (getBondType()) { + case UNSPECIFIED: + case IONIC: + case ZERO: + return 0; + break; + case SINGLE: + return 2; + break; + case DOUBLE: + return 4; + break; + case TRIPLE: + return 6; + break; + case QUADRUPLE: + return 8; + break; + case QUINTUPLE: + return 10; + break; + case HEXTUPLE: + return 12; + break; + case ONEANDAHALF: + return 3; + break; + case TWOANDAHALF: + return 5; + break; + case THREEANDAHALF: + return 7; + break; + case FOURANDAHALF: + return 9; + break; + case FIVEANDAHALF: + return 11; + break; + case AROMATIC: + return 3; + break; + case DATIVEONE: + return 2; + break; // FIX: this should probably be different + case DATIVE: + return 2; + break; // FIX: again probably wrong + default: + UNDER_CONSTRUCTION("Bad bond type"); + } +} + double Bond::getValenceContrib(const Atom *atom) const { switch (getBondType()) { case UNSPECIFIED: diff --git a/Code/GraphMol/Bond.h b/Code/GraphMol/Bond.h index bb33a8fafc2..7f493ffd941 100644 --- a/Code/GraphMol/Bond.h +++ b/Code/GraphMol/Bond.h @@ -124,6 +124,9 @@ class RDKIT_GRAPHMOL_EXPORT Bond : public RDProps { //! \brief returns our \c bondType as a double //! (e.g. SINGLE->1.0, AROMATIC->1.5, etc.) double getBondTypeAsDouble() const; + //! \brief returns twice our \c bondType + //! (e.g. SINGLE->2, AROMATIC->3, etc.) + uint8_t getTwiceBondType() const; //! returns our contribution to the explicit valence of an Atom /*! @@ -335,8 +338,8 @@ class RDKIT_GRAPHMOL_EXPORT Bond : public RDProps { std::uint8_t d_bondType; std::uint8_t d_dirTag; std::uint8_t d_stereo; - atomindex_t d_index; - atomindex_t d_beginAtomIdx, d_endAtomIdx; + std::uint16_t d_index; + std::uint16_t d_beginAtomIdx, d_endAtomIdx; ROMol *dp_mol; INT_VECT *dp_stereoAtoms; diff --git a/Code/GraphMol/Canon.cpp b/Code/GraphMol/Canon.cpp index 542a563e16e..5f87618723b 100644 --- a/Code/GraphMol/Canon.cpp +++ b/Code/GraphMol/Canon.cpp @@ -68,6 +68,331 @@ bool atomHasFourthValence(const Atom *atom) { } return false; } + +struct _possibleCompare + : public std::binary_function { + bool operator()(const PossibleType &arg1, const PossibleType &arg2) const { + return (arg1.get<0>() < arg2.get<0>()); + } +}; + +// finds cycles +class DFSCycleFinder { + public: + DFSCycleFinder( + ROMol &mol, std::vector &colors, const UINT_VECT &ranks, + UINT_VECT &atomOrders, VECT_INT_VECT &atomRingClosures, + const boost::dynamic_bitset<> *bondsInPlay, + const std::vector *bondSymbols, bool doRandom) : + d_mol(mol), d_colors(colors), d_ranks(ranks), d_atomOrders(atomOrders), + d_atomRingClosures(atomRingClosures), d_bondsInPlay(bondsInPlay), + d_bondSymbols(bondSymbols), d_doRandom(doRandom) + {} + + void find(int atomIdx, int inBondIdx); + + private: + ROMol& d_mol; + std::vector& d_colors; + const UINT_VECT& d_ranks; + UINT_VECT& d_atomOrders; + VECT_INT_VECT& d_atomRingClosures; + const boost::dynamic_bitset<>* d_bondsInPlay; + const std::vector* d_bondSymbols; + bool d_doRandom; +}; + +void DFSCycleFinder::find(int atomIdx, int inBondIdx) { + Atom *atom = d_mol.getAtomWithIdx(atomIdx); + d_atomOrders.push_back(atomIdx); + + d_colors[atomIdx] = GREY_NODE; + + // --------------------- + // Build the list of possible destinations from here + // --------------------- + std::vector possibles; + possibles.resize(0); + ROMol::OBOND_ITER_PAIR bondsPair = d_mol.getAtomBonds(atom); + possibles.reserve(bondsPair.second - bondsPair.first); + + while (bondsPair.first != bondsPair.second) { + Bond *theBond = d_mol[*(bondsPair.first)]; + bondsPair.first++; + if (d_bondsInPlay && !(*d_bondsInPlay)[theBond->getIdx()]) { + continue; + } + if (inBondIdx < 0 || + theBond->getIdx() != static_cast(inBondIdx)) { + int otherIdx = theBond->getOtherAtomIdx(atomIdx); + long rank = d_ranks[otherIdx]; + // --------------------- + // things are a bit more complicated if we are sitting on a + // ring atom. we would like to traverse first to the + // ring-closure atoms, then to atoms outside the ring first, + // then to atoms in the ring that haven't already been visited + // (non-ring-closure atoms). + // + // Here's how the black magic works: + // - non-ring atom neighbors have their original ranks + // - ring atom neighbors have this added to their ranks: + // (MAX_BONDTYPE - bondOrder)*MAX_NATOMS*MAX_NATOMS + // - ring-closure neighbors lose a factor of: + // (MAX_BONDTYPE+1)*MAX_NATOMS*MAX_NATOMS + // + // This tactic biases us to traverse to non-ring neighbors first, + // original ordering if bond orders are all equal... crafty, neh? + // --------------------- + if (d_doRandom) { // randomize the rank + rank = getRandomGenerator()(); + } else { + if (d_colors[otherIdx] == GREY_NODE) { + rank -= static_cast(MAX_BONDTYPE + 1) * MAX_NATOMS * MAX_NATOMS; + if (!d_bondSymbols) { + rank += static_cast(MAX_BONDTYPE - theBond->getBondType()) * + MAX_NATOMS; + } else { + const std::string &symb = (*d_bondSymbols)[theBond->getIdx()]; + std::uint32_t hsh = gboost::hash_range(symb.begin(), symb.end()); + rank += (hsh % MAX_NATOMS) * MAX_NATOMS; + } + } else if (theBond->getOwningMol().getRingInfo()->numBondRings( + theBond->getIdx())) { + if (!d_bondSymbols) { + rank += static_cast(MAX_BONDTYPE - theBond->getBondType()) * + MAX_NATOMS * MAX_NATOMS; + } else { + const std::string &symb = (*d_bondSymbols)[theBond->getIdx()]; + std::uint32_t hsh = gboost::hash_range(symb.begin(), symb.end()); + rank += (hsh % MAX_NATOMS) * MAX_NATOMS * MAX_NATOMS; + } + } + } + possibles.emplace_back(rank, otherIdx, theBond); + } + } + + // --------------------- + // Sort on ranks + // --------------------- + std::sort(possibles.begin(), possibles.end(), _possibleCompare()); + // --------------------- + // Now work the children + // --------------------- + for (auto &possible : possibles) { + int possibleIdx = possible.get<1>(); + Bond *bond = possible.get<2>(); + switch (d_colors[possibleIdx]) { + case WHITE_NODE: + // we haven't seen this node at all before, traverse + find(possibleIdx, bond->getIdx()); + break; + case GREY_NODE: + // we've seen this, but haven't finished it (we're finishing a ring) + d_atomRingClosures[possibleIdx].push_back(bond->getIdx()); + d_atomRingClosures[atomIdx].push_back(bond->getIdx()); + break; + default: + // this node has been finished. don't do anything. + break; + } + } + d_colors[atomIdx] = BLACK_NODE; +} + +class DFSStackBuilder { + public: + DFSStackBuilder( + ROMol &mol, std::vector &colors, const UINT_VECT &ranks, + UINT_VECT &cyclesAvailable, MolStack &molStack, UINT_VECT &atomOrders, + UINT_VECT &bondVisitOrders, VECT_INT_VECT &atomRingClosures, + std::vector &atomTraversalBondOrder, + const boost::dynamic_bitset<> *bondsInPlay, + const std::vector *bondSymbols, bool doRandom) : + d_mol(mol), d_colors(colors), d_ranks(ranks), + d_cyclesAvailable(cyclesAvailable), d_molStack(molStack), + d_atomOrders(atomOrders), d_bondVisitOrders(bondVisitOrders), + d_atomRingClosures(atomRingClosures), + d_atomTraversalBondOrder(atomTraversalBondOrder), + d_bondsInPlay(bondsInPlay), d_bondSymbols(bondSymbols), + d_doRandom(doRandom), d_seenFromHere(mol.getNumAtoms()) + {} + + void build(int atomIdx, int inBondIdx); + + private: + ROMol &d_mol; + std::vector& d_colors; + const UINT_VECT& d_ranks; + UINT_VECT& d_cyclesAvailable; + MolStack& d_molStack; + UINT_VECT& d_atomOrders; + UINT_VECT& d_bondVisitOrders; + VECT_INT_VECT& d_atomRingClosures; + std::vector& d_atomTraversalBondOrder; + const boost::dynamic_bitset<>* d_bondsInPlay; + const std::vector* d_bondSymbols; + bool d_doRandom; + + // reset and reused across all recursive calls + boost::dynamic_bitset<> d_seenFromHere; + std::vector d_ringsClosed; +}; + +void DFSStackBuilder::build(int atomIdx, int inBondIdx) { +#if 0 + std::cerr<<"traverse from atom: "<getIdx()] = rdcast(d_molStack.size()); + d_colors[atomIdx] = GREY_NODE; + + std::vector possibles; + ROMol::OBOND_ITER_PAIR bondsPair = d_mol.getAtomBonds(atom); + possibles.reserve(bondsPair.second - bondsPair.first); + + size_t ringClosuresSize = d_atomRingClosures[atomIdx].size(); + INT_VECT& travList = d_atomTraversalBondOrder[atom->getIdx()]; + if (inBondIdx >= 0) { + travList.reserve(ringClosuresSize + possibles.capacity() + 1); + travList.push_back(inBondIdx); + } else { + travList.reserve(ringClosuresSize + possibles.capacity()); + } + + // --------------------- + // Add any ring closures + // --------------------- + if (ringClosuresSize) { + d_ringsClosed.clear(); + for (auto bIdx : d_atomRingClosures[atomIdx]) { + travList.push_back(bIdx); + Bond *bond = d_mol.getBondWithIdx(bIdx); + d_seenFromHere.set(bond->getOtherAtomIdx(atomIdx)); + unsigned int ringIdx; + if (bond->getPropIfPresent(common_properties::_TraversalRingClosureBondKey, + ringIdx)) { + // this is end of the ring closure + // we can just pull the ring index from the bond itself: + d_molStack.emplace_back(bond, atomIdx); + d_bondVisitOrders[bIdx] = d_molStack.size(); + d_molStack.emplace_back(ringIdx); + // don't make the ring digit immediately available again: we don't want + // to have the same + // ring digit opening and closing rings on an atom. + d_ringsClosed.push_back(ringIdx - 1); + } else { + // this is the beginning of the ring closure, we need to come up with a + // ring index: + auto cAIt = + std::find(d_cyclesAvailable.begin(), d_cyclesAvailable.end(), 1); + if (cAIt == d_cyclesAvailable.end()) { + throw ValueErrorException( + "Too many rings open at once. SMILES cannot be generated."); + } + unsigned int lowestRingIdx = cAIt - d_cyclesAvailable.begin(); + d_cyclesAvailable[lowestRingIdx] = 0; + ++lowestRingIdx; + bond->setProp(common_properties::_TraversalRingClosureBondKey, + lowestRingIdx); + d_molStack.emplace_back(lowestRingIdx); + } + } + for (auto ringIdx : d_ringsClosed) { + d_cyclesAvailable[ringIdx] = 1; + } + } + + // --------------------- + // Build the list of possible destinations from here + // --------------------- + + while (bondsPair.first != bondsPair.second) { + Bond *theBond = d_mol[*(bondsPair.first)]; + bondsPair.first++; + if (d_bondsInPlay && !(*d_bondsInPlay)[theBond->getIdx()]) { + continue; + } + if (inBondIdx >= 0 && theBond->getIdx() == static_cast(inBondIdx)) { + continue; + } + int otherIdx = theBond->getOtherAtomIdx(atomIdx); + // --------------------- + // This time we skip the ring-closure atoms (we did them + // above); we want to traverse first to atoms outside the ring + // then to atoms in the ring that haven't already been visited + // (non-ring-closure atoms). + // + // otherwise it's the same ranking logic as above + // --------------------- + if (d_colors[otherIdx] != WHITE_NODE || d_seenFromHere[otherIdx]) { + // ring closure or finished atom... skip it. + continue; + } + unsigned long rank = d_ranks[otherIdx]; + if (d_doRandom) { // randomize the rank + rank = getRandomGenerator()(); + } else { + if (theBond->getOwningMol().getRingInfo()->numBondRings( + theBond->getIdx())) { + if (!d_bondSymbols) { + rank += static_cast(MAX_BONDTYPE - theBond->getBondType()) * + MAX_NATOMS * MAX_NATOMS; + } else { + const std::string &symb = (*d_bondSymbols)[theBond->getIdx()]; + std::uint32_t hsh = gboost::hash_range(symb.begin(), symb.end()); + rank += (hsh % MAX_NATOMS) * MAX_NATOMS * MAX_NATOMS; + } + } + } + + possibles.emplace_back(rank, otherIdx, theBond); + } + + // --------------------- + // Sort on ranks + // --------------------- + std::sort(possibles.begin(), possibles.end(), _possibleCompare()); + // --------------------- + // Now work the children + // --------------------- + for (auto possiblesIt = possibles.begin(); possiblesIt != possibles.end(); + possiblesIt++) { + int possibleIdx = possiblesIt->get<1>(); + if (d_colors[possibleIdx] != WHITE_NODE) { + // we're either done or it's a ring-closure, which we already processed... + // this test isn't strictly required, because we only added WHITE notes to + // the possibles list, but it seems logical to document it + continue; + } + Bond *bond = possiblesIt->get<2>(); + Atom *otherAtom = d_mol.getAtomWithIdx(possibleIdx); + // ww might have some residual data from earlier calls, clean that up: + otherAtom->clearProp(common_properties::_TraversalBondIndexOrderKey); + travList.push_back(bond->getIdx()); + if (possiblesIt + 1 != possibles.end()) { + // we're branching + d_molStack.emplace_back( + MOL_STACK_BRANCH_OPEN, rdcast(possiblesIt - possibles.begin())); + } + d_molStack.emplace_back(bond, atomIdx); + d_bondVisitOrders[bond->getIdx()] = d_molStack.size(); + build(possibleIdx, bond->getIdx()); + if (possiblesIt + 1 != possibles.end()) { + d_molStack.emplace_back( + MOL_STACK_BRANCH_CLOSE, rdcast(possiblesIt - possibles.begin())); + } + } + + d_colors[atomIdx] = BLACK_NODE; +} + } // end of anonymous namespace bool chiralAtomNeedsTagInversion(const RDKit::ROMol &mol, @@ -80,13 +405,6 @@ bool chiralAtomNeedsTagInversion(const RDKit::ROMol &mol, !isUnsaturated(atom, mol))); } -struct _possibleCompare - : public std::binary_function { - bool operator()(const PossibleType &arg1, const PossibleType &arg2) const { - return (arg1.get<0>() < arg2.get<0>()); - } -}; - bool checkBondsInSameBranch(MolStack &molStack, Bond *dblBnd, Bond *dirBnd) { bool seenDblBond = false; int branchCounter = 0; @@ -545,321 +863,13 @@ void canonicalizeDoubleBond(Bond *dblBond, UINT_VECT &bondVisitOrders, } } -// finds cycles -void dfsFindCycles(ROMol &mol, int atomIdx, int inBondIdx, - std::vector &colors, const UINT_VECT &ranks, - UINT_VECT &atomOrders, VECT_INT_VECT &atomRingClosures, - const boost::dynamic_bitset<> *bondsInPlay, - const std::vector *bondSymbols, bool doRandom) { - Atom *atom = mol.getAtomWithIdx(atomIdx); - atomOrders.push_back(atomIdx); - - colors[atomIdx] = GREY_NODE; - - // --------------------- - // - // Build the list of possible destinations from here - // - // --------------------- - std::vector possibles; - possibles.resize(0); - ROMol::OBOND_ITER_PAIR bondsPair = mol.getAtomBonds(atom); - possibles.reserve(bondsPair.second - bondsPair.first); - - while (bondsPair.first != bondsPair.second) { - Bond *theBond = mol[*(bondsPair.first)]; - bondsPair.first++; - if (bondsInPlay && !(*bondsInPlay)[theBond->getIdx()]) { - continue; - } - if (inBondIdx < 0 || - theBond->getIdx() != static_cast(inBondIdx)) { - int otherIdx = theBond->getOtherAtomIdx(atomIdx); - long rank = ranks[otherIdx]; - // --------------------- - // - // things are a bit more complicated if we are sitting on a - // ring atom. we would like to traverse first to the - // ring-closure atoms, then to atoms outside the ring first, - // then to atoms in the ring that haven't already been visited - // (non-ring-closure atoms). - // - // Here's how the black magic works: - // - non-ring atom neighbors have their original ranks - // - ring atom neighbors have this added to their ranks: - // (MAX_BONDTYPE - bondOrder)*MAX_NATOMS*MAX_NATOMS - // - ring-closure neighbors lose a factor of: - // (MAX_BONDTYPE+1)*MAX_NATOMS*MAX_NATOMS - // - // This tactic biases us to traverse to non-ring neighbors first, - // original ordering if bond orders are all equal... crafty, neh? - // - // --------------------- - if (!doRandom) { - if (colors[otherIdx] == GREY_NODE) { - rank -= static_cast(MAX_BONDTYPE + 1) * MAX_NATOMS * MAX_NATOMS; - if (!bondSymbols) { - rank += static_cast(MAX_BONDTYPE - theBond->getBondType()) * - MAX_NATOMS; - } else { - const std::string &symb = (*bondSymbols)[theBond->getIdx()]; - std::uint32_t hsh = gboost::hash_range(symb.begin(), symb.end()); - rank += (hsh % MAX_NATOMS) * MAX_NATOMS; - } - } else if (theBond->getOwningMol().getRingInfo()->numBondRings( - theBond->getIdx())) { - if (!bondSymbols) { - rank += static_cast(MAX_BONDTYPE - theBond->getBondType()) * - MAX_NATOMS * MAX_NATOMS; - } else { - const std::string &symb = (*bondSymbols)[theBond->getIdx()]; - std::uint32_t hsh = gboost::hash_range(symb.begin(), symb.end()); - rank += (hsh % MAX_NATOMS) * MAX_NATOMS * MAX_NATOMS; - } - } - } else { - // randomize the rank - rank = getRandomGenerator()(); - } - // std::cerr << " " << atomIdx << ": " << otherIdx << " " << - // rank - // << std::endl; - // std::cerr<<"aIdx: "<< atomIdx <<" p: "<getBondType()<<" "<() << " " - // << possibles.front().get<1>() << std::endl; - // // --------------------- - // - // Now work the children - // - // --------------------- - for (auto &possible : possibles) { - int possibleIdx = possible.get<1>(); - Bond *bond = possible.get<2>(); - switch (colors[possibleIdx]) { - case WHITE_NODE: - // ----- - // we haven't seen this node at all before, traverse - // ----- - dfsFindCycles(mol, possibleIdx, bond->getIdx(), colors, ranks, - atomOrders, atomRingClosures, bondsInPlay, bondSymbols, - doRandom); - break; - case GREY_NODE: - // ----- - // we've seen this, but haven't finished it (we're finishing a ring) - // ----- - atomRingClosures[possibleIdx].push_back(bond->getIdx()); - atomRingClosures[atomIdx].push_back(bond->getIdx()); - break; - default: - // ----- - // this node has been finished. don't do anything. - // ----- - break; - } - } - colors[atomIdx] = BLACK_NODE; -} // namespace Canon - -void dfsBuildStack(ROMol &mol, int atomIdx, int inBondIdx, - std::vector &colors, VECT_INT_VECT &cycles, - const UINT_VECT &ranks, UINT_VECT &cyclesAvailable, - MolStack &molStack, UINT_VECT &atomOrders, - UINT_VECT &bondVisitOrders, VECT_INT_VECT &atomRingClosures, - std::vector &atomTraversalBondOrder, - const boost::dynamic_bitset<> *bondsInPlay, - const std::vector *bondSymbols, bool doRandom) { -#if 0 - std::cerr<<"traverse from atom: "< seenFromHere(mol.getNumAtoms()); - - seenFromHere.set(atomIdx); - molStack.push_back(MolStackElem(atom)); - atomOrders[atom->getIdx()] = rdcast(molStack.size()); - colors[atomIdx] = GREY_NODE; - - INT_LIST travList; - if (inBondIdx >= 0) { - travList.push_back(inBondIdx); - } - - // --------------------- - // - // Add any ring closures - // - // --------------------- - if (atomRingClosures[atomIdx].size()) { - std::vector ringsClosed; - for (auto bIdx : atomRingClosures[atomIdx]) { - travList.push_back(bIdx); - Bond *bond = mol.getBondWithIdx(bIdx); - seenFromHere.set(bond->getOtherAtomIdx(atomIdx)); - unsigned int ringIdx; - if (bond->getPropIfPresent(common_properties::_TraversalRingClosureBond, - ringIdx)) { - // this is end of the ring closure - // we can just pull the ring index from the bond itself: - molStack.push_back(MolStackElem(bond, atomIdx)); - bondVisitOrders[bIdx] = molStack.size(); - molStack.push_back(MolStackElem(ringIdx)); - // don't make the ring digit immediately available again: we don't want - // to have the same - // ring digit opening and closing rings on an atom. - ringsClosed.push_back(ringIdx - 1); - } else { - // this is the beginning of the ring closure, we need to come up with a - // ring index: - auto cAIt = - std::find(cyclesAvailable.begin(), cyclesAvailable.end(), 1); - if (cAIt == cyclesAvailable.end()) { - throw ValueErrorException( - "Too many rings open at once. SMILES cannot be generated."); - } - unsigned int lowestRingIdx = cAIt - cyclesAvailable.begin(); - cyclesAvailable[lowestRingIdx] = 0; - ++lowestRingIdx; - bond->setProp(common_properties::_TraversalRingClosureBond, - lowestRingIdx); - molStack.push_back(MolStackElem(lowestRingIdx)); - } - } - for (auto ringIdx : ringsClosed) { - cyclesAvailable[ringIdx] = 1; - } - } - - // --------------------- - // - // Build the list of possible destinations from here - // - // --------------------- - std::vector possibles; - possibles.resize(0); - ROMol::OBOND_ITER_PAIR bondsPair = mol.getAtomBonds(atom); - possibles.reserve(bondsPair.second - bondsPair.first); - - while (bondsPair.first != bondsPair.second) { - Bond *theBond = mol[*(bondsPair.first)]; - bondsPair.first++; - if (bondsInPlay && !(*bondsInPlay)[theBond->getIdx()]) { - continue; - } - if (inBondIdx < 0 || - theBond->getIdx() != static_cast(inBondIdx)) { - int otherIdx = theBond->getOtherAtomIdx(atomIdx); - // --------------------- - // - // This time we skip the ring-closure atoms (we did them - // above); we want to traverse first to atoms outside the ring - // then to atoms in the ring that haven't already been visited - // (non-ring-closure atoms). - // - // otherwise it's the same ranking logic as above - // - // --------------------- - if (colors[otherIdx] != WHITE_NODE || seenFromHere[otherIdx]) { - // ring closure or finished atom... skip it. - continue; - } - unsigned long rank = ranks[otherIdx]; - if (!doRandom) { - if (theBond->getOwningMol().getRingInfo()->numBondRings( - theBond->getIdx())) { - if (!bondSymbols) { - rank += static_cast(MAX_BONDTYPE - theBond->getBondType()) * - MAX_NATOMS * MAX_NATOMS; - } else { - const std::string &symb = (*bondSymbols)[theBond->getIdx()]; - std::uint32_t hsh = gboost::hash_range(symb.begin(), symb.end()); - rank += (hsh % MAX_NATOMS) * MAX_NATOMS * MAX_NATOMS; - } - } - } else { - // randomize the rank - rank = getRandomGenerator()(); - } - - possibles.emplace_back(rank, otherIdx, theBond); - } - } - - // --------------------- - // - // Sort on ranks - // - // --------------------- - std::sort(possibles.begin(), possibles.end(), _possibleCompare()); - // if (possibles.size()) - // std::cerr << " aIdx2: " << atomIdx - // << " first: " << possibles.front().get<0>() << " " - // << possibles.front().get<1>() << std::endl; - - // --------------------- - // - // Now work the children - // - // --------------------- - for (auto possiblesIt = possibles.begin(); possiblesIt != possibles.end(); - possiblesIt++) { - int possibleIdx = possiblesIt->get<1>(); - if (colors[possibleIdx] != WHITE_NODE) { - // we're either done or it's a ring-closure, which we already processed... - // this test isn't strictly required, because we only added WHITE notes to - // the possibles list, but it seems logical to document it - continue; - } - Bond *bond = possiblesIt->get<2>(); - Atom *otherAtom = mol.getAtomWithIdx(possibleIdx); - // ww might have some residual data from earlier calls, clean that up: - otherAtom->clearProp(common_properties::_TraversalBondIndexOrder); - travList.push_back(bond->getIdx()); - if (possiblesIt + 1 != possibles.end()) { - // we're branching - molStack.push_back( - MolStackElem("(", rdcast(possiblesIt - possibles.begin()))); - } - molStack.push_back(MolStackElem(bond, atomIdx)); - bondVisitOrders[bond->getIdx()] = molStack.size(); - dfsBuildStack(mol, possibleIdx, bond->getIdx(), colors, cycles, ranks, - cyclesAvailable, molStack, atomOrders, bondVisitOrders, - atomRingClosures, atomTraversalBondOrder, bondsInPlay, - bondSymbols, doRandom); - if (possiblesIt + 1 != possibles.end()) { - molStack.push_back( - MolStackElem(")", rdcast(possiblesIt - possibles.begin()))); - } - } - - atomTraversalBondOrder[atom->getIdx()] = travList; - colors[atomIdx] = BLACK_NODE; -} - void canonicalDFSTraversal(ROMol &mol, int atomIdx, int inBondIdx, std::vector &colors, - VECT_INT_VECT &cycles, const UINT_VECT &ranks, - UINT_VECT &cyclesAvailable, MolStack &molStack, - UINT_VECT &atomOrders, UINT_VECT &bondVisitOrders, + const UINT_VECT &ranks, UINT_VECT &cyclesAvailable, + MolStack &molStack, UINT_VECT &atomOrders, + UINT_VECT &bondVisitOrders, VECT_INT_VECT &atomRingClosures, - std::vector &atomTraversalBondOrder, + std::vector &atomTraversalBondOrder, const boost::dynamic_bitset<> *bondsInPlay, const std::vector *bondSymbols, bool doRandom) { @@ -879,11 +889,14 @@ void canonicalDFSTraversal(ROMol &mol, int atomIdx, int inBondIdx, std::vector tcolors; tcolors.resize(colors.size()); std::copy(colors.begin(), colors.end(), tcolors.begin()); - dfsFindCycles(mol, atomIdx, inBondIdx, tcolors, ranks, atomOrders, - atomRingClosures, bondsInPlay, bondSymbols, doRandom); - dfsBuildStack(mol, atomIdx, inBondIdx, colors, cycles, ranks, cyclesAvailable, - molStack, atomOrders, bondVisitOrders, atomRingClosures, - atomTraversalBondOrder, bondsInPlay, bondSymbols, doRandom); + DFSCycleFinder cycle_finder(mol, tcolors, ranks, atomOrders, atomRingClosures, + bondsInPlay, bondSymbols, doRandom); + cycle_finder.find(atomIdx, inBondIdx); + + DFSStackBuilder stack_builder( + mol, colors, ranks, cyclesAvailable, molStack, atomOrders, bondVisitOrders, + atomRingClosures, atomTraversalBondOrder, bondsInPlay, bondSymbols, doRandom); + stack_builder.build(atomIdx, inBondIdx); } bool canHaveDirection(const Bond *bond) { @@ -1030,12 +1043,11 @@ void canonicalizeFragment(ROMol &mol, int atomIdx, UINT_VECT bondDirCounts(mol.getNumBonds(), 0); UINT_VECT atomDirCounts(nAtoms, 0); UINT_VECT cyclesAvailable(MAX_CYCLES, 1); - VECT_INT_VECT cycles(nAtoms); boost::dynamic_bitset<> ringStereoChemAdjusted(nAtoms); // make sure that we've done the stereo perception: - if (!mol.hasProp(common_properties::_StereochemDone)) { + if (!mol.hasProp(common_properties::_StereochemDoneKey)) { MolOps::assignStereochemistry(mol, false); } @@ -1044,13 +1056,13 @@ void canonicalizeFragment(ROMol &mol, int atomIdx, if (!mol.getRingInfo()->isInitialized()) { MolOps::findSSSR(mol); } - mol.getAtomWithIdx(atomIdx)->setProp(common_properties::_TraversalStartPoint, + mol.getAtomWithIdx(atomIdx)->setProp(common_properties::_TraversalStartPointKey, true); VECT_INT_VECT atomRingClosures(nAtoms); - std::vector atomTraversalBondOrder(nAtoms); + std::vector atomTraversalBondOrder(nAtoms); Canon::canonicalDFSTraversal( - mol, atomIdx, -1, colors, cycles, ranks, cyclesAvailable, molStack, + mol, atomIdx, -1, colors, ranks, cyclesAvailable, molStack, atomVisitOrders, bondVisitOrders, atomRingClosures, atomTraversalBondOrder, bondsInPlay, bondSymbols, doRandom); @@ -1067,14 +1079,14 @@ void canonicalizeFragment(ROMol &mol, int atomIdx, for (const auto &bndItr : boost::make_iterator_range(mol.getAtomBonds(atom))) { if (bondsInPlay && !(*bondsInPlay)[mol[bndItr]->getIdx()]) { - atom->setProp(common_properties::_brokenChirality, true); + atom->setProp(common_properties::_brokenChiralityKey, true); break; } } - if (atom->hasProp(common_properties::_brokenChirality)) { + if (atom->hasProp(common_properties::_brokenChiralityKey)) { continue; } - const INT_LIST &trueOrder = atomTraversalBondOrder[atom->getIdx()]; + const INT_VECT &trueOrder = atomTraversalBondOrder[atom->getIdx()]; // Check if the atom can be chiral, and if chirality needs inversion if (trueOrder.size() >= 3) { @@ -1082,7 +1094,7 @@ void canonicalizeFragment(ROMol &mol, int atomIdx, // We have to make sure that trueOrder contains all the // bonds, even if they won't be written to the SMARTS if (trueOrder.size() < atom->getDegree()) { - INT_LIST tOrder = trueOrder; + INT_VECT tOrder = trueOrder; for (const auto &bndItr : boost::make_iterator_range(mol.getAtomBonds(atom))) { int bndIdx = mol[bndItr]->getIdx(); @@ -1158,14 +1170,14 @@ void canonicalizeFragment(ROMol &mol, int atomIdx, if (doIsomericSmiles) { if (msI.type == MOL_STACK_ATOM && msI.obj.atom->getChiralTag() != Atom::CHI_UNSPECIFIED && - !msI.obj.atom->hasProp(common_properties::_brokenChirality)) { - if (msI.obj.atom->hasProp(common_properties::_ringStereoAtoms)) { + !msI.obj.atom->hasProp(common_properties::_brokenChiralityKey)) { + if (msI.obj.atom->hasProp(common_properties::_ringStereoAtomsKey)) { if (!ringStereoChemAdjusted[msI.obj.atom->getIdx()]) { msI.obj.atom->setChiralTag(Atom::CHI_TETRAHEDRAL_CCW); ringStereoChemAdjusted.set(msI.obj.atom->getIdx()); } const INT_VECT &ringStereoAtoms = msI.obj.atom->getProp( - common_properties::_ringStereoAtoms); + common_properties::_ringStereoAtomsKey); for (auto nbrV : ringStereoAtoms) { int nbrIdx = abs(nbrV) - 1; // Adjust the chirality flag of the ring stereo atoms according to diff --git a/Code/GraphMol/Canon.h b/Code/GraphMol/Canon.h index 44dc321f2e7..218a044fea0 100644 --- a/Code/GraphMol/Canon.h +++ b/Code/GraphMol/Canon.h @@ -87,6 +87,10 @@ class RDKIT_GRAPHMOL_EXPORT MolStackElem { } number = idx; } + explicit MolStackElem(MolStackTypes ctor_type, int idx) { + type = ctor_type; + number = idx; + } MolStackTypes type; //!< stores the type of node MolStackUnion obj; //!< holds our pointer (if appropriate) int number = diff --git a/Code/GraphMol/ChemReactions/ReactionRunner.cpp b/Code/GraphMol/ChemReactions/ReactionRunner.cpp index f3b9a63cff9..0de51945eae 100644 --- a/Code/GraphMol/ChemReactions/ReactionRunner.cpp +++ b/Code/GraphMol/ChemReactions/ReactionRunner.cpp @@ -258,14 +258,14 @@ void updateImplicitAtomProperties(Atom *prodAtom, const Atom *reactAtom) { // return return; } - if (!prodAtom->hasProp(common_properties::_QueryFormalCharge)) { + if (!prodAtom->hasProp(common_properties::_QueryFormalChargeKey)) { prodAtom->setFormalCharge(reactAtom->getFormalCharge()); } - if (!prodAtom->hasProp(common_properties::_QueryIsotope)) { + if (!prodAtom->hasProp(common_properties::_QueryIsotopeKey)) { prodAtom->setIsotope(reactAtom->getIsotope()); } - if (!prodAtom->hasProp(common_properties::_ReactionDegreeChanged)) { - if (!prodAtom->hasProp(common_properties::_QueryHCount)) { + if (!prodAtom->hasProp(common_properties::_ReactionDegreeChangedKey)) { + if (!prodAtom->hasProp(common_properties::_QueryHCountKey)) { prodAtom->setNumExplicitHs(reactAtom->getNumExplicitHs()); prodAtom->setNoImplicit(reactAtom->getNoImplicit()); } @@ -302,14 +302,14 @@ RWMOL_SPTR convertTemplateToMol(const ROMOL_SPTR prodTemplateSptr) { auto *newAtom = new Atom(*oAtom); res->addAtom(newAtom, false, true); int mapNum; - if (newAtom->getPropIfPresent(common_properties::molAtomMapNumber, + if (newAtom->getPropIfPresent(common_properties::molAtomMapNumberKey, mapNum)) { // set bookmarks for the mapped atoms: res->setAtomBookmark(newAtom, mapNum); // now clear the molAtomMapNumber property so that it doesn't // end up in the products (this was bug 3140490): - newAtom->clearProp(common_properties::molAtomMapNumber); - newAtom->setProp(common_properties::reactionMapNum, mapNum); + newAtom->clearProp(common_properties::molAtomMapNumberKey); + newAtom->setProp(common_properties::reactionMapNumKey, mapNum); } newAtom->setChiralTag(Atom::CHI_UNSPECIFIED); @@ -317,7 +317,7 @@ RWMOL_SPTR convertTemplateToMol(const ROMOL_SPTR prodTemplateSptr) { // to 4 (=SET), then bring its stereochem over, otherwise we'll // ignore it: int iFlag; - if (oAtom->getPropIfPresent(common_properties::molInversionFlag, iFlag)) { + if (oAtom->getPropIfPresent(common_properties::molInversionFlagKey, iFlag)) { if (iFlag == 4) { newAtom->setChiralTag(oAtom->getChiralTag()); } @@ -325,18 +325,18 @@ RWMOL_SPTR convertTemplateToMol(const ROMOL_SPTR prodTemplateSptr) { // check for properties we need to set: int val; - if (newAtom->getPropIfPresent(common_properties::_QueryFormalCharge, val)) { + if (newAtom->getPropIfPresent(common_properties::_QueryFormalChargeKey, val)) { newAtom->setFormalCharge(val); } - if (newAtom->getPropIfPresent(common_properties::_QueryHCount, val)) { + if (newAtom->getPropIfPresent(common_properties::_QueryHCountKey, val)) { newAtom->setNumExplicitHs(val); newAtom->setNoImplicit(true); // this was github #1544 } - if (newAtom->getPropIfPresent(common_properties::_QueryMass, val)) { + if (newAtom->getPropIfPresent(common_properties::_QueryMassKey, val)) { // FIX: technically should do something with this // newAtom->setMass(val); } - if (newAtom->getPropIfPresent(common_properties::_QueryIsotope, val)) { + if (newAtom->getPropIfPresent(common_properties::_QueryIsotopeKey, val)) { newAtom->setIsotope(val); } } @@ -372,7 +372,7 @@ RWMOL_SPTR convertTemplateToMol(const ROMOL_SPTR prodTemplateSptr) { newB->setIsAromatic(false); } } else if (queryDescription == "BondNull") { - newB->setProp(common_properties::NullBond, 1); + newB->setProp(common_properties::NullBondKey, 1); } } @@ -429,7 +429,7 @@ ReactantProductAtomMapping *getAtomMappingsReactantProduct( for (const auto &i : match) { const Atom *templateAtom = reactantTemplate.getAtomWithIdx(i.first); int molAtomMapNumber; - if (templateAtom->getPropIfPresent(common_properties::molAtomMapNumber, + if (templateAtom->getPropIfPresent(common_properties::molAtomMapNumberKey, molAtomMapNumber)) { if (product->hasAtomBookmark(molAtomMapNumber)) { RWMol::ATOM_PTR_LIST atomIdxs = @@ -707,8 +707,8 @@ void setReactantBondPropertiesToProduct(RWMOL_SPTR product, Bond *pBond = (*product)[*(bondItP.first)]; ++bondItP.first; - if (!pBond->hasProp(common_properties::NullBond) && - !pBond->hasProp(common_properties::_MolFileBondQuery)) { + if (!pBond->hasProp(common_properties::NullBondKey) && + !pBond->hasProp(common_properties::_MolFileBondQueryKey)) { continue; } @@ -730,8 +730,8 @@ void setReactantBondPropertiesToProduct(RWMOL_SPTR product, pBond->setBondType(rBond->getBondType()); pBond->setIsAromatic(rBond->getIsAromatic()); - if (pBond->hasProp(common_properties::NullBond)) { - pBond->clearProp(common_properties::NullBond); + if (pBond->hasProp(common_properties::NullBondKey)) { + pBond->clearProp(common_properties::NullBondKey); } } } @@ -783,7 +783,7 @@ void setReactantAtomPropertiesToProduct(Atom *productAtom, bool setImplicitProperties) { // which properties need to be set from the reactant? if (productAtom->getAtomicNum() <= 0 || - productAtom->hasProp(common_properties::_MolFileAtomQuery)) { + productAtom->hasProp(common_properties::_MolFileAtomQueryKey)) { productAtom->setAtomicNum(reactantAtom.getAtomicNum()); productAtom->setIsAromatic(reactantAtom.getIsAromatic()); // don't copy isotope information over from dummy atoms @@ -793,13 +793,13 @@ void setReactantAtomPropertiesToProduct(Atom *productAtom, productAtom->setIsotope(reactantAtom.getIsotope()); } // remove dummy labels (if present) - if (productAtom->hasProp(common_properties::dummyLabel)) { - productAtom->clearProp(common_properties::dummyLabel); + if (productAtom->hasProp(common_properties::dummyLabelKey)) { + productAtom->clearProp(common_properties::dummyLabelKey); } - if (productAtom->hasProp(common_properties::_MolFileRLabel)) { - productAtom->clearProp(common_properties::_MolFileRLabel); + if (productAtom->hasProp(common_properties::_MolFileRLabelKey)) { + productAtom->clearProp(common_properties::_MolFileRLabelKey); } - productAtom->setProp(common_properties::reactantAtomIdx, + productAtom->setProp(common_properties::reactantAtomIdxKey, reactantAtom.getIdx()); productAtom->setProp(WAS_DUMMY, true); } else { @@ -808,7 +808,7 @@ void setReactantAtomPropertiesToProduct(Atom *productAtom, productAtom->clearProp(WAS_DUMMY); } } - productAtom->setProp(common_properties::reactantAtomIdx, + productAtom->setProp(common_properties::reactantAtomIdxKey, reactantAtom.getIdx()); if (setImplicitProperties) { updateImplicitAtomProperties(productAtom, &reactantAtom); @@ -823,7 +823,7 @@ void setReactantAtomPropertiesToProduct(Atom *productAtom, // FIX: this should be free-standing, not in this function. if (reactantAtom.getChiralTag() != Atom::CHI_UNSPECIFIED && reactantAtom.getChiralTag() != Atom::CHI_OTHER && - productAtom->hasProp(common_properties::molInversionFlag)) { + productAtom->hasProp(common_properties::molInversionFlagKey)) { checkProductChirality(reactantAtom.getChiralTag(), productAtom); } @@ -854,7 +854,7 @@ void addMissingProductAtom(const Atom &reactAtom, unsigned reactNeighborIdx, ReactantProductAtomMapping *mapping) { auto *newAtom = new Atom(reactAtom); unsigned reactAtomIdx = reactAtom.getIdx(); - newAtom->setProp(common_properties::reactantAtomIdx, + newAtom->setProp(common_properties::reactantAtomIdxKey, reactAtomIdx); unsigned productIdx = product->addAtom(newAtom, false, true); mapping->reactProdAtomMap[reactAtomIdx].push_back(productIdx); @@ -875,7 +875,7 @@ void addReactantNeighborsToProduct( boost::dynamic_bitset<> &visitedAtoms, std::vector &chiralAtomsToCheck, ReactantProductAtomMapping *mapping) { - std::list atomStack; + std::deque atomStack; atomStack.push_back(&reactantAtom); // std::cerr << "-------------------" << std::endl; @@ -1007,7 +1007,7 @@ void checkAndCorrectChiralityOfMatchingAtomsInProduct( Atom *productAtom = product->getAtomWithIdx(productAtomIdx); int inversionFlag = 0; - productAtom->getPropIfPresent(common_properties::molInversionFlag, + productAtom->getPropIfPresent(common_properties::molInversionFlagKey, inversionFlag); // if stereochemistry wasn't present in the reactant or if we're // either creating or destroying stereo we don't mess with this @@ -1111,7 +1111,7 @@ void checkAndCorrectChiralityOfMatchingAtomsInProduct( invert = true; } int inversionFlag; - if (productAtom->getPropIfPresent(common_properties::molInversionFlag, + if (productAtom->getPropIfPresent(common_properties::molInversionFlagKey, inversionFlag) && inversionFlag == 1) { invert = !invert; @@ -1207,7 +1207,7 @@ void copyEnhancedStereoGroups(const ROMol &reactant, RWMOL_SPTR product, } // If chirality defined explicitly by the reaction, skip the atom int flagVal = 0; - productAtom->getPropIfPresent(common_properties::molInversionFlag, + productAtom->getPropIfPresent(common_properties::molInversionFlagKey, flagVal); if (flagVal == 4) { continue; diff --git a/Code/GraphMol/Chirality.cpp b/Code/GraphMol/Chirality.cpp index 751e538a868..aa9da13442a 100644 --- a/Code/GraphMol/Chirality.cpp +++ b/Code/GraphMol/Chirality.cpp @@ -755,7 +755,7 @@ const Atom *findHighestCIPNeighbor(const Atom *atom, const Atom *skipAtom) { continue; } unsigned cip = 0; - if (!neighbor->getPropIfPresent(common_properties::_CIPRank, cip)) { + if (!neighbor->getPropIfPresent(common_properties::_CIPRankKey, cip)) { // If at least one of the atoms doesn't have a CIP rank, the highest rank // does not make sense, so return a nullptr. return nullptr; @@ -872,7 +872,7 @@ void buildCIPInvariants(const ROMol &mol, DOUBLE_VECT &res) { invariant = (invariant << nMassBits) | mass; int mapnum = -1; - atom->getPropIfPresent(common_properties::molAtomMapNumber, mapnum); + atom->getPropIfPresent(common_properties::molAtomMapNumberKey, mapnum); mapnum = (mapnum + 1) % 1024; // increment to allow map numbers of zero // (though that would be stupid) invariant = (invariant << 10) | mapnum; @@ -881,16 +881,21 @@ void buildCIPInvariants(const ROMol &mol, DOUBLE_VECT &res) { } } -void iterateCIPRanks(const ROMol &mol, DOUBLE_VECT &invars, UINT_VECT &ranks, - bool seedWithInvars) { +void iterateCIPRanks(const ROMol &mol, const DOUBLE_VECT &invars, + UINT_VECT &ranks, bool seedWithInvars) { PRECONDITION(invars.size() == mol.getNumAtoms(), "bad invars size"); PRECONDITION(ranks.size() >= mol.getNumAtoms(), "bad ranks size"); unsigned int numAtoms = mol.getNumAtoms(); + if (numAtoms == 0) { return; } + if (numAtoms == 1) { + ranks[0] = 0; + return; + } + CIP_ENTRY_VECT cipEntries(numAtoms); - INT_LIST allIndices; - for (unsigned int i = 0; i < numAtoms; ++i) { - allIndices.push_back(i); + for (auto& vec : cipEntries) { + vec.reserve(16); } #ifdef VERBOSE_CANON BOOST_LOG(rdDebugLog) << "invariants:" << std::endl; @@ -900,7 +905,10 @@ void iterateCIPRanks(const ROMol &mol, DOUBLE_VECT &invars, UINT_VECT &ranks, #endif // rank those: - Rankers::rankVect(invars, ranks); + std::vector rank_indices(numAtoms); + // The dual of rank_indices: rank_indices[i] == j <=> rank_indices_dual[j] == i + std::vector rank_indices_dual(numAtoms); + Rankers::rankVect(invars, rank_indices, ranks); #ifdef VERBOSE_CANON BOOST_LOG(rdDebugLog) << "initial ranks:" << std::endl; for (unsigned int i = 0; i < numAtoms; ++i) { @@ -911,11 +919,11 @@ void iterateCIPRanks(const ROMol &mol, DOUBLE_VECT &invars, UINT_VECT &ranks, // Note: in general one should avoid the temptation to // use invariants here, those lead to incorrect answers for (unsigned int i = 0; i < numAtoms; i++) { - if (!seedWithInvars) { + if (seedWithInvars) { + cipEntries[i].push_back(static_cast(invars[i])); + } else { cipEntries[i].push_back(mol[i]->getAtomicNum()); cipEntries[i].push_back(static_cast(ranks[i])); - } else { - cipEntries[i].push_back(static_cast(invars[i])); } } @@ -932,18 +940,33 @@ void iterateCIPRanks(const ROMol &mol, DOUBLE_VECT &invars, UINT_VECT &ranks, unsigned int maxIts = numAtoms / 2 + 1; unsigned int numIts = 0; int lastNumRanks = -1; - unsigned int numRanks = *std::max_element(ranks.begin(), ranks.end()) + 1; + unsigned int numRanks = ranks[rank_indices[numAtoms-1]] + 1; + std::vector counts(ranks.size()); + std::vector updatedNbrIdxs; + updatedNbrIdxs.reserve(8); while (numRanks < numAtoms && numIts < maxIts && (lastNumRanks < 0 || static_cast(lastNumRanks) < numRanks)) { unsigned int longestEntry = 0; // ---------------------------------------------------- - // // for each atom, get a sorted list of its neighbors' ranks: - // - for (int &index : allIndices) { - CIP_ENTRY localEntry; - localEntry.reserve(16); + for (unsigned int index = 0; index < numAtoms; ++index) { + // If the atom already has a unique rank, then skip processing it. + if (numIts > 0) { + unsigned int rank_index = rank_indices_dual[index]; + bool has_unique_rank_left = true; + bool has_unique_rank_right = true; + if (rank_index > 0) { + has_unique_rank_left = ranks[index] != ranks[rank_indices[rank_index-1]]; + } + if (rank_index < numAtoms-1) { + has_unique_rank_right = ranks[index] != ranks[rank_indices[rank_index+1]]; + } + if (has_unique_rank_left && has_unique_rank_right) { continue; } + } + + // Note: counts is cleaned up when we drain into cipEntries. + updatedNbrIdxs.clear(); // start by pushing on our neighbors' ranks: ROMol::OEDGE_ITER beg, end; @@ -952,58 +975,68 @@ void iterateCIPRanks(const ROMol &mol, DOUBLE_VECT &invars, UINT_VECT &ranks, const Bond *bond = mol[*beg]; ++beg; unsigned int nbrIdx = bond->getOtherAtomIdx(index); - const Atom *nbr = mol[nbrIdx]; + updatedNbrIdxs.push_back(nbrIdx); - int rank = ranks[nbrIdx] + 1; // put the neighbor in 2N times where N is the bond order as a double. // this is to treat aromatic linkages on fair footing. i.e. at least in - // the - // first iteration --c(:c):c and --C(=C)-C should look the same. + // the first iteration --c(:c):c and --C(=C)-C should look the same. // this was part of issue 3009911 - unsigned int count; - if (bond->getBondType() == Bond::DOUBLE && nbr->getAtomicNum() == 15 && - (nbr->getDegree() == 4 || nbr->getDegree() == 3)) { - // a special case for chiral phosphorous compounds - // (this was leading to incorrect assignment of - // R/S labels ): - count = 1; - - // general justification of this is: - // Paragraph 2.2. in the 1966 article is "Valence-Bond Conventions: - // Multiple-Bond Unsaturation and Aromaticity". It contains several - // conventions of which convention (b) is the one applying here: - // "(b) Contributions by d orbitals to bonds of quadriligant atoms are - // neglected." - // FIX: this applies to more than just P + // a special case for chiral phosphorus compounds + // (this was leading to incorrect assignment of R/S labels ): + bool isChiralPhosphorusSpecialCase; + if (bond->getBondType() != Bond::DOUBLE) { + isChiralPhosphorusSpecialCase = false; + } else { // double bond + const Atom *nbr = mol[nbrIdx]; + if (nbr->getAtomicNum() != 15) { + isChiralPhosphorusSpecialCase = false; + } else { + unsigned int nbrDeg = nbr->getDegree(); + isChiralPhosphorusSpecialCase = nbrDeg == 3 || nbrDeg == 4; + } + }; + + // general justification of this is: + // Paragraph 2.2. in the 1966 article is "Valence-Bond Conventions: + // Multiple-Bond Unsaturation and Aromaticity". It contains several + // conventions of which convention (b) is the one applying here: + // "(b) Contributions by d orbitals to bonds of quadriligant atoms are + // neglected." + // FIX: this applies to more than just P + if (isChiralPhosphorusSpecialCase) { + counts[nbrIdx] += 1; } else { - count = static_cast( - floor(2. * bond->getBondTypeAsDouble() + .1)); + counts[nbrIdx] += bond->getTwiceBondType(); } - auto ePos = - std::lower_bound(localEntry.begin(), localEntry.end(), rank); - localEntry.insert(ePos, count, rank); - ++nbr; - } - // add a zero for each coordinated H: - // (as long as we're not a query atom) - if (!mol[index]->hasQuery()) { - localEntry.insert(localEntry.begin(), mol[index]->getTotalNumHs(), 0); } // we now have a sorted list of our neighbors' ranks, // copy it on in reversed order: - cipEntries[index].insert(cipEntries[index].end(), localEntry.rbegin(), - localEntry.rend()); - if (cipEntries[index].size() > longestEntry) { - longestEntry = rdcast(cipEntries[index].size()); + if (updatedNbrIdxs.size() > 1) { // compare vs 1 for performance. + std::sort(std::begin(updatedNbrIdxs), std::end(updatedNbrIdxs), + [&ranks](unsigned int idx1, unsigned int idx2) { + return ranks[idx1] > ranks[idx2]; + }); + } + auto& cipEntry = cipEntries[index]; + for (auto nbrIdx : updatedNbrIdxs) { + unsigned int count = counts[nbrIdx]; + cipEntry.insert(cipEntry.end(), count, ranks[nbrIdx] + 1); + counts[nbrIdx] = 0; + } + // add a zero for each coordinated H as long as we're not a query atom + if (!mol[index]->hasQuery()) { + cipEntry.insert(cipEntry.end(), mol[index]->getTotalNumHs(), 0); + } + + if (cipEntry.size() > longestEntry) { + longestEntry = rdcast(cipEntry.size()); } } // ---------------------------------------------------- - // // pad the entries so that we compare rounds to themselves: - // - for (int &index : allIndices) { + for (unsigned int index = 0; index < numAtoms; ++index) { auto sz = rdcast(cipEntries[index].size()); if (sz < longestEntry) { cipEntries[index].insert(cipEntries[index].end(), longestEntry - sz, @@ -1011,13 +1044,15 @@ void iterateCIPRanks(const ROMol &mol, DOUBLE_VECT &invars, UINT_VECT &ranks, } } // ---------------------------------------------------- - // // sort the new ranks and update the list of active indices: - // lastNumRanks = numRanks; - Rankers::rankVect(cipEntries, ranks); - numRanks = *std::max_element(ranks.begin(), ranks.end()) + 1; + bool reset_indices = numIts == 0; + Rankers::rankVect(cipEntries, rank_indices, ranks, reset_indices); + numRanks = ranks[rank_indices[numAtoms-1]] + 1; + for (unsigned i = 0; i < numAtoms; ++i) { + rank_indices_dual[rank_indices[i]] = i; + } // now truncate each vector and stick the rank at the end for (unsigned int i = 0; i < numAtoms; ++i) { @@ -1055,7 +1090,7 @@ void assignAtomCIPRanks(const ROMol &mol, UINT_VECT &ranks) { // copy the ranks onto the atoms: for (unsigned int i = 0; i < numAtoms; ++i) { - mol[i]->setProp(common_properties::_CIPRank, ranks[i], 1); + mol[i]->setProp(common_properties::_CIPRankKey, ranks[i], 1); } } @@ -1191,7 +1226,7 @@ bool atomIsCandidateForRingStereochem(const ROMol &mol, const Atom *atom) { const Atom *nbr = bond->getOtherAtom(atom); ringNbrs.push_back(nbr); unsigned int rnk = 0; - nbr->getPropIfPresent(common_properties::_CIPRank, rnk); + nbr->getPropIfPresent(common_properties::_CIPRankKey, rnk); nbrRanks.insert(rnk); } ++beg; @@ -1199,9 +1234,9 @@ bool atomIsCandidateForRingStereochem(const ROMol &mol, const Atom *atom) { unsigned int rank1 = 0, rank2 = 0; switch (nonRingNbrs.size()) { case 2: - if (nonRingNbrs[0]->getPropIfPresent(common_properties::_CIPRank, + if (nonRingNbrs[0]->getPropIfPresent(common_properties::_CIPRankKey, rank1) && - nonRingNbrs[1]->getPropIfPresent(common_properties::_CIPRank, + nonRingNbrs[1]->getPropIfPresent(common_properties::_CIPRankKey, rank2)) { if (rank1 == rank2) { res = false; @@ -1450,6 +1485,8 @@ std::pair assignAtomChiralCodes(ROMol &mol, UINT_VECT &ranks, "bad rank vector size"); bool atomChanged = false; unsigned int unassignedAtoms = 0; + Chirality::INT_PAIR_VECT nbrs; + nbrs.reserve(4); // ------------------ // now loop over each atom and, if it's marked as chiral, @@ -1463,7 +1500,7 @@ std::pair assignAtomChiralCodes(ROMol &mol, UINT_VECT &ranks, // we understand: if (flagPossibleStereoCenters || (tag != Atom::CHI_UNSPECIFIED && tag != Atom::CHI_OTHER)) { - if (atom->hasProp(common_properties::_CIPCode)) { + if (atom->hasProp(common_properties::_CIPCodeKey)) { continue; } @@ -1471,7 +1508,7 @@ std::pair assignAtomChiralCodes(ROMol &mol, UINT_VECT &ranks, // if we need to, get the "CIP" ranking of each atom: assignAtomCIPRanks(mol, ranks); } - Chirality::INT_PAIR_VECT nbrs; + nbrs.clear(); bool legalCenter, hasDupes; boost::tie(legalCenter, hasDupes) = isAtomPotentialChiralCenter(atom, mol, ranks, nbrs); @@ -1479,7 +1516,7 @@ std::pair assignAtomChiralCodes(ROMol &mol, UINT_VECT &ranks, ++unassignedAtoms; } if (legalCenter && !hasDupes && flagPossibleStereoCenters) { - atom->setProp(common_properties::_ChiralityPossible, 1); + atom->setProp(common_properties::_ChiralityPossibleKey, 1); } if (legalCenter && !hasDupes && tag != Atom::CHI_UNSPECIFIED && @@ -1521,7 +1558,7 @@ std::pair assignAtomChiralCodes(ROMol &mol, UINT_VECT &ranks, } else { cipCode = "R"; } - atom->setProp(common_properties::_CIPCode, cipCode); + atom->setProp(common_properties::_CIPCodeKey, cipCode); } } } @@ -1697,7 +1734,7 @@ void rerankAtoms(const ROMol &mol, UINT_VECT &ranks) { const Atom *atom = mol.getAtomWithIdx(i); // Priority order: R > S > nothing std::string cipCode; - if (atom->getPropIfPresent(common_properties::_CIPCode, cipCode)) { + if (atom->getPropIfPresent(common_properties::_CIPCodeKey, cipCode)) { if (cipCode == "S") { invars[i] += 10; } else if (cipCode == "R") { @@ -1721,7 +1758,7 @@ void rerankAtoms(const ROMol &mol, UINT_VECT &ranks) { iterateCIPRanks(mol, invars, ranks, true); // copy the ranks onto the atoms: for (unsigned int i = 0; i < mol.getNumAtoms(); i++) { - mol.getAtomWithIdx(i)->setProp(common_properties::_CIPRank, ranks[i]); + mol.getAtomWithIdx(i)->setProp(common_properties::_CIPRankKey, ranks[i]); } #ifdef VERBOSE_CANON @@ -1808,7 +1845,7 @@ namespace MolOps { */ void assignStereochemistry(ROMol &mol, bool cleanIt, bool force, bool flagPossibleStereoCenters) { - if (!force && mol.hasProp(common_properties::_StereochemDone)) { + if (!force && mol.hasProp(common_properties::_StereochemDoneKey)) { return; } @@ -1834,11 +1871,11 @@ void assignStereochemistry(ROMol &mol, bool cleanIt, bool force, for (ROMol::AtomIterator atIt = mol.beginAtoms(); atIt != mol.endAtoms(); ++atIt) { if (cleanIt) { - if ((*atIt)->hasProp(common_properties::_CIPCode)) { - (*atIt)->clearProp(common_properties::_CIPCode); + if ((*atIt)->hasProp(common_properties::_CIPCodeKey)) { + (*atIt)->clearProp(common_properties::_CIPCodeKey); } - if ((*atIt)->hasProp(common_properties::_ChiralityPossible)) { - (*atIt)->clearProp(common_properties::_ChiralityPossible); + if ((*atIt)->hasProp(common_properties::_ChiralityPossibleKey)) { + (*atIt)->clearProp(common_properties::_ChiralityPossibleKey); } } if (!hasStereoAtoms && (*atIt)->getChiralTag() != Atom::CHI_UNSPECIFIED && @@ -1925,11 +1962,11 @@ void assignStereochemistry(ROMol &mol, bool cleanIt, bool force, for (ROMol::AtomIterator atIt = mol.beginAtoms(); atIt != mol.endAtoms(); ++atIt) { - if ((*atIt)->hasProp(common_properties::_ringStereochemCand)) { - (*atIt)->clearProp(common_properties::_ringStereochemCand); + if ((*atIt)->hasProp(common_properties::_ringStereochemCandKey)) { + (*atIt)->clearProp(common_properties::_ringStereochemCandKey); } - if ((*atIt)->hasProp(common_properties::_ringStereoAtoms)) { - (*atIt)->clearProp(common_properties::_ringStereoAtoms); + if ((*atIt)->hasProp(common_properties::_ringStereoAtomsKey)) { + (*atIt)->clearProp(common_properties::_ringStereoAtomsKey); } } boost::dynamic_bitset<> possibleSpecialCases(mol.getNumAtoms()); @@ -1937,9 +1974,9 @@ void assignStereochemistry(ROMol &mol, bool cleanIt, bool force, for (auto atom : mol.atoms()) { if (atom->getChiralTag() != Atom::CHI_UNSPECIFIED && - !atom->hasProp(common_properties::_CIPCode) && + !atom->hasProp(common_properties::_CIPCodeKey) && (!possibleSpecialCases[atom->getIdx()] || - !atom->hasProp(common_properties::_ringStereoAtoms))) { + !atom->hasProp(common_properties::_ringStereoAtomsKey))) { atom->setChiralTag(Atom::CHI_UNSPECIFIED); // If the atom has an explicit hydrogen and no charge, that H @@ -2033,7 +2070,7 @@ void assignStereochemistry(ROMol &mol, bool cleanIt, bool force, #endif } } - mol.setProp(common_properties::_StereochemDone, 1, true); + mol.setProp(common_properties::_StereochemDoneKey, 1, true); #if 0 std::cerr << "---\n"; @@ -2088,13 +2125,13 @@ void findPotentialStereoBonds(ROMol &mol, bool cleanIt) { // ------------------ // get the CIP ranking of each atom if we need it: if (!cipDone) { - if (!begAtom->hasProp(common_properties::_CIPRank)) { + if (!begAtom->hasProp(common_properties::_CIPRankKey)) { Chirality::assignAtomCIPRanks(mol, ranks); } else { // no need to recompute if we don't need to recompute. :-) for (unsigned int ai = 0; ai < mol.getNumAtoms(); ++ai) { ranks[ai] = mol.getAtomWithIdx(ai)->getProp( - common_properties::_CIPRank); + common_properties::_CIPRankKey); } } cipDone = true; @@ -2568,11 +2605,11 @@ void removeStereochemistry(ROMol &mol) { for (ROMol::AtomIterator atIt = mol.beginAtoms(); atIt != mol.endAtoms(); ++atIt) { (*atIt)->setChiralTag(Atom::CHI_UNSPECIFIED); - if ((*atIt)->hasProp(common_properties::_CIPCode)) { - (*atIt)->clearProp(common_properties::_CIPCode); + if ((*atIt)->hasProp(common_properties::_CIPCodeKey)) { + (*atIt)->clearProp(common_properties::_CIPCodeKey); } - if ((*atIt)->hasProp(common_properties::_CIPRank)) { - (*atIt)->clearProp(common_properties::_CIPRank); + if ((*atIt)->hasProp(common_properties::_CIPRankKey)) { + (*atIt)->clearProp(common_properties::_CIPRankKey); } } for (ROMol::BondIterator bondIt = mol.beginBonds(); bondIt != mol.endBonds(); diff --git a/Code/GraphMol/FileParsers/SDWriter.cpp b/Code/GraphMol/FileParsers/SDWriter.cpp index e8f60169716..8928908bbc9 100644 --- a/Code/GraphMol/FileParsers/SDWriter.cpp +++ b/Code/GraphMol/FileParsers/SDWriter.cpp @@ -118,8 +118,12 @@ void _MolToSDStream(std::ostream *dp_ostream, const ROMol &mol, int confId, // if use did not specify any properties, write all non computed properties // out to the file STR_VECT properties = mol.getPropList(); + std::vector compLstNums; + mol.getPropIfPresent(RDKit::detail::computedPropName, compLstNums); STR_VECT compLst; - mol.getPropIfPresent(RDKit::detail::computedPropName, compLst); + for (auto num : compLstNums) { + compLst.push_back(Dict::get_strkey(num)); + } STR_VECT_CI pi; for (pi = properties.begin(); pi != properties.end(); pi++) { diff --git a/Code/GraphMol/FileParsers/TDTWriter.cpp b/Code/GraphMol/FileParsers/TDTWriter.cpp index 269ef80582b..7bd093c7a5e 100644 --- a/Code/GraphMol/FileParsers/TDTWriter.cpp +++ b/Code/GraphMol/FileParsers/TDTWriter.cpp @@ -133,8 +133,12 @@ void TDTWriter::write(const ROMol &mol, int confId) { // if use did not specify any properties, write all non computed properties // out to the file STR_VECT properties = mol.getPropList(); + std::vector compLstNums; + mol.getPropIfPresent(RDKit::detail::computedPropName, compLstNums); STR_VECT compLst; - mol.getPropIfPresent(RDKit::detail::computedPropName, compLst); + for (auto num : compLstNums) { + compLst.push_back(Dict::get_strkey(num)); + } STR_VECT_CI pi; for (pi = properties.begin(); pi != properties.end(); pi++) { diff --git a/Code/GraphMol/MolOps.cpp b/Code/GraphMol/MolOps.cpp index d452d680d24..c7b97708488 100644 --- a/Code/GraphMol/MolOps.cpp +++ b/Code/GraphMol/MolOps.cpp @@ -17,6 +17,7 @@ #include #include +#include #include @@ -432,7 +433,8 @@ std::vector> detectChemistryProblems( std::vector getMolFrags(const ROMol &mol, bool sanitizeFrags, INT_VECT *frags, VECT_INT_VECT *fragsMolAtomMapping, - bool copyConformers) { + bool copyConformers, + bool *shortCircuitOnSingle) { bool ownIt = false; INT_VECT *mapping; if (frags) { @@ -444,16 +446,21 @@ std::vector getMolFrags(const ROMol &mol, bool sanitizeFrags, unsigned int nFrags = getMolFrags(mol, *mapping); std::vector res; if (nFrags == 1) { - auto *tmp = new RWMol(mol); - RWMOL_SPTR sptr(tmp); - res.push_back(sptr); if (fragsMolAtomMapping) { - INT_VECT comp; - for (unsigned int idx = 0; idx < mol.getNumAtoms(); ++idx) { - comp.push_back(idx); + (*fragsMolAtomMapping).emplace_back(mol.getNumAtoms()); + std::iota(fragsMolAtomMapping->back().begin(), fragsMolAtomMapping->back().end(), 0); + } + if (shortCircuitOnSingle != nullptr) { + *shortCircuitOnSingle = true; + if (ownIt) { + delete mapping; } - (*fragsMolAtomMapping).push_back(comp); + return std::vector(); } + + auto *tmp = new RWMol(mol); + RWMOL_SPTR sptr(tmp); + res.push_back(sptr); } else { std::vector ids(mol.getNumAtoms(), -1); boost::dynamic_bitset<> copiedAtoms(mol.getNumAtoms(), 0); @@ -493,7 +500,7 @@ std::vector getMolFrags(const ROMol &mol, bool sanitizeFrags, for (unsigned int idx = 0; idx < mol.getNumAtoms(); ++idx) { const Atom *oAtm = mol.getAtomWithIdx(idx); INT_VECT ringStereoAtomsMol; - if (oAtm->getPropIfPresent(common_properties::_ringStereoAtoms, + if (oAtm->getPropIfPresent(common_properties::_ringStereoAtomsKey, ringStereoAtomsMol)) { INT_VECT ringStereoAtomsCopied; for (int rnbr : ringStereoAtomsMol) { @@ -505,7 +512,7 @@ std::vector getMolFrags(const ROMol &mol, bool sanitizeFrags, ringStereoAtomsCopied.push_back(ridx); } res[(*mapping)[idx]]->getAtomWithIdx(ids[idx])->setProp( - common_properties::_ringStereoAtoms, ringStereoAtomsCopied); + common_properties::_ringStereoAtomsKey, ringStereoAtomsCopied); } } diff --git a/Code/GraphMol/MolOps.h b/Code/GraphMol/MolOps.h index f592c0d226a..6a33629d28b 100644 --- a/Code/GraphMol/MolOps.h +++ b/Code/GraphMol/MolOps.h @@ -105,7 +105,7 @@ RDKIT_GRAPHMOL_EXPORT std::vector> getMolFrags( const ROMol &mol, bool sanitizeFrags = true, std::vector *frags = nullptr, std::vector> *fragsMolAtomMapping = nullptr, - bool copyConformers = true); + bool copyConformers = true, bool *shortCircuitOnSingle = nullptr); //! splits a molecule into pieces based on labels assigned using a query /*! diff --git a/Code/GraphMol/MolPickler.cpp b/Code/GraphMol/MolPickler.cpp index 4a4643f6c3a..298a9987725 100644 --- a/Code/GraphMol/MolPickler.cpp +++ b/Code/GraphMol/MolPickler.cpp @@ -340,7 +340,7 @@ void finalizeQueryFromDescription(Query *query, query->setMatchFunc(nullQueryFun); } else if (descr == "AtomType") { query->setDataFunc(queryAtomType); - } else if (descr == "AtomInNRings" || descr == "RecursiveStructure") { + } else if (descr == "AtomInNRings" || descr == "RS") { // don't need to do anything here because the classes // automatically have everything set } else if (descr == "AtomAnd" || descr == "AtomOr" || descr == "AtomXor") { diff --git a/Code/GraphMol/QueryOps.h b/Code/GraphMol/QueryOps.h index 22d2447e7a8..c12f175f6e2 100644 --- a/Code/GraphMol/QueryOps.h +++ b/Code/GraphMol/QueryOps.h @@ -721,7 +721,7 @@ class RDKIT_GRAPHMOL_EXPORT RecursiveStructureQuery public: RecursiveStructureQuery() : Queries::SetQuery() { setDataFunc(getAtIdx); - setDescription("RecursiveStructure"); + setDescription("RS"); // RecursiveSubstructure }; //! initialize from an ROMol pointer /*! @@ -733,7 +733,7 @@ class RDKIT_GRAPHMOL_EXPORT RecursiveStructureQuery d_serialNumber(serialNumber) { setQueryMol(query); setDataFunc(getAtIdx); - setDescription("RecursiveStructure"); + setDescription("RS"); // RecursiveSubstricture }; //! returns the index of an atom static inline int getAtIdx(Atom const *at) { diff --git a/Code/GraphMol/ROMol.cpp b/Code/GraphMol/ROMol.cpp index 1c84fca988f..3e76bae2dd9 100644 --- a/Code/GraphMol/ROMol.cpp +++ b/Code/GraphMol/ROMol.cpp @@ -132,7 +132,7 @@ void ROMol::initFromOther(const ROMol &other, bool quickCopy, int confId) { } } else { d_props.reset(); - STR_VECT computed; + std::vector computed; d_props.setVal(RDKit::detail::computedPropName, computed); } // std::cerr<<"--------- done init from other: "< computed; d_props.setVal(RDKit::detail::computedPropName, computed); } diff --git a/Code/GraphMol/SmilesParse/SmartsWrite.cpp b/Code/GraphMol/SmilesParse/SmartsWrite.cpp index 0e6787f52c0..a1b65ea78f3 100644 --- a/Code/GraphMol/SmilesParse/SmartsWrite.cpp +++ b/Code/GraphMol/SmilesParse/SmartsWrite.cpp @@ -314,7 +314,7 @@ std::string getAtomSmartsSimple(const QueryAtom *qatom, std::string getRecursiveStructureQuerySmarts( const QueryAtom::QUERYATOM_QUERY *query) { PRECONDITION(query, "bad query"); - PRECONDITION(query->getDescription() == "RecursiveStructure", "bad query"); + PRECONDITION(query->getDescription() == "RS", "bad query"); const auto *rquery = static_cast(query); auto *qmol = const_cast(rquery->getQueryMol()); std::string res = MolToSmarts(*qmol); @@ -443,7 +443,7 @@ std::string _recurseGetSmarts(const QueryAtom *qatom, bool needParen; std::string csmarts1; // deal with the first child - if (dsc1 == "RecursiveStructure") { + if (dsc1 == "RS") { csmarts1 = getRecursiveStructureQuerySmarts(child1); features |= static_cast(QueryBoolFeatures::HAS_RECURSION); } else if ((dsc1 != "AtomOr") && (dsc1 != "AtomAnd")) { @@ -478,7 +478,7 @@ std::string _recurseGetSmarts(const QueryAtom *qatom, std::string csmarts2; // deal with the next child - if (dsc2 == "RecursiveStructure") { + if (dsc2 == "RS") { csmarts2 = getRecursiveStructureQuerySmarts(child2); features |= static_cast(QueryBoolFeatures::HAS_RECURSION); } else if ((dsc2 != "AtomOr") && (dsc2 != "AtomAnd")) { @@ -839,7 +839,7 @@ std::string GetAtomSmarts(const QueryAtom *qatom) { if (res.length() == 1) { // single atom symbol we don't need parens needParen = false; } - } else if (descrip == "RecursiveStructure") { + } else if (descrip == "RS") { // it's a bare recursive structure query: res = getRecursiveStructureQuerySmarts(query); needParen = true; diff --git a/Code/GraphMol/SmilesParse/SmilesParseOps.cpp b/Code/GraphMol/SmilesParse/SmilesParseOps.cpp index 5348f5af00a..876b0a67390 100644 --- a/Code/GraphMol/SmilesParse/SmilesParseOps.cpp +++ b/Code/GraphMol/SmilesParse/SmilesParseOps.cpp @@ -219,7 +219,7 @@ void AdjustAtomChiralityFlags(RWMol *mol) { // need to insert into the list in a particular order // INT_VECT ringClosures; - atom->getPropIfPresent(common_properties::_RingClosures, ringClosures); + atom->getPropIfPresent(common_properties::_RingClosuresKey, ringClosures); #if 0 std::cerr << "CLOSURES: "; @@ -296,7 +296,7 @@ void AdjustAtomChiralityFlags(RWMol *mol) { // those cases by looking for unsaturated atoms // if (Canon::chiralAtomNeedsTagInversion( - *mol, atom, atom->hasProp(common_properties::_SmilesStart), + *mol, atom, atom->hasProp(common_properties::_SmilesStartKey), ringClosures.size())) { ++nSwaps; } @@ -328,7 +328,7 @@ Bond::BondType GetUnspecifiedBondType(const RWMol *mol, const Atom *atom1, void SetUnspecifiedBondTypes(RWMol *mol) { PRECONDITION(mol, "no molecule"); for (auto bond : mol->bonds()) { - if (bond->hasProp(RDKit::common_properties::_unspecifiedOrder)) { + if (bond->hasProp(RDKit::common_properties::_unspecifiedOrderKey)) { bond->setBondType(GetUnspecifiedBondType(mol, bond->getBeginAtom(), bond->getEndAtom())); if (bond->getBondType() == Bond::AROMATIC) { @@ -461,7 +461,7 @@ void CloseMolRings(RWMol *mol, bool toleratePartials) { // << bond2->getBondType() << "(" // << bond2->hasProp(common_properties::_unspecifiedOrder) // << "):" << bond2->getBondDir() << std::endl; - if (!bond1->hasProp(common_properties::_unspecifiedOrder)) { + if (!bond1->hasProp(common_properties::_unspecifiedOrderKey)) { matchedBond = bond1; if (matchedBond->getBondType() == Bond::DATIVEL) { matchedBond->setBeginAtomIdx(atom2->getIdx()); @@ -507,25 +507,25 @@ void CloseMolRings(RWMol *mol, bool toleratePartials) { // property: if (bondIdx > -1) { CHECK_INVARIANT( - atom1->hasProp(common_properties::_RingClosures) && - atom2->hasProp(common_properties::_RingClosures), + atom1->hasProp(common_properties::_RingClosuresKey) && + atom2->hasProp(common_properties::_RingClosuresKey), "somehow atom doesn't have _RingClosures property."); INT_VECT closures; - atom1->getProp(common_properties::_RingClosures, closures); + atom1->getProp(common_properties::_RingClosuresKey, closures); auto closurePos = std::find(closures.begin(), closures.end(), -(bookmark.first + 1)); CHECK_INVARIANT(closurePos != closures.end(), "could not find bookmark in atom _RingClosures"); *closurePos = bondIdx - 1; - atom1->setProp(common_properties::_RingClosures, closures); + atom1->setProp(common_properties::_RingClosuresKey, closures); - atom2->getProp(common_properties::_RingClosures, closures); + atom2->getProp(common_properties::_RingClosuresKey, closures); closurePos = std::find(closures.begin(), closures.end(), -(bookmark.first + 1)); CHECK_INVARIANT(closurePos != closures.end(), "could not find bookmark in atom _RingClosures"); *closurePos = bondIdx - 1; - atom2->setProp(common_properties::_RingClosures, closures); + atom2->setProp(common_properties::_RingClosuresKey, closures); } bookmarkedAtomsToRemove.push_back(atom1); bookmarkedAtomsToRemove.push_back(atom2); @@ -545,11 +545,11 @@ void CloseMolRings(RWMol *mol, bool toleratePartials) { void CleanupAfterParsing(RWMol *mol) { PRECONDITION(mol, "no molecule"); for (auto atom : mol->atoms()) { - atom->clearProp(common_properties::_RingClosures); - atom->clearProp(common_properties::_SmilesStart); + atom->clearProp(common_properties::_RingClosuresKey); + atom->clearProp(common_properties::_SmilesStartKey); } for (auto bond : mol->bonds()) { - bond->clearProp(common_properties::_unspecifiedOrder); + bond->clearProp(common_properties::_unspecifiedOrderKey); } } diff --git a/Code/GraphMol/SmilesParse/SmilesWrite.cpp b/Code/GraphMol/SmilesParse/SmilesWrite.cpp index 699065e7507..6c79824af99 100644 --- a/Code/GraphMol/SmilesParse/SmilesWrite.cpp +++ b/Code/GraphMol/SmilesParse/SmilesWrite.cpp @@ -38,11 +38,13 @@ bool inOrganicSubset(int atomicNumber) { return false; } -std::string GetAtomSmiles(const Atom *atom, bool doKekule, const Bond *bondIn, +namespace { + +void GetAtomSmiles(std::ostringstream& out, const Atom *atom, + bool doKekule, const Bond *bondIn, bool allHsExplicit, bool isomericSmiles) { RDUNUSED_PARAM(bondIn); PRECONDITION(atom, "bad atom"); - std::string res; int fc = atom->getFormalCharge(); int num = atom->getAtomicNum(); int isotope = atom->getIsotope(); @@ -50,24 +52,21 @@ std::string GetAtomSmiles(const Atom *atom, bool doKekule, const Bond *bondIn, bool needsBracket = false; std::string symb; bool hasCustomSymbol = - atom->getPropIfPresent(common_properties::smilesSymbol, symb); + atom->getPropIfPresent(common_properties::smilesSymbolKey, symb); if (!hasCustomSymbol) { symb = PeriodicTable::getTable()->getElementSymbol(num); } // check for atomic stereochemistry - std::string atString = ""; - if (isomericSmiles || - (atom->hasOwningMol() && - atom->getOwningMol().hasProp(common_properties::_doIsoSmiles))) { + Atom::ChiralType chiral_type = Atom::CHI_UNSPECIFIED; + if (isomericSmiles) { if (atom->getChiralTag() != Atom::CHI_UNSPECIFIED && - !atom->hasProp(common_properties::_brokenChirality)) { - switch (atom->getChiralTag()) { + !atom->hasProp(common_properties::_brokenChiralityKey)) { + auto chiral_tag = atom->getChiralTag(); + switch (chiral_tag) { case Atom::CHI_TETRAHEDRAL_CW: - atString = "@@"; - break; case Atom::CHI_TETRAHEDRAL_CCW: - atString = "@"; + chiral_type = chiral_tag; break; default: break; @@ -103,103 +102,105 @@ std::string GetAtomSmiles(const Atom *atom, bool doKekule, const Bond *bondIn, } if (fc || nonStandard || - atom->hasProp(common_properties::molAtomMapNumber)) { + atom->hasProp(common_properties::molAtomMapNumberKey)) { needsBracket = true; - } else if ((isomericSmiles || (atom->hasOwningMol() && - atom->getOwningMol().hasProp( - common_properties::_doIsoSmiles))) && - (isotope || atString != "")) { + } else if (isomericSmiles && + (isotope || chiral_type != Atom::CHI_UNSPECIFIED )) { needsBracket = true; } } else { needsBracket = true; } if (needsBracket) { - res += "["; + out << "["; } - if (isotope && (isomericSmiles || (atom->hasOwningMol() && - atom->getOwningMol().hasProp( - common_properties::_doIsoSmiles)))) { - res += std::to_string(isotope); + if (isotope && isomericSmiles) { + out << isotope; } // this was originally only done for the organic subset, // applying it to other atom-types is a fix for Issue 3152751: if (!doKekule && atom->getIsAromatic() && symb[0] >= 'A' && symb[0] <= 'Z') { symb[0] -= ('A' - 'a'); } - res += symb; - - res += atString; + out << symb; + switch (chiral_type) { + case Atom::CHI_TETRAHEDRAL_CW: + out << "@@"; + break; + case Atom::CHI_TETRAHEDRAL_CCW: + out << "@"; + break; + default: + break; + } if (needsBracket) { unsigned int totNumHs = atom->getTotalNumHs(); if (totNumHs > 0) { - res += "H"; + out << "H"; if (totNumHs > 1) { - res += std::to_string(totNumHs); + out << totNumHs; } } if (fc > 0) { - res += "+"; + out << "+"; if (fc > 1) { - res += std::to_string(fc); + out << fc; } } else if (fc < 0) { if (fc < -1) { - res += std::to_string(fc); + out << fc; } else { - res += "-"; + out << "-"; } } int mapNum; - if (atom->getPropIfPresent(common_properties::molAtomMapNumber, mapNum)) { - res += ":"; - res += std::to_string(mapNum); + if (atom->getPropIfPresent(common_properties::molAtomMapNumberKey, mapNum)) { + out << ":" << mapNum; } - res += "]"; + out << "]"; } // If the atom has this property, the contained string will // be inserted directly in the SMILES: std::string label; - if (atom->getPropIfPresent(common_properties::_supplementalSmilesLabel, + if (atom->getPropIfPresent(common_properties::_supplementalSmilesLabelKey, label)) { - res += label; + out << label; } - - return res; } -std::string GetBondSmiles(const Bond *bond, int atomToLeftIdx, bool doKekule, - bool allBondsExplicit) { +void GetBondSmiles(std::ostringstream&out, const Bond *bond, int atomToLeftIdx, + bool doKekule, bool allBondsExplicit) { PRECONDITION(bond, "bad bond"); if (atomToLeftIdx < 0) { atomToLeftIdx = bond->getBeginAtomIdx(); } - std::string res = ""; bool aromatic = false; - if (!doKekule && (bond->getBondType() == Bond::SINGLE || - bond->getBondType() == Bond::DOUBLE || - bond->getBondType() == Bond::AROMATIC)) { - if (bond->hasOwningMol()) { - auto a1 = bond->getOwningMol().getAtomWithIdx(atomToLeftIdx); - auto a2 = bond->getOwningMol().getAtomWithIdx( - bond->getOtherAtomIdx(atomToLeftIdx)); - if ((a1->getIsAromatic() && a2->getIsAromatic()) && - (a1->getAtomicNum() || a2->getAtomicNum())) { - aromatic = true; + if (!doKekule && bond->hasOwningMol()) { + switch (bond->getBondType()) { + case Bond::SINGLE: // fallthrough + case Bond::DOUBLE: // fallthrough + case Bond::AROMATIC: { + auto a1 = bond->getOwningMol().getAtomWithIdx(atomToLeftIdx); + if (a1->getIsAromatic()) { + auto a2 = bond->getOwningMol().getAtomWithIdx( + bond->getOtherAtomIdx(atomToLeftIdx)); + aromatic = a2->getIsAromatic() && (a1->getAtomicNum() || a2->getAtomicNum()); + } + break; } - } else { - aromatic = false; + default: + break; } } Bond::BondDir dir = bond->getBondDir(); - bond->clearProp(common_properties::_TraversalRingClosureBond); + bond->clearProp(common_properties::_TraversalRingClosureBondKey); switch (bond->getBondType()) { case Bond::SINGLE: @@ -208,20 +209,20 @@ std::string GetBondSmiles(const Bond *bond, int atomToLeftIdx, bool doKekule, case Bond::ENDDOWNRIGHT: if (allBondsExplicit || (bond->hasOwningMol() && bond->getOwningMol().hasProp( - common_properties::_doIsoSmiles))) { - res = "\\"; + common_properties::_doIsoSmilesKey))) { + out << "\\"; } break; case Bond::ENDUPRIGHT: if (allBondsExplicit || (bond->hasOwningMol() && bond->getOwningMol().hasProp( - common_properties::_doIsoSmiles))) { - res = "/"; + common_properties::_doIsoSmilesKey))) { + out << "/"; } break; default: if (allBondsExplicit) { - res = "-"; + out << "-"; } break; } @@ -233,20 +234,20 @@ std::string GetBondSmiles(const Bond *bond, int atomToLeftIdx, bool doKekule, // currently this is possible by removing all // isAromatic flags, but there should maybe be another way if (allBondsExplicit) { - res = "-"; + out << "-"; } else if (aromatic && !bond->getIsAromatic()) { - res = "-"; + out << "-"; } } break; case Bond::DOUBLE: // see note above if (!aromatic || !bond->getIsAromatic() || allBondsExplicit) { - res = "="; + out << "="; } break; case Bond::TRIPLE: - res = "#"; + out << "#"; break; case Bond::AROMATIC: if (dir != Bond::NONE && dir != Bond::UNKNOWN) { @@ -254,39 +255,54 @@ std::string GetBondSmiles(const Bond *bond, int atomToLeftIdx, bool doKekule, case Bond::ENDDOWNRIGHT: if (allBondsExplicit || (bond->hasOwningMol() && bond->getOwningMol().hasProp( - common_properties::_doIsoSmiles))) { - res = "\\"; + common_properties::_doIsoSmilesKey))) { + out << "\\"; } break; case Bond::ENDUPRIGHT: if (allBondsExplicit || (bond->hasOwningMol() && bond->getOwningMol().hasProp( - common_properties::_doIsoSmiles))) { - res = "/"; + common_properties::_doIsoSmilesKey))) { + out << "/"; } break; default: if (allBondsExplicit || !aromatic) { - res = ":"; + out << ":"; } break; } } else if (allBondsExplicit || !aromatic) { - res = ":"; + out << ":"; } break; case Bond::DATIVE: if (atomToLeftIdx >= 0 && bond->getBeginAtomIdx() == static_cast(atomToLeftIdx)) { - res = "->"; + out << "->"; } else { - res = "<-"; + out << "<-"; } break; default: - res = "~"; + out << "~"; } - return res; +} + +} // end anonymous namespace + +std::string GetAtomSmiles(const Atom *atom, bool doKekule, const Bond *bondIn, + bool allHsExplicit, bool isomericSmiles) { + std::ostringstream out; + GetAtomSmiles(out, atom, doKekule, bondIn, allHsExplicit, isomericSmiles); + return out.str(); +} + +std::string GetBondSmiles(const Bond *bond, int atomToLeftIdx, bool doKekule, + bool allBondsExplicit) { + std::ostringstream out; + GetBondSmiles(out, bond, atomToLeftIdx, doKekule, allBondsExplicit); + return out.str(); } std::string FragmentSmilesConstruct( @@ -306,19 +322,24 @@ std::string FragmentSmilesConstruct( Canon::MolStack molStack; // try to prevent excessive reallocation - molStack.reserve(mol.getNumAtoms() + mol.getNumBonds()); - std::stringstream res; + molStack.reserve(2 * mol.getNumAtoms() + 2 * mol.getNumBonds()); + std::ostringstream res; std::map ringClosureMap; int ringIdx, closureVal; if (!canonical) { - mol.setProp(common_properties::_StereochemDone, 1); + mol.setProp(common_properties::_StereochemDoneKey, 1); } std::list ringClosuresToErase; Canon::canonicalizeFragment(mol, atomIdx, colors, ranks, molStack, bondsInPlay, bondSymbols, doIsomericSmiles, doRandom); + bool getAtomSmilesDoIsomericSmiles = doIsomericSmiles; + if (!getAtomSmilesDoIsomericSmiles) { + getAtomSmilesDoIsomericSmiles = mol.hasProp(common_properties::_doIsoSmilesKey); + } + Bond *bond = nullptr; for (auto &mSE : molStack) { switch (mSE.type) { @@ -329,8 +350,8 @@ std::string FragmentSmilesConstruct( ringClosuresToErase.clear(); // std::cout<<"\t\tAtom: "<getIdx()<getIdx()]; } @@ -341,7 +362,7 @@ std::string FragmentSmilesConstruct( bond = mSE.obj.bond; // std::cout<<"\t\tBond: "<getIdx()<getIdx()]; } @@ -405,50 +426,32 @@ static bool SortBasedOnFirstElement( return a.first < b.first; } -std::string MolToSmiles(const ROMol &mol, bool doIsomericSmiles, bool doKekule, - int rootedAtAtom, bool canonical, bool allBondsExplicit, - bool allHsExplicit, bool doRandom) { - if (!mol.getNumAtoms()) { - return ""; - } - PRECONDITION(rootedAtAtom < 0 || - static_cast(rootedAtAtom) < mol.getNumAtoms(), - "rootedAtomAtom must be less than the number of atoms"); - - std::vector> fragsMolAtomMapping; - auto mols = - MolOps::getMolFrags(mol, false, nullptr, &fragsMolAtomMapping, false); - std::vector vfragsmi(mols.size()); - - // for(unsigned i=0; i> allAtomOrdering; - for (unsigned fragIdx = 0; fragIdx < mols.size(); fragIdx++) { - ROMol *tmol = mols[fragIdx].get(); +namespace { +// returns atomOrdering +std::vector processMolToSmilesFragment( + const ROMol& mol, ROMol& tmol, + bool doIsomericSmiles, bool doKekule, int rootedAtAtom, bool canonical, + bool allBondsExplicit, bool allHsExplicit, bool doRandom, + const std::vector& fragMolAtomMapping, std::string* fragmentSmiles +) { // update property cache - for (auto atom : tmol->atoms()) { + for (auto atom : tmol.atoms()) { atom->updatePropertyCache(false); } // clean up the chirality on any atom that is marked as chiral, // but that should not be: if (doIsomericSmiles) { - tmol->setProp(common_properties::_doIsoSmiles, 1); - if (!mol.hasProp(common_properties::_StereochemDone)) { - MolOps::assignStereochemistry(*tmol, true); + tmol.setProp(common_properties::_doIsoSmilesKey, 1); + if (!mol.hasProp(common_properties::_StereochemDoneKey)) { + MolOps::assignStereochemistry(tmol, true); } } #if 0 std::cout << "----------------------------" << std::endl; std::cout << "MolToSmiles:"<< std::endl; - tmol->debugMol(std::cout); + tmol.debugMol(std::cout); std::cout << "----------------------------" << std::endl; #endif @@ -459,20 +462,20 @@ std::string MolToSmiles(const ROMol &mol, bool doIsomericSmiles, bool doKekule, } std::string res; - unsigned int nAtoms = tmol->getNumAtoms(); + unsigned int nAtoms = tmol.getNumAtoms(); std::vector ranks(nAtoms); std::vector atomOrdering; if (canonical) { - if (tmol->hasProp("_canonicalRankingNumbers")) { - for (const auto atom : tmol->atoms()) { + if (tmol.hasProp(common_properties::_canonicalRankingNumbersKey)) { + for (const auto atom : tmol.atoms()) { unsigned int rankNum = 0; - atom->getPropIfPresent("_canonicalRankingNumber", rankNum); + atom->getPropIfPresent(common_properties::_canonicalRankingNumbersKey, rankNum); ranks[atom->getIdx()] = rankNum; } } else { bool breakTies = true; - Canon::rankMolAtoms(*tmol, ranks, breakTies, doIsomericSmiles, + Canon::rankMolAtoms(tmol, ranks, breakTies, doIsomericSmiles, doIsomericSmiles); } } else { @@ -481,7 +484,7 @@ std::string MolToSmiles(const ROMol &mol, bool doIsomericSmiles, bool doKekule, #ifdef VERBOSE_CANON for (unsigned int tmpI = 0; tmpI < ranks.size(); tmpI++) { std::cout << tmpI << " " << ranks[tmpI] << " " - << *(tmol->getAtomWithIdx(tmpI)) << std::endl; + << *(tmol.getAtomWithIdx(tmpI)) << std::endl; } #endif @@ -505,17 +508,46 @@ std::string MolToSmiles(const ROMol &mol, bool doIsomericSmiles, bool doKekule, CHECK_INVARIANT(nextAtomIdx >= 0, "no start atom found"); subSmi = SmilesWrite::FragmentSmilesConstruct( - *tmol, nextAtomIdx, colors, ranks, doKekule, canonical, + tmol, nextAtomIdx, colors, ranks, doKekule, canonical, doIsomericSmiles, allBondsExplicit, allHsExplicit, doRandom, atomOrdering); res += subSmi; - vfragsmi[fragIdx] = res; + *fragmentSmiles = res; for (unsigned int &vit : atomOrdering) { - vit = fragsMolAtomMapping[fragIdx][vit]; // Lookup the Id in the original + vit = fragMolAtomMapping[vit]; // Lookup the Id in the original // molecule } + + return atomOrdering; +} + +} // end anonymous namespace + +std::string MolToSmiles(const ROMol &mol, bool doIsomericSmiles, bool doKekule, + int rootedAtAtom, bool canonical, bool allBondsExplicit, + bool allHsExplicit, bool doRandom) { + if (!mol.getNumAtoms()) { + return ""; + } + PRECONDITION(rootedAtAtom < 0 || + static_cast(rootedAtAtom) < mol.getNumAtoms(), + "rootedAtomAtom must be less than the number of atoms"); + + std::vector> fragsMolAtomMapping; + auto mols = + MolOps::getMolFrags(mol, false, nullptr, &fragsMolAtomMapping, false); + std::vector vfragsmi(mols.size()); + + std::vector> allAtomOrdering; + for (unsigned fragIdx = 0; fragIdx < mols.size(); fragIdx++) { + ROMol *tmol = mols[fragIdx].get(); + + std::vector atomOrdering = processMolToSmilesFragment( + mol, *tmol, doIsomericSmiles, doKekule, rootedAtAtom, canonical, + allBondsExplicit, allHsExplicit, doRandom, + fragsMolAtomMapping[fragIdx], &vfragsmi[fragIdx]); allAtomOrdering.push_back(atomOrdering); } @@ -558,6 +590,76 @@ std::string MolToSmiles(const ROMol &mol, bool doIsomericSmiles, bool doKekule, return result; } // end of MolToSmiles() +std::vector MolToSmilesFast(ROMol &mol, bool doIsomericSmiles, bool doKekule, + int rootedAtAtom, bool canonical, bool allBondsExplicit, + bool allHsExplicit, bool doRandom) { + if (!mol.getNumAtoms()) { + return {}; + } + PRECONDITION(rootedAtAtom < 0 || + static_cast(rootedAtAtom) < mol.getNumAtoms(), + "rootedAtomAtom must be less than the number of atoms"); + + std::vector> fragsMolAtomMapping; + bool shortCircuitOnSingle = false; + auto mols = MolOps::getMolFrags( + mol, false, nullptr, &fragsMolAtomMapping, false, &shortCircuitOnSingle); + + std::vector vfragsmi(shortCircuitOnSingle ? 1 : mols.size()); + + std::vector> allAtomOrdering; + if (shortCircuitOnSingle) { + std::vector atomOrdering = processMolToSmilesFragment( + mol, mol, doIsomericSmiles, doKekule, rootedAtAtom, canonical, + allBondsExplicit, allHsExplicit, doRandom, + fragsMolAtomMapping[0], &vfragsmi[0]); + allAtomOrdering.push_back(atomOrdering); + } else { + for (unsigned fragIdx = 0; fragIdx < mols.size(); fragIdx++) { + ROMol *tmol = mols[fragIdx].get(); + + std::vector atomOrdering = processMolToSmilesFragment( + mol, *tmol, doIsomericSmiles, doKekule, rootedAtAtom, canonical, + allBondsExplicit, allHsExplicit, doRandom, + fragsMolAtomMapping[fragIdx], &vfragsmi[fragIdx]); + allAtomOrdering.push_back(atomOrdering); + } + } + + std::vector result; + result.reserve(vfragsmi.size()); + std::vector flattenedAtomOrdering; + flattenedAtomOrdering.reserve(mol.getNumAtoms()); + if (canonical) { + // Sort the vfragsmi, but also sort the atom order vectors into the same + // order + typedef std::pair> PairStrAndVec; + std::vector tmp(vfragsmi.size()); + for (unsigned int ti = 0; ti < vfragsmi.size(); ++ti) { + tmp[ti] = PairStrAndVec(vfragsmi[ti], allAtomOrdering[ti]); + } + std::sort(tmp.begin(), tmp.end(), SortBasedOnFirstElement); + + for (unsigned int ti = 0; ti < vfragsmi.size(); ++ti) { + result.push_back(tmp[ti].first); + flattenedAtomOrdering.insert(flattenedAtomOrdering.end(), + tmp[ti].second.begin(), + tmp[ti].second.end()); + } + } else { // Not canonical + for (auto &i : allAtomOrdering) { + flattenedAtomOrdering.insert(flattenedAtomOrdering.end(), i.begin(), + i.end()); + } + for (unsigned i = 0; i < vfragsmi.size(); ++i) { + result.push_back(vfragsmi[i]); + } + } + mol.setProp(common_properties::_smilesAtomOutputOrder, flattenedAtomOrdering, + true); + return result; +} // end of MolToSmilesFast() + std::string MolToCXSmiles(const ROMol &mol, bool doIsomericSmiles, bool doKekule, int rootedAtAtom, bool canonical, bool allBondsExplicit, bool allHsExplicit, diff --git a/Code/GraphMol/SmilesParse/SmilesWrite.h b/Code/GraphMol/SmilesParse/SmilesWrite.h index 5fd52d5bab2..f95abf4e803 100644 --- a/Code/GraphMol/SmilesParse/SmilesWrite.h +++ b/Code/GraphMol/SmilesParse/SmilesWrite.h @@ -76,6 +76,13 @@ RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles( int rootedAtAtom = -1, bool canonical = true, bool allBondsExplicit = false, bool allHsExplicit = false, bool doRandom = false); +// Like MolToSmiles, but can modify the input molecule to avoid intermediate +// allocations. +RDKIT_SMILESPARSE_EXPORT std::vector MolToSmilesFast( + ROMol &mol, bool doIsomericSmiles = true, bool doKekule = false, + int rootedAtAtom = -1, bool canonical = true, bool allBondsExplicit = false, + bool allHsExplicit = false, bool doRandom = false); + //! \brief returns a vector of random SMILES for a molecule (may contain //! duplicates) /*! diff --git a/Code/GraphMol/SmilesParse/smiles.tab.cpp.cmake b/Code/GraphMol/SmilesParse/smiles.tab.cpp.cmake index b85438e816c..22b8d865e4e 100644 --- a/Code/GraphMol/SmilesParse/smiles.tab.cpp.cmake +++ b/Code/GraphMol/SmilesParse/smiles.tab.cpp.cmake @@ -1674,7 +1674,7 @@ yyreduce: molList->resize( sz + 1); (*molList)[ sz ] = new RWMol(); RDKit::RWMol *curMol = (*molList)[ sz ]; - (yyvsp[0].atom)->setProp(RDKit::common_properties::_SmilesStart,1); + (yyvsp[0].atom)->setProp(RDKit::common_properties::_SmilesStartKey,1); curMol->addAtom((yyvsp[0].atom), true, true); //delete $1; (yyval.moli) = sz; @@ -1736,7 +1736,7 @@ yyreduce: #line 216 "smiles.yy" { RWMol *mp = (*molList)[(yyval.moli)]; - (yyvsp[0].atom)->setProp(RDKit::common_properties::_SmilesStart,1,true); + (yyvsp[0].atom)->setProp(RDKit::common_properties::_SmilesStartKey,1,true); mp->addAtom((yyvsp[0].atom),true,true); } #line 1743 "/scratch/RDKit_git/Code/GraphMol/SmilesParse/smiles.tab.cpp" @@ -1752,14 +1752,14 @@ yyreduce: Bond *newB = mp->createPartialBond(atom->getIdx(), Bond::UNSPECIFIED); mp->setBondBookmark(newB,(yyvsp[0].ival)); - newB->setProp(RDKit::common_properties::_unspecifiedOrder,1); + newB->setProp(RDKit::common_properties::_unspecifiedOrderKey,1); SmilesParseOps::CheckRingClosureBranchStatus(atom,mp); INT_VECT tmp; - atom->getPropIfPresent(RDKit::common_properties::_RingClosures,tmp); + atom->getPropIfPresent(RDKit::common_properties::_RingClosuresKey,tmp); tmp.push_back(-((yyvsp[0].ival)+1)); - atom->setProp(RDKit::common_properties::_RingClosures,tmp); + atom->setProp(RDKit::common_properties::_RingClosuresKey,tmp); } #line 1765 "/scratch/RDKit_git/Code/GraphMol/SmilesParse/smiles.tab.cpp" break; @@ -1771,8 +1771,8 @@ yyreduce: Atom *atom=mp->getActiveAtom(); Bond *newB = mp->createPartialBond(atom->getIdx(), (yyvsp[-1].bond)->getBondType()); - if((yyvsp[-1].bond)->hasProp(RDKit::common_properties::_unspecifiedOrder)){ - newB->setProp(RDKit::common_properties::_unspecifiedOrder,1); + if((yyvsp[-1].bond)->hasProp(RDKit::common_properties::_unspecifiedOrderKey)){ + newB->setProp(RDKit::common_properties::_unspecifiedOrderKey,1); } newB->setBondDir((yyvsp[-1].bond)->getBondDir()); mp->setAtomBookmark(atom,(yyvsp[0].ival)); @@ -1781,9 +1781,9 @@ yyreduce: SmilesParseOps::CheckRingClosureBranchStatus(atom,mp); INT_VECT tmp; - atom->getPropIfPresent(RDKit::common_properties::_RingClosures,tmp); + atom->getPropIfPresent(RDKit::common_properties::_RingClosuresKey,tmp); tmp.push_back(-((yyvsp[0].ival)+1)); - atom->setProp(RDKit::common_properties::_RingClosures,tmp); + atom->setProp(RDKit::common_properties::_RingClosuresKey,tmp); delete (yyvsp[-1].bond); } #line 1790 "/scratch/RDKit_git/Code/GraphMol/SmilesParse/smiles.tab.cpp" @@ -1802,9 +1802,9 @@ yyreduce: SmilesParseOps::CheckRingClosureBranchStatus(atom,mp); INT_VECT tmp; - atom->getPropIfPresent(RDKit::common_properties::_RingClosures,tmp); + atom->getPropIfPresent(RDKit::common_properties::_RingClosuresKey,tmp); tmp.push_back(-((yyvsp[0].ival)+1)); - atom->setProp(RDKit::common_properties::_RingClosures,tmp); + atom->setProp(RDKit::common_properties::_RingClosuresKey,tmp); } #line 1810 "/scratch/RDKit_git/Code/GraphMol/SmilesParse/smiles.tab.cpp" break; @@ -1890,7 +1890,7 @@ yyreduce: { (yyval.atom) = (yyvsp[-3].atom); (yyval.atom)->setNoImplicit(true); - (yyval.atom)->setProp(RDKit::common_properties::molAtomMapNumber,(yyvsp[-1].ival)); + (yyval.atom)->setProp(RDKit::common_properties::molAtomMapNumberKey,(yyvsp[-1].ival)); } #line 1896 "/scratch/RDKit_git/Code/GraphMol/SmilesParse/smiles.tab.cpp" break; @@ -2062,14 +2062,14 @@ yyreduce: case 69: #line 408 "smiles.yy" - { - if((yyvsp[-1].ival) >= std::numeric_limits::max()/10 || + { + if((yyvsp[-1].ival) >= std::numeric_limits::max()/10 || (yyvsp[-1].ival)*10 >= std::numeric_limits::max()-(yyvsp[0].ival) ){ yyerror(input,molList,branchPoints,scanner,start_token,"number too large"); yyErrorCleanup(molList); YYABORT; } - (yyval.ival) = (yyvsp[-1].ival)*10 + (yyvsp[0].ival); + (yyval.ival) = (yyvsp[-1].ival)*10 + (yyvsp[0].ival); } #line 2075 "/scratch/RDKit_git/Code/GraphMol/SmilesParse/smiles.tab.cpp" break; diff --git a/Code/GraphMol/SmilesParse/smiles.yy b/Code/GraphMol/SmilesParse/smiles.yy index 47da09f7cda..aca240b34b1 100644 --- a/Code/GraphMol/SmilesParse/smiles.yy +++ b/Code/GraphMol/SmilesParse/smiles.yy @@ -169,7 +169,7 @@ mol: atomd { molList->resize( sz + 1); (*molList)[ sz ] = new RWMol(); RDKit::RWMol *curMol = (*molList)[ sz ]; - $1->setProp(RDKit::common_properties::_SmilesStart,1); + $1->setProp(RDKit::common_properties::_SmilesStartKey,1); curMol->addAtom($1, true, true); //delete $1; $$ = sz; @@ -215,7 +215,7 @@ mol: atomd { | mol SEPARATOR_TOKEN atomd { RWMol *mp = (*molList)[$$]; - $3->setProp(RDKit::common_properties::_SmilesStart,1,true); + $3->setProp(RDKit::common_properties::_SmilesStartKey,1,true); mp->addAtom($3,true,true); } @@ -227,14 +227,14 @@ mol: atomd { Bond *newB = mp->createPartialBond(atom->getIdx(), Bond::UNSPECIFIED); mp->setBondBookmark(newB,$2); - newB->setProp(RDKit::common_properties::_unspecifiedOrder,1); + newB->setProp(RDKit::common_properties::_unspecifiedOrderKey,1); SmilesParseOps::CheckRingClosureBranchStatus(atom,mp); INT_VECT tmp; - atom->getPropIfPresent(RDKit::common_properties::_RingClosures,tmp); + atom->getPropIfPresent(RDKit::common_properties::_RingClosuresKey,tmp); tmp.push_back(-($2+1)); - atom->setProp(RDKit::common_properties::_RingClosures,tmp); + atom->setProp(RDKit::common_properties::_RingClosuresKey,tmp); } | mol BOND_TOKEN ring_number { @@ -242,8 +242,8 @@ mol: atomd { Atom *atom=mp->getActiveAtom(); Bond *newB = mp->createPartialBond(atom->getIdx(), $2->getBondType()); - if($2->hasProp(RDKit::common_properties::_unspecifiedOrder)){ - newB->setProp(RDKit::common_properties::_unspecifiedOrder,1); + if($2->hasProp(RDKit::common_properties::_unspecifiedOrderKey)){ + newB->setProp(RDKit::common_properties::_unspecifiedOrderKey,1); } newB->setBondDir($2->getBondDir()); mp->setAtomBookmark(atom,$3); @@ -252,9 +252,9 @@ mol: atomd { SmilesParseOps::CheckRingClosureBranchStatus(atom,mp); INT_VECT tmp; - atom->getPropIfPresent(RDKit::common_properties::_RingClosures,tmp); + atom->getPropIfPresent(RDKit::common_properties::_RingClosuresKey,tmp); tmp.push_back(-($3+1)); - atom->setProp(RDKit::common_properties::_RingClosures,tmp); + atom->setProp(RDKit::common_properties::_RingClosuresKey,tmp); delete $2; } @@ -269,9 +269,9 @@ mol: atomd { SmilesParseOps::CheckRingClosureBranchStatus(atom,mp); INT_VECT tmp; - atom->getPropIfPresent(RDKit::common_properties::_RingClosures,tmp); + atom->getPropIfPresent(RDKit::common_properties::_RingClosuresKey,tmp); tmp.push_back(-($3+1)); - atom->setProp(RDKit::common_properties::_RingClosures,tmp); + atom->setProp(RDKit::common_properties::_RingClosuresKey,tmp); } | mol GROUP_OPEN_TOKEN atomd { @@ -337,7 +337,7 @@ atomd: simple_atom { $$ = $2; $$->setNoImplicit(true); - $$->setProp(RDKit::common_properties::molAtomMapNumber,$4); + $$->setProp(RDKit::common_properties::molAtomMapNumberKey,$4); } | ATOM_OPEN_TOKEN charge_element ATOM_CLOSE_TOKEN { @@ -405,14 +405,14 @@ number: ZERO_TOKEN /* --------------------------------------------------------------- */ nonzero_number: NONZERO_DIGIT_TOKEN -| nonzero_number digit { - if($1 >= std::numeric_limits::max()/10 || +| nonzero_number digit { + if($1 >= std::numeric_limits::max()/10 || $1*10 >= std::numeric_limits::max()-$2 ){ yyerror(input,molList,branchPoints,scanner,start_token,"number too large"); yyErrorCleanup(molList); YYABORT; } - $$ = $1*10 + $2; + $$ = $1*10 + $2; } ; diff --git a/Code/GraphMol/Substruct/SubstructMatch.cpp b/Code/GraphMol/Substruct/SubstructMatch.cpp index 7cfedec228a..ce8fc27ff70 100644 --- a/Code/GraphMol/Substruct/SubstructMatch.cpp +++ b/Code/GraphMol/Substruct/SubstructMatch.cpp @@ -619,9 +619,7 @@ void MatchSubqueries(const ROMol &mol, QueryAtom::QUERYATOM_QUERY *query, SUBQUERY_MAP &subqueryMap, std::vector &locked) { PRECONDITION(query, "bad query"); - // std::cout << "*-*-* MS: " << (int)query << std::endl; - // std::cout << "\t\t" << typeid(*query).name() << std::endl; - if (query->getDescription() == "RecursiveStructure") { + if (query->getDescription() == "RS") { auto *rsq = (RecursiveStructureQuery *)query; #ifdef RDK_THREADSAFE_SSS rsq->d_mutex.lock(); @@ -640,8 +638,6 @@ void MatchSubqueries(const ROMol &mol, QueryAtom::QUERYATOM_QUERY *query, ++setIter) { rsq->insert(*setIter); } - // std::cerr<<" copying results for query serial number: - // "<getSerialNumber()<getSerialNumber()) { subqueryMap[rsq->getSerialNumber()] = query; - // std::cerr<<" storing results for query serial number: - // "<getSerialNumber()<::CHILD_VECT_CI childIt; - // std::cout << query << " " << query->endChildren()-query->beginChildren() << - // std::endl; for (childIt = query->beginChildren(); childIt != query->endChildren(); childIt++) { MatchSubqueries(mol, childIt->get(), params, subqueryMap, locked); } - // std::cout << "<<- back " << (int)query << std::endl; } bool matchCompare(const std::pair &a, const std::pair &b) { diff --git a/Code/GraphMol/new_canon.cpp b/Code/GraphMol/new_canon.cpp index 623c7464ef8..28669a85f15 100644 --- a/Code/GraphMol/new_canon.cpp +++ b/Code/GraphMol/new_canon.cpp @@ -317,7 +317,7 @@ bool hasRingNbr(const ROMol &mol, const Atom *at) { ++beg; if ((nbr->getChiralTag() == Atom::CHI_TETRAHEDRAL_CW || nbr->getChiralTag() == Atom::CHI_TETRAHEDRAL_CCW) && - nbr->hasProp(common_properties::_ringStereoAtoms)) { + nbr->hasProp(common_properties::_ringStereoAtomsKey)) { return true; } } @@ -449,7 +449,7 @@ void advancedInitCanonAtom(const ROMol &mol, Canon::canon_atom &atom, atom.isRingStereoAtom = (atom.atom->getChiralTag() == Atom::CHI_TETRAHEDRAL_CW || atom.atom->getChiralTag() == Atom::CHI_TETRAHEDRAL_CCW) && - atom.atom->hasProp(common_properties::_ringStereoAtoms); + atom.atom->hasProp(common_properties::_ringStereoAtomsKey); atom.hasRingNbr = hasRingNbr(mol, atom.atom); } } // end anonymous namespace diff --git a/Code/GraphMol/new_canon.h b/Code/GraphMol/new_canon.h index d6bc36f898d..d8b64ab8050 100644 --- a/Code/GraphMol/new_canon.h +++ b/Code/GraphMol/new_canon.h @@ -279,9 +279,9 @@ class RDKIT_GRAPHMOL_EXPORT AtomCompareFunctor { */ int molAtomMapNumber_i = 0; int molAtomMapNumber_j = 0; - dp_atoms[i].atom->getPropIfPresent(common_properties::molAtomMapNumber, + dp_atoms[i].atom->getPropIfPresent(common_properties::molAtomMapNumberKey, molAtomMapNumber_i); - dp_atoms[j].atom->getPropIfPresent(common_properties::molAtomMapNumber, + dp_atoms[j].atom->getPropIfPresent(common_properties::molAtomMapNumberKey, molAtomMapNumber_j); if (molAtomMapNumber_i < molAtomMapNumber_j) { return -1; @@ -347,11 +347,11 @@ class RDKIT_GRAPHMOL_EXPORT AtomCompareFunctor { ivi = 0; ivj = 0; std::string cipCode; - if (dp_atoms[i].atom->getPropIfPresent(common_properties::_CIPCode, + if (dp_atoms[i].atom->getPropIfPresent(common_properties::_CIPCodeKey, cipCode)) { ivi = cipCode == "R" ? 2 : 1; } - if (dp_atoms[j].atom->getPropIfPresent(common_properties::_CIPCode, + if (dp_atoms[j].atom->getPropIfPresent(common_properties::_CIPCodeKey, cipCode)) { ivj = cipCode == "R" ? 2 : 1; } @@ -501,11 +501,11 @@ class RDKIT_GRAPHMOL_EXPORT ChiralAtomCompareFunctor { ivi = 0; ivj = 0; std::string cipCode; - if (dp_atoms[i].atom->getPropIfPresent(common_properties::_CIPCode, + if (dp_atoms[i].atom->getPropIfPresent(common_properties::_CIPCodeKey, cipCode)) { ivi = cipCode == "R" ? 2 : 1; } - if (dp_atoms[j].atom->getPropIfPresent(common_properties::_CIPCode, + if (dp_atoms[j].atom->getPropIfPresent(common_properties::_CIPCodeKey, cipCode)) { ivj = cipCode == "R" ? 2 : 1; } diff --git a/Code/GraphMol/test1.cpp b/Code/GraphMol/test1.cpp index 94da835c432..e64c88ef829 100644 --- a/Code/GraphMol/test1.cpp +++ b/Code/GraphMol/test1.cpp @@ -162,11 +162,11 @@ void testMolProps() { // check for computed properties m2.setProp("cprop1", 1, true); m2.setProp("cprop2", 2, true); - STR_VECT cplst; + std::vector cplst; m2.getProp(RDKit::detail::computedPropName, cplst); CHECK_INVARIANT(cplst.size() == 2, ""); - CHECK_INVARIANT(cplst[0] == "cprop1", ""); - CHECK_INVARIANT(cplst[1] == "cprop2", ""); + CHECK_INVARIANT(Dict::get_strkey(cplst[0]) == "cprop1", ""); + CHECK_INVARIANT(Dict::get_strkey(cplst[1]) == "cprop2", ""); propNames = m2.getPropList(false, false); TEST_ASSERT(propNames.size() == 1); @@ -289,11 +289,11 @@ void testAtomProps() { // check for computed properties a1->setProp("cprop1", 1, true); a1->setProp("cprop2", 2, true); - STR_VECT cplst; + std::vector cplst; a1->getProp(RDKit::detail::computedPropName, cplst); CHECK_INVARIANT(cplst.size() == 2, ""); - CHECK_INVARIANT(cplst[0] == "cprop1", ""); - CHECK_INVARIANT(cplst[1] == "cprop2", ""); + CHECK_INVARIANT(Dict::get_strkey(cplst[0]) == "cprop1", ""); + CHECK_INVARIANT(Dict::get_strkey(cplst[1]) == "cprop2", ""); a1->clearProp("cprop1"); CHECK_INVARIANT(!a1->hasProp("cprop1"), ""); @@ -344,11 +344,11 @@ void testBondProps() { // check for computed properties b1->setProp("cprop1", 1, true); b1->setProp("cprop2", 2, true); - STR_VECT cplst; + std::vector cplst; b1->getProp(RDKit::detail::computedPropName, cplst); CHECK_INVARIANT(cplst.size() == 2, ""); - CHECK_INVARIANT(cplst[0] == "cprop1", ""); - CHECK_INVARIANT(cplst[1] == "cprop2", ""); + CHECK_INVARIANT(Dict::get_strkey(cplst[0]) == "cprop1", ""); + CHECK_INVARIANT(Dict::get_strkey(cplst[1]) == "cprop2", ""); b1->clearProp("cprop1"); CHECK_INVARIANT(!b1->hasProp("cprop1"), ""); diff --git a/Code/GraphMol/testPickler.cpp b/Code/GraphMol/testPickler.cpp index 0ee566f1857..447e724c2e2 100644 --- a/Code/GraphMol/testPickler.cpp +++ b/Code/GraphMol/testPickler.cpp @@ -499,8 +499,7 @@ void testQueries() { MolPickler::molFromPickle(pickle, *m1); TEST_ASSERT(m1->getNumAtoms() == 1); TEST_ASSERT(m1->getAtomWithIdx(0)->hasQuery()); - TEST_ASSERT(m1->getAtomWithIdx(0)->getQuery()->getDescription() == - "RecursiveStructure"); + TEST_ASSERT(m1->getAtomWithIdx(0)->getQuery()->getDescription() == "RS"); smi = "C"; m2 = SmilesToMol(smi); TEST_ASSERT(m2); @@ -661,8 +660,7 @@ void testQueries() { MolPickler::molFromPickle(pickle, *m1); TEST_ASSERT(m1->getNumAtoms() == 1); TEST_ASSERT(m1->getAtomWithIdx(0)->hasQuery()); - TEST_ASSERT(m1->getAtomWithIdx(0)->getQuery()->getDescription() == - "RecursiveStructure"); + TEST_ASSERT(m1->getAtomWithIdx(0)->getQuery()->getDescription() == "RS"); smi = "C"; m2 = SmilesToMol(smi); TEST_ASSERT(m2); @@ -1378,7 +1376,7 @@ void testEnhancedStereoChemistry() { void testCustomPickler() { BOOST_LOG(rdInfoLog) << "-----------------------\n"; BOOST_LOG(rdInfoLog) << "Testing custom pickler (bitvector)" - << std::endl; + << std::endl; const bool bitsSet = false; ExplicitBitVect bv(1024, bitsSet); bv.setBit(100); @@ -1445,7 +1443,7 @@ void testGithub2441() { TEST_ASSERT(m2->getConformer(12).getProp("foo") == 2); TEST_ASSERT(!m2->getConformer().hasProp("bar")); TEST_ASSERT(m2->getConformer(12).getProp("bar") == 23); - + } BOOST_LOG(rdErrorLog) << "\tdone" << std::endl; @@ -1491,19 +1489,19 @@ void testGithubIssue2510() { auto *conf = new Conformer(2); double test_num = 3.1111124589; TEST_ASSERT(static_cast(test_num) != test_num); - + RDGeom::Point3D point(3.1111124589,0.0,0.0); - + conf->setAtomPos(0, point); m1->addConformer(conf); - + MolPickler::pickleMol(*m1, pickle, PicklerOps::CoordsAsDouble); m3 = new ROMol(); MolPickler::molFromPickle(pickle, *m3); TEST_ASSERT(m1->getNumAtoms() == m3->getNumAtoms()); TEST_ASSERT(m3->getConformer().getAtomPos(0).x == test_num); - + delete m1; delete m2; delete m3; diff --git a/Code/RDGeneral/Dict.h b/Code/RDGeneral/Dict.h index 105c4fbfe67..6b60d32c1cc 100644 --- a/Code/RDGeneral/Dict.h +++ b/Code/RDGeneral/Dict.h @@ -17,7 +17,9 @@ #define RD_DICT_H_012020 #include +#include #include +#include #include #include "RDValue.h" #include "Exceptions.h" @@ -34,7 +36,470 @@ typedef std::vector STR_VECT; //! The actual storage is done using \c RDValue objects. //! class RDKIT_RDGENERAL_EXPORT Dict { + private: + struct InternalPair { + std::uint16_t key; + RDValue val; + + InternalPair() : key(0), val() {} // TODO: should we remove this? + explicit InternalPair(std::uint16_t k) : key(k), val() {} + InternalPair(std::uint16_t k, const RDValue &v) : key(k), val(v) {} + }; + + typedef std::vector InternalDataType; + public: + static std::mutex& get_mu() { + static std::mutex mu; + return mu; + } + static std::unordered_map& get_strkey_to_key() { + static std::unordered_map _strkey_to_key; + return _strkey_to_key; + } + static std::vector& getc_key_to_strkey() { + // length 110. + static std::vector _key_to_strkey = { + "2D", + "BalabanJ", + "BalanbanJ", + "Discrims", + "DistanceMatrix_Paths", + "MRV SMA", + "MolFileComments", + "MolFileInfo", + "NullBond", + "_2DConf", + "_3DConf", + "_AtomID", + "_BondsPotentialStereo", + "_CIPCode", + "_CIPRank", + "_ChiralityPossible", + "_CrippenLogP", + "_CrippenMR", + "_GasteigerCharge", + "_GasteigerHCharge", + "_MMFFSanitized", + "_MolFileAtomQuery", + "_MolFileBondAttach", + "_MolFileBondCfg", + "_MolFileBondEndPts", + "_MolFileBondQuery", + "_MolFileBondStereo", + "_MolFileBondType", + "_MolFileChiralFlag", + "_MolFileRLabel", + "_Name", + "_NeedsQueryScan", + "_QueryFormalCharge", + "_QueryHCount", + "_QueryIsotope", + "_QueryMass", + "_ReactionDegreeChanged", + "_RingClosures", + "_SLN_s", + "_SmilesStart", + "_StereochemDone", + "_TraversalBondIndexOrder", + "_TraversalRingClosureBond", + "_TraversalStartPoint", + "_TriposAtomType", + "_Unfinished_SLN_", + "_UnknownStereo", + "__computedProps", + "_brokenChirality", + "_canonicalRankingNumbers", + "_connectivityHKDeltas", + "_connectivityNVals", + "_crippenLogP", + "_crippenLogPContribs", + "_crippenMR", + "_crippenMRContribs", + "_displayLabel", + "_displayLabelW", + "_doIsoSmiles", + "_fragSMARTS", + "_hasMassQuery", + "_isotopicHs", + "_labuteASA", + "_labuteAtomContribs", + "_labuteAtomHContrib", + "_protected", + "_queryRootAtom", + "_rgroupAtomMaps", + "_rgroupBonds", + "_ringStereoAtoms", + "_ringStereoWarning", + "_ringStereochemCand", + "_smilesAtomOutputOrder", + "_starred", + "_supplementalSmilesLabel", + "_tpsa", + "_tpsaAtomContribs", + "_unspecifiedOrder", + "atomLabel", + "atomNote", + "bondNote", + "dummyLabel", + "extraRings", + "internalRgroupSmiles", + "isImplicit", + "maxAttachIdx", + "molAtomMapNumber", + "molAttchord", + "molAttchpt", + "molClass", + "molFileAlias", + "molFileValue", + "molInversionFlag", + "molLinkNodes", + "molParity", + "molReactStatus", + "molRxnComponent", + "molRxnExachg", + "molRxnRole", + "molSeqid", + "molStereoCare", + "molSubstCount", + "molTotValence", + "numArom", + "old_mapno", + "origNoImplicit", + "react_atom_idx", + "ringMembership", + "smilesSymbol", + "was_dummy", + }; + return _key_to_strkey; + } + static std::vector& getv_key_to_strkey() { + static std::vector _key_to_strkey; + return _key_to_strkey; + } + + static std::uint16_t get_key(const std::string &key) { + if (key.size() >= 2) { + // Autogenerated from rdkit_dict_trie.ipynb + switch (key[0]) { + case '2': { + if (key == "2D") return 0; + break; + } + case 'B': { + if (key == "BalabanJ") return 1; + if (key == "BalanbanJ") return 2; + break; + } + case 'D': { + if (key == "Discrims") return 3; + if (key == "DistanceMatrix_Paths") return 4; + break; + } + case 'M': { + switch (key[1]) { + case 'R': { + if (key == "MRV SMA") return 5; + break; + } + case 'o': { + if (key == "MolFileComments") return 6; + if (key == "MolFileInfo") return 7; + break; + } + } + break; + } + case 'N': { + if (key == "NullBond") return 8; + break; + } + case '_': { + switch (key[1]) { + case '2': { + if (key == "_2DConf") return 9; + break; + } + case '3': { + if (key == "_3DConf") return 10; + break; + } + case 'A': { + if (key == "_AtomID") return 11; + break; + } + case 'B': { + if (key == "_BondsPotentialStereo") return 12; + break; + } + case 'C': { + if (key == "_CIPCode") return 13; + if (key == "_CIPRank") return 14; + if (key == "_ChiralityPossible") return 15; + if (key == "_CrippenLogP") return 16; + if (key == "_CrippenMR") return 17; + break; + } + case 'G': { + if (key == "_GasteigerCharge") return 18; + if (key == "_GasteigerHCharge") return 19; + break; + } + case 'M': { + if (key == "_MMFFSanitized") return 20; + if (key == "_MolFileAtomQuery") return 21; + if (key == "_MolFileBondAttach") return 22; + if (key == "_MolFileBondCfg") return 23; + if (key == "_MolFileBondEndPts") return 24; + if (key == "_MolFileBondQuery") return 25; + if (key == "_MolFileBondStereo") return 26; + if (key == "_MolFileBondType") return 27; + if (key == "_MolFileChiralFlag") return 28; + if (key == "_MolFileRLabel") return 29; + break; + } + case 'N': { + if (key == "_Name") return 30; + if (key == "_NeedsQueryScan") return 31; + break; + } + case 'Q': { + if (key == "_QueryFormalCharge") return 32; + if (key == "_QueryHCount") return 33; + if (key == "_QueryIsotope") return 34; + if (key == "_QueryMass") return 35; + break; + } + case 'R': { + if (key == "_ReactionDegreeChanged") return 36; + if (key == "_RingClosures") return 37; + break; + } + case 'S': { + if (key == "_SLN_s") return 38; + if (key == "_SmilesStart") return 39; + if (key == "_StereochemDone") return 40; + break; + } + case 'T': { + if (key == "_TraversalBondIndexOrder") return 41; + if (key == "_TraversalRingClosureBond") return 42; + if (key == "_TraversalStartPoint") return 43; + if (key == "_TriposAtomType") return 44; + break; + } + case 'U': { + if (key == "_Unfinished_SLN_") return 45; + if (key == "_UnknownStereo") return 46; + break; + } + case '_': { + if (key == "__computedProps") return 47; + break; + } + case 'b': { + if (key == "_brokenChirality") return 48; + break; + } + case 'c': { + if (key == "_canonicalRankingNumbers") return 49; + if (key == "_connectivityHKDeltas") return 50; + if (key == "_connectivityNVals") return 51; + if (key == "_crippenLogP") return 52; + if (key == "_crippenLogPContribs") return 53; + if (key == "_crippenMR") return 54; + if (key == "_crippenMRContribs") return 55; + break; + } + case 'd': { + if (key == "_displayLabel") return 56; + if (key == "_displayLabelW") return 57; + if (key == "_doIsoSmiles") return 58; + break; + } + case 'f': { + if (key == "_fragSMARTS") return 59; + break; + } + case 'h': { + if (key == "_hasMassQuery") return 60; + break; + } + case 'i': { + if (key == "_isotopicHs") return 61; + break; + } + case 'l': { + if (key == "_labuteASA") return 62; + if (key == "_labuteAtomContribs") return 63; + if (key == "_labuteAtomHContrib") return 64; + break; + } + case 'p': { + if (key == "_protected") return 65; + break; + } + case 'q': { + if (key == "_queryRootAtom") return 66; + break; + } + case 'r': { + if (key == "_rgroupAtomMaps") return 67; + if (key == "_rgroupBonds") return 68; + if (key == "_ringStereoAtoms") return 69; + if (key == "_ringStereoWarning") return 70; + if (key == "_ringStereochemCand") return 71; + break; + } + case 's': { + if (key == "_smilesAtomOutputOrder") return 72; + if (key == "_starred") return 73; + if (key == "_supplementalSmilesLabel") return 74; + break; + } + case 't': { + if (key == "_tpsa") return 75; + if (key == "_tpsaAtomContribs") return 76; + break; + } + case 'u': { + if (key == "_unspecifiedOrder") return 77; + break; + } + } + break; + } + case 'a': { + if (key == "atomLabel") return 78; + if (key == "atomNote") return 79; + break; + } + case 'b': { + if (key == "bondNote") return 80; + break; + } + case 'd': { + if (key == "dummyLabel") return 81; + break; + } + case 'e': { + if (key == "extraRings") return 82; + break; + } + case 'i': { + switch (key[1]) { + case 'n': { + if (key == "internalRgroupSmiles") return 83; + break; + } + case 's': { + if (key == "isImplicit") return 84; + break; + } + } + break; + } + case 'm': { + switch (key[1]) { + case 'a': { + if (key == "maxAttachIdx") return 85; + break; + } + case 'o': { + if (key == "molAtomMapNumber") return 86; + if (key == "molAttchord") return 87; + if (key == "molAttchpt") return 88; + if (key == "molClass") return 89; + if (key == "molFileAlias") return 90; + if (key == "molFileValue") return 91; + if (key == "molInversionFlag") return 92; + if (key == "molLinkNodes") return 93; + if (key == "molParity") return 94; + if (key == "molReactStatus") return 95; + if (key == "molRxnComponent") return 96; + if (key == "molRxnExachg") return 97; + if (key == "molRxnRole") return 98; + if (key == "molSeqid") return 99; + if (key == "molStereoCare") return 100; + if (key == "molSubstCount") return 101; + if (key == "molTotValence") return 102; + break; + } + } + break; + } + case 'n': { + if (key == "numArom") return 103; + break; + } + case 'o': { + switch (key[1]) { + case 'l': { + if (key == "old_mapno") return 104; + break; + } + case 'r': { + if (key == "origNoImplicit") return 105; + break; + } + } + break; + } + case 'r': { + switch (key[1]) { + case 'e': { + if (key == "react_atom_idx") return 106; + break; + } + case 'i': { + if (key == "ringMembership") return 107; + break; + } + } + break; + } + case 's': { + if (key == "smilesSymbol") return 108; + break; + } + case 'w': { + if (key == "was_dummy") return 109; + break; + } + } + } + + std::lock_guard guard(get_mu()); + auto& _strkey_to_key = get_strkey_to_key(); + auto it = _strkey_to_key.find(key); + if (it != _strkey_to_key.end()) { + return it->second; + } + + auto& _key_to_strkey = getv_key_to_strkey(); + std::uint16_t new_key = 110 + _strkey_to_key.size(); + _strkey_to_key.emplace_hint(it, key, new_key); + _key_to_strkey.push_back(key); + + // Intentionally keep this log statement in here, + std::cout << "UNHANDLED Dict::get_key: INSERTING <" << key << "> as " << new_key << std::endl; + + return new_key; + } + + static std::string get_strkey(std::uint16_t key) { + if (key < 110) { + return getc_key_to_strkey()[key]; + } + key -= 110; + + std::lock_guard guard(get_mu()); + const auto& _key_to_strkey = getv_key_to_strkey(); + return _key_to_strkey[key]; + } + + // Legacy external-facing interfaces. + // Not used internally. struct Pair { std::string key; RDValue val; @@ -43,20 +508,16 @@ class RDKIT_RDGENERAL_EXPORT Dict { explicit Pair(std::string s) : key(std::move(s)), val() {} Pair(std::string s, const RDValue &v) : key(std::move(s)), val(v) {} }; - typedef std::vector DataType; Dict() {} Dict(const Dict &other) : _data(other._data) { - _hasNonPodData = other._hasNonPodData; - if (other._hasNonPodData) { // other has non pod data, need to copy - std::vector data(other._data.size()); - _data.swap(data); - for (size_t i = 0; i < _data.size(); ++i) { - _data[i].key = other._data[i].key; - copy_rdvalue(_data[i].val, other._data[i].val); - } + InternalDataType data(other._data.size()); + _data.swap(data); + for (size_t i = 0; i < _data.size(); ++i) { + _data[i].key = other._data[i].key; + copy_rdvalue(_data[i].val, other._data[i].val); } } @@ -68,10 +529,9 @@ class RDKIT_RDGENERAL_EXPORT Dict { if (!preserveExisting) { *this = other; } else { - if (other._hasNonPodData) _hasNonPodData = true; for (size_t i = 0; i < other._data.size(); ++i) { - const Pair &pair = other._data[i]; - Pair *target = nullptr; + const InternalPair &pair = other._data[i]; + InternalPair *target = nullptr; for (size_t i = 0; i < _data.size(); ++i) { if (_data[i].key == pair.key) { target = &_data[i]; @@ -81,7 +541,7 @@ class RDKIT_RDGENERAL_EXPORT Dict { if (!target) { // need to create blank entry and copy - _data.push_back(Pair(pair.key)); + _data.push_back(InternalPair(pair.key)); copy_rdvalue(_data.back().val, pair.val); } else { // just copy @@ -93,39 +553,50 @@ class RDKIT_RDGENERAL_EXPORT Dict { Dict &operator=(const Dict &other) { if (this == &other) return *this; - if (_hasNonPodData) reset(); - - if (other._hasNonPodData) { - std::vector data(other._data.size()); - _data.swap(data); - for (size_t i = 0; i < _data.size(); ++i) { - _data[i].key = other._data[i].key; - copy_rdvalue(_data[i].val, other._data[i].val); - } - } else { - _data = other._data; + reset(); + + InternalDataType data(other._data.size()); + _data.swap(data); + for (size_t i = 0; i < _data.size(); ++i) { + _data[i].key = other._data[i].key; + copy_rdvalue(_data[i].val, other._data[i].val); } - _hasNonPodData = other._hasNonPodData; return *this; } - //---------------------------------------------------------- - //! \brief Access to the underlying non-POD containment flag - //! This is meant to be used only in bulk updates of _data. - bool &getNonPODStatus() { return _hasNonPodData; } - //---------------------------------------------------------- //! \brief Access to the underlying data. - const DataType &getData() const { return _data; } - DataType &getData() { return _data; } + const DataType getData() const { + DataType result; + for (const auto& it : _data) { + std::string strkey = Dict::get_strkey(it.key); + result.push_back(Pair(strkey, it.val)); + } + return result; + } + DataType getData() { + DataType result; + for (const auto& it : _data) { + std::string strkey = Dict::get_strkey(it.key); + result.push_back(Pair(strkey, it.val)); + } + return result; + } //---------------------------------------------------------- //! \brief Returns whether or not the dictionary contains a particular //! key. bool hasVal(const std::string &what) const { + std::uint16_t key = get_key(what); + for (const auto &data : _data) { + if (data.key == key) return true; + } + return false; + } + bool hasVal(const std::uint16_t &key) const { for (const auto &data : _data) { - if (data.key == what) return true; + if (data.key == key) return true; } return false; } @@ -139,7 +610,7 @@ class RDKIT_RDGENERAL_EXPORT Dict { STR_VECT res; res.reserve(_data.size()); for (const auto &item : _data) { - res.push_back(item.key); + res.push_back(get_strkey(item.key)); } return res; } @@ -161,26 +632,52 @@ class RDKIT_RDGENERAL_EXPORT Dict { void getVal(const std::string &what, T &res) const { res = getVal(what); } + template + void getVal(const std::uint16_t &key, T &res) const { + res = getVal(key); + } //! \overload template T getVal(const std::string &what) const { + std::uint16_t key = get_key(what); + for (auto &data : _data) { + if (data.key == key) { + return from_rdvalue(data.val); + } + } + throw KeyErrorException(what); + } + template + T getVal(const std::uint16_t &key) const { for (auto &data : _data) { - if (data.key == what) { + if (data.key == key) { return from_rdvalue(data.val); } } + std::string what = get_strkey(key); throw KeyErrorException(what); } //! \overload void getVal(const std::string &what, std::string &res) const { + std::uint16_t key = get_key(what); + for (const auto &i : _data) { + if (i.key == key) { + rdvalue_tostring(i.val, res); + return; + } + } + throw KeyErrorException(what); + } + void getVal(const std::uint16_t &key, std::string &res) const { for (const auto &i : _data) { - if (i.key == what) { + if (i.key == key) { rdvalue_tostring(i.val, res); return; } } + std::string what = get_strkey(key); throw KeyErrorException(what); } @@ -200,8 +697,19 @@ class RDKIT_RDGENERAL_EXPORT Dict { */ template bool getValIfPresent(const std::string &what, T &res) const { + std::uint16_t key = get_key(what); + for (const auto &data : _data) { + if (data.key == key) { + res = from_rdvalue(data.val); + return true; + } + } + return false; + } + template + bool getValIfPresent(const std::uint16_t &key, T &res) const { for (const auto &data : _data) { - if (data.key == what) { + if (data.key == key) { res = from_rdvalue(data.val); return true; } @@ -211,8 +719,18 @@ class RDKIT_RDGENERAL_EXPORT Dict { //! \overload bool getValIfPresent(const std::string &what, std::string &res) const { + std::uint16_t key = get_key(what); + for (const auto &i : _data) { + if (i.key == key) { + rdvalue_tostring(i.val, res); + return true; + } + } + return false; + } + bool getValIfPresent(const std::uint16_t &key, std::string &res) const { for (const auto &i : _data) { - if (i.key == what) { + if (i.key == key) { rdvalue_tostring(i.val, res); return true; } @@ -235,47 +753,81 @@ class RDKIT_RDGENERAL_EXPORT Dict { */ template void setVal(const std::string &what, T &val) { - _hasNonPodData = true; + std::uint16_t key = get_key(what); for (auto &&data : _data) { - if (data.key == what) { + if (data.key == key) { RDValue::cleanup_rdvalue(data.val); data.val = val; return; } } - _data.push_back(Pair(what, val)); + _data.emplace_back(key, val); + } + template + void setVal(const std::uint16_t &key, T &val) { + for (auto &&data : _data) { + if (data.key == key) { + RDValue::cleanup_rdvalue(data.val); + data.val = val; + return; + } + } + _data.emplace_back(key, val); } template void setPODVal(const std::string &what, T val) { - // don't change the hasNonPodData status + std::uint16_t key = get_key(what); for (auto &&data : _data) { - if (data.key == what) { + if (data.key == key) { RDValue::cleanup_rdvalue(data.val); data.val = val; return; } } - _data.push_back(Pair(what, val)); + _data.emplace_back(key, val); + } + + template + void setPODVal(const std::uint16_t &key, T val) { + for (auto &&data : _data) { + if (data.key == key) { + RDValue::cleanup_rdvalue(data.val); + data.val = val; + return; + } + } + _data.emplace_back(key, val); } void setVal(const std::string &what, bool val) { setPODVal(what, val); } + void setVal(const std::uint16_t &key, bool val) { setPODVal(key, val); } void setVal(const std::string &what, double val) { setPODVal(what, val); } + void setVal(const std::uint16_t &key, double val) { setPODVal(key, val); } void setVal(const std::string &what, float val) { setPODVal(what, val); } + void setVal(const std::uint16_t &key, float val) { setPODVal(key, val); } void setVal(const std::string &what, int val) { setPODVal(what, val); } + void setVal(const std::uint16_t &key, int val) { setPODVal(key, val); } void setVal(const std::string &what, unsigned int val) { setPODVal(what, val); } + void setVal(const std::uint16_t &key, unsigned int val) { + setPODVal(key, val); + } //! \overload void setVal(const std::string &what, const char *val) { std::string h(val); setVal(what, h); } + void setVal(const std::uint16_t &key, const char *val) { + std::string h(val); + setVal(key, h); + } //---------------------------------------------------------- //! \brief Clears the value associated with a particular key, @@ -286,11 +838,19 @@ class RDKIT_RDGENERAL_EXPORT Dict { */ void clearVal(const std::string &what) { - for (DataType::iterator it = _data.begin(); it < _data.end(); ++it) { - if (it->key == what) { - if (_hasNonPodData) { - RDValue::cleanup_rdvalue(it->val); - } + std::uint16_t key = get_key(what); + for (InternalDataType::iterator it = _data.begin(); it < _data.end(); ++it) { + if (it->key == key) { + RDValue::cleanup_rdvalue(it->val); + _data.erase(it); + return; + } + } + } + void clearVal(const std::uint16_t &key) { + for (InternalDataType::iterator it = _data.begin(); it < _data.end(); ++it) { + if (it->key == key) { + RDValue::cleanup_rdvalue(it->val); _data.erase(it); return; } @@ -301,19 +861,17 @@ class RDKIT_RDGENERAL_EXPORT Dict { //! \brief Clears all keys (and values) from the dictionary. //! void reset() { - if (_hasNonPodData) { - for (auto &&data : _data) { - RDValue::cleanup_rdvalue(data.val); - } + for (auto &&data : _data) { + RDValue::cleanup_rdvalue(data.val); } - DataType data; + InternalDataType data; _data.swap(data); } private: - DataType _data{}; //!< the actual dictionary - bool _hasNonPodData{false}; // if true, need a deep copy - // (copy_rdvalue) + InternalDataType _data{}; //!< the actual dictionary + + }; template <> diff --git a/Code/RDGeneral/RDProps.h b/Code/RDGeneral/RDProps.h index 56a2f635677..7ac7057429c 100644 --- a/Code/RDGeneral/RDProps.h +++ b/Code/RDGeneral/RDProps.h @@ -40,16 +40,21 @@ class RDProps { STR_VECT getPropList(bool includePrivate = true, bool includeComputed = true) const { const STR_VECT &tmp = d_props.keys(); - STR_VECT res, computed; + STR_VECT res; + std::vector computed; + std::vector computed_names; if (!includeComputed && - getPropIfPresent(RDKit::detail::computedPropName, computed)) { - computed.push_back(RDKit::detail::computedPropName); + getPropIfPresent(RDKit::detail::computedPropNameKey, computed)) { + for (const auto& key : computed) { + computed_names.push_back(Dict::get_strkey(key)); + } + computed_names.push_back(RDKit::detail::computedPropName); } auto pos = tmp.begin(); while (pos != tmp.end()) { if ((includePrivate || (*pos)[0] != '_') && - std::find(computed.begin(), computed.end(), *pos) == computed.end()) { + std::find(computed_names.begin(), computed_names.end(), *pos) == computed_names.end()) { res.push_back(*pos); } pos++; @@ -69,13 +74,29 @@ class RDProps { //! \overload template - void setProp(const std::string &key, T val, bool computed = false) const { + void setProp(const std::string &strkey, T val, bool computed = false) const { + std::uint16_t key = Dict::get_key(strkey); if (computed) { - STR_VECT compLst; - getPropIfPresent(RDKit::detail::computedPropName, compLst); + // STR_VECT compLst; + std::vector compLst; + getPropIfPresent(RDKit::detail::computedPropNameKey, compLst); if (std::find(compLst.begin(), compLst.end(), key) == compLst.end()) { compLst.push_back(key); - d_props.setVal(RDKit::detail::computedPropName, compLst); + d_props.setVal(RDKit::detail::computedPropNameKey, compLst); + } + } + d_props.setVal(key, val); + } + template + void setProp(const std::uint16_t &key, T val, bool computed = false) const { + if (computed) { + // std::string strkey = d_props.get_strkey(key); + // STR_VECT compLst; + std::vector compLst; + getPropIfPresent(RDKit::detail::computedPropNameKey, compLst); + if (std::find(compLst.begin(), compLst.end(), key) == compLst.end()) { + compLst.push_back(key); + d_props.setVal(RDKit::detail::computedPropNameKey, compLst); } } d_props.setVal(key, val); @@ -102,12 +123,20 @@ class RDProps { void getProp(const std::string &key, T &res) const { d_props.getVal(key, res); } + template + void getProp(const std::uint16_t &key, T &res) const { + d_props.getVal(key, res); + } //! \overload template T getProp(const std::string &key) const { return d_props.getVal(key); } + template + T getProp(const std::uint16_t &key) const { + return d_props.getVal(key); + } //! returns whether or not we have a \c property with name \c key //! and assigns the value if we do @@ -116,9 +145,14 @@ class RDProps { bool getPropIfPresent(const std::string &key, T &res) const { return d_props.getValIfPresent(key, res); } + template + bool getPropIfPresent(const std::uint16_t &key, T &res) const { + return d_props.getValIfPresent(key, res); + } //! \overload bool hasProp(const std::string &key) const { return d_props.hasVal(key); }; + bool hasProp(const std::uint16_t &key) const { return d_props.hasVal(key); }; //! clears the value of a \c property /*! @@ -129,13 +163,28 @@ class RDProps { from our list of \c computedProperties */ //! \overload - void clearProp(const std::string &key) const { - STR_VECT compLst; - if (getPropIfPresent(RDKit::detail::computedPropName, compLst)) { + void clearProp(const std::string &strkey) const { + // STR_VECT compLst; + std::uint16_t key = Dict::get_key(strkey); + std::vector compLst; + if (getPropIfPresent(RDKit::detail::computedPropNameKey, compLst)) { + auto svi = std::find(compLst.begin(), compLst.end(), key); + if (svi != compLst.end()) { + compLst.erase(svi); + d_props.setVal(RDKit::detail::computedPropNameKey, compLst); + } + } + d_props.clearVal(key); + }; + void clearProp(const std::uint16_t &key) const { + // STR_VECT compLst; + std::vector compLst; + if (getPropIfPresent(RDKit::detail::computedPropNameKey, compLst)) { + // std::string strkey = Dict::get_strkey(key); auto svi = std::find(compLst.begin(), compLst.end(), key); if (svi != compLst.end()) { compLst.erase(svi); - d_props.setVal(RDKit::detail::computedPropName, compLst); + d_props.setVal(RDKit::detail::computedPropNameKey, compLst); } } d_props.clearVal(key); @@ -143,13 +192,14 @@ class RDProps { //! clears all of our \c computed \c properties void clearComputedProps() const { - STR_VECT compLst; - if (getPropIfPresent(RDKit::detail::computedPropName, compLst)) { + // STR_VECT compLst; + std::vector compLst; + if (getPropIfPresent(RDKit::detail::computedPropNameKey, compLst)) { for (const auto &sv : compLst) { d_props.clearVal(sv); } compLst.clear(); - d_props.setVal(RDKit::detail::computedPropName, compLst); + d_props.setVal(RDKit::detail::computedPropNameKey, compLst); } } diff --git a/Code/RDGeneral/Ranking.h b/Code/RDGeneral/Ranking.h index 7cc5f91f460..4f93657436c 100644 --- a/Code/RDGeneral/Ranking.h +++ b/Code/RDGeneral/Ranking.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -63,25 +64,53 @@ class argless : public std::binary_function { \param res is used to return the ranks of each entry */ template -void rankVect(const std::vector &vect, T2 &res) { - PRECONDITION(res.size() >= vect.size(), "vector size mismatch"); - unsigned int nEntries = rdcast(vect.size()); +void rankVect(const std::vector &vect, std::vector& indices, T2 &res, bool reset_indices = true) { + size_t n = vect.size(); + PRECONDITION(res.size() >= n, "vector size mismatch"); + PRECONDITION(indices.size() == n, "vector/indices size mismatch"); + size_t n_minus_1 = n - 1; - std::vector indices(nEntries); - for (unsigned int i = 0; i < nEntries; ++i) indices[i] = i; - std::sort(indices.begin(), indices.end(), argless>(vect)); + if (reset_indices) { + std::iota(indices.begin(), indices.end(), 0); + std::sort(indices.begin(), indices.end(), argless>(vect)); + } else { + // Use the existing indices for sorting. + // Only sort runs of previously identical values - use the res vector to detect this, + // which was used to store the ranks of the previous run. + for (size_t start = 0; start < n; start++) { + size_t end = start; + while (end < n_minus_1 && res[indices[end]] == res[indices[end+1]]) { + end++; + } + if (end != start) { + std::sort(indices.begin()+start, indices.begin()+end+1, argless>(vect)); + start = end; + } + } + } + res[0] = 0; int currRank = 0; - T1 lastV = vect[indices[0]]; - BOOST_FOREACH (unsigned int idx, indices) { - T1 v = vect[idx]; - if (v == lastV) { + for (size_t i = 1; i < n; i++) { + unsigned int idx = indices[i]; + if (vect[idx] == vect[indices[i-1]]) { res[idx] = currRank; } else { res[idx] = ++currRank; - lastV = v; } } } + +template +void rankVect(const std::vector &vect, T2 &res) { + PRECONDITION(res.size() >= vect.size(), "vector size mismatch"); + unsigned int nEntries = rdcast(vect.size()); + + std::vector indices(nEntries); + + rankVect(vect, indices, res); +} + + } // namespace Rankers #endif diff --git a/Code/RDGeneral/StreamOps.h b/Code/RDGeneral/StreamOps.h index e26bb0e64ec..00e53de73ef 100644 --- a/Code/RDGeneral/StreamOps.h +++ b/Code/RDGeneral/StreamOps.h @@ -477,8 +477,8 @@ inline bool streamWriteProps(std::ostream &ss, const RDProps &props, const Dict &dict = props.getDict(); unsigned int count = 0; - for (Dict::DataType::const_iterator it = dict.getData().begin(); - it != dict.getData().end(); ++it) { + auto data = dict.getData(); + for (Dict::DataType::const_iterator it = data.begin(); it != data.end(); ++it) { if (propnames.find(it->key) != propnames.end()) { if (isSerializable(*it, handlers)) { count++; @@ -489,8 +489,7 @@ inline bool streamWriteProps(std::ostream &ss, const RDProps &props, streamWrite(ss, count); // packed int? unsigned int writtenCount = 0; - for (Dict::DataType::const_iterator it = dict.getData().begin(); - it != dict.getData().end(); ++it) { + for (Dict::DataType::const_iterator it = data.begin(); it != data.end(); ++it) { if (propnames.find(it->key) != propnames.end()) { if (isSerializable(*it, handlers)) { // note - not all properties are serializable, this may be @@ -535,7 +534,6 @@ inline void readRDStringVecValue(std::istream &ss, RDValue &value) { } inline bool streamReadProp(std::istream &ss, Dict::Pair &pair, - bool &dictHasNonPOD, const CustomPropHandlerVec &handlers = {}) { int version = 0; streamRead(ss, pair.key, version); @@ -561,27 +559,21 @@ inline bool streamReadProp(std::istream &ss, Dict::Pair &pair, case DTags::StringTag: readRDValueString(ss, pair.val); - dictHasNonPOD = true; break; case DTags::VecStringTag: readRDStringVecValue(ss, pair.val); - dictHasNonPOD = true; break; case DTags::VecIntTag: readRDVecValue(ss, pair.val); - dictHasNonPOD = true; break; case DTags::VecUIntTag: readRDVecValue(ss, pair.val); - dictHasNonPOD = true; break; case DTags::VecFloatTag: readRDVecValue(ss, pair.val); - dictHasNonPOD = true; break; case DTags::VecDoubleTag: readRDVecValue(ss, pair.val); - dictHasNonPOD = true; break; case DTags::CustomTag: { std::string propType; @@ -590,7 +582,6 @@ inline bool streamReadProp(std::istream &ss, Dict::Pair &pair, for (auto &handler : handlers) { if (propType == handler->getPropName()) { handler->read(ss, pair.val); - dictHasNonPOD = true; return true; } } @@ -610,12 +601,16 @@ inline unsigned int streamReadProps(std::istream &ss, RDProps &props, Dict &dict = props.getDict(); dict.reset(); // Clear data before repopulating - dict.getData().resize(count); + + Dict::DataType dict_data; + dict_data.resize(count); for (unsigned index = 0; index < count; ++index) { - CHECK_INVARIANT(streamReadProp(ss, dict.getData()[index], - dict.getNonPODStatus(), handlers), + CHECK_INVARIANT(streamReadProp(ss, dict_data[index], handlers), "Corrupted property serialization detected"); } + for (const auto& pair : dict_data) { + dict.setVal(pair.key, pair.val); + } return count; } diff --git a/Code/RDGeneral/testRDValue.cpp b/Code/RDGeneral/testRDValue.cpp index 00ed21aa6f7..a45d7994d30 100644 --- a/Code/RDGeneral/testRDValue.cpp +++ b/Code/RDGeneral/testRDValue.cpp @@ -179,6 +179,7 @@ void testProp(T val) { RDProps p; p.setProp("foo", val); TEST_ASSERT(streamWriteProps(ss, p)); + TEST_ASSERT(p.getProp("foo") == val); } { diff --git a/Code/RDGeneral/types.cpp b/Code/RDGeneral/types.cpp index 54c1345bbcc..deec840bf75 100644 --- a/Code/RDGeneral/types.cpp +++ b/Code/RDGeneral/types.cpp @@ -14,6 +14,7 @@ namespace RDKit { namespace detail { const std::string computedPropName = "__computedProps"; +const std::uint16_t computedPropNameKey = 47; } namespace common_properties { @@ -25,21 +26,28 @@ const std::string DistanceMatrix_Paths = "DistanceMatrix_Paths"; const std::string MolFileComments = "MolFileComments"; const std::string MolFileInfo = "MolFileInfo"; const std::string NullBond = "NullBond"; +const std::uint16_t NullBondKey = 8; const std::string _2DConf = "_2DConf"; const std::string _3DConf = "_3DConf"; const std::string _AtomID = "_AtomID"; const std::string _BondsPotentialStereo = "_BondsPotentialStereo"; const std::string _CIPCode = "_CIPCode"; +const std::uint16_t _CIPCodeKey = 13; const std::string _CIPRank = "_CIPRank"; +const std::uint16_t _CIPRankKey = 14; const std::string _ChiralityPossible = "_ChiralityPossible"; +const std::uint16_t _ChiralityPossibleKey = 15; const std::string _CrippenLogP = "_CrippenLogP"; const std::string _CrippenMR = "_CrippenMR"; const std::string _MMFFSanitized = "_MMFFSanitized"; const std::string _MolFileChiralFlag = "_MolFileChiralFlag"; const std::string MRV_SMA = "MRV SMA"; const std::string _MolFileRLabel = "_MolFileRLabel"; +const std::uint16_t _MolFileRLabelKey = 29; const std::string _MolFileAtomQuery = "_MolFileAtomQuery"; +const std::uint16_t _MolFileAtomQueryKey = 21; const std::string _MolFileBondQuery = "_MolFileBondQuery"; +const std::uint16_t _MolFileBondQueryKey = 25; const std::string _MolFileBondEndPts = "_MolFileBondEndPts"; const std::string _MolFileBondAttach = "_MolFileBondAttach"; const std::string _MolFileBondType = "_MolFileBondType"; @@ -49,20 +57,33 @@ const std::string _MolFileBondCfg = "_MolFileBondCfg"; const std::string _Name = "_Name"; const std::string _NeedsQueryScan = "_NeedsQueryScan"; const std::string _QueryFormalCharge = "_QueryFormalCharge"; +const std::uint16_t _QueryFormalChargeKey = 32; const std::string _QueryHCount = "_QueryHCount"; +const std::uint16_t _QueryHCountKey = 33; const std::string _QueryIsotope = "_QueryIsotope"; +const std::uint16_t _QueryIsotopeKey = 34; const std::string _QueryMass = "_QueryMass"; +const std::uint16_t _QueryMassKey = 35; const std::string _ReactionDegreeChanged = "_ReactionDegreeChanged"; +const std::uint16_t _ReactionDegreeChangedKey = 36; const std::string reactantAtomIdx = "react_atom_idx"; +const std::uint16_t reactantAtomIdxKey = 106; const std::string reactionMapNum = "old_mapno"; +const std::uint16_t reactionMapNumKey = 104; const std::string _RingClosures = "_RingClosures"; +const std::uint16_t _RingClosuresKey = 37; const std::string _SLN_s = "_SLN_s"; const std::string _SmilesStart = "_SmilesStart"; +const std::uint16_t _SmilesStartKey = 39; const std::string _StereochemDone = "_StereochemDone"; +const std::uint16_t _StereochemDoneKey = 40; const std::string _TraversalBondIndexOrder = "_TraversalBondIndexOrder"; +const std::uint16_t _TraversalBondIndexOrderKey = 41; const std::string _TraversalRingClosureBond = "_TraversalRingClosureBond"; +const std::uint16_t _TraversalRingClosureBondKey = 42; const std::string _TraversalStartPoint = "_TraversalStartPoint"; +const std::uint16_t _TraversalStartPointKey = 43; const std::string _TriposAtomType = "_TriposAtomType"; const std::string _Unfinished_SLN_ = "_Unfinished_SLN_"; const std::string _UnknownStereo = "_UnknownStereo"; @@ -74,7 +95,10 @@ const std::string _crippenMR = "_crippenMR"; const std::string _crippenMRContribs = "_crippenMRContribs"; const std::string _GasteigerCharge = "_GasteigerCharge"; const std::string _GasteigerHCharge = "_GasteigerHCharge"; +const std::string _canonicalRankingNumbers = "_canonicalRankingNumbers"; +const std::uint16_t _canonicalRankingNumbersKey = 49; const std::string _doIsoSmiles = "_doIsoSmiles"; +const std::uint16_t _doIsoSmilesKey = 58; const std::string _fragSMARTS = "_fragSMARTS"; const std::string _hasMassQuery = "_hasMassQuery"; const std::string _labuteASA = "_labuteASA"; @@ -83,26 +107,35 @@ const std::string _labuteAtomHContrib = "_labuteAtomHContrib"; const std::string _protected = "_protected"; const std::string _queryRootAtom = "_queryRootAtom"; const std::string _ringStereoAtoms = "_ringStereoAtoms"; +const std::uint16_t _ringStereoAtomsKey = 69; const std::string _ringStereoWarning = "_ringStereoWarning"; const std::string _ringStereochemCand = "_ringStereochemCand"; +const std::uint16_t _ringStereochemCandKey = 71; const std::string _smilesAtomOutputOrder = "_smilesAtomOutputOrder"; const std::string _starred = "_starred"; const std::string _supplementalSmilesLabel = "_supplementalSmilesLabel"; +const std::uint16_t _supplementalSmilesLabelKey = 74; const std::string _tpsa = "_tpsa"; const std::string _tpsaAtomContribs = "_tpsaAtomContribs"; const std::string _unspecifiedOrder = "_unspecifiedOrder"; +const std::uint16_t _unspecifiedOrderKey = 77; const std::string _brokenChirality = "_brokenChirality"; +const std::uint16_t _brokenChiralityKey = 48; const std::string _rgroupAtomMaps = "_rgroupAtomMaps"; const std::string _rgroupBonds = "_rgroupBonds"; const std::string dummyLabel = "dummyLabel"; +const std::uint16_t dummyLabelKey = 81; const std::string extraRings = "extraRings"; const std::string isImplicit = "isImplicit"; const std::string maxAttachIdx = "maxAttachIdx"; const std::string molAtomMapNumber = "molAtomMapNumber"; +const std::uint16_t molAtomMapNumberKey = 86; const std::string molFileAlias = "molFileAlias"; const std::string molFileValue = "molFileValue"; const std::string molInversionFlag = "molInversionFlag"; +const std::uint16_t molInversionFlagKey = 92; const std::string molParity = "molParity"; +const std::uint16_t molParityKey = 94; const std::string molStereoCare = "molStereoCare"; const std::string molRxnComponent = "molRxnComponent"; const std::string molRxnRole = "molRxnRole"; @@ -112,6 +145,7 @@ const std::string numArom = "numArom"; const std::string origNoImplicit = "origNoImplicit"; const std::string ringMembership = "ringMembership"; const std::string smilesSymbol = "smilesSymbol"; +const std::uint16_t smilesSymbolKey = 108; const std::string atomLabel = "atomLabel"; const std::string internalRgroupSmiles = "internalRgroupSmiles"; diff --git a/Code/RDGeneral/types.h b/Code/RDGeneral/types.h index 7fd1ba15bdc..64ea863b14c 100644 --- a/Code/RDGeneral/types.h +++ b/Code/RDGeneral/types.h @@ -48,6 +48,7 @@ namespace RDKit { namespace detail { // used in various places for computed properties RDKIT_RDGENERAL_EXPORT extern const std::string computedPropName; +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t computedPropNameKey; } // namespace detail namespace common_properties { @@ -61,10 +62,13 @@ RDKIT_RDGENERAL_EXPORT extern const std::string RDKIT_RDGENERAL_EXPORT extern const std::string _3DConf; // int RDKIT_RDGENERAL_EXPORT extern const std::string _doIsoSmiles; // int (should probably be removed) +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t _doIsoSmilesKey; + RDKIT_RDGENERAL_EXPORT extern const std::string extraRings; // vec > RDKIT_RDGENERAL_EXPORT extern const std::string _smilesAtomOutputOrder; // vec computed RDKIT_RDGENERAL_EXPORT extern const std::string _StereochemDone; // int +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t _StereochemDoneKey; RDKIT_RDGENERAL_EXPORT extern const std::string _NeedsQueryScan; // int (bool) RDKIT_RDGENERAL_EXPORT extern const std::string _fragSMARTS; // std::string RDKIT_RDGENERAL_EXPORT extern const std::string @@ -113,6 +117,10 @@ RDKIT_RDGENERAL_EXPORT extern const std::string RDKIT_RDGENERAL_EXPORT extern const std::string _GasteigerHCharge; // used to hold partial charges from implicit Hs +RDKIT_RDGENERAL_EXPORT extern const std::string + _canonicalRankingNumbers; // unsigned int computed +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t _canonicalRankingNumbersKey; + /////////////////////////////////////////////////////////////// // Atom Props @@ -121,43 +129,58 @@ RDKIT_RDGENERAL_EXPORT extern const std::string _BondsPotentialStereo; // int (or bool) COMPUTED RDKIT_RDGENERAL_EXPORT extern const std::string _CIPCode; // std::string COMPUTED +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t _CIPCodeKey; RDKIT_RDGENERAL_EXPORT extern const std::string _CIPRank; // int COMPUTED +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t _CIPRankKey; RDKIT_RDGENERAL_EXPORT extern const std::string _ChiralityPossible; // int +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t _ChiralityPossibleKey; RDKIT_RDGENERAL_EXPORT extern const std::string _UnknownStereo; // int (bool) AddHs/Chirality RDKIT_RDGENERAL_EXPORT extern const std::string _ringStereoAtoms; // int vect Canon/Chiral/MolHash/MolOps//Renumber//RWmol +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t _ringStereoAtomsKey; RDKIT_RDGENERAL_EXPORT extern const std::string _ringStereochemCand; // chirality bool COMPUTED +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t _ringStereochemCandKey; RDKIT_RDGENERAL_EXPORT extern const std::string _ringStereoWarning; // obsolete ? // Smiles parsing RDKIT_RDGENERAL_EXPORT extern const std::string _SmilesStart; // int +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t _SmilesStartKey; RDKIT_RDGENERAL_EXPORT extern const std::string _TraversalBondIndexOrder; // ? unused +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t _TraversalBondIndexOrderKey; RDKIT_RDGENERAL_EXPORT extern const std::string _TraversalRingClosureBond; // unsigned int +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t _TraversalRingClosureBondKey; RDKIT_RDGENERAL_EXPORT extern const std::string _TraversalStartPoint; // bool +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t _TraversalStartPointKey; RDKIT_RDGENERAL_EXPORT extern const std::string _queryRootAtom; // int SLNParse/SubstructMatch RDKIT_RDGENERAL_EXPORT extern const std::string _hasMassQuery; // atom bool RDKIT_RDGENERAL_EXPORT extern const std::string _protected; // atom int (bool) RDKIT_RDGENERAL_EXPORT extern const std::string _supplementalSmilesLabel; // atom string (SmilesWrite) +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t _supplementalSmilesLabelKey; RDKIT_RDGENERAL_EXPORT extern const std::string _unspecifiedOrder; // atom int (bool) smarts/smiles +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t _unspecifiedOrderKey; RDKIT_RDGENERAL_EXPORT extern const std::string _RingClosures; // INT_VECT smarts/smiles/canon +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t _RingClosuresKey; RDKIT_RDGENERAL_EXPORT extern const std::string atomLabel; // atom string from CXSMILES // MDL Style Properties (MolFileParser) RDKIT_RDGENERAL_EXPORT extern const std::string molAtomMapNumber; // int +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t molAtomMapNumberKey; RDKIT_RDGENERAL_EXPORT extern const std::string molFileAlias; // string RDKIT_RDGENERAL_EXPORT extern const std::string molFileValue; // string RDKIT_RDGENERAL_EXPORT extern const std::string molInversionFlag; // int +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t molInversionFlagKey; RDKIT_RDGENERAL_EXPORT extern const std::string molParity; // int +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t molParityKey; RDKIT_RDGENERAL_EXPORT extern const std::string molStereoCare; // int RDKIT_RDGENERAL_EXPORT extern const std::string molRxnComponent; // int RDKIT_RDGENERAL_EXPORT extern const std::string molRxnRole; // int @@ -172,9 +195,12 @@ RDKIT_RDGENERAL_EXPORT extern const std::string molReactStatus; // int RDKIT_RDGENERAL_EXPORT extern const std::string molFileLinkNodes; // string RDKIT_RDGENERAL_EXPORT extern const std::string _MolFileRLabel; // unsigned int +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t _MolFileRLabelKey; RDKIT_RDGENERAL_EXPORT extern const std::string _MolFileChiralFlag; // int RDKIT_RDGENERAL_EXPORT extern const std::string _MolFileAtomQuery; // int +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t _MolFileAtomQueryKey; RDKIT_RDGENERAL_EXPORT extern const std::string _MolFileBondQuery; // int +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t _MolFileBondQueryKey; RDKIT_RDGENERAL_EXPORT extern const std::string _MolFileBondEndPts; // string RDKIT_RDGENERAL_EXPORT extern const std::string _MolFileBondAttach; // string RDKIT_RDGENERAL_EXPORT extern const std::string @@ -187,20 +213,29 @@ RDKIT_RDGENERAL_EXPORT extern const std::string RDKIT_RDGENERAL_EXPORT extern const std::string MRV_SMA; // smarts string from Marvin RDKIT_RDGENERAL_EXPORT extern const std::string dummyLabel; // atom string +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t dummyLabelKey; // Reaction Information (Reactions.cpp) RDKIT_RDGENERAL_EXPORT extern const std::string _QueryFormalCharge; // int +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t _QueryFormalChargeKey; RDKIT_RDGENERAL_EXPORT extern const std::string _QueryHCount; // int +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t _QueryHCountKey; RDKIT_RDGENERAL_EXPORT extern const std::string _QueryIsotope; // int -RDKIT_RDGENERAL_EXPORT extern const std::string - _QueryMass; // int = round(float * 1000) +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t _QueryIsotopeKey; // int +RDKIT_RDGENERAL_EXPORT extern const std::string _QueryMass; // int = round(float * 1000) +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t _QueryMassKey; + RDKIT_RDGENERAL_EXPORT extern const std::string _ReactionDegreeChanged; // int (bool) +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t _ReactionDegreeChangedKey; RDKIT_RDGENERAL_EXPORT extern const std::string NullBond; // int (bool) +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t NullBondKey; RDKIT_RDGENERAL_EXPORT extern const std::string _rgroupAtomMaps; RDKIT_RDGENERAL_EXPORT extern const std::string _rgroupBonds; RDKIT_RDGENERAL_EXPORT extern const std::string reactantAtomIdx; +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t reactantAtomIdxKey; RDKIT_RDGENERAL_EXPORT extern const std::string reactionMapNum; +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t reactionMapNumKey; // SLN RDKIT_RDGENERAL_EXPORT extern const std::string @@ -213,9 +248,11 @@ RDKIT_RDGENERAL_EXPORT extern const std::string _Unfinished_SLN_; // int (bool) // Smarts Smiles RDKIT_RDGENERAL_EXPORT extern const std::string _brokenChirality; // atom bool +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t _brokenChiralityKey; RDKIT_RDGENERAL_EXPORT extern const std::string isImplicit; // atom int (bool) RDKIT_RDGENERAL_EXPORT extern const std::string smilesSymbol; // atom string (only used in test?) +RDKIT_RDGENERAL_EXPORT extern const std::uint16_t smilesSymbolKey; // Tripos RDKIT_RDGENERAL_EXPORT extern const std::string diff --git a/Code/cmake/Modules/FindInchi.cmake b/Code/cmake/Modules/FindInchi.cmake index 46bfff75039..65db5f33d44 100644 --- a/Code/cmake/Modules/FindInchi.cmake +++ b/Code/cmake/Modules/FindInchi.cmake @@ -47,7 +47,9 @@ else(EXISTS ${CUSTOM_INCHI_PATH}/src/INCHI_BASE/src/ichican2.c) if(NOT DEFINED INCHI_BASE) string(REGEX REPLACE "^.*/" "" INCHI_BASE "${INCHI_URL}") endif() - downloadAndCheckMD5(${INCHI_URL} "${CUSTOM_INCHI_PATH}/${INCHI_BASE}" ${INCHI_MD5SUM}) + # Temporary fix: don't download the file. It is included in the repo for now + # Uncomment the line below to restore downloading the file + #downloadAndCheckMD5(${INCHI_URL} "${CUSTOM_INCHI_PATH}/${INCHI_BASE}" ${INCHI_MD5SUM}) execute_process(COMMAND ${CMAKE_COMMAND} -E tar xf ${CUSTOM_INCHI_PATH}/INCHI-1-SRC.zip WORKING_DIRECTORY ${CUSTOM_INCHI_PATH}) diff --git a/Docs/Book/RDKit_Book.rst b/Docs/Book/RDKit_Book.rst index 4591bf8ddf9..ca771dad29f 100644 --- a/Docs/Book/RDKit_Book.rst +++ b/Docs/Book/RDKit_Book.rst @@ -1266,16 +1266,11 @@ What has been tested - The chemical reactions code - The Open3DAlign code - The MolDraw2D drawing code + - The InChI code, with InChI IUPAC v1.05 Known Problems -------------- - - InChI generation and (probably) parsing. This seems to be a - limitation of the IUPAC InChI code. In order to allow the code to - be used in a multi-threaded environment, a mutex is used to ensure - that only one thread is using the IUPAC code at a time. This is - only enabled if the RDKit is built with the ``RDK_TEST_MULTITHREADED`` - option enabled. - The MolSuppliers (e.g. SDMolSupplier, SmilesMolSupplier?) change their internal state when a molecule is read. It is not safe to use one supplier on more than one thread. diff --git a/Docs/Book_jp/The_RDKit_Book_jp.rst b/Docs/Book_jp/The_RDKit_Book_jp.rst index 9195141e656..901aa5086db 100644 --- a/Docs/Book_jp/The_RDKit_Book_jp.rst +++ b/Docs/Book_jp/The_RDKit_Book_jp.rst @@ -846,15 +846,12 @@ RDKitを書いている間、コードがマルチスレッド環境でも作動 - chemical reactionsコード - Open3DAlignコード - MolDraw2D 描画コード +- InChIコード(v1.05) 把握済みの問題 --------------------------------------------- [`Known problems `__] -- InChiの生成と(おそらく)解析。これはIUPACInChiコードの限界である様に見えます。 - コードをマルチスレッド環境で使える様にするため、確実に一度にスレッド一つだけがIUPACコードを使うことを保証するようミューテックスが使われます。 - これはRDKitが``RDK_TEST_MULTITHREADED`` オプションを有効にしてビルドされている場合のみ利用可能です。 - - MolSuppliers(例えばSDMolSupplierやSmilesMolSupplier?)は分子が読み込まれたときに内部の状態を変えます。2つ以上のスレッドで一つのsupplierを使うのは安全ではありません。 - 再起的なクエリを含むクエリ分子を使った部分構造検索。再起的クエリは検索が実行されているとき内部の状態を修正します。 diff --git a/External/INCHI-API/INCHI-1-SRC.zip b/External/INCHI-API/INCHI-1-SRC.zip new file mode 100644 index 00000000000..54fce5dfc33 Binary files /dev/null and b/External/INCHI-API/INCHI-1-SRC.zip differ diff --git a/External/INCHI-API/download-inchi.sh b/External/INCHI-API/download-inchi.sh index a3aa6cc21db..f140dd1d1f6 100755 --- a/External/INCHI-API/download-inchi.sh +++ b/External/INCHI-API/download-inchi.sh @@ -34,44 +34,51 @@ # places it (temporarily) in a /tmp directory. This directory will # automatically dissappear after you reboot the machine. -if ! which wget > /dev/null -then - echo "**Error: wget not found!" - exit 1 -fi +# As a temporary fix to issues with the inchi-trust website, this zip +# file has already been included in the repo. +echo "INCHI-1-SRC.zip has already been included in this repo" -DIR=`pwd` -TEMPDIR=`mktemp -d -t rdkit-inchi-XXX` -if [[ $DIR =~ External/INCHI-API$ ]] -then - mkdir -p src - echo "================================================================" - echo "Downloading InChI software distribution version 1.05" - echo " http://www.inchi-trust.org/download/105/INCHI-1-SRC.zip" - echo " ====>" - echo " $TEMPDIR" - echo "================================================================" - cd $TEMPDIR - wget http://www.inchi-trust.org/download/105/INCHI-1-SRC.zip - echo "================================================================" - echo "Unarchiving" - echo "================================================================" - unzip INCHI-1-SRC - echo "================================================================" - echo "Copying files" - echo "================================================================" - cp -a INCHI-1-SRC/* "$DIR/src" - echo "================================================================" - echo "Removing temporary files" - echo "================================================================" - cd $DIR - rm -rf $TEMPDIR - echo "================================================================" - echo "Done!" - echo "Make sure you (re)run cmake before running make" - echo "================================================================" -else - echo '**Error: you must invoke this script from within the directory' - echo ' $RDKIT_SOURCE_ROOT/External/INCHI-API' - exit 1 -fi +# Uncomment all lines below (and remove the message above) if you need +# to restore the original behavior of this script. + +# if ! which wget > /dev/null +# then +# echo "**Error: wget not found!" +# exit 1 +# fi +# +# DIR=`pwd` +# TEMPDIR=`mktemp -d -t rdkit-inchi-XXX` +# if [[ $DIR =~ External/INCHI-API$ ]] +# then +# mkdir -p src +# echo "================================================================" +# echo "Downloading InChI software distribution version 1.05" +# echo " http://www.inchi-trust.org/download/105/INCHI-1-SRC.zip" +# echo " ====>" +# echo " $TEMPDIR" +# echo "================================================================" +# cd $TEMPDIR +# wget http://www.inchi-trust.org/download/105/INCHI-1-SRC.zip +# echo "================================================================" +# echo "Unarchiving" +# echo "================================================================" +# unzip INCHI-1-SRC +# echo "================================================================" +# echo "Copying files" +# echo "================================================================" +# cp -a INCHI-1-SRC/* "$DIR/src" +# echo "================================================================" +# echo "Removing temporary files" +# echo "================================================================" +# cd $DIR +# rm -rf $TEMPDIR +# echo "================================================================" +# echo "Done!" +# echo "Make sure you (re)run cmake before running make" +# echo "================================================================" +# else +# echo '**Error: you must invoke this script from within the directory' +# echo ' $RDKIT_SOURCE_ROOT/External/INCHI-API' +# exit 1 +# fi diff --git a/External/INCHI-API/inchi.cpp b/External/INCHI-API/inchi.cpp index 77616c4a5fd..03d4d0e0988 100644 --- a/External/INCHI-API/inchi.cpp +++ b/External/INCHI-API/inchi.cpp @@ -60,6 +60,7 @@ #include #include #include +#include #include #include #include @@ -73,9 +74,9 @@ #include #include #include -#if RDK_TEST_MULTITHREADED +// #if RDK_BUILD_THREADSAFE_SSS #include -#endif +// #endif //#define DEBUG 1 namespace RDKit { @@ -1254,10 +1255,6 @@ void cleanUp(RWMol& mol) { } // end cleanUp } // namespace -#if RDK_TEST_MULTITHREADED -std::mutex inchiMutex; -#endif - RWMol* InchiToMol(const std::string& inchi, ExtraInchiReturnValues& rv, bool sanitize, bool removeHs) { // input @@ -1273,9 +1270,6 @@ RWMol* InchiToMol(const std::string& inchi, ExtraInchiReturnValues& rv, { // output structure inchi_OutputStruct inchiOutput; -#if RDK_TEST_MULTITHREADED - std::lock_guard lock(inchiMutex); -#endif // DLL call int retcode = GetStructFromINCHI(&inchiInput, &inchiOutput); @@ -1711,10 +1705,21 @@ void fixOptionSymbol(const char* in, char* out) { /*! "reverse" clean up: prepare a molecule to be used with InChI sdk */ void rCleanUp(RWMol& mol) { - RWMol* q = SmilesToMol("[O-][Cl+3]([O-])([O-])O"); + static RWMol* q; + + // Note: before upstreaming to RDKit, we'll likely need to add a compilation guard like + // #if RDK_BUILD_THREADSAFE_SSS + static std::once_flag q_init_once; + std::call_once(q_init_once, [&]() {q = SmilesToMol("[O-][Cl+3]([O-])([O-])O"); }); + // #else + // if (!q) { + // q = SmilesToMol("[O-][Cl+3]([O-])([O-])O"); + // } + // #endif + // and also guard the #include at the top. + std::vector fgpMatches; SubstructMatch(mol, *q, fgpMatches); - delete q; // replace all matches for (auto match : fgpMatches) { // collect matching atoms @@ -1848,7 +1853,7 @@ std::string MolToInchi(const ROMol& mol, ExtraInchiReturnValues& rv, // convert tetrahedral chirality info to Stereo0D if (atom->getChiralTag() != Atom::CHI_UNSPECIFIED || - atom->hasProp("molParity")) { + atom->hasProp(common_properties::molParityKey)) { // we ignore the molParity if the number of neighbors are below 3 atom->calcImplicitValence(); if (atom->getNumImplicitHs() + atom->getDegree() < 3) { @@ -2076,9 +2081,6 @@ std::string MolToInchi(const ROMol& mol, ExtraInchiReturnValues& rv, // call DLL std::string inchi; { -#if RDK_TEST_MULTITHREADED - std::lock_guard lock(inchiMutex); -#endif int retcode = GetINCHI(&input, &output); // generate output @@ -2119,9 +2121,6 @@ std::string MolBlockToInchi(const std::string& molBlock, // call DLL std::string inchi; { -#if RDK_TEST_MULTITHREADED - std::lock_guard lock(inchiMutex); -#endif char* _options = nullptr; if (options) { _options = new char[strlen(options) + 1]; @@ -2158,9 +2157,6 @@ std::string InchiToInchiKey(const std::string& inchi) { char xtra1[65], xtra2[65]; int ret = 0; { -#if RDK_TEST_MULTITHREADED - std::lock_guard lock(inchiMutex); -#endif ret = GetINCHIKeyFromINCHI(inchi.c_str(), 0, 0, inchiKey, xtra1, xtra2); } std::string error; diff --git a/External/INCHI-API/test.cpp b/External/INCHI-API/test.cpp index ed318845a67..95dc14537b6 100644 --- a/External/INCHI-API/test.cpp +++ b/External/INCHI-API/test.cpp @@ -29,7 +29,7 @@ using namespace RDKit; #ifdef RDK_TEST_MULTITHREADED namespace { void runblock(const std::vector &mols, unsigned int count, - unsigned int idx, std::vector &inchis, + unsigned int idx, const std::vector &inchis, const std::vector &keys) { for (unsigned int j = 0; j < 200; j++) { for (unsigned int i = 0; i < mols.size(); ++i) { @@ -44,12 +44,18 @@ void runblock(const std::vector &mols, unsigned int count, TEST_ASSERT(key == keys[i]); std::string key2 = MolToInchiKey(*mol); TEST_ASSERT(key2 == keys[i]); + ROMol *mol2 = InchiToMol(inchi, tmp); TEST_ASSERT(mol2); ExtraInchiReturnValues tmp2; std::string inchi2 = MolToInchi(*mol2, tmp2); TEST_ASSERT(inchi == inchi2); delete mol2; + + std::string mol_block = MolToMolBlock(*mol); + ExtraInchiReturnValues tmp3; + std::string inchi3 = MolBlockToInchi(mol_block, tmp3); + TEST_ASSERT(inchi == inchi3); } } }; @@ -67,17 +73,12 @@ void testMultiThread() { std::cerr << "reading molecules" << std::endl; std::vector mols; while (!suppl.atEnd() && mols.size() < 100) { - ROMol *mol = nullptr; - try { - mol = suppl.next(); - } catch (...) { - continue; - } - if (!mol) { - continue; - } + ROMol *mol = suppl.next(); + TEST_ASSERT(mol != nullptr); mols.push_back(mol); } + TEST_ASSERT(mols.size() == 100); + std::cerr << "generating reference data" << std::endl; std::vector inchis; std::vector keys;