Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
name = "steamroll"
description = "Package to convert 3D molecules to RDKit"
license = {file = "LICENSE"}
version = "0.0.3"
version = "0.0.4"
readme = "README.md"
keywords = []
authors = [
Expand Down
7 changes: 3 additions & 4 deletions steamroll/steamroll.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,9 @@

logger = logging.getLogger(__name__)

# Lanthanides Ce-Yb (58-70) and actinides Ac-Lr (89-103) that xyz2mol cannot
# handle. Molecules containing these elements bypass xyz2mol and go directly to
# the geometry-only xyz2ac_obabel fallback.
_SKIP_XYZ2MOL: frozenset[int] = frozenset(range(58, 71)) | frozenset(range(89, 104))
# Lanthanides and actinides are now included in TRANSITION_METALS_NUM, so
# has_tm will be True for them and they route through get_tmc_mol directly.
_SKIP_XYZ2MOL: frozenset[int] = frozenset()


class SteamrollConversionError(Exception):
Expand Down
151 changes: 46 additions & 105 deletions steamroll/xyz2mol_tmc/xyz2mol_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
DOI: 10.1002/bkcs.10334

Modified by Maria Harris Rasmussen 2024
Modified by Corin Wagen 2026
"""

import copy
Expand Down Expand Up @@ -122,116 +123,51 @@
"u",
"np",
"pu",
"am",
"cm",
"bk",
"cf",
"es",
"fm",
"md",
"no",
"lr",
]


global atomic_valence
global atomic_valence_electrons

atomic_valence = defaultdict(list)
atomic_valence[1] = [1]
atomic_valence[5] = [3, 4]
atomic_valence[6] = [4, 2]
atomic_valence[7] = [3, 4]
atomic_valence[8] = [2, 1, 3] # [2,1,3]
atomic_valence[9] = [1]
atomic_valence[13] = [3, 4]
atomic_valence[14] = [4]
atomic_valence[15] = [3, 5] # [5,4,3]
atomic_valence[16] = [2, 4, 6] # [6,3,2]
atomic_valence[17] = [1]
atomic_valence[18] = [0]
atomic_valence[32] = [4]
atomic_valence[33] = [5, 3]
atomic_valence[35] = [1]
atomic_valence[34] = [2]
atomic_valence[52] = [2]
atomic_valence[53] = [1]

atomic_valence[21] = [20]
atomic_valence[22] = [20]
atomic_valence[23] = [20]
atomic_valence[24] = [20]
atomic_valence[25] = [20]
atomic_valence[26] = [20]
atomic_valence[27] = [20]
atomic_valence[28] = [20]
atomic_valence[29] = [20]
atomic_valence[30] = [20]

atomic_valence[39] = [20]
atomic_valence[40] = [20]
atomic_valence[41] = [20]
atomic_valence[42] = [20]
atomic_valence[43] = [20]
atomic_valence[44] = [20]
atomic_valence[45] = [20]
atomic_valence[46] = [20]
atomic_valence[47] = [20]
atomic_valence[48] = [20]


atomic_valence[57] = [20]
atomic_valence[58] = [20]
atomic_valence[59] = [20]
atomic_valence[60] = [20]
atomic_valence[61] = [20]
atomic_valence[62] = [20]
atomic_valence[63] = [20]
atomic_valence[64] = [20]
atomic_valence[65] = [20]
atomic_valence[66] = [20]
atomic_valence[67] = [20]
atomic_valence[68] = [20]
atomic_valence[69] = [20]
atomic_valence[70] = [20]
atomic_valence[71] = [20]
atomic_valence[72] = [20]
atomic_valence[73] = [20]
atomic_valence[74] = [20]
atomic_valence[75] = [20]
atomic_valence[76] = [20]
atomic_valence[77] = [20]
atomic_valence[78] = [20]
atomic_valence[79] = [20]
atomic_valence[80] = [20]

atomic_valence[89] = [20]
atomic_valence[90] = [20]
atomic_valence[91] = [20]
atomic_valence[92] = [20]
atomic_valence[93] = [20]
atomic_valence[94] = [20]
atomic_valence[95] = [20]
atomic_valence[96] = [20]
atomic_valence[97] = [20]
atomic_valence[98] = [20]
atomic_valence[99] = [20]
atomic_valence[100] = [20]
atomic_valence[101] = [20]
atomic_valence[102] = [20]
atomic_valence[103] = [20]


atomic_valence_electrons = {}
atomic_valence_electrons[1] = 1
atomic_valence_electrons[5] = 3
atomic_valence_electrons[6] = 4
atomic_valence_electrons[7] = 5
atomic_valence_electrons[8] = 6
atomic_valence_electrons[9] = 7
atomic_valence_electrons[13] = 3
atomic_valence_electrons[14] = 4
atomic_valence_electrons[15] = 5
atomic_valence_electrons[16] = 6
atomic_valence_electrons[17] = 7
atomic_valence_electrons[18] = 8
atomic_valence_electrons[32] = 4
atomic_valence_electrons[33] = 5
atomic_valence_electrons[35] = 7
atomic_valence_electrons[34] = 6
atomic_valence_electrons[52] = 6
atomic_valence_electrons[53] = 7
atomic_valence = defaultdict(list, {
1: [1], 5: [3, 4], 6: [4, 2], 7: [3, 4], 8: [2, 1, 3],
9: [1], 13: [3, 4], 14: [4], 15: [3, 5], 16: [2, 4, 6],
17: [1], 18: [0], 32: [4], 33: [5, 3], 34: [2], 35: [1],
52: [2], 53: [1],
# d-block, lanthanides, actinides: sentinel valence = 20
**{z: [20] for z in range(21, 31)}, # Sc–Zn
**{z: [20] for z in range(39, 49)}, # Y–Cd
**{z: [20] for z in range(57, 81)}, # La–Hg
**{z: [20] for z in range(89, 104)}, # Ac–Lr
Comment on lines +147 to +150
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice!

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, it's much clearer this way

})

atomic_valence_electrons = {
# Main-group
1: 1, 5: 3, 6: 4, 7: 5, 8: 6, 9: 7,
13: 3, 14: 4, 15: 5, 16: 6, 17: 7, 18: 8,
32: 4, 33: 5, 34: 6, 35: 7, 52: 6, 53: 7,
# 3d TMs (Sc–Zn): 3, 4, … 12
**{z: z - 18 for z in range(21, 31)},
# 4d TMs (Y–Cd): 3, 4, … 12
**{z: z - 36 for z in range(39, 49)},
# 5d: La, then Lu–Hg: 3, 4, … 12
57: 3,
**{z: z - 68 for z in range(71, 81)},
# Lanthanides Ce–Yb: all trivalent
**{z: 3 for z in range(58, 71)},
# Actinides: Ac–Pu ramp 3–8, then Am–Lr trivalent
**{89: 3, 90: 4, 91: 5, 92: 6, 93: 7, 94: 8},
**{z: 3 for z in range(95, 104)},
}


def str_atom(atom):
Expand Down Expand Up @@ -279,7 +215,7 @@ def get_BO(AC, UA, DU, valences, UA_pairs, use_graph=True):


def valences_not_too_large(BO, valences):
""""""
"""Checks that the number of valences isn't too large."""
number_of_bonds_list = BO.sum(axis=1)
for valence, number_of_bonds in zip(valences, number_of_bonds_list):
if number_of_bonds > valence:
Expand Down Expand Up @@ -1038,6 +974,11 @@ def xyz2AC_obabel(atoms, xyz, tolerance=0.45):

Returns:
AC - adjacency matrix

NOTE: RDKit's GetRcovalent supplies covalent radii for lanthanides and
actinides, so the default 0.45 Å tolerance should work for f-block metals.
If structures with these elements show missing M-L bonds, consider
increasing tolerance to ~0.55-0.60 Å.
"""
global atomic_valence
# atomic_valence[8] = [2,1]
Expand Down
106 changes: 101 additions & 5 deletions steamroll/xyz2mol_tmc/xyz2mol_tmc.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
"""Module for the xyz2mol functionality for TMCs"""
"""Module for the xyz2mol functionality for TMCs.

Supports d-block transition metals, lanthanides (La=57, Ce–Yb=58–70, Lu=71),
and actinides (Ac–Lr=89–103). The f-block elements use an ionic decomposition
model where 4f electrons (lanthanides) and most 5f electrons (heavier actinides)
are treated as core-like and non-bonding. Actinide covalency (especially for
early actinides U, Np, Pu) is only partially captured by this approach.
"""

import argparse
import logging
Expand All @@ -20,13 +27,23 @@
)

# fmt: off
# Includes d-block transition metals, lanthanides (La=57, Ce-Yb=58-70, Lu=71),
# and actinides (Ac-Lr=89-103). La and Lu are included as honorary members.
TRANSITION_METALS = ["Sc","Ti","V","Cr","Mn","Fe","Co","La","Ni","Cu","Zn",
"Y","Zr","Nb","Mo","Tc","Ru","Rh","Pd","Ag","Cd","Lu",
"Hf","Ta","W","Re","Os","Ir","Pt","Au","Hg",
# Lanthanides (Ce-Yb)
"Ce","Pr","Nd","Pm","Sm","Eu","Gd","Tb","Dy","Ho","Er","Tm","Yb",
# Actinides (Ac-Lr)
"Ac","Th","Pa","U","Np","Pu","Am","Cm","Bk","Cf","Es","Fm","Md","No","Lr",
]

TRANSITION_METALS_NUM = [21,22,23,24,25,26,27,57,28,29,30,39,40,41,
42,43,44,45,46,47,48,71,72,73,74,75,76,77,78,79,80,
# Lanthanides (Ce-Yb)
58,59,60,61,62,63,64,65,66,67,68,69,70,
# Actinides (Ac-Lr)
89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,
]


Expand All @@ -52,6 +69,7 @@
"Ag": [1],
"Cd": [2],
"La": [3],
"Lu": [3],
"Hf": [4],
"Ta": [3, 4, 5],
"W": [2, 3, 4, 5, 6],
Expand All @@ -61,6 +79,36 @@
"Pt": [2, 4],
"Au": [1, 3],
"Hg": [1, 2],
# Lanthanides (Ce-Yb)
"Ce": [3, 4],
"Pr": [3, 4],
"Nd": [3],
"Pm": [3],
"Sm": [2, 3],
"Eu": [2, 3],
"Gd": [3],
"Tb": [3, 4],
"Dy": [3],
"Ho": [3],
"Er": [3],
"Tm": [2, 3],
"Yb": [2, 3],
# Actinides (Ac-Lr)
"Ac": [3],
"Th": [4],
"Pa": [4, 5],
"U": [3, 4, 5, 6],
"Np": [3, 4, 5, 6, 7],
"Pu": [3, 4, 5, 6, 7],
"Am": [3, 4, 5, 6],
"Cm": [3, 4],
"Bk": [3, 4],
"Cf": [3],
"Es": [3],
"Fm": [3],
"Md": [2, 3],
"No": [2, 3],
"Lr": [3],
}
# fmt: on

Expand All @@ -71,7 +119,14 @@
params.splitGrignards = True
params.adjustCharges = False

MetalNon_Hg = "[#3,#11,#12,#19,#13,#21,#22,#23,#24,#25,#26,#27,#28,#29,#30,#39,#40,#41,#42,#43,#44,#45,#46,#47,#48,#57,#72,#73,#74,#75,#76,#77,#78,#79,#80]~[B,#6,#14,#15,#33,#51,#16,#34,#52,Cl,Br,I,#85]"
# Build MetalNon_Hg programmatically from TRANSITION_METALS_NUM so that future
# additions to that list are reflected here automatically.
_s_block = [3, 11, 12, 19, 13]
_all_metals_num = _s_block + TRANSITION_METALS_NUM
MetalNon_Hg = (
"[" + ",".join(f"#{n}" for n in _all_metals_num) + "]"
"~[B,#6,#14,#15,#33,#51,#16,#34,#52,Cl,Br,I,#85]"
)

pt = GetPeriodicTable

Expand Down Expand Up @@ -120,6 +175,23 @@
atomic_valence_electrons[48] = 12 # Cd

atomic_valence_electrons[57] = 3 # La

# Lanthanides — 4f electrons treated as core-like; dominant +3 chemistry.
# Note: only used in get_proposed_ligand_charge() which runs on metal-free fragments.
atomic_valence_electrons[58] = 3 # Ce
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we rewrite this section so that the whole dictionary gets built at once?

atomic_valence_electrons[59] = 3 # Pr
atomic_valence_electrons[60] = 3 # Nd
atomic_valence_electrons[61] = 3 # Pm
atomic_valence_electrons[62] = 3 # Sm
atomic_valence_electrons[63] = 3 # Eu
atomic_valence_electrons[64] = 3 # Gd
atomic_valence_electrons[65] = 3 # Tb
atomic_valence_electrons[66] = 3 # Dy
atomic_valence_electrons[67] = 3 # Ho
atomic_valence_electrons[68] = 3 # Er
atomic_valence_electrons[69] = 3 # Tm
atomic_valence_electrons[70] = 3 # Yb

atomic_valence_electrons[72] = 4 # Hf
atomic_valence_electrons[73] = 5 # Ta
atomic_valence_electrons[74] = 6 # W
Expand All @@ -130,6 +202,24 @@
atomic_valence_electrons[79] = 11 # Au
atomic_valence_electrons[80] = 12 # Hg

# Actinides — notional bonding electron counts; early actinides are more covalent.
# Note: only used in get_proposed_ligand_charge() which runs on metal-free fragments.
atomic_valence_electrons[89] = 3 # Ac
atomic_valence_electrons[90] = 4 # Th
atomic_valence_electrons[91] = 5 # Pa
atomic_valence_electrons[92] = 6 # U
atomic_valence_electrons[93] = 7 # Np
atomic_valence_electrons[94] = 8 # Pu
atomic_valence_electrons[95] = 3 # Am (treated like lanthanide)
atomic_valence_electrons[96] = 3 # Cm
atomic_valence_electrons[97] = 3 # Bk
atomic_valence_electrons[98] = 3 # Cf
atomic_valence_electrons[99] = 3 # Es
atomic_valence_electrons[100] = 3 # Fm
atomic_valence_electrons[101] = 3 # Md
atomic_valence_electrons[102] = 3 # No
atomic_valence_electrons[103] = 3 # Lr


def shell(cmd, shell=False):
if shell:
Expand Down Expand Up @@ -160,9 +250,8 @@ def fix_NO2(smiles):
"""
m = Chem.MolFromSmiles(smiles)
emol = Chem.RWMol(m)
patt = Chem.MolFromSmarts(
"[#8-]-[#7+0]-[#8-].[#21,#22,#23,#24,#25,#26,#27,#28,#29,#30,#39,#40,#41,#42,#43,#44,#45,#46,#47,#48,#57,#72,#73,#74,#75,#76,#77,#78,#79,#80]"
)
_metal_smarts = ",".join(f"#{n}" for n in TRANSITION_METALS_NUM)
patt = Chem.MolFromSmarts(f"[#8-]-[#7+0]-[#8-].[{_metal_smarts}]")
matches = emol.GetSubstructMatches(patt)
for a1, a2, a3, a4 in matches:
if not emol.GetBondBetweenAtoms(a1, a4) and not emol.GetBondBetweenAtoms(a3, a4):
Expand Down Expand Up @@ -230,6 +319,9 @@ def get_proposed_ligand_charge(ligand_mol, cutoff=-10):
and omparing with total number of valence electrons. If charge is >=
1 (<-1) and the LUMO (HOMO) is low (high) in energy, two additional
electrons are added (removed). The suggested charge is returned.

NOTE: This runs on metal-free ligand fragments only (after MetalDisconnector
separates the metal). The EHT cutoff is independent of metal identity.
"""
valence_electrons = 0
passed, result = rdEHTTools.RunMol(ligand_mol)
Expand Down Expand Up @@ -538,6 +630,10 @@ def get_tmc_mol(xyz_file, overall_charge, with_stereo=False):
cut_atoms = []
for i, j in combinations(coordinating_atoms_idx, 2):
bond = emol.GetBondBetweenAtoms(int(i), int(j))
# NOTE: 0.4 Å threshold calibrated for d-block metals. For lanthanides/
# actinides with longer M-L bonds, relative distances within haptic rings
# should still be comparable. May need adjustment if haptic atoms are
# incorrectly pruned for f-block complexes.
if bond and abs(dMat[i, tm_idx] - dMat[j, tm_idx]) >= 0.4:
logger.debug(
"Haptic bond pattern with too great distance:",
Expand Down
9 changes: 9 additions & 0 deletions tests/data/UCl6.xyz
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
7
charge=-2
U 0.000000 0.000000 0.000000
Cl 2.630000 0.000000 0.000000
Cl -2.630000 0.000000 0.000000
Cl 0.000000 2.630000 0.000000
Cl 0.000000 -2.630000 0.000000
Cl 0.000000 0.000000 2.630000
Cl 0.000000 0.000000 -2.630000
Loading