diff --git a/gufe/components/explicitmoleculecomponent.py b/gufe/components/explicitmoleculecomponent.py index 66f3177e7..e6c11d679 100644 --- a/gufe/components/explicitmoleculecomponent.py +++ b/gufe/components/explicitmoleculecomponent.py @@ -14,7 +14,8 @@ def _ensure_ofe_name(mol: RDKitMol, name: str) -> str: """ Determine the correct name from the rdkit.Chem.Mol and the user-provided - name; ensure that is set in the rdkit representation. + name; ensure that is set in the rdkit representation. We also perform some sanitation of the name + to help downstream tools. """ try: rdkit_name = mol.GetProp("_Name") @@ -31,6 +32,16 @@ def _ensure_ofe_name(mol: RDKitMol, name: str) -> str: elif name == "": name = rdkit_name + # sanitize the name before we set it + # list of characters to replace + to_replace = [" ", "/", "*"] + if any([i in name for i in to_replace]): + # strip leading and trailing whitespace + name = name.strip() + for i in to_replace: + name = name.replace(i, "") + warnings.warn(f"Component name sanitized to: {name}") + mol.SetProp("ofe-name", name) return name diff --git a/gufe/tests/test_smallmoleculecomponent.py b/gufe/tests/test_smallmoleculecomponent.py index aab901eb7..47ce51765 100644 --- a/gufe/tests/test_smallmoleculecomponent.py +++ b/gufe/tests/test_smallmoleculecomponent.py @@ -52,6 +52,11 @@ def named_ethane(): ("bar", "", "foo", "foo"), ("baz", "bar", "foo", "foo"), ("foo", "", "", "foo"), + ("foo/bar", "", "", "foobar"), + (" foo bar ", "", "", "foobar"), + ("foo*bar", "", "", "foobar"), + ("foo", "foo/bar", "", "foobar"), + ("", "", "foo/bar", "foobar"), ], ) def test_ensure_ofe_name(internal, rdkit_name, name, expected, recwarn): @@ -64,7 +69,12 @@ def test_ensure_ofe_name(internal, rdkit_name, name, expected, recwarn): out_name = _ensure_ofe_name(rdkit, name) - if {rdkit_name, internal} - {"foo", ""}: + if "bar" in expected: + # we should warn if we have sanitized the name + assert len(recwarn) == 1 + assert f"Component name sanitized 
to: {expected}" in recwarn[0].message.args[0] + + elif {rdkit_name, internal} - {"foo", ""}: # we should warn if rdkit properties are anything other than 'foo' # (expected) or the empty string (not set) assert len(recwarn) == 1 diff --git a/news/ligand_name_cleanup.rst b/news/ligand_name_cleanup.rst new file mode 100644 index 000000000..00d1de15b --- /dev/null +++ b/news/ligand_name_cleanup.rst @@ -0,0 +1,23 @@ +**Added:** + +* + +**Changed:** + +* The name of an ``ExplicitMoleculeComponent`` and its subclasses will now be sanitized to remove whitespace, ``/`` and ``*`` characters; a warning will be displayed when this happens. + +**Deprecated:** + +* + +**Removed:** + +* + +**Fixed:** + +* + +**Security:** + +*