diff --git a/README.md b/README.md index 6f28028..490af91 100644 --- a/README.md +++ b/README.md @@ -294,6 +294,7 @@ If modelling multiple copies of the same sequence in Boltz, the input JSON must "dialect": "alphafold3", "version": 1 } +``` If the identical sequences are given as seperate entities (as shown below) you will encounter an error. diff --git a/abcfold/boltz/af3_to_boltz.py b/abcfold/boltz/af3_to_boltz.py index 7d26114..e9728fb 100644 --- a/abcfold/boltz/af3_to_boltz.py +++ b/abcfold/boltz/af3_to_boltz.py @@ -224,10 +224,17 @@ def add_modifications(self, list_of_modifications: list): yaml_string = "" yaml_string += f"{DELIM}{DELIM}modifications:\n" for modification in list_of_modifications: - yaml_string += ( - f"{DELIM}{DELIM}{DELIM}- position: {modification['ptmPosition']}\n" - ) - yaml_string += f"{DELIM}{DELIM}{DELIM} ccd: {modification['ptmType']}\n" + if "ptmType" in modification and "ptmPosition" in modification: + yaml_string += ( + f"{DELIM}{DELIM}{DELIM}- position: {modification['ptmPosition']}\n" + f"{DELIM}{DELIM}{DELIM} ccd: {modification['ptmType']}\n" + ) + elif "modificationType" in modification and "basePosition" in modification: + yaml_string += ( + f"{DELIM}{DELIM}{DELIM}- position: {modification['basePosition']}\n" + f"{DELIM}{DELIM}{DELIM} ccd: {modification['modificationType']}\n" + ) + return yaml_string def add_key_and_value(self, key: str, value: str): diff --git a/abcfold/boltz/check_install.py b/abcfold/boltz/check_install.py index f5dfdeb..660b3ff 100644 --- a/abcfold/boltz/check_install.py +++ b/abcfold/boltz/check_install.py @@ -4,7 +4,7 @@ logger = logging.getLogger("logger") -BOLTZ_VERSION = "2.1.1" +BOLTZ_VERSION = "2.2.0" def check_boltz(): diff --git a/abcfold/chai1/af3_to_chai.py b/abcfold/chai1/af3_to_chai.py index 7ab40dd..46cb4a0 100644 --- a/abcfold/chai1/af3_to_chai.py +++ b/abcfold/chai1/af3_to_chai.py @@ -227,7 +227,6 @@ def add_protein(self, seq: dict, fasta_data: dict): return protein_str, fasta_data def _add_protein(self, seq: dict, prot_id: str, fasta_data: dict): - protein_str = f">protein|{prot_id}\n{seq['protein']['sequence']}\n" sequence = seq["protein"]["sequence"] if "unpairedMsa" in seq["protein"].keys(): seq_hash = hashlib.sha256(sequence.upper().encode()).hexdigest() @@ -241,6 +240,11 @@ def _add_protein(self, seq: dict, prot_id: str, fasta_data: dict): ) fasta_data[prot_id] = sequence + if "modifications" in seq["protein"]: + sequence = self.add_modifications(sequence, seq["protein"]["modifications"]) + + protein_str = f">protein|{prot_id}\n{sequence}\n" + return protein_str, fasta_data def add_nucleotide(self, seq: dict, seq_type: str, fasta_data: dict): @@ -261,8 +265,15 @@ def add_nucleotide(self, seq: dict, seq_type: str, fasta_data: dict): return nucleotide_str, fasta_data def _add_nucleotide(self, seq: dict, seq_type: str, nucl_id: str, fasta_data: dict): - nucleotide_str = f">{seq_type}|{nucl_id}\n{seq[seq_type]['sequence']}\n" - fasta_data[nucl_id] = seq[seq_type]["sequence"] + sequence = seq[seq_type]["sequence"] + fasta_data[nucl_id] = sequence + + if "modifications" in seq[seq_type]: + sequence = self.add_modifications( + sequence, seq[seq_type]["modifications"] + ) + + nucleotide_str = f">{seq_type}|{nucl_id}\n{sequence}\n" return nucleotide_str, fasta_data def ccd_to_smiles(self, ccd_id: str): @@ -329,6 +340,31 @@ def add_ligand(self, seq: dict, fasta_data: dict): return ligand_str + def add_modifications(self, sequence: str, modifications: list) -> str: + """ + Add modifications to the fasta data + + Args: + sequence (str): the sequence to add modifications to be added to + modifications (list): list of modifications to be added + + Returns: + sequence (str): the sequence with modifications added + """ + + sequence_list = list(sequence) + for mod in modifications: + if "ptmType" in mod and "ptmPosition" in mod: + ptm_type = mod['ptmType'] + position = int(mod['ptmPosition']) - 1 + sequence_list[position] = f"({ptm_type})" + elif "modificationType" in mod and "basePosition" in mod: + mod_type = mod['modificationType'] + position = int(mod['basePosition']) - 1 + sequence_list[position] = f"({mod_type})" + + return ''.join(sequence_list) + def get_atom_name(self, atom: str) -> str: for name in ATOMS_NAMES: if atom.startswith(name): diff --git a/abcfold/html/html_utils.py b/abcfold/html/html_utils.py index 0e7964e..c641055 100644 --- a/abcfold/html/html_utils.py +++ b/abcfold/html/html_utils.py @@ -28,7 +28,7 @@ def get_plddt_regions(plddts: Union[np.ndarray, list]) -> dict: regions = {} # replace none values with -1 - plddts = np.where(plddts is None, -1, plddts) + plddts = np.where(plddts == None, -1, plddts) # noqa F401 v_low = np.where((0 <= plddts) & (plddts <= 50))[0] regions["v_low"] = get_regions_helper(v_low) diff --git a/abcfold/html/static/main_page_example.png b/abcfold/html/static/main_page_example.png index 2a87ecd..cc88874 100644 Binary files a/abcfold/html/static/main_page_example.png and b/abcfold/html/static/main_page_example.png differ diff --git a/abcfold/output/boltz.py b/abcfold/output/boltz.py index c885d50..f93e05d 100644 --- a/abcfold/output/boltz.py +++ b/abcfold/output/boltz.py @@ -77,9 +77,12 @@ def __init__( boltz_yaml = list(parent_dir.glob("*.yaml"))[0] if boltz_yaml.exists(): boltz_yaml.rename(new_parent / "boltz_input.yaml") - boltz_msa = list(parent_dir.glob("*.a3m"))[0] - if boltz_msa.exists(): - boltz_msa.rename(new_parent / boltz_msa.name) + + boltz_msas = list(parent_dir.glob("*.a3m")) + if boltz_msas: + for boltz_msa in boltz_msas: + if boltz_msa.exists(): + boltz_msa.rename(new_parent / boltz_msa.name) new_output_dirs = [] for output_dir in self.output_dirs: diff --git a/abcfold/output/chai.py b/abcfold/output/chai.py index 37fcc2f..7ae65a7 100644 --- a/abcfold/output/chai.py +++ b/abcfold/output/chai.py @@ -71,9 +71,11 @@ def __init__( chai_fasta = parent_dir / "chai1.fasta" if chai_fasta.exists(): chai_fasta.rename(new_parent / "chai1.fasta") - chai_msa = list(parent_dir.glob("*.aligned.pqt"))[0] - if chai_msa.exists(): - chai_msa.rename(new_parent / chai_msa.name) + chai_msas = list(parent_dir.glob("*.aligned.pqt")) + if chai_msas: + for chai_msa in chai_msas: + if chai_msa.exists(): + chai_msa.rename(new_parent / chai_msa.name) new_output_dirs = [] for output_dir in self.output_dirs: diff --git a/pyproject.toml b/pyproject.toml index 0df08cb..7a3ff14 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,8 +3,8 @@ requires = ["setuptools>=42", "wheel"] build-backend = "setuptools.build_meta" [project] -name = "ABCFold" -version = "1.0.7" +name = "abcfold" +version = "1.0.8" description = "Input processing tools for AlphaFold3, Boltz and Chai-1" readme = "README.md" license = { text = "BSD License" } diff --git a/tests/test_af3_to_boltz.py b/tests/test_af3_to_boltz.py index a80542f..e5abee8 100644 --- a/tests/test_af3_to_boltz.py +++ b/tests/test_af3_to_boltz.py @@ -35,7 +35,6 @@ def test_af3_to_boltz(test_data): yaml_string_bonds = boltz_yaml.json_to_yaml(test_data.test_inputA_json) yaml_string_bonds = yaml_string_bonds.split("\n") - print(yaml_string_bonds) assert yaml_string_bonds[0] == "version: 1" assert yaml_string_bonds[1] == "sequences:" assert yaml_string_bonds[2] == f"{DELIM}- protein:" diff --git a/tests/test_af3_to_chai.py b/tests/test_af3_to_chai.py index fe72e2a..fe9faf0 100644 --- a/tests/test_af3_to_chai.py +++ b/tests/test_af3_to_chai.py @@ -124,6 +124,30 @@ def test_af3_to_chai_ligand(test_data): assert data == reference +@pytest.mark.skipif(not run_chai1, reason="chai_lab not installed") +def test_af3_to_chai_ptm(test_data): + with tempfile.TemporaryDirectory() as temp_dir: + chai_fasta = ChaiFasta(temp_dir) + + chai_fasta.json_to_fasta(test_data.test_inputPTM_json) + + reference = ( + ">protein|A\n" + "(HY3)VLS(P1L)GEWQL\n" + ">rna|B\n" + "(2MG)GC(5MC)\n" + ) + + filename = Path(temp_dir) / "chai1.fasta" + + assert filename.exists() + with open(filename, "r") as f: + data = f.read() + print(data) + + assert data == reference + + @pytest.mark.skipif(not run_chai1, reason="chai_lab not installed") def test_chai_output_constraints(test_data): with tempfile.TemporaryDirectory() as temp_dir: diff --git a/tests/test_data/inputPTM.json b/tests/test_data/inputPTM.json new file mode 100644 index 0000000..6ff6374 --- /dev/null +++ b/tests/test_data/inputPTM.json @@ -0,0 +1,29 @@ +{ + "name": "PTM example", + "modelSeeds": [1], + "sequences": [ + { + "protein": { + "id": "A", + "sequence": "PVLSCGEWQL", + "modifications": [ + {"ptmType": "HY3", "ptmPosition": 1}, + {"ptmType": "P1L", "ptmPosition": 5} + ] + } + }, + { + "rna": { + "id": "B", + "sequence": "AGCU", + "modifications": [ + {"modificationType": "2MG", "basePosition": 1}, + {"modificationType": "5MC", "basePosition": 4} + ] + } + } + ], + + "dialect": "alphafold3", + "version": 1 +}