-
Notifications
You must be signed in to change notification settings - Fork 1
Description
Hi,
Thank you for creating this tool, which seems amazing! I installed all the dependencies following the conda approach described in the instructions, and everything looked fine.
However, when I ran pytest in the MicrobeMod directory, I got the following output:
(MicrobeMod) nicolas@pop-os:/zfs-hdd/Nicolas/progs/MicrobeMod$ pytest
================================================= test session starts ==================================================
platform linux -- Python 3.13.5, pytest-8.4.1, pluggy-1.6.0 -- /zfs-hdd/Nicolas/miniconda3/envs/MicrobeMod/bin/python3.13
cachedir: .pytest_cache
rootdir: /zfs-hdd/Nicolas/progs/MicrobeMod
configfile: setup.cfg
testpaths: tests
collected 6 items
tests/test_methylation.py::test_read_modkit Contig Position Modification ... SNP_Position_Strand RefBase Sequence
0 NC_000913.3 1016245 a ... NC_000913.3:1016245-+ A CGGCATAACCGGATCGCCAATAAA
1 NC_000913.3 1016246 a ... NC_000913.3:1016246-- T GGCATAACCGGATCGCCAATAAAG
2 NC_000913.3 1016247 m ... NC_000913.3:1016247-+ C GCATAACCGGATCGCCAATAAAGT
3 NC_000913.3 1016247 a ... NC_000913.3:1016247-- C GCATAACCGGATCGCCAATAAAGT
4 NC_000913.3 1016248 a ... NC_000913.3:1016248-+ G CATAACCGGATCGCCAATAAAGTG
... ... ... ... ... ... ... ...
4995 NC_000913.3 1019999 m ... NC_000913.3:1019999-- T AGCGGCCTGCGCTACGGTAGCGAA
4996 NC_000913.3 1020000 a ... NC_000913.3:1020000-+ A GCGGCCTGCGCTACGGTAGCGAAA
4997 NC_000913.3 1020001 m ... NC_000913.3:1020001-+ C CGGCCTGCGCTACGGTAGCGAAAC
4998 NC_000913.3 1020002 m ... NC_000913.3:1020002-- G GGCCTGCGCTACGGTAGCGAAACC
4999 NC_000913.3 1020003 a ... NC_000913.3:1020003-+ G GCCTGCGCTACGGTAGCGAAACCA
[5000 rows x 15 columns]
PASSED
tests/test_methylation.py::test_write_to_fasta PASSED
tests/test_methylation.py::test_assign_motifs PASSED
tests/test_rm.py::test_parse_hmmer PASSED
tests/test_rm.py::test_read_blast PASSED
tests/test_rm.py::test_create_gene_table FAILED
======================================================= FAILURES =======================================================
________________________________________________ test_create_gene_table ________________________________________________
def test_create_gene_table():
metadata_file = (
"/"
+ os.path.dirname(__file__)[:-6]
+ "/MicrobeMod/db/restriction_metadata.csv"
)
metadata = pd.read_csv(metadata_file)
system_types = {}
for index, row in metadata.iterrows():
system_types[row["Name"]] = (row["Enzyme_type"], row["System"])
gene_locations = {
"NC_000913.3_1130": ("NC_000913.3", 1130),
"NC_000913.3_1932": ("NC_000913.3", 1932),
"NC_000913.3_2152": ("NC_000913.3", 2152),
"NC_000913.3_3194": ("NC_000913.3", 3194),
"NC_000913.3_3316": ("NC_000913.3", 3316),
"NC_000913.3_4262": ("NC_000913.3", 4262),
"NC_000913.3_4263": ("NC_000913.3", 4263),
"NC_000913.3_4265": ("NC_000913.3", 4265),
"NC_000913.3_4266": ("NC_000913.3", 4266),
"NC_000913.3_4267": ("NC_000913.3", 4267),
"NC_000913.3_4268": ("NC_000913.3", 4268),
"NC_000913.3_464": ("NC_000913.3", 464),
}
gene_hits, evalues = parse_hmmer(
"./tests/test_data/EcoliCVM05_GCF_000005845.resolved.hits"
)
blast_hits = read_blast("./tests/test_data/EcoliCVM05_GCF_000005845.blast")
gene_table = create_gene_table(
gene_hits, gene_locations, system_types, evalues, blast_hits
)
tests/test_rm.py:105:
hits = defaultdict(<class 'list'>, {'NC_000913.3_1130': ['Type_IV_05-RM_Type_IV__Type_IV_REases'], 'NC_000913.3_1932': ['Type...68': ['FAM_0-RM_Type_IV__Type_IV_REases', 'FAM_0-RM_Type_IV__Type_IV_REases'], 'NC_000913.3_464': ['Type_II_REase06']})
gene_locations = {'NC_000913.3_1130': ('NC_000913.3', 1130), 'NC_000913.3_1932': ('NC_000913.3', 1932), 'NC_000913.3_2152': ('NC_000913.3', 2152), 'NC_000913.3_3194': ('NC_000913.3', 3194), ...}
system_types = {'BamHI': ('RE', 'RM_Type_II'), 'BpuJI_N': ('RE', 'RM_Type_II'), 'Bse634I': ('RE', 'RM_Type_II'), 'BsuBI_PstI_RE': ('RE', 'RM_Type_II'), ...}
evalues = defaultdict(<class 'dict'>, {'NC_000913.3_1130': {'Type_IV_05-RM_Type_IV__Type_IV_REases': 6.8e-27}, 'NC_000913.3_1932...}, 'NC_000913.3_4268': {'FAM_0-RM_Type_IV__Type_IV_REases': 2.1e-76}, 'NC_000913.3_464': {'Type_II_REase06': 7.9e-10}})
blast_hits = {'NC_000913.3_1130': ('SsoSE61ORF22640P', 100.0, '', 'YCGR'), 'NC_000913.3_1932': ('M.SflLIN6DcmP', 100.0, 'm5C', 'CCW..._3194': ('M.Eco4792LORF2734P', 100.0, '', 'ATGCAT'), 'NC_000913.3_3316': ('M.UbaC1152DamP', 100.0, 'm6A', 'GATC'), ...}
gene_window = 10
def create_gene_table(
hits, gene_locations, system_types, evalues, blast_hits, gene_window=10
):
"""Creates a final table of RM genes organized by their types and their operon status.
Args:
hits: a defaultdictionary where keys are genes and values are lists of their HMM names.
gene_locations: a dictionary where keys are genes and values are tuples of (contig, gene number).
system_types: Dictionary of HMMs to RM system types and their metadata.
evalues: a defaultdictionary where keys are genes, values are dictionaries with HMMs as keys and values as evalues.
blast_hits: dictionary of genes-> blast hits, from read_blast()
gene_window: the window size to use to call operons.
Returns:
gene_table: A final pandas table of RM genes organized by type and operon.
"""
gene_table = []
for hit in hits:
## Get BLAST info
best_blast = blast_pid = meth_type = motif = ""
if hit in blast_hits:
best_blast = blast_hits[hit][0]
blast_pid = blast_hits[hit][1]
meth_type = blast_hits[hit][2]
motif = blast_hits[hit][3]
gene_types = set() # So that each HMM MT/RE hit only counts once per gene
for hmm in hits[hit]:
## Get the system type of this HMM
if system_types[hmm][0] not in gene_types:
gene_types.add(system_types[hmm][0])
gene_table.append(
{
"Gene": hit,
"Contig": gene_locations[hit][0],
"Gene Position": gene_locations[hit][1],
"System Type": system_types[hmm][1],
"Gene type": system_types[hmm][0],
"HMM": hmm,
"Evalue": evalues[hit][hmm],
"Predicted methylation": system_types[hmm][2],
^^^^^^^^^^^^^^^^^^^^
"Prediction confidence": system_types[hmm][3],
"REBASE homolog": best_blast,
"Homolog identity(%)": blast_pid,
"Homolog methylation": meth_type,
"Homolog motif": motif,
}
)
E IndexError: tuple index out of range
../../miniconda3/envs/MicrobeMod/lib/python3.13/site-packages/MicrobeMod/restriction_modification.py:241: IndexError
=============================================== short test summary info ================================================
FAILED tests/test_rm.py::test_create_gene_table - IndexError: tuple index out of range
============================================= 1 failed, 5 passed in 4.03s ==============================================
My python version is: Python 3.13.5
My conda version is: conda 25.5.1
Could you please help me with this error? Thank you so much and have a nice day,
Nicolas