Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 16 additions & 15 deletions microSALT/utils/referencer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""Compares existing organism references with available and updates as needed
By: Isak Sylvin, @sylvinite"""
By: Isak Sylvin, @sylvinite"""

#!/usr/bin/env python
import glob
Expand All @@ -8,13 +8,13 @@
import shutil
import subprocess
import urllib.request
from Bio import Entrez
from typing import Tuple, Union, Optional
import xml.etree.ElementTree as ET
from typing import Optional, Tuple, Union

from Bio import Entrez

from microSALT.utils.pubmlst.client import BaseClient, PubMLSTClient, get_client
from microSALT.utils.pubmlst.authentication import ClientAuthentication
from microSALT.store.db_manipulator import DB_Manipulator
from microSALT.utils.pubmlst.client import BaseClient, get_client
from microSALT.utils.pubmlst.exceptions import InvalidURLError, PubMLSTError
from microSALT.utils.pubmlst.helpers import get_service_by_url

Expand Down Expand Up @@ -68,16 +68,16 @@ def identify_new(self, cg_id="", project=False):
if ref not in self.organisms and org not in neworgs:
neworgs.append(org)
if (
not "{}.fasta".format(entry.get("reference"))
in os.listdir(self.config["folders"]["genomes"])
and not entry.get("reference") in newrefs
"{}.fasta".format(entry.get("reference"))
not in os.listdir(self.config["folders"]["genomes"])
and entry.get("reference") not in newrefs
):
newrefs.append(entry.get("reference"))
for org in neworgs:
self.add_pubmlst(org)
for org in newrefs:
self.download_ncbi(org)
except Exception as e:
except Exception:
self.logger.error(
"Unable to retrieve reference! Analysis using said reference will fail!"
)
Expand Down Expand Up @@ -128,7 +128,7 @@ def index_db(self, full_dir, suffix):
)
proc = subprocess.Popen(bash_cmd.split(), cwd=full_dir, stdout=subprocess.PIPE)
proc.communicate()
except Exception as e:
except Exception:
self.logger.error(
"Unable to index requested target {} in {}".format(file, full_dir)
)
Expand Down Expand Up @@ -267,8 +267,9 @@ def fetch_external(self) -> None:
return
try:
for entry in root:
species = entry.text.strip()
organ = species.lower().replace(" ", "_")
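# Some entries carry extra words after the binomial name, e.g.
# "Klebsiella pneumoniae species complex" where only "Klebsiella pneumoniae"
# is expected, so keep just the first two words and join them with "_".
# NOTE(review): `.lower` on the next line is missing its call parentheses;
# `str.lower` without `()` is a method object and has no `.split` attribute.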
species = entry.text.strip().lower.split(" ")[:2]
organ = "_".join(species)
if "escherichia_coli" in organ and "#1" in organ:
organ = organ[:-2]
if organ in self.organisms:
Expand Down Expand Up @@ -352,7 +353,7 @@ def fetch_resistances(self, force=False):
stderr=subprocess.STDOUT,
)
output, error = process.communicate()
if not "Already up-to-date." in str(output):
if "Already up-to-date." not in str(output):
self.logger.info("Remote resFinder database updated. Syncing...")
wipeIndex = True
else:
Expand Down Expand Up @@ -427,7 +428,7 @@ def download_ncbi(self, reference):
)
out, err = proc.communicate()
self.logger.info("Downloaded reference {}".format(reference))
except Exception as e:
except Exception:
self.logger.warning("Unable to download genome '{}' from NCBI".format(reference))

def add_pubmlst(self, organism: str):
Expand All @@ -452,7 +453,7 @@ def add_pubmlst(self, organism: str):
if not subtype["description"].lower().startswith(part):
missingPart = True
else:
if not part in subtype["description"].lower():
if part not in subtype["description"].lower():
missingPart = True
if not missingPart:
# Seqdef always appear after isolates, so this is fine
Expand Down
Loading