Skip to content

Commit aa321be

Browse files
authored
V1.3.3 Update (#119)
* v1.3.3 Update
1 parent 4cf34e3 commit aa321be

11 files changed

+97
-12
lines changed

README.Rmd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ github_pages_url <- description$GITHUB_PAGES
2626

2727
<p style="font-size: 16px;"><em>Public Database Submission Pipeline</em></p>
2828

29-
**Beta Version**: v1.3.2. This pipeline is currently in Beta testing, and issues could appear during submission. Please use it at your own risk. Feedback and suggestions are welcome!
29+
**Beta Version**: v1.3.3. This pipeline is currently in Beta testing, and issues could appear during submission. Please use it at your own risk. Feedback and suggestions are welcome!
3030

3131
**General Disclaimer**: This repository was created for use by CDC programs to collaborate on public health related projects in support of the [CDC mission](https://www.cdc.gov/about/organization/mission.htm). GitHub is not hosted by the CDC, but is a third party website used by CDC and its partners to share information and collaborate on software. CDC use of GitHub does not imply an endorsement of any one particular service, product, or enterprise.
3232

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
<!-- ![GitHub last commit](https://img.shields.io/github/last-commit/montilab/cadra) -->
1111

12-
**Beta Version**: 1.3.2. This pipeline is currently in Beta testing, and
12+
**Beta Version**: 1.3.3. This pipeline is currently in Beta testing, and
1313
issues could appear during submission. Please use it at your own risk.
1414
Feedback and suggestions are welcome\!
1515

argument_handler.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,13 @@ def args_parser():
130130
description="Downloads the BioSample Package XML from NCBI and updates SeqSender's metadata schema options for the BioSample database."
131131
)
132132

133+
# test_network_connection command
134+
test_network_module = subparser_modules.add_parser(
135+
"test_network_connection",
136+
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
137+
description="Runs a series of test network connections to NCBI and GISAID to troubleshoot submission issues."
138+
)
139+
133140
# version command
134141
version_module = subparser_modules.add_parser(
135142
"version",

config/genbank/genbank_schema.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
description="Optional internal field for how the GenBank submission should be named when viewed from the NCBI submission portal, . If not provided, when performing submissions <--submission_name> with the suffix \"-GB\" will be used instead.",
5050
title="genbank submission portal title",
5151
),
52-
"sra-comment": Column(
52+
"gb-comment": Column(
5353
dtype="object",
5454
checks=[
5555
Check(lambda s: s.nunique() == 1),

docs/app.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

ncbi_handler.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
# Local imports
2222
import tools
23+
import setup
2324

2425
# Process NCBI Report file
2526
def get_ncbi_report(database: str, submission_name: str, submission_dir: str, config_dict: Dict[str, Any], submission_type: str) -> Optional[str]:
@@ -63,8 +64,16 @@ def create_submit_ready_file(ftp, submission_dir: str):
6364
return ftp
6465

6566
def ncbi_login(config_dict: Dict[str, Any]):
    """Open an FTP connection to the NCBI FTP host and log in.

    Args:
        config_dict: Configuration mapping; the "Username" and "Password"
            entries are used as the FTP credentials.

    Returns:
        The connected and authenticated ``ftplib.FTP`` object.

    Exits the program with status 1 if the login is rejected or if the
    connection cannot be established.
    """
    try:
        ftp = ftplib.FTP(NCBI_FTP_HOST)
        ftp.login(user=config_dict["Username"], passwd=config_dict["Password"])
    except ftplib.error_perm as err:
        # A permanent (5xx) reply at this point means the server refused the login.
        # Stop here rather than returning an unauthenticated connection that
        # would only fail later with a less helpful error.
        print(f"Error: login error. Possible incorrect credentials for NCBI FTP site in config file. \nException: {err}", file=sys.stderr)
        sys.exit(1)
    except Exception as err:
        # Anything else is treated as a connectivity problem: run the network
        # diagnostics first so the user gets actionable output, then report
        # the original exception.
        print("Error unable to connect to FTP site. Running network test...", file=sys.stderr)
        setup.test_internet_connection(databases=["NCBI"])
        print(f"Exception: {err}", file=sys.stderr)
        sys.exit(1)
    return ftp
6978

7079
def ftp_upload_file(ftp, upload_file: str, upload_name: Optional[str] = None):

seqsender.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,8 @@ def main():
177177
elif command == "update_biosample":
178178
print("Updating BioSample requirements.", file=sys.stdout)
179179
setup.download_biosample_xml_list()
180+
elif command == "test_network_connection":
181+
setup.test_internet_connection(databases=["GENERAL","NCBI","GISAID"])
180182
else:
181183
# If no command display help
182184
parser.print_help()

settings.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
PROG_DIR: str = os.path.dirname(os.path.abspath(__file__))
1313

1414
# SeqSender version
15-
VERSION: str = "1.3.2 (Beta)"
15+
VERSION: str = "1.3.3 (Beta)"
1616

1717
# Organism options with unique submission options
1818
ORGANISM_CHOICES: List[str] = ["FLU", "COV", "POX", "ARBO", "RSV", "OTHER"]

setup.py

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,15 @@
44
import sys
55
from zipfile import ZipFile
66
import ftplib
7+
import io
78
import os
89
import json
910
import subprocess
11+
import socket
1012
import pandas as pd
1113
import shutil
1214
import platform
13-
from urllib.request import urlopen
15+
import urllib
1416
import gzip
1517
import stat
1618
import requests
@@ -23,6 +25,7 @@
2325
# Local imports
2426
sys.path.insert(0, str(pathlib.Path(__file__).parent))
2527
import tools
28+
from settings import NCBI_FTP_HOST
2629

2730
# Get program directory
2831
PROG_DIR: str = os.path.dirname(os.path.abspath(__file__))
@@ -95,6 +98,14 @@
9598
title=\"biosample submission portal description\",
9699
)"""
97100

101+
# Endpoints probed by the network connectivity test.
# Each entry maps a unique display name to:
#   "website":   URL requested during the test
#   "database":  group the test belongs to ("GENERAL", "NCBI" or "GISAID")
#   "error_msg": message reported when the endpoint returns an unexpected status
# Keys must be unique: a repeated key in a dict literal silently replaces the
# earlier entry, which would drop that endpoint from the test.
TEST_CONNECTIONS = {
    "HTTP": {
        "website": "http://www.google.com",
        "database": "GENERAL",
        "error_msg": "Possible internet connectivity issues; unable to connect to 'http://www.google.com'.",
    },
    "HTTPS": {
        "website": "https://www.google.com",
        "database": "GENERAL",
        "error_msg": "Possible internet connectivity issues; unable to connect to 'https://www.google.com'.",
    },
    "NCBI": {
        "website": "https://www.ncbi.nlm.nih.gov",
        "database": "NCBI",
        "error_msg": "Unable to connect to 'https://www.ncbi.nlm.nih.gov'; ensure NCBI services are running and you are able to connect to them before proceeding.",
    },
    "NCBI API": {
        "website": "https://submit.ncbi.nlm.nih.gov",
        "database": "NCBI",
        "error_msg": "Unable to connect to 'https://submit.ncbi.nlm.nih.gov'; ensure NCBI services are running and you are able to connect to them before proceeding.",
    },
    "GISAID EpiCoV": {
        "website": "https://www.epicov.org/epi3/start",
        "database": "GISAID",
        "error_msg": "Unable to connect to 'https://www.epicov.org/epi3'; ensure GISAID services are running and you are able to connect to them before proceeding.",
    },
    "GISAID": {
        "website": "https://gisaid.org/",
        "database": "GISAID",
        "error_msg": "Unable to connect to 'https://gisaid.org/'; ensure GISAID services are running and you are able to connect to them before proceeding.",
    },
}
108+
98109
# Create example data for testing
99110
def create_test_data(organism: str, database: List[str], submission_dir: str) -> None:
100111
if organism not in ["FLU", "COV"]:
@@ -154,7 +165,7 @@ def download_table2asn(table2asn_dir: str) -> None:
154165
# Determine which platform to download table2asn
155166
if platform.system() == "Windows":
156167
zip_url = "https://ftp.ncbi.nlm.nih.gov/asn1-converters/by_program/table2asn/win64.table2asn.zip"
157-
with urlopen(zip_url) as zip_response:
168+
with urllib.request.urlopen(zip_url) as zip_response:
158169
with ZipFile(BytesIO(zip_response.read())) as zip_file:
159170
zip_file.extractall(table2asn_dir)
160171
return
@@ -168,7 +179,7 @@ def download_table2asn(table2asn_dir: str) -> None:
168179
# Extract table2asn to tmp folder
169180
try:
170181
with open(table2asn_dir, "wb") as file:
171-
with urlopen(zip_url) as zip_response:
182+
with urllib.request.urlopen(zip_url) as zip_response:
172183
file.write(gzip.decompress(zip_response.read()))
173184
st = os.stat(table2asn_dir)
174185
os.chmod(table2asn_dir, st.st_mode | stat.S_IXOTH | stat.S_IRWXU)
@@ -309,3 +320,42 @@ def biosample_package_to_pandera_schema(xml_file: str, name: str) -> None:
309320
indentation = indentation[:-1]
310321
file.write(indentation + ")")
311322
os.remove(xml_file)
323+
324+
def test_internet_connection(databases: List[str]) -> None:
    """Run network diagnostics and print a summary of any problems found.

    Every entry of TEST_CONNECTIONS whose "database" is "GENERAL" or is listed
    in ``databases`` is requested over HTTP(S). When "NCBI" is requested, DNS
    resolution of NCBI_FTP_HOST and an FTP connection on port 21 are checked
    as well. Progress is printed to stdout; collected errors are printed to
    stderr once all checks have run.

    Args:
        databases: Database groups to test (e.g. "NCBI", "GISAID").
    """
    error_list = []
    print("Checking network settings...", file=sys.stdout)
    for test, info in TEST_CONNECTIONS.items():
        if info["database"] != "GENERAL" and info["database"] not in databases:
            continue
        print(f"Checking {test} connection...", file=sys.stdout)
        try:
            # A timeout keeps the diagnostic from hanging on an unresponsive
            # host; requests waits indefinitely when none is given.
            response = requests.get(info["website"], timeout=10).status_code
        except Exception as e:
            # Deliberately broad: this is a diagnostic, so every failure is
            # reported instead of aborting the remaining checks.
            error_list.append(f"{test} connectivity test failed for '{info['website']}'. Check possible firewall issues. \nException:{e}")
            # No status code is available for this endpoint; skip the status check.
            continue
        if response in (200, 204, 301, 302):
            print(f"{test} '{info['website']}' connectivity test ok.", file=sys.stdout)
        else:
            error_list.append(f"{info['error_msg']} Error code received:'{response}'")
    if "NCBI" in databases:
        print("Checking DNS resolution for FTP site...", file=sys.stdout)
        try:
            ip_address = socket.gethostbyname(NCBI_FTP_HOST)
        except Exception as e:
            error_list.append(f"Unable to reach '{NCBI_FTP_HOST}'; possible DNS error. \nException:{e}")
        else:
            # Only inspect the address when the lookup actually returned one.
            if ip_address:
                print(f"DNS resolution test ok. Able to reach ('{NCBI_FTP_HOST}' -> {ip_address})", file=sys.stdout)
            else:
                error_list.append(f"Unable to resolve address for '{NCBI_FTP_HOST}'; check DNS server settings for possible issues.")
        print("Checking port status...", file=sys.stdout)
        try:
            ftp = ftplib.FTP()
            ftp.connect(NCBI_FTP_HOST, 21, timeout=10)
            ftp.quit()
            print(f"{NCBI_FTP_HOST} open on port 21.", file=sys.stdout)
        except Exception as e:
            error_list.append(f"Port 21 not open for {NCBI_FTP_HOST}. Check possible firewall/server issues. \nException:{e}")
    if error_list:
        for error_string in error_list:
            print(error_string, file=sys.stderr)
    else:
        print("No network connection issues detected.", file=sys.stdout)

shiny/app.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
header = (
2121
ui.card_header(
2222
ui.HTML(
23-
"""<p><strong>Beta Version</strong>: 1.3.2. This pipeline is currently in Beta testing, and issues could appear during submission. Please use it at your own risk. Feedback and suggestions are welcome!</p>"""
23+
"""<p><strong>Beta Version</strong>: 1.3.3. This pipeline is currently in Beta testing, and issues could appear during submission. Please use it at your own risk. Feedback and suggestions are welcome!</p>"""
2424
)
2525
),
2626
)
@@ -583,6 +583,8 @@
583583
shiny_tools.command_accordion_panel("test_data", description=" command is used generate test data for seqsender, to be used for testing the prep and submit commands."),
584584
# Update biosample command
585585
shiny_tools.command_accordion_panel("update_biosample", description=" command is used to update biosample schema options based on available BioSample Packages."),
586+
# Test network connection command
587+
shiny_tools.command_accordion_panel("test_network_connection", description=" command is used to run a series of test network connections to NCBI and GISAID to troubleshoot submission issues."),
586588
# version command
587589
shiny_tools.command_accordion_panel("version", description=" command prints the current seqsender version."),
588590
),
@@ -609,6 +611,8 @@
609611
ui.nav_panel("Output Files", output_body),
610612
ui.nav_panel("Commands", commands_body),
611613
# ui.nav_panel("FAQ", faq_body),
614+
ui.nav_spacer(),
615+
ui.nav_control(ui.a("GitHub", href="https://github.com/CDCgov/seqsender/", target="_blank")),
612616
selected="SeqSender",
613617
header=header,
614618
footer=footer,
@@ -668,6 +672,19 @@ def read_biosample_file():
668672
@reactive.file_reader(dir / "templates/")
def read_genbank_file():
    """Build the GenBank schema table shown in the app.

    Combines the base GenBank template, the source-modifier template (the FLU
    variant when the GenBank schema selection is "FLU"), and three
    structured-comment rows, then returns the result with missing values
    blanked and rows/columns transposed.
    """
    base_fields = pd.read_csv(dir / "templates/config.genbank.genbank.schema_template.csv", index_col = "column_name")
    # The source-modifier columns differ for FLU submissions.
    if input.GenBank_schemas() == "FLU":
        source_template = "templates/config.genbank.genbank.flu.src.schema_template.csv"
    else:
        source_template = "templates/config.genbank.genbank.src.schema_template.csv"
    source_fields = pd.read_csv(dir / source_template, index_col = "column_name")
    # Prefix and suffix share the same help text.
    keyword_help = "Structured comment keyword. ONLY REQUIRED IF INCLUDING COMMENT FILE. For FLU use 'FluData', HIV use 'HIV-DataBaseData', and for COV and other organisms use 'Assembly-Data'."
    comment_fields = pd.DataFrame({
        "column_name": ["cmt-StructuredCommentPrefix", "cmt-StructuredCommentSuffix", "cmt-Assembly Method"],
        "required_column": ["Required"] * 3,
        "description": [
            keyword_help,
            keyword_help,
            "ONLY REQUIRED IF INCLUDING COMMENT FILE. Process used to assemble genome.",
        ],
    }).set_index("column_name")
    combined = pd.concat([base_fields, source_fields, comment_fields])
    return combined.fillna("").transpose()

shiny/templates/config.genbank.genbank.schema_template.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@ sequence_name,Required,Sequence identifier used in fasta file. This is used to c
33
gb-sample_name,Required,Identifier name used for GenBank. Max length is 50 characters.
44
gb-fasta_definition_line_modifiers,Optional,"NCBI fasta definition line modifiers can be added here. As many modifiers as you like can be added, but each must be bounded by a set of brackets. Some of the available keywords are listed at ""https://www.ncbi.nlm.nih.gov/genbank/mods_fastadefline/""."
55
gb-title,Optional,"Optional internal field for how the GenBank submission should be named when viewed from the NCBI submission portal, . If not provided, when performing submissions <--submission_name> with the suffix ""-GB"" will be used instead."
6-
sra-comment,Optional,Optional internal field explaining the purpose of the submission for when interacting and resolving submission issues with NCBI.
6+
gb-comment,Optional,Optional internal field explaining the purpose of the submission for when interacting and resolving submission issues with NCBI.

0 commit comments

Comments
 (0)