From 6dacc267d1f44f73b15e4a5de09c7bd4e5a8fd0c Mon Sep 17 00:00:00 2001
From: Panciera
Date: Thu, 18 Aug 2016 15:45:03 -0400
Subject: [PATCH 1/2] initial commit of genbank script

---
 bio_bits/genbank.py  | 71 ++++++++++++++++++++++++++++++++++++++++++++
 requirements-pip.txt |  1 +
 setup.py             |  1 +
 3 files changed, 73 insertions(+)
 create mode 100644 bio_bits/genbank.py

diff --git a/bio_bits/genbank.py b/bio_bits/genbank.py
new file mode 100644
index 0000000..56f826b
--- /dev/null
+++ b/bio_bits/genbank.py
@@ -0,0 +1,71 @@
+'''
+Convert GenBank records to FASTA, tagging each header with country and
+collection date.
+
+Usage:
+    genbank <infile>
+'''
+from Bio import SeqIO
+import dateparser
+import sys
+from docopt import docopt
+
+
+def rec_qualifier(rec, key):
+    '''Return the value list of qualifier `key` from the first feature of
+    `rec` that carries it, or None when no feature does.'''
+    for f in rec.features:
+        # Match on the requested key itself so that features lacking it
+        # are skipped instead of raising KeyError.
+        if f.qualifiers and key in f.qualifiers:
+            return f.qualifiers[key]
+    return None
+
+
+def fix_date(d):
+    '''Parse the free-form date string `d` (e.g. '17-Feb-2010') and return
+    it formatted as YYYY-MM-DD.
+
+    Raises ValueError when dateparser cannot interpret `d`.'''
+    date = dateparser.parse(d)
+    # dateparser.parse returns None rather than raising on bad input.
+    if date is None:
+        raise ValueError("could not parse date: {!r}".format(d))
+    return date.strftime('%Y-%m-%d')
+
+
+def _first_qualifier(rec, key):
+    '''Return the first value of qualifier `key` in `rec`; raise ValueError
+    when the record has no such qualifier.'''
+    values = rec_qualifier(rec, key)
+    if not values:
+        raise ValueError(
+            "record {} has no '{}' qualifier".format(rec.id, key))
+    return values[0]
+
+
+def rec_to_fasta(rec):
+    '''Set the description of `rec` to "|COUNTRY|YYYY-MM-DD" and return the
+    same (mutated) record.'''
+    raw_collection_date = _first_qualifier(rec, 'collection_date')
+    country = _first_qualifier(rec, 'country')
+    rec.description = "|{}|{}".format(country, fix_date(raw_collection_date))
+    return rec
+
+
+def run(infile):
+    '''Read GenBank records from the file at `infile` and write them to
+    stdout in FASTA format.'''
+    with open(infile) as handle:
+        records = SeqIO.parse(handle, 'genbank')
+        # The generator is lazy, so it must be consumed by SeqIO.write
+        # while the input file is still open.
+        results = (rec_to_fasta(rec) for rec in records)
+        SeqIO.write(results, sys.stdout, 'fasta')
+
+
+def main():
+    '''Command-line entry point: parse arguments and convert the file.'''
+    raw_args = docopt(__doc__, version='Version 1.0')
+    run(raw_args['<infile>'])
+    sys.exit(0)
diff --git a/requirements-pip.txt b/requirements-pip.txt
index 21186bf..8fea4e1 100644
--- a/requirements-pip.txt
+++ b/requirements-pip.txt
@@ -1,5 +1,6 @@
 argparse
 docopt
+dateparser
 future
 schema
 sh
diff --git a/setup.py b/setup.py
index 46e5069..109f230 100644
--- a/setup.py
+++ b/setup.py
@@ -27,6 +27,7 @@
             'degen = bio_bits.degen:main',
             'plot_muts = bio_bits.plot_muts:main',
             'fasta = bio_bits.fasta:main',
+            'genbank = bio_bits.genbank:main',
             #'sequence_concat = bio_bits.sequence_concat:main',
             #'sequence_files_concat = bio_bits.sequence_files_concat:main',
             #'sequence_split = bio_bits_old.sequence_split:main',

From bd36057645925e7691b8d86139c3cd7c77a13c53 Mon Sep 17 00:00:00 2001
From: Mike Panciera
Date: Thu, 18 Aug 2016 16:30:29 -0400
Subject: [PATCH 2/2] Update CHANGELOG.rst

---
 CHANGELOG.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index c3d9b6d..db97202 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -2,6 +2,11 @@
 CHANGELOG
 =========
 
+Version 1.4.0
+-------------
+
+* added genbank downloader script
+
 Version 1.3.2
 -------------
 