From c0c5e816159b37919870347cc25dff1d3a34c8a6 Mon Sep 17 00:00:00 2001 From: Manikumar Date: Tue, 28 Mar 2017 19:44:29 +0530 Subject: [PATCH 01/15] Added gene(), CDS() properties and get_cds_from_gene() method --- secmet/record.py | 157 ++++++++++++++++++++++++++++------------------- 1 file changed, 93 insertions(+), 64 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index ba8b658..7ebf36c 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -6,67 +6,96 @@ class Record(object): - """A record containing secondary metabolite clusters""" - - def __init__(self, seq_record=None): - """Initialise a secondary metabolite record - - :param seq_record: :class:`Bio.SeqRecord.SeqRecord` to read - :type seq_record: :class:`Bio.SeqRecord.SeqRecord` - """ - self._record = seq_record - - - @classmethod - def from_genbank(cls, filename): - """Initialise a record from a GenBank file - - :param string filename: file name of the GenBank file to read - """ - seq_record = SeqIO.read(filename, 'genbank') - rec = cls(seq_record=seq_record) - return rec - - - @property - def id(self): - """Pass through to seq_record object if available""" - if self._record is not None: - return self._record.id - else: - return "NO_ID_ASSIGNED" - - - @property - def seq(self): - """Pass through to seq_record object if available""" - if self._record is not None: - return self._record.seq - else: - return None - - - @property - def annotations(self): - """Pass through to seq_record object if available""" - if self._record is not None: - return self._record.annotations - else: - return {} - - @property - def description(self): - """Pass through to seq_record object if available""" - if self._record is not None: - return self._record.description - else: - return "" - - @property - def clusters(self): - """A list of secondary metabolite clusters present in the record""" - if self._record is None: - return [] - - clusters = [i for i in self._record.features if i.type == 'cluster'] - return 
clusters + """A record containing secondary metabolite clusters""" + + def __init__(self, seq_record=None): + """Initialise a secondary metabolite record + + :param seq_record: :class:`Bio.SeqRecord.SeqRecord` to read + :type seq_record: :class:`Bio.SeqRecord.SeqRecord` + """ + self._record = seq_record + + + @classmethod + def from_genbank(cls, filename): + """Initialise a record from a GenBank file + + :param string filename: file name of the GenBank file to read + """ + seq_record = SeqIO.read(filename, 'genbank') + rec = cls(seq_record=seq_record) + return rec + + @property + def id(self): + """Pass through to seq_record object if available""" + if self._record is not None: + return self._record.id + else: + return "NO_ID_ASSIGNED" + + + @property + def seq(self): + """Pass through to seq_record object if available""" + if self._record is not None: + return self._record.seq + else: + return None + + + @property + def annotations(self): + """Pass through to seq_record object if available""" + if self._record is not None: + return self._record.annotations + else: + return {} + + @property + def description(self): + """Pass through to seq_record object if available""" + if self._record is not None: + return self._record.description + else: + return "" + + @property + def clusters(self): + """A list of secondary metabolite clusters present in the record""" + if self._record is None: + return [] + clusters = [i for i in self._record.features if i.type == 'cluster'] + return clusters + + @property + def gene(self): + """A list of secondary metabolite clusters present in the record""" + if self._record is None: + return [] + gene_list =[i for i in self._record.features if i.type == 'gene'] + return gene_list + + @property + def CDS(self): + if self._record is None: + return [] + CDS = [i for i in self._record.features if i.type == 'CDS'] + return CDS + + def get_cds_from_gene(self,gene): + if type(gene) != type(self.gene[0]): + return None + else: + gene_name = 
gene.qualifiers.__getattribute__.__self__['gene'][0] + cds = self.CDS + for i in cds: + if i.qualifiers.__getattribute__.__self__['gene'][0] == gene_name: + return i + return None + + + + + From b2895335ae291172270125b3d02799d9711aacd8 Mon Sep 17 00:00:00 2001 From: Manikumar Date: Tue, 28 Mar 2017 20:07:07 +0530 Subject: [PATCH 02/15] Add test_gene(), test_cds() in test_record.py --- tests/test_record.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/test_record.py b/tests/test_record.py index 3486dca..d56e8e8 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -36,3 +36,19 @@ def test_clusters(): testfile = get_testfile('nisin.gbk') rec = Record.from_genbank(testfile) assert len(rec.clusters) == 1 + +def test_gene(): + testfile = get_testfile('nisin.gbk') + rec = Record.from_genbank(testfile) + bp_rec = SeqIO.read(testfile, 'genbank') + bp_cds = [i for i in bp_rec.features if i.type == 'gene'] + assert len(bp_cds) == len(rec.gene) + +def test_cds(): + testfile = get_testfile('nisin.gbk') + rec = Record.from_genbank(testfile) + bp_rec = SeqIO.read(testfile, 'genbank') + bp_cds = [i for i in bp_rec.features if i.type == 'CDS'] + assert len(bp_cds) == len(rec.CDS) + + From 13731356c57a5d43e37aa42bee5ebd438878ff55 Mon Sep 17 00:00:00 2001 From: Manikumar Date: Tue, 28 Mar 2017 20:28:35 +0530 Subject: [PATCH 03/15] Add test_get_cds_from_gene() method, add comments to record.py --- secmet/record.py | 4 +++- tests/test_record.py | 26 +++++++++++++++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index 7ebf36c..a615d6c 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -71,7 +71,7 @@ def clusters(self): @property def gene(self): - """A list of secondary metabolite clusters present in the record""" + """A list of secondary metabolite genes present in the record""" if self._record is None: return [] gene_list =[i for i in self._record.features if i.type == 'gene'] @@ 
-79,12 +79,14 @@ def gene(self): @property def CDS(self): + """A list of secondary metabolite CDS present in the record""" if self._record is None: return [] CDS = [i for i in self._record.features if i.type == 'CDS'] return CDS def get_cds_from_gene(self,gene): + """Give the CDS corresponding to a particular gene""" if type(gene) != type(self.gene[0]): return None else: diff --git a/tests/test_record.py b/tests/test_record.py index d56e8e8..996918c 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -41,7 +41,7 @@ def test_gene(): testfile = get_testfile('nisin.gbk') rec = Record.from_genbank(testfile) bp_rec = SeqIO.read(testfile, 'genbank') - bp_cds = [i for i in bp_rec.features if i.type == 'gene'] + bp_gene = [i for i in bp_rec.features if i.type == 'gene'] assert len(bp_cds) == len(rec.gene) def test_cds(): @@ -51,4 +51,28 @@ def test_cds(): bp_cds = [i for i in bp_rec.features if i.type == 'CDS'] assert len(bp_cds) == len(rec.CDS) +def test_get_cds_from_gene(): + testfile = get_testfile('nisin.gbk') + rec = Record.from_genbank(testfile) + bp_rec = SeqIO.read(testfile, 'genbank') + bp_gene = [i for i in bp_rec.features if i.type == 'gene'] + bp_cds = [i for i in bp_rec.features if i.type == 'CDS'] + #get gene name from bp_gene list + bp_gene_name = bp_gene[0].qualifiers.__getattribute__.__self__['gene'][0] + + #get cds name from bp_cds list + bp_cds_name = bp_cds[0].qualifiers.__getattribute__.__self__['gene'][0] + + #compare bp_gene_name and bp_cds_name + assert bp_gene_name == bp_cds_name + + #compare bp_cds_name and secmet rec cds name + assert bp_cds_name == rec.get_cds_from_gene(bp_gene[0]).qualifiers.__getattribute__.__self__['gene'][0] + + + + + + + From ae706ecd4aa0bbdee367029669e7cf83f11030c9 Mon Sep 17 00:00:00 2001 From: Manikumar Date: Tue, 28 Mar 2017 20:30:36 +0530 Subject: [PATCH 04/15] Copy secmet folder to test --- tests/secmet/__init__.py | 1 + tests/secmet/record.py | 101 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 
102 insertions(+) create mode 100644 tests/secmet/__init__.py create mode 100644 tests/secmet/record.py diff --git a/tests/secmet/__init__.py b/tests/secmet/__init__.py new file mode 100644 index 0000000..b8023d8 --- /dev/null +++ b/tests/secmet/__init__.py @@ -0,0 +1 @@ +__version__ = '0.0.1' diff --git a/tests/secmet/record.py b/tests/secmet/record.py new file mode 100644 index 0000000..7ebf36c --- /dev/null +++ b/tests/secmet/record.py @@ -0,0 +1,101 @@ +# vim :set et sts=4 sw=4 fileencoding=utf-8 : +# Licensed under the APL2, see LICENSE for details +"""Secondary Metabolite Record Objects""" + +from Bio import SeqIO + + +class Record(object): + """A record containing secondary metabolite clusters""" + + def __init__(self, seq_record=None): + """Initialise a secondary metabolite record + + :param seq_record: :class:`Bio.SeqRecord.SeqRecord` to read + :type seq_record: :class:`Bio.SeqRecord.SeqRecord` + """ + self._record = seq_record + + + @classmethod + def from_genbank(cls, filename): + """Initialise a record from a GenBank file + + :param string filename: file name of the GenBank file to read + """ + seq_record = SeqIO.read(filename, 'genbank') + rec = cls(seq_record=seq_record) + return rec + + @property + def id(self): + """Pass through to seq_record object if available""" + if self._record is not None: + return self._record.id + else: + return "NO_ID_ASSIGNED" + + + @property + def seq(self): + """Pass through to seq_record object if available""" + if self._record is not None: + return self._record.seq + else: + return None + + + @property + def annotations(self): + """Pass through to seq_record object if available""" + if self._record is not None: + return self._record.annotations + else: + return {} + + @property + def description(self): + """Pass through to seq_record object if available""" + if self._record is not None: + return self._record.description + else: + return "" + + @property + def clusters(self): + """A list of secondary metabolite clusters 
present in the record""" + if self._record is None: + return [] + clusters = [i for i in self._record.features if i.type == 'cluster'] + return clusters + + @property + def gene(self): + """A list of secondary metabolite clusters present in the record""" + if self._record is None: + return [] + gene_list =[i for i in self._record.features if i.type == 'gene'] + return gene_list + + @property + def CDS(self): + if self._record is None: + return [] + CDS = [i for i in self._record.features if i.type == 'CDS'] + return CDS + + def get_cds_from_gene(self,gene): + if type(gene) != type(self.gene[0]): + return None + else: + gene_name = gene.qualifiers.__getattribute__.__self__['gene'][0] + cds = self.CDS + for i in cds: + if i.qualifiers.__getattribute__.__self__['gene'][0] == gene_name: + return i + return None + + + + + From 1b74dc1425be72e17e60c94a1adcf3cb1eb86166 Mon Sep 17 00:00:00 2001 From: Manikumar Date: Wed, 5 Apr 2017 14:18:54 +0530 Subject: [PATCH 05/15] Updated get_cds_from_gene to accept list of gene features and return list of cds features --- secmet/record.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index a615d6c..8391c90 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -85,19 +85,22 @@ def CDS(self): CDS = [i for i in self._record.features if i.type == 'CDS'] return CDS - def get_cds_from_gene(self,gene): + def get_cds_from_gene(self,gene_list): """Give the CDS corresponding to a particular gene""" - if type(gene) != type(self.gene[0]): - return None - else: - gene_name = gene.qualifiers.__getattribute__.__self__['gene'][0] - cds = self.CDS - for i in cds: - if i.qualifiers.__getattribute__.__self__['gene'][0] == gene_name: - return i - return None - - + cds_list =[] + for gene in gene_list: + if type(gene) != type(self.gene[0]): + return None + else: + gene_name = gene.qualifiers.__getattribute__.__self__['gene'][0] + cds = self.CDS + for i in cds: + if 
i.qualifiers.__getattribute__.__self__['gene'][0] == gene_name: + cds_list.append(i) + return cds_list + + +rec = Record.from_genbank('../tests/data/nisin.gbk') From c85844dd3b26da8e18ff4054fa7b6722a3d0f3ef Mon Sep 17 00:00:00 2001 From: Manikumar Date: Fri, 2 Jun 2017 21:07:01 -0700 Subject: [PATCH 06/15] Add CDS_motif(), source() --- secmet/record.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/secmet/record.py b/secmet/record.py index 8391c90..fbb6ae7 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -84,6 +84,19 @@ def CDS(self): return [] CDS = [i for i in self._record.features if i.type == 'CDS'] return CDS + @property + def CDS_motif(self): + """A list of secondary metabolite cds_motifs present in the record""" + if self._record is None: + return [] + cds_motifs_list =[i for i in self._records.features if i.type == 'CDS_motif'] + return cds_motifs_list + @property + def source(self): + if self._record is not None: + return self._record.source + else: + return None def get_cds_from_gene(self,gene_list): """Give the CDS corresponding to a particular gene""" From a72daee6cf416ada4294f46e71e351648f916c4a Mon Sep 17 00:00:00 2001 From: Manikumar Date: Fri, 2 Jun 2017 21:34:53 -0700 Subject: [PATCH 07/15] Tested CDS_motif, source, Add feature_types property --- secmet/record.py | 206 +++++++++++++++++++++++++---------------------- 1 file changed, 108 insertions(+), 98 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index fbb6ae7..f289aea 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -6,114 +6,124 @@ class Record(object): - """A record containing secondary metabolite clusters""" - - def __init__(self, seq_record=None): - """Initialise a secondary metabolite record - - :param seq_record: :class:`Bio.SeqRecord.SeqRecord` to read - :type seq_record: :class:`Bio.SeqRecord.SeqRecord` - """ - self._record = seq_record - - - @classmethod - def from_genbank(cls, filename): - """Initialise a record from a GenBank file - - 
:param string filename: file name of the GenBank file to read - """ - seq_record = SeqIO.read(filename, 'genbank') - rec = cls(seq_record=seq_record) - return rec - - @property - def id(self): - """Pass through to seq_record object if available""" - if self._record is not None: - return self._record.id - else: - return "NO_ID_ASSIGNED" - - - @property - def seq(self): - """Pass through to seq_record object if available""" - if self._record is not None: - return self._record.seq - else: - return None - - - @property - def annotations(self): - """Pass through to seq_record object if available""" - if self._record is not None: - return self._record.annotations - else: - return {} - - @property - def description(self): - """Pass through to seq_record object if available""" - if self._record is not None: - return self._record.description - else: - return "" - - @property - def clusters(self): - """A list of secondary metabolite clusters present in the record""" - if self._record is None: - return [] - clusters = [i for i in self._record.features if i.type == 'cluster'] - return clusters - - @property - def gene(self): - """A list of secondary metabolite genes present in the record""" - if self._record is None: - return [] - gene_list =[i for i in self._record.features if i.type == 'gene'] - return gene_list - - @property - def CDS(self): - """A list of secondary metabolite CDS present in the record""" - if self._record is None: - return [] - CDS = [i for i in self._record.features if i.type == 'CDS'] - return CDS - @property - def CDS_motif(self): + """A record containing secondary metabolite clusters""" + + def __init__(self, seq_record=None): + """Initialise a secondary metabolite record + + :param seq_record: :class:`Bio.SeqRecord.SeqRecord` to read + :type seq_record: :class:`Bio.SeqRecord.SeqRecord` + """ + self._record = seq_record + + + @classmethod + def from_genbank(cls, filename): + """Initialise a record from a GenBank file + + :param string filename: file 
name of the GenBank file to read + """ + seq_record = SeqIO.read(filename, 'genbank') + rec = cls(seq_record=seq_record) + return rec + + @property + def id(self): + """Pass through to seq_record object if available""" + if self._record is not None: + return self._record.id + else: + return "NO_ID_ASSIGNED" + + + @property + def seq(self): + """Pass through to seq_record object if available""" + if self._record is not None: + return self._record.seq + else: + return None + + + @property + def annotations(self): + """Pass through to seq_record object if available""" + if self._record is not None: + return self._record.annotations + else: + return {} + + @property + def description(self): + """Pass through to seq_record object if available""" + if self._record is not None: + return self._record.description + else: + return "" + + @property + def clusters(self): + """A list of secondary metabolite clusters present in the record""" + if self._record is None: + return [] + clusters = [i for i in self._record.features if i.type == 'cluster'] + return clusters + + @property + def gene(self): + """A list of secondary metabolite genes present in the record""" + if self._record is None: + return [] + gene_list =[i for i in self._record.features if i.type == 'gene'] + return gene_list + + @property + def CDS(self): + """A list of secondary metabolite CDS present in the record""" + if self._record is None: + return [] + CDS = [i for i in self._record.features if i.type == 'CDS'] + return CDS + @property + def CDS_motif(self): """A list of secondary metabolite cds_motifs present in the record""" if self._record is None: return [] - cds_motifs_list =[i for i in self._records.features if i.type == 'CDS_motif'] + cds_motifs_list =[i for i in self._record.features if i.type == 'CDS_motif'] return cds_motifs_list - @property - def source(self): + + @property + def source(self): if self._record is not None: - return self._record.source + for i in self._record.features: + if i.type == 
'source': + return i else: return None - def get_cds_from_gene(self,gene_list): - """Give the CDS corresponding to a particular gene""" - cds_list =[] - for gene in gene_list: - if type(gene) != type(self.gene[0]): - return None - else: - gene_name = gene.qualifiers.__getattribute__.__self__['gene'][0] - cds = self.CDS - for i in cds: - if i.qualifiers.__getattribute__.__self__['gene'][0] == gene_name: - cds_list.append(i) - return cds_list + @property + def feature_types(self): + type_features =[] + for i in self._record.features: + if i.type not in type_features: + type_features.append(i.type) + return type_features + + def get_cds_from_gene(self,gene_list): + """Give the CDS corresponding to a particular gene""" + cds_list =[] + for gene in gene_list: + if type(gene) != type(self.gene[0]): + return None + else: + gene_name = gene.qualifiers.__getattribute__.__self__['gene'][0] + cds = self.CDS + for i in cds: + if i.qualifiers.__getattribute__.__self__['gene'][0] == gene_name: + cds_list.append(i) + return cds_list rec = Record.from_genbank('../tests/data/nisin.gbk') - From c2c90f422038540882bce90f531116e87e31bf0a Mon Sep 17 00:00:00 2001 From: Manikumar Date: Fri, 2 Jun 2017 22:10:44 -0700 Subject: [PATCH 08/15] Add from_file(cls,filename,filetype) classmethod that expands the domain of input files --- secmet/record.py | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index f289aea..e500d63 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -27,6 +27,30 @@ def from_genbank(cls, filename): rec = cls(seq_record=seq_record) return rec + @classmethod + def from_file(cls, filename,filetype): + + """Initialise a record from a file of specified type + + :param string filename: file name of the file to read + :param string filetype: Type of the inputfile + """ + + filetype_list = ['gb','genbank','fasta','fas','fa','emb','embl'] + if filetype in filetype_list: + if filetype == 'gb' or 
filetype == 'genbank': + type_of_file = 'genbank' + elif filetype == 'fas' or filetype == 'fa' or filetype =='fasta': + type_of_file = 'fasta' + else: + type_of_file = 'embl' + + seq_record = SeqIO.read(filename, type_of_file) + rec = cls(seq_record=seq_record) + return rec + else: + return None + @property def id(self): """Pass through to seq_record object if available""" @@ -124,6 +148,3 @@ def get_cds_from_gene(self,gene_list): return cds_list -rec = Record.from_genbank('../tests/data/nisin.gbk') - - From 925b89ea5e4ab79665994eee82e322b33585ff17 Mon Sep 17 00:00:00 2001 From: Manikumar Date: Fri, 2 Jun 2017 22:23:25 -0700 Subject: [PATCH 09/15] Add sequence.fasta file(downloaded from NCBI database) to tests --- tests/data/sequence.fasta | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 tests/data/sequence.fasta diff --git a/tests/data/sequence.fasta b/tests/data/sequence.fasta new file mode 100644 index 0000000..6626404 --- /dev/null +++ b/tests/data/sequence.fasta @@ -0,0 +1,13 @@ +>J04057.1 S.lactis antibiotic nisin (spaN) gene, complete cds +AGTTGACGAATATTTAATAATTTTATTAATATCTTGATTTTCTAGTTCCTGAATAATATAGAGATAGGTT +TATTGAGTCTTAGACATACTTGAATGACCTAGTCTTATAACTATACTGACAATAGAAACATTAACAAATC +TAAAACAGTCTTAATTCTATCTTGAGAAAGTATTGGTAATAATATTATTGTCGATAACGCGAGCATAATA +AACGGCTCTGATTAAATTCTGAAGTTTGTTAGATACAATGATTTCGTTCGAAGGAACTACAAAATAAATT +ATAAGGAGGCACTCAAAATGAGTACAAAAGATTTTAACTTGGATTTGGTATCTGTTTCGAAGAAAGATTC +AGGTGCATCACCACGCATTACAAGTATTTCGCTATGTACACCCGGTTGTAAAACAGGAGCTCTGATGGGT +TGTAACATGAAAACAGCAACTTGTCATTGTAGTATTCACGTAAGCAAATAACCAAATCAAAGGATAGTAT +TTTGTTAGTTCAGACATGGATACTATCCTATTTTTATAAGTTATTTAGGGTTGCTAAATAGCTTATAAAA +ATAAAGAGAGGAAAAAACATGATAAAAAGTTCATTTAAAGCTCAACCGTTTTTAGTAAGAAATACAATTT +TATCTCCAAACGATAAACGGAGTTTTACTGAATATACTCAAGTCATTGAGACTGTAAGTAAAAATAAAGT +TTTTTTGGAACAGTTACTACTAGCTAATCCTAAACTCTATGATGTTATGCAGAAATATAATGCTGGT + From b4713d056a592bb1ac307005fdc56abf0f082cea Mon Sep 17 00:00:00 2001 From: Manikumar Date: Sat, 3 Jun 2017 10:45:11 -0700 
Subject: [PATCH 10/15] Tested from_file() with sequence.fasta --- secmet/record.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/secmet/record.py b/secmet/record.py index e500d63..f4edb75 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -148,3 +148,5 @@ def get_cds_from_gene(self,gene_list): return cds_list +rec = Record.from_file('../tests/data/sequence.fasta','fasta') +print rec.seq From 86ec4f79f4a7a75f63691f2de9fdb38b4412784f Mon Sep 17 00:00:00 2001 From: Manikumar Date: Sun, 4 Jun 2017 19:59:45 +0530 Subject: [PATCH 11/15] Add cluster_cds class, make_cluster_cds_pair() (constructing Hierarchy) --- secmet/record.py | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index f4edb75..e2c71fc 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -13,8 +13,12 @@ def __init__(self, seq_record=None): :param seq_record: :class:`Bio.SeqRecord.SeqRecord` to read :type seq_record: :class:`Bio.SeqRecord.SeqRecord` + :param cluster_cds :Dictionary that holds pointers to corresponding cluster objects + :param cds_array :List that has the objects of type cluster_cds """ self._record = seq_record + self.cluster_cds = {} + self.cds_array =[None for i in range(100)] #Initialising the cds_array to None, Maximum 100 cluster features @classmethod @@ -108,6 +112,7 @@ def CDS(self): return [] CDS = [i for i in self._record.features if i.type == 'CDS'] return CDS + @property def CDS_motif(self): """A list of secondary metabolite cds_motifs present in the record""" @@ -147,6 +152,31 @@ def get_cds_from_gene(self,gene_list): cds_list.append(i) return cds_list + def make_cluster_cds_pair(self,cluster_object,cds_list): + """Links cluster objects with corresponding cds objects + :param cluster object: cluster feature object + :param cds_list : list of cds feature object linked to the cluster object + """ + hash_value = self.hash_function(cluster_object) #getting a hash_value which will the 
index of the list containing cluster_cds() class objects(cds_array) + + self.cluster_cds[cluster_object.id]=hash_value #linking hash_value(index of cds_array) and the unique id of the cluster object + + self.cds_array[hash_value] = cluster_cds(cds_list,cluster_object.id) + + def hash_function(a,b): + #Hash function yet to be defined based on the id of the cluster feature object + return 0 + + def get_cds_from_cluster(self,cluster_object): + #Should return list of cds from given cluster object + return + + + +class cluster_cds(): + def __init__(self,cds_list=[],key=None): + self.cds_list = cds_list + self.key = key + -rec = Record.from_file('../tests/data/sequence.fasta','fasta') -print rec.seq +rec = Record.from_file('../tests/data/nisin.gbk','genbank') From b80cbc2c58f7b493096a5f9daa4fbe660422a22a Mon Sep 17 00:00:00 2001 From: Manikumar Date: Sun, 4 Jun 2017 20:24:30 +0530 Subject: [PATCH 12/15] Add get_cds_from_cluster and get_cluster_from_cds --- secmet/record.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index e2c71fc..073f303 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -17,7 +17,7 @@ def __init__(self, seq_record=None): :param cds_array :List that has the objects of type cluster_cds """ self._record = seq_record - self.cluster_cds = {} + self.cluster_cds_dict = {} self.cds_array =[None for i in range(100)] #Initialising the cds_array to None, Maximum 100 cluster features @@ -159,7 +159,7 @@ def make_cluster_cds_pair(self,cluster_object,cds_list): """ hash_value = self.hash_function(cluster_object) #getting a hash_value which will the index of the list containing cluster_cds() class objects(cds_array) - self.cluster_cds[cluster_object.id]=hash_value #linking hash_value(index of cds_array) and the unique id of the cluster object + self.cluster_cds_dict[cluster_object.id]=hash_value #linking hash_value(index of cds_array) and the unique id of the cluster object 
self.cds_array[hash_value] = cluster_cds(cds_list,cluster_object.id) @@ -168,11 +168,22 @@ def hash_function(a,b): return 0 def get_cds_from_cluster(self,cluster_object): - #Should return list of cds from given cluster object - return + pointer = self.cluster_cds_dict[cluster_object.id] + cds_object = self.cds_array[pointer] + + return cds_object.cds_list + + def get_cluster_from_cds(self,cds_object): + for i in self.cds_array: + if i != None: + for j in i.cds_list: + if j.qualifiers['product'][0] == cds_object.qualifiers['product'][0]: + for k in self.clusters: + if k.id == i.key: + return k + - class cluster_cds(): def __init__(self,cds_list=[],key=None): self.cds_list = cds_list @@ -180,3 +191,5 @@ def __init__(self,cds_list=[],key=None): rec = Record.from_file('../tests/data/nisin.gbk','genbank') + + From 7612949ac732019f11e9d616223e547811c0cc6a Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Mon, 5 Jun 2017 23:39:11 +0530 Subject: [PATCH 13/15] Removed Hash function,from_genbank(),cluster_cds_dict --- secmet/record.py | 72 +++++++++++++++++++----------------------------- 1 file changed, 29 insertions(+), 43 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index 073f303..64b7701 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -13,23 +13,10 @@ def __init__(self, seq_record=None): :param seq_record: :class:`Bio.SeqRecord.SeqRecord` to read :type seq_record: :class:`Bio.SeqRecord.SeqRecord` - :param cluster_cds :Dictionary that holds pointers to corresponding cluster objects :param cds_array :List that has the objects of type cluster_cds """ self._record = seq_record - self.cluster_cds_dict = {} - self.cds_array =[None for i in range(100)] #Initialising the cds_array to None, Maximum 100 cluster features - - - @classmethod - def from_genbank(cls, filename): - """Initialise a record from a GenBank file - - :param string filename: file name of the GenBank file to read - """ - seq_record = SeqIO.read(filename, 'genbank') - rec = 
cls(seq_record=seq_record) - return rec + self.cds_array =[] #Initialising the cds_array to None, Maximum 100 cluster features @classmethod def from_file(cls, filename,filetype): @@ -39,7 +26,7 @@ def from_file(cls, filename,filetype): :param string filename: file name of the file to read :param string filetype: Type of the inputfile """ - + filetype_list = ['gb','genbank','fasta','fas','fa','emb','embl'] if filetype in filetype_list: if filetype == 'gb' or filetype == 'genbank': @@ -48,7 +35,7 @@ def from_file(cls, filename,filetype): type_of_file = 'fasta' else: type_of_file = 'embl' - + seq_record = SeqIO.read(filename, type_of_file) rec = cls(seq_record=seq_record) return rec @@ -154,42 +141,41 @@ def get_cds_from_gene(self,gene_list): def make_cluster_cds_pair(self,cluster_object,cds_list): """Links cluster objects with corresponding cds objects - :param cluster object: cluster feature object - :param cds_list : list of cds feature object linked to the cluster object + :param cluster_cobject: cluster object + :param cds_list : list of cds feature object linked to the cluster object """ - hash_value = self.hash_function(cluster_object) #getting a hash_value which will the index of the list containing cluster_cds() class objects(cds_array) - self.cluster_cds_dict[cluster_object.id]=hash_value #linking hash_value(index of cds_array) and the unique id of the cluster object + self.cds_array.append(cluster_cds(cds_list,cluster_object.id)) - self.cds_array[hash_value] = cluster_cds(cds_list,cluster_object.id) - - def hash_function(a,b): - #Hash function yet to be defined based on the id of the cluster feature object - return 0 - def get_cds_from_cluster(self,cluster_object): - pointer = self.cluster_cds_dict[cluster_object.id] - cds_object = self.cds_array[pointer] + """ Given a cluster feature object returns the corresponding CDS features list + :param cluster_object: cluster feature object + """ + for i in self.cds_array: + if i .key == cluster_object.id: + return 
i.cds_list + else: + return None - return cds_object.cds_list - def get_cluster_from_cds(self,cds_object): - for i in self.cds_array: - if i != None: - for j in i.cds_list: - if j.qualifiers['product'][0] == cds_object.qualifiers['product'][0]: - for k in self.clusters: - if k.id == i.key: - return k - - - + """ Given a cds feature object returns the corresponding cluster object + :param CDS object: CDS feature object + """ + for cluster_cds_object in self.cds_array: + for cds_obj in cluster_cds_object.cds_list: + if cds_obj.qualifiers['product'][0] == cds_object.qualifiers['product'][0]: + for cluster in self.clusters: + if cluster.id == cluster_cds_object.key: + return cluster + + + class cluster_cds(): def __init__(self,cds_list=[],key=None): self.cds_list = cds_list self.key = key - - -rec = Record.from_file('../tests/data/nisin.gbk','genbank') +rec = Record.from_file('../tests/data/nisin.gbk','genbank') +rec.make_cluster_cds_pair(rec.clusters[0],rec.CDS) +print rec.get_cds_from_cluster(rec.clusters[0]) From 2a11f427ba09d7f96596eb8faa6e613a91e2a633 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Tue, 6 Jun 2017 00:00:09 +0530 Subject: [PATCH 14/15] Add get_gene_from_cds() method --- secmet/record.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/secmet/record.py b/secmet/record.py index 64b7701..83b3489 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -139,6 +139,23 @@ def get_cds_from_gene(self,gene_list): cds_list.append(i) return cds_list + def get_gene_from_cds(self,cds_list): + """Returns a list of gene features corresponding to a list of CDS features + :param cds_list : list of CDS features + """ + gene_list =[] + for cds in cds_list: + if type(cds) != type(self.CDS[0]): + return None + else: + cds_name = cds.qualifiers['product'][0] + gene = self.gene + for i in gene: + if i.qualifiers.__getattribute__.__self__['gene'][0].lower() == cds_name.lower(): #.lower() is used to overcome strings comparison + # 
without considering the case of the alphabets + gene_list.append(i) + return gene_list + def make_cluster_cds_pair(self,cluster_object,cds_list): """Links cluster objects with corresponding cds objects :param cluster_cobject: cluster object @@ -178,4 +195,4 @@ def __init__(self,cds_list=[],key=None): rec = Record.from_file('../tests/data/nisin.gbk','genbank') rec.make_cluster_cds_pair(rec.clusters[0],rec.CDS) -print rec.get_cds_from_cluster(rec.clusters[0]) +print rec.get_gene_from_cds(rec.CDS) From 0ef7476b896c234f95632a592733564a322ef3e8 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Thu, 8 Jun 2017 01:04:04 +0530 Subject: [PATCH 15/15] Refined secmet library --- secmet/record.py | 320 ++++++++++++++++++++--------------------------- 1 file changed, 133 insertions(+), 187 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index 83b3489..77cd021 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -2,197 +2,143 @@ # Licensed under the APL2, see LICENSE for details """Secondary Metabolite Record Objects""" +import Bio from Bio import SeqIO class Record(object): - """A record containing secondary metabolite clusters""" - - def __init__(self, seq_record=None): - """Initialise a secondary metabolite record - - :param seq_record: :class:`Bio.SeqRecord.SeqRecord` to read - :type seq_record: :class:`Bio.SeqRecord.SeqRecord` - :param cds_array :List that has the objects of type cluster_cds - """ - self._record = seq_record - self.cds_array =[] #Initialising the cds_array to None, Maximum 100 cluster features - - @classmethod - def from_file(cls, filename,filetype): - - """Initialise a record from a file of specified type - - :param string filename: file name of the file to read - :param string filetype: Type of the inputfile - """ - - filetype_list = ['gb','genbank','fasta','fas','fa','emb','embl'] - if filetype in filetype_list: - if filetype == 'gb' or filetype == 'genbank': - type_of_file = 'genbank' - elif filetype == 'fas' or filetype == 'fa' or 
filetype =='fasta': - type_of_file = 'fasta' - else: - type_of_file = 'embl' - - seq_record = SeqIO.read(filename, type_of_file) - rec = cls(seq_record=seq_record) - return rec - else: - return None - - @property - def id(self): - """Pass through to seq_record object if available""" - if self._record is not None: - return self._record.id - else: - return "NO_ID_ASSIGNED" - - - @property - def seq(self): - """Pass through to seq_record object if available""" - if self._record is not None: - return self._record.seq - else: + """A record containing secondary metabolite clusters""" + + def __init__(self, seq_record=None): + """Initialise a secondary metabolite record + + :param seq_record: :class:`Bio.SeqRecord.SeqRecord` to read + :type seq_record: :class:`Bio.SeqRecord.SeqRecord` + """ + self._record = seq_record + self._cluster_cds = {} #Dictionary to create cluster-cds hierarchy + + @classmethod + def from_file(cls, filename, filetype): + + """Initialise a record from a file of specified type + + :param string filename: file name of the file to read + :param string filetype: Type of the inputfile + """ + if filetype in ['gb', 'genbank']: + type_of_file = 'genbank' + elif filetype in ['fa', 'fas', 'fasta']: + type_of_file = 'fasta' + elif filetype in ['emb', 'embl']: + type_of_file = 'embl' + else: + return None + seq_record = SeqIO.read(filename, type_of_file) + rec = cls(seq_record=seq_record) + return rec + + @property + def id(self): + """Pass through to seq_record object if available""" + if self._record is not None: + return self._record.id + else: + return "NO_ID_ASSIGNED" + + @property + def seq(self): + """Pass through to seq_record object if available""" + if self._record is not None: + return self._record.seq + else: + return None + + @property + def annotations(self): + """Pass through to seq_record object if available""" + if self._record is not None: + return self._record.annotations + else: + return {} + + @property + def description(self): + """Pass 
through to seq_record object if available""" + if self._record is not None: + return self._record.description + else: + return "" + + @property + def feature_types(self): + """Returns a list of all types of features present in the record""" + type_features = [] + for i in self._record.features: + if i.type not in type_features: + type_features.append(i.type) + return type_features + + def _features_by_type(self, f_type): + """Returns a list of features of specified f_type in the record + param string f_type: Name of the feature + """ + if f_type in self.feature_types: + return [i for i in self._record.features if i.type == f_type] + else: + return [] + + def get_cds_from_gene(self, genes): + """Returns a list of CDS features corresponding to a list of gene features + :param list genes : List of gene features + """ + cds_list = [] + cdss = self._features_by_type('CDS') + for gene in genes: + if not isinstance(gene, Bio.SeqFeature.SeqFeature): return None - - - @property - def annotations(self): - """Pass through to seq_record object if available""" - if self._record is not None: - return self._record.annotations else: - return {} - - @property - def description(self): - """Pass through to seq_record object if available""" - if self._record is not None: - return self._record.description + gene_name = gene.qualifiers['gene'][0] + for cds in cdss: + if cds.qualifiers['gene'][0] == gene_name: + cds_list.append(cds) + cdss.remove(cds) #Removing to reduce the number of operations + return cds_list + + def get_gene_from_cds(self, cdss): + """Returns a list of gene features corresponding to a list of CDS features + :param cdss : List of CDS features + """ + gene_list = [] + genes = self._features_by_type('gene') + for cds in cdss: + if not isinstance(cds, Bio.SeqFeature.SeqFeature): + return None else: - return "" - - @property - def clusters(self): - """A list of secondary metabolite clusters present in the record""" - if self._record is None: - return [] - clusters = [i 
for i in self._record.features if i.type == 'cluster'] - return clusters - - @property - def gene(self): - """A list of secondary metabolite genes present in the record""" - if self._record is None: - return [] - gene_list =[i for i in self._record.features if i.type == 'gene'] - return gene_list - - @property - def CDS(self): - """A list of secondary metabolite CDS present in the record""" - if self._record is None: - return [] - CDS = [i for i in self._record.features if i.type == 'CDS'] - return CDS - - @property - def CDS_motif(self): - """A list of secondary metabolite cds_motifs present in the record""" - if self._record is None: - return [] - cds_motifs_list =[i for i in self._record.features if i.type == 'CDS_motif'] - return cds_motifs_list - - @property - def source(self): - if self._record is not None: - for i in self._record.features: - if i.type == 'source': - return i - else: - return None - - @property - def feature_types(self): - type_features =[] - for i in self._record.features: - if i.type not in type_features: - type_features.append(i.type) - return type_features - - def get_cds_from_gene(self,gene_list): - """Give the CDS corresponding to a particular gene""" - cds_list =[] - for gene in gene_list: - if type(gene) != type(self.gene[0]): - return None - else: - gene_name = gene.qualifiers.__getattribute__.__self__['gene'][0] - cds = self.CDS - for i in cds: - if i.qualifiers.__getattribute__.__self__['gene'][0] == gene_name: - cds_list.append(i) - return cds_list - - def get_gene_from_cds(self,cds_list): - """Returns a list of gene features corresponding to a list of CDS features - :param cds_list : list of CDS features - """ - gene_list =[] - for cds in cds_list: - if type(cds) != type(self.CDS[0]): - return None - else: - cds_name = cds.qualifiers['product'][0] - gene = self.gene - for i in gene: - if i.qualifiers.__getattribute__.__self__['gene'][0].lower() == cds_name.lower(): #.lower() is used to overcome strings comparison - # without 
considering the case of the alphabets - gene_list.append(i) - return gene_list - - def make_cluster_cds_pair(self,cluster_object,cds_list): - """Links cluster objects with corresponding cds objects - :param cluster_cobject: cluster object - :param cds_list : list of cds feature object linked to the cluster object - """ - - self.cds_array.append(cluster_cds(cds_list,cluster_object.id)) - - def get_cds_from_cluster(self,cluster_object): - """ Given a cluster feature object returns the corresponding CDS features list - :param cluster_object: cluster feature object - """ - for i in self.cds_array: - if i .key == cluster_object.id: - return i.cds_list - else: - return None - - def get_cluster_from_cds(self,cds_object): - """ Given a cds feature object returns the corresponding cluster object - :param CDS object: CDS feature object - """ - for cluster_cds_object in self.cds_array: - for cds_obj in cluster_cds_object.cds_list: - if cds_obj.qualifiers['product'][0] == cds_object.qualifiers['product'][0]: - for cluster in self.clusters: - if cluster.id == cluster_cds_object.key: - return cluster - - - -class cluster_cds(): - def __init__(self,cds_list=[],key=None): - self.cds_list = cds_list - self.key = key - - -rec = Record.from_file('../tests/data/nisin.gbk','genbank') -rec.make_cluster_cds_pair(rec.clusters[0],rec.CDS) -print rec.get_gene_from_cds(rec.CDS) + cds_name = cds.qualifiers['product'][0] + for gene in genes: + if gene.qualifiers['gene'][0].lower() == cds_name.lower(): + gene_list.append(gene) + genes.remove(gene) #Removing to reduce the number of operations + return gene_list + + def make_cluster_cds_pair(self, cluster_object, cds_list): + """Creates a dictionary of cluster objects with corresponding cds objects + :param cluster_object: A cluster feature object + :param cds_list : list of cds objects corresponding to the cluster object + """ + self._cluster_cds[cluster_object] = cds_list + + def get_cds_from_cluster(self, cluster_object): + """Returns the list 
of CDS feature objects of the given cluster_object + :param cluster_object: cluster feature object + """ + return self._cluster_cds[cluster_object] + + def get_cluster_from_cds(self, cds_object): + """Returns the cluster feature object of the given cds_object + :param CDS object: CDS feature object + """ + for cluster, cds_list in self._cluster_cds.items(): + if cds_object in cds_list: + return cluster