diff --git a/secmet/record.py b/secmet/record.py index ba8b658..77cd021 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -2,6 +2,7 @@ # Licensed under the APL2, see LICENSE for details """Secondary Metabolite Record Objects""" +import Bio from Bio import SeqIO @@ -15,19 +16,28 @@ def __init__(self, seq_record=None): :type seq_record: :class:`Bio.SeqRecord.SeqRecord` """ self._record = seq_record - + self._cluster_cds = {} #Dictionary to create cluster-cds hierarchy @classmethod - def from_genbank(cls, filename): - """Initialise a record from a GenBank file + def from_file(cls, filename, filetype): + + """Initialise a record from a file of specified type - :param string filename: file name of the GenBank file to read + :param string filename: file name of the file to read + :param string filetype: Type of the inputfile """ - seq_record = SeqIO.read(filename, 'genbank') + if filetype in ['gb', 'genbank']: + type_of_file = 'genbank' + elif filetype in ['fa', 'fas', 'fasta']: + type_of_file = 'fasta' + elif filetype in ['emb', 'embl']: + type_of_file = 'embl' + else: + return None + seq_record = SeqIO.read(filename, type_of_file) rec = cls(seq_record=seq_record) return rec - @property def id(self): """Pass through to seq_record object if available""" @@ -36,7 +46,6 @@ def id(self): else: return "NO_ID_ASSIGNED" - @property def seq(self): """Pass through to seq_record object if available""" @@ -45,7 +54,6 @@ def seq(self): else: return None - @property def annotations(self): """Pass through to seq_record object if available""" @@ -63,10 +71,74 @@ def description(self): return "" @property - def clusters(self): - """A list of secondary metabolite clusters present in the record""" - if self._record is None: + def feature_types(self): + """Returns a list of all types of features present in the record""" + type_features = [] + for i in self._record.features: + if i.type not in type_features: + type_features.append(i.type) + return type_features + + def _features_by_type(self, f_type): + """Returns a list of features of specified f_type in the record + param string f_type: Name of the feature + """ + if f_type in self.feature_types: + return [i for i in self._record.features if i.type == f_type] + else: return [] - clusters = [i for i in self._record.features if i.type == 'cluster'] - return clusters + def get_cds_from_gene(self, genes): + """Returns a list of CDS features corresponding to a list of gene features + :param list genes : List of gene features + """ + cds_list = [] + cdss = self._features_by_type('CDS') + for gene in genes: + if not isinstance(gene, Bio.SeqFeature.SeqFeature): + return None + else: + gene_name = gene.qualifiers['gene'][0] + for cds in cdss: + if cds.qualifiers['gene'][0] == gene_name: + cds_list.append(cds) + cdss.remove(cds) #Removing to reduce the number of operations + return cds_list + + def get_gene_from_cds(self, cdss): + """Returns a list of gene features corresponding to a list of CDS features + :param cdss : List of CDS features + """ + gene_list = [] + genes = self._features_by_type('gene') + for cds in cdss: + if not isinstance(cds, Bio.SeqFeature.SeqFeature): + return None + else: + cds_name = cds.qualifiers['product'][0] + for gene in genes: + if gene.qualifiers['gene'][0].lower() == cds_name.lower(): + gene_list.append(gene) + genes.remove(gene) #Removing to reduce the number of operations + return gene_list + + def make_cluster_cds_pair(self, cluster_object, cds_list): + """Creates a dictionary of cluster objects with corresponding cds objects + :param cluster_object: A cluster feature object + :param cds_list : list of cds objects corresponding to the cluster object + """ + self._cluster_cds[cluster_object] = cds_list + + def get_cds_from_cluster(self, cluster_object): + """Returns the list of CDS feature objects of the given cluster_object + :param cluster_object: cluster feature object + """ + return self._cluster_cds[cluster_object] + + def get_cluster_from_cds(self, cds_object): + """Returns the cluster feature object of the given cds_object + :param CDS object: CDS feature object + """ + for cluster, cds_list in self._cluster_cds.items(): + if cds_object in cds_list: + return cluster diff --git a/tests/data/sequence.fasta b/tests/data/sequence.fasta new file mode 100644 index 0000000..6626404 --- /dev/null +++ b/tests/data/sequence.fasta @@ -0,0 +1,13 @@ +>J04057.1 S.lactis antibiotic nisin (spaN) gene, complete cds +AGTTGACGAATATTTAATAATTTTATTAATATCTTGATTTTCTAGTTCCTGAATAATATAGAGATAGGTT +TATTGAGTCTTAGACATACTTGAATGACCTAGTCTTATAACTATACTGACAATAGAAACATTAACAAATC +TAAAACAGTCTTAATTCTATCTTGAGAAAGTATTGGTAATAATATTATTGTCGATAACGCGAGCATAATA +AACGGCTCTGATTAAATTCTGAAGTTTGTTAGATACAATGATTTCGTTCGAAGGAACTACAAAATAAATT +ATAAGGAGGCACTCAAAATGAGTACAAAAGATTTTAACTTGGATTTGGTATCTGTTTCGAAGAAAGATTC +AGGTGCATCACCACGCATTACAAGTATTTCGCTATGTACACCCGGTTGTAAAACAGGAGCTCTGATGGGT +TGTAACATGAAAACAGCAACTTGTCATTGTAGTATTCACGTAAGCAAATAACCAAATCAAAGGATAGTAT +TTTGTTAGTTCAGACATGGATACTATCCTATTTTTATAAGTTATTTAGGGTTGCTAAATAGCTTATAAAA +ATAAAGAGAGGAAAAAACATGATAAAAAGTTCATTTAAAGCTCAACCGTTTTTAGTAAGAAATACAATTT +TATCTCCAAACGATAAACGGAGTTTTACTGAATATACTCAAGTCATTGAGACTGTAAGTAAAAATAAAGT +TTTTTTGGAACAGTTACTACTAGCTAATCCTAAACTCTATGATGTTATGCAGAAATATAATGCTGGT + diff --git a/tests/secmet/__init__.py b/tests/secmet/__init__.py new file mode 100644 index 0000000..b8023d8 --- /dev/null +++ b/tests/secmet/__init__.py @@ -0,0 +1 @@ +__version__ = '0.0.1' diff --git a/tests/secmet/record.py b/tests/secmet/record.py new file mode 100644 index 0000000..7ebf36c --- /dev/null +++ b/tests/secmet/record.py @@ -0,0 +1,101 @@ +# vim :set et sts=4 sw=4 fileencoding=utf-8 : +# Licensed under the APL2, see LICENSE for details +"""Secondary Metabolite Record Objects""" + +from Bio import SeqIO + + +class Record(object): + """A record containing secondary metabolite clusters""" + + def __init__(self, seq_record=None): + """Initialise a secondary metabolite record + + :param seq_record: :class:`Bio.SeqRecord.SeqRecord` to read + :type seq_record: :class:`Bio.SeqRecord.SeqRecord` + """ + self._record = seq_record + + + @classmethod + def from_genbank(cls, filename): + """Initialise a record from a GenBank file + + :param string filename: file name of the GenBank file to read + """ + seq_record = SeqIO.read(filename, 'genbank') + rec = cls(seq_record=seq_record) + return rec + + @property + def id(self): + """Pass through to seq_record object if available""" + if self._record is not None: + return self._record.id + else: + return "NO_ID_ASSIGNED" + + + @property + def seq(self): + """Pass through to seq_record object if available""" + if self._record is not None: + return self._record.seq + else: + return None + + + @property + def annotations(self): + """Pass through to seq_record object if available""" + if self._record is not None: + return self._record.annotations + else: + return {} + + @property + def description(self): + """Pass through to seq_record object if available""" + if self._record is not None: + return self._record.description + else: + return "" + + @property + def clusters(self): + """A list of secondary metabolite clusters present in the record""" + if self._record is None: + return [] + clusters = [i for i in self._record.features if i.type == 'cluster'] + return clusters + + @property + def gene(self): + """A list of secondary metabolite clusters present in the record""" + if self._record is None: + return [] + gene_list =[i for i in self._record.features if i.type == 'gene'] + return gene_list + + @property + def CDS(self): + if self._record is None: + return [] + CDS = [i for i in self._record.features if i.type == 'CDS'] + return CDS + + def get_cds_from_gene(self,gene): + if type(gene) != type(self.gene[0]): + return None + else: + gene_name = gene.qualifiers.__getattribute__.__self__['gene'][0] + cds = self.CDS + for i in cds: + if i.qualifiers.__getattribute__.__self__['gene'][0] == gene_name: + return i + return None + + + + + diff --git a/tests/test_record.py b/tests/test_record.py index 3486dca..996918c 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -36,3 +36,43 @@ def test_clusters(): testfile = get_testfile('nisin.gbk') rec = Record.from_genbank(testfile) assert len(rec.clusters) == 1 + +def test_gene(): + testfile = get_testfile('nisin.gbk') + rec = Record.from_genbank(testfile) + bp_rec = SeqIO.read(testfile, 'genbank') + bp_gene = [i for i in bp_rec.features if i.type == 'gene'] + assert len(bp_cds) == len(rec.gene) + +def test_cds(): + testfile = get_testfile('nisin.gbk') + rec = Record.from_genbank(testfile) + bp_rec = SeqIO.read(testfile, 'genbank') + bp_cds = [i for i in bp_rec.features if i.type == 'CDS'] + assert len(bp_cds) == len(rec.CDS) + +def test_get_cds_from_gene(): + testfile = get_testfile('nisin.gbk') + rec = Record.from_genbank(testfile) + bp_rec = SeqIO.read(testfile, 'genbank') + bp_gene = [i for i in bp_rec.features if i.type == 'gene'] + bp_cds = [i for i in bp_rec.features if i.type == 'CDS'] + #get gene name from bp_gene list + bp_gene_name = bp_gene[0].qualifiers.__getattribute__.__self__['gene'][0] + + #get cds name from bp_cds list + bp_cds_name = bp_cds[0].qualifiers.__getattribute__.__self__['gene'][0] + + #compare bp_gene_name and bp_cds_name + assert bp_gene_name == bp_cds_name + + #compare bp_cds_name and secmet rec cds name + assert bp_cds_name == rec.get_cds_from_gene(bp_gene[0]).qualifiers.__getattribute__.__self__['gene'][0] + + + + + + + +