Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 85 additions & 13 deletions secmet/record.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Licensed under the APL2, see LICENSE for details
"""Secondary Metabolite Record Objects"""

import Bio
from Bio import SeqIO


Expand All @@ -15,19 +16,28 @@ def __init__(self, seq_record=None):
:type seq_record: :class:`Bio.SeqRecord.SeqRecord`
"""
self._record = seq_record

self._cluster_cds = {} #Dictionary to create cluster-cds hierarchy

@classmethod
def from_file(cls, filename, filetype):
    """Initialise a record from a sequence file of the specified type

    The type is matched case-insensitively and a leading dot is ignored,
    so a raw file extension such as ``.gbk`` can be passed in directly.

    :param string filename: file name of the file to read
    :param string filetype: type of the input file; one of ``gb``, ``gbk``,
        ``genbank``, ``fa``, ``fas``, ``fasta``, ``emb`` or ``embl``
    :return: a new record, or ``None`` if ``filetype`` is not recognised
    """
    # Accepted spellings -> format name handed to SeqIO.read
    formats = {
        'gb': 'genbank',
        'gbk': 'genbank',
        'genbank': 'genbank',
        'fa': 'fasta',
        'fas': 'fasta',
        'fasta': 'fasta',
        'emb': 'embl',
        'embl': 'embl',
    }
    try:
        key = filetype.lower().lstrip('.')
    except AttributeError:
        # Non-string types were never recognised; keep returning None
        return None

    type_of_file = formats.get(key)
    if type_of_file is None:
        return None

    seq_record = SeqIO.read(filename, type_of_file)
    return cls(seq_record=seq_record)

@classmethod
def from_genbank(cls, filename):
    """Initialise a record from a GenBank file

    Thin wrapper around :meth:`from_file`, kept so that callers of the
    original GenBank-only constructor continue to work.

    :param string filename: file name of the GenBank file to read
    """
    return cls.from_file(filename, 'genbank')


@property
def id(self):
"""Pass through to seq_record object if available"""
Expand All @@ -36,7 +46,6 @@ def id(self):
else:
return "NO_ID_ASSIGNED"


@property
def seq(self):
"""Pass through to seq_record object if available"""
Expand All @@ -45,7 +54,6 @@ def seq(self):
else:
return None


@property
def annotations(self):
"""Pass through to seq_record object if available"""
Expand All @@ -63,10 +71,74 @@ def description(self):
return ""

@property
def clusters(self):
    """A list of secondary metabolite clusters present in the record"""
    return self._features_by_type('cluster')

@property
def feature_types(self):
    """Returns a list of all types of features present in the record

    Every type is listed once, in order of first appearance. An empty
    list is returned when no sequence record is attached.
    """
    if self._record is None:
        return []

    type_features = []
    seen = set()  # constant-time membership test; the list keeps the order
    for feature in self._record.features:
        if feature.type not in seen:
            seen.add(feature.type)
            type_features.append(feature.type)
    return type_features

def _features_by_type(self, f_type):
    """Returns a list of features of specified f_type in the record

    :param string f_type: Name of the feature type, e.g. ``'CDS'``
    :return: matching features in record order; empty list when there are
        none or when no sequence record is attached
    """
    if self._record is None:
        return []
    # One pass is enough: an unknown type simply yields an empty list
    return [i for i in self._record.features if i.type == f_type]
def get_cds_from_gene(self, genes):
    """Returns a list of CDS features corresponding to a list of gene features

    A CDS belongs to a gene when the first value of its ``gene`` qualifier
    equals the first value of the gene's ``gene`` qualifier. Each CDS is
    returned at most once. Features without a ``gene`` qualifier are
    ignored instead of raising ``KeyError``.

    :param list genes: List of gene features
    :return: list of matching CDS features, or ``None`` if any entry of
        ``genes`` is not a :class:`Bio.SeqFeature.SeqFeature`
    """
    # Index the CDS features by gene name once. The previous version
    # removed items from the CDS list while iterating over it, which
    # silently skipped the element following every match.
    cds_by_gene = {}
    for cds in self._features_by_type('CDS'):
        names = cds.qualifiers.get('gene')
        if names:
            cds_by_gene.setdefault(names[0], []).append(cds)

    cds_list = []
    for gene in genes:
        if not isinstance(gene, Bio.SeqFeature.SeqFeature):
            return None
        names = gene.qualifiers.get('gene')
        if names:
            # pop() so that a CDS is never reported twice
            cds_list.extend(cds_by_gene.pop(names[0], []))
    return cds_list

def get_gene_from_cds(self, cdss):
    """Returns a list of gene features corresponding to a list of CDS features

    A gene belongs to a CDS when the first value of the gene's ``gene``
    qualifier equals, ignoring case, the first value of the CDS's
    ``product`` qualifier. Each gene is returned at most once. Features
    lacking the relevant qualifier are ignored instead of raising
    ``KeyError``.

    NOTE(review): the CDS side is matched on ``product`` rather than on
    ``gene``; confirm that this is the intended qualifier.

    :param list cdss: List of CDS features
    :return: list of matching gene features, or ``None`` if any entry of
        ``cdss`` is not a :class:`Bio.SeqFeature.SeqFeature`
    """
    # Index the gene features by lower-cased name once. The previous
    # version removed items from the gene list while iterating over it,
    # which silently skipped the element following every match.
    genes_by_name = {}
    for gene in self._features_by_type('gene'):
        names = gene.qualifiers.get('gene')
        if names:
            genes_by_name.setdefault(names[0].lower(), []).append(gene)

    gene_list = []
    for cds in cdss:
        if not isinstance(cds, Bio.SeqFeature.SeqFeature):
            return None
        products = cds.qualifiers.get('product')
        if products:
            # pop() so that a gene is never reported twice
            gene_list.extend(genes_by_name.pop(products[0].lower(), []))
    return gene_list

def make_cluster_cds_pair(self, cluster_object, cds_list):
    """Registers the CDS features that belong to a cluster

    The cluster is used as a dictionary key, so any earlier entry for the
    same cluster is replaced.

    :param cluster_object: A cluster feature object
    :param cds_list: list of cds objects corresponding to the cluster object
    """
    # NOTE(review): cluster_object has to be hashable to serve as a key;
    # confirm this holds for the feature class in use.
    registry = self._cluster_cds
    registry[cluster_object] = cds_list

def get_cds_from_cluster(self, cluster_object):
    """Looks up the CDS features registered for a cluster

    :param cluster_object: cluster feature object
    :return: the list stored for ``cluster_object``
    :raises KeyError: if the cluster has not been registered
    """
    registry = self._cluster_cds
    return registry[cluster_object]

def get_cluster_from_cds(self, cds_object):
    """Finds the cluster a CDS feature has been registered under

    :param cds_object: CDS feature object
    :return: the first registered cluster whose CDS list contains
        ``cds_object``, or ``None`` if there is no such cluster
    """
    owners = (
        cluster
        for cluster, members in self._cluster_cds.items()
        if cds_object in members
    )
    return next(owners, None)
13 changes: 13 additions & 0 deletions tests/data/sequence.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
>J04057.1 S.lactis antibiotic nisin (spaN) gene, complete cds
AGTTGACGAATATTTAATAATTTTATTAATATCTTGATTTTCTAGTTCCTGAATAATATAGAGATAGGTT
TATTGAGTCTTAGACATACTTGAATGACCTAGTCTTATAACTATACTGACAATAGAAACATTAACAAATC
TAAAACAGTCTTAATTCTATCTTGAGAAAGTATTGGTAATAATATTATTGTCGATAACGCGAGCATAATA
AACGGCTCTGATTAAATTCTGAAGTTTGTTAGATACAATGATTTCGTTCGAAGGAACTACAAAATAAATT
ATAAGGAGGCACTCAAAATGAGTACAAAAGATTTTAACTTGGATTTGGTATCTGTTTCGAAGAAAGATTC
AGGTGCATCACCACGCATTACAAGTATTTCGCTATGTACACCCGGTTGTAAAACAGGAGCTCTGATGGGT
TGTAACATGAAAACAGCAACTTGTCATTGTAGTATTCACGTAAGCAAATAACCAAATCAAAGGATAGTAT
TTTGTTAGTTCAGACATGGATACTATCCTATTTTTATAAGTTATTTAGGGTTGCTAAATAGCTTATAAAA
ATAAAGAGAGGAAAAAACATGATAAAAAGTTCATTTAAAGCTCAACCGTTTTTAGTAAGAAATACAATTT
TATCTCCAAACGATAAACGGAGTTTTACTGAATATACTCAAGTCATTGAGACTGTAAGTAAAAATAAAGT
TTTTTTGGAACAGTTACTACTAGCTAATCCTAAACTCTATGATGTTATGCAGAAATATAATGCTGGT

1 change: 1 addition & 0 deletions tests/secmet/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__version__ = '0.0.1'
101 changes: 101 additions & 0 deletions tests/secmet/record.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# vim :set et sts=4 sw=4 fileencoding=utf-8 :
# Licensed under the APL2, see LICENSE for details
"""Secondary Metabolite Record Objects"""

from Bio import SeqIO


class Record(object):
    """A record containing secondary metabolite clusters"""

    def __init__(self, seq_record=None):
        """Initialise a secondary metabolite record

        :param seq_record: :class:`Bio.SeqRecord.SeqRecord` to read
        :type seq_record: :class:`Bio.SeqRecord.SeqRecord`
        """
        self._record = seq_record

    @classmethod
    def from_genbank(cls, filename):
        """Initialise a record from a GenBank file

        :param string filename: file name of the GenBank file to read
        """
        seq_record = SeqIO.read(filename, 'genbank')
        rec = cls(seq_record=seq_record)
        return rec

    @property
    def id(self):
        """Pass through to seq_record object if available"""
        if self._record is not None:
            return self._record.id
        else:
            return "NO_ID_ASSIGNED"

    @property
    def seq(self):
        """Pass through to seq_record object if available"""
        if self._record is not None:
            return self._record.seq
        else:
            return None

    @property
    def annotations(self):
        """Pass through to seq_record object if available"""
        if self._record is not None:
            return self._record.annotations
        else:
            return {}

    @property
    def description(self):
        """Pass through to seq_record object if available"""
        if self._record is not None:
            return self._record.description
        else:
            return ""

    @property
    def clusters(self):
        """A list of secondary metabolite clusters present in the record"""
        if self._record is None:
            return []
        clusters = [i for i in self._record.features if i.type == 'cluster']
        return clusters

    @property
    def gene(self):
        """A list of gene features present in the record"""
        if self._record is None:
            return []
        gene_list = [i for i in self._record.features if i.type == 'gene']
        return gene_list

    @property
    def CDS(self):
        """A list of CDS features present in the record"""
        if self._record is None:
            return []
        cds_list = [i for i in self._record.features if i.type == 'CDS']
        return cds_list

    def get_cds_from_gene(self, gene):
        """Return the first CDS feature that carries the same gene name

        Names are taken from the first value of the ``gene`` qualifier.

        :param gene: gene feature to look up
        :return: the matching CDS feature, or ``None`` when ``gene`` is not
            of the same type as the record's gene features, when the
            record has no gene features, when ``gene`` has no ``gene``
            qualifier, or when no CDS matches
        """
        genes = self.gene
        # Guard the empty case: indexing genes[0] used to raise IndexError
        if not genes or not isinstance(gene, type(genes[0])):
            return None

        gene_names = gene.qualifiers.get('gene')
        if not gene_names:
            return None
        gene_name = gene_names[0]

        for cds in self.CDS:
            # A CDS without a 'gene' qualifier can never match
            cds_names = cds.qualifiers.get('gene')
            if cds_names and cds_names[0] == gene_name:
                return cds
        return None





40 changes: 40 additions & 0 deletions tests/test_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,43 @@ def test_clusters():
testfile = get_testfile('nisin.gbk')
rec = Record.from_genbank(testfile)
assert len(rec.clusters) == 1

def test_gene():
    """Record.gene exposes every 'gene' feature that Biopython parses"""
    testfile = get_testfile('nisin.gbk')
    rec = Record.from_genbank(testfile)
    bp_rec = SeqIO.read(testfile, 'genbank')
    bp_gene = [i for i in bp_rec.features if i.type == 'gene']
    # Compare against the gene list built above (was the undefined bp_cds)
    assert len(bp_gene) == len(rec.gene)

def test_cds():
    """Record.CDS exposes every 'CDS' feature that Biopython parses"""
    testfile = get_testfile('nisin.gbk')
    rec = Record.from_genbank(testfile)
    reference = SeqIO.read(testfile, 'genbank')
    expected = [feat for feat in reference.features if feat.type == 'CDS']
    assert len(expected) == len(rec.CDS)

def test_get_cds_from_gene():
    """get_cds_from_gene returns the CDS carrying the gene's name"""
    testfile = get_testfile('nisin.gbk')
    rec = Record.from_genbank(testfile)
    bp_rec = SeqIO.read(testfile, 'genbank')
    bp_gene = [i for i in bp_rec.features if i.type == 'gene']
    bp_cds = [i for i in bp_rec.features if i.type == 'CDS']

    # Names of the first gene and the first CDS as parsed by Biopython
    bp_gene_name = bp_gene[0].qualifiers['gene'][0]
    bp_cds_name = bp_cds[0].qualifiers['gene'][0]

    # Sanity check: the first gene and the first CDS share a name
    assert bp_gene_name == bp_cds_name

    # The CDS looked up through the record must carry that same name
    found = rec.get_cds_from_gene(bp_gene[0])
    assert bp_cds_name == found.qualifiers['gene'][0]