From 3b64501461199fdb5e64e2387297d08134006ce0 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Sun, 11 Jun 2017 01:43:00 +0530 Subject: [PATCH 01/71] Add Feature, GenericFeature, CDSFeature, ClusterFeature classes --- secmet/record.py | 114 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/secmet/record.py b/secmet/record.py index ba8b658..ef42f97 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -4,6 +4,120 @@ from Bio import SeqIO +#pylint: disable=W0312,C0111 + +class Feature(object): + """A Feature super class that expands to different subclasses""" + def __init__(self, feature): + """ Initialise a feature object + param feature: class 'Bio.SeqFeature.SeqFeature' + """ + self.feature = feature + self._qualifiers = self.feature.qualifiers + self.location = self.feature.location + self.type = self.feature.type + + def to_biopython(self): + return [self.feature] + + +class GenericFeature(Feature): + """A GenericFeature Feature subclasses Feature + (Features other than CDSFeature and ClusterFeature) + """ + def __init__(self, feature=None): + """Initialise a GenericFeature + param feature: class 'Bio.SeqFeature.SeqFeature' + """ + if feature != None: + Feature.__init__(self, feature) + + def to_biopython(self): + """Returns Bio.SeqFeature.SeqFeature object of the same feature""" + return [self.feature] + + def add_qualifier(self, category, info): + """Adds a qualifier to qualifiers dictionary""" + self._qualifiers[category] = [info] + return None + + def get_qualifier(self, category): + """Returns a qualifier of given category""" + return self._qualifiers[category] + + +class CDSFeature(Feature): + """A CDSFeature subclasses Feature""" + + def __init__(self, feature=None): + """Initialise a CDSFeature + param feature: class 'Bio.SeqFeature.SeqFeature' + """ + Feature.__init__(self, feature) + if 'sec_met' in self._qualifiers: + self.sec_met = self._qualifiers['sec_met'] + else: + self.sec_met = None + + if 'locus_tag' in self._qualifiers: + self.locus_tag = self._qualifiers['locus_tag'] + else: + self.locus_tag = None + + if 'product' in self._qualifiers: + self.product = self._qualifiers['product'][0] + else: + self.product = None + + if 'protein_id' in self._qualifiers: + self.protein_id = self._qualifiers['protein_id'][0] + else: + self.protein_id = None + + if 'gene' in self._qualifiers: + self.gene = self._qualifiers['gene'][0] + else: + self.gene = None + + def get_id(self): + """Returns the id of the CDSFeature""" + return self.gene + + def to_biopython(self): + """Returns a Bio.SeqFeature.SeqFeature object of the same feature""" + return [self.feature] + + + +class ClusterFeature(Feature): + """A ClusterFeature which subclasses Feature""" + def __init__(self, feature=None): + """Initialise a ClusterFeature + param feature: class 'Bio.SeqFeature.SeqFeature' + """ + if feature != None: + Feature.__init__(self, feature) + if 'cutoff' in self._qualifiers: + self.cutoff = self._qualifiers['cutoff'] + if 'extension' in self._qualifiers: + self.extension = self._qualifiers['extension'] + if 'contig_edge' in self._qualifiers: + self.contig_edge = self._qualifiers['contig_edge'] + if 'detection' in self._qualifiers: + self.detection = self._qualifiers['note'][1] + + def to_biopython(self): + """Returns a Bio.SeqFeature.SeqFeature object of the same feature""" + return [self.feature] + + def get_products(self): + """Returns product qualifier from ClusterFeature object""" + return self._qualifiers['product'] + + def get_cluster_number(self): + """Returns the clusternumber of the cluster""" + return int(self._qualifiers['note'][0].split()[2]) + class Record(object): """A record containing secondary metabolite clusters""" From 26d5e9412955df050f22d5cfbdd00d653a2d936d Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Sun, 11 Jun 2017 20:40:06 +0530 Subject: [PATCH 02/71] Add from_biopython(), _modified_features in Record --- secmet/record.py | 204 +++++++++++++++++++++++++++++------------------ 1 file changed, 127 insertions(+), 77 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index ef42f97..adf34f5 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -4,8 +4,6 @@ from Bio import SeqIO -#pylint: disable=W0312,C0111 - class Feature(object): """A Feature super class that expands to different subclasses""" def __init__(self, feature): @@ -17,7 +15,9 @@ def __init__(self, feature): self.location = self.feature.location self.type = self.feature.type + @property def to_biopython(self): + """Returns a Bio.SeqFeature.SeqFeature object of same feature""" return [self.feature] @@ -32,10 +32,6 @@ def __init__(self, feature=None): if feature != None: Feature.__init__(self, feature) - def to_biopython(self): - """Returns Bio.SeqFeature.SeqFeature object of the same feature""" - return [self.feature] - def add_qualifier(self, category, info): """Adds a qualifier to qualifiers dictionary""" self._qualifiers[category] = [info] @@ -79,13 +75,16 @@ def __init__(self, feature=None): else: self.gene = None + @property def get_id(self): """Returns the id of the CDSFeature""" return self.gene - def to_biopython(self): - """Returns a Bio.SeqFeature.SeqFeature object of the same feature""" - return [self.feature] + @property + def get_cluster(self): + """Returns a ClusterFeature""" + #TO-DO: Should return the corresponding ClusterFeature + return @@ -99,88 +98,139 @@ def __init__(self, feature=None): Feature.__init__(self, feature) if 'cutoff' in self._qualifiers: self.cutoff = self._qualifiers['cutoff'] + else: + self.cutoff = None if 'extension' in self._qualifiers: self.extension = self._qualifiers['extension'] + else: + self.extension = None if 'contig_edge' in self._qualifiers: self.contig_edge = self._qualifiers['contig_edge'] + else: + self.contig_edge = None if 'detection' in self._qualifiers: self.detection = self._qualifiers['note'][1] + else: + self.detection = None - def to_biopython(self): - """Returns a Bio.SeqFeature.SeqFeature object of the same feature""" - return [self.feature] - + @property def get_products(self): """Returns product qualifier from ClusterFeature object""" return self._qualifiers['product'] + @property def get_cluster_number(self): """Returns the clusternumber of the cluster""" return int(self._qualifiers['note'][0].split()[2]) + @property + def get_CDSs(self): + #TO-DO: Should return a list of CDSFeatures + return + + def add_product(self, product): + """Adds a product qualifier to the ClusterFeature object""" + self._qualifiers['product'].append(product) + class Record(object): - """A record containing secondary metabolite clusters""" - - def __init__(self, seq_record=None): - """Initialise a secondary metabolite record - - :param seq_record: :class:`Bio.SeqRecord.SeqRecord` to read - :type seq_record: :class:`Bio.SeqRecord.SeqRecord` - """ - self._record = seq_record - - - @classmethod - def from_genbank(cls, filename): - """Initialise a record from a GenBank file - - :param string filename: file name of the GenBank file to read - """ - seq_record = SeqIO.read(filename, 'genbank') - rec = cls(seq_record=seq_record) - return rec - - - @property - def id(self): - """Pass through to seq_record object if available""" - if self._record is not None: - return self._record.id - else: - return "NO_ID_ASSIGNED" - - - @property - def seq(self): - """Pass through to seq_record object if available""" - if self._record is not None: - return self._record.seq - else: - return None - - - @property - def annotations(self): - """Pass through to seq_record object if available""" - if self._record is not None: - return self._record.annotations - else: - return {} - - @property - def description(self): - """Pass through to seq_record object if available""" - if self._record is not None: - return self._record.description - else: - return "" - - @property - def clusters(self): - """A list of secondary metabolite clusters present in the record""" - if self._record is None: - return [] - - clusters = [i for i in self._record.features if i.type == 'cluster'] - return clusters + """A record containing secondary metabolite clusters""" + + def __init__(self, seq_record=None): + """Initialise a secondary metabolite record + + :param seq_record: :class:`Bio.SeqRecord.SeqRecord` to read + :type seq_record: :class:`Bio.SeqRecord.SeqRecord` + """ + self._record = seq_record + self._modified_features = [] #A list containing instances of Feature or its subclasses + + + @classmethod + def from_genbank(cls, filename): + """Initialise a record from a GenBank file + + :param string filename: file name of the GenBank file to read + """ + seq_record = SeqIO.read(filename, 'genbank') + rec = cls(seq_record=seq_record) + return rec + + + @property + def id(self): + """Pass through to seq_record object if available""" + if self._record is not None: + return self._record.id + else: + return "NO_ID_ASSIGNED" + + + @property + def seq(self): + """Pass through to seq_record object if available""" + if self._record is not None: + return self._record.seq + else: + return None + + + @property + def annotations(self): + """Pass through to seq_record object if available""" + if self._record is not None: + return self._record.annotations + else: + return {} + + @property + def description(self): + """Pass through to seq_record object if available""" + if self._record is not None: + return self._record.description + else: + return "" + + @property + def get_clusters(self): + """A list of secondary metabolite clusters present in the record""" + if self._record is None: + return [] + + clusters = [i for i in self._modified_features if i.type == 'cluster'] + return clusters + + @property + def get_CDSs(self): + """A list of secondary metabolite clusters present in the record""" + if self._record is None: + return [] + + clusters = [i for i in self._modified_features if i.type == 'CDS'] + return clusters + + @property + def to_biopython(self): + """Returns a Bio.SeqRecord instance""" + return self._record + + def get_cluster_number(self, clusterfeature): + """Returns cluster number of a cluster feature + param ClusterFeature clusterfeature : A instance of ClusterFeature class + """ + return clusterfeature.get_cluster_number + + def from_biopython(self): + """Modifies _modified_features list with new Feature instances""" + features = self._record.features + for feature in features: + if feature.type == 'CDS': + feature = CDSFeature(feature) + self._modified_features.append(feature) + elif feature.type == 'cluster': + feature = ClusterFeature(feature) + self._modified_features.append(feature) + else: + feature = GenericFeature(feature) + self._modified_features.append(feature) + return self From 036553af03945d16f898d29768a21ca1fbadc43b Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Mon, 12 Jun 2017 00:39:50 +0530 Subject: [PATCH 03/71] Replace from_genbank() with from_file() in Record, Update test_record.py --- secmet/record.py | 28 +++++++++++++++++++--------- tests/test_record.py | 37 +++++++++++++++++++++++++++++++------ 2 files changed, 50 insertions(+), 15 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index adf34f5..0033657 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -147,14 +147,24 @@ def __init__(self, seq_record=None): @classmethod - def from_genbank(cls, filename): - """Initialise a record from a GenBank file + def from_file(cls, filename, filetype): - :param string filename: file name of the GenBank file to read - """ - seq_record = SeqIO.read(filename, 'genbank') - rec = cls(seq_record=seq_record) - return rec + """Initialise a record from a file of specified type + + :param string filename: file name of the file to read + :param string filetype: Type of the inputfile + """ + if filetype in ['gb', 'genbank']: + type_of_file = 'genbank' + elif filetype in ['fa', 'fas', 'fasta']: + type_of_file = 'fasta' + elif filetype in ['emb', 'embl']: + type_of_file = 'embl' + else: + return None + seq_record = SeqIO.read(filename, type_of_file) + rec = cls(seq_record=seq_record) + return rec @property @@ -206,8 +216,8 @@ def get_CDSs(self): if self._record is None: return [] - clusters = [i for i in self._modified_features if i.type == 'CDS'] - return clusters + cdss = [i for i in self._modified_features if i.type == 'CDS'] + return cdss @property def to_biopython(self): diff --git a/tests/test_record.py b/tests/test_record.py index 3486dca..eabd945 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -1,6 +1,8 @@ from os import path +import Bio from Bio import SeqIO from secmet.record import Record +from secmet.record import GenericFeature, ClusterFeature, CDSFeature def get_testfile(filename): @@ -16,11 +18,10 @@ def test_init_empty(): assert rec.description == "" assert rec.clusters == [] - -def test_from_genbank(): +def test_from_file(): testfile = get_testfile('nisin.gbk') bp_rec = SeqIO.read(testfile, 'genbank') - rec = Record.from_genbank(testfile) + rec = Record.from_file(testfile, 'genbank') assert isinstance(rec, Record) assert rec.id == bp_rec.id assert rec.seq == bp_rec.seq @@ -31,8 +32,32 @@ def test_from_genbank(): assert rec.annotations == bp_rec.annotations assert rec.description == bp_rec.description +def test_from_biopython(): + testfile = get_testfile('nisin.gbk') + rec = Record.from_file(testfile, 'genbank') + assert isinstance(rec.from_biopython(), Record) + +def test_to_biopython(): + testfile = get_testfile('nisin.gbk') + rec = Record.from_file(testfile, 'genbank') + assert isinstance(rec.to_biopython, Bio.SeqRecord.SeqRecord) -def test_clusters(): +def test_get_clusters(): testfile = get_testfile('nisin.gbk') - rec = Record.from_genbank(testfile) - assert len(rec.clusters) == 1 + rec = Record.from_file(testfile, 'genbank') + bp_rec = SeqIO.read(testfile, 'genbank') + clusters = [i for i in bp_rec.features if i.type == 'cluster'] + #Should call from_biopython() to access features + rec.from_biopython() + assert len(rec.get_clusters) == len(clusters) + assert isinstance(rec.get_clusters[0], ClusterFeature) + +def test_get_CDSs(): + testfile = get_testfile('nisin.gbk') + rec = Record.from_file(testfile, 'genbank') + bp_rec = SeqIO.read(testfile, 'genbank') + CDSs = [i for i in bp_rec.features if i.type == 'CDS'] + #Should call from_biopython() to access features + rec.from_biopython() + assert len(rec.get_CDSs) == len(CDSs) + assert isinstance(rec.get_CDSs[0], CDSFeature) From 236afba590ead81e717f3854b31d2101e4e89760 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Wed, 14 Jun 2017 21:48:37 +0530 Subject: [PATCH 04/71] Refined class methods --- secmet/record.py | 223 ++++++++++++++++++++++++++++------------------- 1 file changed, 135 insertions(+), 88 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index 0033657..72e944f 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -3,22 +3,14 @@ """Secondary Metabolite Record Objects""" from Bio import SeqIO +from Bio.SeqFeature import SeqFeature, FeatureLocation class Feature(object): """A Feature super class that expands to different subclasses""" - def __init__(self, feature): - """ Initialise a feature object - param feature: class 'Bio.SeqFeature.SeqFeature' - """ - self.feature = feature - self._qualifiers = self.feature.qualifiers - self.location = self.feature.location - self.type = self.feature.type - - @property - def to_biopython(self): - """Returns a Bio.SeqFeature.SeqFeature object of same feature""" - return [self.feature] + def __init__(self): + """ Initialise a feature object""" + self.location = FeatureLocation(0, 0) + self.type = None class GenericFeature(Feature): @@ -29,18 +21,30 @@ def __init__(self, feature=None): """Initialise a GenericFeature param feature: class 'Bio.SeqFeature.SeqFeature' """ - if feature != None: - Feature.__init__(self, feature) + super(GenericFeature, self).__init__() + self._qualifiers = {} + if feature is not None: + self._qualifiers = feature.qualifiers + self.location = feature.location + self.type = feature.type def add_qualifier(self, category, info): """Adds a qualifier to qualifiers dictionary""" - self._qualifiers[category] = [info] - return None + if isinstance(category, str) and isinstance(info, str): + self._qualifiers[category] = [info] + return None def get_qualifier(self, category): """Returns a qualifier of given category""" - return self._qualifiers[category] + if category in self._qualifiers: + return self._qualifiers[category] + @property + def to_biopython(self): + """Returns a Bio.SeqFeature.SeqFeature of given type of feature""" + Generic = SeqFeature(FeatureLocation(self.location.start, self.location.end), type=self.type) + Generic.qualifiers = self._qualifiers + return [Generic] class CDSFeature(Feature): """A CDSFeature subclasses Feature""" @@ -49,31 +53,35 @@ def __init__(self, feature=None): """Initialise a CDSFeature param feature: class 'Bio.SeqFeature.SeqFeature' """ - Feature.__init__(self, feature) - if 'sec_met' in self._qualifiers: - self.sec_met = self._qualifiers['sec_met'] - else: - self.sec_met = None + super(CDSFeature, self).__init__() + self.sec_met = [] + self.locus_tag = None + self.product = None + self.protein_id = None + self.gene = None + self.cluster = None #At present we are manually assigning it for checking + self._qualifiers = {} + self.type = 'CDS' - if 'locus_tag' in self._qualifiers: - self.locus_tag = self._qualifiers['locus_tag'] - else: - self.locus_tag = None + if feature is not None: - if 'product' in self._qualifiers: - self.product = self._qualifiers['product'][0] - else: - self.product = None + self._qualifiers = feature.qualifiers - if 'protein_id' in self._qualifiers: - self.protein_id = self._qualifiers['protein_id'][0] - else: - self.protein_id = None + if 'sec_met' in self._qualifiers: + self.sec_met = self._qualifiers['sec_met'] - if 'gene' in self._qualifiers: - self.gene = self._qualifiers['gene'][0] - else: - self.gene = None + if 'locus_tag' in self._qualifiers: + self.locus_tag = self._qualifiers['locus_tag'] + + if 'product' in self._qualifiers: + self.product = self._qualifiers['product'][0] + + if 'protein_id' in self._qualifiers: + self.protein_id = self._qualifiers['protein_id'][0] + + if 'gene' in self._qualifiers: + self.gene = self._qualifiers['gene'][0] + self.location = feature.location @property def get_id(self): @@ -83,10 +91,19 @@ def get_id(self): @property def get_cluster(self): """Returns a ClusterFeature""" - #TO-DO: Should return the corresponding ClusterFeature - return - + return self.cluster + @property + def to_biopython(self): + """Returns a Bio.SeqFeature.SeqFeature object with all its members""" + self._qualifiers['sec_met'] = self.sec_met + self._qualifiers['locus_tag'] = self.locus_tag + self._qualifiers['product'] = self.product + self._qualifiers['protein_id'] = self.protein_id + self._qualifiers['gene'] = self.gene + new_CDS = SeqFeature(FeatureLocation(self.location.start, self.location.end), type=self.type) + new_CDS.qualifiers = self._qualifiers + return [new_CDS] class ClusterFeature(Feature): """A ClusterFeature which subclasses Feature""" @@ -94,44 +111,70 @@ def __init__(self, feature=None): """Initialise a ClusterFeature param feature: class 'Bio.SeqFeature.SeqFeature' """ - if feature != None: - Feature.__init__(self, feature) - if 'cutoff' in self._qualifiers: - self.cutoff = self._qualifiers['cutoff'] - else: - self.cutoff = None - if 'extension' in self._qualifiers: - self.extension = self._qualifiers['extension'] - else: - self.extension = None - if 'contig_edge' in self._qualifiers: - self.contig_edge = self._qualifiers['contig_edge'] - else: - self.contig_edge = None - if 'detection' in self._qualifiers: - self.detection = self._qualifiers['note'][1] - else: - self.detection = None + super(ClusterFeature, self).__init__() + self.cutoff = None + self.extension = None + self.contig_edge = None + self.detection = None + self.products = [] + self._qualifiers = {} + self.type = 'cluster' + + if feature is not None: + self._qualifiers = feature.qualifiers + if 'cutoff' in self._qualifiers: + self.cutoff = self._qualifiers['cutoff'] + + if 'extension' in self._qualifiers: + self.extension = self._qualifiers['extension'] + + if 'contig_edge' in self._qualifiers: + self.contig_edge = self._qualifiers['contig_edge'] + + if 'note' in self._qualifiers: + self.detection = self._qualifiers['note'][1] + self.clusternumber = int(self._qualifiers['note'][0].split(':')[1]) + + if 'product' in self._qualifiers: + self.products = self._qualifiers['product'] + self.location = feature.location + + self.cdss = [] #At present they are manually assigned for checking + + + @property + def add_product(self, product_value): + """Adds a product qualifier to the ClusterFeature object""" + if isinstance(product_value, str): + self.products.append(product_value) @property def get_products(self): """Returns product qualifier from ClusterFeature object""" - return self._qualifiers['product'] + return self.products @property def get_cluster_number(self): """Returns the clusternumber of the cluster""" - return int(self._qualifiers['note'][0].split()[2]) + return self.clusternumber @property def get_CDSs(self): - #TO-DO: Should return a list of CDSFeatures - return - - def add_product(self, product): - """Adds a product qualifier to the ClusterFeature object""" - self._qualifiers['product'].append(product) + """Retruns a list of CDS objects which belong to this cluster""" + return self.cdss + @property + def to_biopython(self): + """Returns a Bio.SeqFeature.SeqFeature object with all its members""" + self._qualifiers['note'] = ["Cluster number: " + str(self.get_cluster_number)] + self._qualifiers['note'].append(self.detection) + self._qualifiers['cutoff'] = [self.cutoff] + self._qualifiers['extension'] = [self.extension] + self._qualifiers['product'] = self.products + self._qualifiers['contig_edge'] = self.contig_edge + new_cluster = SeqFeature(FeatureLocation(self.location.start, self.location.end), type=self.type) + new_cluster.qualifiers = self._qualifiers + return [new_cluster] class Record(object): """A record containing secondary metabolite clusters""" @@ -143,8 +186,10 @@ def __init__(self, seq_record=None): :type seq_record: :class:`Bio.SeqRecord.SeqRecord` """ self._record = seq_record - self._modified_features = [] #A list containing instances of Feature or its subclasses - + self._modified_CDS = [] #A list containing instances of CDSFeature + self._modified_Clusters = [] #A list containing instances of ClusterFeature + self._modified_Generic = [] #A list containing instances of GenericFeature + self.from_biopython(self._record) @classmethod def from_file(cls, filename, filetype): @@ -204,43 +249,45 @@ def description(self): @property def get_clusters(self): """A list of secondary metabolite clusters present in the record""" - if self._record is None: - return [] - - clusters = [i for i in self._modified_features if i.type == 'cluster'] - return clusters + return self._modified_Clusters @property def get_CDSs(self): """A list of secondary metabolite clusters present in the record""" - if self._record is None: - return [] - - cdss = [i for i in self._modified_features if i.type == 'CDS'] - return cdss + return self._modified_CDS @property def to_biopython(self): - """Returns a Bio.SeqRecord instance""" - return self._record + """Returns a Bio.SeqRecord instance of the record""" + new_record = self._record + features = self._modified_Generic + features.extend(self._modified_CDS) + features.extend(self._modified_Clusters) + record_features = [] + for feature in features: + record_features.append(feature.to_biopython[0]) + new_record.features = record_features #A new_record with all the modified features + return new_record + @property def get_cluster_number(self, clusterfeature): """Returns cluster number of a cluster feature param ClusterFeature clusterfeature : A instance of ClusterFeature class """ - return clusterfeature.get_cluster_number + if isinstance(clusterfeature, ClusterFeature): + return clusterfeature.clusternumber - def from_biopython(self): + def from_biopython(self, record): """Modifies _modified_features list with new Feature instances""" - features = self._record.features + features = record.features for feature in features: if feature.type == 'CDS': feature = CDSFeature(feature) - self._modified_features.append(feature) + self._modified_CDS.append(feature) elif feature.type == 'cluster': feature = ClusterFeature(feature) - self._modified_features.append(feature) + self._modified_Clusters.append(feature) else: feature = GenericFeature(feature) - self._modified_features.append(feature) + self._modified_Generic.append(feature) return self From e3bb9e5c7c2f04f27f7d6e66af19107d38336d49 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Fri, 16 Jun 2017 12:21:04 +0530 Subject: [PATCH 05/71] Add add_feature() in Record class, modify get_cluster_number, Refined --- secmet/record.py | 595 +++++++++++++++++++++++++---------------------- 1 file changed, 318 insertions(+), 277 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index 72e944f..54b40df 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -2,292 +2,333 @@ # Licensed under the APL2, see LICENSE for details """Secondary Metabolite Record Objects""" -from Bio import SeqIO +from Bio import SeqIO, SeqRecord from Bio.SeqFeature import SeqFeature, FeatureLocation class Feature(object): - """A Feature super class that expands to different subclasses""" - def __init__(self): - """ Initialise a feature object""" - self.location = FeatureLocation(0, 0) - self.type = None + """A Feature super class that expands to different subclasses""" + def __init__(self): + """ Initialise a feature object""" + self.location = None + self.type = None class GenericFeature(Feature): - """A GenericFeature Feature subclasses Feature - (Features other than CDSFeature and ClusterFeature) - """ - def __init__(self, feature=None): - """Initialise a GenericFeature - param feature: class 'Bio.SeqFeature.SeqFeature' - """ - super(GenericFeature, self).__init__() - self._qualifiers = {} - if feature is not None: - self._qualifiers = feature.qualifiers - self.location = feature.location - self.type = feature.type - - def add_qualifier(self, category, info): - """Adds a qualifier to qualifiers dictionary""" - if isinstance(category, str) and isinstance(info, str): - self._qualifiers[category] = [info] - return None - - def get_qualifier(self, category): - """Returns a qualifier of given category""" - if category in self._qualifiers: - return self._qualifiers[category] - - @property - def to_biopython(self): - """Returns a Bio.SeqFeature.SeqFeature of given type of feature""" - Generic = SeqFeature(FeatureLocation(self.location.start, self.location.end), type=self.type) - Generic.qualifiers = self._qualifiers - return [Generic] + """A GenericFeature Feature subclasses Feature + (Features other than CDSFeature and ClusterFeature) + """ + def __init__(self, feature=None): + """Initialise a GenericFeature + param feature: class 'Bio.SeqFeature.SeqFeature' + """ + super(GenericFeature, self).__init__() + self._qualifiers = {} + if feature is not None: + self._qualifiers = feature.qualifiers + self.location = feature.location + self.type = feature.type + + def add_qualifier(self, category, info): + """Adds a qualifier to qualifiers dictionary""" + if isinstance(category, str) and isinstance(info, str): + if category not in self._qualifiers: + self._qualifiers[category] = [info] + else: + self._qualifiers[category].append(info) + return None + else: + raise TypeError("Type of qualifiers should be 'str'") + + def get_qualifier(self, category): + """Returns a qualifier of given category""" + if category in self._qualifiers: + return self._qualifiers[category] + + def to_biopython(self): + """Returns a Bio.SeqFeature.SeqFeature of given type of feature""" + if isinstance(self.location, FeatureLocation): + new_Generic = SeqFeature(FeatureLocation(self.location.start, self.location.end), type=self.type) + new_Generic.qualifiers = self._qualifiers.copy() + return [new_Generic] + else: + raise ValueError("location should be an instance of 'Bio.SeqFeature.FeatureLocation'") class CDSFeature(Feature): - """A CDSFeature subclasses Feature""" - - def __init__(self, feature=None): - """Initialise a CDSFeature - param feature: class 'Bio.SeqFeature.SeqFeature' - """ - super(CDSFeature, self).__init__() - self.sec_met = [] - self.locus_tag = None - self.product = None - self.protein_id = None - self.gene = None - self.cluster = None #At present we are manually assigning it for checking - self._qualifiers = {} - self.type = 'CDS' - - if feature is not None: - - self._qualifiers = feature.qualifiers - - if 'sec_met' in self._qualifiers: - self.sec_met = self._qualifiers['sec_met'] - - if 'locus_tag' in self._qualifiers: - self.locus_tag = self._qualifiers['locus_tag'] - - if 'product' in self._qualifiers: - self.product = self._qualifiers['product'][0] - - if 'protein_id' in self._qualifiers: - self.protein_id = self._qualifiers['protein_id'][0] - - if 'gene' in self._qualifiers: - self.gene = self._qualifiers['gene'][0] - self.location = feature.location - - @property - def get_id(self): - """Returns the id of the CDSFeature""" - return self.gene - - @property - def get_cluster(self): - """Returns a ClusterFeature""" - return self.cluster - - @property - def to_biopython(self): - """Returns a Bio.SeqFeature.SeqFeature object with all its members""" - self._qualifiers['sec_met'] = self.sec_met - self._qualifiers['locus_tag'] = self.locus_tag - self._qualifiers['product'] = self.product - self._qualifiers['protein_id'] = self.protein_id - self._qualifiers['gene'] = self.gene - new_CDS = SeqFeature(FeatureLocation(self.location.start, self.location.end), type=self.type) - new_CDS.qualifiers = self._qualifiers - return [new_CDS] + """A CDSFeature subclasses Feature""" + + def __init__(self, feature=None): + """Initialise a CDSFeature + param feature: class 'Bio.SeqFeature.SeqFeature' + """ + super(CDSFeature, self).__init__() + self.sec_met = [] + self.locus_tag = None + self.product = None + self.protein_id = None + self.gene = None + self.cluster = None #At present we are manually assigning it for checking + self._qualifiers = {} + self.type = 'CDS' + + if feature is not None: + + self._qualifiers = feature.qualifiers + + if 'sec_met' in self._qualifiers: + self.sec_met = self._qualifiers['sec_met'] + + if 'locus_tag' in self._qualifiers: + self.locus_tag = self._qualifiers['locus_tag'][0] + + if 'product' in self._qualifiers: + self.product = self._qualifiers['product'][0] + + if 'protein_id' in self._qualifiers: + self.protein_id = self._qualifiers['protein_id'][0] + + if 'gene' in self._qualifiers: + self.gene = self._qualifiers['gene'][0] + self.location = feature.location + + def get_id(self): + """Returns the id of the CDSFeature""" + return self.gene + + def get_cluster(self): + """Returns a ClusterFeature""" + return self.cluster + + def to_biopython(self): + """Returns a Bio.SeqFeature.SeqFeature object with all its members""" + if isinstance(self.location, FeatureLocation): + self._qualifiers['sec_met'] = self.sec_met + self._qualifiers['locus_tag'] = [str(self.locus_tag)] + self._qualifiers['product'] = [str(self.product)] + self._qualifiers['protein_id'] = [str(self.protein_id)] + self._qualifiers['gene'] = [str(self.gene)] + new_CDS = SeqFeature(FeatureLocation(self.location.start, self.location.end), type=self.type) + new_CDS.qualifiers = self._qualifiers.copy() + return [new_CDS] + else: + raise ValueError("location should be an instance of 'Bio.SeqFeature.FeatureLocation'") class ClusterFeature(Feature): - """A ClusterFeature which subclasses Feature""" - def __init__(self, feature=None): - """Initialise a ClusterFeature - param feature: class 'Bio.SeqFeature.SeqFeature' - """ - super(ClusterFeature, self).__init__() - self.cutoff = None - self.extension = None - self.contig_edge = None - self.detection = None - self.products = [] - self._qualifiers = {} - self.type = 'cluster' - - if feature is not None: - self._qualifiers = feature.qualifiers - if 'cutoff' in self._qualifiers: - self.cutoff = self._qualifiers['cutoff'] - - if 'extension' in self._qualifiers: - self.extension = self._qualifiers['extension'] - - if 'contig_edge' in self._qualifiers: - self.contig_edge = self._qualifiers['contig_edge'] - - if 'note' in self._qualifiers: - self.detection = self._qualifiers['note'][1] - self.clusternumber = int(self._qualifiers['note'][0].split(':')[1]) - - if 'product' in self._qualifiers: - self.products = self._qualifiers['product'] - self.location = feature.location - - self.cdss = [] #At present they are manually assigned for checking - - - @property - def add_product(self, product_value): - """Adds a product qualifier to the ClusterFeature object""" - if isinstance(product_value, str): - self.products.append(product_value) - - @property - def get_products(self): - """Returns product qualifier from ClusterFeature object""" - return self.products - - @property - def get_cluster_number(self): - """Returns the clusternumber of the cluster""" - return self.clusternumber - - @property - def get_CDSs(self): - """Retruns a list of CDS objects which belong to this cluster""" - return self.cdss - - @property - def to_biopython(self): - """Returns a Bio.SeqFeature.SeqFeature object with all its members""" - self._qualifiers['note'] = ["Cluster number: " + str(self.get_cluster_number)] - self._qualifiers['note'].append(self.detection) - self._qualifiers['cutoff'] = [self.cutoff] - self._qualifiers['extension'] = [self.extension] - self._qualifiers['product'] = self.products - self._qualifiers['contig_edge'] = self.contig_edge - new_cluster = SeqFeature(FeatureLocation(self.location.start, self.location.end), type=self.type) - new_cluster.qualifiers = self._qualifiers - return [new_cluster] + """A ClusterFeature which subclasses Feature""" + def __init__(self, feature=None): + """Initialise a ClusterFeature + param feature: class 'Bio.SeqFeature.SeqFeature' + """ + super(ClusterFeature, self).__init__() + self.cutoff = None + self.extension = None + self.contig_edge = None + self.detection = None + self.products = [] + self._qualifiers = {} + self.parent_record = None + self.type = 'cluster' + + if feature is not None: + self._qualifiers = feature.qualifiers + if 'cutoff' in self._qualifiers: + self.cutoff = self._qualifiers['cutoff'][0] + + if 'extension' in self._qualifiers: + self.extension = self._qualifiers['extension'][0] + + if 'contig_edge' in self._qualifiers: + self.contig_edge = self._qualifiers['contig_edge'][0] + + if 'note' in self._qualifiers: + self.detection = self._qualifiers['note'][1] + self.clusternumber = int(self._qualifiers['note'][0].split(':')[1]) + + if 'product' in self._qualifiers: + self.products = self._qualifiers['product'] + self.location = feature.location + + self.cdss = [] #At present they are manually assigned for checking + + def add_product(self, product_value): + """Adds a product qualifier to the ClusterFeature object""" + if isinstance(product_value, str): + self.products.append(product_value) + else: + raise TypeError("Type of products should be 'str'") + + def get_products(self): + """Returns product qualifier from ClusterFeature object""" + return self.products + + def get_cluster_number(self): + """Returns the clusternumber of the cluster""" + if self.parent_record is not None: + return self.parent_record.get_cluster_number(self) + else: + raise ValueError('Parent record is None') + + def get_CDSs(self): + """Retruns a list of CDS objects which belong to this cluster""" + return self.cdss + + def to_biopython(self): + """Returns a Bio.SeqFeature.SeqFeature object with all its members""" + if isinstance(self.location, FeatureLocation): + self._qualifiers['note'] = ["Cluster number: " + str(self.get_cluster_number)] + self._qualifiers['note'].append(self.detection) + self._qualifiers['cutoff'] = [str(self.cutoff)] + self._qualifiers['extension'] = [str(self.extension)] + self._qualifiers['product'] = self.products + self._qualifiers['contig_edge'] = [str(self.contig_edge)] + new_Cluster = SeqFeature(FeatureLocation(self.location.start, self.location.end), type=self.type) + new_Cluster.qualifiers = self._qualifiers.copy() + return [new_Cluster] + else: + raise ValueError("location should be an instance of 'Bio.SeqFeature.FeatureLocation'") class Record(object): - """A record containing secondary metabolite clusters""" - - def __init__(self, seq_record=None): - """Initialise a secondary metabolite record - - :param seq_record: :class:`Bio.SeqRecord.SeqRecord` to read - :type seq_record: :class:`Bio.SeqRecord.SeqRecord` - """ - self._record = seq_record - self._modified_CDS = [] #A list containing instances of CDSFeature - self._modified_Clusters = [] #A list containing instances of ClusterFeature - self._modified_Generic = [] #A list containing instances of GenericFeature - self.from_biopython(self._record) - - @classmethod - def from_file(cls, filename, filetype): - - """Initialise a record from a file of specified type - - :param string filename: file name of the file to read - :param string filetype: Type of the inputfile - """ - if filetype in ['gb', 'genbank']: - type_of_file = 'genbank' - elif filetype in ['fa', 'fas', 'fasta']: - type_of_file = 'fasta' - elif filetype in ['emb', 'embl']: - type_of_file = 'embl' - else: - return None - seq_record = SeqIO.read(filename, type_of_file) - rec = cls(seq_record=seq_record) - return rec - - - @property - def id(self): - """Pass through to seq_record object if available""" - if self._record is not None: - return self._record.id - else: - return "NO_ID_ASSIGNED" - - - @property - def seq(self): - """Pass through to seq_record object if available""" - if self._record is not None: - return self._record.seq - else: - return None - - - @property - def annotations(self): - """Pass through to seq_record object if available""" - if self._record is not None: - return self._record.annotations - else: - return {} - - @property - def description(self): - """Pass through to seq_record object if available""" - if self._record is not None: - return self._record.description - else: - return "" - - @property - def get_clusters(self): - """A list of secondary metabolite clusters present in the record""" - return self._modified_Clusters - - @property - def get_CDSs(self): - """A list of secondary metabolite clusters present in the record""" - return self._modified_CDS - - @property - def to_biopython(self): - """Returns a Bio.SeqRecord instance of the record""" - new_record = self._record - features = self._modified_Generic - features.extend(self._modified_CDS) - features.extend(self._modified_Clusters) - record_features = [] - for feature in features: - record_features.append(feature.to_biopython[0]) - new_record.features = record_features #A new_record with all the modified features - return new_record - - @property - def get_cluster_number(self, clusterfeature): - """Returns cluster number of a cluster feature - param ClusterFeature clusterfeature : A instance of ClusterFeature class - """ - if isinstance(clusterfeature, ClusterFeature): - return clusterfeature.clusternumber - - def from_biopython(self, record): - """Modifies _modified_features list with new Feature instances""" - features = record.features - for feature in features: - if feature.type == 'CDS': - feature = CDSFeature(feature) - self._modified_CDS.append(feature) - elif feature.type == 'cluster': - feature = ClusterFeature(feature) - self._modified_Clusters.append(feature) - else: - feature = GenericFeature(feature) - self._modified_Generic.append(feature) - return self + """A record containing secondary metabolite clusters""" + + def __init__(self, seq_record=None): + """Initialise a secondary metabolite record + + :param seq_record: :class:`Bio.SeqRecord.SeqRecord` to read + :type seq_record: :class:`Bio.SeqRecord.SeqRecord` + """ + self._record = seq_record + self._modified_CDS = [] #A list containing instances of CDSFeature + self._modified_Cluster = [] #A list containing instances of ClusterFeature + self._modified_Generic = [] #A list containing instances of GenericFeature + self._cluster_number_dict = {} + + if isinstance(self._record, SeqRecord.SeqRecord): + self.from_biopython(self._record) + else: + raise ValueError("SeqRecord should be an instance of 'Bio.SeqRecord.SeqRecord'") + + @classmethod + def from_file(cls, filename, filetype): + + """Initialise a record from a file of specified type + + :param string filename: file name of the file to read + :param string filetype: Type of the inputfile + """ + if filetype in ['gb', 'genbank']: + type_of_file = 'genbank' + elif filetype in ['fa', 'fas', 'fasta']: + type_of_file = 'fasta' + elif filetype in ['emb', 'embl']: + type_of_file = 'embl' + else: + return None + seq_record = SeqIO.read(filename, type_of_file) + rec = cls(seq_record=seq_record) + return rec + + + @property + def id(self): + """Pass through to seq_record object if available""" + if self._record is not None: + return self._record.id + else: + return "NO_ID_ASSIGNED" + + + @property + def seq(self): + """Pass through to seq_record object if available""" + if self._record is not None: + return self._record.seq + else: + return None + + @property + def annotations(self): + """Pass through to seq_record object if available""" + if self._record is not None: + return self._record.annotations + else: + return {} + + @property + def description(self): + """Pass through to seq_record object if available""" + if self._record is not None: + return self._record.description + else: + return "" + + def get_clusters(self): + """A list of secondary metabolite clusters present in the record""" + return self._modified_Cluster + + def get_CDSs(self): + """A list of secondary metabolite clusters present in the record""" + return self._modified_CDS + + def to_biopython(self): + """Returns a Bio.SeqRecord instance of the record""" + new_record = self._record + features = self._modified_Generic + features.extend(self._modified_CDS) + features.extend(self._modified_Cluster) + record_features = [] + for feature in features: + record_features.append(feature.to_biopython[0]) + new_record.features = record_features #A new_record with all the modified features + return new_record + + def get_cluster_number(self, clusterfeature): + """Returns cluster number of a cluster feature + param ClusterFeature clusterfeature : A instance of ClusterFeature class + """ + return self._modified_Cluster.index(clusterfeature)+1 + + def add_feature(self, feature): + """Adds features to appropriate lists""" + if isinstance(feature, Feature): + if feature.type == 'cluster': + if isinstance(feature.location, FeatureLocation): + clusters = self._modified_Cluster + clusters.append(None) + for index, cluster in enumerate(clusters): + if cluster is not None: + if feature.location.start < cluster.location.start and feature.location.end < cluster.location.start: + break + else: + clusters[index] = feature + feature.parent_record = self + return + for k in range(len(clusters)-1, index, -1): + temp = clusters[k] + clusters[k] = clusters[k-1] + clusters[k-1] = temp + clusters[k-1] = feature + feature.parent_record = self + else: + raise ValueError("location should be an instance of 'Bio.SeqFeatures.FeatureLocation'") + + elif feature.type == 'CDS': + self._modified_CDS.append(feature) + else: + self._modified_Generic.append(feature) + else: + raise TypeError("The argument is not an instance of 'Feature'") + + def from_biopython(self, record): + """Modifies _modified_features list with new Feature instances""" + features = record.features + for feature in features: + if feature.type == 'CDS': + feature = CDSFeature(feature) + self._modified_CDS.append(feature) + elif feature.type == 'cluster': + feature = ClusterFeature(feature) + feature.parent_record = self + self._modified_Cluster.append(feature) + else: + feature = GenericFeature(feature) + self._modified_Generic.append(feature) + return self From 1d420ce489484644efbbd093d26823f47de7ef17 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Fri, 16 Jun 2017 18:21:28 +0530 Subject: [PATCH 06/71] Strict type check for cutoff & extension, Clusters and their numbers are mapped --- secmet/record.py | 187 +++++++++++++++++++++++++---------------------- 1 file changed, 99 insertions(+), 88 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index 54b40df..e8ccd3d 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -30,14 +30,13 @@ def __init__(self, feature=None): def add_qualifier(self, category, info): """Adds a qualifier to qualifiers dictionary""" - if isinstance(category, str) and isinstance(info, str): - if category not in self._qualifiers: - self._qualifiers[category] = [info] - else: - self._qualifiers[category].append(info) - return None - else: + if not isinstance(category, str) and isinstance(info, str): raise TypeError("Type of qualifiers should be 'str'") + if category not in self._qualifiers: + self._qualifiers[category] = [info] + else: + self._qualifiers[category].append(info) + return None def get_qualifier(self, category): """Returns a qualifier of given category""" @@ -46,12 +45,12 @@ def get_qualifier(self, category): def to_biopython(self): """Returns a Bio.SeqFeature.SeqFeature of given type of feature""" - if isinstance(self.location, FeatureLocation): - new_Generic = SeqFeature(FeatureLocation(self.location.start, self.location.end), type=self.type) - new_Generic.qualifiers = self._qualifiers.copy() - return [new_Generic] - else: + if not isinstance(self.location, FeatureLocation): raise ValueError("location should be an instance of 'Bio.SeqFeature.FeatureLocation'") + new_Generic = SeqFeature(FeatureLocation(self.location.start, self.location.end), type=self.type) + new_Generic.qualifiers = self._qualifiers.copy() + return [new_Generic] + class CDSFeature(Feature): """A CDSFeature subclasses Feature""" @@ -100,17 +99,17 @@ def get_cluster(self): def to_biopython(self): """Returns a Bio.SeqFeature.SeqFeature object with all its members""" - if isinstance(self.location, FeatureLocation): - self._qualifiers['sec_met'] = self.sec_met - self._qualifiers['locus_tag'] = [str(self.locus_tag)] - self._qualifiers['product'] = [str(self.product)] - self._qualifiers['protein_id'] = [str(self.protein_id)] - self._qualifiers['gene'] = [str(self.gene)] - new_CDS = SeqFeature(FeatureLocation(self.location.start, self.location.end), type=self.type) - new_CDS.qualifiers = self._qualifiers.copy() - return [new_CDS] - else: + if not isinstance(self.location, FeatureLocation): raise ValueError("location should be an instance of 'Bio.SeqFeature.FeatureLocation'") + self._qualifiers['sec_met'] = self.sec_met + self._qualifiers['locus_tag'] = [str(self.locus_tag)] + self._qualifiers['product'] = [str(self.product)] + self._qualifiers['protein_id'] = [str(self.protein_id)] + self._qualifiers['gene'] = [str(self.gene)] + new_CDS = SeqFeature(FeatureLocation(self.location.start, self.location.end), type=self.type) + new_CDS.qualifiers = self._qualifiers.copy() + return [new_CDS] + class ClusterFeature(Feature): """A ClusterFeature which subclasses Feature""" @@ -119,8 +118,6 @@ def __init__(self, feature=None): param feature: class 'Bio.SeqFeature.SeqFeature' """ super(ClusterFeature, self).__init__() - self.cutoff = None - self.extension = None self.contig_edge = None self.detection = None self.products = [] @@ -131,10 +128,10 @@ def __init__(self, feature=None): if feature is not None: self._qualifiers = feature.qualifiers if 'cutoff' in self._qualifiers: - self.cutoff = self._qualifiers['cutoff'][0] + self.cutoff = int(self._qualifiers['cutoff'][0]) if 'extension' in self._qualifiers: - self.extension = self._qualifiers['extension'][0] + self.extension = int(self._qualifiers['extension'][0]) if 'contig_edge' in self._qualifiers: self.contig_edge = self._qualifiers['contig_edge'][0] @@ -149,12 +146,33 @@ def __init__(self, feature=None): self.cdss = [] #At present they are manually assigned for checking + def _get_cutoff(self): + try: + return self.__cutoff + except: + return None + def _set_cutoff(self, value): + if not isinstance(value, int): + raise TypeError("cutoff must be an integer") + self.__cutoff = value + cutoff = property(_get_cutoff, _set_cutoff) + + def _get_extension(self): + try: + return self.__extension + except: + return None + def _set_extension(self, value): + if not isinstance(value, int): + raise TypeError("extension must be an integer") + self.__extension = value + extension = property(_get_extension, _set_extension) + def add_product(self, product_value): """Adds a product qualifier to the ClusterFeature object""" - if isinstance(product_value, str): - self.products.append(product_value) - else: + if not isinstance(product_value, str): raise TypeError("Type of products should be 'str'") + self.products.append(product_value) def get_products(self): """Returns product qualifier from ClusterFeature object""" @@ -162,10 +180,9 @@ def get_products(self): def get_cluster_number(self): """Returns the clusternumber of the cluster""" - if self.parent_record is not None: - return self.parent_record.get_cluster_number(self) - else: + if self.parent_record is None: raise ValueError('Parent record is None') + return self.parent_record.get_cluster_number(self) def get_CDSs(self): """Retruns a list of CDS objects which belong to this cluster""" @@ -173,18 +190,18 @@ def get_CDSs(self): def to_biopython(self): """Returns a Bio.SeqFeature.SeqFeature object with all its members""" - if isinstance(self.location, FeatureLocation): - self._qualifiers['note'] = ["Cluster number: " + str(self.get_cluster_number)] - self._qualifiers['note'].append(self.detection) - self._qualifiers['cutoff'] = [str(self.cutoff)] - self._qualifiers['extension'] = [str(self.extension)] - self._qualifiers['product'] = self.products - self._qualifiers['contig_edge'] = [str(self.contig_edge)] - new_Cluster = SeqFeature(FeatureLocation(self.location.start, self.location.end), type=self.type) - new_Cluster.qualifiers = self._qualifiers.copy() - return [new_Cluster] - else: + if not isinstance(self.location, FeatureLocation): raise ValueError("location should be an instance of 'Bio.SeqFeature.FeatureLocation'") + self._qualifiers['note'] = ["Cluster number: " + str(self.get_cluster_number)] + self._qualifiers['note'].append(self.detection) + self._qualifiers['cutoff'] = [str(self.cutoff)] + self._qualifiers['extension'] = [str(self.extension)] + self._qualifiers['product'] = self.products + self._qualifiers['contig_edge'] = [str(self.contig_edge)] + new_Cluster = SeqFeature(FeatureLocation(self.location.start, self.location.end), type=self.type) + new_Cluster.qualifiers = self._qualifiers.copy() + return [new_Cluster] + class Record(object): """A record containing secondary metabolite clusters""" @@ -196,15 +213,14 @@ def __init__(self, seq_record=None): :type seq_record: :class:`Bio.SeqRecord.SeqRecord` """ self._record = seq_record - self._modified_CDS = [] #A list containing instances of CDSFeature - self._modified_Cluster = [] #A list containing instances of ClusterFeature - self._modified_Generic = [] #A list containing instances of GenericFeature - self._cluster_number_dict = {} + self._modified_cds = [] #A list containing instances of CDSFeature + self._modified_cluster = [] #A list containing instances of ClusterFeature + self._modified_generic = [] #A list containing instances of GenericFeature + self._cluster_number_dict = {} #A dictionary to map clusters and their numbers - if isinstance(self._record, SeqRecord.SeqRecord): - self.from_biopython(self._record) - else: + if not isinstance(self._record, SeqRecord.SeqRecord): raise ValueError("SeqRecord should be an instance of 'Bio.SeqRecord.SeqRecord'") + self.from_biopython(self._record) @classmethod def from_file(cls, filename, filetype): @@ -226,7 +242,6 @@ def from_file(cls, filename, filetype): rec = cls(seq_record=seq_record) return rec - @property def id(self): """Pass through to seq_record object if available""" @@ -235,7 +250,6 @@ def id(self): else: return "NO_ID_ASSIGNED" - @property def seq(self): """Pass through to seq_record object if available""" @@ -262,21 +276,21 @@ def description(self): def get_clusters(self): """A list of secondary metabolite clusters present in the record""" - return self._modified_Cluster + return self._modified_cluster def get_CDSs(self): """A list of secondary metabolite clusters present in the record""" - return self._modified_CDS + return self._modified_cds def to_biopython(self): """Returns a Bio.SeqRecord instance of the record""" new_record = self._record - features = self._modified_Generic - features.extend(self._modified_CDS) - features.extend(self._modified_Cluster) + features = self._modified_generic + features.extend(self._modified_cds) + features.extend(self._modified_cluster) record_features = [] for feature in features: - record_features.append(feature.to_biopython[0]) + record_features.append(feature.to_biopython()[0]) new_record.features = record_features #A new_record with all the modified features return new_record @@ -284,38 +298,34 @@ def get_cluster_number(self, clusterfeature): """Returns cluster number of a cluster feature param ClusterFeature clusterfeature : A instance of ClusterFeature class """ - return self._modified_Cluster.index(clusterfeature)+1 + return self._cluster_number_dict[clusterfeature] def add_feature(self, feature): """Adds features to appropriate lists""" - if isinstance(feature, Feature): - if feature.type == 'cluster': - if isinstance(feature.location, FeatureLocation): - clusters = self._modified_Cluster - clusters.append(None) - for index, cluster in enumerate(clusters): - if cluster is not None: - if feature.location.start < cluster.location.start and feature.location.end < cluster.location.start: - break - else: - clusters[index] = feature - feature.parent_record = self - return - for k in range(len(clusters)-1, index, -1): - temp = clusters[k] - clusters[k] = clusters[k-1] - clusters[k-1] = temp - clusters[k-1] = feature - feature.parent_record = self + if not isinstance(feature, Feature): + raise TypeError("The argument is not an instance of 'Feature'") + if feature.type == 'cluster': + if not isinstance(feature.location, FeatureLocation): + raise ValueError("location should be an instance of 'Bio.SeqFeatures.FeatureLocation'") + clusters = self._modified_cluster + clusters.append(None) + for index, cluster in enumerate(clusters): + if cluster is not None: + if feature.location.start < cluster.location.start: + break else: - raise ValueError("location should be an instance of 'Bio.SeqFeatures.FeatureLocation'") - - elif feature.type == 'CDS': - self._modified_CDS.append(feature) - else: - self._modified_Generic.append(feature) + clusters[index] = feature + feature.parent_record = self + return + clusters.insert(index, feature) + feature.parent_record = self + for index, cluster in enumerate(clusters): + self._cluster_number_dict[cluster] = index+1 + + elif feature.type == 'CDS': + self._modified_cds.append(feature) else: - raise TypeError("The argument is not an instance of 'Feature'") + self._modified_generic.append(feature) def from_biopython(self, record): """Modifies _modified_features list with new Feature instances""" @@ -323,12 +333,13 @@ def from_biopython(self, record): for feature in features: if feature.type == 'CDS': feature = CDSFeature(feature) - self._modified_CDS.append(feature) + self._modified_cds.append(feature) elif feature.type == 'cluster': feature = ClusterFeature(feature) feature.parent_record = self - self._modified_Cluster.append(feature) + self._modified_cluster.append(feature) + self._cluster_number_dict[feature] = self._modified_cluster.index(feature)+1 else: feature = GenericFeature(feature) - self._modified_Generic.append(feature) + self._modified_generic.append(feature) return self From 100db1772b6969ebdfca7fc7d5926a15c4661a89 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Fri, 16 Jun 2017 18:23:55 +0530 Subject: [PATCH 07/71] Update test_record.py --- tests/test_record.py | 90 +++++++++++++++++++++++++++++--------------- 1 file changed, 59 insertions(+), 31 deletions(-) diff --git a/tests/test_record.py b/tests/test_record.py index eabd945..cffc55f 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -1,25 +1,20 @@ from os import path import Bio from Bio import SeqIO +from Bio.SeqFeature import FeatureLocation from secmet.record import Record from secmet.record import GenericFeature, ClusterFeature, CDSFeature -def get_testfile(filename): - return path.join(path.dirname(__file__), 'data', filename) +filename = 'nisin.gbk' - -def test_init_empty(): - rec = Record() - assert isinstance(rec, Record) - assert rec.id == "NO_ID_ASSIGNED" - assert rec.seq is None - assert rec.annotations == {} - assert rec.description == "" - assert rec.clusters == [] +def get_testfile(): + """File path for testing""" + return path.join(path.dirname(__file__), 'tests/data', filename) def test_from_file(): - testfile = get_testfile('nisin.gbk') + """Test file operations in Record""" + testfile = get_testfile() bp_rec = SeqIO.read(testfile, 'genbank') rec = Record.from_file(testfile, 'genbank') assert isinstance(rec, Record) @@ -33,31 +28,64 @@ def test_from_file(): assert rec.description == bp_rec.description def test_from_biopython(): - testfile = get_testfile('nisin.gbk') - rec = Record.from_file(testfile, 'genbank') - assert isinstance(rec.from_biopython(), Record) + """Test from_biopython() in Record""" + testfile = get_testfile() + rec = Record.from_file(testfile, 'genbank') + assert isinstance(rec.from_biopython(rec._record), Record) def test_to_biopython(): - testfile = get_testfile('nisin.gbk') - rec = Record.from_file(testfile, 'genbank') - assert isinstance(rec.to_biopython, Bio.SeqRecord.SeqRecord) + """Test to_biopython() in Record""" + testfile = get_testfile() + rec = Record.from_file(testfile, 'genbank') + assert isinstance(rec.to_biopython(), Bio.SeqRecord.SeqRecord) def test_get_clusters(): - testfile = get_testfile('nisin.gbk') + """Test get_clusters() in Record""" + testfile = get_testfile() rec = Record.from_file(testfile, 'genbank') bp_rec = SeqIO.read(testfile, 'genbank') - clusters = [i for i in bp_rec.features if i.type == 'cluster'] - #Should call from_biopython() to access features - rec.from_biopython() - assert len(rec.get_clusters) == len(clusters) - assert isinstance(rec.get_clusters[0], ClusterFeature) - + bp_clusters = [i for i in bp_rec.features if i.type == 'cluster'] + mod_clusters = rec.get_clusters() + assert len(mod_clusters) == len(bp_clusters) + for cluster in mod_clusters: + assert isinstance(cluster, ClusterFeature) + def test_get_CDSs(): - testfile = get_testfile('nisin.gbk') + """Test get_CDSs() in Record""" + testfile = get_testfile() rec = Record.from_file(testfile, 'genbank') bp_rec = SeqIO.read(testfile, 'genbank') - CDSs = [i for i in bp_rec.features if i.type == 'CDS'] - #Should call from_biopython() to access features - rec.from_biopython() - assert len(rec.get_CDSs) == len(CDSs) - assert isinstance(rec.get_CDSs[0], CDSFeature) + bp_CDSs = [i for i in bp_rec.features if i.type == 'CDS'] + mod_CDSs = rec.get_CDSs() + assert len(mod_CDSs) == len(bp_CDSs) + for cds in mod_CDSs: + assert isinstance(cds, CDSFeature) + +def test_get_cluster_number(): + """Test get_cluster_number() in Record""" + testfile = get_testfile() + rec = Record.from_file(testfile, 'genbank') + cluster = rec.get_clusters()[0] + assert rec.get_cluster_number(cluster) == 1 + +def test_add_feature(): + """Test add_feature() in Record""" + testfile = get_testfile() + rec = Record.from_file(testfile, 'genbank') + no_of_clusters = len(rec.get_clusters()) + no_of_cdss = len(rec.get_CDSs()) + no_of_generics = len(rec._modified_Generic) + new_cluster = ClusterFeature() + + #ClusterFeature should have valid location for adding + new_cluster.location = FeatureLocation(15100, 15200) + new_cds = CDSFeature() + new_generic = GenericFeature() + rec.add_feature(new_cluster) + rec.add_feature(new_cds) + rec.add_feature(new_generic) + assert no_of_clusters+1 == len(rec.get_clusters()) + assert no_of_cdss+1 == len(rec.get_CDSs()) + assert no_of_generics+1 == len(rec._modified_Generic) + assert rec.get_clusters()[0].get_cluster_number() == 1 + assert new_cluster.get_cluster_number() == 2 From 524dec580ae5e7e7920ab2f51a6798fb7b429476 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Sun, 18 Jun 2017 01:28:52 +0530 Subject: [PATCH 08/71] Add test_cluster.py, modify add_feature() and test_record.py --- secmet/record.py | 4 ++- tests/test_cluster.py | 69 +++++++++++++++++++++++++++++++++++++++++++ tests/test_record.py | 4 +-- 3 files changed, 74 insertions(+), 3 deletions(-) create mode 100644 tests/test_cluster.py diff --git a/secmet/record.py b/secmet/record.py index e8ccd3d..2037395 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -192,7 +192,7 @@ def to_biopython(self): """Returns a Bio.SeqFeature.SeqFeature object with all its members""" if not isinstance(self.location, FeatureLocation): raise ValueError("location should be an instance of 'Bio.SeqFeature.FeatureLocation'") - self._qualifiers['note'] = ["Cluster number: " + str(self.get_cluster_number)] + self._qualifiers['note'] = ["Cluster number: " + str(self.get_cluster_number())] self._qualifiers['note'].append(self.detection) self._qualifiers['cutoff'] = [str(self.cutoff)] self._qualifiers['extension'] = [str(self.extension)] @@ -316,6 +316,8 @@ def add_feature(self, feature): else: clusters[index] = feature feature.parent_record = self + for index, cluster in enumerate(clusters): + self._cluster_number_dict[cluster] = index+1 return clusters.insert(index, feature) feature.parent_record = self diff --git a/tests/test_cluster.py b/tests/test_cluster.py new file mode 100644 index 0000000..c0f875a --- /dev/null +++ b/tests/test_cluster.py @@ -0,0 +1,69 @@ +from os import path +from Bio import SeqIO +from Bio.SeqFeature import FeatureLocation +from secmet.record import Record +from secmet.record import ClusterFeature + +filename = 'nisin.gbk' + +def get_testfile(): + """File path for testing""" + return path.join(path.dirname(__file__), 'tests/data', filename) + +def test_add_new_cluster(): + """Test for adding a new cluster to record""" + testfile = get_testfile() + rec = Record.from_file(testfile, 'genbank') + new_cluster = ClusterFeature() + new_cluster.location = FeatureLocation(15100, 15500) + try: + new_cluster.cutoff = 15300 + except: + raise ValueError('Error assigning cutoff value') + try: + new_cluster.extension = 15300 + except: + raise ValueError('Error assiging extension value') + new_cluster.contig_edge = True + new_cluster.detection = 'Detection rules...' + new_cluster.add_product('product_info') + assert len(rec.get_clusters()) == 1 + rec.add_feature(new_cluster) + assert len(rec.get_clusters()) == 2 + return new_cluster.to_biopython() + +def test_add_existing_cluster(): + """Test for accessing the existing cluster from record""" + testfile = get_testfile() + rec = Record.from_file(testfile, 'genbank') + assert len(rec.get_clusters()) == 1 + new_cluster = rec.get_clusters()[0] + assert isinstance(new_cluster, ClusterFeature) + new_cluster.location = FeatureLocation(100, 15106) + try: + new_cluster.cutoff = 5000 + except: + raise ValueError('Error assigning cutoff value') + try: + new_cluster.extension = 5000 + except: + raise ValueError('Error assiging extension value') + rec.add_feature(new_cluster) + return new_cluster.to_biopython() + +def write_to_genbank_file(): + """Write data from test_add_new_cluster()""" + testfile = get_testfile() + rec = Record.from_file(testfile, 'genbank') + record_1 = rec.to_biopython() + new_cluster_feature = test_add_new_cluster()[0] + record_1.features.append(new_cluster_feature) + with open('test_new_cluster.gbk', 'w') as handle: + SeqIO.write([record_1], handle, "genbank") + + #Write data from test_add_existing_cluster() + record_2 = rec.to_biopython() + new_cluster_feature = test_add_existing_cluster()[0] + record_2.features.append(new_cluster_feature) + with open('test_existing_cluster.gbk', 'w') as handle: + SeqIO.write([record_2], handle, "genbank") diff --git a/tests/test_record.py b/tests/test_record.py index cffc55f..0ca576b 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -74,7 +74,7 @@ def test_add_feature(): rec = Record.from_file(testfile, 'genbank') no_of_clusters = len(rec.get_clusters()) no_of_cdss = len(rec.get_CDSs()) - no_of_generics = len(rec._modified_Generic) + no_of_generics = len(rec._modified_generic) new_cluster = ClusterFeature() #ClusterFeature should have valid location for adding @@ -86,6 +86,6 @@ def test_add_feature(): rec.add_feature(new_generic) assert no_of_clusters+1 == len(rec.get_clusters()) assert no_of_cdss+1 == len(rec.get_CDSs()) - assert no_of_generics+1 == len(rec._modified_Generic) + assert no_of_generics+1 == len(rec._modified_generic) assert rec.get_clusters()[0].get_cluster_number() == 1 assert new_cluster.get_cluster_number() == 2 From 90528790969e951e1724b3ae1f9cc7b703fee8a5 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Sun, 18 Jun 2017 15:55:36 +0530 Subject: [PATCH 09/71] Solved: Format of writing to files --- secmet/record.py | 29 ++++++++++++++++++----------- tests/test_cluster.py | 19 +++++++++++-------- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index 2037395..d9a57a2 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -46,7 +46,7 @@ def get_qualifier(self, category): def to_biopython(self): """Returns a Bio.SeqFeature.SeqFeature of given type of feature""" if not isinstance(self.location, FeatureLocation): - raise ValueError("location should be an instance of 'Bio.SeqFeature.FeatureLocation'") + raise ValueError("location should be an instance of Bio.SeqFeature.FeatureLocation") new_Generic = SeqFeature(FeatureLocation(self.location.start, self.location.end), type=self.type) new_Generic.qualifiers = self._qualifiers.copy() return [new_Generic] @@ -100,7 +100,7 @@ def get_cluster(self): def to_biopython(self): """Returns a Bio.SeqFeature.SeqFeature object with all its members""" if not isinstance(self.location, FeatureLocation): - raise ValueError("location should be an instance of 'Bio.SeqFeature.FeatureLocation'") + raise ValueError("location should be an instance of Bio.SeqFeature.FeatureLocation") self._qualifiers['sec_met'] = self.sec_met self._qualifiers['locus_tag'] = [str(self.locus_tag)] self._qualifiers['product'] = [str(self.product)] @@ -191,7 +191,7 @@ def get_CDSs(self): def to_biopython(self): """Returns a Bio.SeqFeature.SeqFeature object with all its members""" if not isinstance(self.location, FeatureLocation): - raise ValueError("location should be an instance of 'Bio.SeqFeature.FeatureLocation'") + raise ValueError("location should be an instance of Bio.SeqFeature.FeatureLocation") self._qualifiers['note'] = ["Cluster number: " + str(self.get_cluster_number())] self._qualifiers['note'].append(self.detection) self._qualifiers['cutoff'] = [str(self.cutoff)] @@ -213,9 +213,10 @@ def __init__(self, seq_record=None): :type seq_record: :class:`Bio.SeqRecord.SeqRecord` """ self._record = seq_record - self._modified_cds = [] #A list containing instances of CDSFeature - self._modified_cluster = [] #A list containing instances of ClusterFeature - self._modified_generic = [] #A list containing instances of GenericFeature + self._features = [] #A list containing all Feature instances + self._modified_cds = [] #A list containing instances of CDSFeature + self._modified_cluster = [] #A list containing instances of ClusterFeature + self._modified_generic = [] #A list containing instances of GenericFeature self._cluster_number_dict = {} #A dictionary to map clusters and their numbers if not isinstance(self._record, SeqRecord.SeqRecord): @@ -285,9 +286,12 @@ def get_CDSs(self): def to_biopython(self): """Returns a Bio.SeqRecord instance of the record""" new_record = self._record - features = self._modified_generic - features.extend(self._modified_cds) - features.extend(self._modified_cluster) + features = self._features + for index, feature in enumerate(features): + if feature.type == 'cluster': + features.pop(index) + for index, cluster in enumerate(self.get_clusters()): + features.insert(index+1, cluster) record_features = [] for feature in features: record_features.append(feature.to_biopython()[0]) @@ -306,8 +310,8 @@ def add_feature(self, feature): raise TypeError("The argument is not an instance of 'Feature'") if feature.type == 'cluster': if not isinstance(feature.location, FeatureLocation): - raise ValueError("location should be an instance of 'Bio.SeqFeatures.FeatureLocation'") - clusters = self._modified_cluster + raise ValueError("location should be an instance of Bio.SeqFeature.FeatureLocation") + clusters = self.get_clusters() clusters.append(None) for index, cluster in enumerate(clusters): if cluster is not None: @@ -323,11 +327,13 @@ def add_feature(self, feature): feature.parent_record = self for index, cluster in enumerate(clusters): self._cluster_number_dict[cluster] = index+1 + return elif feature.type == 'CDS': self._modified_cds.append(feature) else: self._modified_generic.append(feature) + self._features.append(feature) def from_biopython(self, record): """Modifies _modified_features list with new Feature instances""" @@ -344,4 +350,5 @@ def from_biopython(self, record): else: feature = GenericFeature(feature) self._modified_generic.append(feature) + self._features.append(feature) return self diff --git a/tests/test_cluster.py b/tests/test_cluster.py index c0f875a..7f28449 100644 --- a/tests/test_cluster.py +++ b/tests/test_cluster.py @@ -2,7 +2,7 @@ from Bio import SeqIO from Bio.SeqFeature import FeatureLocation from secmet.record import Record -from secmet.record import ClusterFeature +from secmet.record import ClusterFeature, CDSFeature filename = 'nisin.gbk' @@ -30,7 +30,7 @@ def test_add_new_cluster(): assert len(rec.get_clusters()) == 1 rec.add_feature(new_cluster) assert len(rec.get_clusters()) == 2 - return new_cluster.to_biopython() + return new_cluster def test_add_existing_cluster(): """Test for accessing the existing cluster from record""" @@ -49,21 +49,24 @@ def test_add_existing_cluster(): except: raise ValueError('Error assiging extension value') rec.add_feature(new_cluster) - return new_cluster.to_biopython() + return new_cluster def write_to_genbank_file(): """Write data from test_add_new_cluster()""" testfile = get_testfile() rec = Record.from_file(testfile, 'genbank') + new_cluster_feature = test_add_new_cluster() + rec.add_feature(new_cluster_feature) record_1 = rec.to_biopython() - new_cluster_feature = test_add_new_cluster()[0] - record_1.features.append(new_cluster_feature) + with open('test_new_cluster.gbk', 'w') as handle: SeqIO.write([record_1], handle, "genbank") - #Write data from test_add_existing_cluster() + #Write data from test_add_existing_cluster( + rec = Record.from_file(testfile, 'genbank') + new_cluster_feature = test_add_existing_cluster() + rec.add_feature(new_cluster_feature) record_2 = rec.to_biopython() - new_cluster_feature = test_add_existing_cluster()[0] - record_2.features.append(new_cluster_feature) + with open('test_existing_cluster.gbk', 'w') as handle: SeqIO.write([record_2], handle, "genbank") From 3046ecae6cba3c5459e6b34c8a23c3cc8401959a Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Sun, 18 Jun 2017 23:14:14 +0530 Subject: [PATCH 10/71] Update record.py, test_cluster.py, test_record.py. Add new test files to tests/data/ --- secmet/record.py | 11 +- tests/data/HM219853.1.final.gbk | 630 +++ tests/data/HM219853.1.final.minimal.gbk | 539 ++ tests/data/Y16952.3.final.gbk | 6887 +++++++++++++++++++++++ tests/data/Y16952.3.final.minimal.gbk | 2270 ++++++++ tests/data/balh.embl | 2033 +++++++ tests/test_cluster.py | 69 +- tests/test_record.py | 37 +- 8 files changed, 12422 insertions(+), 54 deletions(-) create mode 100644 tests/data/HM219853.1.final.gbk create mode 100644 tests/data/HM219853.1.final.minimal.gbk create mode 100644 tests/data/Y16952.3.final.gbk create mode 100644 tests/data/Y16952.3.final.minimal.gbk create mode 100644 tests/data/balh.embl diff --git a/secmet/record.py b/secmet/record.py index d9a57a2..ef69957 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -224,14 +224,15 @@ def __init__(self, seq_record=None): self.from_biopython(self._record) @classmethod - def from_file(cls, filename, filetype): + def from_file(cls, filename): """Initialise a record from a file of specified type :param string filename: file name of the file to read :param string filetype: Type of the inputfile """ - if filetype in ['gb', 'genbank']: + filetype = filename.split('.')[-1] + if filetype in ['gb', 'gbk', 'genbank']: type_of_file = 'genbank' elif filetype in ['fa', 'fas', 'fasta']: type_of_file = 'fasta' @@ -290,8 +291,10 @@ def to_biopython(self): for index, feature in enumerate(features): if feature.type == 'cluster': features.pop(index) - for index, cluster in enumerate(self.get_clusters()): - features.insert(index+1, cluster) + if feature.type == 'source': + source_index = index + for cluster in self.get_clusters(): + features.insert(source_index+1, cluster) record_features = [] for feature in features: record_features.append(feature.to_biopython()[0]) diff --git a/tests/data/HM219853.1.final.gbk b/tests/data/HM219853.1.final.gbk new file mode 100644 index 0000000..3595812 --- /dev/null +++ b/tests/data/HM219853.1.final.gbk @@ -0,0 +1,630 @@ +LOCUS HM219853 15016 bp DNA linear BCT 05-OCT-2011 +DEFINITION Lactococcus lactis subsp. lactis nisin biosynthetic gene cluster, + complete sequence. +ACCESSION HM219853 +VERSION HM219853.1 GI:299832736 +KEYWORDS . +SOURCE Lactococcus lactis subsp. lactis + ORGANISM Lactococcus lactis subsp. lactis + Bacteria; Firmicutes; Lactobacillales; Streptococcaceae; + Lactococcus. +REFERENCE 1 (bases 1 to 15016) + AUTHORS Trmcic,A., Samelis,J., Monnet,C., Rogelj,I. and Bogovic Matijasic,B. + TITLE Complete nisin A gene cluster from Lactococcus lactis M78 (HM219853) + - obtaining the nucleic acid sequence and comparing it to other + published nisin sequences + JOURNAL Genes Genomics 33 (3), 217-221 (2011) +REFERENCE 2 (bases 1 to 15016) + AUTHORS Trmcic,A., Monnet,C., Rogelj,I. and Bogovic Matijasic,B. + TITLE Expression of nisin genes in cheese--a quantitative real-time + polymerase chain reaction approach + JOURNAL J. Dairy Sci. 94 (1), 77-85 (2011) + PUBMED 21183019 +REFERENCE 3 (bases 1 to 15016) + AUTHORS Trmcic,A., Samelis,J., Monnet,C., Rogelj,I. and Bogovic-Matijasic,B. + TITLE Direct Submission + JOURNAL Submitted (12-MAY-2010) Department of Animal Science - Chair of + Dairy Science, Biotechnical Faculty, Groblje 3, Domzale 1230, + Slovenia +FEATURES Location/Qualifiers + source 1..15016 + /collected_by="John Samelis" + /country="Greece" + /db_xref="taxon:1360" + /isolation_source="raw milk" + /mol_type="genomic DNA" + /organism="Lactococcus lactis subsp. lactis" + /strain="M78" + /sub_species="lactis" + cluster 1..15016 + /clusterblast="2. CP002365_c2 Lactococcus lactis subsp. + lactis CV56, complete genome. (100% of genes show + similarity)" + /clusterblast="3. CP010050_c1 Lactococcus lactis subsp. + lactis strain S0, complete genome. (100% of genes show + similarity)" + /clusterblast="4. CP009472_c1 Lactococcus lactis strain + AI06, complete genome. (100% of genes show similarity)" + /clusterblast="5. AB362350_c1 Lactococcus lactis DNA, nisin + Q gene cluster (nisQ, niqB,niqT,... (100% of genes show + similarity)" + /clusterblast="6. CP003858_c1 Streptococcus intermedius + C270, complete genome. (100% of genes show similarity)" + /clusterblast="7. AP012281_c2 Lactococcus lactis subsp. + lactis IO-1 DNA, complete genome. (100% of genes show + similarity)" + /clusterblast="8. DQ146939_c1 Streptococcus uberis nisin U + gene locus, complete sequence. (90% of genes show + similarity)" + /clusterblast="9. JN564797_c1 Streptococcus salivarius + strain 5M6c salivaricin D gene locus,... (100% of genes + show similarity)" + /clusterblast="10. KP793707_c1 Streptococcus + hyointestinalis strain DPC6484 nisin H gene clu... (90% of + genes show similarity)" + /clusterblast="11. ALUZ01000053_c1 Streptococcus agalactiae + GB00984 ctg7180000004230, whole ... (81% of genes show + similarity)" + /contig_edge="True" + /cutoff=20000 + /extension=10000 + /knownclusterblast="1. + BGC0000535_c1 Nisin_A_biosynthetic_gene_cluster (100% of + genes show similarity)" + /knownclusterblast="2. + BGC0000536_c1 Nisin_Q_biosynthetic_gene_cluster (100% of + genes show similarity)" + /knownclusterblast="3. + BGC0000549_c1 Salivaricin_D_biosynthetic_gene_cluster (90% + of genes show similarity)" + /knownclusterblast="4. + BGC0000538_c1 Nisin_Z_biosynthetic_gene_cluster (45% of + genes show similarity)" + /knownclusterblast="5. + BGC0000537_c1 Nisin_U_biosynthetic_gene_cluster (45% of + genes show similarity)" + /knownclusterblast="6. + BGC0000511_c1 Ericin_A_biosynthetic_gene_cluster (18% of + genes show similarity)" + /knownclusterblast="7. + BGC0000559_c1 Subtilin_biosynthetic_gene_cluster (18% of + genes show similarity)" + /note="Cluster number: 1" + /note="Detection rule(s) for this cluster type: + lantipeptide: (cluster(LANC_like,Flavoprotein) or + cluster(LANC_like,Trp_halogenase) or + cluster(LANC_like,p450) or (LANC_like & Pkinase) or + (LANC_like & DUF4135) or cluster(LANC_like,Lant_dehyd_N) or + cluster(LANC_like,Lant_dehyd_C) or + cluster(LANC_like,adh_short) or + cluster(LANC_like,adh_short_C2) or TIGR03731 or Antimicr18 + or Gallidermin or L_biotic_A or TIGR03731 or leader_d or + leader_eh or leader_abc or mature_d or mature_ab or + mature_a or mature_b or mature_ha or mature_h_beta or + lacticin_l or lacticin_mat or LD_lanti_pre or + strep_PEQAXS);" + /product="lantipeptide" + CDS_motif 828..896 + /locus_tag="nisA" + /note="leader peptide" + /note="lantipeptide" + /note="predicted leader seq: MSTKDFNLDLVSVSKKDSGASPR" + gene 828..1001 + /gene="nisA" + CDS 828..1001 + /codon_start=1 + /db_xref="GI:299832737" + /gene="nisA" + /note="prenisin; nisin A structural protein" + /product="NisA" + /protein_id="ADJ56352.1" + /sec_met="Type: lantipeptide" + /sec_met="Domains detected: TIGR03731 (E-value: 1.3e-24, + bitscore: 75.6, seeds: 23); mature_a (E-value: 6.5e-08, + bitscore: 21.5, seeds: 5)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MSTKDFNLDLVSVSKKDSGASPRITSISLCTPGCKTGALMGCNMK + TATCHCSIHVSK" + misc_feature 828..14152 + /note="nisin biosynthetic gene cluster" + CDS_motif 897..1001 + /locus_tag="nisA" + /note="core peptide" + /note="lantipeptide" + /note="monoisotopic mass: 3333.6" + /note="molecular weight: 3336.0" + /note="alternative weights: 3354.0; 3372.1; 3390.1; 3408.1" + /note="number of bridges: 5" + /note="predicted core seq: + ITSISLCTPGCKTGALMGCNMKTATCHCSIHVSK" + /note="predicted class: Class-I" + /note="score: 26.70" + /note="RODEO score: 32" + gene 1109..4090 + /gene="nisB" + CDS 1109..4090 + /codon_start=1 + /db_xref="GI:299832738" + /function="dehydration of serine and threonine" + /gene="nisB" + /note="nisin biosynthesis protein" + /note="smCOG: + SMCOG1155:Lantibiotic_dehydratase_domain_protein (Score: + 859.9; E-value: 3.6e-260);" + /note="smCOG tree PNG image: smcogs/nisB.png" + /product="NisB" + /protein_id="ADJ56353.1" + /sec_met="Type: lantipeptide" + /sec_met="Domains detected: Lant_dehyd_N (E-value: 4.5e-25, + bitscore: 77.4, seeds: 38); Lant_dehyd_C (E-value: + 6.8e-120, bitscore: 390.3, seeds: 37); Lant_dehydr_C + (E-value: 2.8e-18, bitscore: 56.0, seeds: ?)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MIKSSFKAQPFLVRNTILSPNDKRSFTEYTQVIETVSKNKVFLEQ + LLLANPKLYDVMQKYNAGLLKKKRVKKLFESIYKYYKRSYLRSTPFGLFSETSIGVFSK + SSQYKLMGKTTKGIRLDTQWLIRLVHKMEVDFSKKLSFTRNNANYKFGDRVFQVYTINS + SELEEVNIKYTNVYQIISEFCENDYQKYEDICETVTLCYGDEYRELSEQYLGSLIVNHY + LISNLQKDLLSDFSWNTFLTKVEAIDEDKKYIIPLKKVQKFIQEYSEIEIGEGIEKLKE + IYQEMSQILENDNYIQIDLISDSEINFDVKQKQQLEHLAEFLGNTTKSVRRTYLDDYKD + KFIEKYGVDQEVQITELFDSTFGIGAPYNYNHPRNDFYESEPSTLYYSEEEREKYLSMY + VEAVKNHNVINLDDLESHYQKMDLEKKSELQGLELFLNLAKEYEKDIFILGDIVGNNNL + GGASGRFSALSPELTSYHRTIVDSVERENENKEITSCEIVFLPENIRHANVMHTSIMRR + KVLPFFTSTSHNEVLLTNIYIGIDEKEKFYARDISTQEVLKFYITSMYNKTLFSNELRF + LYEISLDDKFGNLPWELIYRDFDYIPRLVFDEIVISPAKWKIWGRDVNSKMTIRELIQS + KEIPKEFYIVNGDNKVYLSQENPLDMEILESAIKKSSKRKDFIELQEYFEDENIINKGE + KGRVADVVVPFIRTRALGNEGRAFIREKRVSVERREKLPFNEWLYLKLYISINRQNEFL + LSYLPDIQKIVANLGGNLFFLRYTDPKPHIRLRIKCSDLFLAYGSILEILKRSRKNRIM + STFDISIYDQEVERYGGFDTLELSEAIFCADSKIIPNLLTLIKDTNNDWKVDDVSILVN + YLYLKCFFENDNKKILNFLNLVSPKKVKENVNEKIEHYLKLLKVNNLGDQIFYDKNFKE + LKHAIKNLFLKMIAQDFELQKVYSIIDSIIHVHNNRLIGIERDKEKLIYYTLQRLFVSE + EYMK" + gene 4101..5903 + /gene="nisT" + CDS 4101..5903 + /codon_start=1 + /db_xref="GI:299832739" + /function="secretion of prenisin" + /gene="nisT" + /note="nisin transport ATP-binding protein" + /note="smCOG: SMCOG1000:ABC_transporter_ATP-binding_protein + (Score: 163.7; E-value: 9.5e-50);" + /note="smCOG tree PNG image: smcogs/nisT.png" + /product="NisT" + /protein_id="ADJ56354.1" + /transl_table=11 + /translation="MDEVKEFTSKQFFNTLLTLPSTLKLIFQLEKRYAIYLIVLNAITA + FVPLASLFIYQDLINSVLGSGRHLINIIIIYFIVQVITTVLGQLESYVSGKFDMRLSYS + INMRLMRTTSSLELSDYEQADMYNIIEKVTQDSTYKPFQLFNAIIVVLSSFISLLSSLF + FIGTWNIGVAILLLIVPVLSLVLFLRVGQLEFLIQWQRASSERETWYIVYLLTHDFSFK + EIKLNNISNYFIHKFGKLKKGFINQDLAIARKKTYFNIFLDFILNLINILTIFAMILSV + RAGKLLIGNLVSLIQAISKINTYSQTMIQNIYIIYNTSLFMEQLFEFLKRESVVHKKIE + DTEICNQHIGTVKVINLSYVYPNSNAFALKNINLSFEKGELTAIVGKNGSGKSTLVKII + SGLYQPTMGIIQYDKMRSSLMPEEFYQKNISVLFQDFVKYELTIRENIGLSDLSSQWED + EKIIKVLDNLGLDFLKTNNQYVLDTQLGNWFQEGHQLSGGQWQKIALARTFFKKASIYI + LDEPSAALDPVAEKEIFDYFVALSENNISIFISHSLNAARKANKIVVMKDGQVEDVGSH + DVLLRRCQYYQELYYSEQYEDNDE" + gene 5896..7140 + /gene="nisC" + CDS 5896..7140 + /codon_start=1 + /db_xref="GI:299832740" + /function="cyclization, formation of (methyl)lantionines" + /gene="nisC" + /note="nisin biosynthesis protein" + /note="smCOG: + SMCOG1140:Lanthionine_synthetase_C_family_protein (Score: + 337.0; E-value: 2.6e-102);" + /note="smCOG tree PNG image: smcogs/nisC.png" + /product="NisC" + /protein_id="ADJ56355.1" + /sec_met="Type: lantipeptide" + /sec_met="Domains detected: LANC_like (E-value: 8.3e-76, + bitscore: 244.5, seeds: 47)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MNKKNIKRNVEKIIAQWDERTRKNKENFDFGELTLSTGLPGIILM + LAELKNKDNSKIYQKKIDNYIEYIVSKLSTYGLLTGSLYSGAAGIALSILHLREDDEKY + KNLLDSLNRYIEYFVREKIEGFNLENITPPDYDVIEGLSGILSYLLLINDEQYDDLKIL + IINFLSNLTKENKGLISLYIKSENQMSQSESEMYPLGCLNMGLAHGLAGVGCILAYAHI + KGYSNEASLSALQKIIFIYEKFELERKKQFLWKDGLVADELKKEKVIREASFIRDAWCY + GGPGISLLYLYGGLALDNDYFVDKAEKILESAMQRKLGIDSYMICHGYSGLIEICSLFK + RLLNTKKFDSYMEEFNVNSEQILEEYGDESGTGFLEGISGCILVLSKFEYSINFTYWRQ + ALLLFDDFLKGGKRK" + gene 7137..7874 + /gene="nisI" + CDS 7137..7874 + /codon_start=1 + /db_xref="GI:299832741" + /gene="nisI" + /note="nisin immunity protein; nisin-binding lipoprotein" + /product="NisI" + /protein_id="ADJ56356.1" + /transl_table=11 + /translation="MRRYLILIVALIGITGLSGCYQTSHKKVRFDEGSYTNFIYDNKSY + FVTDKEIPQENVNNSKVKFYKLLIVDMKSEKLLSSSNKNSVTLVLNNIYEASDKSLCMG + INDRYYKILPESDKGAVKALRLQNFDVTSDISDDNFVIDKNDSRKIDYMGNIYSISDTT + VSDEELGEYQDVLAEVRVFDSVSGKSIPRSEWGRIDKDGSNSKQSRTEWDYGEIHSIRG + KSLTEAFAVEINDDFKLATKVGN" + gene 7876..9924 + /gene="nisP" + CDS 7876..9924 + /codon_start=1 + /db_xref="GI:299832742" + /function="cleavage of leader peptide from prenisin and + activation of nisin" + /gene="nisP" + /note="nisin leader peptide-processing serine protease" + /note="smCOG: + SMCOG1075:alkaline_serine_protease,_subtilase_family + (Score: 249.7; E-value: 1.2e-75);" + /note="smCOG tree PNG image: smcogs/nisP.png" + /product="NisP" + /protein_id="ADJ56357.1" + /sec_met="Type: none" + /sec_met="Domains detected: Peptidase_S8 (E-value: 2.3e-50, + bitscore: 161.0, seeds: ?)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MKKILGFLFIVCSLGLSATVHGETTNSQQLLSNNINTELINHNSN + AILSSTEGSTTDSINLGAQSPAVKSTTRTELDVTGAAKTLLQTSAVQKEMKVSLQETQV + SSEFSKRDSVTNKEAVPVSKDELLEQSEVVVSTSSIQKNKILDNKKNRANFVTSSPLIK + EKPSNSKDASGVIDNSASPLSYRKAKEVVSLRQPLKNQKVEAQPLLISNSSEKKASVYT + NSHDFWDYQWDMKYVTNNGESYALYQPSKKISVGIIDSGIMEEHPDLSNSLGNYFKNLV + PKGGFDNEEPDETGNPSDIVDKMGHGTEVAGQITANGNILGVAPGITVNIYRVFGENLS + KSEWVARAIRRAADDGNKVINISAGQYLMISGSYDDGTNDYQEYLNYKSAINYATAKGS + IVVAALGNDSLNIQDNQTMINFLKRFRSIKVPGKVVDAPSVFEDVIAVGGIDGYGNISD + FSNIGADAIYAPAGTTANFKKYGQDKFVSQGYYLKDWLFTTTNTGWYQYVYGNSFATPK + VSGALALVVDKYGIKNPNQLKRFLLMNSPEVNGNRVLNIVDLLNGKNKAFSLDTDKGQD + DAINHKSMENLKESRDTMKQEQDKEIQRNTNNNFSIKNDFHNISKEVISVDYNINQKMA + NNRNSRGAVSVRSQEILPVTGDGEDFLPALGIVCISILGILKRKTKN" + gene 9993..10679 + /gene="nisR" + CDS 9993..10679 + /codon_start=1 + /db_xref="GI:299832743" + /function="transcriptional activator/response regulator in + two-component regulatory system" + /gene="nisR" + /note="nisin biosynthesis regulatory protein" + /note="smCOG: SMCOG1008:response_regulator (Score: 180.3; + E-value: 6.6e-55);" + /note="smCOG tree PNG image: smcogs/nisR.png" + /product="NisR" + /protein_id="ADJ56358.1" + /transl_table=11 + /translation="MYKILIVDDDQEILKLMKTALEMRNYEVAMHQNISLPLDITDFQG + FDLILLDIMMSNIEGTEICKRIRREISTPIIFVSAKDTEEDIINGLGIGGDDYITKPFS + LKQLVAKVEANIKREERNKHAVHVFSEIRRDLGPITFYLEERRVCVNGQTIPLTCREYD + ILELLSQRTSKVYTREDIYDDVYDEYSNALFRSISEYIYQIRSKFAPYDINPIKTVRGL + GYQWHG" + gene 10672..12015 + /gene="nisK" + CDS 10672..12015 + /codon_start=1 + /db_xref="GI:299832744" + /function="sensor protein in two-component regulatory + system" + /gene="nisK" + /note="nisin biosynthesis sensor protein" + /note="smCOG: SMCOG1003:sensor_histidine_kinase (Score: + 102.5; E-value: 5e-31);" + /note="smCOG tree PNG image: smcogs/nisK.png" + /product="NisK" + /protein_id="ADJ56359.1" + /transl_table=11 + /translation="MGKKYSMRRRIWQAVIEIIIGTCLLILLLLGLTFFLRQIGQISGS + ETIRLSLDSDNLTISDIERDMKHYPYDYIIFDNDTSKILGGHYVKSDVPSFVASKQSSH + NITEGEITYTYSSNKHFSVVLRQNSMPEFTNHTLRSISYNQFTYLFFFLGEIILIIFSV + YHLIREFSKNFQAVQKIALKMGEITTFPEQEESKIIEFDQVLNNLYSKSKELAFLIEAE + RHEKHDLSFQVAALSHDVKTPLTVLKGNIELLEMTEVNEQQADFIESMKNSLTVFDKYF + NTMISYTKLLNDENDYKATISLEDFLIDLSVELEELSTTYQVDYQLVKKTDLTTFYGNT + LALSRALINIFVNACQYAKEGEKIVSLSIYDDEKYLYFEIWNNGHPFSEQAKKNAGKLF + FTEDTGRSGKHYGIGLSFAQGVALKHQGNLILSNPQKGGAEVILKIKK" + gene 12114..12791 + /gene="nisF" + CDS 12114..12791 + /codon_start=1 + /db_xref="GI:299832745" + /function="secretion of cell-associated nisin" + /gene="nisF" + /note="nisin transport/immunity protein; component of + ABC-transporter" + /note="smCOG: SMCOG1000:ABC_transporter_ATP-binding_protein + (Score: 173.7; E-value: 8.2e-53);" + /note="smCOG tree PNG image: smcogs/nisF.png" + /product="NisF" + /protein_id="ADJ56360.1" + /transl_table=11 + /translation="MQVKIQNLSKTYKEKQVLQDISFDIKSGTVCGLLGVNGAGKSTLM + KILFGLISADTGKIFFDGQEKTNNQLGALIEAPAIYMNLSAFDNLKTKALLFGISDKRI + HETLEVIGLAETGKKRAGKFSLGMKQRLGIGMAILTEPQFLILDEPTNGLDPDGIAELL + NLILKLKAKGVTILISSHQLHEISKVASQIIILNKGKIRYNHANNKEDDIEQLFFKIVH + GGM" + gene 12793..13521 + /gene="nisE" + CDS 12793..13521 + /codon_start=1 + /db_xref="GI:299832746" + /function="secretion of cell-associated nisin" + /gene="nisE" + /note="nisin transport/immunity protein; component of + ABC-transporter" + /product="NisE" + /protein_id="ADJ56361.1" + /transl_table=11 + /translation="MKRIIASEAIKLKKSGTLRLVLIIPFVTLFIAFLMGGIQIFSVFS + IYWWETGFLFLLMSLLFLYDIKSEEQAGNFQNVKWKKLSWKIHLAKMLLIWLRGILASI + VLIILLYLVAFVFQGIVVVDFMKVSVALIAILLAASWNLPFIYLIFKWINTYVLLAANT + LICLIVAPFVAQTPVWFLLPYTYHYKVTESLLNIKPSGDLLTGKINFSIWEVLLPFGLS + IVVTIGVSYLLKGVIEHDKK" + gene 13508..14152 + /gene="nisG" + CDS 13508..14152 + /codon_start=1 + /db_xref="GI:299832747" + /function="secretion of cell-associated nisin" + /gene="nisG" + /note="nisin transport/immunity protein; component of + ABC-transporter" + /product="NisG" + /protein_id="ADJ56362.1" + /transl_table=11 + /translation="MIRSECLKLKNSLGFYLVFLFTLLELLTVPIYLAFGRSHVSMTDL + SLMIFLFFPLLVTILSILIFEQESLANRFQEINVNKKSSRIWLSKLIVVDFLLFFPSAM + IWIITGVSQAVGQQGMMIATASWLMAIFLNHFHLLLTFIINRGGSMIIAIIEILLIIFA + SNKVLLAAYWCPIALPVNFMITGRCAYLIAAVGWIVLSTIILVALSKKKIR" +ORIGIN + 1 tctcttaact ccgtgtctag tttttcgttg actttccatt atgcttggat tttttattgt + 61 ttaattccct ttttttgtat acaagctcgt attcttaaca aataattggc atatcgggtt + 121 taaaaatact atgtgtttta aagaatctct catgagtttg acgccaataa cttagattaa + 181 aatcaccgtc accttatttt taggcacgtt cggcagtaac cttatcaaag gtatctcagt + 241 cattaagttt catgatagta tttactattt tgatggttct tgttattatc caatcgttaa + 301 aatgacaaaa acaaatagat aaatagataa atatttatgg ggaggacaag tgaacttatc + 361 atgattaatt gtaaacgatt gagttctgaa tgtttcaaat tatgaggaac aacaggagtt + 421 ggactattct ttaaacgcct cgacgatacc atcactcttc attagcctaa aattaacaag + 481 ttaaaatcat tagaataatc tcttttacaa aaaatattta tttaagttat agttgacgaa + 541 tatttaataa ttttattaat atcttgattt tctagttcct gaataatata gagataggtt + 601 tattgagtct taaacatact tgaatgacct agtcttataa ctatactgac aatagaaaca + 661 ttaacaaatc taaaacagtc ttaattctat cttgagaaag tattggtaat aatattattg + 721 tcgataacgc gagcataata aacggctctg attaaattct gaagtttgtt agatacaatg + 781 atttcgttcg aaggaactac aaaataaatt ataaggaggc actcaaaatg agtacaaaag + 841 attttaactt ggatttggta tctgtttcga agaaagattc aggtgcatca ccacgcatta + 901 caagtatttc gctatgtaca cccggttgta aaacaggagc tctgatgggt tgtaacatga + 961 aaacagcaac ttgtcattgt agtattcacg taagcaaata accaaatcaa aggatagtat + 1021 tttgttagtt cagacatgga tactatccta tttttataag ttatttaggg ttgctaaata + 1081 gcttataaaa ataaagagag gaaaaaacat gataaaaagt tcatttaaag ctcaaccgtt + 1141 tttagtaaga aatacaattt tatctccaaa cgataaacgg agttttactg aatatactca + 1201 agtcattgag actgtaagta aaaataaagt ttttttggaa cagttactac tagctaatcc + 1261 taaactctat gatgttatgc agaaatataa tgctggtctg ttaaagaaga aaagggttaa + 1321 aaaattattt gaatctattt acaagtatta taagagaagt tatttacgat caactccatt + 1381 tggattattt agtgaaactt caattggtgt tttttcgaaa agttcacagt acaagttaat + 1441 gggaaagact acaaagggta taagattgga tactcagtgg ttgattcgcc tagttcataa + 1501 aatggaagta gatttctcaa aaaagttatc atttactaga aataatgcaa attataagtt + 1561 tggagatcga gtttttcaag tttataccat aaatagtagt gagcttgaag aagtaaatat + 1621 taaatatacg aatgtttatc aaattatttc tgaattttgt gagaatgact atcaaaaata + 1681 tgaagatatt tgtgaaactg taacgctttg ctatggagac gaatatagag aactatcgga + 1741 acaatatctt ggcagtctga tagttaatca ttatttgatc tctaatttac aaaaagattt + 1801 gttgtcagat ttttcttgga acactttttt gactaaagtt gaagcaatag atgaagataa + 1861 aaaatatata attcctctga aaaaagttca aaagtttatt caagaatact cagaaataga + 1921 aattggtgaa ggtattgaga aactgaaaga aatatatcag gaaatgtcac aaattcttga + 1981 gaatgataat tatattcaaa ttgatttaat tagtgatagt gaaataaatt ttgatgttaa + 2041 acaaaagcaa caattagaac atttagctga gtttttagga aatacgacaa aatctgtaag + 2101 aagaacatat ttggatgact ataaggataa atttatcgaa aaatatggtg tagatcaaga + 2161 agtacaaata acagaattat ttgattctac atttggcata ggagctccat ataattataa + 2221 tcatcctcga aatgactttt atgagtccga accgagtact ctatactatt cagaagagga + 2281 gagagaaaag tacctcagca tgtatgtaga agccgttaaa aatcataatg taattaatct + 2341 tgacgactta gagtctcatt atcaaaaaat ggacttagaa aagaaaagtg aacttcaagg + 2401 gttagaatta tttttgaatt tggcaaagga gtatgaaaaa gatattttta ttttagggga + 2461 tatcgttgga aataataatt tgggaggggc atcaggtaga ttttctgcac tctctccgga + 2521 gttaacaagt tatcatagaa cgatagtaga ttctgtcgaa agagaaaatg agaataaaga + 2581 aattacatcg tgtgaaatag tatttcttcc agaaaatatc agacatgcta acgttatgca + 2641 tacatcaatt atgaggagga aagtacttcc attttttaca agtacaagtc acaatgaagt + 2701 tctgttaact aatatctata ttggaataga cgaaaaagaa aaattttatg cacgagacat + 2761 ttcaactcaa gaggtattga aattctacat tacaagcatg tacaataaaa cgttattcag + 2821 taatgagcta agatttcttt acgaaatttc attagatgac aagtttggta atttaccttg + 2881 ggaacttatt tacagagact ttgattatat tccacgttta gtatttgacg aaatagtaat + 2941 atctcctgct aaatggaaaa tttggggaag ggatgtaaat agtaagatga caataagaga + 3001 acttattcaa agcaaagaaa ttcccaaaga gttttatatt gtcaatggag ataataaagt + 3061 ttatttatca caggaaaacc cattggatat ggaaatttta gagtcggcga taaagaagag + 3121 ctcaaaaaga aaagatttta tagagctaca agaatatttt gaagatgaaa atatcataaa + 3181 taaaggagaa aaggggagag ttgccgatgt tgtagtgcct tttattagaa cgagagcatt + 3241 aggtaatgaa gggagagcat ttataagaga gaaaagagtt tcggttgaac ggcgtgaaaa + 3301 attgcccttt aacgagtggc tttatctaaa gttgtacatt tctataaatc gtcaaaatga + 3361 atttttactg tcgtatcttc cagatattca gaaaatagta gcaaacctgg gtggaaatct + 3421 attcttccta agatatactg atcctaaacc acatattaga ttgcgtataa aatgttcaga + 3481 tttattttta gcttacggat ctattcttga aatcttaaaa aggagtcgga aaaataggat + 3541 aatgtcaact tttgatattt ctatttatga tcaagaagta gaaagatatg gtggatttga + 3601 tactttagag ttatccgaag caatattttg tgccgattct aaaattattc caaatttgct + 3661 tacattgata aaagatacta ataatgattg gaaagtcgat gatgtatcaa tcttggtgaa + 3721 ttatttatat ctgaaatgct tctttgagaa tgataacaaa aagattctta attttttgaa + 3781 tttagttagt cctaaaaagg ttaaagaaaa tgtcaatgaa aagattgaac attatcttaa + 3841 gcttctgaaa gttaataatc taggtgacca aattttttat gacaagaatt ttaaagaatt + 3901 aaagcatgcc ataaaaaatt tatttttaaa aatgatagct caagattttg aacttcagaa + 3961 agtttattca attattgaca gtatcattca tgtccataat aaccgactaa ttggtattga + 4021 acgagataaa gagaaattaa tttattacac acttcaaagg ttgtttgttt cggaagaata + 4081 catgaaatga ggactaatag atggatgaag tgaaagaatt cacatcaaaa caatttttta + 4141 atactttact tactcttcca agcaccttga agttaatttt tcagttggaa aaacgttatg + 4201 caatttattt aattgtgcta aatgctatca cagcttttgt tccgttggct agtcttttta + 4261 tttatcaaga tttaataaac tctgtgctag gttcagggag acatcttatc aatattatta + 4321 tcatctattt tattgttcaa gtgataacaa cagttctggg acagctggaa agttatgtta + 4381 gtggaaaatt tgatatgcga ctttcttaca gtatcaatat gcgcctcatg aggactacct + 4441 catctcttga attaagtgat tatgagcagg ctgatatgta taatatcata gaaaaagtta + 4501 ctcaagacag cacttacaag ccttttcagc tatttaatgc tatcattgtt gtgctttcat + 4561 cgtttatctc attgttatct agtctatttt ttattggaac atggaacatt ggggtagcaa + 4621 ttttactcct tattgttcca gtattatctt tggtactttt tctcagagtg ggacaattag + 4681 agtttttaat ccagtggcag agagcaagtt ctgaaagaga aacatggtat attgtatatt + 4741 tattgactca tgatttttca tttaaagaaa tcaagttaaa taatattagc aattacttca + 4801 ttcataaatt tggaaaatta aagaaaggat ttatcaacca agatttagct attgctcgta + 4861 agaagacata tttcaatatt tttcttgatt tcattttgaa tttgataaat attcttacga + 4921 tatttgctat gatcctttcg gtaagagcag gaaaacttct tataggtaat ttggtaagtc + 4981 tcatacaagc tatttctaaa atcaatactt attctcaaac aatgattcaa aatatttaca + 5041 tcatttataa tactagtttg tttatggaac aactttttga gtttttaaag agagaaagtg + 5101 tagttcacaa aaaaatagaa gatactgaaa tatgcaatca acatatagga actgttaaag + 5161 taattaattt atcatatgtt taccctaatt cgaatgcctt tgcactaaag aatatcaatt + 5221 tatcctttga aaaaggagaa ttaactgcta ttgtaggaaa aaatggttca gggaaaagta + 5281 cactagtaaa gataatttca ggattatatc aaccaactat gggaataatc caatacgaca + 5341 aaatgagaag tagtttgatg cctgaggagt tttatcagaa aaacatatcg gtgctgttcc + 5401 aagattttgt gaagtatgag ttaacgataa gagagaatat aggattgagt gatttgtctt + 5461 ctcaatggga agatgagaaa attattaaag tactagataa tttaggactc gattttttga + 5521 aaactaataa tcaatatgta cttgatacgc agttaggaaa ttggtttcaa gaagggcatc + 5581 aactttcagg aggtcagtgg caaaaaattg cattagcaag gacattcttt aagaaagctt + 5641 caatttatat tttagatgaa ccaagtgctg cactcgatcc tgtagctgaa aaagaaatat + 5701 ttgattattt tgttgctctt tcggaaaata atatttcaat tttcatttct catagtttga + 5761 atgctgccag aaaagcaaat aaaatcgtgg ttatgaaaga tggacaggtc gaagatgttg + 5821 gaagtcatga tgtccttctg agaagatgtc aatactatca agaactttat tattcagagc + 5881 aatatgagga taatgatgaa taaaaaaaat ataaaaagaa atgttgaaaa aattattgct + 5941 caatgggatg agagaactag aaaaaataaa gaaaacttcg atttcggaga gttgactctc + 6001 tctacaggat tgcctggtat aattttaatg ttagcggagt taaaaaataa agataactca + 6061 aagatatatc agaaaaagat agacaattat attgaatata ttgttagcaa actttcaaca + 6121 tatgggcttt taacaggatc actttattcg ggagcagctg gcattgcatt aagtatccta + 6181 catttacgag aagatgacga aaaatataag aatcttcttg atagcctaaa tagatatatc + 6241 gaatatttcg tcagagaaaa aattgaagga tttaatttgg aaaacattac tcctcctgat + 6301 tatgacgtga ttgaaggttt atctgggata ctttcctatc tattattaat caacgacgag + 6361 caatatgatg atttgaaaat actcattatc aattttttat caaatctgac taaagaaaac + 6421 aaaggactaa tatcgcttta catcaaatcg gagaatcaga tgtctcaatc agaaagtgag + 6481 atgtatccac taggctgttt gaatatggga ttagcacatg gacttgctgg agtgggctgt + 6541 atcttagctt atgcccacat aaaaggatat agtaatgaag cctcgttgtc agctttgcaa + 6601 aaaattattt ttatttatga aaagtttgaa cttgaaagga aaaaacagtt tctatggaaa + 6661 gatggacttg tagcagatga attaaaaaaa gagaaagtaa ttagggaagc aagtttcatt + 6721 agagatgcat ggtgctatgg aggtccaggt attagtctgc tatacttata cggaggatta + 6781 gcactggata atgactattt tgtagataaa gcagaaaaaa tattagagtc agctatgcaa + 6841 aggaaacttg gtattgattc atatatgatt tgccatggct attctggttt aatagaaatt + 6901 tgttctttat ttaagcggct attaaataca aaaaagtttg attcatacat ggaagaattt + 6961 aatgttaata gtgagcaaat tcttgaagaa tacggagatg aaagtggcac gggttttctt + 7021 gaaggaataa gtggctgtat actggtatta tcgaaatttg aatattcaat caattttact + 7081 tattggagac aagcactgtt actttttgac gattttttga aaggagggaa gaggaaatga + 7141 gaagatattt aatacttatt gtggccttaa tagggataac aggtttatca gggtgttatc + 7201 aaacaagtca taaaaaggtg aggtttgacg aaggaagtta tactaatttt atttatgata + 7261 ataaatcgta tttcgtaact gataaggaga ttcctcagga gaacgttaac aattccaaag + 7321 taaaatttta taagctgttg attgttgaca tgaaaagtga gaaactttta tcaagtagca + 7381 acaaaaatag tgtgactttg gtcttaaata atatttatga ggcttctgac aagtcgctat + 7441 gtatgggtat taacgacaga tactataaga tacttccaga aagtgataag ggggcggtca + 7501 aagctttgag attacaaaac tttgatgtga caagcgatat ttctgatgat aattttgtta + 7561 ttgataaaaa tgattcacga aaaattgact atatgggaaa tatttacagt atatcggaca + 7621 ccaccgtatc tgatgaagaa ttgggagaat atcaggatgt tttagctgaa gtacgtgtgt + 7681 ttgattcagt tagtggcaaa agtatcccga ggtctgaatg ggggagaatt gataaggatg + 7741 gttcaaattc caaacagagt aggacggaat gggattatgg cgaaatccat tctattagag + 7801 gaaaatctct tactgaagca tttgccgttg agataaatga tgattttaag cttgcaacga + 7861 aggtaggaaa ctagagtgaa aaaaatacta ggtttccttt ttatcgtttg ttcgttgggt + 7921 ttatcagcaa ctgtgcatgg ggagacaaca aattcacaac agttactctc aaataatatt + 7981 aatacggaat taattaatca taattctaat gcaattttat cttcaacaga gggatcaacg + 8041 actgattcga ttaatctagg ggcgcagtca cctgcagtaa aatcgacaac aaggactgaa + 8101 ttggatgtaa ctggtgctgc taaaacttta ttacagacat cagctgttca aaaagaaatg + 8161 aaagtttcgt tgcaagaaac tcaagttagt tctgaattca gtaagagaga tagcgttaca + 8221 aataaagaag cagttccagt atctaaggat gagctacttg agcaaagtga agtagtcgtt + 8281 tcaacatcat cgattcaaaa aaataaaatc ctcgataata agaagaatag agctaacttc + 8341 gttacttcct ctccgcttat taaggaaaaa ccatcaaatt ctaaagatgc atctggtgta + 8401 attgataatt ctgcttctcc tctatcttat cgtaaagcta aggaagtggt atctcttaga + 8461 caacctttaa aaaatcaaaa agtagaggca caacctctat tgataagtaa ttcttctgaa + 8521 aagaaagcaa gtgtttatac aaattcacat gatttttggg attatcagtg ggatatgaaa + 8581 tatgtgacaa ataatggaga aagctatgcg ctctaccagc cctcaaagaa aatttctgtt + 8641 ggaattattg attcaggaat catggaagaa catcctgatt tgtcaaatag tttaggaaat + 8701 tattttaaaa atcttgttcc taagggaggg tttgataatg aagaacctga tgaaactgga + 8761 aatccaagtg atattgtcga caaaatggga cacgggacgg aagtcgcagg tcagattaca + 8821 gcaaatggta atattttagg agtagcacca gggattactg taaatatata cagagtattt + 8881 ggtgaaaatc tttcgaaatc ggaatgggta gctagagcaa taagaagagc tgcggatgat + 8941 gggaacaagg tcatcaatat aagtgctgga cagtatctta tgatttcagg atcgtatgat + 9001 gatggaacaa atgattatca agagtatctt aattataagt cagcaataaa ttatgcaaca + 9061 gcaaaaggaa gtattgttgt cgcagctctt ggtaatgata gtttaaacat acaagataac + 9121 caaacaatga taaactttct taagcgtttc agaagtataa aggttcctgg aaaagttgta + 9181 gatgcaccga gtgtatttga ggatgtaata gccgtaggtg gaatagatgg ttatggtaat + 9241 atttctgatt ttagtaatat tggagcggat gcaatttatg ctcctgctgg cacaacggcc + 9301 aattttaaaa aatatgggca agataaattt gtcagtcagg gttattattt gaaagattgg + 9361 ctttttacaa ctactaatac tggctggtac caatatgttt atggcaactc atttgctact + 9421 cctaaagtat ctggggcact ggcattagta gttgataaat atggaataaa gaatcctaac + 9481 caactaaaaa ggtttcttct aatgaattct ccagaagtta atgggaatag agtattgaat + 9541 attgttgatt tattgaatgg gaaaaataaa gcttttagct tagatacaga taaaggtcag + 9601 gatgatgcta ttaaccataa atcgatggag aatcttaaag agtctaggga tacaatgaaa + 9661 caggaacaag ataaagaaat tcaaagaaat acaaataaca atttttctat caaaaatgat + 9721 tttcataaca tttcaaaaga agtaatttca gttgattata atattaatca aaaaatggct + 9781 aataatcgaa attcgagagg tgctgtttct gtacgaagtc aagaaatttt acctgttact + 9841 ggagatggag aagatttttt accggcttta ggtatagtgt gtatctcaat ccttggtata + 9901 ttgaaaagaa agactaaaaa ttgatagatt atatttcttc agaatgaatg gtataatgaa + 9961 gtaatgagta ctaaacaatc ggaggtaaag tggtgtataa aattttaata gttgatgatg + 10021 atcaggaaat tttaaaatta atgaagacag cattagaaat gagaaactat gaagttgcga + 10081 tgcatcaaaa catttcactt cccttggata ttactgattt tcagggattt gatttgattt + 10141 tgttagatat catgatgtca aatattgaag ggacagaaat ttgtaaaagg attcgcagag + 10201 aaatatcaac tccaattatc tttgttagtg cgaaagatac agaagaggat attataaacg + 10261 gcttaggtat tggtggggat gactatatta ctaagccttt tagccttaaa cagttggttg + 10321 caaaagtgga agcaaatata aagcgagagg aacgcaataa acatgcagtt catgtttttt + 10381 cagagattcg tagagattta ggaccaatta cattttattt agaagaaagg cgagtctgtg + 10441 tcaatggtca aacaattcca ctgacttgtc gtgaatacga tattcttgaa ttactatcac + 10501 aacgaacttc taaagtttat acgagagagg atatttatga tgacgtatat gatgaatatt + 10561 ctaatgcact ttttcggtca atctcggaat atatttatca gattaggagt aagtttgcac + 10621 catacgatat taatccgata aaaacggttc ggggacttgg gtatcagtgg catgggtaaa + 10681 aaatattcaa tgcgtcgacg gatatggcaa gctgtcattg aaattatcat aggtacttgt + 10741 ctacttatcc tgttgttact gggcttgact ttctttctac gacaaattgg acaaatcagt + 10801 ggttcagaaa ctattcgttt atctttagat tcagataatt taactatttc tgatatcgaa + 10861 cgtgatatga aacactaccc atatgattat attatttttg acaatgatac aagtaaaatt + 10921 ttgggaggac attatgtcaa gtcggatgta cctagttttg tagcttcaaa acagtcttca + 10981 cataatatta cagaaggaga aattacttat acttattcaa gcaataagca tttttcagtt + 11041 gttttaagac aaaacagtat gcctgaattt acaaatcata cgcttcgttc aatttcttat + 11101 aatcaattta cttacctttt cttttttctt ggtgaaataa tactcattat tttttctgtc + 11161 tatcatctca ttagagaatt ttctaagaat tttcaagccg ttcaaaagat tgcattgaag + 11221 atgggggaaa taactacttt tcctgaacaa gaggaatcaa aaattattga atttgatcag + 11281 gttctgaata acttatattc gaaaagtaag gagttagctt tccttattga agcggagcgt + 11341 catgaaaaac atgatttatc cttccaggtt gctgcacttt cacatgatgt taagacacct + 11401 ttaacagtat taaaaggaaa tattgaactg ctagagatga ctgaagtaaa tgaacaacaa + 11461 gctgatttta ttgagtcaat gaaaaatagt ttgactgttt ttgacaagta ttttaacaca + 11521 atgattagtt atacaaaact tttgaatgat gaaaatgatt acaaagcgac aatctccctg + 11581 gaggattttt tgatagattt atcagttgag ttggaagagt tgtcaacaac ttatcaagtg + 11641 gattatcagc tagttaaaaa aacagattta accacttttt acggaaatac attagcttta + 11701 agtcgagcac ttatcaatat ctttgttaat gcctgtcagt atgctaaaga gggtgaaaaa + 11761 atagtcagtt tgagtattta tgatgatgaa aaatatctct attttgaaat ctggaataat + 11821 ggtcatcctt tttctgaaca agcaaaaaaa aatgctggaa aactattttt cacagaagat + 11881 actggacgta gtgggaaaca ctatgggatt ggactatctt ttgctcaagg tgtagcttta + 11941 aaacatcaag gaaacttaat tctcagtaat cctcaaaaag gtggggcaga agttatccta + 12001 aaaataaaaa agtaatttag taatctctaa ggattacttt ttttgtttct gaatagattc + 12061 tgaaaattgt tttatatact ttttttaaac ataaaataaa gtgaggaaat ataatgcagg + 12121 taaaaattca aaatctttct aaaacatata aagaaaagca ggtgctacaa gatatcagtt + 12181 ttgatattaa atctggaaca gtctgtggtt tattaggagt taacggtgca ggaaaatcaa + 12241 ctttgatgaa aattttgttt ggtttaattt ctgcagatac tggaaaaatt ttttttgatg + 12301 gacaagaaaa gacaaataat caacttggag ccttaatcga ggctccagca atatatatga + 12361 atttatctgc tttcgataat cttaaaacta aggctttgct ttttggaatt tcagataaga + 12421 gaattcatga aactctagaa gtgattggtt tggcagaaac aggaaagaaa agagcaggaa + 12481 aattctcttt agggatgaaa caacgtttgg gaattggtat ggctattctt acagaacctc + 12541 aatttttaat tcttgatgaa cctactaatg gtttggatcc tgatggtatt gcggagttgt + 12601 taaacttaat cttaaaactt aaagctaaag gtgtgacaat cttgatttct agtcatcagt + 12661 tgcacgaaat aagtaaagta gctagtcaaa ttattatttt gaacaaaggt aagattcgtt + 12721 ataatcatgc gaacaataaa gaagacgaca ttgaacagtt attctttaag attgtgcatg + 12781 gaggaatgtg atatgaaaag aataatagca tcagaagcaa taaaattaaa aaaatcagga + 12841 actcttagat tggtattaat tatccctttt gtgactctat ttatagcatt tcttatgggt + 12901 ggaatacaga tttttagtgt tttttcaatt tattggtggg aaactggttt tttattcctt + 12961 ttgatgagtt tgctttttct ttatgatata aaatcagagg agcaagctgg aaattttcaa + 13021 aatgtgaaat ggaaaaagct gagttggaaa attcatttgg ccaaaatgtt gttgatttgg + 13081 ctaagaggta tactagcgag catagtcttg attattttgc tttatttggt tgcttttgtg + 13141 tttcaaggta ttgtagtggt ggattttatg aaagtaagtg tggcattgat tgctatatta + 13201 ctagcagctt cttggaattt accctttata tacttgattt tcaagtggat taatacttac + 13261 gtattgttag ctgcgaatac cttgatttgt ttaattgttg ccccttttgt tgcacaaact + 13321 ccagtatggt tcttgctacc atacacttat cactataaag ttacagaaag tttgttaaat + 13381 atcaaaccat caggagattt gttaacaggg aagataaatt tcagtatttg ggaagtttta + 13441 ttaccatttg gactttccat agttgtaacg ataggagttt cgtatttact taaaggagtg + 13501 atagaacatg ataagaagtg aatgtctcaa attaaaaaat agcttagggt tttatttagt + 13561 ttttctcttt actttattag agcttttaac ggttcctatt tatttagctt ttggaagaag + 13621 tcatgtttca atgactgatt tatcgctcat gatttttttg ttttttccgt tactggttac + 13681 aattttgtct attctaatct ttgaacagga gagtctggcc aatcgtttcc aagaaataaa + 13741 tgtaaataaa aaaagtagca gaatttggtt atcaaagcta atagtagtgg atttcctttt + 13801 gttctttcca tcagcaatga tctggataat tacgggagtt tcacaggcag tagggcaaca + 13861 aggaatgatg atcgcaacag ctagctggtt gatggcaatt tttcttaatc attttcatct + 13921 tttattgacc tttataatca atcgaggagg gagcatgatt atcgcgatta ttgaaatatt + 13981 actcattatt tttgccagta ataaagtttt attagcagct tattggtgtc ccattgcttt + 14041 acctgttaat tttatgataa ctgggcggtg tgcttatctg atagctgccg tagggtggat + 14101 tgttttatcc acaataattc ttgtagcatt atctaaaaaa aagattagat aaagtatttt + 14161 ttcttatggt aattcgacct aatatgtttt tgctatttat ctcttatttc tgtctatagt + 14221 aatttattca aagtaccttt agactcataa gtttgaataa aatttccatc aatatgtgac + 14281 agttcttact ctaagaataa tgtttccgtc gttagttttc ttttctcagt aattttttat + 14341 aaagtggttt cctatcgact tacacttttt cttttggagt tattggtgtc aagctcatga + 14401 gagattcttt aaagcatata acatttttaa actgatatgc cagttatttg tgaagaatat + 14461 gagtttctat ataaaagaga aaattaaaca atgaaaaatc caagtataaa tacttggatt + 14521 tttcattatt tttgatagac atataaagtg cattttaacc tagaattaaa agatgtatcg + 14581 ttgataataa taaaatgaac caaagcaaaa ctgacgttaa gtcaatttat tagagtcaaa + 14641 aacgtataat tttaatgtat ttattttaag tgatttcttt ttcaaattga ttaggcgtaa + 14701 gataccctaa actttgatgg attcgttttg aattataaaa ggcttcaata taccagaaaa + 14761 tactctgata ggcttcttca aagttcttat atttaaattg atagacccac tctcttttta + 14821 aatgtccatg ccaagattca agactggcat tatgataagg atagccctta cgactgaagg + 14881 agtgagtcat tccagagttt tttattgtct cttcatactc atgactcgta tactgacttc + 14941 cttggtcaga atgaagcatc acagcttctg gataattttg tgattccatt gccttattca + 15001 aagtcctttg cactaa +// diff --git a/tests/data/HM219853.1.final.minimal.gbk b/tests/data/HM219853.1.final.minimal.gbk new file mode 100644 index 0000000..91502d3 --- /dev/null +++ b/tests/data/HM219853.1.final.minimal.gbk @@ -0,0 +1,539 @@ +LOCUS HM219853 15016 bp DNA linear BCT 05-OCT-2011 +DEFINITION Lactococcus lactis subsp. lactis nisin biosynthetic gene cluster, + complete sequence. +ACCESSION HM219853 +VERSION HM219853.1 GI:299832736 +KEYWORDS . +SOURCE Lactococcus lactis subsp. lactis + ORGANISM Lactococcus lactis subsp. lactis + Bacteria; Firmicutes; Lactobacillales; Streptococcaceae; + Lactococcus. +REFERENCE 1 (bases 1 to 15016) + AUTHORS Trmcic,A., Samelis,J., Monnet,C., Rogelj,I. and Bogovic Matijasic,B. + TITLE Complete nisin A gene cluster from Lactococcus lactis M78 (HM219853) + - obtaining the nucleic acid sequence and comparing it to other + published nisin sequences + JOURNAL Genes Genomics 33 (3), 217-221 (2011) +REFERENCE 2 (bases 1 to 15016) + AUTHORS Trmcic,A., Monnet,C., Rogelj,I. and Bogovic Matijasic,B. + TITLE Expression of nisin genes in cheese--a quantitative real-time + polymerase chain reaction approach + JOURNAL J. Dairy Sci. 94 (1), 77-85 (2011) + PUBMED 21183019 +REFERENCE 3 (bases 1 to 15016) + AUTHORS Trmcic,A., Samelis,J., Monnet,C., Rogelj,I. and Bogovic-Matijasic,B. + TITLE Direct Submission + JOURNAL Submitted (12-MAY-2010) Department of Animal Science - Chair of + Dairy Science, Biotechnical Faculty, Groblje 3, Domzale 1230, + Slovenia +FEATURES Location/Qualifiers + source 1..15016 + /collected_by="John Samelis" + /country="Greece" + /db_xref="taxon:1360" + /isolation_source="raw milk" + /mol_type="genomic DNA" + /organism="Lactococcus lactis subsp. lactis" + /strain="M78" + /sub_species="lactis" + cluster 1..15016 + /contig_edge="True" + /cutoff=20000 + /extension=10000 + /note="Cluster number: 1" + /note="Detection rule(s) for this cluster type: + lantipeptide: (cluster(LANC_like,Flavoprotein) or + cluster(LANC_like,Trp_halogenase) or + cluster(LANC_like,p450) or (LANC_like & Pkinase) or + (LANC_like & DUF4135) or cluster(LANC_like,Lant_dehyd_N) or + cluster(LANC_like,Lant_dehyd_C) or + cluster(LANC_like,adh_short) or + cluster(LANC_like,adh_short_C2) or TIGR03731 or Antimicr18 + or Gallidermin or L_biotic_A or TIGR03731 or leader_d or + leader_eh or leader_abc or mature_d or mature_ab or + mature_a or mature_b or mature_ha or mature_h_beta or + lacticin_l or lacticin_mat or LD_lanti_pre or + strep_PEQAXS);" + /product="lantipeptide" + gene 828..1001 + /gene="nisA" + CDS 828..1001 + /codon_start=1 + /db_xref="GI:299832737" + /gene="nisA" + /note="prenisin; nisin A structural protein" + /product="NisA" + /protein_id="ADJ56352.1" + /sec_met="Type: lantipeptide" + /sec_met="Domains detected: TIGR03731 (E-value: 1.3e-24, + bitscore: 75.6, seeds: 23); mature_a (E-value: 6.5e-08, + bitscore: 21.5, seeds: 5)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MSTKDFNLDLVSVSKKDSGASPRITSISLCTPGCKTGALMGCNMK + TATCHCSIHVSK" + misc_feature 828..14152 + /note="nisin biosynthetic gene cluster" + gene 1109..4090 + /gene="nisB" + CDS 1109..4090 + /codon_start=1 + /db_xref="GI:299832738" + /function="dehydration of serine and threonine" + /gene="nisB" + /note="nisin biosynthesis protein" + /product="NisB" + /protein_id="ADJ56353.1" + /sec_met="Type: lantipeptide" + /sec_met="Domains detected: Lant_dehyd_N (E-value: 4.5e-25, + bitscore: 77.4, seeds: 38); Lant_dehyd_C (E-value: + 6.8e-120, bitscore: 390.3, seeds: 37); Lant_dehydr_C + (E-value: 2.8e-18, bitscore: 56.0, seeds: ?)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MIKSSFKAQPFLVRNTILSPNDKRSFTEYTQVIETVSKNKVFLEQ + LLLANPKLYDVMQKYNAGLLKKKRVKKLFESIYKYYKRSYLRSTPFGLFSETSIGVFSK + SSQYKLMGKTTKGIRLDTQWLIRLVHKMEVDFSKKLSFTRNNANYKFGDRVFQVYTINS + SELEEVNIKYTNVYQIISEFCENDYQKYEDICETVTLCYGDEYRELSEQYLGSLIVNHY + LISNLQKDLLSDFSWNTFLTKVEAIDEDKKYIIPLKKVQKFIQEYSEIEIGEGIEKLKE + IYQEMSQILENDNYIQIDLISDSEINFDVKQKQQLEHLAEFLGNTTKSVRRTYLDDYKD + KFIEKYGVDQEVQITELFDSTFGIGAPYNYNHPRNDFYESEPSTLYYSEEEREKYLSMY + VEAVKNHNVINLDDLESHYQKMDLEKKSELQGLELFLNLAKEYEKDIFILGDIVGNNNL + GGASGRFSALSPELTSYHRTIVDSVERENENKEITSCEIVFLPENIRHANVMHTSIMRR + KVLPFFTSTSHNEVLLTNIYIGIDEKEKFYARDISTQEVLKFYITSMYNKTLFSNELRF + LYEISLDDKFGNLPWELIYRDFDYIPRLVFDEIVISPAKWKIWGRDVNSKMTIRELIQS + KEIPKEFYIVNGDNKVYLSQENPLDMEILESAIKKSSKRKDFIELQEYFEDENIINKGE + KGRVADVVVPFIRTRALGNEGRAFIREKRVSVERREKLPFNEWLYLKLYISINRQNEFL + LSYLPDIQKIVANLGGNLFFLRYTDPKPHIRLRIKCSDLFLAYGSILEILKRSRKNRIM + STFDISIYDQEVERYGGFDTLELSEAIFCADSKIIPNLLTLIKDTNNDWKVDDVSILVN + YLYLKCFFENDNKKILNFLNLVSPKKVKENVNEKIEHYLKLLKVNNLGDQIFYDKNFKE + LKHAIKNLFLKMIAQDFELQKVYSIIDSIIHVHNNRLIGIERDKEKLIYYTLQRLFVSE + EYMK" + gene 4101..5903 + /gene="nisT" + CDS 4101..5903 + /codon_start=1 + /db_xref="GI:299832739" + /function="secretion of prenisin" + /gene="nisT" + /note="nisin transport ATP-binding protein" + /product="NisT" + /protein_id="ADJ56354.1" + /transl_table=11 + /translation="MDEVKEFTSKQFFNTLLTLPSTLKLIFQLEKRYAIYLIVLNAITA + FVPLASLFIYQDLINSVLGSGRHLINIIIIYFIVQVITTVLGQLESYVSGKFDMRLSYS + INMRLMRTTSSLELSDYEQADMYNIIEKVTQDSTYKPFQLFNAIIVVLSSFISLLSSLF + FIGTWNIGVAILLLIVPVLSLVLFLRVGQLEFLIQWQRASSERETWYIVYLLTHDFSFK + EIKLNNISNYFIHKFGKLKKGFINQDLAIARKKTYFNIFLDFILNLINILTIFAMILSV + RAGKLLIGNLVSLIQAISKINTYSQTMIQNIYIIYNTSLFMEQLFEFLKRESVVHKKIE + DTEICNQHIGTVKVINLSYVYPNSNAFALKNINLSFEKGELTAIVGKNGSGKSTLVKII + SGLYQPTMGIIQYDKMRSSLMPEEFYQKNISVLFQDFVKYELTIRENIGLSDLSSQWED + EKIIKVLDNLGLDFLKTNNQYVLDTQLGNWFQEGHQLSGGQWQKIALARTFFKKASIYI + LDEPSAALDPVAEKEIFDYFVALSENNISIFISHSLNAARKANKIVVMKDGQVEDVGSH + DVLLRRCQYYQELYYSEQYEDNDE" + gene 5896..7140 + /gene="nisC" + CDS 5896..7140 + /codon_start=1 + /db_xref="GI:299832740" + /function="cyclization, formation of (methyl)lantionines" + /gene="nisC" + /note="nisin biosynthesis protein" + /product="NisC" + /protein_id="ADJ56355.1" + /sec_met="Type: lantipeptide" + /sec_met="Domains detected: LANC_like (E-value: 8.3e-76, + bitscore: 244.5, seeds: 47)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MNKKNIKRNVEKIIAQWDERTRKNKENFDFGELTLSTGLPGIILM + LAELKNKDNSKIYQKKIDNYIEYIVSKLSTYGLLTGSLYSGAAGIALSILHLREDDEKY + KNLLDSLNRYIEYFVREKIEGFNLENITPPDYDVIEGLSGILSYLLLINDEQYDDLKIL + IINFLSNLTKENKGLISLYIKSENQMSQSESEMYPLGCLNMGLAHGLAGVGCILAYAHI + KGYSNEASLSALQKIIFIYEKFELERKKQFLWKDGLVADELKKEKVIREASFIRDAWCY + GGPGISLLYLYGGLALDNDYFVDKAEKILESAMQRKLGIDSYMICHGYSGLIEICSLFK + RLLNTKKFDSYMEEFNVNSEQILEEYGDESGTGFLEGISGCILVLSKFEYSINFTYWRQ + ALLLFDDFLKGGKRK" + gene 7137..7874 + /gene="nisI" + CDS 7137..7874 + /codon_start=1 + /db_xref="GI:299832741" + /gene="nisI" + /note="nisin immunity protein; nisin-binding lipoprotein" + /product="NisI" + /protein_id="ADJ56356.1" + /transl_table=11 + /translation="MRRYLILIVALIGITGLSGCYQTSHKKVRFDEGSYTNFIYDNKSY + FVTDKEIPQENVNNSKVKFYKLLIVDMKSEKLLSSSNKNSVTLVLNNIYEASDKSLCMG + INDRYYKILPESDKGAVKALRLQNFDVTSDISDDNFVIDKNDSRKIDYMGNIYSISDTT + VSDEELGEYQDVLAEVRVFDSVSGKSIPRSEWGRIDKDGSNSKQSRTEWDYGEIHSIRG + KSLTEAFAVEINDDFKLATKVGN" + gene 7876..9924 + /gene="nisP" + CDS 7876..9924 + /codon_start=1 + /db_xref="GI:299832742" + /function="cleavage of leader peptide from prenisin and + activation of nisin" + /gene="nisP" + /note="nisin leader peptide-processing serine protease" + /product="NisP" + /protein_id="ADJ56357.1" + /sec_met="Type: none" + /sec_met="Domains detected: Peptidase_S8 (E-value: 2.3e-50, + bitscore: 161.0, seeds: ?)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MKKILGFLFIVCSLGLSATVHGETTNSQQLLSNNINTELINHNSN + AILSSTEGSTTDSINLGAQSPAVKSTTRTELDVTGAAKTLLQTSAVQKEMKVSLQETQV + SSEFSKRDSVTNKEAVPVSKDELLEQSEVVVSTSSIQKNKILDNKKNRANFVTSSPLIK + EKPSNSKDASGVIDNSASPLSYRKAKEVVSLRQPLKNQKVEAQPLLISNSSEKKASVYT + NSHDFWDYQWDMKYVTNNGESYALYQPSKKISVGIIDSGIMEEHPDLSNSLGNYFKNLV + PKGGFDNEEPDETGNPSDIVDKMGHGTEVAGQITANGNILGVAPGITVNIYRVFGENLS + KSEWVARAIRRAADDGNKVINISAGQYLMISGSYDDGTNDYQEYLNYKSAINYATAKGS + IVVAALGNDSLNIQDNQTMINFLKRFRSIKVPGKVVDAPSVFEDVIAVGGIDGYGNISD + FSNIGADAIYAPAGTTANFKKYGQDKFVSQGYYLKDWLFTTTNTGWYQYVYGNSFATPK + VSGALALVVDKYGIKNPNQLKRFLLMNSPEVNGNRVLNIVDLLNGKNKAFSLDTDKGQD + DAINHKSMENLKESRDTMKQEQDKEIQRNTNNNFSIKNDFHNISKEVISVDYNINQKMA + NNRNSRGAVSVRSQEILPVTGDGEDFLPALGIVCISILGILKRKTKN" + gene 9993..10679 + /gene="nisR" + CDS 9993..10679 + /codon_start=1 + /db_xref="GI:299832743" + /function="transcriptional activator/response regulator in + two-component regulatory system" + /gene="nisR" + /note="nisin biosynthesis regulatory protein" + /product="NisR" + /protein_id="ADJ56358.1" + /transl_table=11 + /translation="MYKILIVDDDQEILKLMKTALEMRNYEVAMHQNISLPLDITDFQG + FDLILLDIMMSNIEGTEICKRIRREISTPIIFVSAKDTEEDIINGLGIGGDDYITKPFS + LKQLVAKVEANIKREERNKHAVHVFSEIRRDLGPITFYLEERRVCVNGQTIPLTCREYD + ILELLSQRTSKVYTREDIYDDVYDEYSNALFRSISEYIYQIRSKFAPYDINPIKTVRGL + GYQWHG" + gene 10672..12015 + /gene="nisK" + CDS 10672..12015 + /codon_start=1 + /db_xref="GI:299832744" + /function="sensor protein in two-component regulatory + system" + /gene="nisK" + /note="nisin biosynthesis sensor protein" + /product="NisK" + /protein_id="ADJ56359.1" + /transl_table=11 + /translation="MGKKYSMRRRIWQAVIEIIIGTCLLILLLLGLTFFLRQIGQISGS + ETIRLSLDSDNLTISDIERDMKHYPYDYIIFDNDTSKILGGHYVKSDVPSFVASKQSSH + NITEGEITYTYSSNKHFSVVLRQNSMPEFTNHTLRSISYNQFTYLFFFLGEIILIIFSV + YHLIREFSKNFQAVQKIALKMGEITTFPEQEESKIIEFDQVLNNLYSKSKELAFLIEAE + RHEKHDLSFQVAALSHDVKTPLTVLKGNIELLEMTEVNEQQADFIESMKNSLTVFDKYF + NTMISYTKLLNDENDYKATISLEDFLIDLSVELEELSTTYQVDYQLVKKTDLTTFYGNT + LALSRALINIFVNACQYAKEGEKIVSLSIYDDEKYLYFEIWNNGHPFSEQAKKNAGKLF + FTEDTGRSGKHYGIGLSFAQGVALKHQGNLILSNPQKGGAEVILKIKK" + gene 12114..12791 + /gene="nisF" + CDS 12114..12791 + /codon_start=1 + /db_xref="GI:299832745" + /function="secretion of cell-associated nisin" + /gene="nisF" + /note="nisin transport/immunity protein; component of + ABC-transporter" + /product="NisF" + /protein_id="ADJ56360.1" + /transl_table=11 + /translation="MQVKIQNLSKTYKEKQVLQDISFDIKSGTVCGLLGVNGAGKSTLM + KILFGLISADTGKIFFDGQEKTNNQLGALIEAPAIYMNLSAFDNLKTKALLFGISDKRI + HETLEVIGLAETGKKRAGKFSLGMKQRLGIGMAILTEPQFLILDEPTNGLDPDGIAELL + NLILKLKAKGVTILISSHQLHEISKVASQIIILNKGKIRYNHANNKEDDIEQLFFKIVH + GGM" + gene 12793..13521 + /gene="nisE" + CDS 12793..13521 + /codon_start=1 + /db_xref="GI:299832746" + /function="secretion of cell-associated nisin" + /gene="nisE" + /note="nisin transport/immunity protein; component of + ABC-transporter" + /product="NisE" + /protein_id="ADJ56361.1" + /transl_table=11 + /translation="MKRIIASEAIKLKKSGTLRLVLIIPFVTLFIAFLMGGIQIFSVFS + IYWWETGFLFLLMSLLFLYDIKSEEQAGNFQNVKWKKLSWKIHLAKMLLIWLRGILASI + VLIILLYLVAFVFQGIVVVDFMKVSVALIAILLAASWNLPFIYLIFKWINTYVLLAANT + LICLIVAPFVAQTPVWFLLPYTYHYKVTESLLNIKPSGDLLTGKINFSIWEVLLPFGLS + IVVTIGVSYLLKGVIEHDKK" + gene 13508..14152 + /gene="nisG" + CDS 13508..14152 + /codon_start=1 + /db_xref="GI:299832747" + /function="secretion of cell-associated nisin" + /gene="nisG" + /note="nisin transport/immunity protein; component of + ABC-transporter" + /product="NisG" + /protein_id="ADJ56362.1" + /transl_table=11 + /translation="MIRSECLKLKNSLGFYLVFLFTLLELLTVPIYLAFGRSHVSMTDL + SLMIFLFFPLLVTILSILIFEQESLANRFQEINVNKKSSRIWLSKLIVVDFLLFFPSAM + IWIITGVSQAVGQQGMMIATASWLMAIFLNHFHLLLTFIINRGGSMIIAIIEILLIIFA + SNKVLLAAYWCPIALPVNFMITGRCAYLIAAVGWIVLSTIILVALSKKKIR" +ORIGIN + 1 tctcttaact ccgtgtctag tttttcgttg actttccatt atgcttggat tttttattgt + 61 ttaattccct ttttttgtat acaagctcgt attcttaaca aataattggc atatcgggtt + 121 taaaaatact atgtgtttta aagaatctct catgagtttg acgccaataa cttagattaa + 181 aatcaccgtc accttatttt taggcacgtt cggcagtaac cttatcaaag gtatctcagt + 241 cattaagttt catgatagta tttactattt tgatggttct tgttattatc caatcgttaa + 301 aatgacaaaa acaaatagat aaatagataa atatttatgg ggaggacaag tgaacttatc + 361 atgattaatt gtaaacgatt gagttctgaa tgtttcaaat tatgaggaac aacaggagtt + 421 ggactattct ttaaacgcct cgacgatacc atcactcttc attagcctaa aattaacaag + 481 ttaaaatcat tagaataatc tcttttacaa aaaatattta tttaagttat agttgacgaa + 541 tatttaataa ttttattaat atcttgattt tctagttcct gaataatata gagataggtt + 601 tattgagtct taaacatact tgaatgacct agtcttataa ctatactgac aatagaaaca + 661 ttaacaaatc taaaacagtc ttaattctat cttgagaaag tattggtaat aatattattg + 721 tcgataacgc gagcataata aacggctctg attaaattct gaagtttgtt agatacaatg + 781 atttcgttcg aaggaactac aaaataaatt ataaggaggc actcaaaatg agtacaaaag + 841 attttaactt ggatttggta tctgtttcga agaaagattc aggtgcatca ccacgcatta + 901 caagtatttc gctatgtaca cccggttgta aaacaggagc tctgatgggt tgtaacatga + 961 aaacagcaac ttgtcattgt agtattcacg taagcaaata accaaatcaa aggatagtat + 1021 tttgttagtt cagacatgga tactatccta tttttataag ttatttaggg ttgctaaata + 1081 gcttataaaa ataaagagag gaaaaaacat gataaaaagt tcatttaaag ctcaaccgtt + 1141 tttagtaaga aatacaattt tatctccaaa cgataaacgg agttttactg aatatactca + 1201 agtcattgag actgtaagta aaaataaagt ttttttggaa cagttactac tagctaatcc + 1261 taaactctat gatgttatgc agaaatataa tgctggtctg ttaaagaaga aaagggttaa + 1321 aaaattattt gaatctattt acaagtatta taagagaagt tatttacgat caactccatt + 1381 tggattattt agtgaaactt caattggtgt tttttcgaaa agttcacagt acaagttaat + 1441 gggaaagact acaaagggta taagattgga tactcagtgg ttgattcgcc tagttcataa + 1501 aatggaagta gatttctcaa aaaagttatc atttactaga aataatgcaa attataagtt + 1561 tggagatcga gtttttcaag tttataccat aaatagtagt gagcttgaag aagtaaatat + 1621 taaatatacg aatgtttatc aaattatttc tgaattttgt gagaatgact atcaaaaata + 1681 tgaagatatt tgtgaaactg taacgctttg ctatggagac gaatatagag aactatcgga + 1741 acaatatctt ggcagtctga tagttaatca ttatttgatc tctaatttac aaaaagattt + 1801 gttgtcagat ttttcttgga acactttttt gactaaagtt gaagcaatag atgaagataa + 1861 aaaatatata attcctctga aaaaagttca aaagtttatt caagaatact cagaaataga + 1921 aattggtgaa ggtattgaga aactgaaaga aatatatcag gaaatgtcac aaattcttga + 1981 gaatgataat tatattcaaa ttgatttaat tagtgatagt gaaataaatt ttgatgttaa + 2041 acaaaagcaa caattagaac atttagctga gtttttagga aatacgacaa aatctgtaag + 2101 aagaacatat ttggatgact ataaggataa atttatcgaa aaatatggtg tagatcaaga + 2161 agtacaaata acagaattat ttgattctac atttggcata ggagctccat ataattataa + 2221 tcatcctcga aatgactttt atgagtccga accgagtact ctatactatt cagaagagga + 2281 gagagaaaag tacctcagca tgtatgtaga agccgttaaa aatcataatg taattaatct + 2341 tgacgactta gagtctcatt atcaaaaaat ggacttagaa aagaaaagtg aacttcaagg + 2401 gttagaatta tttttgaatt tggcaaagga gtatgaaaaa gatattttta ttttagggga + 2461 tatcgttgga aataataatt tgggaggggc atcaggtaga ttttctgcac tctctccgga + 2521 gttaacaagt tatcatagaa cgatagtaga ttctgtcgaa agagaaaatg agaataaaga + 2581 aattacatcg tgtgaaatag tatttcttcc agaaaatatc agacatgcta acgttatgca + 2641 tacatcaatt atgaggagga aagtacttcc attttttaca agtacaagtc acaatgaagt + 2701 tctgttaact aatatctata ttggaataga cgaaaaagaa aaattttatg cacgagacat + 2761 ttcaactcaa gaggtattga aattctacat tacaagcatg tacaataaaa cgttattcag + 2821 taatgagcta agatttcttt acgaaatttc attagatgac aagtttggta atttaccttg + 2881 ggaacttatt tacagagact ttgattatat tccacgttta gtatttgacg aaatagtaat + 2941 atctcctgct aaatggaaaa tttggggaag ggatgtaaat agtaagatga caataagaga + 3001 acttattcaa agcaaagaaa ttcccaaaga gttttatatt gtcaatggag ataataaagt + 3061 ttatttatca caggaaaacc cattggatat ggaaatttta gagtcggcga taaagaagag + 3121 ctcaaaaaga aaagatttta tagagctaca agaatatttt gaagatgaaa atatcataaa + 3181 taaaggagaa aaggggagag ttgccgatgt tgtagtgcct tttattagaa cgagagcatt + 3241 aggtaatgaa gggagagcat ttataagaga gaaaagagtt tcggttgaac ggcgtgaaaa + 3301 attgcccttt aacgagtggc tttatctaaa gttgtacatt tctataaatc gtcaaaatga + 3361 atttttactg tcgtatcttc cagatattca gaaaatagta gcaaacctgg gtggaaatct + 3421 attcttccta agatatactg atcctaaacc acatattaga ttgcgtataa aatgttcaga + 3481 tttattttta gcttacggat ctattcttga aatcttaaaa aggagtcgga aaaataggat + 3541 aatgtcaact tttgatattt ctatttatga tcaagaagta gaaagatatg gtggatttga + 3601 tactttagag ttatccgaag caatattttg tgccgattct aaaattattc caaatttgct + 3661 tacattgata aaagatacta ataatgattg gaaagtcgat gatgtatcaa tcttggtgaa + 3721 ttatttatat ctgaaatgct tctttgagaa tgataacaaa aagattctta attttttgaa + 3781 tttagttagt cctaaaaagg ttaaagaaaa tgtcaatgaa aagattgaac attatcttaa + 3841 gcttctgaaa gttaataatc taggtgacca aattttttat gacaagaatt ttaaagaatt + 3901 aaagcatgcc ataaaaaatt tatttttaaa aatgatagct caagattttg aacttcagaa + 3961 agtttattca attattgaca gtatcattca tgtccataat aaccgactaa ttggtattga + 4021 acgagataaa gagaaattaa tttattacac acttcaaagg ttgtttgttt cggaagaata + 4081 catgaaatga ggactaatag atggatgaag tgaaagaatt cacatcaaaa caatttttta + 4141 atactttact tactcttcca agcaccttga agttaatttt tcagttggaa aaacgttatg + 4201 caatttattt aattgtgcta aatgctatca cagcttttgt tccgttggct agtcttttta + 4261 tttatcaaga tttaataaac tctgtgctag gttcagggag acatcttatc aatattatta + 4321 tcatctattt tattgttcaa gtgataacaa cagttctggg acagctggaa agttatgtta + 4381 gtggaaaatt tgatatgcga ctttcttaca gtatcaatat gcgcctcatg aggactacct + 4441 catctcttga attaagtgat tatgagcagg ctgatatgta taatatcata gaaaaagtta + 4501 ctcaagacag cacttacaag ccttttcagc tatttaatgc tatcattgtt gtgctttcat + 4561 cgtttatctc attgttatct agtctatttt ttattggaac atggaacatt ggggtagcaa + 4621 ttttactcct tattgttcca gtattatctt tggtactttt tctcagagtg ggacaattag + 4681 agtttttaat ccagtggcag agagcaagtt ctgaaagaga aacatggtat attgtatatt + 4741 tattgactca tgatttttca tttaaagaaa tcaagttaaa taatattagc aattacttca + 4801 ttcataaatt tggaaaatta aagaaaggat ttatcaacca agatttagct attgctcgta + 4861 agaagacata tttcaatatt tttcttgatt tcattttgaa tttgataaat attcttacga + 4921 tatttgctat gatcctttcg gtaagagcag gaaaacttct tataggtaat ttggtaagtc + 4981 tcatacaagc tatttctaaa atcaatactt attctcaaac aatgattcaa aatatttaca + 5041 tcatttataa tactagtttg tttatggaac aactttttga gtttttaaag agagaaagtg + 5101 tagttcacaa aaaaatagaa gatactgaaa tatgcaatca acatatagga actgttaaag + 5161 taattaattt atcatatgtt taccctaatt cgaatgcctt tgcactaaag aatatcaatt + 5221 tatcctttga aaaaggagaa ttaactgcta ttgtaggaaa aaatggttca gggaaaagta + 5281 cactagtaaa gataatttca ggattatatc aaccaactat gggaataatc caatacgaca + 5341 aaatgagaag tagtttgatg cctgaggagt tttatcagaa aaacatatcg gtgctgttcc + 5401 aagattttgt gaagtatgag ttaacgataa gagagaatat aggattgagt gatttgtctt + 5461 ctcaatggga agatgagaaa attattaaag tactagataa tttaggactc gattttttga + 5521 aaactaataa tcaatatgta cttgatacgc agttaggaaa ttggtttcaa gaagggcatc + 5581 aactttcagg aggtcagtgg caaaaaattg cattagcaag gacattcttt aagaaagctt + 5641 caatttatat tttagatgaa ccaagtgctg cactcgatcc tgtagctgaa aaagaaatat + 5701 ttgattattt tgttgctctt tcggaaaata atatttcaat tttcatttct catagtttga + 5761 atgctgccag aaaagcaaat aaaatcgtgg ttatgaaaga tggacaggtc gaagatgttg + 5821 gaagtcatga tgtccttctg agaagatgtc aatactatca agaactttat tattcagagc + 5881 aatatgagga taatgatgaa taaaaaaaat ataaaaagaa atgttgaaaa aattattgct + 5941 caatgggatg agagaactag aaaaaataaa gaaaacttcg atttcggaga gttgactctc + 6001 tctacaggat tgcctggtat aattttaatg ttagcggagt taaaaaataa agataactca + 6061 aagatatatc agaaaaagat agacaattat attgaatata ttgttagcaa actttcaaca + 6121 tatgggcttt taacaggatc actttattcg ggagcagctg gcattgcatt aagtatccta + 6181 catttacgag aagatgacga aaaatataag aatcttcttg atagcctaaa tagatatatc + 6241 gaatatttcg tcagagaaaa aattgaagga tttaatttgg aaaacattac tcctcctgat + 6301 tatgacgtga ttgaaggttt atctgggata ctttcctatc tattattaat caacgacgag + 6361 caatatgatg atttgaaaat actcattatc aattttttat caaatctgac taaagaaaac + 6421 aaaggactaa tatcgcttta catcaaatcg gagaatcaga tgtctcaatc agaaagtgag + 6481 atgtatccac taggctgttt gaatatggga ttagcacatg gacttgctgg agtgggctgt + 6541 atcttagctt atgcccacat aaaaggatat agtaatgaag cctcgttgtc agctttgcaa + 6601 aaaattattt ttatttatga aaagtttgaa cttgaaagga aaaaacagtt tctatggaaa + 6661 gatggacttg tagcagatga attaaaaaaa gagaaagtaa ttagggaagc aagtttcatt + 6721 agagatgcat ggtgctatgg aggtccaggt attagtctgc tatacttata cggaggatta + 6781 gcactggata atgactattt tgtagataaa gcagaaaaaa tattagagtc agctatgcaa + 6841 aggaaacttg gtattgattc atatatgatt tgccatggct attctggttt aatagaaatt + 6901 tgttctttat ttaagcggct attaaataca aaaaagtttg attcatacat ggaagaattt + 6961 aatgttaata gtgagcaaat tcttgaagaa tacggagatg aaagtggcac gggttttctt + 7021 gaaggaataa gtggctgtat actggtatta tcgaaatttg aatattcaat caattttact + 7081 tattggagac aagcactgtt actttttgac gattttttga aaggagggaa gaggaaatga + 7141 gaagatattt aatacttatt gtggccttaa tagggataac aggtttatca gggtgttatc + 7201 aaacaagtca taaaaaggtg aggtttgacg aaggaagtta tactaatttt atttatgata + 7261 ataaatcgta tttcgtaact gataaggaga ttcctcagga gaacgttaac aattccaaag + 7321 taaaatttta taagctgttg attgttgaca tgaaaagtga gaaactttta tcaagtagca + 7381 acaaaaatag tgtgactttg gtcttaaata atatttatga ggcttctgac aagtcgctat + 7441 gtatgggtat taacgacaga tactataaga tacttccaga aagtgataag ggggcggtca + 7501 aagctttgag attacaaaac tttgatgtga caagcgatat ttctgatgat aattttgtta + 7561 ttgataaaaa tgattcacga aaaattgact atatgggaaa tatttacagt atatcggaca + 7621 ccaccgtatc tgatgaagaa ttgggagaat atcaggatgt tttagctgaa gtacgtgtgt + 7681 ttgattcagt tagtggcaaa agtatcccga ggtctgaatg ggggagaatt gataaggatg + 7741 gttcaaattc caaacagagt aggacggaat gggattatgg cgaaatccat tctattagag + 7801 gaaaatctct tactgaagca tttgccgttg agataaatga tgattttaag cttgcaacga + 7861 aggtaggaaa ctagagtgaa aaaaatacta ggtttccttt ttatcgtttg ttcgttgggt + 7921 ttatcagcaa ctgtgcatgg ggagacaaca aattcacaac agttactctc aaataatatt + 7981 aatacggaat taattaatca taattctaat gcaattttat cttcaacaga gggatcaacg + 8041 actgattcga ttaatctagg ggcgcagtca cctgcagtaa aatcgacaac aaggactgaa + 8101 ttggatgtaa ctggtgctgc taaaacttta ttacagacat cagctgttca aaaagaaatg + 8161 aaagtttcgt tgcaagaaac tcaagttagt tctgaattca gtaagagaga tagcgttaca + 8221 aataaagaag cagttccagt atctaaggat gagctacttg agcaaagtga agtagtcgtt + 8281 tcaacatcat cgattcaaaa aaataaaatc ctcgataata agaagaatag agctaacttc + 8341 gttacttcct ctccgcttat taaggaaaaa ccatcaaatt ctaaagatgc atctggtgta + 8401 attgataatt ctgcttctcc tctatcttat cgtaaagcta aggaagtggt atctcttaga + 8461 caacctttaa aaaatcaaaa agtagaggca caacctctat tgataagtaa ttcttctgaa + 8521 aagaaagcaa gtgtttatac aaattcacat gatttttggg attatcagtg ggatatgaaa + 8581 tatgtgacaa ataatggaga aagctatgcg ctctaccagc cctcaaagaa aatttctgtt + 8641 ggaattattg attcaggaat catggaagaa catcctgatt tgtcaaatag tttaggaaat + 8701 tattttaaaa atcttgttcc taagggaggg tttgataatg aagaacctga tgaaactgga + 8761 aatccaagtg atattgtcga caaaatggga cacgggacgg aagtcgcagg tcagattaca + 8821 gcaaatggta atattttagg agtagcacca gggattactg taaatatata cagagtattt + 8881 ggtgaaaatc tttcgaaatc ggaatgggta gctagagcaa taagaagagc tgcggatgat + 8941 gggaacaagg tcatcaatat aagtgctgga cagtatctta tgatttcagg atcgtatgat + 9001 gatggaacaa atgattatca agagtatctt aattataagt cagcaataaa ttatgcaaca + 9061 gcaaaaggaa gtattgttgt cgcagctctt ggtaatgata gtttaaacat acaagataac + 9121 caaacaatga taaactttct taagcgtttc agaagtataa aggttcctgg aaaagttgta + 9181 gatgcaccga gtgtatttga ggatgtaata gccgtaggtg gaatagatgg ttatggtaat + 9241 atttctgatt ttagtaatat tggagcggat gcaatttatg ctcctgctgg cacaacggcc + 9301 aattttaaaa aatatgggca agataaattt gtcagtcagg gttattattt gaaagattgg + 9361 ctttttacaa ctactaatac tggctggtac caatatgttt atggcaactc atttgctact + 9421 cctaaagtat ctggggcact ggcattagta gttgataaat atggaataaa gaatcctaac + 9481 caactaaaaa ggtttcttct aatgaattct ccagaagtta atgggaatag agtattgaat + 9541 attgttgatt tattgaatgg gaaaaataaa gcttttagct tagatacaga taaaggtcag + 9601 gatgatgcta ttaaccataa atcgatggag aatcttaaag agtctaggga tacaatgaaa + 9661 caggaacaag ataaagaaat tcaaagaaat acaaataaca atttttctat caaaaatgat + 9721 tttcataaca tttcaaaaga agtaatttca gttgattata atattaatca aaaaatggct + 9781 aataatcgaa attcgagagg tgctgtttct gtacgaagtc aagaaatttt acctgttact + 9841 ggagatggag aagatttttt accggcttta ggtatagtgt gtatctcaat ccttggtata + 9901 ttgaaaagaa agactaaaaa ttgatagatt atatttcttc agaatgaatg gtataatgaa + 9961 gtaatgagta ctaaacaatc ggaggtaaag tggtgtataa aattttaata gttgatgatg + 10021 atcaggaaat tttaaaatta atgaagacag cattagaaat gagaaactat gaagttgcga + 10081 tgcatcaaaa catttcactt cccttggata ttactgattt tcagggattt gatttgattt + 10141 tgttagatat catgatgtca aatattgaag ggacagaaat ttgtaaaagg attcgcagag + 10201 aaatatcaac tccaattatc tttgttagtg cgaaagatac agaagaggat attataaacg + 10261 gcttaggtat tggtggggat gactatatta ctaagccttt tagccttaaa cagttggttg + 10321 caaaagtgga agcaaatata aagcgagagg aacgcaataa acatgcagtt catgtttttt + 10381 cagagattcg tagagattta ggaccaatta cattttattt agaagaaagg cgagtctgtg + 10441 tcaatggtca aacaattcca ctgacttgtc gtgaatacga tattcttgaa ttactatcac + 10501 aacgaacttc taaagtttat acgagagagg atatttatga tgacgtatat gatgaatatt + 10561 ctaatgcact ttttcggtca atctcggaat atatttatca gattaggagt aagtttgcac + 10621 catacgatat taatccgata aaaacggttc ggggacttgg gtatcagtgg catgggtaaa + 10681 aaatattcaa tgcgtcgacg gatatggcaa gctgtcattg aaattatcat aggtacttgt + 10741 ctacttatcc tgttgttact gggcttgact ttctttctac gacaaattgg acaaatcagt + 10801 ggttcagaaa ctattcgttt atctttagat tcagataatt taactatttc tgatatcgaa + 10861 cgtgatatga aacactaccc atatgattat attatttttg acaatgatac aagtaaaatt + 10921 ttgggaggac attatgtcaa gtcggatgta cctagttttg tagcttcaaa acagtcttca + 10981 cataatatta cagaaggaga aattacttat acttattcaa gcaataagca tttttcagtt + 11041 gttttaagac aaaacagtat gcctgaattt acaaatcata cgcttcgttc aatttcttat + 11101 aatcaattta cttacctttt cttttttctt ggtgaaataa tactcattat tttttctgtc + 11161 tatcatctca ttagagaatt ttctaagaat tttcaagccg ttcaaaagat tgcattgaag + 11221 atgggggaaa taactacttt tcctgaacaa gaggaatcaa aaattattga atttgatcag + 11281 gttctgaata acttatattc gaaaagtaag gagttagctt tccttattga agcggagcgt + 11341 catgaaaaac atgatttatc cttccaggtt gctgcacttt cacatgatgt taagacacct + 11401 ttaacagtat taaaaggaaa tattgaactg ctagagatga ctgaagtaaa tgaacaacaa + 11461 gctgatttta ttgagtcaat gaaaaatagt ttgactgttt ttgacaagta ttttaacaca + 11521 atgattagtt atacaaaact tttgaatgat gaaaatgatt acaaagcgac aatctccctg + 11581 gaggattttt tgatagattt atcagttgag ttggaagagt tgtcaacaac ttatcaagtg + 11641 gattatcagc tagttaaaaa aacagattta accacttttt acggaaatac attagcttta + 11701 agtcgagcac ttatcaatat ctttgttaat gcctgtcagt atgctaaaga gggtgaaaaa + 11761 atagtcagtt tgagtattta tgatgatgaa aaatatctct attttgaaat ctggaataat + 11821 ggtcatcctt tttctgaaca agcaaaaaaa aatgctggaa aactattttt cacagaagat + 11881 actggacgta gtgggaaaca ctatgggatt ggactatctt ttgctcaagg tgtagcttta + 11941 aaacatcaag gaaacttaat tctcagtaat cctcaaaaag gtggggcaga agttatccta + 12001 aaaataaaaa agtaatttag taatctctaa ggattacttt ttttgtttct gaatagattc + 12061 tgaaaattgt tttatatact ttttttaaac ataaaataaa gtgaggaaat ataatgcagg + 12121 taaaaattca aaatctttct aaaacatata aagaaaagca ggtgctacaa gatatcagtt + 12181 ttgatattaa atctggaaca gtctgtggtt tattaggagt taacggtgca ggaaaatcaa + 12241 ctttgatgaa aattttgttt ggtttaattt ctgcagatac tggaaaaatt ttttttgatg + 12301 gacaagaaaa gacaaataat caacttggag ccttaatcga ggctccagca atatatatga + 12361 atttatctgc tttcgataat cttaaaacta aggctttgct ttttggaatt tcagataaga + 12421 gaattcatga aactctagaa gtgattggtt tggcagaaac aggaaagaaa agagcaggaa + 12481 aattctcttt agggatgaaa caacgtttgg gaattggtat ggctattctt acagaacctc + 12541 aatttttaat tcttgatgaa cctactaatg gtttggatcc tgatggtatt gcggagttgt + 12601 taaacttaat cttaaaactt aaagctaaag gtgtgacaat cttgatttct agtcatcagt + 12661 tgcacgaaat aagtaaagta gctagtcaaa ttattatttt gaacaaaggt aagattcgtt + 12721 ataatcatgc gaacaataaa gaagacgaca ttgaacagtt attctttaag attgtgcatg + 12781 gaggaatgtg atatgaaaag aataatagca tcagaagcaa taaaattaaa aaaatcagga + 12841 actcttagat tggtattaat tatccctttt gtgactctat ttatagcatt tcttatgggt + 12901 ggaatacaga tttttagtgt tttttcaatt tattggtggg aaactggttt tttattcctt + 12961 ttgatgagtt tgctttttct ttatgatata aaatcagagg agcaagctgg aaattttcaa + 13021 aatgtgaaat ggaaaaagct gagttggaaa attcatttgg ccaaaatgtt gttgatttgg + 13081 ctaagaggta tactagcgag catagtcttg attattttgc tttatttggt tgcttttgtg + 13141 tttcaaggta ttgtagtggt ggattttatg aaagtaagtg tggcattgat tgctatatta + 13201 ctagcagctt cttggaattt accctttata tacttgattt tcaagtggat taatacttac + 13261 gtattgttag ctgcgaatac cttgatttgt ttaattgttg ccccttttgt tgcacaaact + 13321 ccagtatggt tcttgctacc atacacttat cactataaag ttacagaaag tttgttaaat + 13381 atcaaaccat caggagattt gttaacaggg aagataaatt tcagtatttg ggaagtttta + 13441 ttaccatttg gactttccat agttgtaacg ataggagttt cgtatttact taaaggagtg + 13501 atagaacatg ataagaagtg aatgtctcaa attaaaaaat agcttagggt tttatttagt + 13561 ttttctcttt actttattag agcttttaac ggttcctatt tatttagctt ttggaagaag + 13621 tcatgtttca atgactgatt tatcgctcat gatttttttg ttttttccgt tactggttac + 13681 aattttgtct attctaatct ttgaacagga gagtctggcc aatcgtttcc aagaaataaa + 13741 tgtaaataaa aaaagtagca gaatttggtt atcaaagcta atagtagtgg atttcctttt + 13801 gttctttcca tcagcaatga tctggataat tacgggagtt tcacaggcag tagggcaaca + 13861 aggaatgatg atcgcaacag ctagctggtt gatggcaatt tttcttaatc attttcatct + 13921 tttattgacc tttataatca atcgaggagg gagcatgatt atcgcgatta ttgaaatatt + 13981 actcattatt tttgccagta ataaagtttt attagcagct tattggtgtc ccattgcttt + 14041 acctgttaat tttatgataa ctgggcggtg tgcttatctg atagctgccg tagggtggat + 14101 tgttttatcc acaataattc ttgtagcatt atctaaaaaa aagattagat aaagtatttt + 14161 ttcttatggt aattcgacct aatatgtttt tgctatttat ctcttatttc tgtctatagt + 14221 aatttattca aagtaccttt agactcataa gtttgaataa aatttccatc aatatgtgac + 14281 agttcttact ctaagaataa tgtttccgtc gttagttttc ttttctcagt aattttttat + 14341 aaagtggttt cctatcgact tacacttttt cttttggagt tattggtgtc aagctcatga + 14401 gagattcttt aaagcatata acatttttaa actgatatgc cagttatttg tgaagaatat + 14461 gagtttctat ataaaagaga aaattaaaca atgaaaaatc caagtataaa tacttggatt + 14521 tttcattatt tttgatagac atataaagtg cattttaacc tagaattaaa agatgtatcg + 14581 ttgataataa taaaatgaac caaagcaaaa ctgacgttaa gtcaatttat tagagtcaaa + 14641 aacgtataat tttaatgtat ttattttaag tgatttcttt ttcaaattga ttaggcgtaa + 14701 gataccctaa actttgatgg attcgttttg aattataaaa ggcttcaata taccagaaaa + 14761 tactctgata ggcttcttca aagttcttat atttaaattg atagacccac tctcttttta + 14821 aatgtccatg ccaagattca agactggcat tatgataagg atagccctta cgactgaagg + 14881 agtgagtcat tccagagttt tttattgtct cttcatactc atgactcgta tactgacttc + 14941 cttggtcaga atgaagcatc acagcttctg gataattttg tgattccatt gccttattca + 15001 aagtcctttg cactaa +// diff --git a/tests/data/Y16952.3.final.gbk b/tests/data/Y16952.3.final.gbk new file mode 100644 index 0000000..7f69a2c --- /dev/null +++ b/tests/data/Y16952.3.final.gbk @@ -0,0 +1,6887 @@ +LOCUS Y16952 66669 bp DNA linear BCT 14-DEC-2008 +DEFINITION Amycolatopsis balhimycina biosynthetic gene cluster for balhimycin, + strain DSM 5908. +ACCESSION Y16952 +VERSION Y16952.3 GI:46275283 +KEYWORDS abc gene; bbr gene; bgtfA gene; bgtfB gene; bgtfC gene; bhaA gene; + bhp gene; bmt gene; bpsA gene; bpsB gene; bpsC gene; bpsD gene; + dihydroxyphenylacetic acid synthase; dpgA gene; dpgB gene; dpgC + gene; dpgD gene; enoyl-CoA hydratase; enoyl-CoA-isomerase; glycosyl + transferase; halogenase; hydrolase; hydroxyacyl-dehydrogenase; ORF1; + ORF10; ORF11; ORF2; ORF3; ORF5; ORF6; ORF7; ORF8; ORF9; orfX; oxyA + gene; oxyB gene; oxyC gene; oxyD gene; P450 monooxygenase; pdh gene; + peptide synthetase; pgat gene; phenylglycine amino transferase; + putative ABC transporter ATP-binding protein; putative prephenate + dehydrogenase; putative two-component system respons; putative + two-component system sensor kinase; putative VanY-type + carboxypeptidase; StrR family transcriptional regulator; vanR gene; + vanS gene; vanY gene. +SOURCE Amycolatopsis balhimycina DSM 5908 + ORGANISM Amycolatopsis balhimycina DSM 5908 + Bacteria; Actinobacteria; Actinobacteridae; Actinomycetales; + Pseudonocardineae; Pseudonocardiaceae; Amycolatopsis. +REFERENCE 1 + AUTHORS Pelzer,S., Sussmuth,R., Heckmann,D., Recktenwald,J., Huber,P., + Jung,G. and Wohlleben,W. + TITLE Identification and analysis of the balhimycin biosynthetic gene + cluster and its use for manipulating glycopeptide biosynthesis in + Amycolatopsis mediterranei DSM5908 + JOURNAL Antimicrob. Agents Chemother. 43 (7), 1565-1573 (1999) + PUBMED 10390204 +REFERENCE 2 + AUTHORS Pfeifer,V., Nicholson,G.J., Ries,J., Recktenwald,J., Schefer,A.B., + Shawky,R.M., Schroder,J., Wohlleben,W. and Pelzer,S. + TITLE A polyketide synthase in glycopeptide biosynthesis: the biosynthesis + of the non-proteinogenic amino acid (S)-3,5-dihydroxyphenylglycine + JOURNAL J. Biol. Chem. 276 (42), 38370-38377 (2001) + PUBMED 11495926 +REFERENCE 3 + AUTHORS Bischoff,D., Pelzer,S., Bister,B., Nicholson,G.J., Stockert,S., + Schirle,M., Wohlleben,W., Jung,G. and Sussmuth,R.D. + TITLE The Biosynthesis of Vancomycin-Type Glycopeptide Antibiotics-The + Order of the Cyclization Steps This work was supported by the + Deutsche Forschungsgemeinschaft (SFB 323) and by a grant of the EU + (MEGATOP, QLK3-1999-00650). R. D. S. gratefully acknowledges the + support of a Feodor-Lynen Fellowship granted by the + Alexander-von-Humboldt Stiftung. We thank Corina Bihlmaier and + Volker Pfeifer for help with transformation and Southern + hybridization, J. A. Moss (La Jolla (USA)) for critical comments on + the manuscript and Prof. Dr. M. E. Maier and Prof. Dr. H.-P. Fiedler + (Tubingen) for generous support + JOURNAL Angew. Chem. Int. Ed. Engl. 40 (24), 4688-4691 (2001) + PUBMED 12404385 +REFERENCE 4 + AUTHORS Puk,O., Huber,P., Bischoff,D., Recktenwald,J., Jung,G., + Sussmuth,R.D., van Pee,K.H., Wohlleben,W. and Pelzer,S. + TITLE Glycopeptide biosynthesis in Amycolatopsis mediterranei DSM5908: + function of a halogenase and a haloperoxidase/perhydrolase + JOURNAL Chem. Biol. 9 (2), 225-235 (2002) + PUBMED 11880037 +REFERENCE 5 + AUTHORS Recktenwald,J., Shawky,R., Puk,O., Pfennig,F., Keller,U., + Wohlleben,W. and Pelzer,S. + TITLE Nonribosomal biosynthesis of vancomycin-type antibiotics: a + heptapeptide backbone and eight peptide synthetase modules + JOURNAL Microbiology (Reading, Engl.) 148 (PT 4), 1105-1118 (2002) + PUBMED 11932455 +REFERENCE 6 + AUTHORS Stegmann,E., Bischoff,D., Kittel,C., Pelzer,S., Puk,O., + Recktenwald,J., Weist,S., Sussmuth,R. and Wohlleben,W. + TITLE Precursor-directed biosynthesis for the generation of novel + glycopetides + JOURNAL Ernst Schering Res. Found. Workshop -(51), 215-232 (2005) +REFERENCE 7 + AUTHORS Pelzer,S. + TITLE Direct Submission + JOURNAL Submitted (24-MAR-1998) S. Pelzer, Universitaet Tuebingen, Lehrstuhl + Mikrobiologie-Biotechnologie, Auf der Morgenstelle 28, D- 72076 + Tuebingen, FRG + REMARK revised by [6] +REFERENCE 8 + AUTHORS Pelzer,S. + TITLE Direct Submission + JOURNAL Submitted (03-AUG-2001) S. Pelzer, Universitaet Tuebingen, Lehrstuhl + Mikrobiologie-Biotechnologie, Auf der Morgenstelle 28, D- 72076 + Tuebingen, FRG + REMARK revised by [10] +REFERENCE 9 (bases 1 to 66669) + AUTHORS Pelzer,S. + TITLE Direct Submission + JOURNAL Submitted (07-APR-2004) S. Pelzer, Universitaet Tuebingen, Lehrstuhl + Mikrobiologie-Biotechnologie, Auf der Morgenstelle 28, D- 72076 + Tuebingen, FRG +COMMENT On Apr 7, 2004 this sequence version replaced gi:15131491. +FEATURES Location/Qualifiers + gene complement(<1..759) + /gene="vanS" + CDS complement(<1..759) + /codon_start=1 + /db_xref="GI:46275284" + /db_xref="GOA:Q799B6" + /db_xref="InterPro:IPR003660" + /db_xref="InterPro:IPR003661" + /db_xref="InterPro:IPR009082" + /db_xref="UniProtKB/TrEMBL:Q799B6" + /gene="vanS" + /product="putative two-component system sensor kinase" + /protein_id="CAG25751.1" + /transl_table=11 + /translation="MDRAAGMSVRLKLTLSYACFLVLAGVLLLASVWLFLLRDVPDVLA + KPPPGGVLERSVLVRNFLPAAGSVLFFLLLFGLLGGWILAGRMLAPLTRITDAARMAAN + GSLSHRIRLEGTEDEFRELADAFDAMLARLEAHVAAQRRFAANASHELRTPLAITQALL + EVARNDPAKDPLLVFDRLHAVNARAIDLTEALLVLSRADQRAFTREPVDLSLLVEEAIE + TLLPIAEKRRVVIIASGHISRVVGSATLLLQ" + source 1..66669 + /db_xref="taxon:1081091" + /mol_type="genomic DNA" + /organism="Amycolatopsis balhimycina DSM 5908" + /strain="DSM 5908" + cluster 1..66669 + /clusterblast="2. AOHO01000074_c2 Amycolatopsis decaplanina + DSM 44594 Contig0074, whole geno... (89% of genes show + similarity)" + /clusterblast="3. CP003410_c2 Amycolatopsis orientalis + HCCB10007, complete genome. (86% of genes show similarity)" + /clusterblast="4. HE589771_c1 Amycolatopsis orientalis + vancomycin biosynthesis cluster, stra... (89% of genes show + similarity)" + /clusterblast="5. HQ679900_c1 Amycolatopsis orientalis + vancomycin biosynthetic gene cluster,... (81% of genes show + similarity)" + /clusterblast="6. CP008953_c23 Amycolatopsis japonica + strain MG417-CF17, complete genome. (86% of genes show + similarity)" + /clusterblast="7. KF882511_c1 Amycolatopsis sp. MJM2582 + glycopeptide biosynthetic gene clust... (86% of genes show + similarity)" + /clusterblast="8. CP007219_c17 Amycolatopsis lurida NRRL + 2430, complete genome. (83% of genes show similarity)" + /clusterblast="9. JPLW01000007_c2 Amycolatopsis sp. MJM2582 + contig00007, whole genome shotgu... (81% of genes show + similarity)" + /clusterblast="10. KJ364518_c1 Amycolatopsis lurida strain + NRRL2430 ristocetin biosynthetic ... (86% of genes show + similarity)" + /clusterblast="11. JX026280_c1 Streptomyces sp. WAC1420 + genomic sequence. (72% of genes show similarity)" + /contig_edge="True" + /cutoff=20000 + /extension=20000 + /knownclusterblast="1. + BGC0000311_c1 Balhimycin_biosynthetic_gene_cluster (100% of + genes show similarity)" + /knownclusterblast="2. + BGC0000455_c1 Vancomycin_biosynthetic_gene_cluster (81% of + genes show similarity)" + /knownclusterblast="3. + BGC0000418_c1 Ristocetin_biosynthetic_gene_cluster (83% of + genes show similarity)" + /knownclusterblast="4. + BGC0000419_c1 Ristomycin_A_biosynthetic_gene_cluster (83% + of genes show similarity)" + /knownclusterblast="5. + BGC0000322_c2 Chloroeremomycin_biosynthetic_gene_cluster + (62% of genes show similarity)" + /knownclusterblast="6. + BGC0001178_c1 UK-68,597_biosynthetic_gene_cluster (83% of + genes show similarity)" + /knownclusterblast="7. + BGC0000289_c1 A40926_biosynthetic_gene_cluster (64% of + genes show similarity)" + /knownclusterblast="8. + BGC0000441_c1 Teicoplanin_biosynthetic_gene_cluster (67% of + genes show similarity)" + /knownclusterblast="9. + BGC0000440_c1 Teicoplanin_biosynthetic_gene_cluster (67% of + genes show similarity)" + /knownclusterblast="10. + BGC0000290_c1 A47934_biosynthetic_gene_cluster (56% of + genes show similarity)" + /note="Cluster number: 1" + /note="Detection rule(s) for this cluster type: t3pks: + (Chal_sti_synt_C or Chal_sti_synt_N); nrps: ((Condensation + & AMP-binding) or (Condensation & A-OX) or + cluster(Condensation,AMP-binding));" + /note="Monomers prediction: (leu-bht-asn) + (hpg-hpg-bht) + + (dpg) + (bht|tyr) + (pk)" + /note="Structure image: structures/genecluster1.png" + /product="t3pks-nrps" + /subclusterblast="1. + HE589771_4_c4 vancomycin_glucose_/_vancosamine (100% of + genes show similarity)" + /subclusterblast="3. + HE589771_1_c1 vancomycin_dihydroxyphenylglycine (57% of + genes show similarity)" + /subclusterblast="4. + AJ632270_1_c1 teicoplanin_dihydroxyphenylglycine (57% of + genes show similarity)" + /subclusterblast="7. + HE589771_3_c3 vancomycin_beta-hydroxytyrosine (42% of genes + show similarity)" + /subclusterblast="8. AB088119_2_c2 staurosporine_deoxysugar + (57% of genes show similarity)" + /subclusterblast="9. + AF386507_1_c1 complestatin_hydroxyphenylglycine (57% of + genes show similarity)" + /subclusterblast="11. + HE589771_2_c2 vancomycin_hydroxyphenylglycine (42% of genes + show similarity)" + /subclusterblast="12. + AJ632270_2_c2 teicoplanin_hydroxyphenylglycine (42% of + genes show similarity)" + /subclusterblast="13. + DQ403252_1_c1 enduracidin_hydroxyphenylglycine (28% of + genes show similarity)" + /subclusterblast="14. + AB103463_2_c2 medermycin_aminodeoxysugar_(angolosamine) + (28% of genes show similarity)" + PFAM_domain complement(151..339) + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_vanS_0002" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00512" + /description="His Kinase A (phospho-acceptor) domain" + /detection="hmmscan" + /domain="HisKA" + /evalue="5.50E-08" + /label="vanS" + /locus_tag="vanS" + /note="Pfam-A.hmm-Hit: HisKA. Score: 32.6. E-value: + 5.5e-08. Domain range: 1..67." + /score="32.6" + /translation="QRRFAANASHELRTPLAITQALLEVARNDPAKDPLLVFDRLHAVN + ARAIDLTEALLVLSRADQ" + PFAM_domain complement(355..555) + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_vanS_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00672" + /description="HAMP domain" + /detection="hmmscan" + /domain="HAMP" + /evalue="1.60E-16" + /label="vanS" + /locus_tag="vanS" + /note="Pfam-A.hmm-Hit: HAMP. Score: 60.2. E-value: 1.6e-16. + Domain range: 3..69." + /score="60.2" + /translation="VLFFLLLFGLLGGWILAGRMLAPLTRITDAARMAANGSLSHRIRL + EGTEDEFRELADAFDAMLARLE" + gene complement(770..1447) + /gene="vanR" + CDS complement(770..1447) + /codon_start=1 + /db_xref="GI:46275285" + /db_xref="GOA:Q799B5" + /db_xref="HSSP:1KGS" + /db_xref="InterPro:IPR001789" + /db_xref="InterPro:IPR001867" + /db_xref="InterPro:IPR011006" + /db_xref="InterPro:IPR011991" + /db_xref="UniProtKB/TrEMBL:Q799B5" + /gene="vanR" + /note="smCOG: SMCOG1008:response_regulator (Score: 188.5; + E-value: 2.1e-57);" + /note="smCOG tree PNG image: smcogs/vanR.png" + /product="putative two-component system response regulator" + /protein_id="CAG25752.1" + /transl_table=11 + /translation="MRVLIVEDEPYLAEAIRDGLRLEAIAADTAGNGDTALELLSLNTY + DIAVLDRDIPGPSGDEIAKRIVASGSGLPILMLTAADRLDDKITGFELGADDYLTKPFE + LRELVLRLRALDRRRAHNRPPVLEIAGLRLNPFRREVYRDDRYIALTRKQFAVLEVLVS + ADGGVVSAEELLERAWDKNADPFTNAVRITVSALRKRLGEPWIITTVAGVGYRIGAAPG + AGR" + PFAM_domain complement(797..1009) + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_vanR_0002" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00486" + /description="Transcriptional regulatory protein, C + terminal" + /detection="hmmscan" + /domain="Trans_reg_C" + /evalue="1.70E-17" + /label="vanR" + /locus_tag="vanR" + /note="Pfam-A.hmm-Hit: Trans_reg_C. Score: 63.1. E-value: + 1.7e-17. Domain range: 2..77." + /score="63.1" + /translation="YIALTRKQFAVLEVLVSADGGVVSAEELLERAWDKNADPFTNAVR + ITVSALRKRLGEPWIITTVAGVGYRI" + PFAM_domain complement(1112..1441) + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_vanR_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00072" + /description="Response regulator receiver domain" + /detection="hmmscan" + /domain="Response_reg" + /evalue="1.30E-22" + /label="vanR" + /locus_tag="vanR" + /note="Pfam-A.hmm-Hit: Response_reg. Score: 79.9. E-value: + 1.3e-22. Domain range: 0..111." + /score="79.9" + /translation="VLIVEDEPYLAEAIRDGLRLEAIAADTAGNGDTALELLSLNTYDI + AVLDRDIPGPSGDEIAKRIVASGSGLPILMLTAADRLDDKITGFELGADDYLTKPFELR + ELVLRL" + gene 1537..2157 + /gene="vanY" + CDS 1537..2157 + /codon_start=1 + /db_xref="GI:46275286" + /db_xref="GOA:Q799B4" + /db_xref="InterPro:IPR003709" + /db_xref="InterPro:IPR009045" + /db_xref="UniProtKB/TrEMBL:Q799B4" + /gene="vanY" + /product="putative VanY-type carboxypeptidase" + /protein_id="CAG25753.1" + /transl_table=11 + /translation="MTYRESARTTTRRIPGAVVPVARRIRGVLLAGLRAVGTRIARSPG + RPVRPQDRAGLGKTHGAVPAGVTVFDDDVPAVTRLDPALLSALRRAATAAADGGVELCV + NSGWRSPEYQSRLLREAVAKYGSAAAAARWVATPETSIHVAGKAVDIGPPASASWLSEH + GADYGLCRVYRNEPWHFELRPEAIEHGCPPLYADPSHDPRLRR" + PFAM_domain 1771..2085 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_vanY_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF02557" + /description="D-alanyl-D-alanine carboxypeptidase" + /detection="hmmscan" + /domain="VanY" + /evalue="1.60E-17" + /label="vanY" + /locus_tag="vanY" + /note="Pfam-A.hmm-Hit: VanY. Score: 63.6. E-value: 1.6e-17. + Domain range: 2..130." + /score="63.6" + /translation="RLDPALLSALRRAATAAADGGVELCVNSGWRSPEYQSRLLREAVA + KYGSAAAAARWVATPETSIHVAGKAVDIGPPASASWLSEHGADYGLCRVYRNEPWHFEL + R" + gene 3011..3976 + /gene="bbr" + CDS 3011..3976 + /codon_start=1 + /db_xref="GI:46275287" + /db_xref="GOA:Q799B3" + /db_xref="InterPro:IPR003115" + /db_xref="InterPro:IPR009057" + /db_xref="UniProtKB/TrEMBL:Q799B3" + /gene="bbr" + /product="StrR family transcriptional regulator" + /protein_id="CAG25754.1" + /transl_table=11 + /translation="MDPTRVDIFALPAVEIELSRLSSASSPRTSGEDPEHVETLLSAEG + ELPPILVHRPTMQVLDGLHRLKVARVRGDTKILARLVDATESDAFVLAVEANIRHGLPL + SLADRKRAAVQIIGTHPQWSDRRVASATGISAGTVADLRRRAGEDGTEARIGRDGRVRP + SDGSERRRLAAELIRSDPGLSLRQVAKQVGISPETVRDVRGRLERGESPTPDGTRRLPA + KPHPLRLSEPDFGRAVDQDRLALLERLKSDPALRLNEVGRILLRMLTMHSMDGQEWERI + LQGVPPHLHGVIAGFARDHARVWAEFADHLESRATELAAG" + PFAM_domain 3050..3304 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bbr_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF02195" + /description="ParB-like nuclease domain" + /detection="hmmscan" + /domain="ParBc" + /evalue="3.70E-11" + /label="bbr" + /locus_tag="bbr" + /note="Pfam-A.hmm-Hit: ParBc. Score: 42.9. E-value: + 3.7e-11. Domain range: 0..90." + /score="42.9" + /translation="VEIELSRLSSASSPRTSGEDPEHVETLLSAEGELPPILVHRPTMQ + VLDGLHRLKVARVRGDTKILARLVDATESDAFVLAVEANI" + PFAM_domain 3422..3610 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bbr_0006" + /database="Pfam-A.hmm" + /description="Homeodomain-like domain" + /detection="hmmscan" + /domain="HTH_32" + /evalue="1.30E-03" + /label="bbr" + /locus_tag="bbr" + /note="Pfam-A.hmm-Hit: HTH_32. Score: 19.4. E-value: + 0.0013. Domain range: 0..74." + /score="19.4" + /translation="TVADLRRRAGEDGTEARIGRDGRVRPSDGSERRRLAAELIRSDPG + LSLRQVAKQVGISPETVR" + PFAM_domain 3515..3643 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bbr_0002" + /database="Pfam-A.hmm" + /description="Homeodomain-like domain" + /detection="hmmscan" + /domain="HTH_23" + /evalue="5.80E-06" + /label="bbr" + /locus_tag="bbr" + /note="Pfam-A.hmm-Hit: HTH_23. Score: 25.7. E-value: + 5.8e-06. Domain range: 5..47." + /score="25.7" + /translation="RRRLAAELIRSDPGLSLRQVAKQVGISPETVRDVRGRLERGES" + PFAM_domain 3518..3637 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bbr_0003" + /database="Pfam-A.hmm" + /description="Helix-turn-helix domain" + /detection="hmmscan" + /domain="HTH_28" + /evalue="1.60E-03" + /label="bbr" + /locus_tag="bbr" + /note="Pfam-A.hmm-Hit: HTH_28. Score: 18.3. E-value: + 0.0016. Domain range: 1..40." + /score="18.3" + /translation="RRLAAELIRSDPGLSLRQVAKQVGISPETVRDVRGRLERG" + PFAM_domain 3536..3631 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bbr_0004" + /database="Pfam-A.hmm" + /description="Winged helix-turn-helix DNA-binding" + /detection="hmmscan" + /domain="HTH_24" + /evalue="3.40E-04" + /label="bbr" + /locus_tag="bbr" + /note="Pfam-A.hmm-Hit: HTH_24. Score: 19.9. E-value: + 0.00034. Domain range: 10..42." + /score="19.9" + /translation="LIRSDPGLSLRQVAKQVGISPETVRDVRGRLE" + PFAM_domain 3542..3691 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bbr_0005" + /database="Pfam-A.hmm" + /description="Winged helix-turn helix" + /detection="hmmscan" + /domain="HTH_29" + /evalue="4.50E-03" + /label="bbr" + /locus_tag="bbr" + /note="Pfam-A.hmm-Hit: HTH_29. Score: 17.1. E-value: + 0.0045. Domain range: 6..55." + /score="17.1" + /translation="RSDPGLSLRQVAKQVGISPETVRDVRGRLERGESPTPDGTRRLPA + KPHPL" + gene 4106..4981 + /gene="pdh" + CDS 4106..4981 + /codon_start=1 + /db_xref="GI:46275288" + /db_xref="GOA:Q799B2" + /db_xref="InterPro:IPR003099" + /db_xref="InterPro:IPR008927" + /db_xref="InterPro:IPR016040" + /db_xref="UniProtKB/TrEMBL:Q799B2" + /gene="pdh" + /product="putative prephenate dehydrogenase" + /protein_id="CAG25755.1" + /transl_table=11 + /translation="MTIEKALVVGTGLIGTSVALALREKGVAVFLSDVDTEAARLAQVL + GAGREWAGEGVDLAVIAVPPHLVGDRLADLQKQGAARVYTDVASVKADPIADAERLGCD + LASYVPGHPLAGRERSGPAAARAELFSGRPWALCPGPETDAEALRRVRELVSLCGATAV + VVGAGEHDSAVALVSHAPHVVASAVAASLASGDDVALGLAGQGLRDVTRIAAGDPLLWR + RILSGNTRPVAGVLERIAADLAAAASALRSGDLDEVTDLLRRGVDGHGRIPGQRGGSLP + GRNPAGSPGR" + PFAM_domain 4121..4231 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_pdh_0002" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF02737" + /description="3-hydroxyacyl-CoA dehydrogenase, NAD binding + domain" + /detection="hmmscan" + /domain="3HCDH_N" + /evalue="2.90E-03" + /label="pdh" + /locus_tag="pdh" + /note="Pfam-A.hmm-Hit: 3HCDH_N. Score: 17.2. E-value: + 0.0029. Domain range: 1..38." + /score="17.2" + /translation="ALVVGTGLIGTSVALALREKGVAVFLSDVDTEAARLA" + PFAM_domain 4157..4894 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_pdh_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF02153" + /description="Prephenate dehydrogenase" + /detection="hmmscan" + /domain="PDH" + /evalue="4.60E-78" + /label="pdh" + /locus_tag="pdh" + /note="Pfam-A.hmm-Hit: PDH. Score: 261.6. E-value: 4.6e-78. + Domain range: 0..257." + /score="261.6" + /translation="VALALREKGVAVFLSDVDTEAARLAQVLGAGREWAGEGVDLAVIA + VPPHLVGDRLADLQKQGAARVYTDVASVKADPIADAERLGCDLASYVPGHPLAGRERSG + PAAARAELFSGRPWALCPGPETDAEALRRVRELVSLCGATAVVVGAGEHDSAVALVSHA + PHVVASAVAASLASGDDVALGLAGQGLRDVTRIAAGDPLLWRRILSGNTRPVAGVLERI + AADLAAAASALRSGDLDEVTDLLR" + gene 5180..7135 + /gene="abc" + CDS 5180..7135 + /codon_start=1 + /db_xref="GI:46275289" + /db_xref="GOA:Q799B1" + /db_xref="InterPro:IPR001140" + /db_xref="InterPro:IPR003439" + /db_xref="InterPro:IPR003593" + /db_xref="InterPro:IPR011527" + /db_xref="InterPro:IPR017871" + /db_xref="InterPro:IPR017940" + /db_xref="UniProtKB/TrEMBL:Q799B1" + /gene="abc" + /note="smCOG: SMCOG1288:ABC_transporter_related_protein + (Score: 325.4; E-value: 1.1e-98);" + /note="smCOG tree PNG image: smcogs/abc.png" + /product="putative ABC transporter ATP-binding protein" + /protein_id="CAG25756.1" + /transl_table=11 + /translation="MDMVLRFEGVDKSPDDPDPWVTKVRKGTLRRVLAYFRPHVGKVAL + FCLVAVLESLIVVATPLLLKELIDNGIVKNDLGVVILMAGLTAVLAVLGAGLTMVSGYI + SGRIGEGITYDLRVQALGHVRRLPIAFFTRTQTGVLVGRLHTELIMAQQHFTGLLMAAT + SVVMVVVVLAELIYLSWIVAIVSLVLIPIFLVPWIRVGRAIQRRSIRLMDANTGLGGLL + QERFNVQGAMLSKLFGRPAEEMAEYEERAGEIRKIGVSLSVWGRMAFVMMALMASLATA + LVYGIGGGLVLAGAFELGTLVAIATLLQRLFGPITQLSGMQELAQTVVVSFSRVFELLD + LKPLIQERPDAIALKKKVVPDVEFEHVSFRYPTADEVSLASLEHLRAERERSEVTPDVL + RDVSFHAQAGTLTALVGPSGAGKSTITHLVSRLYDPNGGTVRLGGHDLRDLTFESLREA + VGVVSQDAYLFHDTIRENLLYARPTATEDELMEACKGAQIRDLIDSLPLGLDTVTGDRG + YRMSGGEKQRLAIARLLLKEPSIVVLDEATAHLDSESEAAVQRALKTALHGRTSLVIAH + RLSTIREADQILVIDGGRVRERGTHDELLAQGGLYAELYHTQFANPAANDPKPEIEDEL + DDIEPEPVIQHMGYGG" + PFAM_domain 5312..6121 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_abc_0002" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00664" + /description="ABC transporter transmembrane region" + /detection="hmmscan" + /domain="ABC_membrane" + /evalue="1.30E-17" + /label="abc" + /locus_tag="abc" + /note="Pfam-A.hmm-Hit: ABC_membrane. Score: 64.1. E-value: + 1.3e-17. Domain range: 2..274." + /score="64.1" + /translation="LFCLVAVLESLIVVATPLLLKELIDNGIVKNDLGVVILMAGLTAV + LAVLGAGLTMVSGYISGRIGEGITYDLRVQALGHVRRLPIAFFTRTQTGVLVGRLHTEL + IMAQQHFTGLLMAATSVVMVVVVLAELIYLSWIVAIVSLVLIPIFLVPWIRVGRAIQRR + SIRLMDANTGLGGLLQERFNVQGAMLSKLFGRPAEEMAEYEERAGEIRKIGVSLSVWGR + MAFVMMALMASLATALVYGIGGGLVLAGAFELGTLVAIATLLQRLFGP" + PFAM_domain 6368..6679 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_abc_0008" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF03193" + /description="Protein of unknown function, DUF258" + /detection="hmmscan" + /domain="DUF258" + /evalue="2.80E-03" + /label="abc" + /locus_tag="abc" + /note="Pfam-A.hmm-Hit: DUF258. Score: 16.8. E-value: + 0.0028. Domain range: 23..93." + /score="16.8" + /translation="DVLRDVSFHAQAGTLTALVGPSGAGKSTITHLVSRLYDPNGGTVR + LGGHDLRDLTFESLREAVGVVSQDAYLFHDTIRENLLYARPTATEDELMEACKGAQIRD + " + PFAM_domain 6374..6823 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_abc_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00005" + /description="ABC transporter" + /detection="hmmscan" + /domain="ABC_tran" + /evalue="1.40E-32" + /label="abc" + /locus_tag="abc" + /note="Pfam-A.hmm-Hit: ABC_tran. Score: 112.8. E-value: + 1.4e-32. Domain range: 0..137." + /score="112.8" + /translation="LRDVSFHAQAGTLTALVGPSGAGKSTITHLVSRLYDPNGGTVRLG + GHDLRDLTFESLREAVGVVSQDAYLFHDTIRENLLYARPTATEDELMEACKGAQIRDLI + DSLPLGLDTVTGDRGYRMSGGEKQRLAIARLLLKEPSIVVLDEATA" + PFAM_domain 6389..6880 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_abc_0004" + /database="Pfam-A.hmm" + /description="AAA ATPase domain" + /detection="hmmscan" + /domain="AAA_16" + /evalue="1.30E-05" + /label="abc" + /locus_tag="abc" + /note="Pfam-A.hmm-Hit: AAA_16. Score: 25.2. E-value: + 1.3e-05. Domain range: 18..179." + /score="25.2" + /translation="FHAQAGTLTALVGPSGAGKSTITHLVSRLYDPNGGTVRLGGHDLR + DLTFESLREAVGVVSQDAYLFHDTIRENLLYARPTATEDELMEACKGAQIRDLIDSLPL + GLDTVTGDRGYRMSGGEKQRLAIARLLLKEPSIVVLDEATAHLDSESEAAVQRALKTAL + H" + PFAM_domain 6398..6454 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_abc_0007" + /database="Pfam-A.hmm" + /description="P-loop containing region of AAA domain" + /detection="hmmscan" + /domain="AAA_29" + /evalue="3.20E-03" + /label="abc" + /locus_tag="abc" + /note="Pfam-A.hmm-Hit: AAA_29. Score: 16.9. E-value: + 0.0032. Domain range: 20..39." + /score="16.9" + /translation="QAGTLTALVGPSGAGKSTI" + PFAM_domain 6413..6532 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_abc_0005" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF06414" + /description="Zeta toxin" + /detection="hmmscan" + /domain="Zeta_toxin" + /evalue="3.20E-03" + /label="abc" + /locus_tag="abc" + /note="Pfam-A.hmm-Hit: Zeta_toxin. Score: 16.6. E-value: + 0.0032. Domain range: 18..57." + /score="16.6" + /translation="TALVGPSGAGKSTITHLVSRLYDPNGGTVRLGGHDLRDLT" + PFAM_domain 6416..6820 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_abc_0006" + /database="Pfam-A.hmm" + /description="AAA domain" + /detection="hmmscan" + /domain="AAA_17" + /evalue="2.50E-03" + /label="abc" + /locus_tag="abc" + /note="Pfam-A.hmm-Hit: AAA_17. Score: 18.6. E-value: + 0.0025. Domain range: 2..101." + /score="18.6" + /translation="ALVGPSGAGKSTITHLVSRLYDPNGGTVRLGGHDLRDLTFESLRE + AVGVVSQDAYLFHDTIRENLLYARPTATEDELMEACKGAQIRDLIDSLPLGLDTVTGDR + GYRMSGGEKQRLAIARLLLKEPSIVVLDEAT" + PFAM_domain 6734..6949 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_abc_0003" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF02463" + /description="RecF/RecN/SMC N terminal domain" + /detection="hmmscan" + /domain="SMC_N" + /evalue="3.10E-05" + /label="abc" + /locus_tag="abc" + /note="Pfam-A.hmm-Hit: SMC_N. Score: 23.2. E-value: + 3.1e-05. Domain range: 135..211." + /score="23.2" + /translation="RMSGGEKQRLAIARLLLKEPSIVVLDEATAHLDSESEAAVQRALK + TALHGRTSLVIAHRLSTIREADQILVI" + gene 7138..16635 + /gene="bpsA" + CDS 7138..16635 + /aSProdPred="leu-bht-asn" + /citation=[5] + /codon_start=1 + /db_xref="GI:15131492" + /db_xref="GOA:Q939Z1" + /db_xref="HSSP:1AMU" + /db_xref="InterPro:IPR000873" + /db_xref="InterPro:IPR001242" + /db_xref="InterPro:IPR006162" + /db_xref="InterPro:IPR006163" + /db_xref="InterPro:IPR009081" + /db_xref="InterPro:IPR010060" + /db_xref="InterPro:IPR010071" + /db_xref="InterPro:IPR020806" + /db_xref="InterPro:IPR020845" + /db_xref="UniProtKB/TrEMBL:Q939Z1" + /function="involved in the biosynthesis of the balhimycin + heptapeptide backbone" + /gene="bpsA" + /note="smCOG: SMCOG1002:AMP-dependent_synthetase_and_ligase + (Score: 355.9; E-value: 4.9e-108);" + /product="peptide synthetase" + /protein_id="CAC48360.1" + /sec_met="Type: nrps" + /sec_met="Domains detected: Condensation (E-value: 7.4e-70, + bitscore: 226.6, seeds: 42); AMP-binding (E-value: 4e-111, + bitscore: 362.7, seeds: 400); PP-binding (E-value: 1.6e-13, + bitscore: 42.3, seeds: 164)" + /sec_met="Kind: biosynthetic" + /sec_met="NRPS/PKS subtype: Glycopeptide NRPS" + /sec_met="NRPS/PKS Domain: AMP-binding (35-428). E-value: + 2.1e-110. Score: 360.7; NRPS/PKS Domain: bpsA_A1; Substrate + specificity predictions: leu (Stachelhaus code), leu + (NRPSPredictor3 SVM), leu (pHMM), Q939Z1_A1-leu (PrediCAT), + leu (SANDPUMA ensemble); PID to NN: 100.00; SNN score: + 4.26428826058 ." + /sec_met="NRPS/PKS Domain: PCP (504-569). E-value: 6.3e-21. + Score: 66.0;" + /sec_met="NRPS/PKS Domain: Cglyc (600-893). E-value: + 1.6e-133. Score: 436.1;" + /sec_met="NRPS/PKS Domain: AMP-binding (1058-1449). + E-value: 4.9e-111. Score: 362.7; NRPS/PKS Domain: bpsA_A2; + Substrate specificity predictions: bht|tyr (Stachelhaus + code), bht (NRPSPredictor3 SVM), bht (pHMM), Q939Z1_A2-bht + (PrediCAT), bht (SANDPUMA ensemble); PID to NN: 100.00; SNN + score: 3.5276738422 ." + /sec_met="NRPS/PKS Domain: PCP (1524-1592). E-value: + 9.5e-22. Score: 68.6;" + /sec_met="NRPS/PKS Domain: Epimerization (1605-1906). + E-value: 2.1e-143. Score: 470.4;" + /sec_met="NRPS/PKS Domain: Cglyc (2086-2381). E-value: + 9.3e-108. Score: 351.5;" + /sec_met="NRPS/PKS Domain: AMP-binding (2563-2970). + E-value: 1.7e-108. Score: 354.4; NRPS/PKS Domain: bpsA_A3; + Substrate specificity predictions: asn (Stachelhaus code), + asn (NRPSPredictor3 SVM), asn (pHMM), Q939Z1_A3-asn + (PrediCAT), asn (SANDPUMA ensemble); PID to NN: 100.00; SNN + score: 1.76689127463 ." + /sec_met="NRPS/PKS Domain: PCP (3052-3119). E-value: + 3.5e-22. Score: 70.0;" + /transl_table=11 + /translation="MNSAARTTPTMLDLFASHVDRTPDAVAVAGGDGVLTYRQLDERAG + RLAGRLASRGIRRGDRVAVVMDRSADLVVALLAVWKAGAAYVPVDAGYPAPRVAFMVAD + SAAKLVVCSAASRGAVPAGVESLEPAAAAEEGASDAPAATVRPGDPAYVMYTSGSTGTP + KGVTISQGCVAELTMDAGWAMEPGEAVLMHSPHAFDASLFELWMPLASGVRVVLAEPGS + VDARRLREAAAAGVTRVYLTAGSLRAVAEEAPESFAEFREVLTGGDVVPAHAVERVRTA + APRARFRNMYGPTEATMCATWHLLQPGDVVGPVVPIGRPLTGRRVQVLDASLRPVGPGV + VGDLYLSGALAEGYFNRAALTAERFVADPSAPGQRMYWTGDLAQWTADGELVFAGRADD + QVKIRGFRIEPGEIEAALIAQPDVHDAVVAAVDGRLIGYVVTEGDADPRVIRERLGAVL + PEHLVPAAVLALDALPLTGNGKVDRSALPAPEFAASAAGRAPSTDAERVLCGLFAEVLG + VARAGVDDGFFELGGDSIGAMRLAARAAKAGLLVTPAQIFEEPTPARLAAVARPVPAGG + PVDGPLLTLTAAEEAELALAAPGAEEIWPLAPLQEGLLFESILDDQGSDIYQVQVILEL + NGPVDAPRLRAAWDAVVRRHPELRLSFHRLASGKTVQAVHGDVTPPWRVVDLTGAGDVD + AAVAALVAEEQQQRFELATAPLVRLVLVRIAADRYRLLFVIHHILVDGWSVAVILNDVS + EAYEAGEPVPEQRGGATFRDYLAWLDRQDDDAARAAWRAELAGLDEPALIATSGVETEY + DYRATHLTPALHTRLLGFAREHGLTPSTVVHAAWAMVLARLTRRTDVVFGTMVATRPPE + LAGIESMPGLLMTAVPVRVPLDGGQSVLDMLTDLHSRQTALKRHQYLGLPEIQKAAGPG + ATFDTMLVVENYPREYARRYTHLRTIEGTHYPVTLGITPGDRFKIQLGYWPGQVPDTVA + ESLLEWFVGAIGALVADPAGLVGRIGMGAADVRRWDPPLQAGEPLPALVGRMAARPPDN + VAVVDGDGALSYADLWERSLKFAAVLRAHGVRSEDRVGLVVGRSAWWTVGMLGVLLAGG + TFVPVDPAYPAERKEWIFRSANPMLVVCAGATRGAVPAEFADRLVVIDEVDPAAGSAGD + LPRVDPRSAAYVIYTSGSTGTPKGVVVTHAGLGNLALAHIDRFGVSPSSRVLQFAALGF + DTIVSEVMMALLSGATLVVPPERDLPPRASFTDALERWDITHVKAPPSVLGTADVLPST + VETVVAAGELCPPGLVDRLSADRRMINAYGPTETTICATMSMPLSPGQHPIPFGKPVPG + VRGYLLDSFLRPLPPGVTGELYLAGIGVARGYLGRSALTAERFVADPFVPGERMYRTGD + LAYWTEQGELVSAGRADDQVKIRGFRVEPREIEFALSGYPRVTQAAVAVRDDRLVAYVT + PGDIDTQAVRAHLASRMPQYMVPAAVVALDALPLTAHGKIDRRALPDPDFTAGKQAREP + ATETERVLCELFAGVLGLARVGVDDSFFELGGDSILSMQLAARARRSGLTFTAADVFDG + KTPERIAQLAAESSVPEPGRSPKPDGVGDVAWTPVMWMLGDGVAGPAFAQWMVVGTPSD + LTEKALAAGFAAVVDTHDMLRARVVADEGGRRLVVGERGSVDVAGAVTRIRADGRSLDE + AVADAARAAVTRLDPSAGVMAQAVWVDAGPDQVGRLVVVAHHLSVDGVSWRILLSDLQA + ACEAAVAGREPVLEPVGASFKRWAGLLAEWAVSAERAGELAAWKAILGPGDRPAGAQAT + SRAAEGAVRSRSWVVPKVETAALAGRAPVAFHCGVNEVLLAGLAGAVARWRGGDAVLVD + VESHGRHPVDGTDLSRTVGWFTSAHPVRLDVAGTDLADVLAGGPAAGRLLKAVKEQSRA + VPGDGLGYGLLRYLNGTTGPVLADLPSPQIGFNYMGRFAAGEKSGVRAWQPVGDIGSSL + EPGMGLPHALEVNAIVQDLPDGPELTLMLEWQDGLLGEDEIDRLGRAWLDMLSGVARQA + ADPAAGGHTASDFDLVTLDQAEIEALEAEFAAAGGLAEVLPLSPLQHGLAFHAGYAGDG + VDVYTAQAVLELAGPLDVPLLRKSVRALLDRHANLRAGFRHGADGTAYQVVPGAVAVPV + TLVDVTESADPAAEAAAVAAAERARPFELARPPLLRVMVVVLGPDRHRLVLTNHHILLD + GWSTPLLLDELLTLYRNGAAPAALAPVTPYRDYLAWVRETDREAATEAWRDALAGLPEP + TLVAADRPVPVEVPEQIWTTLDETFAQALGARARECGVTVSTVLQAVWGMVLAALTGRD + DVVFGSVVSGRPAELPGIETMVGLFINTVPVRVRMRPQDTFAELVRGLQNEQVALLAHH + HVGLTDIQQAAGLGRLFDTIIVYENYPRPAEIGDESADADRVRVQGLTAADATHYPLAL + AVVPGTDLRLRLEHQPALFTAEQAGAVLERFTLVLEAVVADPRLPLAVVPILSDAERRQ + LQAGNDTALPVPDRTLPELFAAQAAATPEATAVVFEDRSLTYAELDARANQLARWLIDQ + GAGPEGLVAVLLPRSLELVVALLAVTKTGGAWLPIDPGYPADRIAFMLDDAGPALVITT + AVLSASPIGDVLAARSRTVVLDEPAAAGQLAGRDRAPVTDTDRARALDPRHPAYLIYTS + GSTGRPKAVVVTHRNLTNYLLHCGRMYPGLRGRSVLHSSIAFDLTVTATFTPLIVGGEI + HVGALEDLIGVVEAAPSIFLKATPSHLLTLDTASRGSAGSGDLLLGGEQLPADTVVQWR + RKYPNIVVVNEYGPTEATVGCVEYRLEPGQECPPGGVVPIGTPLANMRAFVLDSWLRLV + PPGAVGELYVAGAGLARGYLGRAGLTATRFVADPFGSGERMYRTGDLVQWNPDGQLVFA + GRVDDQVKVRGFRIEPGEIEAALVAQESVGQAVVVARDSEIGTRLIGYVTAAGESGVDE + AAVREGVAARLPQYMVPAALVVLGALPLTANGKVDRAALPDPDFGARAGGREPVTEAER + LLCALFAEVLGLERAGADDSFFELGGDSILSMRLAARAHREGMSFGAREVFEQRTPAGI + AAIVERVAGDRPVAAVHAVSDVALLDLDQGELDEFKAEFDDDSQPFADPGRY" + PFAM_domain 7180..8349 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsA_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00501" + /description="AMP-binding enzyme" + /detection="hmmscan" + /domain="AMP-binding" + /evalue="1.60E-97" + /label="bpsA" + /locus_tag="bpsA" + /note="Pfam-A.hmm-Hit: AMP-binding. Score: 326.5. E-value: + 1.6e-97. Domain range: 0..417." + /score="326.5" + /translation="FASHVDRTPDAVAVAGGDGVLTYRQLDERAGRLAGRLASRGIRRG + DRVAVVMDRSADLVVALLAVWKAGAAYVPVDAGYPAPRVAFMVADSAAKLVVCSAASRG + AVPAGVESLEPAAAAEEGASDAPAATVRPGDPAYVMYTSGSTGTPKGVTISQGCVAELT + MDAGWAMEPGEAVLMHSPHAFDASLFELWMPLASGVRVVLAEPGSVDARRLREAAAAGV + TRVYLTAGSLRAVAEEAPESFAEFREVLTGGDVVPAHAVERVRTAAPRARFRNMYGPTE + ATMCATWHLLQPGDVVGPVVPIGRPLTGRRVQVLDASLRPVGPGVVGDLYLSGALAEGY + FNRAALTAERFVADPSAPGQRMYWTGDLAQWTADGELVFAGRADDQVKIR" + aSDomain 7243..8421 + /asDomain_id="nrpspksdomains_bpsA_A1" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="AMP-binding" + /evalue="2.10E-110" + /label="bpsA_A1" + /locus_tag="bpsA" + /score="360.7" + /specificity="Stachelhaus code: leu" + /specificity="NRPSpredictor3 SVM: leu" + /specificity="pHMM: leu" + /specificity="PrediCAT Q939Z1_A1-leu" + /specificity="SANDPUMA ensemble: leu" + /specificity="PID to NN: 100.00" + /specificity="SNN score: 4.26428826058" + /translation="TYRQLDERAGRLAGRLASRGIRRGDRVAVVMDRSADLVVALLAVW + KAGAAYVPVDAGYPAPRVAFMVADSAAKLVVCSAASRGAVPAGVESLEPAAAAEEGASD + APAATVRPGDPAYVMYTSGSTGTPKGVTISQGCVAELTMDAGWAMEPGEAVLMHSPHAF + DASLFELWMPLASGVRVVLAEPGSVDARRLREAAAAGVTRVYLTAGSLRAVAEEAPESF + AEFREVLTGGDVVPAHAVERVRTAAPRARFRNMYGPTEATMCATWHLLQPGDVVGPVVP + IGRPLTGRRVQVLDASLRPVGPGVVGDLYLSGALAEGYFNRAALTAERFVADPSAPGQR + MYWTGDLAQWTADGELVFAGRADDQVKIRGFRIEPGEIEAALIAQPDVHDAVV" + CDS_motif 7366..7407 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0001" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.10E-05" + /label="NRPS-A_a2" + /locus_tag="bpsA" + /motif="NRPS-A_a2" + /note="NRPS/PKS Motif: NRPS-A_a2 (e-value: 1.1e-05, + bit-score: 18.5)" + /score="18.5" + /translation="LAVWKAGAAYVPVD" + CDS_motif 7588..7644 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0002" + /database="abmotifs" + /detection="hmmscan" + /evalue="9.50E-09" + /label="NRPS-A_a3" + /locus_tag="bpsA" + /motif="NRPS-A_a3" + /note="NRPS/PKS Motif: NRPS-A_a3 (e-value: 9.5e-09, + bit-score: 27.4)" + /score="27.4" + /translation="AYVMYTSGSTGTPKGVTIS" + CDS_motif 8008..8034 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0003" + /database="abmotifs" + /detection="hmmscan" + /evalue="8.00E-02" + /label="NRPS-A_a5" + /locus_tag="bpsA" + /motif="NRPS-A_a5" + /note="NRPS/PKS Motif: NRPS-A_a5 (e-value: 0.08, bit-score: + 6.2)" + /score="6.2" + /translation="YGPTEATMC" + CDS_motif 8149..8232 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0004" + /database="abmotifs" + /detection="hmmscan" + /evalue="5.30E-10" + /label="NRPS-A_a6" + /locus_tag="bpsA" + /motif="NRPS-A_a6" + /note="NRPS/PKS Motif: NRPS-A_a6 (e-value: 5.3e-10, + bit-score: 31.6)" + /score="31.6" + /translation="PGVVGDLYLSGALAEGYFNRAALTAERF" + CDS_motif 8314..8379 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0005" + /database="abmotifs" + /detection="hmmscan" + /evalue="2.50E-12" + /label="NRPS-A_a8" + /locus_tag="bpsA" + /motif="NRPS-A_a8" + /note="NRPS/PKS Motif: NRPS-A_a8 (e-value: 2.5e-12, + bit-score: 38.7)" + /score="38.7" + /translation="FAGRADDQVKIRGFRIEPGEIE" + PFAM_domain 8371..8577 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsA_0010" + /database="Pfam-A.hmm" + /description="AMP-binding enzyme C-terminal domain" + /detection="hmmscan" + /domain="AMP-binding_C" + /evalue="2.00E-09" + /label="bpsA" + /locus_tag="bpsA" + /note="Pfam-A.hmm-Hit: AMP-binding_C. Score: 38.2. E-value: + 2e-09. Domain range: 0..73." + /score="38.2" + /translation="EIEAALIAQPDVHDAVVAAVDGRLIGYVVTEGDADPRVIRERLGA + VLPEHLVPAAVLALDALPLTGNGK" + CDS_motif 8572..8598 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0006" + /database="abmotifs" + /detection="hmmscan" + /evalue="6.90E-01" + /label="NRPS-A_a10" + /locus_tag="bpsA" + /motif="NRPS-A_a10" + /note="NRPS/PKS Motif: NRPS-A_a10 (e-value: 0.69, + bit-score: 3.5)" + /score="3.5" + /translation="GKVDRSALP" + aSDomain 8650..8844 + /asDomain_id="nrpspksdomains_bpsA_Xdom01" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="PCP" + /evalue="6.30E-21" + /locus_tag="bpsA" + /score="66.0" + /translation="ERVLCGLFAEVLGVARAGVDDGFFELGGDSIGAMRLAARAAKAGL + LVTPAQIFEEPTPARLAAVA" + PFAM_domain 8656..8838 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsA_0007" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00550" + /description="Phosphopantetheine attachment site" + /detection="hmmscan" + /domain="PP-binding" + /evalue="6.50E-11" + /label="bpsA" + /locus_tag="bpsA" + /note="Pfam-A.hmm-Hit: PP-binding. Score: 42.3. E-value: + 6.5e-11. Domain range: 1..65." + /score="42.3" + /translation="VLCGLFAEVLGVARAGVDDGFFELGGDSIGAMRLAARAAKAGLLV + TPAQIFEEPTPARLAA" + CDS_motif 8713..8742 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0007" + /database="abmotifs" + /detection="hmmscan" + /evalue="5.60E-01" + /label="PCP_mE" + /locus_tag="bpsA" + /motif="PCP_mE" + /note="NRPS/PKS Motif: PCP_mE (e-value: 0.56, bit-score: + 4.0)" + /score="4.0" + /translation="GFFELGGDSI" + aSDomain 8938..9816 + /asDomain_id="nrpspksdomains_bpsA_Xdom02" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="Condensation" + /domain_subtype="Cglyc" + /evalue="1.60E-133" + /locus_tag="bpsA" + /score="436.1" + /translation="EEIWPLAPLQEGLLFESILDDQGSDIYQVQVILELNGPVDAPRLR + AAWDAVVRRHPELRLSFHRLASGKTVQAVHGDVTPPWRVVDLTGAGDVDAAVAALVAEE + QQQRFELATAPLVRLVLVRIAADRYRLLFVIHHILVDGWSVAVILNDVSEAYEAGEPVP + EQRGGATFRDYLAWLDRQDDDAARAAWRAELAGLDEPALIATSGVETEYDYRATHLTPA + LHTRLLGFAREHGLTPSTVVHAAWAMVLARLTRRTDVVFGTMVATRPPELAGIESMPGL + LMTAVPVRVPLD" + PFAM_domain 8938..9816 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsA_0004" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00668" + /description="Condensation domain" + /detection="hmmscan" + /domain="Condensation" + /evalue="3.10E-61" + /label="bpsA" + /locus_tag="bpsA" + /note="Pfam-A.hmm-Hit: Condensation. Score: 206.8. E-value: + 3.1e-61. Domain range: 1..301." + /score="206.8" + /translation="EEIWPLAPLQEGLLFESILDDQGSDIYQVQVILELNGPVDAPRLR + AAWDAVVRRHPELRLSFHRLASGKTVQAVHGDVTPPWRVVDLTGAGDVDAAVAALVAEE + QQQRFELATAPLVRLVLVRIAADRYRLLFVIHHILVDGWSVAVILNDVSEAYEAGEPVP + EQRGGATFRDYLAWLDRQDDDAARAAWRAELAGLDEPALIATSGVETEYDYRATHLTPA + LHTRLLGFAREHGLTPSTVVHAAWAMVLARLTRRTDVVFGTMVATRPPELAGIESMPGL + LMTAVPVRVPLD" + CDS_motif 8950..8982 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0008" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.40E-03" + /label="C1_dual_004-017" + /locus_tag="bpsA" + /motif="C1_dual_004-017" + /note="NRPS/PKS Motif: C1_dual_004-017 (e-value: 0.0014, + bit-score: 11.7)" + /score="11.7" + /translation="PLAPLQEGLLF" + CDS_motif 9013..9114 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0009" + /database="abmotifs" + /detection="hmmscan" + /evalue="2.40E-13" + /label="C2_DCL_024-062" + /locus_tag="bpsA" + /motif="C2_DCL_024-062" + /note="NRPS/PKS Motif: C2_DCL_024-062 (e-value: 2.4e-13, + bit-score: 42.3)" + /score="42.3" + /translation="IYQVQVILELNGPVDAPRLRAAWDAVVRRHPELR" + CDS_motif 9340..9405 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0010" + /database="abmotifs" + /detection="hmmscan" + /evalue="3.00E-09" + /label="C3_DCL_135-156" + /locus_tag="bpsA" + /motif="C3_DCL_135-156" + /note="NRPS/PKS Motif: C3_DCL_135-156 (e-value: 3e-09, + bit-score: 29.4)" + /score="29.4" + /translation="IHHILVDGWSVAVILNDVSEAY" + CDS_motif 9448..9483 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0011" + /database="abmotifs" + /detection="hmmscan" + /evalue="5.10E-02" + /label="C4_DCL_171-183" + /locus_tag="bpsA" + /motif="C4_DCL_171-183" + /note="NRPS/PKS Motif: C4_DCL_171-183 (e-value: 0.051, + bit-score: 6.9)" + /score="6.9" + /translation="FRDYLAWLDRQD" + CDS_motif 9709..9804 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0012" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.80E-12" + /label="C5_DCL_263-294" + /locus_tag="bpsA" + /motif="C5_DCL_263-294" + /note="NRPS/PKS Motif: C5_DCL_263-294 (e-value: 1.8e-12, + bit-score: 39.7)" + /score="39.7" + /translation="DVVFGTMVATRPPELAGIESMPGLLMTAVPVR" + CDS_motif 9856..9966 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0013" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.90E-15" + /label="C67_DCL_14fromHMM" + /locus_tag="bpsA" + /motif="C67_DCL_14fromHMM" + /note="NRPS/PKS Motif: C67_DCL_14fromHMM (e-value: 1.9e-15, + bit-score: 49.2)" + /score="49.2" + /translation="SRQTALKRHQYLGLPEIQKAAGPGATFDTMLVVENYP" + PFAM_domain 10255..11412 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsA_0002" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00501" + /description="AMP-binding enzyme" + /detection="hmmscan" + /domain="AMP-binding" + /evalue="6.90E-98" + /label="bpsA" + /locus_tag="bpsA" + /note="Pfam-A.hmm-Hit: AMP-binding. Score: 327.7. E-value: + 6.9e-98. Domain range: 2..417." + /score="327.7" + /translation="RMAARPPDNVAVVDGDGALSYADLWERSLKFAAVLRAHGVRSEDR + VGLVVGRSAWWTVGMLGVLLAGGTFVPVDPAYPAERKEWIFRSANPMLVVCAGATRGAV + PAEFADRLVVIDEVDPAAGSAGDLPRVDPRSAAYVIYTSGSTGTPKGVVVTHAGLGNLA + LAHIDRFGVSPSSRVLQFAALGFDTIVSEVMMALLSGATLVVPPERDLPPRASFTDALE + RWDITHVKAPPSVLGTADVLPSTVETVVAAGELCPPGLVDRLSADRRMINAYGPTETTI + CATMSMPLSPGQHPIPFGKPVPGVRGYLLDSFLRPLPPGVTGELYLAGIGVARGYLGRS + ALTAERFVADPFVPGERMYRTGDLAYWTEQGELVSAGRADDQVKIR" + aSDomain 10312..11484 + /asDomain_id="nrpspksdomains_bpsA_A2" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="AMP-binding" + /evalue="4.90E-111" + /label="bpsA_A2" + /locus_tag="bpsA" + /score="362.7" + /specificity="Stachelhaus code: bht|tyr" + /specificity="NRPSpredictor3 SVM: bht" + /specificity="pHMM: bht" + /specificity="PrediCAT Q939Z1_A2-bht" + /specificity="SANDPUMA ensemble: bht" + /specificity="PID to NN: 100.00" + /specificity="SNN score: 3.5276738422" + /translation="SYADLWERSLKFAAVLRAHGVRSEDRVGLVVGRSAWWTVGMLGVL + LAGGTFVPVDPAYPAERKEWIFRSANPMLVVCAGATRGAVPAEFADRLVVIDEVDPAAG + SAGDLPRVDPRSAAYVIYTSGSTGTPKGVVVTHAGLGNLALAHIDRFGVSPSSRVLQFA + ALGFDTIVSEVMMALLSGATLVVPPERDLPPRASFTDALERWDITHVKAPPSVLGTADV + LPSTVETVVAAGELCPPGLVDRLSADRRMINAYGPTETTICATMSMPLSPGQHPIPFGK + PVPGVRGYLLDSFLRPLPPGVTGELYLAGIGVARGYLGRSALTAERFVADPFVPGERMY + RTGDLAYWTEQGELVSAGRADDQVKIRGFRVEPREIEFALSGYPRVTQAAV" + CDS_motif 10435..10476 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0014" + /database="abmotifs" + /detection="hmmscan" + /evalue="7.00E-03" + /label="NRPS-A_a2" + /locus_tag="bpsA" + /motif="NRPS-A_a2" + /note="NRPS/PKS Motif: NRPS-A_a2 (e-value: 0.007, + bit-score: 10.1)" + /score="10.1" + /translation="LGVLLAGGTFVPVD" + CDS_motif 10663..10722 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0015" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.90E-10" + /label="NRPS-A_a3" + /locus_tag="bpsA" + /motif="NRPS-A_a3" + /note="NRPS/PKS Motif: NRPS-A_a3 (e-value: 1.9e-10, + bit-score: 32.6)" + /score="32.6" + /translation="AYVIYTSGSTGTPKGVVVTH" + CDS_motif 11074..11100 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0016" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.90E-02" + /label="NRPS-A_a5" + /locus_tag="bpsA" + /motif="NRPS-A_a5" + /note="NRPS/PKS Motif: NRPS-A_a5 (e-value: 0.019, + bit-score: 8.0)" + /score="8.0" + /translation="YGPTETTIC" + CDS_motif 11206..11295 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0017" + /database="abmotifs" + /detection="hmmscan" + /evalue="5.70E-16" + /label="NRPS-A_a6" + /locus_tag="bpsA" + /motif="NRPS-A_a6" + /note="NRPS/PKS Motif: NRPS-A_a6 (e-value: 5.7e-16, + bit-score: 50.6)" + /score="50.6" + /translation="PPGVTGELYLAGIGVARGYLGRSALTAERF" + CDS_motif 11383..11442 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0018" + /database="abmotifs" + /detection="hmmscan" + /evalue="2.20E-10" + /label="NRPS-A_a8" + /locus_tag="bpsA" + /motif="NRPS-A_a8" + /note="NRPS/PKS Motif: NRPS-A_a8 (e-value: 2.2e-10, + bit-score: 32.6)" + /score="32.6" + /translation="GRADDQVKIRGFRVEPREIE" + PFAM_domain 11434..11637 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsA_0011" + /database="Pfam-A.hmm" + /description="AMP-binding enzyme C-terminal domain" + /detection="hmmscan" + /domain="AMP-binding_C" + /evalue="1.40E-13" + /label="bpsA" + /locus_tag="bpsA" + /note="Pfam-A.hmm-Hit: AMP-binding_C. Score: 51.5. E-value: + 1.4e-13. Domain range: 0..73." + /score="51.5" + /translation="EIEFALSGYPRVTQAAVAVRDDRLVAYVTPGDIDTQAVRAHLASR + MPQYMVPAAVVALDALPLTAHGK" + CDS_motif 11632..11658 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0019" + /database="abmotifs" + /detection="hmmscan" + /evalue="2.00E-01" + /label="NRPS-A_a10" + /locus_tag="bpsA" + /motif="NRPS-A_a10" + /note="NRPS/PKS Motif: NRPS-A_a10 (e-value: 0.2, bit-score: + 4.9)" + /score="4.9" + /translation="GKIDRRALP" + aSDomain 11710..11913 + /asDomain_id="nrpspksdomains_bpsA_Xdom03" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="PCP" + /evalue="9.50E-22" + /locus_tag="bpsA" + /score="68.6" + /translation="ERVLCELFAGVLGLARVGVDDSFFELGGDSILSMQLAARARRSGL + TFTAADVFDGKTPERIAQLAAES" + PFAM_domain 11716..11901 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsA_0008" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00550" + /description="Phosphopantetheine attachment site" + /detection="hmmscan" + /domain="PP-binding" + /evalue="1.10E-10" + /label="bpsA" + /locus_tag="bpsA" + /note="Pfam-A.hmm-Hit: PP-binding. Score: 41.5. E-value: + 1.1e-10. Domain range: 1..66." + /score="41.5" + /translation="VLCELFAGVLGLARVGVDDSFFELGGDSILSMQLAARARRSGLTF + TAADVFDGKTPERIAQL" + CDS_motif 11773..11802 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0020" + /database="abmotifs" + /detection="hmmscan" + /evalue="2.00E-01" + /label="PCP_mE" + /locus_tag="bpsA" + /motif="PCP_mE" + /note="NRPS/PKS Motif: PCP_mE (e-value: 0.2, bit-score: + 5.2)" + /score="5.2" + /translation="SFFELGGDSI" + aSDomain 11953..12855 + /asDomain_id="nrpspksdomains_bpsA_Xdom04" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="Epimerization" + /evalue="2.10E-143" + /locus_tag="bpsA" + /score="470.4" + /translation="VGDVAWTPVMWMLGDGVAGPAFAQWMVVGTPSDLTEKALAAGFAA + VVDTHDMLRARVVADEGGRRLVVGERGSVDVAGAVTRIRADGRSLDEAVADAARAAVTR + LDPSAGVMAQAVWVDAGPDQVGRLVVVAHHLSVDGVSWRILLSDLQAACEAAVAGREPV + LEPVGASFKRWAGLLAEWAVSAERAGELAAWKAILGPGDRPAGAQATSRAAEGAVRSRS + WVVPKVETAALAGRAPVAFHCGVNEVLLAGLAGAVARWRGGDAVLVDVESHGRHPVDGT + DLSRTVGWFTSAHPVRLDVA" + PFAM_domain 12013..12852 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsA_0005" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00668" + /description="Condensation domain" + /detection="hmmscan" + /domain="Condensation" + /evalue="1.20E-29" + /label="bpsA" + /locus_tag="bpsA" + /note="Pfam-A.hmm-Hit: Condensation. Score: 103.1. E-value: + 1.2e-29. Domain range: 25..300." + /score="103.1" + /translation="AFAQWMVVGTPSDLTEKALAAGFAAVVDTHDMLRARVVADEGGRR + LVVGERGSVDVAGAVTRIRADGRSLDEAVADAARAAVTRLDPSAGVMAQAVWVDAGPDQ + VGRLVVVAHHLSVDGVSWRILLSDLQAACEAAVAGREPVLEPVGASFKRWAGLLAEWAV + SAERAGELAAWKAILGPGDRPAGAQATSRAAEGAVRSRSWVVPKVETAALAGRAPVAFH + CGVNEVLLAGLAGAVARWRGGDAVLVDVESHGRHPVDGTDLSRTVGWFTSAHPVRLDV" + CDS_motif 12088..12132 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0021" + /database="abmotifs" + /detection="hmmscan" + /evalue="9.50E-04" + /label="Cy2" + /locus_tag="bpsA" + /motif="Cy2" + /note="NRPS/PKS Motif: Cy2 (e-value: 0.00095, bit-score: + 11.9)" + /score="11.9" + /translation="VVDTHDMLRARVVAD" + CDS_motif 12334..12399 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0022" + /database="abmotifs" + /detection="hmmscan" + /evalue="4.00E-10" + /label="NRPS-E2" + /locus_tag="bpsA" + /motif="NRPS-E2" + /note="NRPS/PKS Motif: NRPS-E2 (e-value: 4e-10, bit-score: + 31.9)" + /score="31.9" + /translation="LVVVAHHLSVDGVSWRILLSDL" + CDS_motif 12796..12846 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0023" + /database="abmotifs" + /detection="hmmscan" + /evalue="2.80E-08" + /label="NRPS-E5" + /locus_tag="bpsA" + /motif="NRPS-E5" + /note="NRPS/PKS Motif: NRPS-E5 (e-value: 2.8e-08, + bit-score: 25.8)" + /score="25.8" + /translation="DLSRTVGWFTSAHPVRL" + CDS_motif 12916..12978 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0024" + /database="abmotifs" + /detection="hmmscan" + /evalue="9.80E-10" + /label="NRPS-E6" + /locus_tag="bpsA" + /motif="NRPS-E6" + /note="NRPS/PKS Motif: NRPS-E6 (e-value: 9.8e-10, + bit-score: 30.6)" + /score="30.6" + /translation="VKEQSRAVPGDGLGYGLLRYL" + aSDomain 13396..14280 + /asDomain_id="nrpspksdomains_bpsA_Xdom05" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="Condensation" + /domain_subtype="Cglyc" + /evalue="9.30E-108" + /locus_tag="bpsA" + /score="351.5" + /translation="LAEVLPLSPLQHGLAFHAGYAGDGVDVYTAQAVLELAGPLDVPLL + RKSVRALLDRHANLRAGFRHGADGTAYQVVPGAVAVPVTLVDVTESADPAAEAAAVAAA + ERARPFELARPPLLRVMVVVLGPDRHRLVLTNHHILLDGWSTPLLLDELLTLYRNGAAP + AALAPVTPYRDYLAWVRETDREAATEAWRDALAGLPEPTLVAADRPVPVEVPEQIWTTL + DETFAQALGARARECGVTVSTVLQAVWGMVLAALTGRDDVVFGSVVSGRPAELPGIETM + VGLFINTVPVRVRM" + PFAM_domain 13402..14280 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsA_0006" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00668" + /description="Condensation domain" + /detection="hmmscan" + /domain="Condensation" + /evalue="3.00E-67" + /label="bpsA" + /locus_tag="bpsA" + /note="Pfam-A.hmm-Hit: Condensation. Score: 226.6. E-value: + 3e-67. Domain range: 2..300." + /score="226.6" + /translation="EVLPLSPLQHGLAFHAGYAGDGVDVYTAQAVLELAGPLDVPLLRK + SVRALLDRHANLRAGFRHGADGTAYQVVPGAVAVPVTLVDVTESADPAAEAAAVAAAER + ARPFELARPPLLRVMVVVLGPDRHRLVLTNHHILLDGWSTPLLLDELLTLYRNGAAPAA + LAPVTPYRDYLAWVRETDREAATEAWRDALAGLPEPTLVAADRPVPVEVPEQIWTTLDE + TFAQALGARARECGVTVSTVLQAVWGMVLAALTGRDDVVFGSVVSGRPAELPGIETMVG + LFINTVPVRVRM" + CDS_motif 13411..13449 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0025" + /database="abmotifs" + /detection="hmmscan" + /evalue="2.40E-02" + /label="C1_DCL_004-017" + /locus_tag="bpsA" + /motif="C1_DCL_004-017" + /note="NRPS/PKS Motif: C1_DCL_004-017 (e-value: 0.024, + bit-score: 7.6)" + /score="7.6" + /translation="PLSPLQHGLAFHA" + CDS_motif 13471..13590 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0026" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.10E-16" + /label="C2_DCL_024-062" + /locus_tag="bpsA" + /motif="C2_DCL_024-062" + /note="NRPS/PKS Motif: C2_DCL_024-062 (e-value: 1.1e-16, + bit-score: 53.1)" + /score="53.1" + /translation="DVYTAQAVLELAGPLDVPLLRKSVRALLDRHANLRAGFRH" + CDS_motif 13801..13866 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0027" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.70E-11" + /label="C3_DCL_135-156" + /locus_tag="bpsA" + /motif="C3_DCL_135-156" + /note="NRPS/PKS Motif: C3_DCL_135-156 (e-value: 1.7e-11, + bit-score: 36.4)" + /score="36.4" + /translation="NHHILLDGWSTPLLLDELLTLY" + CDS_motif 13906..13944 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0028" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.20E-01" + /label="C4_DCL_171-183" + /locus_tag="bpsA" + /motif="C4_DCL_171-183" + /note="NRPS/PKS Motif: C4_DCL_171-183 (e-value: 0.12, + bit-score: 5.9)" + /score="5.9" + /translation="PYRDYLAWVRETD" + CDS_motif 14176..14271 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0029" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.40E-21" + /label="C5_DCL_263-294" + /locus_tag="bpsA" + /motif="C5_DCL_263-294" + /note="NRPS/PKS Motif: C5_DCL_263-294 (e-value: 1.4e-21, + bit-score: 68.9)" + /score="68.9" + /translation="DVVFGSVVSGRPAELPGIETMVGLFINTVPVR" + CDS_motif 14320..14433 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0030" + /database="abmotifs" + /detection="hmmscan" + /evalue="2.90E-18" + /label="C67_DCL_14fromHMM" + /locus_tag="bpsA" + /motif="C67_DCL_14fromHMM" + /note="NRPS/PKS Motif: C67_DCL_14fromHMM (e-value: 2.9e-18, + bit-score: 58.3)" + /score="58.3" + /translation="QNEQVALLAHHHVGLTDIQQAAGLGRLFDTIIVYENYP" + PFAM_domain 14764..15975 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsA_0003" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00501" + /description="AMP-binding enzyme" + /detection="hmmscan" + /domain="AMP-binding" + /evalue="2.40E-102" + /label="bpsA" + /locus_tag="bpsA" + /note="Pfam-A.hmm-Hit: AMP-binding. Score: 342.4. E-value: + 2.4e-102. Domain range: 0..417." + /score="342.4" + /translation="FAAQAAATPEATAVVFEDRSLTYAELDARANQLARWLIDQGAGPE + GLVAVLLPRSLELVVALLAVTKTGGAWLPIDPGYPADRIAFMLDDAGPALVITTAVLSA + SPIGDVLAARSRTVVLDEPAAAGQLAGRDRAPVTDTDRARALDPRHPAYLIYTSGSTGR + PKAVVVTHRNLTNYLLHCGRMYPGLRGRSVLHSSIAFDLTVTATFTPLIVGGEIHVGAL + EDLIGVVEAAPSIFLKATPSHLLTLDTASRGSAGSGDLLLGGEQLPADTVVQWRRKYPN + IVVVNEYGPTEATVGCVEYRLEPGQECPPGGVVPIGTPLANMRAFVLDSWLRLVPPGAV + GELYVAGAGLARGYLGRAGLTATRFVADPFGSGERMYRTGDLVQWNPDGQLVFAGRVDD + QVKVR" + aSDomain 14827..16047 + /asDomain_id="nrpspksdomains_bpsA_A3" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="AMP-binding" + /evalue="1.70E-108" + /label="bpsA_A3" + /locus_tag="bpsA" + /score="354.4" + /specificity="Stachelhaus code: asn" + /specificity="NRPSpredictor3 SVM: asn" + /specificity="pHMM: asn" + /specificity="PrediCAT Q939Z1_A3-asn" + /specificity="SANDPUMA ensemble: asn" + /specificity="PID to NN: 100.00" + /specificity="SNN score: 1.76689127463" + /translation="TYAELDARANQLARWLIDQGAGPEGLVAVLLPRSLELVVALLAVT + KTGGAWLPIDPGYPADRIAFMLDDAGPALVITTAVLSASPIGDVLAARSRTVVLDEPAA + AGQLAGRDRAPVTDTDRARALDPRHPAYLIYTSGSTGRPKAVVVTHRNLTNYLLHCGRM + YPGLRGRSVLHSSIAFDLTVTATFTPLIVGGEIHVGALEDLIGVVEAAPSIFLKATPSH + LLTLDTASRGSAGSGDLLLGGEQLPADTVVQWRRKYPNIVVVNEYGPTEATVGCVEYRL + EPGQECPPGGVVPIGTPLANMRAFVLDSWLRLVPPGAVGELYVAGAGLARGYLGRAGLT + ATRFVADPFGSGERMYRTGDLVQWNPDGQLVFAGRVDDQVKVRGFRIEPGEIEAALVAQ + ESVGQAVV" + CDS_motif 14950..14991 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0031" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.20E-02" + /label="NRPS-A_a2" + /locus_tag="bpsA" + /motif="NRPS-A_a2" + /note="NRPS/PKS Motif: NRPS-A_a2 (e-value: 0.012, + bit-score: 9.4)" + /score="9.4" + /translation="LAVTKTGGAWLPID" + CDS_motif 15217..15276 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0032" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.50E-09" + /label="NRPS-A_a3" + /locus_tag="bpsA" + /motif="NRPS-A_a3" + /note="NRPS/PKS Motif: NRPS-A_a3 (e-value: 1.5e-09, + bit-score: 29.8)" + /score="29.8" + /translation="AYLIYTSGSTGRPKAVVVTH" + CDS_motif 15622..15648 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0033" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.00E-01" + /label="NRPS-A_a5" + /locus_tag="bpsA" + /motif="NRPS-A_a5" + /note="NRPS/PKS Motif: NRPS-A_a5 (e-value: 0.1, bit-score: + 5.9)" + /score="5.9" + /translation="EYGPTEATV" + CDS_motif 15769..15858 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0034" + /database="abmotifs" + /detection="hmmscan" + /evalue="2.60E-15" + /label="NRPS-A_a6" + /locus_tag="bpsA" + /motif="NRPS-A_a6" + /note="NRPS/PKS Motif: NRPS-A_a6 (e-value: 2.6e-15, + bit-score: 48.5)" + /score="48.5" + /translation="PPGAVGELYVAGAGLARGYLGRAGLTATRF" + CDS_motif 15940..16005 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0035" + /database="abmotifs" + /detection="hmmscan" + /evalue="8.00E-12" + /label="NRPS-A_a8" + /locus_tag="bpsA" + /motif="NRPS-A_a8" + /note="NRPS/PKS Motif: NRPS-A_a8 (e-value: 8e-12, + bit-score: 37.1)" + /score="37.1" + /translation="FAGRVDDQVKVRGFRIEPGEIE" + PFAM_domain 15997..16221 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsA_0012" + /database="Pfam-A.hmm" + /description="AMP-binding enzyme C-terminal domain" + /detection="hmmscan" + /domain="AMP-binding_C" + /evalue="3.30E-13" + /label="bpsA" + /locus_tag="bpsA" + /note="Pfam-A.hmm-Hit: AMP-binding_C. Score: 50.3. E-value: + 3.3e-13. Domain range: 0..73." + /score="50.3" + /translation="EIEAALVAQESVGQAVVVARDSEIGTRLIGYVTAAGESGVDEAAV + REGVAARLPQYMVPAALVVLGALPLTANGK" + CDS_motif 16216..16242 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0036" + /database="abmotifs" + /detection="hmmscan" + /evalue="8.00E-01" + /label="NRPS-A_a10" + /locus_tag="bpsA" + /motif="NRPS-A_a10" + /note="NRPS/PKS Motif: NRPS-A_a10 (e-value: 0.8, bit-score: + 3.3)" + /score="3.3" + /translation="GKVDRAALP" + aSDomain 16294..16494 + /asDomain_id="nrpspksdomains_bpsA_Xdom06" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="PCP" + /evalue="3.50E-22" + /locus_tag="bpsA" + /score="70.0" + /translation="ERLLCALFAEVLGLERAGADDSFFELGGDSILSMRLAARAHREGM + SFGAREVFEQRTPAGIAAIVER" + PFAM_domain 16300..16482 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsA_0009" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00550" + /description="Phosphopantetheine attachment site" + /detection="hmmscan" + /domain="PP-binding" + /evalue="1.30E-10" + /label="bpsA" + /locus_tag="bpsA" + /note="Pfam-A.hmm-Hit: PP-binding. Score: 41.3. E-value: + 1.3e-10. Domain range: 1..65." + /score="41.3" + /translation="LLCALFAEVLGLERAGADDSFFELGGDSILSMRLAARAHREGMSF + GAREVFEQRTPAGIAA" + CDS_motif 16357..16386 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsA_0037" + /database="abmotifs" + /detection="hmmscan" + /evalue="2.00E-01" + /label="PCP_mE" + /locus_tag="bpsA" + /motif="PCP_mE" + /note="NRPS/PKS Motif: PCP_mE (e-value: 0.2, bit-score: + 5.2)" + /score="5.2" + /translation="SFFELGGDSI" + gene 16635..28868 + /gene="bpsB" + CDS 16635..28868 + /aSProdPred="hpg-hpg-bht" + /citation=[5] + /codon_start=1 + /db_xref="GI:15131493" + /db_xref="GOA:Q939Z0" + /db_xref="HSSP:1AMU" + /db_xref="InterPro:IPR000873" + /db_xref="InterPro:IPR001242" + /db_xref="InterPro:IPR006162" + /db_xref="InterPro:IPR006163" + /db_xref="InterPro:IPR009081" + /db_xref="InterPro:IPR010060" + /db_xref="InterPro:IPR010071" + /db_xref="InterPro:IPR020806" + /db_xref="InterPro:IPR020845" + /db_xref="UniProtKB/TrEMBL:Q939Z0" + /function="involved in the biosynthesis of the balhimycin + heptapeptide backbone" + /gene="bpsB" + /note="smCOG: + SMCOG1127:condensation_domain-containing_protein (Score: + 318.8; E-value: 1.1e-96);" + /product="peptide synthetase" + /protein_id="CAC48361.1" + /sec_met="Type: nrps" + /sec_met="Domains detected: Condensation (E-value: 7.9e-66, + bitscore: 213.3, seeds: 42); AMP-binding (E-value: + 5.8e-116, bitscore: 378.6, seeds: 400); PP-binding + (E-value: 7e-17, bitscore: 53.1, seeds: 164)" + /sec_met="Kind: biosynthetic" + /sec_met="NRPS/PKS subtype: Glycopeptide NRPS" + /sec_met="NRPS/PKS Domain: NRPS-COM_Nterm (1-28). E-value: + 8.7e-11. Score: 32.5;" + /sec_met="NRPS/PKS Domain: Cglyc (5-301). E-value: 3e-149. + Score: 487.7;" + /sec_met="NRPS/PKS Domain: AMP-binding (481-879). E-value: + 7.2e-116. Score: 378.6; NRPS/PKS Domain: bpsB_A1; Substrate + specificity predictions: hpg|hpg2cl (Stachelhaus code), hpg + (NRPSPredictor3 SVM), hpg-hpg2cl (pHMM), + Q939Z0_A1-4--hpg|hpg2cl (PrediCAT), hpg (SANDPUMA + ensemble); PID to NN: 100.00; SNN score: 5.44265382446 ." + /sec_met="NRPS/PKS Domain: PCP (957-1025). E-value: + 1.5e-22. Score: 71.2;" + /sec_met="NRPS/PKS Domain: Epimerization (1036-1332). + E-value: 1.8e-131. Score: 430.9;" + /sec_met="NRPS/PKS Domain: Cglyc (1506-1801). E-value: + 2.1e-142. Score: 465.2;" + /sec_met="NRPS/PKS Domain: AMP-binding (1980-2375). + E-value: 4.4e-107. Score: 349.7; NRPS/PKS Domain: bpsB_A2; + Substrate specificity predictions: hpg|hpg2cl|tyr + (Stachelhaus code), hpg (NRPSPredictor3 SVM), hpg-hpg2cl + (pHMM), Q939Z0_A2-5--hpg|hpg2cl (PrediCAT), hpg (SANDPUMA + ensemble); PID to NN: 100.00; SNN score: 5.51672079644 ." + /sec_met="NRPS/PKS Domain: PCP (2458-2526). E-value: + 2.2e-21. Score: 67.4;" + /sec_met="NRPS/PKS Domain: Epimerization (2537-2840). + E-value: 1.9e-128. Score: 420.9;" + /sec_met="NRPS/PKS Domain: Cglyc (3019-3315). E-value: + 9.6e-143. Score: 466.4;" + /sec_met="NRPS/PKS Domain: AMP-binding (3497-3887). + E-value: 5.6e-110. Score: 359.2; NRPS/PKS Domain: bpsB_A3; + Substrate specificity predictions: bht|tyr (Stachelhaus + code), bht (NRPSPredictor3 SVM), bht (pHMM), + Q939Z0_A3-6--bht (PrediCAT), bht (SANDPUMA ensemble); PID + to NN: 100.00; SNN score: 4.34362760424 ." + /sec_met="NRPS/PKS Domain: PCP (3974-4041). E-value: + 2.2e-21. Score: 67.5;" + /transl_table=11 + /translation="MSQSRIEEIWPLSPLQAGLLFHAVYDGEGPDVYIGHWILDLAGPV + DAAGLRAAWETLLARHAPLRACFRQRKSGETVQIIARQVELPWREVDLSHLDDPEEAVR + ELAEQDRTTRFDLAQAPLLRLTLIRLGADAHRLVVTCHHTIMDGWSLPIVIDELSVLYP + AGGDASALPDVPSYREYLAWLSRQDKERALSAWTAELSGAEEPTLVVPADPGRAPAEPE + SVEAHLPEHLTRSLAELARRHGLTLNTVVQGAWALVLAQLAGRPDVVFGAAVSARPPDL + PGVEGMVGLFLNTVPVRVRLRGSTPVVELLAELQKRQSALIPDQFVGLADIQQAAGPAA + VFDTLLVFEKFHHGPAGSDSAGTFRIHVNQGRVAAHYPLTLVAVPGESMYLKLDYLTEL + FDRETAFAILERFTGVLRQLTGAGELTVAGVEVTTAAERALVAGEWGASTSAPPSLPAL + DLFGHQVAHRRDEPAVVDGDRTVSYGELAERAERLAGYLNGRGVRRGDRVAVVLDRSPD + LIATLLAVWKAGAAYVPVDPAYPVERRKFMLADSGPAAVVCAEAYRAAVPDTCPEPIVL + DDPRTRQAVAESPRLSAGTSADDLAYVMYTSGSTGTPKGVAVSHGNVAALAGEPGWRVG + PGDAVLLHASHAFDISLFEMWVPLLSGARVVLAGPGAVDGAALAAYVAGGVTAAHLTAG + AFRVLADESPEAVAGLREVLTGGDAVPLAAVERVRGRVRNVRVRHLYGPTEATLCATWW + LLEPGDETGSVLPIGRPLAGRRVHVLDAFLRPVPPGVAGELYVAGAGVAQGYSSRPALT + AERFVADPSGSGARMYRTGDLAYWTEQGALAFAGRADDQVKIRGYRVEPGEIEVVLAGL + PGVGQAVVTPRGEHLIGYVVAEAGHDADPVRLREQLAGTLPEFMVPAAVLVLDELPLTV + NGKVDRRALPEPDFAAKSAGREPVTEAERVLCGVFADVLGLDHVGVDDSFFELGGDSIS + SMQVAARARREGISLTPRLVFEHRTPERLAALAQEAGATPRAEVVTGVGEIPWTPVMRA + LGDDAMRPGFAQVRVVVTPAGVNPDALVSALQAVLDAHDLLRARVEPDGRLIVPERGAV + AAAGLLTRVAAGTGGLDEIAEREVRTATGTLDPSAGIMARVVWIDAGDAEPGRLAFVAH + HLSVDAVSWGILLPDLRAAYDEVISGGTPALEPPVTSYRQWARRLTARALSESTVAELE + KWAAVVEGAEPALPQDTGQHTGQSHSWSTSLSGTEVRDLVTVLPGAFHCGIQDVLLAGL + AGAVARVRGSGAALLVDVEGHGREAADGEDLLRTVGWFTSVHPVRLELSDVDLAGAADG + ERPAGQLLKAVKEQIRAVPGDGSGYGLLRHLNPGTGARLAELPSAQIGFNYLGRTVLAP + EDTAWQPNGGGPLGGGPDMVLAHAVEVSAELQDTPAGPRLGLAIDTRDFDLATVERLGE + AWLEMLTGLAAVARGSGAGGHTPADFALVDLTQRDVAELEAAAPGLTDIWPLSPLQEGM + LFERAFDEDGVDVYQTQRILDLDGPLDEPRLRAAWNQVLARHASLRTGFHQLGSGATVQ + VVVREADIPWRVADLSHLDAAEAAAEVERLLAEDQGRRFDVTRPPLLRLLLIRLGADEH + RLVVTSHHVLLDGWSTPLVVGEMSDGYAGGRSSSKPPSYQDYLAWLSRQDAEATRSAWR + AELAGADEPTLVDADAGKTLVMPDEHAEWLPEPATRALAGFARGHGLTVSTIVLGAWAL + VLARLAGRTDVVFGSVVSGRPADVPDVERMVGMFINTVPARVRLDGRRPLLEMLEDLQA + RQAALTEHQYLGLPEIQKVAGTGAIFDTIVMVENYPHDAAGLGGDGGVAISSVVTRTGT + SYPLTMNVSLGDRLRITVSYRPDRIDDATAAEVARQVVRVLERVVAEPSLPVGRLGVTS + EPTRAAVVERWNSTGEAAAETSVLELFRRQAGASPDAVAVVAGERTLSYADLDRESDRL + AGHLAGIGVGRGDRVGVVMTRGADLFVALLGVWKAGAAQVPVNVDYPAERIERMLADVG + ASVAVCVEATRKAVPDGVEPVVVDLPVIGGVRPEAPPVTVGAHDVAYVMYTSGSTGVPK + AVAVPHGSVAALASDPGWSQGPGDCVLLHASHAFDASLVEIWVPLVSGARVLVAEPGTV + DAERLREAVSRGVTTVHLTAGAFRAVAEESPDSFIGLREILTGGDAVPLASVVRMRQAC + PDVRVRQLYGPTEITLCATWLVLEPGAATGDVLPIGRPLAGRQAYVLDAFLQPVAPNVT + GELYLAGAGLAHGYLGNTAATSERFVANPFSGGGRMYRTGDLARWTDQGELVFAGRADS + QVKIRGYRVEPGEVEVALTEVPHVAQAVVVAREGQPGEKRLIAYVTAEAGSALESAAVR + AHLATRLPEFMVPSVVVVLESFPLTLNGKIDRAALPAPEFAGKAAGREPRTEAERVLCG + LFAEILGLERVGADDGFFELGGDSILSMRLAARARRENFVFGAKQVFEQKTPAGIAAVA + ERGGQSRPAGVADGVGEVPWTPVVRALLERDPAGLTRGAMAQWVSVAAPRDLSVTALVA + GLGAVIDTHDMLRSRIVESEGVEPRLVVAGRGTVDAAALVERVEAGDGDLAEIADRCAH + DTAARLDPVAGVLVRAVWVDAGPGRAGRLVVAAHHLVVDVVSWRTLLPDLQAACEAVVA + GGQPALDPPDVSFRRWSRTLDGEAAIRTGELAVWTEILDGAQSRLGELDPRRDTVSTAG + RRSWTVPREHAGVLVEQVTSAFHCGVHEVLLATLAGAVAGWRGGTAVVVDVEGHGRQPL + GELDLSRTLGWFTDVHPLRLDVTGVDPAEAVAGGDAAGRLLKQVKENVRAVPDGGLGYG + MLRYLNAETGPVLAALPKAEIGFNYLGRFSAGSGGEAQPWQITGIVGGAAEQDTPLRHV + VEIDAVVVDGPDGPEFTLTVTWAGRMLGDAEAESLAQAWLDMLAGLAAHVAAGGPGGHT + PSDFPLTALTQREVAEFEAAVPGLLDIWPLSPLQEGLLFHAADDRGPDVYASMRTLAID + GPLDVARFRASWTVLLDRHPALRASFHQLESGEAVQVIARDVPPDWRETDLSGLPESEA + LAEFDRLAARMHAERFDLTKAPQLRLHLVRLGDRRYRLIFTSHHIVADGWSLPLILVDV + LTAYEAGGDGRTLPAATSYRDFLAWVDRQDKGAAGQAWRTELAGLDEATHVVPPGSIIT + PLEPERVAFELDDETSKRLVEFTRRHGVTANTLFQGVWALHLARLAGRNDVVFGAAVAG + RPPEIPGVESAVGLFMNMLPVRARLTGAEPVVDMLKDLQERQVAMMAHQHIGLPEIKQL + TGPGAAFDTIVVFENYPPAPPRSDDPDALVIRPVGIPNDTGHYPLSMRASVAAGPVRGE + FIYRPDVVDRTEAGEMVAAILRALEQVVAEPWTPVGQVGLIGPEQRRLVVDEWNRTDVP + LAAETLPVLFRRQAERSPDAVAVEDGARSLTFGGLLGEVEALARLLVGAGVRREHRVGV + LVERSAELAVTMMAVSFAGGVFVPVDPDYPRERVEFMLANSAPGVMVCTKTTRAAVPAE + FAGTVLVLDELPAADPDVELPPVAPEDAAYVIYTSGSTGVPKGVLVTHSGLANLGYAHI + ERMAVTSSSRVLQLSATGFDAIVSELYMALLAGATLVLPDAASMPPRVTLGEAIRRAGI + THLTVSPSVLASEDDLPDTLRTVLTGGEALPPALVDRWSPGRRVIQAYGPTETTICSTM + SAPLSPGHDQVPLGGPIHNVRHYVLDAFLQPVPPGVVGELYITGVGLARGYLGRPGLTA + ERFVASPFAPGERMYRSGDLFRWTREGQLLFAGRVDAQVKVRGYRVEPAEIEAVLAEHP + WVGQVAVSVRRDGPGDKQLVAYVVPSADAAAENGTLASALRELAAERLPEYMMPAAFVS + LEQMPLTPNGKLDHRALQAPDFAGMSSKRAPRTPMEARLCALFADVLGLDQVGPDDSFF + ELGGDSITSMQLSARARPTGLELTPWQVFDEKTPERLAVIVQELAAEGGTTPAPEPGEG + TLVALSPDQMDLLEAGLAGE" + aSDomain 16638..16718 + /asDomain_id="nrpspksdomains_bpsB_Xdom01" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="NRPS-COM_Nterm" + /evalue="8.70E-11" + /locus_tag="bpsB" + /score="32.5" + /translation="SQSRIEEIWPLSPLQAGLLFHAVYDGE" + aSDomain 16650..17537 + /asDomain_id="nrpspksdomains_bpsB_Xdom02" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="Condensation" + /domain_subtype="Cglyc" + /evalue="3.00E-149" + /locus_tag="bpsB" + /score="487.7" + /translation="IEEIWPLSPLQAGLLFHAVYDGEGPDVYIGHWILDLAGPVDAAGL + RAAWETLLARHAPLRACFRQRKSGETVQIIARQVELPWREVDLSHLDDPEEAVRELAEQ + DRTTRFDLAQAPLLRLTLIRLGADAHRLVVTCHHTIMDGWSLPIVIDELSVLYPAGGDA + SALPDVPSYREYLAWLSRQDKERALSAWTAELSGAEEPTLVVPADPGRAPAEPESVEAH + LPEHLTRSLAELARRHGLTLNTVVQGAWALVLAQLAGRPDVVFGAAVSARPPDLPGVEG + MVGLFLNTVPVRVRL" + PFAM_domain 16653..17537 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsB_0004" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00668" + /description="Condensation domain" + /detection="hmmscan" + /domain="Condensation" + /evalue="3.20E-63" + /label="bpsB" + /locus_tag="bpsB" + /note="Pfam-A.hmm-Hit: Condensation. Score: 213.3. E-value: + 3.2e-63. Domain range: 1..300." + /score="213.3" + /translation="EEIWPLSPLQAGLLFHAVYDGEGPDVYIGHWILDLAGPVDAAGLR + AAWETLLARHAPLRACFRQRKSGETVQIIARQVELPWREVDLSHLDDPEEAVRELAEQD + RTTRFDLAQAPLLRLTLIRLGADAHRLVVTCHHTIMDGWSLPIVIDELSVLYPAGGDAS + ALPDVPSYREYLAWLSRQDKERALSAWTAELSGAEEPTLVVPADPGRAPAEPESVEAHL + PEHLTRSLAELARRHGLTLNTVVQGAWALVLAQLAGRPDVVFGAAVSARPPDLPGVEGM + VGLFLNTVPVRVRL" + CDS_motif 16662..16703 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0001" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.50E-03" + /label="C1_DCL_004-017" + /locus_tag="bpsB" + /motif="C1_DCL_004-017" + /note="NRPS/PKS Motif: C1_DCL_004-017 (e-value: 0.0015, + bit-score: 11.0)" + /score="11.0" + /translation="WPLSPLQAGLLFHA" + CDS_motif 16725..16844 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0002" + /database="abmotifs" + /detection="hmmscan" + /evalue="5.80E-15" + /label="C2_DCL_024-062" + /locus_tag="bpsB" + /motif="C2_DCL_024-062" + /note="NRPS/PKS Motif: C2_DCL_024-062 (e-value: 5.8e-15, + bit-score: 47.5)" + /score="47.5" + /translation="DVYIGHWILDLAGPVDAAGLRAAWETLLARHAPLRACFRQ" + CDS_motif 17055..17120 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0003" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.20E-09" + /label="C3_DCL_135-156" + /locus_tag="bpsB" + /motif="C3_DCL_135-156" + /note="NRPS/PKS Motif: C3_DCL_135-156 (e-value: 1.2e-09, + bit-score: 30.6)" + /score="30.6" + /translation="CHHTIMDGWSLPIVIDELSVLY" + CDS_motif 17163..17198 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0004" + /database="abmotifs" + /detection="hmmscan" + /evalue="2.80E-02" + /label="C4_DCL_171-183" + /locus_tag="bpsB" + /motif="C4_DCL_171-183" + /note="NRPS/PKS Motif: C4_DCL_171-183 (e-value: 0.028, + bit-score: 7.7)" + /score="7.7" + /translation="YREYLAWLSRQD" + CDS_motif 17364..17414 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0005" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.10E+00" + /label="Cy4" + /locus_tag="bpsB" + /motif="Cy4" + /note="NRPS/PKS Motif: Cy4 (e-value: 1.1, bit-score: 2.6)" + /score="2.6" + /translation="GLTLNTVVQGAWALVLA" + CDS_motif 17433..17528 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0006" + /database="abmotifs" + /detection="hmmscan" + /evalue="2.60E-18" + /label="C5_DCL_263-294" + /locus_tag="bpsB" + /motif="C5_DCL_263-294" + /note="NRPS/PKS Motif: C5_DCL_263-294 (e-value: 2.6e-18, + bit-score: 58.4)" + /score="58.4" + /translation="DVVFGAAVSARPPDLPGVEGMVGLFLNTVPVR" + CDS_motif 17577..17687 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0007" + /database="abmotifs" + /detection="hmmscan" + /evalue="4.20E-14" + /label="C67_DCL_14fromHMM" + /locus_tag="bpsB" + /motif="C67_DCL_14fromHMM" + /note="NRPS/PKS Motif: C67_DCL_14fromHMM (e-value: 4.2e-14, + bit-score: 44.9)" + /score="44.9" + /translation="QKRQSALIPDQFVGLADIQQAAGPAAVFDTLLVFEKF" + PFAM_domain 18015..19199 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsB_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00501" + /description="AMP-binding enzyme" + /detection="hmmscan" + /domain="AMP-binding" + /evalue="7.20E-105" + /label="bpsB" + /locus_tag="bpsB" + /note="Pfam-A.hmm-Hit: AMP-binding. Score: 350.7. E-value: + 7.2e-105. Domain range: 0..417." + /score="350.7" + /translation="FGHQVAHRRDEPAVVDGDRTVSYGELAERAERLAGYLNGRGVRRG + DRVAVVLDRSPDLIATLLAVWKAGAAYVPVDPAYPVERRKFMLADSGPAAVVCAEAYRA + AVPDTCPEPIVLDDPRTRQAVAESPRLSAGTSADDLAYVMYTSGSTGTPKGVAVSHGNV + AALAGEPGWRVGPGDAVLLHASHAFDISLFEMWVPLLSGARVVLAGPGAVDGAALAAYV + AGGVTAAHLTAGAFRVLADESPEAVAGLREVLTGGDAVPLAAVERVRGRVRNVRVRHLY + GPTEATLCATWWLLEPGDETGSVLPIGRPLAGRRVHVLDAFLRPVPPGVAGELYVAGAG + VAQGYSSRPALTAERFVADPSGSGARMYRTGDLAYWTEQGALAFAGRADDQVKIR" + aSDomain 18078..19271 + /asDomain_id="nrpspksdomains_bpsB_A1" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="AMP-binding" + /evalue="7.20E-116" + /label="bpsB_A1" + /locus_tag="bpsB" + /score="378.6" + /specificity="Stachelhaus code: hpg|hpg2cl" + /specificity="NRPSpredictor3 SVM: hpg" + /specificity="pHMM: hpg-hpg2cl" + /specificity="PrediCAT Q939Z0_A1-4--hpg|hpg2cl" + /specificity="SANDPUMA ensemble: hpg" + /specificity="PID to NN: 100.00" + /specificity="SNN score: 5.44265382446" + /translation="SYGELAERAERLAGYLNGRGVRRGDRVAVVLDRSPDLIATLLAVW + KAGAAYVPVDPAYPVERRKFMLADSGPAAVVCAEAYRAAVPDTCPEPIVLDDPRTRQAV + AESPRLSAGTSADDLAYVMYTSGSTGTPKGVAVSHGNVAALAGEPGWRVGPGDAVLLHA + SHAFDISLFEMWVPLLSGARVVLAGPGAVDGAALAAYVAGGVTAAHLTAGAFRVLADES + PEAVAGLREVLTGGDAVPLAAVERVRGRVRNVRVRHLYGPTEATLCATWWLLEPGDETG + SVLPIGRPLAGRRVHVLDAFLRPVPPGVAGELYVAGAGVAQGYSSRPALTAERFVADPS + GSGARMYRTGDLAYWTEQGALAFAGRADDQVKIRGYRVEPGEIEVVLAGLPGVGQAVV" + CDS_motif 18201..18242 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0008" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.40E-05" + /label="NRPS-A_a2" + /locus_tag="bpsB" + /motif="NRPS-A_a2" + /note="NRPS/PKS Motif: NRPS-A_a2 (e-value: 1.4e-05, + bit-score: 18.2)" + /score="18.2" + /translation="LAVWKAGAAYVPVD" + CDS_motif 18435..18494 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0009" + /database="abmotifs" + /detection="hmmscan" + /evalue="7.40E-10" + /label="NRPS-A_a3" + /locus_tag="bpsB" + /motif="NRPS-A_a3" + /note="NRPS/PKS Motif: NRPS-A_a3 (e-value: 7.4e-10, + bit-score: 30.8)" + /score="30.8" + /translation="AYVMYTSGSTGTPKGVAVSH" + CDS_motif 18855..18881 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0010" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.50E-01" + /label="NRPS-A_a5" + /locus_tag="bpsB" + /motif="NRPS-A_a5" + /note="NRPS/PKS Motif: NRPS-A_a5 (e-value: 0.15, bit-score: + 5.4)" + /score="5.4" + /translation="YGPTEATLC" + CDS_motif 18993..19082 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0011" + /database="abmotifs" + /detection="hmmscan" + /evalue="2.40E-15" + /label="NRPS-A_a6" + /locus_tag="bpsB" + /motif="NRPS-A_a6" + /note="NRPS/PKS Motif: NRPS-A_a6 (e-value: 2.4e-15, + bit-score: 48.6)" + /score="48.6" + /translation="PPGVAGELYVAGAGVAQGYSSRPALTAERF" + CDS_motif 19164..19229 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0012" + /database="abmotifs" + /detection="hmmscan" + /evalue="5.70E-12" + /label="NRPS-A_a8" + /locus_tag="bpsB" + /motif="NRPS-A_a8" + /note="NRPS/PKS Motif: NRPS-A_a8 (e-value: 5.7e-12, + bit-score: 37.5)" + /score="37.5" + /translation="FAGRADDQVKIRGYRVEPGEIE" + PFAM_domain 19221..19433 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsB_0009" + /database="Pfam-A.hmm" + /description="AMP-binding enzyme C-terminal domain" + /detection="hmmscan" + /domain="AMP-binding_C" + /evalue="3.50E-11" + /label="bpsB" + /locus_tag="bpsB" + /note="Pfam-A.hmm-Hit: AMP-binding_C. Score: 43.8. E-value: + 3.5e-11. Domain range: 0..73." + /score="43.8" + /translation="EIEVVLAGLPGVGQAVVTPRGEHLIGYVVAEAGHDADPVRLREQL + AGTLPEFMVPAAVLVLDELPLTVNGK" + CDS_motif 19428..19454 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0013" + /database="abmotifs" + /detection="hmmscan" + /evalue="2.90E-01" + /label="NRPS-A_a10" + /locus_tag="bpsB" + /motif="NRPS-A_a10" + /note="NRPS/PKS Motif: NRPS-A_a10 (e-value: 0.29, + bit-score: 4.4)" + /score="4.4" + /translation="GKVDRRALP" + aSDomain 19506..19709 + /asDomain_id="nrpspksdomains_bpsB_Xdom03" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="PCP" + /evalue="1.50E-22" + /locus_tag="bpsB" + /score="71.2" + /translation="ERVLCGVFADVLGLDHVGVDDSFFELGGDSISSMQVAARARREGI + SLTPRLVFEHRTPERLAALAQEA" + CDS_motif 19512..19544 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0014" + /database="abmotifs" + /detection="hmmscan" + /evalue="3.70E-01" + /label="Cy4" + /locus_tag="bpsB" + /motif="Cy4" + /note="NRPS/PKS Motif: Cy4 (e-value: 0.37, bit-score: 4.2)" + /score="4.2" + /translation="VLCGVFADVLG" + PFAM_domain 19512..19697 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsB_0012" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00550" + /description="Phosphopantetheine attachment site" + /detection="hmmscan" + /domain="PP-binding" + /evalue="2.50E-14" + /label="bpsB" + /locus_tag="bpsB" + /note="Pfam-A.hmm-Hit: PP-binding. Score: 53.2. E-value: + 2.5e-14. Domain range: 1..66." + /score="53.2" + /translation="VLCGVFADVLGLDHVGVDDSFFELGGDSISSMQVAARARREGISL + TPRLVFEHRTPERLAAL" + CDS_motif 19569..19598 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0015" + /database="abmotifs" + /detection="hmmscan" + /evalue="2.70E-01" + /label="PCP_mE" + /locus_tag="bpsB" + /motif="PCP_mE" + /note="NRPS/PKS Motif: PCP_mE (e-value: 0.27, bit-score: + 4.8)" + /score="4.8" + /translation="SFFELGGDSI" + aSDomain 19743..20630 + /asDomain_id="nrpspksdomains_bpsB_Xdom04" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="Epimerization" + /evalue="1.80E-131" + /locus_tag="bpsB" + /score="430.9" + /translation="VGEIPWTPVMRALGDDAMRPGFAQVRVVVTPAGVNPDALVSALQA + VLDAHDLLRARVEPDGRLIVPERGAVAAAGLLTRVAAGTGGLDEIAEREVRTATGTLDP + SAGIMARVVWIDAGDAEPGRLAFVAHHLSVDAVSWGILLPDLRAAYDEVISGGTPALEP + PVTSYRQWARRLTARALSESTVAELEKWAAVVEGAEPALPQDTGQHTGQSHSWSTSLSG + TEVRDLVTVLPGAFHCGIQDVLLAGLAGAVARVRGSGAALLVDVEGHGREAADGEDLLR + TVGWFTSVHPVRLEL" + PFAM_domain 19824..20630 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsB_0005" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00668" + /description="Condensation domain" + /detection="hmmscan" + /domain="Condensation" + /evalue="1.50E-29" + /label="bpsB" + /locus_tag="bpsB" + /note="Pfam-A.hmm-Hit: Condensation. Score: 102.8. E-value: + 1.5e-29. Domain range: 32..300." + /score="102.8" + /translation="VVTPAGVNPDALVSALQAVLDAHDLLRARVEPDGRLIVPERGAVA + AAGLLTRVAAGTGGLDEIAEREVRTATGTLDPSAGIMARVVWIDAGDAEPGRLAFVAHH + LSVDAVSWGILLPDLRAAYDEVISGGTPALEPPVTSYRQWARRLTARALSESTVAELEK + WAAVVEGAEPALPQDTGQHTGQSHSWSTSLSGTEVRDLVTVLPGAFHCGIQDVLLAGLA + GAVARVRGSGAALLVDVEGHGREAADGEDLLRTVGWFTSVHPVRLEL" + CDS_motif 19845..19904 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0016" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.50E-04" + /label="C2_dual_024-063" + /locus_tag="bpsB" + /motif="C2_dual_024-063" + /note="NRPS/PKS Motif: C2_dual_024-063 (e-value: 0.00015, + bit-score: 14.4)" + /score="14.4" + /translation="NPDALVSALQAVLDAHDLLR" + CDS_motif 20115..20180 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0017" + /database="abmotifs" + /detection="hmmscan" + /evalue="2.10E-09" + /label="NRPS-E2" + /locus_tag="bpsB" + /motif="NRPS-E2" + /note="NRPS/PKS Motif: NRPS-E2 (e-value: 2.1e-09, + bit-score: 29.7)" + /score="29.7" + /translation="LAFVAHHLSVDAVSWGILLPDL" + CDS_motif 20574..20624 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0018" + /database="abmotifs" + /detection="hmmscan" + /evalue="4.30E-07" + /label="NRPS-E5" + /locus_tag="bpsB" + /motif="NRPS-E5" + /note="NRPS/PKS Motif: NRPS-E5 (e-value: 4.3e-07, + bit-score: 22.2)" + /score="22.2" + /translation="DLLRTVGWFTSVHPVRL" + CDS_motif 20694..20756 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0019" + /database="abmotifs" + /detection="hmmscan" + /evalue="6.70E-09" + /label="NRPS-E6" + /locus_tag="bpsB" + /motif="NRPS-E6" + /note="NRPS/PKS Motif: NRPS-E6 (e-value: 6.7e-09, + bit-score: 28.0)" + /score="28.0" + /translation="VKEQIRAVPGDGSGYGLLRHL" + CDS_motif 20796..20825 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0020" + /database="abmotifs" + /detection="hmmscan" + /evalue="9.90E-02" + /label="NRPS-E7" + /locus_tag="bpsB" + /motif="NRPS-E7" + /note="NRPS/PKS Motif: NRPS-E7 (e-value: 0.099, bit-score: + 5.8)" + /score="5.8" + /translation="AQIGFNYLGR" + aSDomain 21153..22037 + /asDomain_id="nrpspksdomains_bpsB_Xdom05" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="Condensation" + /domain_subtype="Cglyc" + /evalue="2.10E-142" + /locus_tag="bpsB" + /score="465.2" + /translation="LTDIWPLSPLQEGMLFERAFDEDGVDVYQTQRILDLDGPLDEPRL + RAAWNQVLARHASLRTGFHQLGSGATVQVVVREADIPWRVADLSHLDAAEAAAEVERLL + AEDQGRRFDVTRPPLLRLLLIRLGADEHRLVVTSHHVLLDGWSTPLVVGEMSDGYAGGR + SSSKPPSYQDYLAWLSRQDAEATRSAWRAELAGADEPTLVDADAGKTLVMPDEHAEWLP + EPATRALAGFARGHGLTVSTIVLGAWALVLARLAGRTDVVFGSVVSGRPADVPDVERMV + GMFINTVPARVRLD" + PFAM_domain 21159..22034 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsB_0006" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00668" + /description="Condensation domain" + /detection="hmmscan" + /domain="Condensation" + /evalue="1.40E-62" + /label="bpsB" + /locus_tag="bpsB" + /note="Pfam-A.hmm-Hit: Condensation. Score: 211.2. E-value: + 1.4e-62. Domain range: 2..300." + /score="211.2" + /translation="DIWPLSPLQEGMLFERAFDEDGVDVYQTQRILDLDGPLDEPRLRA + AWNQVLARHASLRTGFHQLGSGATVQVVVREADIPWRVADLSHLDAAEAAAEVERLLAE + DQGRRFDVTRPPLLRLLLIRLGADEHRLVVTSHHVLLDGWSTPLVVGEMSDGYAGGRSS + SKPPSYQDYLAWLSRQDAEATRSAWRAELAGADEPTLVDADAGKTLVMPDEHAEWLPEP + ATRALAGFARGHGLTVSTIVLGAWALVLARLAGRTDVVFGSVVSGRPADVPDVERMVGM + FINTVPARVRL" + CDS_motif 21168..21200 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0021" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.60E-03" + /label="C1_dual_004-017" + /locus_tag="bpsB" + /motif="C1_dual_004-017" + /note="NRPS/PKS Motif: C1_dual_004-017 (e-value: 0.0016, + bit-score: 11.5)" + /score="11.5" + /translation="PLSPLQEGMLF" + CDS_motif 21228..21347 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0022" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.40E-18" + /label="C2_DCL_024-062" + /locus_tag="bpsB" + /motif="C2_DCL_024-062" + /note="NRPS/PKS Motif: C2_DCL_024-062 (e-value: 1.4e-18, + bit-score: 59.2)" + /score="59.2" + /translation="DVYQTQRILDLDGPLDEPRLRAAWNQVLARHASLRTGFHQ" + CDS_motif 21567..21629 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0023" + /database="abmotifs" + /detection="hmmscan" + /evalue="5.10E-07" + /label="C3_DCL_135-156" + /locus_tag="bpsB" + /motif="C3_DCL_135-156" + /note="NRPS/PKS Motif: C3_DCL_135-156 (e-value: 5.1e-07, + bit-score: 22.3)" + /score="22.3" + /translation="HHVLLDGWSTPLVVGEMSDGY" + CDS_motif 21663..21698 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0024" + /database="abmotifs" + /detection="hmmscan" + /evalue="3.10E-02" + /label="C4_DCL_171-183" + /locus_tag="bpsB" + /motif="C4_DCL_171-183" + /note="NRPS/PKS Motif: C4_DCL_171-183 (e-value: 0.031, + bit-score: 7.5)" + /score="7.5" + /translation="YQDYLAWLSRQD" + CDS_motif 21861..21914 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0025" + /database="abmotifs" + /detection="hmmscan" + /evalue="2.40E-02" + /label="Cy4" + /locus_tag="bpsB" + /motif="Cy4" + /note="NRPS/PKS Motif: Cy4 (e-value: 0.024, bit-score: + 7.9)" + /score="7.9" + /translation="GLTVSTIVLGAWALVLAR" + CDS_motif 21930..22025 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0026" + /database="abmotifs" + /detection="hmmscan" + /evalue="3.50E-19" + /label="C5_DCL_263-294" + /locus_tag="bpsB" + /motif="C5_DCL_263-294" + /note="NRPS/PKS Motif: C5_DCL_263-294 (e-value: 3.5e-19, + bit-score: 61.2)" + /score="61.2" + /translation="DVVFGSVVSGRPADVPDVERMVGMFINTVPAR" + CDS_motif 22074..22187 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0027" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.40E-16" + /label="C67_DCL_14fromHMM" + /locus_tag="bpsB" + /motif="C67_DCL_14fromHMM" + /note="NRPS/PKS Motif: C67_DCL_14fromHMM (e-value: 1.4e-16, + bit-score: 52.9)" + /score="52.9" + /translation="QARQAALTEHQYLGLPEIQKVAGTGAIFDTIVMVENYP" + PFAM_domain 22512..23687 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsB_0002" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00501" + /description="AMP-binding enzyme" + /detection="hmmscan" + /domain="AMP-binding" + /evalue="3.90E-95" + /label="bpsB" + /locus_tag="bpsB" + /note="Pfam-A.hmm-Hit: AMP-binding. Score: 318.7. E-value: + 3.9e-95. Domain range: 0..417." + /score="318.7" + /translation="FRRQAGASPDAVAVVAGERTLSYADLDRESDRLAGHLAGIGVGRG + DRVGVVMTRGADLFVALLGVWKAGAAQVPVNVDYPAERIERMLADVGASVAVCVEATRK + AVPDGVEPVVVDLPVIGGVRPEAPPVTVGAHDVAYVMYTSGSTGVPKAVAVPHGSVAAL + ASDPGWSQGPGDCVLLHASHAFDASLVEIWVPLVSGARVLVAEPGTVDAERLREAVSRG + VTTVHLTAGAFRAVAEESPDSFIGLREILTGGDAVPLASVVRMRQACPDVRVRQLYGPT + EITLCATWLVLEPGAATGDVLPIGRPLAGRQAYVLDAFLQPVAPNVTGELYLAGAGLAH + GYLGNTAATSERFVANPFSGGGRMYRTGDLARWTDQGELVFAGRADSQVKIR" + aSDomain 22575..23759 + /asDomain_id="nrpspksdomains_bpsB_A2" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="AMP-binding" + /evalue="4.40E-107" + /label="bpsB_A2" + /locus_tag="bpsB" + /score="349.7" + /specificity="Stachelhaus code: hpg|hpg2cl|tyr" + /specificity="NRPSpredictor3 SVM: hpg" + /specificity="pHMM: hpg-hpg2cl" + /specificity="PrediCAT Q939Z0_A2-5--hpg|hpg2cl" + /specificity="SANDPUMA ensemble: hpg" + /specificity="PID to NN: 100.00" + /specificity="SNN score: 5.51672079644" + /translation="SYADLDRESDRLAGHLAGIGVGRGDRVGVVMTRGADLFVALLGVW + KAGAAQVPVNVDYPAERIERMLADVGASVAVCVEATRKAVPDGVEPVVVDLPVIGGVRP + EAPPVTVGAHDVAYVMYTSGSTGVPKAVAVPHGSVAALASDPGWSQGPGDCVLLHASHA + FDASLVEIWVPLVSGARVLVAEPGTVDAERLREAVSRGVTTVHLTAGAFRAVAEESPDS + FIGLREILTGGDAVPLASVVRMRQACPDVRVRQLYGPTEITLCATWLVLEPGAATGDVL + PIGRPLAGRQAYVLDAFLQPVAPNVTGELYLAGAGLAHGYLGNTAATSERFVANPFSGG + GRMYRTGDLARWTDQGELVFAGRADSQVKIRGYRVEPGEVEVALTEVPHVAQAVV" + CDS_motif 22698..22736 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0028" + /database="abmotifs" + /detection="hmmscan" + /evalue="7.80E-03" + /label="NRPS-A_a2" + /locus_tag="bpsB" + /motif="NRPS-A_a2" + /note="NRPS/PKS Motif: NRPS-A_a2 (e-value: 0.0078, + bit-score: 10.0)" + /score="10.0" + /translation="LGVWKAGAAQVPV" + CDS_motif 22923..22982 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0029" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.30E-08" + /label="NRPS-A_a3" + /locus_tag="bpsB" + /motif="NRPS-A_a3" + /note="NRPS/PKS Motif: NRPS-A_a3 (e-value: 1.3e-08, + bit-score: 27.0)" + /score="27.0" + /translation="AYVMYTSGSTGVPKAVAVPH" + CDS_motif 23343..23369 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0030" + /database="abmotifs" + /detection="hmmscan" + /evalue="5.80E-01" + /label="NRPS-A_a5" + /locus_tag="bpsB" + /motif="NRPS-A_a5" + /note="NRPS/PKS Motif: NRPS-A_a5 (e-value: 0.58, bit-score: + 3.8)" + /score="3.8" + /translation="YGPTEITLC" + CDS_motif 23484..23570 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0031" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.20E-11" + /label="NRPS-A_a6" + /locus_tag="bpsB" + /motif="NRPS-A_a6" + /note="NRPS/PKS Motif: NRPS-A_a6 (e-value: 1.2e-11, + bit-score: 36.8)" + /score="36.8" + /translation="PNVTGELYLAGAGLAHGYLGNTAATSERF" + CDS_motif 23652..23717 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0032" + /database="abmotifs" + /detection="hmmscan" + /evalue="6.70E-11" + /label="NRPS-A_a8" + /locus_tag="bpsB" + /motif="NRPS-A_a8" + /note="NRPS/PKS Motif: NRPS-A_a8 (e-value: 6.7e-11, + bit-score: 34.2)" + /score="34.2" + /translation="FAGRADSQVKIRGYRVEPGEVE" + PFAM_domain 23709..23936 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsB_0010" + /database="Pfam-A.hmm" + /description="AMP-binding enzyme C-terminal domain" + /detection="hmmscan" + /domain="AMP-binding_C" + /evalue="3.50E-14" + /label="bpsB" + /locus_tag="bpsB" + /note="Pfam-A.hmm-Hit: AMP-binding_C. Score: 53.4. E-value: + 3.5e-14. Domain range: 0..73." + /score="53.4" + /translation="EVEVALTEVPHVAQAVVVAREGQPGEKRLIAYVTAEAGSALESAA + VRAHLATRLPEFMVPSVVVVLESFPLTLNGK" + CDS_motif 23931..23957 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0033" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.00E+00" + /label="NRPS-A_a10" + /locus_tag="bpsB" + /motif="NRPS-A_a10" + /note="NRPS/PKS Motif: NRPS-A_a10 (e-value: 1.0, bit-score: + 3.0)" + /score="3.0" + /translation="GKIDRAALP" + aSDomain 24009..24212 + /asDomain_id="nrpspksdomains_bpsB_Xdom06" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="PCP" + /evalue="2.20E-21" + /locus_tag="bpsB" + /score="67.4" + /translation="ERVLCGLFAEILGLERVGADDGFFELGGDSILSMRLAARARRENF + VFGAKQVFEQKTPAGIAAVAERG" + CDS_motif 24015..24047 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0034" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.60E+00" + /label="Cy4" + /locus_tag="bpsB" + /motif="Cy4" + /note="NRPS/PKS Motif: Cy4 (e-value: 1.6, bit-score: 2.1)" + /score="2.1" + /translation="VLCGLFAEILG" + PFAM_domain 24015..24197 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsB_0013" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00550" + /description="Phosphopantetheine attachment site" + /detection="hmmscan" + /domain="PP-binding" + /evalue="2.50E-09" + /label="bpsB" + /locus_tag="bpsB" + /note="Pfam-A.hmm-Hit: PP-binding. Score: 37.2. E-value: + 2.5e-09. Domain range: 1..65." + /score="37.2" + /translation="VLCGLFAEILGLERVGADDGFFELGGDSILSMRLAARARRENFVF + GAKQVFEQKTPAGIAA" + CDS_motif 24072..24101 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0035" + /database="abmotifs" + /detection="hmmscan" + /evalue="7.70E-01" + /label="PCP_mE" + /locus_tag="bpsB" + /motif="PCP_mE" + /note="NRPS/PKS Motif: PCP_mE (e-value: 0.77, bit-score: + 3.6)" + /score="3.6" + /translation="GFFELGGDSI" + aSDomain 24246..25154 + /asDomain_id="nrpspksdomains_bpsB_Xdom07" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="Epimerization" + /evalue="1.90E-128" + /locus_tag="bpsB" + /score="420.9" + /translation="VGEVPWTPVVRALLERDPAGLTRGAMAQWVSVAAPRDLSVTALVA + GLGAVIDTHDMLRSRIVESEGVEPRLVVAGRGTVDAAALVERVEAGDGDLAEIADRCAH + DTAARLDPVAGVLVRAVWVDAGPGRAGRLVVAAHHLVVDVVSWRTLLPDLQAACEAVVA + GGQPALDPPDVSFRRWSRTLDGEAAIRTGELAVWTEILDGAQSRLGELDPRRDTVSTAG + RRSWTVPREHAGVLVEQVTSAFHCGVHEVLLATLAGAVAGWRGGTAVVVDVEGHGRQPL + GELDLSRTLGWFTDVHPLRLDV" + PFAM_domain 24318..25154 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsB_0007" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00668" + /description="Condensation domain" + /detection="hmmscan" + /domain="Condensation" + /evalue="5.60E-33" + /label="bpsB" + /locus_tag="bpsB" + /note="Pfam-A.hmm-Hit: Condensation. Score: 114.0. E-value: + 5.6e-33. Domain range: 25..300." + /score="114.0" + /translation="AMAQWVSVAAPRDLSVTALVAGLGAVIDTHDMLRSRIVESEGVEP + RLVVAGRGTVDAAALVERVEAGDGDLAEIADRCAHDTAARLDPVAGVLVRAVWVDAGPG + RAGRLVVAAHHLVVDVVSWRTLLPDLQAACEAVVAGGQPALDPPDVSFRRWSRTLDGEA + AIRTGELAVWTEILDGAQSRLGELDPRRDTVSTAGRRSWTVPREHAGVLVEQVTSAFHC + GVHEVLLATLAGAVAGWRGGTAVVVDVEGHGRQPLGELDLSRTLGWFTDVHPLRLDV" + CDS_motif 24393..24431 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0036" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.60E-02" + /label="Cy2" + /locus_tag="bpsB" + /motif="Cy2" + /note="NRPS/PKS Motif: Cy2 (e-value: 0.016, bit-score: + 8.0)" + /score="8.0" + /translation="VIDTHDMLRSRIV" + CDS_motif 24642..24707 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0037" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.10E-09" + /label="NRPS-E2" + /locus_tag="bpsB" + /motif="NRPS-E2" + /note="NRPS/PKS Motif: NRPS-E2 (e-value: 1.1e-09, + bit-score: 30.5)" + /score="30.5" + /translation="LVVAAHHLVVDVVSWRTLLPDL" + CDS_motif 25098..25148 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0038" + /database="abmotifs" + /detection="hmmscan" + /evalue="2.30E-06" + /label="NRPS-E5" + /locus_tag="bpsB" + /motif="NRPS-E5" + /note="NRPS/PKS Motif: NRPS-E5 (e-value: 2.3e-06, + bit-score: 20.0)" + /score="20.0" + /translation="DLSRTLGWFTDVHPLRL" + CDS_motif 25218..25280 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0039" + /database="abmotifs" + /detection="hmmscan" + /evalue="9.00E-10" + /label="NRPS-E6" + /locus_tag="bpsB" + /motif="NRPS-E6" + /note="NRPS/PKS Motif: NRPS-E6 (e-value: 9e-10, bit-score: + 30.7)" + /score="30.7" + /translation="VKENVRAVPDGGLGYGMLRYL" + CDS_motif 25320..25349 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0040" + /database="abmotifs" + /detection="hmmscan" + /evalue="8.30E-02" + /label="NRPS-E7" + /locus_tag="bpsB" + /motif="NRPS-E7" + /note="NRPS/PKS Motif: NRPS-E7 (e-value: 0.083, bit-score: + 6.0)" + /score="6.0" + /translation="AEIGFNYLGR" + aSDomain 25692..26579 + /asDomain_id="nrpspksdomains_bpsB_Xdom08" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="Condensation" + /domain_subtype="Cglyc" + /evalue="9.60E-143" + /locus_tag="bpsB" + /score="466.4" + /translation="LDIWPLSPLQEGLLFHAADDRGPDVYASMRTLAIDGPLDVARFRA + SWTVLLDRHPALRASFHQLESGEAVQVIARDVPPDWRETDLSGLPESEALAEFDRLAAR + MHAERFDLTKAPQLRLHLVRLGDRRYRLIFTSHHIVADGWSLPLILVDVLTAYEAGGDG + RTLPAATSYRDFLAWVDRQDKGAAGQAWRTELAGLDEATHVVPPGSIITPLEPERVAFE + LDDETSKRLVEFTRRHGVTANTLFQGVWALHLARLAGRNDVVFGAAVAGRPPEIPGVES + AVGLFMNMLPVRARL" + CDS_motif 25701..25742 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0041" + /database="abmotifs" + /detection="hmmscan" + /evalue="3.60E-04" + /label="C1_DCL_004-017" + /locus_tag="bpsB" + /motif="C1_DCL_004-017" + /note="NRPS/PKS Motif: C1_DCL_004-017 (e-value: 0.00036, + bit-score: 12.8)" + /score="12.8" + /translation="WPLSPLQEGLLFHA" + PFAM_domain 25701..26576 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsB_0008" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00668" + /description="Condensation domain" + /detection="hmmscan" + /domain="Condensation" + /evalue="1.00E-59" + /label="bpsB" + /locus_tag="bpsB" + /note="Pfam-A.hmm-Hit: Condensation. Score: 201.8. E-value: + 1e-59. Domain range: 4..299." + /score="201.8" + /translation="WPLSPLQEGLLFHAADDRGPDVYASMRTLAIDGPLDVARFRASWT + VLLDRHPALRASFHQLESGEAVQVIARDVPPDWRETDLSGLPESEALAEFDRLAARMHA + ERFDLTKAPQLRLHLVRLGDRRYRLIFTSHHIVADGWSLPLILVDVLTAYEAGGDGRTL + PAATSYRDFLAWVDRQDKGAAGQAWRTELAGLDEATHVVPPGSIITPLEPERVAFELDD + ETSKRLVEFTRRHGVTANTLFQGVWALHLARLAGRNDVVFGAAVAGRPPEIPGVESAVG + LFMNMLPVRAR" + CDS_motif 25779..25880 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0042" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.80E-14" + /label="C2_DCL_024-062" + /locus_tag="bpsB" + /motif="C2_DCL_024-062" + /note="NRPS/PKS Motif: C2_DCL_024-062 (e-value: 1.8e-14, + bit-score: 45.9)" + /score="45.9" + /translation="RTLAIDGPLDVARFRASWTVLLDRHPALRASFHQ" + CDS_motif 26100..26162 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0043" + /database="abmotifs" + /detection="hmmscan" + /evalue="9.90E-09" + /label="C3_DCL_135-156" + /locus_tag="bpsB" + /motif="C3_DCL_135-156" + /note="NRPS/PKS Motif: C3_DCL_135-156 (e-value: 9.9e-09, + bit-score: 27.7)" + /score="27.7" + /translation="HHIVADGWSLPLILVDVLTAY" + CDS_motif 26205..26240 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0044" + /database="abmotifs" + /detection="hmmscan" + /evalue="2.90E-01" + /label="C4_DCL_171-183" + /locus_tag="bpsB" + /motif="C4_DCL_171-183" + /note="NRPS/PKS Motif: C4_DCL_171-183 (e-value: 0.29, + bit-score: 4.8)" + /score="4.8" + /translation="YRDFLAWVDRQD" + CDS_motif 26475..26570 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0045" + /database="abmotifs" + /detection="hmmscan" + /evalue="5.50E-17" + /label="C5_DCL_263-294" + /locus_tag="bpsB" + /motif="C5_DCL_263-294" + /note="NRPS/PKS Motif: C5_DCL_263-294 (e-value: 5.5e-17, + bit-score: 54.2)" + /score="54.2" + /translation="DVVFGAAVAGRPPEIPGVESAVGLFMNMLPVR" + CDS_motif 26619..26732 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0046" + /database="abmotifs" + /detection="hmmscan" + /evalue="3.00E-15" + /label="C67_DCL_14fromHMM" + /locus_tag="bpsB" + /motif="C67_DCL_14fromHMM" + /note="NRPS/PKS Motif: C67_DCL_14fromHMM (e-value: 3e-15, + bit-score: 48.6)" + /score="48.6" + /translation="QERQVAMMAHQHIGLPEIKQLTGPGAAFDTIVVFENYP" + PFAM_domain 27063..28223 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsB_0003" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00501" + /description="AMP-binding enzyme" + /detection="hmmscan" + /domain="AMP-binding" + /evalue="6.30E-99" + /label="bpsB" + /locus_tag="bpsB" + /note="Pfam-A.hmm-Hit: AMP-binding. Score: 331.1. E-value: + 6.3e-99. Domain range: 0..417." + /score="331.1" + /translation="FRRQAERSPDAVAVEDGARSLTFGGLLGEVEALARLLVGAGVRRE + HRVGVLVERSAELAVTMMAVSFAGGVFVPVDPDYPRERVEFMLANSAPGVMVCTKTTRA + AVPAEFAGTVLVLDELPAADPDVELPPVAPEDAAYVIYTSGSTGVPKGVLVTHSGLANL + GYAHIERMAVTSSSRVLQLSATGFDAIVSELYMALLAGATLVLPDAASMPPRVTLGEAI + RRAGITHLTVSPSVLASEDDLPDTLRTVLTGGEALPPALVDRWSPGRRVIQAYGPTETT + ICSTMSAPLSPGHDQVPLGGPIHNVRHYVLDAFLQPVPPGVVGELYITGVGLARGYLGR + PGLTAERFVASPFAPGERMYRSGDLFRWTREGQLLFAGRVDAQVKVR" + aSDomain 27126..28295 + /asDomain_id="nrpspksdomains_bpsB_A3" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="AMP-binding" + /evalue="5.60E-110" + /label="bpsB_A3" + /locus_tag="bpsB" + /score="359.2" + /specificity="Stachelhaus code: bht|tyr" + /specificity="NRPSpredictor3 SVM: bht" + /specificity="pHMM: bht" + /specificity="PrediCAT Q939Z0_A3-6--bht" + /specificity="SANDPUMA ensemble: bht" + /specificity="PID to NN: 100.00" + /specificity="SNN score: 4.34362760424" + /translation="TFGGLLGEVEALARLLVGAGVRREHRVGVLVERSAELAVTMMAVS + FAGGVFVPVDPDYPRERVEFMLANSAPGVMVCTKTTRAAVPAEFAGTVLVLDELPAADP + DVELPPVAPEDAAYVIYTSGSTGVPKGVLVTHSGLANLGYAHIERMAVTSSSRVLQLSA + TGFDAIVSELYMALLAGATLVLPDAASMPPRVTLGEAIRRAGITHLTVSPSVLASEDDL + PDTLRTVLTGGEALPPALVDRWSPGRRVIQAYGPTETTICSTMSAPLSPGHDQVPLGGP + IHNVRHYVLDAFLQPVPPGVVGELYITGVGLARGYLGRPGLTAERFVASPFAPGERMYR + SGDLFRWTREGQLLFAGRVDAQVKVRGYRVEPAEIEAVLAEHPWVGQVAV" + CDS_motif 27249..27290 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0047" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.90E-01" + /label="NRPS-A_a2" + /locus_tag="bpsB" + /motif="NRPS-A_a2" + /note="NRPS/PKS Motif: NRPS-A_a2 (e-value: 0.19, bit-score: + 5.8)" + /score="5.8" + /translation="MAVSFAGGVFVPVD" + CDS_motif 27474..27533 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0048" + /database="abmotifs" + /detection="hmmscan" + /evalue="5.60E-10" + /label="NRPS-A_a3" + /locus_tag="bpsB" + /motif="NRPS-A_a3" + /note="NRPS/PKS Motif: NRPS-A_a3 (e-value: 5.6e-10, + bit-score: 31.1)" + /score="31.1" + /translation="AYVIYTSGSTGVPKGVLVTH" + CDS_motif 27885..27914 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0049" + /database="abmotifs" + /detection="hmmscan" + /evalue="2.10E-02" + /label="NRPS-A_a5" + /locus_tag="bpsB" + /motif="NRPS-A_a5" + /note="NRPS/PKS Motif: NRPS-A_a5 (e-value: 0.021, + bit-score: 7.8)" + /score="7.8" + /translation="YGPTETTICS" + CDS_motif 28017..28106 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0050" + /database="abmotifs" + /detection="hmmscan" + /evalue="4.40E-17" + /label="NRPS-A_a6" + /locus_tag="bpsB" + /motif="NRPS-A_a6" + /note="NRPS/PKS Motif: NRPS-A_a6 (e-value: 4.4e-17, + bit-score: 54.1)" + /score="54.1" + /translation="PPGVVGELYITGVGLARGYLGRPGLTAERF" + CDS_motif 28188..28253 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0051" + /database="abmotifs" + /detection="hmmscan" + /evalue="4.00E-10" + /label="NRPS-A_a8" + /locus_tag="bpsB" + /motif="NRPS-A_a8" + /note="NRPS/PKS Motif: NRPS-A_a8 (e-value: 4e-10, + bit-score: 31.8)" + /score="31.8" + /translation="FAGRVDAQVKVRGYRVEPAEIE" + PFAM_domain 28245..28484 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsB_0011" + /database="Pfam-A.hmm" + /description="AMP-binding enzyme C-terminal domain" + /detection="hmmscan" + /domain="AMP-binding_C" + /evalue="1.10E-15" + /label="bpsB" + /locus_tag="bpsB" + /note="Pfam-A.hmm-Hit: AMP-binding_C. Score: 58.2. E-value: + 1.1e-15. Domain range: 0..73." + /score="58.2" + /translation="EIEAVLAEHPWVGQVAVSVRRDGPGDKQLVAYVVPSADAAAENGT + LASALRELAAERLPEYMMPAAFVSLEQMPLTPNGK" + CDS_motif 28479..28502 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0052" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.70E+01" + /label="NRPS-A_a10" + /locus_tag="bpsB" + /motif="NRPS-A_a10" + /note="NRPS/PKS Motif: NRPS-A_a10 (e-value: 17.0, + bit-score: -0.2)" + /score="-0.2" + /translation="GKLDHRAL" + aSDomain 28557..28757 + /asDomain_id="nrpspksdomains_bpsB_Xdom09" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="PCP" + /evalue="2.20E-21" + /locus_tag="bpsB" + /score="67.5" + /translation="EARLCALFADVLGLDQVGPDDSFFELGGDSITSMQLSARARPTGL + ELTPWQVFDEKTPERLAVIVQE" + PFAM_domain 28563..28742 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsB_0014" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00550" + /description="Phosphopantetheine attachment site" + /detection="hmmscan" + /domain="PP-binding" + /evalue="7.90E-11" + /label="bpsB" + /locus_tag="bpsB" + /note="Pfam-A.hmm-Hit: PP-binding. Score: 42.0. E-value: + 7.9e-11. Domain range: 1..64." + /score="42.0" + /translation="RLCALFADVLGLDQVGPDDSFFELGGDSITSMQLSARARPTGLEL + TPWQVFDEKTPERLA" + CDS_motif 28620..28649 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsB_0053" + /database="abmotifs" + /detection="hmmscan" + /evalue="2.70E-01" + /label="PCP_mE" + /locus_tag="bpsB" + /motif="PCP_mE" + /note="NRPS/PKS Motif: PCP_mE (e-value: 0.27, bit-score: + 4.8)" + /score="4.8" + /translation="SFFELGGDSI" + gene 28888..34464 + /gene="bpsC" + CDS 28888..34464 + /aSProdPred="dpg" + /codon_start=1 + /db_xref="GI:15131494" + /db_xref="GOA:Q939Y9" + /db_xref="HSSP:1AMU" + /db_xref="InterPro:IPR000873" + /db_xref="InterPro:IPR001031" + /db_xref="InterPro:IPR001242" + /db_xref="InterPro:IPR006163" + /db_xref="InterPro:IPR009081" + /db_xref="InterPro:IPR010071" + /db_xref="InterPro:IPR020845" + /db_xref="UniProtKB/TrEMBL:Q939Y9" + /function="involved in the biosynthesis of the balhimycin + heptapeptide backbone" + /gene="bpsC" + /note="smCOG: + SMCOG1127:condensation_domain-containing_protein (Score: + 341.4; E-value: 1.6e-103);" + /product="peptide synthetase" + /protein_id="CAC48362.1" + /sec_met="Type: nrps" + /sec_met="Domains detected: Condensation (E-value: 1.7e-67, + bitscore: 218.8, seeds: 42); AMP-binding (E-value: + 5.2e-109, bitscore: 355.7, seeds: 400); PP-binding + (E-value: 2.5e-16, bitscore: 51.3, seeds: 164); PF00561 + (E-value: 4.1e-08, bitscore: 24.4, seeds: 48)" + /sec_met="Kind: biosynthetic" + /sec_met="NRPS/PKS subtype: NRPS" + /sec_met="NRPS/PKS Domain: NRPS-COM_Nterm (7-35). E-value: + 8.8e-13. Score: 38.9;" + /sec_met="NRPS/PKS Domain: Cglyc (13-311). E-value: 2e-147. + Score: 481.7;" + /sec_met="NRPS/PKS Domain: AMP-binding (492-887). E-value: + 6.5e-109. Score: 355.7; NRPS/PKS Domain: bpsC_A1; Substrate + specificity predictions: dpg (Stachelhaus code), dhpg + (NRPSPredictor3 SVM), dpg (pHMM), Q939Y9_A1-dpg (PrediCAT), + dpg (SANDPUMA ensemble); PID to NN: 100.00; SNN score: + 5.81506002215 ." + /sec_met="NRPS/PKS Domain: PCP (969-1037). E-value: + 2.2e-23. Score: 73.8;" + /sec_met="NRPS/PKS Domain: X (1047-1350). E-value: + 1.2e-178. Score: 584.0;" + /sec_met="NRPS/PKS Domain: Thioesterase (1621-1836). + E-value: 2.6e-39. Score: 127.5;" + /transl_table=11 + /translation="MTVDDTRAKRRSSVEDVWPLSPLQEGMLYHTALDDDGPDTYTVQT + VYGIDGPLDPGLLRASWQALVDRHAALRACFRYVSGAQMVQVIAREAEVPWRETDLSGL + PDDIAEGEVDRLAADEVAERLRIEAAPLMKLHLIRLGPDRHRLVHTLHHVLVDGWSMPI + LHRELAAIYAAGGDASGLPPTVSYRDYLAWLGRQDKEVARAAWRAELAGLDTPTTVAAP + DPARVPDIHTAVVELPAELTDGLAQFARGHDLTLNTVVQGAWAVVLAQLAGRDDVVFGA + TASGRPADLPGVEAMVGQLLNTLPVRVRLDGGRRAAELFARLQRDQSALMAHQHLGLQD + VQAVVGPGAVFDTLVIYENFPRKGLGRAPGGGLSLVPVKRGRNSSHYPFTLITGPGERM + PLILDYDRGLFDPAAAESVVGALARVLERLVAEPDVLVGRLTLASEAERALVVEGFNAT + AGPVPGESVLELFARRVAAAPDAVAITGAAGANLTYAEVDQASNRLAGYLAVRGVGRGD + RVGVAMERSPDLLIAFLAIWKAGAAYVPVDVEYPAERISFVFDDSGVSTVLCTLATSAV + APGNAIVLDAPETRVAVRDCAAPEIRPHADDLAYVMYTSGSTGLPKGVAIPHGAVAGLA + GDAGWQIGPGDGVLMHATHVFDPSLYAMWVPLVSGARVLLTEPGVLDAAGVRQAVHRGA + TFVHLTAGTFRALAETAPECFEGLVEIGTGGDVVPLQSVENLRRAQPGLRVRNTYGPTE + TTLCATWLPIEPGEVLGRELPIGHPMTNRRIYLLDAFLRPVPPGVAGELYIAGTGLAHG + YLKSPGLTAGRFVACPFAAGERMYRTGDRARWTRDGEVVFLGRADDQVKIRGYRVELGE + VEAALAAQPGVVEAVVTAREDQPGEKRLVGYFVSDGGDAGPVEIRRQLALVLPDYLVPI + AVVALPGLPVTPNGKVDRRALPAPDLAGHSPEKAPENETEKVLCALFAEILSIDQVGVD + DTFHDLGGSSALAMRLVARIREELGADLPIRQLFSSPTPAGLARALAAKSRPALEAAQR + PDRVPVTARQLRAWLLADPGGETAGLHTSVALRLHGRVDVPALAAALGDVAARHEILRT + TFPGDAQSVHQHVHDALAVELTPVGVTEEDLPGLLAERRDLLFDLTRDVPWRCDLFALS + DNEHVLHLQVHRILADDDSLDVFFRDLAAAYGARREGRVPERAPLALQFADYALWEQRL + LTDENEPGSLINEQVAFWRDNLAGLDGETVLPFDRPRPAVPSRRAGTVALRLEAGPHAR + LTEAAEPPGADTLEMVHAALAMLLAKLGAGHDVVIGTALPRDEELFDLEPMIGPFTRAL + ALRTDVSGDPTFLEVVARVQEAGQATGEHLDLPFERIVELLDLPASLARHPVFQVGLQV + DEEDIDGWAAAELPALRTAVEPGGTAAMELDLAVKLTERFDDDDNAGGLEGALHYATDL + FDEATAESVARRLVRVLEQVAEDPGRRISDLDVFLDDFERGRPPIAPARWAGAVPPVVA + ELAGDGPLGALLLDEQLRPVAPGAVGDLYVTGPAVDAGTATLATVPCPFGDEGHRMLHT + GLLARKTPAKTLVVVGERRRSSASVKTGDFEILLPLRAGGDRPPLFCVHASGGLSWNYE + PLLRYLPPNQPVYGVQARGLARTEPLPGSVEEMAADYLEQIRAVQPAGPYHLLGWSLGG + RIAQAMARLLEADGERLGLLALLDAYPVYMGRKTTGAASEEAALEQRNQQDLDLAGQLV + KGVAARSRLEAVMRNLWKVGPRHTRSPFAGDVLLFVATVDRPAHLPVPVAKASWKEFTS + GAVEAHEIPSNHYDMVQSAALGQIGAIVAEKLRSRPEGERTQR" + aSDomain 28909..28992 + /asDomain_id="nrpspksdomains_bpsC_Xdom01" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="NRPS-COM_Nterm" + /evalue="8.80E-13" + /locus_tag="bpsC" + /score="38.9" + /translation="AKRRSSVEDVWPLSPLQEGMLYHTALDD" + aSDomain 28927..29820 + /asDomain_id="nrpspksdomains_bpsC_Xdom02" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="Condensation" + /domain_subtype="Cglyc" + /evalue="2.00E-147" + /locus_tag="bpsC" + /score="481.7" + /translation="VEDVWPLSPLQEGMLYHTALDDDGPDTYTVQTVYGIDGPLDPGLL + RASWQALVDRHAALRACFRYVSGAQMVQVIAREAEVPWRETDLSGLPDDIAEGEVDRLA + ADEVAERLRIEAAPLMKLHLIRLGPDRHRLVHTLHHVLVDGWSMPILHRELAAIYAAGG + DASGLPPTVSYRDYLAWLGRQDKEVARAAWRAELAGLDTPTTVAAPDPARVPDIHTAVV + ELPAELTDGLAQFARGHDLTLNTVVQGAWAVVLAQLAGRDDVVFGATASGRPADLPGVE + AMVGQLLNTLPVRVRLD" + PFAM_domain 28930..29817 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsC_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00668" + /description="Condensation domain" + /detection="hmmscan" + /domain="Condensation" + /evalue="6.90E-65" + /label="bpsC" + /locus_tag="bpsC" + /note="Pfam-A.hmm-Hit: Condensation. Score: 218.8. E-value: + 6.9e-65. Domain range: 1..300." + /score="218.8" + /translation="EDVWPLSPLQEGMLYHTALDDDGPDTYTVQTVYGIDGPLDPGLLR + ASWQALVDRHAALRACFRYVSGAQMVQVIAREAEVPWRETDLSGLPDDIAEGEVDRLAA + DEVAERLRIEAAPLMKLHLIRLGPDRHRLVHTLHHVLVDGWSMPILHRELAAIYAAGGD + ASGLPPTVSYRDYLAWLGRQDKEVARAAWRAELAGLDTPTTVAAPDPARVPDIHTAVVE + LPAELTDGLAQFARGHDLTLNTVVQGAWAVVLAQLAGRDDVVFGATASGRPADLPGVEA + MVGQLLNTLPVRVRL" + CDS_motif 28942..28977 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsC_0001" + /database="abmotifs" + /detection="hmmscan" + /evalue="4.70E-05" + /label="C1_dual_004-017" + /locus_tag="bpsC" + /motif="C1_dual_004-017" + /note="NRPS/PKS Motif: C1_dual_004-017 (e-value: 4.7e-05, + bit-score: 16.1)" + /score="16.1" + /translation="PLSPLQEGMLYH" + CDS_motif 29002..29121 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsC_0002" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.40E-19" + /label="C2_DCL_024-062" + /locus_tag="bpsC" + /motif="C2_DCL_024-062" + /note="NRPS/PKS Motif: C2_DCL_024-062 (e-value: 1.4e-19, + bit-score: 62.4)" + /score="62.4" + /translation="DTYTVQTVYGIDGPLDPGLLRASWQALVDRHAALRACFRY" + CDS_motif 29338..29403 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsC_0003" + /database="abmotifs" + /detection="hmmscan" + /evalue="7.60E-11" + /label="C3_DCL_135-156" + /locus_tag="bpsC" + /motif="C3_DCL_135-156" + /note="NRPS/PKS Motif: C3_DCL_135-156 (e-value: 7.6e-11, + bit-score: 34.4)" + /score="34.4" + /translation="LHHVLVDGWSMPILHRELAAIY" + CDS_motif 29446..29466 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsC_0004" + /database="abmotifs" + /detection="hmmscan" + /evalue="5.70E+00" + /label="C4_LCL_164-176" + /locus_tag="bpsC" + /motif="C4_LCL_164-176" + /note="NRPS/PKS Motif: C4_LCL_164-176 (e-value: 5.7, + bit-score: 0.3)" + /score="0.3" + /translation="YRDYLAW" + CDS_motif 29464..29517 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsC_0005" + /database="abmotifs" + /detection="hmmscan" + /evalue="2.40E+01" + /label="NRPS-A_a8" + /locus_tag="bpsC" + /motif="NRPS-A_a8" + /note="NRPS/PKS Motif: NRPS-A_a8 (e-value: 24.0, bit-score: + -1.6)" + /score="-1.6" + /translation="WLGRQDKEVARAAWRAEL" + CDS_motif 29713..29808 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsC_0006" + /database="abmotifs" + /detection="hmmscan" + /evalue="3.40E-18" + /label="C5_DCL_263-294" + /locus_tag="bpsC" + /motif="C5_DCL_263-294" + /note="NRPS/PKS Motif: C5_DCL_263-294 (e-value: 3.4e-18, + bit-score: 58.0)" + /score="58.0" + /translation="DVVFGATASGRPADLPGVEAMVGQLLNTLPVR" + CDS_motif 29857..29970 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsC_0007" + /database="abmotifs" + /detection="hmmscan" + /evalue="2.40E-17" + /label="C67_DCL_14fromHMM" + /locus_tag="bpsC" + /motif="C67_DCL_14fromHMM" + /note="NRPS/PKS Motif: C67_DCL_14fromHMM (e-value: 2.4e-17, + bit-score: 55.3)" + /score="55.3" + /translation="QRDQSALMAHQHLGLQDVQAVVGPGAVFDTLVIYENFP" + CDS_motif 30142..30207 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsC_0008" + /database="abmotifs" + /detection="hmmscan" + /evalue="2.90E+00" + /label="C2_starter_024-063" + /locus_tag="bpsC" + /motif="C2_starter_024-063" + /note="NRPS/PKS Motif: C2_starter_024-063 (e-value: 2.9, + bit-score: 0.7)" + /score="0.7" + /translation="VGALARVLERLVAEPDVLVGRL" + PFAM_domain 30298..31476 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsC_0003" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00501" + /description="AMP-binding enzyme" + /detection="hmmscan" + /domain="AMP-binding" + /evalue="3.90E-96" + /label="bpsC" + /locus_tag="bpsC" + /note="Pfam-A.hmm-Hit: AMP-binding. Score: 322.0. E-value: + 3.9e-96. Domain range: 0..417." + /score="322.0" + /translation="FARRVAAAPDAVAITGAAGANLTYAEVDQASNRLAGYLAVRGVGR + GDRVGVAMERSPDLLIAFLAIWKAGAAYVPVDVEYPAERISFVFDDSGVSTVLCTLATS + AVAPGNAIVLDAPETRVAVRDCAAPEIRPHADDLAYVMYTSGSTGLPKGVAIPHGAVAG + LAGDAGWQIGPGDGVLMHATHVFDPSLYAMWVPLVSGARVLLTEPGVLDAAGVRQAVHR + GATFVHLTAGTFRALAETAPECFEGLVEIGTGGDVVPLQSVENLRRAQPGLRVRNTYGP + TETTLCATWLPIEPGEVLGRELPIGHPMTNRRIYLLDAFLRPVPPGVAGELYIAGTGLA + HGYLKSPGLTAGRFVACPFAAGERMYRTGDRARWTRDGEVVFLGRADDQVKIR" + aSDomain 30364..31548 + /asDomain_id="nrpspksdomains_bpsC_A1" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="AMP-binding" + /evalue="6.50E-109" + /label="bpsC_A1" + /locus_tag="bpsC" + /score="355.7" + /specificity="Stachelhaus code: dpg" + /specificity="NRPSpredictor3 SVM: dhpg" + /specificity="pHMM: dpg" + /specificity="PrediCAT Q939Y9_A1-dpg" + /specificity="SANDPUMA ensemble: dpg" + /specificity="PID to NN: 100.00" + /specificity="SNN score: 5.81506002215" + /translation="TYAEVDQASNRLAGYLAVRGVGRGDRVGVAMERSPDLLIAFLAIW + KAGAAYVPVDVEYPAERISFVFDDSGVSTVLCTLATSAVAPGNAIVLDAPETRVAVRDC + AAPEIRPHADDLAYVMYTSGSTGLPKGVAIPHGAVAGLAGDAGWQIGPGDGVLMHATHV + FDPSLYAMWVPLVSGARVLLTEPGVLDAAGVRQAVHRGATFVHLTAGTFRALAETAPEC + FEGLVEIGTGGDVVPLQSVENLRRAQPGLRVRNTYGPTETTLCATWLPIEPGEVLGREL + PIGHPMTNRRIYLLDAFLRPVPPGVAGELYIAGTGLAHGYLKSPGLTAGRFVACPFAAG + ERMYRTGDRARWTRDGEVVFLGRADDQVKIRGYRVELGEVEAALAAQPGVVEAVV" + CDS_motif 30487..30528 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsC_0009" + /database="abmotifs" + /detection="hmmscan" + /evalue="5.40E-06" + /label="NRPS-A_a2" + /locus_tag="bpsC" + /motif="NRPS-A_a2" + /note="NRPS/PKS Motif: NRPS-A_a2 (e-value: 5.4e-06, + bit-score: 19.4)" + /score="19.4" + /translation="LAIWKAGAAYVPVD" + CDS_motif 30712..30771 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsC_0010" + /database="abmotifs" + /detection="hmmscan" + /evalue="2.80E-09" + /label="NRPS-A_a3" + /locus_tag="bpsC" + /motif="NRPS-A_a3" + /note="NRPS/PKS Motif: NRPS-A_a3 (e-value: 2.8e-09, + bit-score: 29.0)" + /score="29.0" + /translation="AYVMYTSGSTGLPKGVAIPH" + CDS_motif 31270..31359 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsC_0011" + /database="abmotifs" + /detection="hmmscan" + /evalue="6.30E-14" + /label="NRPS-A_a6" + /locus_tag="bpsC" + /motif="NRPS-A_a6" + /note="NRPS/PKS Motif: NRPS-A_a6 (e-value: 6.3e-14, + bit-score: 44.1)" + /score="44.1" + /translation="PPGVAGELYIAGTGLAHGYLKSPGLTAGRF" + CDS_motif 31441..31506 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsC_0012" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.60E-12" + /label="NRPS-A_a8" + /locus_tag="bpsC" + /motif="NRPS-A_a8" + /note="NRPS/PKS Motif: NRPS-A_a8 (e-value: 1.6e-12, + bit-score: 39.3)" + /score="39.3" + /translation="FLGRADDQVKIRGYRVELGEVE" + PFAM_domain 31498..31722 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsC_0008" + /database="Pfam-A.hmm" + /description="AMP-binding enzyme C-terminal domain" + /detection="hmmscan" + /domain="AMP-binding_C" + /evalue="2.50E-12" + /label="bpsC" + /locus_tag="bpsC" + /note="Pfam-A.hmm-Hit: AMP-binding_C. Score: 47.5. E-value: + 2.5e-12. Domain range: 0..73." + /score="47.5" + /translation="EVEAALAAQPGVVEAVVTAREDQPGEKRLVGYFVSDGGDAGPVEI + RRQLALVLPDYLVPIAVVALPGLPVTPNGK" + CDS_motif 31543..31599 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsC_0013" + /database="abmotifs" + /detection="hmmscan" + /evalue="9.20E-03" + /label="C5_LCL_267-296" + /locus_tag="bpsC" + /motif="C5_LCL_267-296" + /note="NRPS/PKS Motif: C5_LCL_267-296 (e-value: 0.0092, + bit-score: 8.6)" + /score="8.6" + /translation="VVTAREDQPGEKRLVGYFV" + aSDomain 31795..31998 + /asDomain_id="nrpspksdomains_bpsC_Xdom03" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="PCP" + /evalue="2.20E-23" + /locus_tag="bpsC" + /score="73.8" + /translation="EKVLCALFAEILSIDQVGVDDTFHDLGGSSALAMRLVARIREELG + ADLPIRQLFSSPTPAGLARALAA" + PFAM_domain 31801..31992 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsC_0007" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00550" + /description="Phosphopantetheine attachment site" + /detection="hmmscan" + /domain="PP-binding" + /evalue="1.10E-13" + /label="bpsC" + /locus_tag="bpsC" + /note="Pfam-A.hmm-Hit: PP-binding. Score: 51.2. E-value: + 1.1e-13. Domain range: 1..67." + /score="51.2" + /translation="VLCALFAEILSIDQVGVDDTFHDLGGSSALAMRLVARIREELGAD + LPIRQLFSSPTPAGLARAL" + aSDomain 32029..32937 + /asDomain_id="nrpspksdomains_bpsC_Xdom04" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="X" + /evalue="1.20E-178" + /locus_tag="bpsC" + /score="584.0" + /translation="RPDRVPVTARQLRAWLLADPGGETAGLHTSVALRLHGRVDVPALA + AALGDVAARHEILRTTFPGDAQSVHQHVHDALAVELTPVGVTEEDLPGLLAERRDLLFD + LTRDVPWRCDLFALSDNEHVLHLQVHRILADDDSLDVFFRDLAAAYGARREGRVPERAP + LALQFADYALWEQRLLTDENEPGSLINEQVAFWRDNLAGLDGETVLPFDRPRPAVPSRR + AGTVALRLEAGPHARLTEAAEPPGADTLEMVHAALAMLLAKLGAGHDVVIGTALPRDEE + LFDLEPMIGPFTRALALRTDVS" + PFAM_domain 32035..32934 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsC_0002" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00668" + /description="Condensation domain" + /detection="hmmscan" + /domain="Condensation" + /evalue="5.80E-49" + /label="bpsC" + /locus_tag="bpsC" + /note="Pfam-A.hmm-Hit: Condensation. Score: 166.5. E-value: + 5.8e-49. Domain range: 2..300." + /score="166.5" + /translation="DRVPVTARQLRAWLLADPGGETAGLHTSVALRLHGRVDVPALAAA + LGDVAARHEILRTTFPGDAQSVHQHVHDALAVELTPVGVTEEDLPGLLAERRDLLFDLT + RDVPWRCDLFALSDNEHVLHLQVHRILADDDSLDVFFRDLAAAYGARREGRVPERAPLA + LQFADYALWEQRLLTDENEPGSLINEQVAFWRDNLAGLDGETVLPFDRPRPAVPSRRAG + TVALRLEAGPHARLTEAAEPPGADTLEMVHAALAMLLAKLGAGHDVVIGTALPRDEELF + DLEPMIGPFTRALALRTDV" + CDS_motif 32110..32217 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsC_0014" + /database="abmotifs" + /detection="hmmscan" + /evalue="2.90E-15" + /label="C2_LCL_024-062" + /locus_tag="bpsC" + /motif="C2_LCL_024-062" + /note="NRPS/PKS Motif: C2_LCL_024-062 (e-value: 2.9e-15, + bit-score: 48.6)" + /score="48.6" + /translation="HTSVALRLHGRVDVPALAAALGDVAARHEILRTTFP" + CDS_motif 32422..32478 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsC_0015" + /database="abmotifs" + /detection="hmmscan" + /evalue="3.40E-03" + /label="C3_LCL_132-143" + /locus_tag="bpsC" + /motif="C3_LCL_132-143" + /note="NRPS/PKS Motif: C3_LCL_132-143 (e-value: 0.0034, + bit-score: 10.2)" + /score="10.2" + /translation="ILADDDSLDVFFRDLAAAY" + CDS_motif 32527..32565 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsC_0016" + /database="abmotifs" + /detection="hmmscan" + /evalue="2.60E-03" + /label="C4_LCL_164-176" + /locus_tag="bpsC" + /motif="C4_LCL_164-176" + /note="NRPS/PKS Motif: C4_LCL_164-176 (e-value: 0.0026, + bit-score: 10.6)" + /score="10.6" + /translation="QFADYALWEQRLL" + CDS_motif 32857..32907 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsC_0017" + /database="abmotifs" + /detection="hmmscan" + /evalue="3.50E-01" + /label="C5_DCL_263-294" + /locus_tag="bpsC" + /motif="C5_DCL_263-294" + /note="NRPS/PKS Motif: C5_DCL_263-294 (e-value: 0.35, + bit-score: 3.5)" + /score="3.5" + /translation="PRDEELFDLEPMIGPFT" + CDS_motif 32998..33087 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsC_0018" + /database="abmotifs" + /detection="hmmscan" + /evalue="7.20E-09" + /label="C67_LCL_14fromHMM" + /locus_tag="bpsC" + /motif="C67_LCL_14fromHMM" + /note="NRPS/PKS Motif: C67_LCL_14fromHMM (e-value: 7.2e-09, + bit-score: 27.9)" + /score="27.9" + /translation="EHLDLPFERIVELLDLPASLARHPVFQVGL" + PFAM_domain 32998..33294 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsC_0005" + /database="Pfam-A.hmm" + /description="HxxPF-repeated domain" + /detection="hmmscan" + /domain="HxxPF_rpt" + /evalue="6.30E-19" + /label="bpsC" + /locus_tag="bpsC" + /note="Pfam-A.hmm-Hit: HxxPF_rpt. Score: 68.0. E-value: + 6.3e-19. Domain range: 0..90." + /score="68.0" + /translation="EHLDLPFERIVELLDLPASLARHPVFQVGLQVDEEDIDGWAAAEL + PALRTAVEPGGTAAMELDLAVKLTERFDDDDNAGGLEGALHYATDLFDEATAES" + CDS_motif 33133..33168 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsC_0019" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.60E+00" + /label="Cy2" + /locus_tag="bpsC" + /motif="Cy2" + /note="NRPS/PKS Motif: Cy2 (e-value: 1.6, bit-score: 1.8)" + /score="1.8" + /translation="PALRTAVEPGGT" + CDS_motif 33511..33561 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsC_0020" + /database="abmotifs" + /detection="hmmscan" + /evalue="8.40E-03" + /label="NRPS-A_a6" + /locus_tag="bpsC" + /motif="NRPS-A_a6" + /note="NRPS/PKS Motif: NRPS-A_a6 (e-value: 0.0084, + bit-score: 8.6)" + /score="8.6" + /translation="PGAVGDLYVTGPAVDAG" + aSDomain 33751..34395 + /asDomain_id="nrpspksdomains_bpsC_Xdom05" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="Thioesterase" + /evalue="2.60E-39" + /locus_tag="bpsC" + /score="127.5" + /translation="PPLFCVHASGGLSWNYEPLLRYLPPNQPVYGVQARGLARTEPLPG + SVEEMAADYLEQIRAVQPAGPYHLLGWSLGGRIAQAMARLLEADGERLGLLALLDAYPV + YMGRKTTGAASEEAALEQRNQQDLDLAGQLVKGVAARSRLEAVMRNLWKVGPRHTRSPF + AGDVLLFVATVDRPAHLPVPVAKASWKEFTSGAVEAHEIPSNHYDMVQSAAL" + PFAM_domain 33751..34395 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsC_0004" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00975" + /description="Thioesterase domain" + /detection="hmmscan" + /domain="Thioesterase" + /evalue="7.90E-37" + /label="bpsC" + /locus_tag="bpsC" + /note="Pfam-A.hmm-Hit: Thioesterase. Score: 127.6. E-value: + 7.9e-37. Domain range: 0..222." + /score="127.6" + /translation="PPLFCVHASGGLSWNYEPLLRYLPPNQPVYGVQARGLARTEPLPG + SVEEMAADYLEQIRAVQPAGPYHLLGWSLGGRIAQAMARLLEADGERLGLLALLDAYPV + YMGRKTTGAASEEAALEQRNQQDLDLAGQLVKGVAARSRLEAVMRNLWKVGPRHTRSPF + AGDVLLFVATVDRPAHLPVPVAKASWKEFTSGAVEAHEIPSNHYDMVQSAAL" + CDS_motif 33754..33783 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsC_0021" + /database="abmotifs" + /detection="hmmscan" + /evalue="8.40E-03" + /label="NRPS-beforete1" + /locus_tag="bpsC" + /motif="NRPS-beforete1" + /note="NRPS/PKS Motif: NRPS-beforete1 (e-value: 0.0084, + bit-score: 9.3)" + /score="9.3" + /translation="PLFCVHASGG" + PFAM_domain 33757..34377 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsC_0006" + /database="Pfam-A.hmm" + /description="Alpha/beta hydrolase family" + /detection="hmmscan" + /domain="Abhydrolase_6" + /evalue="2.40E-14" + /label="bpsC" + /locus_tag="bpsC" + /note="Pfam-A.hmm-Hit: Abhydrolase_6. Score: 53.7. E-value: + 2.4e-14. Domain range: 0..218." + /score="53.7" + /translation="LFCVHASGGLSWNYEPLLRYLPPNQPVYGVQARGLARTEPLPGSV + EEMAADYLEQIRAVQPAGPYHLLGWSLGGRIAQAMARLLEADGERLGLLALLDAYPVYM + GRKTTGAASEEAALEQRNQQDLDLAGQLVKGVAARSRLEAVMRNLWKVGPRHTRSPFAG + DVLLFVATVDRPAHLPVPVAKASWKEFTSGAVEAHEIPSNHYDM" + PFAM_domain 33760..34356 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsC_0009" + /database="Pfam-A.hmm" + /description="Alpha/beta hydrolase family" + /detection="hmmscan" + /domain="Abhydrolase_5" + /evalue="6.50E-04" + /label="bpsC" + /locus_tag="bpsC" + /note="Pfam-A.hmm-Hit: Abhydrolase_5. Score: 19.4. E-value: + 0.00065. Domain range: 2..139." + /score="19.4" + /translation="FCVHASGGLSWNYEPLLRYLPPNQPVYGVQARGLARTEPLPGSVE + EMAADYLEQIRAVQPAGPYHLLGWSLGGRIAQAMARLLEADGERLGLLALLDAYPVYMG + RKTTGAASEEAALEQRNQQDLDLAGQLVKGVAARSRLEAVMRNLWKVGPRHTRSPFAGD + VLLFVATVDRPAHLPVPVAKASWKEFTSGAVEAHEI" + CDS_motif 33934..34008 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsC_0022" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.30E-13" + /label="NRPS-te1" + /locus_tag="bpsC" + /motif="NRPS-te1" + /note="NRPS/PKS Motif: NRPS-te1 (e-value: 1.3e-13, + bit-score: 43.2)" + /score="43.2" + /translation="QPAGPYHLLGWSLGGRIAQAMARLL" + PFAM_domain 34461..34619 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_CAC48363.1_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF03621" + /description="MbtH-like protein" + /detection="hmmscan" + /domain="MbtH" + /evalue="5.30E-30" + /label="CAC48363.1" + /locus_tag="CAC48363.1" + /note="Pfam-A.hmm-Hit: MbtH. Score: 102.5. E-value: + 5.3e-30. Domain range: 0..54." + /score="102.5" + /translation="MSNPFDNEDGSFFVLVNDEGQHSLWPTFAEVPAGWTRVHGEAGRQ + ECLAYVEE" + CDS 34461..34670 + /codon_start=1 + /db_xref="GI:15131495" + /db_xref="InterPro:IPR005153" + /db_xref="InterPro:IPR015166" + /db_xref="UniProtKB/TrEMBL:Q939Y8" + /note="orf1" + /note="smCOG: SMCOG1009:mbtH-like_protein (Score: 119.2; + E-value: 7.9e-37);" + /product="hypothetical protein" + /protein_id="CAC48363.1" + /transl_table=11 + /translation="MSNPFDNEDGSFFVLVNDEGQHSLWPTFAEVPAGWTRVHGEAGRQ + ECLAYVEENWTDLRPKSLIREASA" + gene 34782..35957 + /gene="oxyA" + CDS 34782..35957 + /codon_start=1 + /db_xref="GI:3688114" + /db_xref="GOA:O87673" + /db_xref="HSSP:1LFK" + /db_xref="InterPro:IPR001128" + /db_xref="InterPro:IPR002397" + /db_xref="InterPro:IPR017972" + /db_xref="UniProtKB/TrEMBL:O87673" + /function="involved in the coupling of the aromatic side + chains of the heptapeptide" + /gene="oxyA" + /note="already deposited under Y16952" + /note="smCOG: SMCOG1007:cytochrome_P450 (Score: 359.8; + E-value: 3e-109);" + /product="P450 monooxygenase" + /protein_id="CAA76547.1" + /sec_met="Type: none" + /sec_met="Domains detected: p450 (E-value: 3.8e-11, + bitscore: 33.6, seeds: 50)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MFEESNALRGTEIHRRDRFAPGPELRSLMGEGTMSILQPPDSPGG + RTGWLATGHDEVRQVLGSDKFSAKLLYGGTVAGRIWPGFLNQYDPPEHTRLRRMVTSAF + TVRRMQDFRPRIEQIVQASLDAIEAAGGPVDFVPRFAWSVATTVTCDFLGIPRDDQADL + SRALHASRSERSGKRRVAAGNKYWTYMTEIAARARRDPGDDMFGAVVRDHGDAITDAEL + LGVAAFVMGAGGDQVARFLAAGAWLMVEHPDQFALLREKPDTVPDWLNEVERYLTSDEK + TTPRIAQEDVRIGDQLVKAGDAVTCSLLAANRRKFPAPEDEFDITRERPVHVTFGHGIH + HCLGRPLAEMVFRAAIPALAQRFPKLRLAEPDREIKLGPPPFDVEALLLEW" + PFAM_domain 35586..35870 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_oxyA_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00067" + /description="Cytochrome P450" + /detection="hmmscan" + /domain="p450" + /evalue="1.50E-08" + /label="oxyA" + /locus_tag="oxyA" + /note="Pfam-A.hmm-Hit: p450. Score: 33.6. E-value: 1.5e-08. + Domain range: 327..433." + /score="33.6" + /translation="LNEVERYLTSDEKTTPRIAQEDVRIGDQLVKAGDAVTCSLLAANR + RKFPAPEDEFDITRERPVHVTFGHGIHHCLGRPLAEMVFRAAIPALAQRF" + gene 36007..37203 + /gene="oxyB" + CDS 36007..37203 + /codon_start=1 + /db_xref="GI:3688115" + /db_xref="GOA:O87674" + /db_xref="HSSP:1LFK" + /db_xref="InterPro:IPR001128" + /db_xref="InterPro:IPR002397" + /db_xref="InterPro:IPR017972" + /db_xref="UniProtKB/TrEMBL:O87674" + /function="involved in the coupling of the aromatic side + chains of the heptapeptide" + /gene="oxyB" + /note="smCOG: SMCOG1007:cytochrome_P450 (Score: 413.9; + E-value: 1.1e-125);" + /product="P450 monooxygenase" + /protein_id="CAA76548.1" + /sec_met="Type: none" + /sec_met="Domains detected: p450 (E-value: 3.3e-16, + bitscore: 50.2, seeds: 50)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MNDDDPRPLHIRRQGLDPADELLAAGSLTRVTIGSGADAETHWMA + TAHALVRQVMGDHQRFSTRRRWDPRDEIGGTGTFRPRELVGNLMDYDPPEHTRLRQKLT + PGFTLRKMQRLQPYIEQIVNERLDEMARAGSPADLVAFVADKVPGAVLCELIGVPRDDR + ATFMQLCHAHLDASRSQKRRAAAGEAFSRYLLAMIARERKDPGEGLIGAVVAEYGDEAT + DEELRGFCVQVMLAGDDNISGMIGLGVLALLRHPEQIDALRGGEQPAQRAVDELIRYLT + VPYGPTPRIAKQDVTVGDQVIKAGESVICSLPAANRDPALVPDADRLDVTRDPVPHVAF + GHGIHHCLGAALARLELRTVFTALWRRFPDLRLADPAQETKFRLTTPAYGLTELMVAW" + PFAM_domain 36814..37110 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_oxyB_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00067" + /description="Cytochrome P450" + /detection="hmmscan" + /domain="p450" + /evalue="1.30E-13" + /label="oxyB" + /locus_tag="oxyB" + /note="Pfam-A.hmm-Hit: p450. Score: 50.2. E-value: 1.3e-13. + Domain range: 324..433." + /score="50.2" + /translation="QRAVDELIRYLTVPYGPTPRIAKQDVTVGDQVIKAGESVICSLPA + ANRDPALVPDADRLDVTRDPVPHVAFGHGIHHCLGAALARLELRTVFTALWRRF" + gene 37353..38573 + /gene="oxyC" + CDS 37353..38573 + /codon_start=1 + /db_xref="GI:3688116" + /db_xref="GOA:O87675" + /db_xref="HSSP:1UED" + /db_xref="InterPro:IPR001128" + /db_xref="InterPro:IPR002397" + /db_xref="InterPro:IPR017972" + /db_xref="UniProtKB/TrEMBL:O87675" + /function="involved in the coupling of the aromatic side + chains of the heptapeptide" + /gene="oxyC" + /note="smCOG: SMCOG1007:cytochrome_P450 (Score: 378.2; + E-value: 7.4e-115);" + /product="P450 monooxygenase" + /protein_id="CAA76549.1" + /sec_met="Type: none" + /sec_met="Domains detected: p450 (E-value: 2.6e-15, + bitscore: 47.3, seeds: 50)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MGHDIGQLAPLLPEPANFQLRTNCDPHADNFDLRAHGPLVRIAGD + SSAQLGREYVWQAHGYDVVRRILGDHENFTTRPQFTQAKSGAHVEAQFVGQISTYDPPE + HTRLRKMLTPEFTVRRIRRMEPAIQALVDDRLDRVAAEGPPADLQALFADPVGALALCE + LLGIPRDDQREFVRRIRRNTDLSRGLKARAADSAAFNRYLDNLIARQRRDADDGFLGMI + VREHGDTVTDEELKGLCTALILGGVETVAGMIGFGVLALLENPGQVPLLFAGPEQADRV + VNELLRYLSPVQAPNPSLAVKDVIIDGQLIKAGDYVLCSVLMANRDEALTPNPNVFDAN + RAAVSDVGFGHGIHYCVGAALARSMLRMAYQALWQRFPGLRLAVPIAEVKYRSAFVDCP + DRVPVTW" + PFAM_domain 37497..37778 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_oxyC_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00067" + /description="Cytochrome P450" + /detection="hmmscan" + /domain="p450" + /evalue="2.60E-04" + /label="oxyC" + /locus_tag="oxyC" + /note="Pfam-A.hmm-Hit: p450. Score: 19.6. E-value: 0.00026. + Domain range: 39..132." + /score="19.6" + /translation="QLGREYVWQAHGYDVVRRILGDHENFTTRPQFTQAKSGAHVEAQF + VGQISTYDPPEHTRLRKMLTPEFTVRRIRRMEPAIQALVDDRLDRVAAE" + PFAM_domain 37977..38147 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_oxyC_0002" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00067" + /description="Cytochrome P450" + /detection="hmmscan" + /domain="p450" + /evalue="2.80E-03" + /label="oxyC" + /locus_tag="oxyC" + /note="Pfam-A.hmm-Hit: p450. Score: 16.2. E-value: 0.0028. + Domain range: 233..293." + /score="16.2" + /translation="RQRRDADDGFLGMIVREHGDTVTDEELKGLCTALILGGVETVAGM + IGFGVLALLENP" + PFAM_domain 38187..38483 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_oxyC_0003" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00067" + /description="Cytochrome P450" + /detection="hmmscan" + /domain="p450" + /evalue="1.10E-12" + /label="oxyC" + /locus_tag="oxyC" + /note="Pfam-A.hmm-Hit: p450. Score: 47.3. E-value: 1.1e-12. + Domain range: 324..433." + /score="47.3" + /translation="DRVVNELLRYLSPVQAPNPSLAVKDVIIDGQLIKAGDYVLCSVLM + ANRDEALTPNPNVFDANRAAVSDVGFGHGIHYCVGAALARSMLRMAYQALWQRF" + gene 38634..40109 + /gene="bhaA" + CDS 38634..40109 + /codon_start=1 + /db_xref="GI:3688117" + /db_xref="GOA:O87676" + /db_xref="InterPro:IPR003042" + /db_xref="InterPro:IPR006905" + /db_xref="UniProtKB/TrEMBL:O87676" + /function="involved in the halogenation of balhimycin" + /gene="bhaA" + /note="smCOG: SMCOG1119:halogenase (Score: 489.4; E-value: + 1.8e-148);" + /note="smCOG tree PNG image: smcogs/bhaA.png" + /product="halogenase" + /protein_id="CAA76550.1" + /sec_met="Type: none" + /sec_met="Domains detected: Trp_halogenase (E-value: + 4.7e-77, bitscore: 250.9, seeds: 23)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MSVEDFDVVVAGGGPGGSTVATLVAMQGHRVLLLEKEVFPRYQIG + ESLLPATVHGVCRMLGISDELANAGFPIKRGGTFRWGARPEPWTFHFGISAKMAGSTSH + AYQVERARFDEMLLNNAKRKGVVVREGCAVTDVVEDGERVTGARYTDPDGTEREVSARF + VIDASGNKSRLYTKVGGSRNYSEFFRSLALFGYFEGGKRLPEPVSGNILSVAFDSGWFW + YIPLSDTLTSVGAVVRREDAEKIQGDREKALNTLIAECPLISEYLADATRVTTGRYGEL + RVRKDYSYQQETYWRPGMILVGDAACFVDPVFSSGVHLATYSALLAARSINSVLAGDLD + EKTALNEFELRYRREYGVFYEFLVSFYQMNVNEESYFWQAKKVTQNQSTDVESFVELIG + GVSSGETALTAADRIAARSAEFAAAVDEMAGGDGDNMVPMFKSTVVQQAMQEAGQVQMK + ALLGEDAEPELPLFPGGLVTSPERMKWLPHHPA" + PFAM_domain 38646..38747 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bhaA_0010" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF01946" + /description="Thi4 family" + /detection="hmmscan" + /domain="Thi4" + /evalue="6.40E-03" + /label="bhaA" + /locus_tag="bhaA" + /note="Pfam-A.hmm-Hit: Thi4. Score: 15.6. E-value: 0.0064. + Domain range: 16..50." + /score="15.6" + /translation="DFDVVVAGGGPGGSTVATLVAMQGHRVLLLEKEV" + PFAM_domain 38649..39653 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bhaA_0003" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF01494" + /description="FAD binding domain" + /detection="hmmscan" + /domain="FAD_binding_3" + /evalue="1.90E-21" + /label="bhaA" + /locus_tag="bhaA" + /note="Pfam-A.hmm-Hit: FAD_binding_3. Score: 76.5. E-value: + 1.9e-21. Domain range: 1..334." + /score="76.5" + /translation="FDVVVAGGGPGGSTVATLVAMQGHRVLLLEKEVFPRYQIGESLLP + ATVHGVCRMLGISDELANAGFPIKRGGTFRWGARPEPWTFHFGISAKMAGSTSHAYQVE + RARFDEMLLNNAKRKGVVVREGCAVTDVVEDGERVTGARYTDPDGTEREVSARFVIDAS + GNKSRLYTKVGGSRNYSEFFRSLALFGYFEGGKRLPEPVSGNILSVAFDSGWFWYIPLS + DTLTSVGAVVRREDAEKIQGDREKALNTLIAECPLISEYLADATRVTTGRYGELRVRKD + YSYQQETYWRPGMILVGDAACFVDPVFSSGVHLATYSALLAARSINSVLAGDLD" + PFAM_domain 38652..38744 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bhaA_0007" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00890" + /description="FAD binding domain" + /detection="hmmscan" + /domain="FAD_binding_2" + /evalue="9.40E-03" + /label="bhaA" + /locus_tag="bhaA" + /note="Pfam-A.hmm-Hit: FAD_binding_2. Score: 14.8. E-value: + 0.0094. Domain range: 0..31." + /score="14.8" + /translation="DVVVAGGGPGGSTVATLVAMQGHRVLLLEKE" + PFAM_domain 38652..38753 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bhaA_0004" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF01266" + /description="FAD dependent oxidoreductase" + /detection="hmmscan" + /domain="DAO" + /evalue="5.80E-03" + /label="bhaA" + /locus_tag="bhaA" + /note="Pfam-A.hmm-Hit: DAO. Score: 15.5. E-value: 0.0058. + Domain range: 0..34." + /score="15.5" + /translation="DVVVAGGGPGGSTVATLVAMQGHRVLLLEKEVFP" + PFAM_domain 38652..38918 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bhaA_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF04820" + /description="Tryptophan halogenase" + /detection="hmmscan" + /domain="Trp_halogenase" + /evalue="6.60E-24" + /label="bhaA" + /locus_tag="bhaA" + /note="Pfam-A.hmm-Hit: Trp_halogenase. Score: 84.3. + E-value: 6.6e-24. Domain range: 0..94." + /score="84.3" + /translation="DVVVAGGGPGGSTVATLVAMQGHRVLLLEKEVFPRYQIGESLLPA + TVHGVCRMLGISDELANAGFPIKRGGTFRWGARPEPWTFHFGIS" + PFAM_domain 38652..39140 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bhaA_0006" + /database="Pfam-A.hmm" + /description="FAD dependent oxidoreductase" + /detection="hmmscan" + /domain="FAD_oxidored" + /evalue="2.80E-11" + /label="bhaA" + /locus_tag="bhaA" + /note="Pfam-A.hmm-Hit: FAD_oxidored. Score: 43.1. E-value: + 2.8e-11. Domain range: 0..147." + /score="43.1" + /translation="DVVVAGGGPGGSTVATLVAMQGHRVLLLEKEVFPRYQIGESLLPA + TVHGVCRMLGISDELANAGFPIKRGGTFRWGARPEPWTFHFGISAKMAGSTSHAYQVER + ARFDEMLLNNAKRKGVVVREGCAVTDVVEDGERVTGARYTDPDGTEREVSARFVIDASG + " + PFAM_domain 38652..39176 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bhaA_0009" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF07992" + /description="Pyridine nucleotide-disulphide + oxidoreductase" + /detection="hmmscan" + /domain="Pyr_redox_2" + /evalue="7.90E-05" + /label="bhaA" + /locus_tag="bhaA" + /note="Pfam-A.hmm-Hit: Pyr_redox_2. Score: 22.6. E-value: + 7.9e-05. Domain range: 0..130." + /score="22.6" + /translation="DVVVAGGGPGGSTVATLVAMQGHRVLLLEKEVFPRYQIGESLLPA + TVHGVCRMLGISDELANAGFPIKRGGTFRWGARPEPWTFHFGISAKMAGSTSHAYQVER + ARFDEMLLNNAKRKGVVVREGCAVTDVVEDGERVTGARYTDPDGTEREVSARFVIDASG + NKSRLYTKVGGS" + PFAM_domain 38922..39155 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bhaA_0005" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF01266" + /description="FAD dependent oxidoreductase" + /detection="hmmscan" + /domain="DAO" + /evalue="3.80E-07" + /label="bhaA" + /locus_tag="bhaA" + /note="Pfam-A.hmm-Hit: DAO. Score: 29.3. E-value: 3.8e-07. + Domain range: 132..205." + /score="29.3" + /translation="KMAGSTSHAYQVERARFDEMLLNNAKRKGVVVREGCAVTDVVEDG + ERVTGARYTDPDGTEREVSARFVIDASGNKSRL" + PFAM_domain 38928..39839 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bhaA_0002" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF04820" + /description="Tryptophan halogenase" + /detection="hmmscan" + /domain="Trp_halogenase" + /evalue="1.90E-74" + /label="bhaA" + /locus_tag="bhaA" + /note="Pfam-A.hmm-Hit: Trp_halogenase. Score: 250.9. + E-value: 1.9e-74. Domain range: 141..416." + /score="250.9" + /translation="AGSTSHAYQVERARFDEMLLNNAKRKGVVVREGCAVTDVVEDGER + VTGARYTDPDGTEREVSARFVIDASGNKSRLYTKVGGSRNYSEFFRSLALFGYFEGGKR + LPEPVSGNILSVAFDSGWFWYIPLSDTLTSVGAVVRREDAEKIQGDREKALNTLIAECP + LISEYLADATRVTTGRYGELRVRKDYSYQQETYWRPGMILVGDAACFVDPVFSSGVHLA + TYSALLAARSINSVLAGDLDEKTALNEFELRYRREYGVFYEFLVSFYQMNVNEESYFWQ + AKKVTQNQSTDVESFVELIGGVS" + PFAM_domain 38937..39188 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bhaA_0008" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00890" + /description="FAD binding domain" + /detection="hmmscan" + /domain="FAD_binding_2" + /evalue="6.90E-04" + /label="bhaA" + /locus_tag="bhaA" + /note="Pfam-A.hmm-Hit: FAD_binding_2. Score: 18.5. E-value: + 0.00069. Domain range: 132..217." + /score="18.5" + /translation="TSHAYQVERARFDEMLLNNAKRKGVVVREGCAVTDVVEDGERVTG + ARYTDPDGTEREVSARFVIDASGNKSRLYTKVGGSRNYS" + gene 40192..41382 + /gene="bgtfA" + CDS 40192..41382 + /codon_start=1 + /db_xref="GI:3688118" + /db_xref="GOA:O87677" + /db_xref="HSSP:1PN3" + /db_xref="InterPro:IPR004276" + /db_xref="UniProtKB/TrEMBL:O87677" + /function="involved in glycosylation of balhimycin" + /gene="bgtfA" + /note="smCOG: SMCOG1102:glycosyltransferase (Score: 383.8; + E-value: 1.8e-116);" + /note="smCOG tree PNG image: smcogs/bgtfA.png" + /product="glycosyltransferase" + /protein_id="CAA76551.1" + /sec_met="Type: none" + /sec_met="Domains detected: Glyco_transf_28 (E-value: + 2.5e-28, bitscore: 90.1, seeds: 43)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MRVLISGCGSRGDTEPLIALAVRLRELGVDVRMCLPPDYVERCAE + VGVSMVAVGPAMRAGARGPGEPPPGAPEIVSEVVADWFDKVPAAAEGCDVVVATGLLPA + AVVVRSVAEKLGIPYLYTVLSPDHLPSVLSQAERDEYDQGADRLFGAVVTSGRAAIGLP + PVANLFTYGYTEQPWLGADQILAPPPPGDLDTVQTGAWILPDERPLPAELETFLAAGSP + PVYVGFGSSSGPRTAGAAKAAIEAIRARGHRVVLSRGWADLAAPDDSADCFTVGEVNLQ + VLFRRVAAAVHHDSAGTTLLAIRAGTPQIVVRRVIDNVVEQAYHADRVAELGVGVALEG + PIPASEAMSDALETALAPETRARAAEVAGTVRTDGTTVAAELLFAAVSREKPAVPA" + PFAM_domain 40198..40590 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bgtfA_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF03033" + /description="Glycosyltransferase family 28 N-terminal + domain" + /detection="hmmscan" + /domain="Glyco_transf_28" + /evalue="1.00E-25" + /label="bgtfA" + /locus_tag="bgtfA" + /note="Pfam-A.hmm-Hit: Glyco_transf_28. Score: 90.1. + E-value: 1e-25. Domain range: 0..138." + /score="90.1" + /translation="VLISGCGSRGDTEPLIALAVRLRELGVDVRMCLPPDYVERCAEVG + VSMVAVGPAMRAGARGPGEPPPGAPEIVSEVVADWFDKVPAAAEGCDVVVATGLLPAAV + VVRSVAEKLGIPYLYTVLSPDHLPSVL" + PFAM_domain 40246..40581 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bgtfA_0002" + /database="Pfam-A.hmm" + /description="Glycosyl transferase 4-like domain" + /detection="hmmscan" + /domain="Glyco_trans_4_4" + /evalue="1.40E-05" + /label="bgtfA" + /locus_tag="bgtfA" + /note="Pfam-A.hmm-Hit: Glyco_trans_4_4. Score: 25.3. + E-value: 1.4e-05. Domain range: 8..110." + /score="25.3" + /translation="ALAVRLRELGVDVRMCLPPDYVERCAEVGVSMVAVGPAMRAGARG + PGEPPPGAPEIVSEVVADWFDKVPAAAEGCDVVVATGLLPAAVVVRSVAEKLGIPYLYT + VLSPDHLP" + gene 41447..42676 + /gene="bgtfB" + CDS 41447..42676 + /codon_start=1 + /db_xref="GI:3688119" + /db_xref="GOA:O87678" + /db_xref="HSSP:1IIR" + /db_xref="InterPro:IPR002213" + /db_xref="InterPro:IPR004276" + /db_xref="UniProtKB/TrEMBL:O87678" + /function="involved in glycosylation of balhimycin" + /gene="bgtfB" + /note="smCOG: SMCOG1102:glycosyltransferase (Score: 406.8; + E-value: 1.9e-123);" + /note="smCOG tree PNG image: smcogs/bgtfB.png" + /product="glycosyltransferase" + /protein_id="CAA76552.1" + /sec_met="Type: none" + /sec_met="Domains detected: Glyco_transf_28 (E-value: + 2.9e-29, bitscore: 93.1, seeds: 43)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MKRVLLSTLGSRGDVEPLVALAVRLRDLGAEPLMCAPPDCADRLE + EVGVPHVPVGPSARAPIHREKPLTPEDMRRLMAEAIAMPFDRIPAAAEGCAAVVTTGLL + AAAIGVRSVAEKLGIPYFYAFHCPSYVPSPYYPPPPPLGEPPAEDVTDIRALWERNNRS + AYQRYGGPLNSHRAAIGLPPVEDIFTFGYTDHPWVAADSVLAPMQPTDLGAVQTGAWIL + PDERPLSPELEAFLDTGTPPVYLGFGSLRAPADAVRVSIDAIRAQGRRVILSRGWADLV + LPDDREDCFATGEVNQQVLFGRVAAVIHHGGAGTTHVAMQAGAPQVLVPQMADQPYYAG + RVAELGIGVAHDGPVPTFDSLSAALVTALAPETRARAEAVARTAGADGAAVAAKLLLDA + VSREKPAVPA" + PFAM_domain 41456..41851 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bgtfB_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF03033" + /description="Glycosyltransferase family 28 N-terminal + domain" + /detection="hmmscan" + /domain="Glyco_transf_28" + /evalue="1.20E-26" + /label="bgtfB" + /locus_tag="bgtfB" + /note="Pfam-A.hmm-Hit: Glyco_transf_28. Score: 93.1. + E-value: 1.2e-26. Domain range: 0..138." + /score="93.1" + /translation="VLLSTLGSRGDVEPLVALAVRLRDLGAEPLMCAPPDCADRLEEVG + VPHVPVGPSARAPIHREKPLTPEDMRRLMAEAIAMPFDRIPAAAEGCAAVVTTGLLAAA + IGVRSVAEKLGIPYFYAFHCPSYVPSPY" + PFAM_domain 41504..41851 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bgtfB_0003" + /database="Pfam-A.hmm" + /description="Glycosyl transferase 4-like domain" + /detection="hmmscan" + /domain="Glyco_trans_4_4" + /evalue="4.70E-03" + /label="bgtfB" + /locus_tag="bgtfB" + /note="Pfam-A.hmm-Hit: Glyco_trans_4_4. Score: 17.0. + E-value: 0.0047. Domain range: 8..105." + /score="17.0" + /translation="ALAVRLRDLGAEPLMCAPPDCADRLEEVGVPHVPVGPSARAPIHR + EKPLTPEDMRRLMAEAIAMPFDRIPAAAEGCAAVVTTGLLAAAIGVRSVAEKLGIPYFY + AFHCPSYVPSPY" + PFAM_domain 42362..42586 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bgtfB_0002" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF04101" + /description="Glycosyltransferase family 28 C-terminal + domain" + /detection="hmmscan" + /domain="Glyco_tran_28_C" + /evalue="5.00E-03" + /label="bgtfB" + /locus_tag="bgtfB" + /note="Pfam-A.hmm-Hit: Glyco_tran_28_C. Score: 16.5. + E-value: 0.005. Domain range: 74..153." + /score="16.5" + /translation="AVIHHGGAGTTHVAMQAGAPQVLVPQMADQPYYAGRVAELGIGVA + HDGPVPTFDSLSAALVTALAPETRARAEAV" + gene 42824..44053 + /gene="bgtfC" + CDS 42824..44053 + /codon_start=1 + /db_xref="GI:3688120" + /db_xref="GOA:O87679" + /db_xref="HSSP:1IIR" + /db_xref="InterPro:IPR004276" + /db_xref="UniProtKB/TrEMBL:O87679" + /function="involved in the glycosylation of balhimycin" + /gene="bgtfC" + /note="smCOG: SMCOG1102:glycosyltransferase (Score: 394.0; + E-value: 1.5e-119);" + /note="smCOG tree PNG image: smcogs/bgtfC.png" + /product="glycosyltransferase" + /protein_id="CAA76553.1" + /sec_met="Type: none" + /sec_met="Domains detected: Glyco_transf_28 (E-value: + 3.6e-31, bitscore: 99.3, seeds: 43)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MRVLLSTAGSRGDVEPLLALAVRLQGLGAEVLMCASPASAERLAE + VGVPHVPVGLQLDGMLLQEGMPPPSAEDERRLAAMAIDMQFDAVPAAAEGCAAVVATGE + LAAAAAVRSVAEKLGIPYFYGAYSPNYLASPHYPPPDDERTTPGVTDNGVLWAERAERF + AKRYGETLNSRRAAIGLPPVADVFGYGYTEQPWLAADPVLAPLDPDLDAVQTGAWILRD + DRPLSPELAAFLAAGSPPVYVGFGSASGPGIEDAAKVAIEAIRALGRRAILSRGWADLV + LPDDREDCFAVDEANLQVLFEQSAAVVHHGSAGTEHLATRAGVPQIAIPRHTDQAYYAG + RVAELGVGVALEGPVPSFAAMSAELATALAPETRARAAEVAGTVRTDGTTMAAELLFQA + AEQGKLTVPA" + PFAM_domain 42830..43228 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bgtfC_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF03033" + /description="Glycosyltransferase family 28 N-terminal + domain" + /detection="hmmscan" + /domain="Glyco_transf_28" + /evalue="1.50E-28" + /label="bgtfC" + /locus_tag="bgtfC" + /note="Pfam-A.hmm-Hit: Glyco_transf_28. Score: 99.3. + E-value: 1.5e-28. Domain range: 0..137." + /score="99.3" + /translation="VLLSTAGSRGDVEPLLALAVRLQGLGAEVLMCASPASAERLAEVG + VPHVPVGLQLDGMLLQEGMPPPSAEDERRLAAMAIDMQFDAVPAAAEGCAAVVATGELA + AAAAVRSVAEKLGIPYFYGAYSPNYLASP" + gene 44087..45313 + /gene="dvaC" + CDS 44087..45313 + /codon_start=1 + /db_xref="GI:15131496" + /db_xref="GOA:Q939Y7" + /db_xref="InterPro:IPR013217" + /db_xref="InterPro:IPR013630" + /db_xref="InterPro:IPR013691" + /db_xref="UniProtKB/TrEMBL:Q939Y7" + /function="probably involved in the C-3 methylation of + dehydrovancosamine" + /gene="dvaC" + /note="smCOG: SMCOG1089:methyltransferase (Score: 43.8; + E-value: 3.6e-13);" + /note="smCOG tree PNG image: smcogs/dvaC.png" + /product="putative C-3 methyl transferase" + /protein_id="CAC48364.1" + /transl_table=11 + /translation="MSTTSQCRICDGTVHEFIDFGRQPLSDAFVAPGAEKGEFFFRLAT + GICDSCTMVQLMEEVPRDLMFHEAYPYLSSGSAVMRTHFHELAKHLLATELTGEDPFIV + ELGCNDGIMLKAVADAGVRQLGVEPSGSVADLAAAKGIRVRKDFFEEATAADIRENDGP + ADVIYAANTLCHIPYMDSILKGVTKLLGPNGVFVFEDPYLGDIVERTSFDQIYDEHFFL + FTARSVQEMARRNGLELVDVERIPVHGGEVRYTLALAGARKPSEAVAELLAWEAERKLA + EYATLERFATDVKKIKEDLIALLTKLRAEGKRVVGYGATAKSATVTNFCGITPDLVEFI + SDTTPAKQGKLSPGQHIPVREYGEFAGNHPDYALLFAWNHADEIMNVEQAFRDAGGQWI + LYVPNVHVS" + PFAM_domain 44105..44287 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_dvaC_0003" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF08421" + /description="Putative zinc binding domain" + /detection="hmmscan" + /domain="Methyltransf_13" + /evalue="1.20E-20" + /label="dvaC" + /locus_tag="dvaC" + /note="Pfam-A.hmm-Hit: Methyltransf_13. Score: 72.7. + E-value: 1.2e-20. Domain range: 0..62." + /score="72.7" + /translation="CRICDGTVHEFIDFGRQPLSDAFVAPGAEKGEFFFRLATGICDSC + TMVQLMEEVPRDLMFH" + PFAM_domain 44375..44809 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_dvaC_0002" + /database="Pfam-A.hmm" + /description="Methyltransferase domain" + /detection="hmmscan" + /domain="Methyltransf_23" + /evalue="2.70E-21" + /label="dvaC" + /locus_tag="dvaC" + /note="Pfam-A.hmm-Hit: Methyltransf_23. Score: 75.9. + E-value: 2.7e-21. Domain range: 19..160." + /score="75.9" + /translation="TGEDPFIVELGCNDGIMLKAVADAGVRQLGVEPSGSVADLAAAKG + IRVRKDFFEEATAADIRENDGPADVIYAANTLCHIPYMDSILKGVTKLLGPNGVFVFED + PYLGDIVERTSFDQIYDEHFFLFTARSVQEMARRNGLELVD" + PFAM_domain 44393..44686 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_dvaC_0006" + /database="Pfam-A.hmm" + /description="Methyltransferase domain" + /detection="hmmscan" + /domain="Methyltransf_18" + /evalue="3.90E-07" + /label="dvaC" + /locus_tag="dvaC" + /note="Pfam-A.hmm-Hit: Methyltransf_18. Score: 30.6. + E-value: 3.9e-07. Domain range: 4..111." + /score="30.6" + /translation="IVELGCNDGIMLKAVADAGVRQLGVEPSGSVADLAAAKGIRVRKD + FFEEATAADIRENDGPADVIYAANTLCHIPYMDSILKGVTKLLGPNGVFVFED" + PFAM_domain 44393..44746 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_dvaC_0007" + /database="Pfam-A.hmm" + /description="Methyltransferase domain" + /detection="hmmscan" + /domain="Methyltransf_31" + /evalue="4.50E-07" + /label="dvaC" + /locus_tag="dvaC" + /note="Pfam-A.hmm-Hit: Methyltransf_31. Score: 29.5. + E-value: 4.5e-07. Domain range: 6..126." + /score="29.5" + /translation="IVELGCNDGIMLKAVADAGVRQLGVEPSGSVADLAAAKGIRVRKD + FFEEATAADIRENDGPADVIYAANTLCHIPYMDSILKGVTKLLGPNGVFVFEDPYLGDI + VERTSFDQIYDEHF" + PFAM_domain 44396..44674 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_dvaC_0004" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF08242" + /description="Methyltransferase domain" + /detection="hmmscan" + /domain="Methyltransf_12" + /evalue="2.70E-11" + /label="dvaC" + /locus_tag="dvaC" + /note="Pfam-A.hmm-Hit: Methyltransf_12. Score: 43.8. + E-value: 2.7e-11. Domain range: 0..99." + /score="43.8" + /translation="VELGCNDGIMLKAVADAGVRQLGVEPSGSVADLAAAKGIRVRKDF + FEEATAADIRENDGPADVIYAANTLCHIPYMDSILKGVTKLLGPNGVF" + PFAM_domain 44399..44680 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_dvaC_0005" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF08241" + /description="Methyltransferase domain" + /detection="hmmscan" + /domain="Methyltransf_11" + /evalue="5.90E-10" + /label="dvaC" + /locus_tag="dvaC" + /note="Pfam-A.hmm-Hit: Methyltransf_11. Score: 39.5. + E-value: 5.9e-10. Domain range: 1..95." + /score="39.5" + /translation="ELGCNDGIMLKAVADAGVRQLGVEPSGSVADLAAAKGIRVRKDFF + EEATAADIRENDGPADVIYAANTLCHIPYMDSILKGVTKLLGPNGVFVF" + PFAM_domain 44819..45295 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_dvaC_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF08484" + /description="C-methyltransferase C-terminal domain" + /detection="hmmscan" + /domain="Methyltransf_14" + /evalue="2.00E-56" + /label="dvaC" + /locus_tag="dvaC" + /note="Pfam-A.hmm-Hit: Methyltransf_14. Score: 189.9. + E-value: 2e-56. Domain range: 0..160." + /score="189.9" + /translation="IPVHGGEVRYTLALAGARKPSEAVAELLAWEAERKLAEYATLERF + ATDVKKIKEDLIALLTKLRAEGKRVVGYGATAKSATVTNFCGITPDLVEFISDTTPAKQ + GKLSPGQHIPVREYGEFAGNHPDYALLFAWNHADEIMNVEQAFRDAGGQWILYVP" + CDS 45409..46233 + /codon_start=1 + /db_xref="GI:15131497" + /db_xref="InterPro:IPR003737" + /db_xref="UniProtKB/TrEMBL:Q939Y6" + /note="orf2" + /product="hypothetical protein" + /protein_id="CAC48365.1" + /sec_met="Type: none" + /sec_met="Domains detected: PIG-L (E-value: 2.7e-23, + bitscore: 74.4, seeds: ?)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MPQDLDADRILAISPHLDDAVLSFGAGLARAAQAGAKVTVHTVFA + GTAAPPYSPAAERLHAIWELSPDQDASLRRRDEDIAALDHLGVDYRHGRFLDAIYRKLP + DGRWLADNVPGRQKLAIGRQSPQGDPELFSAVRADIESIVEEYAPALILTCAAGNGHVD + NEIARDAALFVAYEKGIRVRLWEDLPHAMFAEGAAELPDGFRLGPPDFGSVEPEARARK + FEALRLYSSQMLMLHGPEKDFFAQLDGHARKSAPGGGYGETTWPVVSREDNG" + PFAM_domain 45439..45921 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_CAC48365.1_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF02585" + /description="GlcNAc-PI de-N-acetylase" + /detection="hmmscan" + /domain="PIG-L" + /evalue="1.10E-20" + /label="CAC48365.1" + /locus_tag="CAC48365.1" + /note="Pfam-A.hmm-Hit: PIG-L. Score: 74.4. E-value: + 1.1e-20. Domain range: 0..124." + /score="74.4" + /translation="LAISPHLDDAVLSFGAGLARAAQAGAKVTVHTVFAGTAAPPYSPA + AERLHAIWELSPDQDASLRRRDEDIAALDHLGVDYRHGRFLDAIYRKLPDGRWLADNVP + GRQKLAIGRQSPQGDPELFSAVRADIESIVEEYAPALILTCAAGNGHVDNEIARDAA" + gene 46265..47119 + /gene="bmt" + CDS 46265..47119 + /codon_start=1 + /db_xref="GI:15131498" + /db_xref="GOA:Q939Y5" + /db_xref="UniProtKB/TrEMBL:Q939Y5" + /function="probably involved in the methylation of the + D-Leu residue of the heptapeptide" + /gene="bmt" + /note="smCOG: SMCOG1248:methyltransferase (Score: 113.8; + E-value: 1.5e-34);" + /note="smCOG tree PNG image: smcogs/bmt.png" + /product="putative N-methyl transferase" + /protein_id="CAC48366.1" + /transl_table=11 + /translation="MSGQLERGPVRTTHADVLLASVGERGVLCDFYDEEGSNTYRDLIQ + DADGTPEAREFATRVGPVPGPVLELAAGTGRLTFPFLELGWEVTALELSAPVVDGFRMR + LAEAPADLRDRCTVVQADMSAFSVDRRFGAAVISSGSVNELDEAGRQGLYASVREHLEP + GGKFLLSLALSEVAESQPPERRQELPGQSGRLYVLHVSVQPAEETQDITIYPADETADP + FVVCTHRRRLVPADRIVRELLRAGFDVIARTPFASGASGRAGHEDMLLVEAVKQEGAIP + AAR" + PFAM_domain 46427..46837 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bmt_0005" + /database="Pfam-A.hmm" + /description="Methyltransferase domain" + /detection="hmmscan" + /domain="Methyltransf_23" + /evalue="5.40E-04" + /label="bmt" + /locus_tag="bmt" + /note="Pfam-A.hmm-Hit: Methyltransf_23. Score: 19.7. + E-value: 0.00054. Domain range: 10..121." + /score="19.7" + /translation="EFATRVGPVPGPVLELAAGTGRLTFPFLELGWEVTALELSAPVVD + GFRMRLAEAPADLRDRCTVVQADMSAFSVDRRFGAAVISSGSVNELDEAGRQGLYASVR + EHLEPGGKFLLSLALSEVAESQPPERRQELPGQ" + PFAM_domain 46457..46774 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bmt_0001" + /database="Pfam-A.hmm" + /description="Methyltransferase domain" + /detection="hmmscan" + /domain="Methyltransf_18" + /evalue="6.10E-11" + /label="bmt" + /locus_tag="bmt" + /note="Pfam-A.hmm-Hit: Methyltransf_18. Score: 42.8. + E-value: 6.1e-11. Domain range: 1..110." + /score="42.8" + /translation="GPVLELAAGTGRLTFPFLELGWEVTALELSAPVVDGFRMRLAEAP + ADLRDRCTVVQADMSAFSVDRRFGAAVISSGSVNELDEAGRQGLYASVREHLEPGGKFL + LS" + PFAM_domain 46460..46651 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bmt_0006" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00398" + /description="Ribosomal RNA adenine dimethylase" + /detection="hmmscan" + /domain="RrnaAD" + /evalue="5.70E-04" + /label="bmt" + /locus_tag="bmt" + /note="Pfam-A.hmm-Hit: RrnaAD. Score: 19.0. E-value: + 0.00057. Domain range: 32..91." + /score="19.0" + /translation="PVLELAAGTGRLTFPFLELGWEVTALELSAPVVDGFRMRLAEAPA + DLRDRCTVVQADMSAFSVD" + PFAM_domain 46463..46681 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bmt_0003" + /database="Pfam-A.hmm" + /description="Methyltransferase domain" + /detection="hmmscan" + /domain="Methyltransf_26" + /evalue="8.90E-05" + /label="bmt" + /locus_tag="bmt" + /note="Pfam-A.hmm-Hit: Methyltransf_26. Score: 22.5. + E-value: 8.9e-05. Domain range: 3..78." + /score="22.5" + /translation="VLELAAGTGRLTFPFLELGWEVTALELSAPVVDGFRMRLAEAPAD + LRDRCTVVQADMSAFSVDRRFGAAVISS" + PFAM_domain 46463..46759 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bmt_0004" + /database="Pfam-A.hmm" + /description="Methyltransferase domain" + /detection="hmmscan" + /domain="Methyltransf_25" + /evalue="1.50E-04" + /label="bmt" + /locus_tag="bmt" + /note="Pfam-A.hmm-Hit: Methyltransf_25. Score: 22.0. + E-value: 0.00015. Domain range: 0..101." + /score="22.0" + /translation="VLELAAGTGRLTFPFLELGWEVTALELSAPVVDGFRMRLAEAPAD + LRDRCTVVQADMSAFSVDRRFGAAVISSGSVNELDEAGRQGLYASVREHLEPGG" + PFAM_domain 46466..46765 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bmt_0002" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF08242" + /description="Methyltransferase domain" + /detection="hmmscan" + /domain="Methyltransf_12" + /evalue="3.70E-06" + /label="bmt" + /locus_tag="bmt" + /note="Pfam-A.hmm-Hit: Methyltransf_12. Score: 27.3. + E-value: 3.7e-06. Domain range: 0..99." + /score="27.3" + /translation="LELAAGTGRLTFPFLELGWEVTALELSAPVVDGFRMRLAEAPADL + RDRCTVVQADMSAFSVDRRFGAAVISSGSVNELDEAGRQGLYASVREHLEPGGKF" + gene complement(47100..48404) + /gene="pgat" + CDS complement(47100..48404) + /aSProdPred="" + /codon_start=1 + /db_xref="GI:15131499" + /db_xref="GOA:Q939Y4" + /db_xref="InterPro:IPR004839" + /db_xref="InterPro:IPR015421" + /db_xref="InterPro:IPR015422" + /db_xref="InterPro:IPR015424" + /db_xref="UniProtKB/TrEMBL:Q939Y4" + /function="transamination of 4-hydroxy- and + 3,5-dihydroxyphenylglycine" + /gene="pgat" + /note="smCOG: SMCOG1019:aminotransferase (Score: 297.3; + E-value: 2.8e-90);" + /product="phenylglycine amino transferase" + /protein_id="CAC48367.1" + /sec_met="NRPS/PKS subtype: PKS/NRPS-like protein" + /sec_met="NRPS/PKS Domain: Aminotran_1_2 (83-425). E-value: + 1.1e-32. Score: 105.1;" + /transl_table=11 + /translation="MEILVFMDSFGLSTPLSVETLHGSLTDPAISSMNLLNELIDEYPV + AISMAAGRPYEEFFDIRLIHEYIDAYCDHLRRDRKLDEAGVTRTLFQYGTTKGVIADLI + ARNLAEDENIDAAPESVVVTVGAQEAMFLVLRTLRATEHDVLLAPAPTYVGLTGAALLT + DTPVWPVQSTENGIDPDDLVLQLKRADEQGKRVRACYVTPNFANPTGTSMDLAARHRLL + EVAEANGILLLEDNAYGLFGAERLPTLKALDRSGTVVYIGSFAKTGMPGARVGFTVADQ + RMAGGGLLADQLSKLKGMLTVNTSPIAQSVIAGKLLLNDFSLTKANAREIAVYQRNLRL + VLGELERRLGGRAGVRWNTPTGGFFVTVTVPFTVDDDLLALAARDHGVLFTPMHHFYGG + KGGFNQLRLSISLLTPELIEEGVARLAALITARLG" + aSDomain complement(47130..48155) + /asDomain_id="nrpspksdomains_pgat_Xdom01" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="Aminotran_1_2" + /evalue="1.10E-32" + /locus_tag="pgat" + /score="105.1" + /translation="AGVTRTLFQYGTTKGVIADLIARNLAEDENIDAAPESVVVTVGAQ + EAMFLVLRTLRATEHDVLLAPAPTYVGLTGAALLTDTPVWPVQSTENGIDPDDLVLQLK + RADEQGKRVRACYVTPNFANPTGTSMDLAARHRLLEVAEANGILLLEDNAYGLFGAERL + PTLKALDRSGTVVYIGSFAKTGMPGARVGFTVADQRMAGGGLLADQLSKLKGMLTVNTS + PIAQSVIAGKLLLNDFSLTKANAREIAVYQRNLRLVLGELERRLGGRAGVRWNTPTGGF + FVTVTVPFTVDDDLLALAARDHGVLFTPMHHFYGGKGGFNQLRLSISLLTPELIEEGVA + RL" + PFAM_domain complement(47130..48155) + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_pgat_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00155" + /description="Aminotransferase class I and II" + /detection="hmmscan" + /domain="Aminotran_1_2" + /evalue="3.70E-30" + /label="pgat" + /locus_tag="pgat" + /note="Pfam-A.hmm-Hit: Aminotran_1_2. Score: 105.1. + E-value: 3.7e-30. Domain range: 30..363." + /score="105.1" + /translation="AGVTRTLFQYGTTKGVIADLIARNLAEDENIDAAPESVVVTVGAQ + EAMFLVLRTLRATEHDVLLAPAPTYVGLTGAALLTDTPVWPVQSTENGIDPDDLVLQLK + RADEQGKRVRACYVTPNFANPTGTSMDLAARHRLLEVAEANGILLLEDNAYGLFGAERL + PTLKALDRSGTVVYIGSFAKTGMPGARVGFTVADQRMAGGGLLADQLSKLKGMLTVNTS + PIAQSVIAGKLLLNDFSLTKANAREIAVYQRNLRLVLGELERRLGGRAGVRWNTPTGGF + FVTVTVPFTVDDDLLALAARDHGVLFTPMHHFYGGKGGFNQLRLSISLLTPELIEEGVA + RL" + PFAM_domain complement(47214..47849) + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_pgat_0002" + /database="Pfam-A.hmm" + /description="Alanine-glyoxylate amino-transferase" + /detection="hmmscan" + /domain="Aminotran_MocR" + /evalue="1.30E-06" + /label="pgat" + /locus_tag="pgat" + /note="Pfam-A.hmm-Hit: Aminotran_MocR. Score: 27.2. + E-value: 1.3e-06. Domain range: 162..379." + /score="27.2" + /translation="LKRADEQGKRVRACYVTPNFANPTGTSMDLAARHRLLEVAEANGI + LLLEDNAYGLFGAERLPTLKALDRSGTVVYIGSFAKTGMPGARVGFTVADQRMAGGGLL + ADQLSKLKGMLTVNTSPIAQSVIAGKLLLNDFSLTKANAREIAVYQRNLRLVLGELERR + LGGRAGVRWNTPTGGFFVTVTVPFTVDDDLLALAARDHGVLFTPMHHFY" + gene 48610..49464 + /gene="bhp" + CDS 48610..49464 + /codon_start=1 + /db_xref="GI:15131500" + /db_xref="GOA:Q939Y3" + /db_xref="HSSP:1A88" + /db_xref="InterPro:IPR000073" + /db_xref="InterPro:IPR000639" + /db_xref="UniProtKB/TrEMBL:Q939Y3" + /function="involved in the beta-hydroxytyrosine + biosynthesis" + /gene="bhp" + /note="smCOG: SMCOG1036:alpha/beta_hydrolase_fold_protein + (Score: 170.8; E-value: 8.4e-52);" + /note="smCOG tree PNG image: smcogs/bhp.png" + /product="putative hydrolase" + /protein_id="CAC48368.1" + /sec_met="Type: none" + /sec_met="Domains detected: PF12697 (E-value: 2.5e-23, + bitscore: 75.1, seeds: 465); PF00561 (E-value: 5.1e-19, + bitscore: 60.1, seeds: 48)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MLMTTEHGIRLSYHDQGRGAPVLLLTGTGAPSSVWDLHQVPALRA + AGFRVITMDNRGIPPSDDGADGFTVDDLVADVAALLDHLDASPCRVVGTSMGSYIAQEL + ALARPELVDAVVLMAACGRSSLVQRVLAEAEADLIGRGTELPPGYRAAVRAMHNLGPAT + LADDDLAADWLDLFAASENWGPGVRAQLLLSALPDRREAYRAIKVPCHVVSFEHDLVAP + PSAGQELAAVIPGATHRTIPGCGHFGYLEKPEAVNRELLRFLRTESGVAVTSGASPRTP + EEL" + PFAM_domain 48670..49350 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bhp_0003" + /database="Pfam-A.hmm" + /description="Alpha/beta hydrolase family" + /detection="hmmscan" + /domain="Abhydrolase_5" + /evalue="7.80E-16" + /label="bhp" + /locus_tag="bhp" + /note="Pfam-A.hmm-Hit: Abhydrolase_5. Score: 58.1. E-value: + 7.8e-16. Domain range: 0..145." + /score="58.1" + /translation="PVLLLTGTGAPSSVWDLHQVPALRAAGFRVITMDNRGIPPSDDGA + DGFTVDDLVADVAALLDHLDASPCRVVGTSMGSYIAQELALARPELVDAVVLMAACGRS + SLVQRVLAEAEADLIGRGTELPPGYRAAVRAMHNLGPATLADDDLAADWLDLFAASENW + GPGVRAQLLLSALPDRREAYRAIKVPCHVVSFEHDLVAPPSAGQELAAVIPGATHRTIP + GCGHF" + PFAM_domain 48673..49383 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bhp_0001" + /database="Pfam-A.hmm" + /description="Alpha/beta hydrolase family" + /detection="hmmscan" + /domain="Abhydrolase_6" + /evalue="1.90E-39" + /label="bhp" + /locus_tag="bhp" + /note="Pfam-A.hmm-Hit: Abhydrolase_6. Score: 135.9. + E-value: 1.9e-39. Domain range: 0..227." + /score="135.9" + /translation="VLLLTGTGAPSSVWDLHQVPALRAAGFRVITMDNRGIPPSDDGAD + GFTVDDLVADVAALLDHLDASPCRVVGTSMGSYIAQELALARPELVDAVVLMAACGRSS + LVQRVLAEAEADLIGRGTELPPGYRAAVRAMHNLGPATLADDDLAADWLDLFAASENWG + PGVRAQLLLSALPDRREAYRAIKVPCHVVSFEHDLVAPPSAGQELAAVIPGATHRTIPG + CGHFGYLEKPEAVNR" + PFAM_domain 48751..49389 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bhp_0002" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00561" + /description="alpha/beta hydrolase fold" + /detection="hmmscan" + /domain="Abhydrolase_1" + /evalue="3.30E-24" + /label="bhp" + /locus_tag="bhp" + /note="Pfam-A.hmm-Hit: Abhydrolase_1. Score: 85.7. E-value: + 3.3e-24. Domain range: 0..228." + /score="85.7" + /translation="FRVITMDNRGIPPSDDGADGFTVDDLVADVAALLDHLDASPCRVV + GTSMGSYIAQELALARPELVDAVVLMAACGRSSLVQRVLAEAEADLIGRGTELPPGYRA + AVRAMHNLGPATLADDDLAADWLDLFAASENWGPGVRAQLLLSALPDRREAYRAIKVPC + HVVSFEHDLVAPPSAGQELAAVIPGATHRTIPGCGHFGYLEKPEAVNREL" + PFAM_domain 48811..48984 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bhp_0004" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF03096" + /description="Ndr family" + /detection="hmmscan" + /domain="Ndr" + /evalue="5.10E-05" + /label="bhp" + /locus_tag="bhp" + /note="Pfam-A.hmm-Hit: Ndr. Score: 21.9. E-value: 5.1e-05. + Domain range: 79..138." + /score="21.9" + /translation="FTVDDLVADVAALLDHLDASPCRVVGTSMGSYIAQELALARPELV + DAVVLMAACGRSS" + PFAM_domain 48814..49389 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bhp_0005" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00975" + /description="Thioesterase domain" + /detection="hmmscan" + /domain="Thioesterase" + /evalue="1.40E-04" + /label="bhp" + /locus_tag="bhp" + /note="Pfam-A.hmm-Hit: Thioesterase. Score: 22.1. E-value: + 0.00014. Domain range: 46..225." + /score="22.1" + /translation="TVDDLVADVAALLDHLDASPCRVVGTSMGSYIAQELALARPELVD + AVVLMAACGRSSLVQRVLAEAEADLIGRGTELPPGYRAAVRAMHNLGPATLADDDLAAD + WLDLFAASENWGPGVRAQLLLSALPDRREAYRAIKVPCHVVSFEHDLVAPPSAGQELAA + VIPGATHRTIPGCGHFGYLEKPEAVNREL" + PFAM_domain 49228..49407 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bhp_0006" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF08386" + /description="TAP-like protein" + /detection="hmmscan" + /domain="Abhydrolase_4" + /evalue="2.40E-03" + /label="bhp" + /locus_tag="bhp" + /note="Pfam-A.hmm-Hit: Abhydrolase_4. Score: 17.7. E-value: + 0.0024. Domain range: 33..93." + /score="17.7" + /translation="KVPCHVVSFEHDLVAPPSAGQELAAVIPGATHRTIPGCGHFGYLE + KPEAVNRELLRFLRT" + gene 49530..51275 + /gene="bpsD" + CDS 49530..51275 + /aSProdPred="bht|tyr" + /codon_start=1 + /db_xref="GI:15131501" + /db_xref="GOA:Q939Y2" + /db_xref="HSSP:1AMU" + /db_xref="InterPro:IPR000873" + /db_xref="InterPro:IPR006162" + /db_xref="InterPro:IPR006163" + /db_xref="InterPro:IPR009081" + /db_xref="InterPro:IPR010071" + /db_xref="InterPro:IPR020845" + /db_xref="UniProtKB/TrEMBL:Q939Y2" + /function="involved in the beta-hydroxytyrosine + biosynthesis" + /gene="bpsD" + /note="smCOG: SMCOG1002:AMP-dependent_synthetase_and_ligase + (Score: 385.6; E-value: 4.9e-117);" + /product="peptide synthetase" + /protein_id="CAC48369.1" + /sec_met="Type: other" + /sec_met="Domains detected: AMP-binding (E-value: 6.5e-114, + bitscore: 371.9, seeds: 400); PP-binding (E-value: 1.4e-15, + bitscore: 48.9, seeds: 164)" + /sec_met="Kind: biosynthetic" + /sec_met="NRPS/PKS subtype: NRPS-like protein" + /sec_met="NRPS/PKS Domain: AMP-binding (35-429). E-value: + 8.1e-114. Score: 371.9; NRPS/PKS Domain: bpsD_A1; Substrate + specificity predictions: bht|tyr (Stachelhaus code), N/A + (NRPSPredictor3 SVM), phe (pHMM), Q939Y2_A1-8--bht|tyr + (PrediCAT), bht|tyr (SANDPUMA ensemble); PID to NN: 100.00; + SNN score: 0.999999980762 ." + /sec_met="NRPS/PKS Domain: PCP (510-578). E-value: 2.7e-28. + Score: 89.6;" + /transl_table=11 + /translation="MTGAIVPPSTAPALFEAAAAAVPDRPAVAMGTTTLTYAELNTQAN + RLARRLVAHGVGPERLVALAMPRSIEFAVAMLAVHKAGGAYVPIDPDYPAERRQHMLAG + AAAQCLLCLPGQDVAGAPVVLSVALAEPGRPEPDLDDSDRLAPLLPSHPAYVIFTSGST + GQPKGVVVTHRGIPNLAADYVHRQNLLPDSRLLAFASPSFDAAVAEFWPIWLAGACLVL + APAPDLIPGEPLARLVRDRHITHVTLPPSALAPLEEAGGLPPGLTLLVAGEAGPAPVAK + RWAAGRVMINAYGPTEATVAVTASDPLTGEDTPPIGRPITGVHTYVLDDRLVPVPDGTV + GELYMTGPGLARGYLHRPAATAERFLPDPFGGPGQRMYRTGDRVRARPDGQLVFVGRAD + DQLKVRGHRIEPAEVESALLAVDGVAQAVVTEHDNRLVAYVVGAGGARVPAEDLLPPLR + KQLPAYLVPDVVVGLPHLPTTPNGKVDRAALPAPEAEDTGRAISGRAPSTPTEIHLAAL + FAEVLGVSSVGVEDSFFEVGGHSLLATRLVSRIRESLRVRLRVQAFFDAPTVAELAKVL + DAALT" + CDS_motif 49572..49598 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsD_0001" + /database="abmotifs" + /detection="hmmscan" + /evalue="6.70E+01" + /label="NRPS-A_a2" + /locus_tag="bpsD" + /motif="NRPS-A_a2" + /note="NRPS/PKS Motif: NRPS-A_a2 (e-value: 67.0, bit-score: + -1.8)" + /score="-1.8" + /translation="FEAAAAAVP" + PFAM_domain 49572..50744 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsD_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00501" + /description="AMP-binding enzyme" + /detection="hmmscan" + /domain="AMP-binding" + /evalue="1.60E-103" + /label="bpsD" + /locus_tag="bpsD" + /note="Pfam-A.hmm-Hit: AMP-binding. Score: 346.3. E-value: + 1.6e-103. Domain range: 0..417." + /score="346.3" + /translation="FEAAAAAVPDRPAVAMGTTTLTYAELNTQANRLARRLVAHGVGPE + RLVALAMPRSIEFAVAMLAVHKAGGAYVPIDPDYPAERRQHMLAGAAAQCLLCLPGQDV + AGAPVVLSVALAEPGRPEPDLDDSDRLAPLLPSHPAYVIFTSGSTGQPKGVVVTHRGIP + NLAADYVHRQNLLPDSRLLAFASPSFDAAVAEFWPIWLAGACLVLAPAPDLIPGEPLAR + LVRDRHITHVTLPPSALAPLEEAGGLPPGLTLLVAGEAGPAPVAKRWAAGRVMINAYGP + TEATVAVTASDPLTGEDTPPIGRPITGVHTYVLDDRLVPVPDGTVGELYMTGPGLARGY + LHRPAATAERFLPDPFGGPGQRMYRTGDRVRARPDGQLVFVGRADDQLKVR" + aSDomain 49635..50816 + /asDomain_id="nrpspksdomains_bpsD_A1" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="AMP-binding" + /evalue="8.10E-114" + /label="bpsD_A1" + /locus_tag="bpsD" + /score="371.9" + /specificity="Stachelhaus code: bht|tyr" + /specificity="NRPSpredictor3 SVM: N/A" + /specificity="pHMM: phe" + /specificity="PrediCAT Q939Y2_A1-8--bht|tyr" + /specificity="SANDPUMA ensemble: bht|tyr" + /specificity="PID to NN: 100.00" + /specificity="SNN score: 0.999999980762" + /translation="TYAELNTQANRLARRLVAHGVGPERLVALAMPRSIEFAVAMLAVH + KAGGAYVPIDPDYPAERRQHMLAGAAAQCLLCLPGQDVAGAPVVLSVALAEPGRPEPDL + DDSDRLAPLLPSHPAYVIFTSGSTGQPKGVVVTHRGIPNLAADYVHRQNLLPDSRLLAF + ASPSFDAAVAEFWPIWLAGACLVLAPAPDLIPGEPLARLVRDRHITHVTLPPSALAPLE + EAGGLPPGLTLLVAGEAGPAPVAKRWAAGRVMINAYGPTEATVAVTASDPLTGEDTPPI + GRPITGVHTYVLDDRLVPVPDGTVGELYMTGPGLARGYLHRPAATAERFLPDPFGGPGQ + RMYRTGDRVRARPDGQLVFVGRADDQLKVRGHRIEPAEVESALLAVDGVAQAVV" + CDS_motif 49758..49799 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsD_0002" + /database="abmotifs" + /detection="hmmscan" + /evalue="7.60E-06" + /label="NRPS-A_a2" + /locus_tag="bpsD" + /motif="NRPS-A_a2" + /note="NRPS/PKS Motif: NRPS-A_a2 (e-value: 7.6e-06, + bit-score: 19.0)" + /score="19.0" + /translation="LAVHKAGGAYVPID" + CDS_motif 49830..49853 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsD_0003" + /database="abmotifs" + /detection="hmmscan" + /evalue="7.60E+01" + /label="NRPS-A_a2" + /locus_tag="bpsD" + /motif="NRPS-A_a2" + /note="NRPS/PKS Motif: NRPS-A_a2 (e-value: 76.0, bit-score: + -2.0)" + /score="-2.0" + /translation="MLAGAAAQ" + CDS_motif 49989..50048 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsD_0004" + /database="abmotifs" + /detection="hmmscan" + /evalue="2.40E-11" + /label="NRPS-A_a3" + /locus_tag="bpsD" + /motif="NRPS-A_a3" + /note="NRPS/PKS Motif: NRPS-A_a3 (e-value: 2.4e-11, + bit-score: 35.3)" + /score="35.3" + /translation="AYVIFTSGSTGQPKGVVVTH" + CDS_motif 50166..50192 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsD_0005" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.60E+01" + /label="NRPS-A_a2" + /locus_tag="bpsD" + /motif="NRPS-A_a2" + /note="NRPS/PKS Motif: NRPS-A_a2 (e-value: 16.0, bit-score: + 0.0)" + /score="0.0" + /translation="IWLAGACLV" + CDS_motif 50535..50624 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsD_0006" + /database="abmotifs" + /detection="hmmscan" + /evalue="3.90E-17" + /label="NRPS-A_a6" + /locus_tag="bpsD" + /motif="NRPS-A_a6" + /note="NRPS/PKS Motif: NRPS-A_a6 (e-value: 3.9e-17, + bit-score: 54.3)" + /score="54.3" + /translation="PDGTVGELYMTGPGLARGYLHRPAATAERF" + CDS_motif 50709..50774 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsD_0007" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.10E-11" + /label="NRPS-A_a8" + /locus_tag="bpsD" + /motif="NRPS-A_a8" + /note="NRPS/PKS Motif: NRPS-A_a8 (e-value: 1.1e-11, + bit-score: 36.7)" + /score="36.7" + /translation="FVGRADDQLKVRGHRIEPAEVE" + PFAM_domain 50766..50978 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsD_0003" + /database="Pfam-A.hmm" + /description="AMP-binding enzyme C-terminal domain" + /detection="hmmscan" + /domain="AMP-binding_C" + /evalue="5.70E-12" + /label="bpsD" + /locus_tag="bpsD" + /note="Pfam-A.hmm-Hit: AMP-binding_C. Score: 46.4. E-value: + 5.7e-12. Domain range: 0..73." + /score="46.4" + /translation="EVESALLAVDGVAQAVVTEHDNRLVAYVVGAGGARVPAEDLLPPL + RKQLPAYLVPDVVVGLPHLPTTPNGK" + CDS_motif 50850..50876 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_bpsD_0008" + /database="abmotifs" + /detection="hmmscan" + /evalue="5.90E+01" + /label="NRPS-A_a2" + /locus_tag="bpsD" + /motif="NRPS-A_a2" + /note="NRPS/PKS Motif: NRPS-A_a2 (e-value: 59.0, bit-score: + -1.7)" + /score="-1.7" + /translation="VGAGGARVP" + aSDomain 51060..51263 + /asDomain_id="nrpspksdomains_bpsD_Xdom01" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="PCP" + /evalue="2.70E-28" + /locus_tag="bpsD" + /score="89.6" + /translation="EIHLAALFAEVLGVSSVGVEDSFFEVGGHSLLATRLVSRIRESLR + VRLRVQAFFDAPTVAELAKVLDA" + PFAM_domain 51069..51251 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_bpsD_0002" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00550" + /description="Phosphopantetheine attachment site" + /detection="hmmscan" + /domain="PP-binding" + /evalue="5.50E-13" + /label="bpsD" + /locus_tag="bpsD" + /note="Pfam-A.hmm-Hit: PP-binding. Score: 48.9. E-value: + 5.5e-13. Domain range: 2..65." + /score="48.9" + /translation="LAALFAEVLGVSSVGVEDSFFEVGGHSLLATRLVSRIRESLRVRL + RVQAFFDAPTVAELAK" + gene 51289..52479 + /gene="oxyD" + CDS 51289..52479 + /codon_start=1 + /db_xref="GI:15131502" + /db_xref="GOA:Q939Y1" + /db_xref="InterPro:IPR001128" + /db_xref="InterPro:IPR002397" + /db_xref="InterPro:IPR017972" + /db_xref="PDB:3MGX" + /db_xref="UniProtKB/TrEMBL:Q939Y1" + /function="probably involved in the biosynthesis of + beta-hydroxytyrosine" + /gene="oxyD" + /note="smCOG: SMCOG1007:cytochrome_P450 (Score: 328.3; + E-value: 1e-99);" + /product="putative P450 monooxygenase" + /protein_id="CAC48370.1" + /sec_met="Type: none" + /sec_met="Domains detected: p450 (E-value: 3.9e-22, + bitscore: 69.8, seeds: 50)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MQTTNAVDLGNPDLYTTLERHARWRELAAEDAMVWSDPGSSPSGF + WSVFSHRACAAVLAPSAPLTSEYGMMIGFDRDHPDNSGGRMMVVSEHEQHRKLRKLVGP + LLSRAAARKLAERVRIEVGDVLGRVLDGEVCDAATAIGPRIPAAVVCEILGVPAEDEDM + LIDLTNHAFGGEDELFDGMTPRQAHTEILVYFDELITARRKEPGDDLVSTLVTDDDLTI + DDVLLNCDNVLIGGNETTRHAITGAVHALATVPGLLTALRDGSADVDTVVEEVLRWTSP + AMHVLRVTTADVTINGRDLPSGTPVVAWLPAANRDPAEFDDPDTFLPGRKPNRHITFGH + GMHHCLGSALARIELSVVLRVLAERVSRVDLEREPAWLRAIVVQGYRELPVRFTGR" + PFAM_domain 52093..52368 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_oxyD_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00067" + /description="Cytochrome P450" + /detection="hmmscan" + /domain="p450" + /evalue="1.60E-19" + /label="oxyD" + /locus_tag="oxyD" + /note="Pfam-A.hmm-Hit: p450. Score: 69.8. E-value: 1.6e-19. + Domain range: 324..427." + /score="69.8" + /translation="DTVVEEVLRWTSPAMHVLRVTTADVTINGRDLPSGTPVVAWLPAA + NRDPAEFDDPDTFLPGRKPNRHITFGHGMHHCLGSALARIELSVVLR" + gene 52645..53715 + /gene="hmaS" + CDS 52645..53715 + /codon_start=1 + /db_xref="GI:15131503" + /db_xref="GOA:Q939Y0" + /db_xref="HSSP:1CJX" + /db_xref="InterPro:IPR004360" + /db_xref="InterPro:IPR005956" + /db_xref="UniProtKB/TrEMBL:Q939Y0" + /function="probably involved in the 4-hydroxyphenylglycine + biosynthesis" + /gene="hmaS" + /product="putative hydroxyphenyl pyruvate dioxygenase" + /protein_id="CAC48371.1" + /transl_table=11 + /translation="MTSDSTVQNFEIDYVEMYVENLEAATFTWVDKYAFAVAGTDRSAD + HRSVTLRQGPIKLVLTEPTSDRHPAAAYLQSHGDGVADIALRTPDVTAAFEAAVRGGAA + AVREPVRLAGGPIVTATIGGFGDVVHTLIQSGEATAAAPETTGQGGGDVNLLGLDHFAV + CLNSGDLGPTVAFYERAFGFRQIFEEHIVVGRQAMNSTVVQSASGEVTLTLIEPDSNAD + PGQIDEFLKAHQGAGVQHIAFNADDAVRAVRALSGRGVEFLKTPGTYYDMLGERITLET + HTLDDLRSTNVLADEDHGGQLFQIFAASTHPRHTIFFEIIERQGAGTFGSSNIKALYEA + VELERTGQSEFGAARR" + PFAM_domain 52687..52980 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_hmaS_0002" + /database="Pfam-A.hmm" + /description="Glyoxalase/Bleomycin resistance + protein/Dioxygenase superfamily" + /detection="hmmscan" + /domain="Glyoxalase_4" + /evalue="4.30E-06" + /label="hmaS" + /locus_tag="hmaS" + /note="Pfam-A.hmm-Hit: Glyoxalase_4. Score: 26.6. E-value: + 4.3e-06. Domain range: 2..100." + /score="26.6" + /translation="VEMYVENLEAATFTWVDKYAFAVAGTDRSADHRSVTLRQGPIKLV + LTEPTSDRHPAAAYLQSHGDGVADIALRTPDVTAAFEAAVRGGAAAVREPVRL" + PFAM_domain 52747..53043 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_hmaS_0005" + /database="Pfam-A.hmm" + /description="Hydroxyphenylpyruvate dioxygenase, HPPD, + N-terminal" + /detection="hmmscan" + /domain="Glyoxalase_5" + /evalue="6.20E-07" + /label="hmaS" + /locus_tag="hmaS" + /note="Pfam-A.hmm-Hit: Glyoxalase_5. Score: 29.4. E-value: + 6.2e-07. Domain range: 26..124." + /score="29.4" + /translation="FAVAGTDRSADHRSVTLRQGPIKLVLTEPTSDRHPAAAYLQSHGD + GVADIALRTPDVTAAFEAAVRGGAAAVREPVRLAGGPIVTATIGGFGDVVHTLI" + PFAM_domain 53113..53559 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_hmaS_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00903" + /description="Glyoxalase/Bleomycin resistance + protein/Dioxygenase superfamily" + /detection="hmmscan" + /domain="Glyoxalase" + /evalue="2.10E-19" + /label="hmaS" + /locus_tag="hmaS" + /note="Pfam-A.hmm-Hit: Glyoxalase. Score: 69.7. E-value: + 2.1e-19. Domain range: 0..128." + /score="69.7" + /translation="GLDHFAVCLNSGDLGPTVAFYERAFGFRQIFEEHIVVGRQAMNST + VVQSASGEVTLTLIEPDSNADPGQIDEFLKAHQGAGVQHIAFNADDAVRAVRALSGRGV + EFLKTPGTYYDMLGERITLETHTLDDLRSTNVLADEDHGGQLFQI" + PFAM_domain 53143..53439 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_hmaS_0003" + /database="Pfam-A.hmm" + /description="Glyoxalase/Bleomycin resistance + protein/Dioxygenase superfamily" + /detection="hmmscan" + /domain="Glyoxalase_4" + /evalue="2.00E-11" + /label="hmaS" + /locus_tag="hmaS" + /note="Pfam-A.hmm-Hit: Glyoxalase_4. Score: 43.8. E-value: + 2e-11. Domain range: 6..96." + /score="43.8" + /translation="SGDLGPTVAFYERAFGFRQIFEEHIVVGRQAMNSTVVQSASGEVT + LTLIEPDSNADPGQIDEFLKAHQGAGVQHIAFNADDAVRAVRALSGRGVEFLKT" + PFAM_domain 53146..53457 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_hmaS_0004" + /database="Pfam-A.hmm" + /description="Glyoxalase-like domain" + /detection="hmmscan" + /domain="Glyoxalase_2" + /evalue="2.80E-05" + /label="hmaS" + /locus_tag="hmaS" + /note="Pfam-A.hmm-Hit: Glyoxalase_2. Score: 24.6. E-value: + 2.8e-05. Domain range: 3..90." + /score="24.6" + /translation="GDLGPTVAFYERAFGFRQIFEEHIVVGRQAMNSTVVQSASGEVTL + TLIEPDSNADPGQIDEFLKAHQGAGVQHIAFNADDAVRAVRALSGRGVEFLKTPGTYYD + " + gene 53712..54788 + /gene="hmo" + CDS 53712..54788 + /codon_start=1 + /db_xref="GI:15131504" + /db_xref="GOA:Q939X9" + /db_xref="HSSP:1GOX" + /db_xref="InterPro:IPR000262" + /db_xref="InterPro:IPR008259" + /db_xref="InterPro:IPR012133" + /db_xref="InterPro:IPR013785" + /db_xref="UniProtKB/TrEMBL:Q939X9" + /function="probably involved in the 4-hydroxyphenylglycine + biosynthesis" + /gene="hmo" + /product="putative phenylglycolate oxidase" + /protein_id="CAC48372.1" + /transl_table=11 + /translation="MTYVSLGDLERAARDVLPGEIWDFLAGGSGAEASLTANRTALDRV + FVVPRMLCDLTGSTTEAELLGRRAALPMAVAPVAYQRLFHPEGELAAARAARDAGVPYT + ICTLSSVPLEEVAAVGGRPWFQLYWLRDEKRSLELVRRAEDAGCEAIVFTVDVPWMGRR + WRDMRNGFALPESVTAANFDAGSAAHRRTRGASAVADHTAREFAPATWESVATVRAHTD + LPVVLKGILAAEDARRAVEAGADGIVVSNHGGRQLDGAVPGIEVLGEIAAEVSGRCEVL + LDGGIRTGGDILKAAALGASGVLVGRPVMWGLAAAGQEGVRQVFELLAAELRNALGLAG + CDSVSAAGRLGTRVPRYG" + PFAM_domain 53748..54752 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_hmo_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF01070" + /description="FMN-dependent dehydrogenase" + /detection="hmmscan" + /domain="FMN_dh" + /evalue="2.40E-122" + /label="hmo" + /locus_tag="hmo" + /note="Pfam-A.hmm-Hit: FMN_dh. Score: 408.2. E-value: + 2.4e-122. Domain range: 0..350." + /score="408.2" + /translation="ARDVLPGEIWDFLAGGSGAEASLTANRTALDRVFVVPRMLCDLTG + STTEAELLGRRAALPMAVAPVAYQRLFHPEGELAAARAARDAGVPYTICTLSSVPLEEV + AAVGGRPWFQLYWLRDEKRSLELVRRAEDAGCEAIVFTVDVPWMGRRWRDMRNGFALPE + SVTAANFDAGSAAHRRTRGASAVADHTAREFAPATWESVATVRAHTDLPVVLKGILAAE + DARRAVEAGADGIVVSNHGGRQLDGAVPGIEVLGEIAAEVSGRCEVLLDGGIRTGGDIL + KAAALGASGVLVGRPVMWGLAAAGQEGVRQVFELLAAELRNALGLAGCDSVSAA" + PFAM_domain 54327..54629 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_hmo_0003" + /database="Pfam-A.hmm" + /description="Nitronate monooxygenase" + /detection="hmmscan" + /domain="NMO" + /evalue="2.50E-04" + /label="hmo" + /locus_tag="hmo" + /note="Pfam-A.hmm-Hit: NMO. Score: 20.3. E-value: 0.00025. + Domain range: 118..219." + /score="20.3" + /translation="FAPATWESVATVRAHTDLPVVLKGILAAEDARRAVEAGADGIVVS + NHGGRQLDGAVPGIEVLGEIAAEVSGRCEVLLDGGIRTGGDILKAAALGASGVLVG" + PFAM_domain 54342..54632 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_hmo_0004" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00478" + /description="IMP dehydrogenase / GMP reductase domain" + /detection="hmmscan" + /domain="IMPDH" + /evalue="1.20E-03" + /label="hmo" + /locus_tag="hmo" + /note="Pfam-A.hmm-Hit: IMPDH. Score: 17.8. E-value: 0.0012. + Domain range: 136..241." + /score="17.8" + /translation="WESVATVRAHTDLPVVLKGILAAEDARRAVEAGADGIVVSNHGGR + QLDGAVPGIEVLGEIAAEVSGRCEVLLDGGIRTGGDILKAAALGASGVLVGR" + PFAM_domain 54525..54644 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_hmo_0002" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF01645" + /description="Conserved region in glutamate synthase" + /detection="hmmscan" + /domain="Glu_synthase" + /evalue="2.60E-04" + /label="hmo" + /locus_tag="hmo" + /note="Pfam-A.hmm-Hit: Glu_synthase. Score: 20.0. E-value: + 0.00026. Domain range: 268..308." + /score="20.0" + /translation="AEVSGRCEVLLDGGIRTGGDILKAAALGASGVLVGRPVMW" + CDS 54879..56237 + /codon_start=1 + /db_xref="GI:15131505" + /db_xref="GOA:Q939X8" + /db_xref="InterPro:IPR006153" + /db_xref="UniProtKB/TrEMBL:Q939X8" + /note="orf7" + /note="smCOG: SMCOG1117:sodium/hydrogen_exchanger (Score: + 395.2; E-value: 4.9e-120);" + /product="putative antiporter" + /protein_id="CAC48373.1" + /transl_table=11 + /translation="MLHTFAAAVAPVAPIAAHSLLVFLLQIGLLLLLAVVLGRLAGRFG + MPAVVGELFVGVILGPSLLGWAAPGLHSWLFPAVAEQYHLLDAVGQVGVLLLVGLTGVQ + MDMGLARKRGLTAAGVSIGGLVLPLGLGIGAGYLLPKVLVPEGTDVTVFAMFLGVALCV + SAIPVIAKTLIDMKLLHRNIGQLTLTAGMVDDVFGWFMLSVVSAMAVNAVSAGTVLTSL + AYLVAILAFCFTLGRPLARGVLRVAAKSDGPGLTVATVVVLIFLAAAGTQALGLEAVFG + AFLCGILLGTAGKVDPAKLAPLRTVVLSGLAPLFFATAGLRMDLTALTHPVVLLTGLVV + LALAIAGKFAGAFAGARLSGLNKWEGLALGAGLNARGVIQVVVAMVGLRLGILSVEVYT + IIILVAIVTSLMASPILRFAMSRVEQTAEEQVRENEHRAWNTHPAANPQEQSL" + PFAM_domain 54963..56132 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_CAC48373.1_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00999" + /description="Sodium/hydrogen exchanger family" + /detection="hmmscan" + /domain="Na_H_Exchanger" + /evalue="1.40E-52" + /label="CAC48373.1" + /locus_tag="CAC48373.1" + /note="Pfam-A.hmm-Hit: Na_H_Exchanger. Score: 178.5. + E-value: 1.4e-52. Domain range: 2..376." + /score="178.5" + /translation="LLLLLAVVLGRLAGRFGMPAVVGELFVGVILGPSLLGWAAPGLHS + WLFPAVAEQYHLLDAVGQVGVLLLVGLTGVQMDMGLARKRGLTAAGVSIGGLVLPLGLG + IGAGYLLPKVLVPEGTDVTVFAMFLGVALCVSAIPVIAKTLIDMKLLHRNIGQLTLTAG + MVDDVFGWFMLSVVSAMAVNAVSAGTVLTSLAYLVAILAFCFTLGRPLARGVLRVAAKS + DGPGLTVATVVVLIFLAAAGTQALGLEAVFGAFLCGILLGTAGKVDPAKLAPLRTVVLS + GLAPLFFATAGLRMDLTALTHPVVLLTGLVVLALAIAGKFAGAFAGARLSGLNKWEGLA + LGAGLNARGVIQVVVAMVGLRLGILSVEVYTIIILVAIVTSLMASPILRF" + gene 56501..57913 + /gene="dvaA" + CDS 56501..57913 + /codon_start=1 + /db_xref="GI:15131506" + /db_xref="InterPro:IPR005212" + /db_xref="UniProtKB/TrEMBL:Q939X7" + /function="probably involved in the biosynthesis of + dehydrovancosamine" + /gene="dvaA" + /product="putative NDP-hexose 2,3-dehydratase" + /protein_id="CAC48374.1" + /transl_table=11 + /translation="MLPDLVPPVVVRPRDGRDHADRIALSAATTDGVHMRTEDVRAWIA + ERREANDFHVERVPFRDLDQWSFEEVTGNLVHHSGRFFTIEGLHVIEHDGPNGDGPYRE + WQQPVIKQPEVGILGILGKEFGGVLHFLMQAKMEPGNPNLVQLSPTVQATRSNYTKAHG + GTNVKLIEYFAPPDPEHVIVDVLQAEQGSWFFRKSNRNMIVETVDDVPLWDDFCWLTLG + QIAELMHEDETINMNARSVLSCLPYHDAAPGARFSDVQLLSWFTNERSRHDVRARRIPL + ADVCGWKQGDEAIEHEDGRYFRVLAVAVRGSNRERISWTQPLLESVDLGVVAFLVREIG + GVPHVLVHARADGGFLDTVELAPTVQCTPQNYAHLPAENRPPFLDVVLNAPESRIRYEA + IHSEEGGRFLNVRARYLAIEADDTVEPPPGYTWVTPAQLTALTRHGHYVNVEARTLLAC + LNAATAQPRGGA" + PFAM_domain 56606..57235 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_dvaA_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF03559" + /description="NDP-hexose 2,3-dehydratase" + /detection="hmmscan" + /domain="Hexose_dehydrat" + /evalue="4.80E-82" + /label="dvaA" + /locus_tag="dvaA" + /note="Pfam-A.hmm-Hit: Hexose_dehydrat. Score: 273.9. + E-value: 4.8e-82. Domain range: 1..206." + /score="273.9" + /translation="RTEDVRAWIAERREANDFHVERVPFRDLDQWSFEEVTGNLVHHSG + RFFTIEGLHVIEHDGPNGDGPYREWQQPVIKQPEVGILGILGKEFGGVLHFLMQAKMEP + GNPNLVQLSPTVQATRSNYTKAHGGTNVKLIEYFAPPDPEHVIVDVLQAEQGSWFFRKS + NRNMIVETVDDVPLWDDFCWLTLGQIAELMHEDETINMNARSVLSCL" + PFAM_domain 57272..57877 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_dvaA_0002" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF03559" + /description="NDP-hexose 2,3-dehydratase" + /detection="hmmscan" + /domain="Hexose_dehydrat" + /evalue="1.50E-83" + /label="dvaA" + /locus_tag="dvaA" + /note="Pfam-A.hmm-Hit: Hexose_dehydrat. Score: 278.9. + E-value: 1.5e-83. Domain range: 2..206." + /score="278.9" + /translation="DVQLLSWFTNERSRHDVRARRIPLADVCGWKQGDEAIEHEDGRYF + RVLAVAVRGSNRERISWTQPLLESVDLGVVAFLVREIGGVPHVLVHARADGGFLDTVEL + APTVQCTPQNYAHLPAENRPPFLDVVLNAPESRIRYEAIHSEEGGRFLNVRARYLAIEA + DDTVEPPPGYTWVTPAQLTALTRHGHYVNVEARTLLACL" + gene 57915..58214 + /gene="dvaE" + CDS 57915..58214 + /codon_start=1 + /db_xref="GI:15131507" + /db_xref="UniProtKB/TrEMBL:Q939X6" + /gene="dvaE" + /note="probably not active because of an in frame deletion + of 226 aa" + /product="putative 4-ketoreductase" + /protein_id="CAC48375.1" + /transl_table=11 + /translation="MKTVTVLGASGFAGSAVHRLGEVFRLVAREVAGHTGRGPVDVPCV + APPSHAPETDFRSVTVGSTPFRSITGRRPEMSRPEGVRRTVAALPSSDQGKVRT" + gene 58211..59320 + /gene="dvaB" + CDS 58211..59320 + /codon_start=1 + /db_xref="GI:15131508" + /db_xref="GOA:Q939X5" + /db_xref="HSSP:1MDO" + /db_xref="InterPro:IPR000653" + /db_xref="InterPro:IPR015421" + /db_xref="InterPro:IPR015422" + /db_xref="InterPro:IPR015424" + /db_xref="UniProtKB/TrEMBL:Q939X5" + /function="probably involved in the biosynthesis of + dehydrovancosamine" + /gene="dvaB" + /note="smCOG: + SMCOG1056:DegT/DnrJ/EryC1/StrS_aminotransferase (Score: + 387.8; E-value: 9.1e-118);" + /note="smCOG tree PNG image: smcogs/dvaB.png" + /product="putative C-3 amino transferase" + /protein_id="CAC48376.1" + /sec_met="Type: none" + /sec_met="Domains detected: DegT_DnrJ_EryC1 (E-value: + 1.8e-118, bitscore: 387.1, seeds: ?)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MTTRVWDYQAEYRNERLDLLDAVETVFDSGQLVLGASVRGFEAEF + AAYHGVGHCVGLDNGTNAIKLGLQALGVGPGDEVITVSNTAAPTVVAIDGTGATPVFVD + VREDDFLMDTGQVAAAITERTKCLLPVHLYGQCVDMAPLKDLAAKHGLSILEDCAQAHG + ARQNGTVAGSTGDAAAFSFYPTKVLGAYGDGGATITSDESVDRRLRRLRYYGMDKQYYT + LETPAHNSRLDEVQAEILRRKLKRLDTYVAARQAIAQRYVDGLGDTELKLPRTVPGNEH + VYYVYVVRHPRRDDIIERLKAYDIHLNISYPWPVHTMTGFAHLGYATGAFPVTEKLAGE + IFSLPMYPALSADLQDKVIHAVREVVSTL" + PFAM_domain 58262..59299 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_dvaB_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF01041" + /description="DegT/DnrJ/EryC1/StrS aminotransferase family" + /detection="hmmscan" + /domain="DegT_DnrJ_EryC1" + /evalue="7.30E-116" + /label="dvaB" + /locus_tag="dvaB" + /note="Pfam-A.hmm-Hit: DegT_DnrJ_EryC1. Score: 387.1. + E-value: 7.3e-116. Domain range: 5..363." + /score="387.1" + /translation="DLLDAVETVFDSGQLVLGASVRGFEAEFAAYHGVGHCVGLDNGTN + AIKLGLQALGVGPGDEVITVSNTAAPTVVAIDGTGATPVFVDVREDDFLMDTGQVAAAI + TERTKCLLPVHLYGQCVDMAPLKDLAAKHGLSILEDCAQAHGARQNGTVAGSTGDAAAF + SFYPTKVLGAYGDGGATITSDESVDRRLRRLRYYGMDKQYYTLETPAHNSRLDEVQAEI + LRRKLKRLDTYVAARQAIAQRYVDGLGDTELKLPRTVPGNEHVYYVYVVRHPRRDDIIE + RLKAYDIHLNISYPWPVHTMTGFAHLGYATGAFPVTEKLAGEIFSLPMYPALSADLQDK + VIHAVR" + PFAM_domain 58379..58708 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_dvaB_0002" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00266" + /description="Aminotransferase class-V" + /detection="hmmscan" + /domain="Aminotran_5" + /evalue="1.10E-03" + /label="dvaB" + /locus_tag="dvaB" + /note="Pfam-A.hmm-Hit: Aminotran_5. Score: 17.7. E-value: + 0.0011. Domain range: 66..184." + /score="17.7" + /translation="LDNGTNAIKLGLQALGVGPGDEVITVSNTAAPTVVAIDGTGATPV + FVDVREDDFLMDTGQVAAAITERTKCLLPVHLYGQCVDMAPLKDLAAKHGLSILEDCAQ + AHGARQ" + gene 59344..59961 + /gene="dvaD" + CDS 59344..59961 + /codon_start=1 + /db_xref="GI:15131509" + /db_xref="GOA:Q939X4" + /db_xref="HSSP:1EP0" + /db_xref="InterPro:IPR000888" + /db_xref="InterPro:IPR011051" + /db_xref="InterPro:IPR014710" + /db_xref="UniProtKB/TrEMBL:Q939X4" + /function="probably involved in the biosynthesis of + dehydrovancosamine" + /gene="dvaD" + /product="putative 3,5 epimerase" + /protein_id="CAC48377.1" + /sec_met="Type: none" + /sec_met="Domains detected: dTDP_sugar_isom (E-value: + 1.2e-60, bitscore: 195.4, seeds: ?)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MQARKLAVDGAIEFTPRVFPDDRGLFVSPFQEEAFAEARGGPLFR + VAQTNHSMSKRGVVRGIHYTMTPPGTAKYVYCARGKALDIVVDIRVGSPTFGRWDAVLL + DQRDHRAMYFPVGVGHAFVALEDDTAMWYLLSTAYVARNELALSVLDPALGLPIDADVD + PILSERDQVAVTLAEAGRQGLLPDYATCLELDRQLSEVSLSA" + PFAM_domain 59353..59877 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_dvaD_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00908" + /description="dTDP-4-dehydrorhamnose 3,5-epimerase" + /detection="hmmscan" + /domain="dTDP_sugar_isom" + /evalue="4.60E-58" + /label="dvaD" + /locus_tag="dvaD" + /note="Pfam-A.hmm-Hit: dTDP_sugar_isom. Score: 195.4. + E-value: 4.6e-58. Domain range: 1..176." + /score="195.4" + /translation="RKLAVDGAIEFTPRVFPDDRGLFVSPFQEEAFAEARGGPLFRVAQ + TNHSMSKRGVVRGIHYTMTPPGTAKYVYCARGKALDIVVDIRVGSPTFGRWDAVLLDQR + DHRAMYFPVGVGHAFVALEDDTAMWYLLSTAYVARNELALSVLDPALGLPIDADVDPIL + SERDQVAVTLAE" + gene 60179..61297 + /gene="dpgA" + CDS 60179..61297 + /citation=[2] + /codon_start=1 + /db_xref="GI:15131510" + /db_xref="GOA:Q939X3" + /db_xref="InterPro:IPR001099" + /db_xref="InterPro:IPR011141" + /db_xref="InterPro:IPR012328" + /db_xref="InterPro:IPR016038" + /db_xref="InterPro:IPR016039" + /db_xref="UniProtKB/TrEMBL:Q939X3" + /function="involved in the biosynthesis of + 3,5-dihydroxyphenylglycine" + /gene="dpgA" + /note="smCOG: + SMCOG1024:chalcone_and_stilbene_synthase_domain_protein + (Score: 327.1; E-value: 1.7e-99);" + /product="dihydroxyphenylacetic acid synthase" + /protein_id="CAC48378.1" + /sec_met="Type: t3pks" + /sec_met="Domains detected: Chal_sti_synt_C (E-value: + 1.2e-16, bitscore: 52.4, seeds: 21)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MGVDVSMTTSIEPAEDLSVLSGLTEITRFAGVGTAVSASSYSQSE + VLDILDVEDPKIRSVFLNSAIDRRFLTLPPESPGGGRVSEPQGDLLDKHKELAVDMGCR + ALEACLKSAGATLSDLRHLCCVTSTGFLTPGLSALIIRELGIDPHCSRSDIVGMGCNAG + LNALNVVAGWSAAHPGELGVVLCSEACSAAYALDGTMRTAVVNSLFGDGSAALAVISGD + GRVPGPRVLKFASYIITDALDAMRYDWDRDQDRFSFFLDPQIPYVVGAHAEIVADRLLS + GTGLRRSDIGHWLVHSGGKKVIDSVVVNLGLSRHDVRHTTGVLRDYGNLSSGSFLFSYE + RLAEEGVTRPGDYGVLMTMGPGSTIEMALIQW" + PFAM_domain 60461..60832 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_dpgA_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00195" + /description="Chalcone and stilbene synthases, N-terminal + domain" + /detection="hmmscan" + /domain="Chal_sti_synt_N" + /evalue="2.00E-14" + /label="dpgA" + /locus_tag="dpgA" + /note="Pfam-A.hmm-Hit: Chal_sti_synt_N. Score: 53.3. + E-value: 2e-14. Domain range: 93..223." + /score="53.3" + /translation="KELAVDMGCRALEACLKSAGATLSDLRHLCCVTSTGFLTPGLSAL + IIRELGIDPHCSRSDIVGMGCNAGLNALNVVAGWSAAHPGELGVVLCSEACSAAYALDG + TMRTAVVNSLFGDGSAALAV" + PFAM_domain 60464..60829 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_dpgA_0004" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF08392" + /description="FAE1/Type III polyketide synthase-like + protein" + /detection="hmmscan" + /domain="FAE1_CUT1_RppA" + /evalue="1.10E-10" + /label="dpgA" + /locus_tag="dpgA" + /note="Pfam-A.hmm-Hit: FAE1_CUT1_RppA. Score: 41.1. + E-value: 1.1e-10. Domain range: 80..202." + /score="41.1" + /translation="ELAVDMGCRALEACLKSAGATLSDLRHLCCVTSTGFLTPGLSALI + IRELGIDPHCSRSDIVGMGCNAGLNALNVVAGWSAAHPGELGVVLCSEACSAAYALDGT + MRTAVVNSLFGDGSAALA" + PFAM_domain 60647..60874 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_dpgA_0005" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF08545" + /description="3-Oxoacyl-[acyl-carrier-protein (ACP)] + synthase III" + /detection="hmmscan" + /domain="ACP_syn_III" + /evalue="4.70E-04" + /label="dpgA" + /locus_tag="dpgA" + /note="Pfam-A.hmm-Hit: ACP_syn_III. Score: 19.7. E-value: + 0.00047. Domain range: 3..73." + /score="19.7" + /translation="GMGCNAGLNALNVVAGWSAAHPGELGVVLCSEACSAAYALDGTMR + TAVVNSLFGDGSAALAVISGDGRVPGPRVLK" + PFAM_domain 60947..61288 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_dpgA_0002" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF02797" + /description="Chalcone and stilbene synthases, C-terminal + domain" + /detection="hmmscan" + /domain="Chal_sti_synt_C" + /evalue="4.90E-14" + /label="dpgA" + /locus_tag="dpgA" + /note="Pfam-A.hmm-Hit: Chal_sti_synt_C. Score: 52.4. + E-value: 4.9e-14. Domain range: 26..147." + /score="52.4" + /translation="SFFLDPQIPYVVGAHAEIVADRLLSGTGLRRSDIGHWLVHSGGKK + VIDSVVVNLGLSRHDVRHTTGVLRDYGNLSSGSFLFSYERLAEEGVTRPGDYGVLMTMG + PGSTIEMALI" + PFAM_domain 61022..61294 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_dpgA_0003" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF08541" + /description="3-Oxoacyl-[acyl-carrier-protein (ACP)] + synthase III C terminal" + /detection="hmmscan" + /domain="ACP_syn_III_C" + /evalue="1.40E-12" + /label="dpgA" + /locus_tag="dpgA" + /note="Pfam-A.hmm-Hit: ACP_syn_III_C. Score: 47.4. E-value: + 1.4e-12. Domain range: 2..90." + /score="47.4" + /translation="GTGLRRSDIGHWLVHSGGKKVIDSVVVNLGLSRHDVRHTTGVLRD + YGNLSSGSFLFSYERLAEEGVTRPGDYGVLMTMGPGSTIEMALIQW" + gene 61294..61947 + /gene="dpgB" + CDS 61294..61947 + /aSProdPred="" + /citation=[2] + /codon_start=1 + /db_xref="GI:15131511" + /db_xref="GOA:Q939X2" + /db_xref="InterPro:IPR001753" + /db_xref="UniProtKB/TrEMBL:Q939X2" + /function="involved in the biosynthesis of + 3,5-dihydroxyphenylglycine" + /gene="dpgB" + /note="smCOG: SMCOG1023:enoyl-CoA_hydratase (Score: 49.8; + E-value: 4.2e-15);" + /product="putative enoyl-CoA hydratase" + /protein_id="CAC48379.1" + /sec_met="NRPS/PKS subtype: PKS/NRPS-like protein" + /sec_met="NRPS/PKS Domain: ECH (60-202). E-value: 1e-11. + Score: 36.2;" + /transl_table=11 + /translation="MNGELVLRLDGTRPLSAASVEELDALCDRVEDHREPGPVTVHVTG + VPAAGWTAEVTVGLVSKWERVVRRFERLGRLTIAVAAGDCAGTALDVLLAADVRIAAPG + TRLLLARAGGAPWPGMTVHRLTRQAGAAGIRRAVLLGAPIEAGRALALNLVDEVSEDPA + AALAELAGTAGAVDGKELAIRRQLVFEAGSTAFEDALGAHLAAADRALRRETAS" + aSDomain 61474..61899 + /asDomain_id="nrpspksdomains_dpgB_Xdom01" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="ECH" + /evalue="1.00E-11" + /locus_tag="dpgB" + /score="36.2" + /translation="SKWERVVRRFERLGRLTIAVAAGDCAGTALDVLLAADVRIAAPGT + RLLLARAGGAPWPGMTVHRLTRQAGAAGIRRAVLLGAPIEAGRALALNLVDEVSEDPAA + ALAELAGTAGAVDGKELAIRRQLVFEAGSTAFEDALGA" + PFAM_domain 61474..61899 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_dpgB_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00378" + /description="Enoyl-CoA hydratase/isomerase family" + /detection="hmmscan" + /domain="ECH" + /evalue="3.40E-09" + /label="dpgB" + /locus_tag="dpgB" + /note="Pfam-A.hmm-Hit: ECH. Score: 36.2. E-value: 3.4e-09. + Domain range: 76..217." + /score="36.2" + /translation="SKWERVVRRFERLGRLTIAVAAGDCAGTALDVLLAADVRIAAPGT + RLLLARAGGAPWPGMTVHRLTRQAGAAGIRRAVLLGAPIEAGRALALNLVDEVSEDPAA + ALAELAGTAGAVDGKELAIRRQLVFEAGSTAFEDALGA" + gene 61944..63248 + /gene="dpgC" + CDS 61944..63248 + /aSProdPred="" + /codon_start=1 + /db_xref="GI:15131512" + /db_xref="GOA:Q939X1" + /db_xref="HSSP:1MJ3" + /db_xref="InterPro:IPR001753" + /db_xref="UniProtKB/TrEMBL:Q939X1" + /function="involved in the biosynthesis of + 3,5-dihydroxyphenylglycine" + /gene="dpgC" + /note="smCOG: SMCOG1023:enoyl-CoA_hydratase (Score: 109.1; + E-value: 3.3e-33);" + /product="hydroxyacyl-dehydrogenase" + /protein_id="CAC48380.1" + /sec_met="NRPS/PKS subtype: PKS/NRPS-like protein" + /sec_met="NRPS/PKS Domain: ECH (170-382). E-value: 3.9e-24. + Score: 76.9;" + /transl_table=11 + /translation="MTAAPPTSPPGPRLDRPALAEAAGRVDDLLAELPPPSARTPGQRE + AASSALDGIRAMRADYVGAHAEAIYDELTDGRSRSLRIDELVRAAARAFPGLVPTDEQM + AAERARPQAEKDGREIDQGIFLRGILRAERAGPHLLDAMLQPTPRALKLLPGFTESGVV + QMEAVRLERRDGVAYLTLCRDDCLNAEDAQQVDDMETAVDLALLDPAVRVGLLRGGEMS + HPRYRGRRVFCAGINLKKLSSGGIPLVDFLLRRELGYIHKIVRGVVTEGSWHSRLTDKP + WIAAVDSFAIGGGAQLLLVFDHVLAASDAYFSLPAAKEGIIPGASNFRLSRFAGPRVAR + QVILGGRRIRADEPDARLLVDEVVPPAELDAAIDAALARLDGEAVLANRRMLNLAEEPP + DEFRRYMAEFALQQALRIYGEDVIGKVGRFAAGSS" + aSDomain 62454..63089 + /asDomain_id="nrpspksdomains_dpgC_Xdom01" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="ECH" + /evalue="3.90E-24" + /locus_tag="dpgC" + /score="76.9" + /translation="ERRDGVAYLTLCRDDCLNAEDAQQVDDMETAVDLALLDPAVRVGL + LRGGEMSHPRYRGRRVFCAGINLKKLSSGGIPLVDFLLRRELGYIHKIVRGVVTEGSWH + SRLTDKPWIAAVDSFAIGGGAQLLLVFDHVLAASDAYFSLPAAKEGIIPGASNFRLSRF + AGPRVARQVILGGRRIRADEPDARLLVDEVVPPAELDAAIDAALARLDG" + PFAM_domain 62454..63089 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_dpgC_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00378" + /description="Enoyl-CoA hydratase/isomerase family" + /detection="hmmscan" + /domain="ECH" + /evalue="1.30E-21" + /label="dpgC" + /locus_tag="dpgC" + /note="Pfam-A.hmm-Hit: ECH. Score: 76.9. E-value: 1.3e-21. + Domain range: 2..195." + /score="76.9" + /translation="ERRDGVAYLTLCRDDCLNAEDAQQVDDMETAVDLALLDPAVRVGL + LRGGEMSHPRYRGRRVFCAGINLKKLSSGGIPLVDFLLRRELGYIHKIVRGVVTEGSWH + SRLTDKPWIAAVDSFAIGGGAQLLLVFDHVLAASDAYFSLPAAKEGIIPGASNFRLSRF + AGPRVARQVILGGRRIRADEPDARLLVDEVVPPAELDAAIDAALARLDG" + gene 63245..64054 + /gene="dpgD" + CDS 63245..64054 + /aSProdPred="" + /codon_start=1 + /db_xref="GI:15131513" + /db_xref="GOA:Q939X0" + /db_xref="HSSP:1MJ3" + /db_xref="InterPro:IPR001753" + /db_xref="InterPro:IPR018376" + /db_xref="UniProtKB/TrEMBL:Q939X0" + /function="involved in the biosynthesis of + 3,5-dihydroxyphenylglycine" + /gene="dpgD" + /note="smCOG: SMCOG1023:enoyl-CoA_hydratase (Score: 271.1; + E-value: 1.1e-82);" + /product="putative enoyl-CoA-isomerase" + /protein_id="CAC48381.1" + /sec_met="NRPS/PKS subtype: PKS/NRPS-like protein" + /sec_met="NRPS/PKS Domain: ECH (7-256). E-value: 1.3e-62. + Score: 202.8;" + /transl_table=11 + /translation="MSGDRVRYEKKDHVAYVTLDRPGVLNAMDRRTHEELAGIWDDAEA + DDEVRVVVLTGAGNRAFSVGQDLKERARLNEAGARATTFGSRGQPGHPRLTDRFTLSKP + VVARVHGYALGGGFELVLACDIVIASDDSVFALPEVRLGLIPGAGGVFRLPRQLPQKVA + MGYLLTGRRMDAATALRYGLVNEVVPPEELDRCVAEWTDSLVRAAPLSVRAIKEAALRS + LDLPLEEAFTASYTWEERRRRSEDAIEGPRAFAAKRDPVWTGEYRPG" + aSDomain 63266..64012 + /asDomain_id="nrpspksdomains_dpgD_Xdom01" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="ECH" + /evalue="1.30E-62" + /locus_tag="dpgD" + /score="202.8" + /translation="YEKKDHVAYVTLDRPGVLNAMDRRTHEELAGIWDDAEADDEVRVV + VLTGAGNRAFSVGQDLKERARLNEAGARATTFGSRGQPGHPRLTDRFTLSKPVVARVHG + YALGGGFELVLACDIVIASDDSVFALPEVRLGLIPGAGGVFRLPRQLPQKVAMGYLLTG + RRMDAATALRYGLVNEVVPPEELDRCVAEWTDSLVRAAPLSVRAIKEAALRSLDLPLEE + AFTASYTWEERRRRSEDAIEGPRAFAA" + PFAM_domain 63266..64012 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_dpgD_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00378" + /description="Enoyl-CoA hydratase/isomerase family" + /detection="hmmscan" + /domain="ECH" + /evalue="4.30E-60" + /label="dpgD" + /locus_tag="dpgD" + /note="Pfam-A.hmm-Hit: ECH. Score: 202.8. E-value: 4.3e-60. + Domain range: 1..244." + /score="202.8" + /translation="YEKKDHVAYVTLDRPGVLNAMDRRTHEELAGIWDDAEADDEVRVV + VLTGAGNRAFSVGQDLKERARLNEAGARATTFGSRGQPGHPRLTDRFTLSKPVVARVHG + YALGGGFELVLACDIVIASDDSVFALPEVRLGLIPGAGGVFRLPRQLPQKVAMGYLLTG + RRMDAATALRYGLVNEVVPPEELDRCVAEWTDSLVRAAPLSVRAIKEAALRSLDLPLEE + AFTASYTWEERRRRSEDAIEGPRAFAA" + gene 64162..65259 + /gene="ald" + CDS 64162..65259 + /codon_start=1 + /db_xref="GI:46275290" + /db_xref="GOA:Q799B0" + /db_xref="HSSP:1KFL" + /db_xref="InterPro:IPR006218" + /db_xref="InterPro:IPR006219" + /db_xref="InterPro:IPR013785" + /db_xref="UniProtKB/TrEMBL:Q799B0" + /gene="ald" + /product="putative alodlase" + /protein_id="CAG25757.1" + /transl_table=11 + /translation="MAAMTHTVATTDLDNQRIERIVPLVTPALLHHELPLSATAAETVR + KGRESVVRVLDGTDDRLLVITGPCSIHDPAAALDYAGHLAAIAGEVAGDLLVVMRVYFE + KPRTIGGWKGLINDPHLDGTGDVNHGLRTARHLLLELAERGLPAACEWLDTTIPAYFAD + TVSWGAIGARTVESQNHRMLASGLSMPVGFKNRRDGDITVAIDAIRAAAVRHVVPGVDP + GGLPAILHTAGNPDCHVVLRGGDGAPNHDSASVHKTLTALEAAGLPGRVVIDASHDNSG + KDHHRQPLVAAEIAGQVENGRNGIVGVMLESNLRAGRQDLQPGRPPAYGQSITDACIDV + PTTRTVLHGLAAAAAARRKLGKQAS" + PFAM_domain 64318..65199 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_ald_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00793" + /description="DAHP synthetase I family" + /detection="hmmscan" + /domain="DAHP_synth_1" + /evalue="1.40E-79" + /label="ald" + /locus_tag="ald" + /note="Pfam-A.hmm-Hit: DAHP_synth_1. Score: 266.6. E-value: + 1.4e-79. Domain range: 8..270." + /score="266.6" + /translation="RVLDGTDDRLLVITGPCSIHDPAAALDYAGHLAAIAGEVAGDLLV + VMRVYFEKPRTIGGWKGLINDPHLDGTGDVNHGLRTARHLLLELAERGLPAACEWLDTT + IPAYFADTVSWGAIGARTVESQNHRMLASGLSMPVGFKNRRDGDITVAIDAIRAAAVRH + VVPGVDPGGLPAILHTAGNPDCHVVLRGGDGAPNHDSASVHKTLTALEAAGLPGRVVID + ASHDNSGKDHHRQPLVAAEIAGQVENGRNGIVGVMLESNLRAGRQDLQPGRPPAYGQSI + TDACIDVPTTRTV" + gene 65378..>66669 + /gene="pks" + CDS 65378..>66669 + /aSProdPred="pk" + /codon_start=1 + /db_xref="GI:46275291" + /db_xref="GOA:Q799A9" + /db_xref="HSSP:1BA3" + /db_xref="InterPro:IPR000873" + /db_xref="InterPro:IPR020845" + /db_xref="UniProtKB/TrEMBL:Q799A9" + /gene="pks" + /note="smCOG: SMCOG1002:AMP-dependent_synthetase_and_ligase + (Score: 302.2; E-value: 9.6e-92);" + /product="putative type I polyketide synthase" + /protein_id="CAG25758.1" + /sec_met="Type: none" + /sec_met="Domains detected: AMP-binding (E-value: 2.1e-75, + bitscore: 245.0, seeds: 400); AMP-binding (E-value: + 2.1e-75, bitscore: 245.0, seeds: 400)" + /sec_met="Kind: biosynthetic" + /sec_met="NRPS/PKS subtype: PKS/NRPS-like protein" + /sec_met="NRPS/PKS Domain: CAL_domain (34-394). E-value: + 2.3e-98. Score: 321.1; Substrate specificity predictions: + AHBA (Minowa);" + /transl_table=11 + /translation="MEEIRTEFIRPLLTSLSAHAADRPAYSDDRRTLTYGGLAHAAAEL + AAGLGVARGDRVLVHVGSRVEFAVALLAVLRAAAVGVPVSVRSTDAELAHLAADSGATL + LVTEARHAAAAERLRRDRPGLRVLFVDDPPPARVGEPRDDLGLDEPAWLLYTSGTTGRP + KGVLLSQRAMLWSTAAYYVPMLGLDAEDTVLWPLPTHHAYALSLAFVTTIALGAHTRLA + DGCTPDLLARYPGSVLAGVPALYLRLRQESGGPLAAPRLCLSGGAPCTPATRAAVRDLF + GLPVADGYGSTETGGKVAAELPGEAGLVPVPGLEIRIDAGEVLVRGPGLMLGYHGRTES + PLRDGWYRTGDAGRFEGGRLVLEGRVDDVIVCGGQNVHPAEIEAVLEESPSVRDVLVLG + RPDDVLGEVPVAFVVAGPGGFDAEELRGRCL" + PFAM_domain 65432..66493 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_pks_0001" + /database="Pfam-A.hmm" + /db_xref="PFAM: PF00501" + /description="AMP-binding enzyme" + /detection="hmmscan" + /domain="AMP-binding" + /evalue="1.00E-72" + /label="pks" + /locus_tag="pks" + /note="Pfam-A.hmm-Hit: AMP-binding. Score: 244.8. E-value: + 1e-72. Domain range: 6..416." + /score="244.8" + /translation="HAADRPAYSDDRRTLTYGGLAHAAAELAAGLGVARGDRVLVHVGS + RVEFAVALLAVLRAAAVGVPVSVRSTDAELAHLAADSGATLLVTEARHAAAAERLRRDR + PGLRVLFVDDPPPARVGEPRDDLGLDEPAWLLYTSGTTGRPKGVLLSQRAMLWSTAAYY + VPMLGLDAEDTVLWPLPTHHAYALSLAFVTTIALGAHTRLADGCTPDLLARYPGSVLAG + VPALYLRLRQESGGPLAAPRLCLSGGAPCTPATRAAVRDLFGLPVADGYGSTETGGKVA + AELPGEAGLVPVPGLEIRIDAGEVLVRGPGLMLGYHGRTESPLRDGWYRTGDAGRFEGG + RLVLEGRVDDVIVC" + aSDomain 65480..66559 + /asDomain_id="nrpspksdomains_pks_CAL1" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="CAL_domain" + /evalue="2.30E-98" + /label="pks_CAL1" + /locus_tag="pks" + /score="321.1" + /specificity="Minowa: AHBA" + /translation="YGGLAHAAAELAAGLGVARGDRVLVHVGSRVEFAVALLAVLRAAA + VGVPVSVRSTDAELAHLAADSGATLLVTEARHAAAAERLRRDRPGLRVLFVDDPPPARV + GEPRDDLGLDEPAWLLYTSGTTGRPKGVLLSQRAMLWSTAAYYVPMLGLDAEDTVLWPL + PTHHAYALSLAFVTTIALGAHTRLADGCTPDLLARYPGSVLAGVPALYLRLRQESGGPL + AAPRLCLSGGAPCTPATRAAVRDLFGLPVADGYGSTETGGKVAAELPGEAGLVPVPGLE + IRIDAGEVLVRGPGLMLGYHGRTESPLRDGWYRTGDAGRFEGGRLVLEGRVDDVIVCGG + QNVHPAEIEAVLEESPSVRD" + CDS_motif 65534..65575 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_pks_0001" + /database="abmotifs" + /detection="hmmscan" + /evalue="4.00E+01" + /label="NRPS-A_a8" + /locus_tag="pks" + /motif="NRPS-A_a8" + /note="NRPS/PKS Motif: NRPS-A_a8 (e-value: 40.0, bit-score: + -2.3)" + /score="-2.3" + /translation="RGDRVLVHVGSRVE" + CDS_motif 65828..65884 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_pks_0002" + /database="abmotifs" + /detection="hmmscan" + /evalue="7.00E-08" + /label="NRPS-A_a3" + /locus_tag="pks" + /motif="NRPS-A_a3" + /note="NRPS/PKS Motif: NRPS-A_a3 (e-value: 7e-08, + bit-score: 24.8)" + /score="24.8" + /translation="AWLLYTSGTTGRPKGVLLS" + CDS_motif 66335..66394 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_pks_0003" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.00E-05" + /label="NRPS-A_a6" + /locus_tag="pks" + /motif="NRPS-A_a6" + /note="NRPS/PKS Motif: NRPS-A_a6 (e-value: 1e-05, + bit-score: 17.9)" + /score="17.9" + /translation="AGEVLVRGPGLMLGYHGRTE" + CDS_motif 66467..66526 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_pks_0004" + /database="abmotifs" + /detection="hmmscan" + /evalue="3.80E-04" + /label="NRPS-A_a8" + /locus_tag="pks" + /motif="NRPS-A_a8" + /note="NRPS/PKS Motif: NRPS-A_a8 (e-value: 0.00038, + bit-score: 13.2)" + /score="13.2" + /translation="GRVDDVIVCGGQNVHPAEIE" + PFAM_domain 66518..66655 + /aSTool="fullhmmer" + /asDomain_id="fullhmmer_pks_0002" + /database="Pfam-A.hmm" + /description="AMP-binding enzyme C-terminal domain" + /detection="hmmscan" + /domain="AMP-binding_C" + /evalue="1.60E-07" + /label="pks" + /locus_tag="pks" + /note="Pfam-A.hmm-Hit: AMP-binding_C. Score: 32.1. E-value: + 1.6e-07. Domain range: 0..41." + /score="32.1" + /translation="EIEAVLEESPSVRDVLVLGRPDDVLGEVPVAFVVAGPGGFDAEEL + R" +ORIGIN + 1 ctgcagcagc agcgtcgccg accccaccac ccggctgatg tgaccggagg cgatgatgac + 61 gacacggcgt ttctcggcga tcgggagcag cgtttcgatg gcctcttcca caagcaggga + 121 caggtcgacc ggttcccggg tgaacgcgcg ctgatcggcg cggctgagca ccagcagcgc + 181 ctcggtgagg tcgatcgccc gggcgttcac cgcgtggagg cggtcgaaga ccagcagcgg + 241 gtctttcgcc ggatcgttgc gggccacttc gagaagcgcc tgcgtgatcg ccagcggggt + 301 gcgcagctcg tgcgaggcgt tcgcggcgaa cctccgctgc gcggcgacgt gggcttcgag + 361 ccgggcgagc atggcgtcga aggcatcggc gagttcgcgg aactcgtctt cggtgccttc + 421 cagccggatc cggtgggaga gcgacccgtt cgcggccatg cgcgcggcgt cggtgatccg + 481 ggtcagcggg gcgagcatgc ggccggcgag gatccagccg cccagcaggc cgaacagcag + 541 caggaagaac agcactgagc ccgcggccgg caggaagttg cgcaccagga cggaccgctc + 601 cagcaccccg ccggggggcg gtttggccag cacgtcgggg acatcacgca gcaggaacag + 661 ccacacggaa gcgagcagca gtacaccggc gagcacgagg aagcaggcgt agctgagggt + 721 gagtttgagg cggacgctca tcccggccgc tctgtccacc gcctgctccc tagcgcccgg + 781 cgccgggcgc cgcgccgatg cggtacccga cgccggccac cgtggtgatg atccagggct + 841 cgccgagccg cttgcgcaga gccgaaaccg tgatgcgcac agcgttggtg aacgggtcgg + 901 cgttcttgtc ccaggcccgt tccagcagct cttcggcgct gacgacaccg ccgtcggcgg + 961 aaacgaggac ttcgagcacg gcgaactgct tcctggtcag cgcgatgtag cggtcgtcgc + 1021 ggtagacctc gcggcggaac gggttcagcc gcaggccggc gatttccagt accggcggcc + 1081 ggttgtgggc acgccggcga tcgagcgccc gcagcctgag cacgagctcc cgcagttcga + 1141 acggcttcgt gaggtagtcg tcggcaccga gctcgaaccc ggtgatcttg tcgtcgagcc + 1201 ggtcggcggc ggtgagcatg aggatcggca ggccgctgcc ggaagcgacg atccgtttgg + 1261 cgatctcgtc cccgctgggt ccgggaatat cccggtcgag gacggcgatg tcgtaggtgt + 1321 tgaggctcag cagttcgaga gcggtgtcgc cgttgcccgc ggtgtccgcc gcgatcgctt + 1381 ccaaacgcag gccgtcgcgg atggcttcgg ccagataggg ctcgtcctcg acgatcagca + 1441 cgcgcatgcc ccgatggtac gagaggccac tacatatcgt cggcatatgg aaaatcgcat + 1501 acgtgccggc aacacatcgc cgacttgaat ggacacatga cctaccgcga gtcggcccgg + 1561 acgacgaccc gccggattcc cggcgccgtc gtgccggtgg cccgccggat tcgcggggtc + 1621 cttctcgccg gcctgcgcgc cgtcggcacg aggattgccc ggtcgcccgg tcgcccggtc + 1681 cgcccccagg accgtgccgg cctcggcaag acccacggtg ccgtccccgc cggggtgacg + 1741 gtcttcgacg acgacgtccc ggctgtgact cgcctcgacc cggcgcttct gagtgcactg + 1801 cgccgggccg cgaccgcggc cgccgacggc ggggtcgaac tgtgcgtgaa cagcggctgg + 1861 cggtctccgg aataccagag ccggcttctt cgcgaggcgg tggcgaaata cgggtcggcg + 1921 gcggcggccg cccggtgggt ggccaccccg gagacgtcga tccacgtggc ggggaaggcg + 1981 gtcgacatcg ggccacccgc gtccgcgtcg tggttgtccg agcacggcgc cgattacggg + 2041 ttgtgccgcg tctaccgcaa cgaaccctgg cacttcgaat tgcgtcccga agcgatcgag + 2101 cacggctgcc cgcccctgta tgccgacccg agtcacgacc cgcggctgcg ccggtgacca + 2161 gggtcgcccg gcgtgctccg cagacggccc ggaaatttgc ccctaccatc catggatatg + 2221 gcggtcaagg ggcagtcggg tgagcccggc tcgcggacgg aatgcgagcg gtggacacta + 2281 taatcatttc tcgcggacaa tcaaggctcg caatcggccc aatgtgatcc actgaggagt + 2341 ccagccggtg acattccggc tcaatgtgtc cggcgcggcg catcatacta atggttgacg + 2401 atcgcgtcaa tacaacatta gtgaaagagt cgctcctcga tcccggccga tggcctgcgc + 2461 cttggacatc acgattgttc gtcaattgcc gacaacgatc ggtcatgcca tgctgacgtc + 2521 gtgtgcactc actggggatc aagggagggc gcggcggtga ccggggacca cgaggcaatc + 2581 ggggttccat cgggtcgaac cgctgagccg cgcccggcat gtactgggaa ttcgttgtcg + 2641 aacatgccct gtggcctcgc gttacccgac cgccggcctt ggtcgggctg acggtcgtcg + 2701 gtctcggccg ggtacaagcc accgaccggt agttccgcgg cgatcatcgc ccggggggat + 2761 tgcaagctgc atcgagttcc ggccgatggc cggaacgcgg gctgctgcct gccgtccggg + 2821 gaatcgcgga tcgttcgcgc acaggcctgt ccgcggcgtt catggggatc catcagcgtc + 2881 gaggacatcc gccgcggccg aggccggccc ggacaccggt cgtgtgtcca accaccgagg + 2941 ctgagcagtt ggacggtgcg gttctctgcc gtgccgcaaa gttgacgaag tagatccaaa + 3001 tgggggctag gtggatccga cgagagttga catattcgct ctccctgccg tcgaaatcga + 3061 gctgtcccgg ctgtcttccg cgagctcacc gcgaacatcg ggtgaggatc cggagcacgt + 3121 cgagacgctg ttgtcggcag agggagaact tccgcccatt ctcgttcacc gtccgacgat + 3181 gcaggtgctc gacggcctgc accggttgaa ggtggcgcga gtccggggtg acacgaaaat + 3241 cctggccaga ctggtcgacg ccaccgaatc ggatgcgttc gtcctggcgg tcgaggcgaa + 3301 catccggcac ggtctgccgc tgtccctcgc cgatcgcaag cgtgcggccg tccagatcat + 3361 cgggacgcat ccgcagtggt ccgatcggcg ggtggcctcg gcgaccggga tctccgcggg + 3421 cacggtggcc gacctgcgca ggcgcgcggg agaggacggg accgaggccc ggatcgggcg + 3481 ggacgggcgt gtccgcccgt ccgacggttc ggagcggaga agactcgccg ccgagctcat + 3541 ccgcagcgat ccgggtctgt ctctgcggca ggtcgccaag caggtcggca tctccccgga + 3601 gacggtgcgt gacgtgcggg gccggctgga gcgcggggag agcccgactc cggacgggac + 3661 aaggagattg ccggccaagc cgcacccgct gcggttgtcg gagcccgact tcggccgtgc + 3721 cgtggaccag gatcggctcg cgctgctgga aaggctcaag agcgacccgg cactgcggct + 3781 gaacgaggtc ggccggatcc tgctgcgcat gctcaccatg cactccatgg acgggcagga + 3841 gtgggaacgg atcctgcagg gtgttccacc acacctgcac ggcgtgatcg ccgggttcgc + 3901 ccgggaccac gcccgggtct gggcggagtt cgccgaccac ctggagagcc gggcgaccga + 3961 gctggccgcg ggatgatcgc gtgaccacgg cggagcccag tggcgtcgcc ggttccggtg + 4021 cgggccgccg caccggaacc ggcgtcgccg tccgcccaag tcccgcggct ggacttcgtc + 4081 cggtgagccg ttttaagaag gaacggtgac catcgagaaa gcgcttgtcg tcggtaccgg + 4141 gctgatcggc acctcggtgg cgctggccct ccgggagaag ggcgtcgcgg tcttcctctc + 4201 cgacgtcgac accgaggccg cccggctggc gcaggtactc ggcgccgggc gggagtgggc + 4261 gggagaaggt gtggatctgg cggtgatcgc cgtgccgccg cacctggtgg gggaccggct + 4321 ggccgacctg cagaagcaag gtgcggcccg ggtgtacacc gacgtggcca gtgtgaaggc + 4381 cgatccgatc gccgacgcgg agcggctcgg gtgtgacctg gcctcctatg tgccgggcca + 4441 cccgcttgcc ggccgggaac gctcgggccc ggccgccgcc cgcgccgagc tgttctcggg + 4501 ccggccgtgg gcactgtgcc ccggccccga gacggacgcg gaagccctgc gacgggtgcg + 4561 ggagctggtg tccctgtgcg gggcgacggc cgtcgtcgtg ggtgcgggcg agcacgactc + 4621 ggccgtggcg ctggtgtcgc acgccccgca cgtggtggcg tcggcggtgg cggccagcct + 4681 ggcgagcggc gacgacgtcg cgctgggcct ggcggggcag ggactccgtg atgtgacgcg + 4741 catcgcagcc ggggatccct tgctgtggcg gaggattctc tccgggaaca cccggccggt + 4801 ggccggggtg ctcgaacgga tcgcggccga cctcgccgcg gcggcctcgg cgttgcggtc + 4861 cggcgacctg gacgaggtga cggatctgct gcggcgcggc gtggacggtc acggccggat + 4921 ccctggtcag cgcggcggat cccttcccgg ccgcaacccg gcgggttccc cggggcgtta + 4981 ggccggccgc aaaaacgatt gccgaaggtg gccggaacgt ccgtcgtgat tgtacggtta + 5041 tccgtgcggc gcggcacggg ggcgtcggca aaaaaatgcg tccaagtgcc gaaagcgctt + 5101 gcttggaccc actcgtggac atcgactcga ttcagcacga ttgagatcgc cgactttggc + 5161 gtgtgagaga ggtgaccgga tggacatggt gttgcgtttc gagggggtgg acaagagccc + 5221 tgacgacccc gacccctggg tgaccaaggt ccgcaagggg acgctgcgcc gcgtgctcgc + 5281 ctacttccgc ccgcacgtcg ggaaggtggc gctcttctgt ctcgtcgccg tgctggagtc + 5341 gctcatcgtc gtggcaactc cgttgctgtt gaaggaactc atcgacaacg gcatcgtcaa + 5401 gaacgatctc ggggtcgtga tcctgatggc cggcctcacc gcggtgctcg ccgtgctggg + 5461 cgccgggctg acgatggtgt ccggctacat ctccgggcgg atcggggagg ggatcaccta + 5521 cgatctccgg gtccaggcgc tcggccacgt ccggcggctg ccgatcgcgt tcttcacccg + 5581 tacccagacg ggggtgctgg tcggcaggct gcacacggaa ctgatcatgg cgcagcagca + 5641 tttcaccggc ttgctcatgg cggccaccag cgtggtcatg gtcgtggtgg tgctggccga + 5701 gctgatctac ctttcgtgga tcgtcgccat cgtctcgctg gtgctgattc cgatattcct + 5761 cgtgccctgg attcgcgtgg ggcgggcgat ccagcggcgc agtatccggc tcatggacgc + 5821 gaataccggc ctcggcgggc ttctccagga gcggttcaac gtccaggggg ccatgctctc + 5881 caagctcttc ggccgtcccg ccgaggaaat ggccgagtac gaggagcgtg ccggggagat + 5941 ccgcaagatc ggcgtgagcc tttccgtgtg gggccggatg gccttcgtca tgatggcgct + 6001 gatggcctcg ctcgccacgg ccctcgtcta cgggatcggg ggcgggctcg tgctcgccgg + 6061 tgcgttcgag ctcggcacgc tggtcgccat cgccaccctg ctccagcggc tgttcgggcc + 6121 gatcacccag ctgtccggga tgcaggagct cgcgcagacg gtcgtggtga gcttttcccg + 6181 ggtcttcgag ctgctcgacc tcaagccact gatccaggaa cgccccgacg cgatcgcgct + 6241 gaagaagaag gtggtgccgg acgtcgagtt cgagcacgtg tcgttccgct accccaccgc + 6301 ggacgaggtc tcgctggcgt cgctggagca cctgcgggcc gagcgggagc gcagcgaagt + 6361 gacgccggat gtcctgcgcg acgtgagctt ccacgcgcag gccggaaccc tcaccgcgct + 6421 cgtcggcccg tccggcgcgg ggaagagcac catcacccac ctggtctccc ggctgtacga + 6481 cccgaacggc gggaccgtcc gcctcggcgg ccacgatctg cgcgatctca ccttcgaatc + 6541 gctccgcgaa gcggtcgggg tggtcagcca ggacgcctac ctcttccacg acacgatccg + 6601 ggagaacctc ctctacgccc gcccgaccgc caccgaggac gagctgatgg aggcgtgcaa + 6661 gggggcccag atccgggacc tgatcgactc cctcccgctc gggctggaca ccgtcacggg + 6721 cgatcgcggc taccgcatgt cgggcgggga gaagcaacga ctggccatcg cccggctgct + 6781 gctgaaggag ccgtcgatcg tcgtcctcga cgaagccacc gcccacctgg actccgagtc + 6841 ggaggccgcc gtccagcggg cgctcaagac ggccctgcac ggccggacct cgctggtgat + 6901 cgcccaccgg ttgtccacga tccgcgaggc cgaccagatc ctcgtgatcg acggcggcag + 6961 ggtgcgggag cgcgggacac acgacgagct gctggcccag ggcggcctgt acgcggagct + 7021 ctaccacacg cagttcgcca acccggccgc caacgacccc aagccggaga tcgaggacga + 7081 gctcgacgac atcgagcccg agccggtgat ccaacacatg ggctacggag gatgacgatg + 7141 aattccgcag cgcggaccac gccgacgatg ctggatctgt tcgcttcgca cgtggaccgg + 7201 acacctgacg cggtggccgt ggccggcggt gacggggttc tgacgtaccg gcagctcgac + 7261 gagcgcgcgg gccggttggc ggggcggctg gcgagtcgcg gcattcgccg tggcgaccgc + 7321 gtcgcggtgg tgatggaccg ttcggcggac ctggtggtgg cgctgctcgc cgtgtggaag + 7381 gcgggggcgg cgtacgtgcc ggtggacgcc ggctaccccg cgccgcgagt ggccttcatg + 7441 gtggcggact cggcggccaa gctcgtggtg tgctcggccg cgtcgcgcgg cgccgtaccg + 7501 gccggggtcg agtcgctcga gccggccgcc gccgccgagg agggcgcgtc cgacgcgccg + 7561 gcggccacgg tgcgaccggg ggatccggcg tacgtgatgt acacgtccgg ctcgacgggc + 7621 acaccgaagg gcgtgaccat ttcgcagggc tgcgtcgcgg agctgacgat ggacgccggg + 7681 tgggcgatgg agcccggcga ggcggtgctc atgcattcgc cgcacgcctt cgacgcgtca + 7741 ctgttcgaac tctggatgcc gctggcgtcg ggggtccggg tggtgctcgc cgaaccgggt + 7801 tcggtggacg cccggcggct gcgggaagcg gccgcggccg gggtgacgag ggtgtacctc + 7861 accgcgggga gcctgcgcgc ggtggcggag gaggcgccgg aatcgttcgc ggagttccgt + 7921 gaggtgctga ccggcggtga cgtggtaccc gcgcacgcgg tggagcgggt gcggacggcc + 7981 gcaccccggg cgcggttccg gaacatgtac ggcccgacgg aagcgacgat gtgcgcgacg + 8041 tggcacctgc tgcagccggg tgacgtggtg ggcccggtcg tgccgatcgg ccgtccgctg + 8101 accggccgcc gggtgcaggt gctcgacgcg tcgctgcggc ccgtggggcc gggtgtggtc + 8161 ggcgacctgt acctctccgg ggcgctggcg gagggctact tcaaccgggc ggcgctgacg + 8221 gcggagcggt tcgtggcgga tccgtccgca ccggggcagc ggatgtactg gaccggggac + 8281 ctcgcccagt ggaccgcgga cggtgagctg gtgttcgcgg gccgggccga cgaccaggtg + 8341 aagatccgcg ggttccggat cgagcccggc gagatcgagg ccgcgctgat cgctcagccg + 8401 gacgtgcacg acgccgtcgt ggcggcggtc gacggacggc tgatcgggta tgtggtgacc + 8461 gagggggacg ccgatccccg ggtcatccgc gaacgcctcg gtgcggtgct gccggagcac + 8521 ctggtcccgg ccgccgtgct cgcactggac gcactgccgc tgaccggcaa cggcaaggtg + 8581 gaccggtccg cgctgccggc gcccgagttc gcggcgagtg ccgccgggcg ggcaccgagc + 8641 accgatgcgg aacgtgtcct ctgtggactc ttcgccgagg tgctcggcgt ggcacgagcc + 8701 ggcgtcgacg acggtttctt cgagctgggc ggggattcga tcggcgcgat gcggctggcg + 8761 gcccgggccg ccaaggcggg cctgctggtg acgcccgccc agatcttcga ggagccgacc + 8821 cccgcccggc tggccgccgt ggcgcggccg gtcccggccg gcgggcccgt cgacggcccc + 8881 ctgctcaccc tgaccgcggc cgaggaggcg gagctggcgc tcgccgctcc gggcgccgag + 8941 gagatctggc cgctggcccc gttgcaggag gggctgctct tcgaatcgat cctcgacgac + 9001 cagggctccg acatctacca ggtgcaggtg atcctggagc tgaacgggcc ggtggacgcg + 9061 ccccggctgc gggccgcgtg ggacgcggtc gtccggcggc accccgagct ccggctgagc + 9121 ttccaccgcc tcgcctcggg caagacggtg caggccgtcc acggggacgt caccccgccg + 9181 tggcgggtgg tggacctgac gggtgccggc gacgtcgacg cggccgtcgc ggccctcgtc + 9241 gccgaggaac agcagcagcg gttcgaactc gccacggcgc cgctggtccg gctggtgctg + 9301 gtccggatcg cggcggaccg gtaccgcctg ctgttcgtca tccaccacat cctcgtcgac + 9361 ggctggtcgg tggcggtcat cctcaacgac gtctccgagg cgtacgaagc cggcgagccg + 9421 gtgccggaac agcggggcgg cgccaccttc cgggactacc tggcctggct ggaccggcag + 9481 gacgacgacg cggcccgggc ggcctggcgg gcggagctgg ccggtctcga cgagcccgcg + 9541 ctgatcgcga cttcgggcgt cgagacggag tacgactacc gcgccacgca cctgacgccg + 9601 gccctgcaca ccaggctgct ggggttcgcc cgcgagcacg ggctgacgcc gagcacggtg + 9661 gtgcacgccg cctgggcgat ggtgctggcg cggctcacgc ggcggaccga cgtcgtgttc + 9721 ggcaccatgg tcgcgacccg tcccccggaa ctggcgggga tcgagtcgat gccgggcctg + 9781 ctgatgaccg cggtgccggt ccgggtgccg ctggacggcg ggcaatcggt cctggacatg + 9841 ctcaccgacc tgcacagcag gcagacggcc ctcaaacgac accagtacct ggggctgccg + 9901 gagatccaga aggcggcggg accgggcgcg acgttcgaca cgatgctggt ggtcgagaac + 9961 tacccgcggg agtacgcccg ccggtacacg catctgcgca cgatcgaggg gacccactac + 10021 ccggtgaccc tgggcatcac cccgggcgac cggttcaaga tccagctcgg ctactggccg + 10081 ggccaggtcc cggacaccgt cgccgagtcg ctgctggagt ggttcgtcgg cgccatcggc + 10141 gcgctggtcg ccgatcccgc cggcctggtg gggcggatcg ggatgggcgc ggccgacgtg + 10201 cgccgctggg acccgccgct gcaggcgggg gagccgctgc cggccctggt ggggcggatg + 10261 gcggcgcggc cgccggacaa cgtggcggtc gtggacggcg acggtgcgct gtcctatgcg + 10321 gacttgtggg agcggtcgct gaagttcgcg gccgtcctgc gggcccacgg agtccggtcc + 10381 gaggaccggg tcggcctggt ggtggggcgc tcggcctggt ggacggtcgg catgctgggc + 10441 gtcctgctgg cgggcggcac gttcgtgccg gtggacccgg cctatccggc cgagcgcaag + 10501 gaatggatct tccggagcgc gaacccgatg ctggtggtgt gcgcgggcgc gacacggggg + 10561 gcggtgcccg cggagttcgc ggaccggctg gtggtgatcg acgaggtcga tccggccgcg + 10621 ggctcggcgg gggacctgcc gcgggtggat ccgcgcagtg ccgcgtacgt gatctacacg + 10681 tcggggtcga cgggaacccc gaagggggtc gtcgtcaccc atgccgggct gggaaacctg + 10741 gcgctggcgc acatcgaccg gttcggggtg tccccgtcgt cacgggtgct gcagttcgcg + 10801 gcgctcgggt tcgacaccat cgtctccgag gtgatgatgg cgttgctctc gggagcgacg + 10861 ctggtggtgc cgccggagcg ggacctgccg ccgcgggcgt cgttcaccga cgccctggaa + 10921 cggtgggaca tcacgcacgt gaaggcgccg ccgtcggtgc tgggcacggc cgacgtgttg + 10981 ccgtcgacgg tggagacggt ggtggcggcg ggcgagctct gcccgccggg cctggtggac + 11041 cggctgtccg cggaccggcg gatgatcaac gcctacgggc cgaccgaaac cacgatctgc + 11101 gcgacgatga gcatgccgtt gtcgcccggc cagcacccga tcccgttcgg caagccggtg + 11161 ccgggggtgc gcggatatct gctggactcg ttcctgcgcc cgttgccgcc cggggtcacc + 11221 ggtgagctct acctggccgg gatcggcgtg gcccgcggct acctcggccg ttcggcgctg + 11281 acggccgagc ggttcgtcgc cgatccgttc gtgcccggtg agcggatgta ccggaccggg + 11341 gacctggcgt actggaccga acagggcgag ctggtgtccg ccgggcgcgc cgacgaccag + 11401 gtcaagatcc gcggcttccg tgtcgaaccc cgcgagatcg agttcgcctt gtccggctac + 11461 ccccgggtca cccaggccgc ggtcgccgtc cgcgacgacc gcctggtcgc ctacgtgaca + 11521 ccaggcgaca tcgacacgca ggcggtgcgg gcgcacctcg cgtcccggat gccccagtac + 11581 atggtccccg cggcggtggt ggcgctggac gccctgccgc tgacggcgca cgggaagatc + 11641 gatcggcgcg cactgcccga ccccgacttc accgccggga agcaggccag ggagccggcc + 11701 accgagaccg agcgggtgct gtgcgagttg ttcgccggcg tgctcggcct ggcgcgggtc + 11761 ggggtggacg acagcttctt cgagctcggc ggggactcca tcctctcgat gcagctggcg + 11821 gcgcgggcgc ggcggtcggg gctgacgttc accgcggcgg acgtcttcga cgggaagacg + 11881 cccgagcgga tcgcgcagct ggcggcggag tcgtcggtgc cggagcccgg tcgttccccg + 11941 aaacccgatg gcgtcggtga cgtcgcgtgg acgccggtga tgtggatgct gggagacggc + 12001 gtcgcgggac cggcgttcgc gcagtggatg gtggtcggga cgccttcgga cctgacggag + 12061 aaggcgctgg cggcgggctt tgcggccgtg gtggatacgc acgacatgct gcgggcgcgg + 12121 gtcgtcgccg acgagggcgg ccggcgcctg gtggtgggtg agcgtgggtc ggtggatgtc + 12181 gccggggcgg tcacccgcat ccgcgccgat ggccgctcgc tggacgaagc cgtggcggac + 12241 gcggcgcgcg cggccgtgac ccggttggac ccgtcggcgg gcgtgatggc ccaggcggtg + 12301 tgggtcgacg ccggaccgga ccaggtgggg cggctggtgg tggtggcgca ccacctgtcg + 12361 gtcgatggcg tgtcgtggcg gattctgctg tcggatctgc aggcggcctg cgaagccgcg + 12421 gtcgcggggc gggagccggt gctggagccg gtcggtgcgt cgttcaagcg gtgggcgggc + 12481 ttgctggccg agtgggcggt ttccgcggag cgggccggtg agctggccgc gtggaaggcg + 12541 attctcggac cgggggaccg gccggccggt gcgcaggcca cgagccgggc cgcggaaggt + 12601 gccgtgcgct cgcggtcgtg ggtcgtgccg aaggtggaga cggcggcgtt ggcaggccgg + 12661 gctccggtgg cgttccactg cggggtgaac gaggtcctgc tcgccgggct ggcgggcgcg + 12721 gtcgcgcggt ggcgcggcgg ggacgccgtg ctggtggacg tggaaagcca tggccgccac + 12781 ccggtggacg ggacggacct gtcccggacg gtgggctggt tcaccagcgc acatccggta + 12841 cggctggacg tggccggcac cgatctggcg gacgtgctcg ccggcggtcc ggcggccggg + 12901 cgtttgctga aggccgtcaa ggagcagtca cgggccgtgc ccggcgacgg gctcggatac + 12961 ggcttgctgc ggtacctcaa cggcacgacg gggccggtgc tggcggacct gccgtcgccg + 13021 cagatcgggt tcaactacat gggccggttc gccgccggcg agaagagcgg ggtgcgggcg + 13081 tggcagccgg tcggtgacat cggcagttcg ctggaacccg gtatgggcct gccgcacgcg + 13141 ctcgaggtca acgcgatcgt ccaggacctg ccggacggtc ccgagctgac gctcatgctg + 13201 gaatggcagg acggcctgct cggcgaggac gagatcgacc ggctgggccg ggcctggctg + 13261 gacatgctgt ccggggtggc ccgccaggcg gctgatcccg ccgcgggcgg gcacaccgcg + 13321 tccgacttcg acctcgtcac cctggaccag gcggagatcg aggccctcga ggccgaattc + 13381 gcggccgccg gcggactggc cgaggtgctg ccgctgtcgc cgctgcagca cgggctggcc + 13441 ttccacgccg gttacgccgg cgacggcgtc gacgtctaca ccgcgcaggc ggtgctggag + 13501 ctggccggcc cgctggacgt gccgctgctg cggaagtcgg tgcgcgcgct gctggacagg + 13561 cacgcgaatc tgcgtgccgg cttccggcac ggcgccgacg ggaccgccta ccaggtggtc + 13621 cccggcgccg tggcggtgcc ggtgaccctg gtggacgtga cggaatcggc ggatccggcg + 13681 gccgaggcgg cggcggtggc cgcggccgaa cgggcgcggc cgttcgagct ggcccggccc + 13741 ccgctgctgc gggtcatggt ggtggtgctc ggcccggacc ggcaccggct ggtgctgacc + 13801 aaccaccaca tcctgctcga cggctggtcg acgccgctgc tgctggacga actgctcacg + 13861 ctttaccgca acggagccgc tccggccgcg ctggcgccgg tcaccccgta tcgggactac + 13921 ctggcctggg tgcgcgaaac cgaccgggag gcggctaccg aagcctggcg cgacgccctg + 13981 gccggcttgc ccgagccgac cctggtggcg gcggaccggc cggtcccggt cgaggtgccc + 14041 gagcagatct ggaccaccct ggacgagacg ttcgcccagg cgctgggggc gcgggcacgc + 14101 gagtgcggtg tcacggtcag caccgtgctg caggcggtgt ggggcatggt gctggcggcg + 14161 ctcaccggac gcgacgacgt ggtgttcggg tcggtggtgt ccgggcgccc ggccgagctg + 14221 ccggggatcg agaccatggt cgggttgttc atcaacaccg tcccggtccg ggtccggatg + 14281 cggccgcagg acaccttcgc cgaactggtg cggggactgc agaacgagca ggtggcgctg + 14341 ctggcccacc accacgtggg tctcaccgac atccagcagg ccgcggggct ggggcggctg + 14401 ttcgacacca tcatcgtcta cgagaactac ccgagaccgg ccgagatcgg cgacgaatcc + 14461 gccgatgccg atcgggtccg ggtgcaggga ctgaccgccg ccgatgccac ccactacccg + 14521 ctggcgctgg cggtcgtgcc gggcaccgac ctgcggctgc ggctggagca ccagcccgcg + 14581 ctgttcaccg ccgagcaggc cggcgccgtg ctcgagcggt tcacgctggt gctcgaagcc + 14641 gtcgtcgccg atccgcggct gccgctcgcg gtggtgccga tcctgtccga tgccgaacgg + 14701 cgacagctgc aggcgggcaa cgacaccgcg ctgccggtgc cggaccggac gttgccggag + 14761 ctgttcgccg cgcaggccgc cgccaccccg gaggcgaccg cggtggtctt cgaggaccgg + 14821 tcgctgacct acgccgagct cgacgcgcgc gccaaccagc tggcgcgctg gctcatcgac + 14881 cagggtgccg ggccggaagg cctggtcgcg gtgctgctgc cccggtcgct ggaactggtc + 14941 gtcgcgttgc tggcggtcac caagaccggc ggcgcgtggc tgccgatcga tccgggctat + 15001 ccggccgacc gcatcgcctt catgctcgac gacgccggac cggcgctggt gatcaccacc + 15061 gcggtgctgt cggcatcgcc gatcggtgac gtgctggccg cccgctcgag gacggtggtg + 15121 ctcgacgagc ccgcggccgc gggccagctg gcggggcggg accgcgcgcc ggtcaccgac + 15181 accgaccgcg ctcgagcgct ggatccgcgc cacccggcgt acctcatcta cacctcgggc + 15241 tccaccggtc gccccaaggc cgtggtcgtc acccatcgga acctgacgaa ctacctgctc + 15301 cactgtggac ggatgtaccc gggtctgcgg gggcggtcgg tgctgcattc gtcgatcgcc + 15361 ttcgatctga cggtcaccgc gacgttcacc ccgctcatcg tggggggaga gattcacgtc + 15421 ggtgccctgg aagacctgat cggggtggtg gaggccgcac cgtcgatctt cctcaaggcc + 15481 acgccgagcc atctgctgac cttggacacc gcttcccggg gcagtgccgg ttcgggtgac + 15541 ctcctgctcg gcggcgaaca attgccggcc gacacggtcg tccaatggcg ccggaagtat + 15601 ccgaacatcg tggtggtcaa tgaatacggg ccgaccgagg cgaccgtcgg gtgcgtcgaa + 15661 taccggctcg aaccggggca ggaatgcccg ccgggcggtg tggtgccgat cggcaccccg + 15721 ctggcgaaca tgcgggcgtt cgtgctggat tcgtggctgc ggctggtgcc gccgggtgcg + 15781 gtgggcgagt tgtacgtggc cggtgcgggc ctggcgcggg gatacctggg ccgggcaggg + 15841 ctgacggcga cgcggttcgt ggccgatccg ttcggctccg gcgagcggat gtaccggacc + 15901 ggggacctgg tgcagtggaa cccggacgga cagctggtgt tcgccggccg ggtcgacgac + 15961 caggtgaagg tgcggggctt ccggatcgag cccggtgaga tcgaggccgc cctggtggcg + 16021 caggagtcag tgggccaggc ggtggtggtg gcccgtgaca gcgagatcgg cacccggctg + 16081 atcgggtacg tgaccgccgc gggggagtcc ggtgtggacg aagccgcggt gcgcgaggga + 16141 gtggcggccc ggttgccgca gtacatggtg ccggcggcgc tggtggtact cggcgcgctg + 16201 ccgttgacgg cgaacggaaa ggtggaccgg gcggcgctgc cggatcccga cttcggcgcc + 16261 cgtgccgggg gccgggagcc ggtcacggag gccgagcggc tgctgtgtgc gctcttcgcc + 16321 gaggtgctcg gcctggagcg cgccggtgcg gacgacagtt tcttcgagct gggtggggat + 16381 tccatccttt cgatgcggct ggcggcccgg gcccaccgcg agggaatgtc cttcggtgcg + 16441 cgcgaggtgt tcgagcagcg cacgcccgcg gggatcgcgg cgatcgtgga acgggttgcg + 16501 ggcgatcgtc ctgtcgcggc ggtacacgcc gtgtccgatg tcgcccttct cgacctggac + 16561 caaggcgagc tcgacgaatt caaggctgag ttcgacgacg attcccagcc ctttgctgat + 16621 ccagggagat attgatgagc cagtcgcgga tcgaggaaat ctggccgctg tcgccactgc + 16681 aggccggttt gctcttccac gcggtttacg acggcgaagg gcccgacgtc tacatcggtc + 16741 actggattct cgacctggcc ggaccggtgg acgcggccgg gctgcgtgcg gcgtgggaga + 16801 cgctgctggc ccggcacgcc ccgctccggg cgtgtttccg gcagcgcaag tcgggcgaga + 16861 cggtgcagat catcgccagg caggtggaac tgccgtggcg ggaggtcgac ctttcccacc + 16921 tcgacgaccc cgaggaggcc gttcgcgagc tggccgagca ggaccggacg acgaggttcg + 16981 acctcgcgca ggcgccgttg ctgcggctga ccctgatccg gctcggcgcc gacgcgcacc + 17041 gcctggtggt gacctgccac cacacgatca tggacggctg gtcgctgccc atcgtgatcg + 17101 acgagctgtc ggtgctgtac ccggcgggcg gtgacgcgtc ggcgctgccg gacgtgccgt + 17161 cctaccggga atacctcgcg tggctgagcc ggcaggacaa ggaacgcgcg ctgtcggcgt + 17221 ggaccgcgga gctcagcggc gccgaggaac cgacgctggt ggtgcccgcc gatccggggc + 17281 gggcacccgc cgagccggag agcgtcgagg cccacctgcc ggagcacctc acgcgctcgc + 17341 tggccgagct ggcccgtcgc cacgggttga cgttgaacac cgtggtgcag ggcgcctggg + 17401 cgctggtgct ggcgcagctg gccggccggc cggacgtggt gttcggggcg gcggtgtcgg + 17461 cgcgcccgcc ggacctgccc ggtgtggagg ggatggtggg gctgttcctc aacaccgttc + 17521 ccgtgcgcgt gcggttgcgc ggctcgacgc cggtcgtcga gctgctggcg gagttgcaga + 17581 aacggcagtc ggcgctcatt cccgaccagt tcgtcgggct ggcggacatc cagcaggcgg + 17641 cgggtcccgc cgcggttttc gacacgctgc tcgtcttcga gaagttccac cacgggcccg + 17701 ccggatcgga ctccgcggga accttccgca ttcacgtgaa ccagggccgg gtggcggccc + 17761 actacccgct gacgctggtc gccgtccccg gcgagtcgat gtacctcaag ctcgactacc + 17821 tgacggagct cttcgaccgg gaaaccgcgt tcgccatcct cgagcggttc accggggtgc + 17881 tgcggcagct gaccggcgcg ggcgagctca cggtggccgg cgtcgaggtg acgaccgcgg + 17941 ccgagcgggc cctggtggcc ggggaatggg gtgcctcgac ctcggcgccg ccgagcctgc + 18001 cggcgctgga tctgttcggg caccaggtgg cgcaccgccg cgacgagccg gcggtcgtcg + 18061 acggcgatcg gacggtgtcg tacggagaac tcgccgagcg cgctgagcgg ctcgccggct + 18121 acctgaacgg ccggggagtc cggcgcggag accgggtggc cgtggtgctc gaccggtcac + 18181 ccgacctgat cgcgacgctg ctcgcggtgt ggaaggcggg cgcggcgtac gttccggtgg + 18241 accccgccta cccggtggaa cgcaggaagt tcatgctggc ggactccggg cccgcggcgg + 18301 tggtgtgcgc ggaagcgtac cgggccgccg tgccggacac ctgccccgag ccgatcgtcc + 18361 tggacgatcc ccggacgcgg caggcggtgg cggagagccc tcgcctgtcg gcaggcacga + 18421 gcgccgacga cctcgcctac gtgatgtaca cgtccggatc gaccgggacg ccgaagggcg + 18481 tcgcggtgtc gcacggcaac gtcgcggcgc tggccgggga gccgggctgg cgggtgggcc + 18541 ccggtgacgc cgtgctgctg cacgcctcgc acgccttcga catctcgttg ttcgaaatgt + 18601 gggtgccgct gctgtcgggt gcccgggtgg tgctggccgg accgggcgcg gtggacggcg + 18661 cggcgctggc ggcctacgtg gccggtggcg tcacggccgc ccacctgacc gcgggggcct + 18721 tccgggtgct ggccgacgag tcgccggagg cggtcgccgg gctgcgcgag gtgctgaccg + 18781 gtggggacgc ggtgccgctg gcggcggtcg agcgggtgcg cggacgtgtc cggaacgtgc + 18841 gggtgcggca cctctacggc ccgaccgagg ccacgctgtg cgcgacgtgg tggctgctcg + 18901 aacccggcga cgagacggga tcggtgctgc cgatcggacg tccgctcgcc gggcggcgcg + 18961 tccacgtcct cgacgcgttc ctgcggcccg tgccgccggg cgtggcgggc gagctgtatg + 19021 tcgccggagc cggtgtggcg cagggctatt cgagccgccc ggcgctgacg gccgagcggt + 19081 tcgtcgccga tccctccggt tccggtgcgc ggatgtaccg caccggggac ctggcgtact + 19141 ggacggagca gggtgcgctg gcgttcgccg ggcgggccga cgaccaggtg aagatccgcg + 19201 ggtaccgcgt ggagcccggc gagatcgagg tggtcctcgc cggcctgccc ggcgtcggcc + 19261 aggccgtggt gaccccgcgg ggtgagcacc tgatcggcta tgtggtcgcc gaagcgggcc + 19321 acgacgccga cccggtgcgg ctgcgcgagc agctcgccgg gacgctgccc gagttcatgg + 19381 tgccggccgc ggtgctggtg ctggacgagt tgccgttgac ggtcaacggg aaggtggacc + 19441 ggcgggcact gcccgaaccg gacttcgcgg cgaagtcggc gggccgggag ccggtcaccg + 19501 aggccgaacg agtcctttgt ggagtgtttg ccgacgtcct cggcctcgac cacgtcggcg + 19561 tcgacgacag cttcttcgag ctgggcggcg actcgatctc gtcgatgcag gtcgccgcgc + 19621 gtgcgcgtcg cgaagggatc tcgctgaccc cgcggctggt gttcgagcac cggacgccgg + 19681 aacgcctcgc ggcactggcg caggaggcag gcgcgacgcc acgcgccgag gtcgtcacgg + 19741 gcgtgggcga gatcccgtgg acgccggtga tgcgtgccct cggggacgac gcgatgcgcc + 19801 ccggcttcgc gcaggtgaga gtcgtcgtca ccccggcggg ggtgaacccg gacgcgctcg + 19861 tgagcgccct gcaggcggtg ctggacgcgc acgacctgct gcgggcccgg gtggagccgg + 19921 acggacggct gatcgtgccc gagcgcggcg cggtggccgc ggccggcctg ctcacgcggg + 19981 tggccgccgg gaccggcggc ctcgacgaga tcgccgagcg cgaggtcagg acggcgacgg + 20041 gcacgctgga cccgtcggcg ggaatcatgg cgcgggtcgt gtggatcgac gccggggacg + 20101 ccgagccggg ccggttggcc ttcgtggcgc accacctctc ggtcgacgcg gtctcctggg + 20161 ggatcctgct gccggatctg cgagcggcct acgacgaggt gatctccggc gggaccccgg + 20221 ccctcgaacc cccggtgacg tcgtatcggc agtgggcgcg ccggctgacc gcgcgggcgc + 20281 tcagcgaaag caccgtggcc gaactcgaaa aatgggctgc cgtcgtggaa ggcgcggaac + 20341 cggcactgcc ccaggacacc gggcagcaca ccgggcagtc gcactcgtgg tccacgagcc + 20401 tgtccggcac cgaggtgcga gacctggtca ctgtcttgcc gggcgcgttc cactgcggga + 20461 tccaggacgt tctgctggcg gggctcgcgg gtgcggtggc gcgtgtgcgc ggttccggcg + 20521 ccgcgctgct ggtcgacgtg gaagggcacg gtcgcgaagc cgccgacggc gaggacctgt + 20581 tgcgcaccgt cggctggttc accagcgttc acccggtccg tctcgaactg tccgatgtgg + 20641 acctcgcggg cgcggcggac ggcgagcggc ctgccgggca gttgctgaag gccgtgaagg + 20701 agcagatccg ggccgtgccc ggcgacggat ccggctacgg gctgctgcgc cacctcaacc + 20761 cgggcaccgg ggcgaggctg gccgagttgc cgtccgcgca gatcggcttc aactacctcg + 20821 gccggactgt cctcgctccc gaggacaccg cgtggcagcc caacggcgga gggccgctcg + 20881 gcggcggtcc ggacatggtc ctcgcgcacg ccgtggaggt cagcgcggaa ctccaggaca + 20941 cgccggccgg cccccggctc gggctggcca tcgacacgcg ggatttcgac ctcgccacgg + 21001 tggagcggct cggcgaggcc tggctggaga tgctgaccgg tctcgcggcg gtggcccgcg + 21061 gatccggcgc gggcgggcac acgcccgccg acttcgctct ggtcgacctg acgcagcggg + 21121 acgtggcgga gctggaggcc gcggcgcccg ggctgacgga catctggccg ttgtcgccgc + 21181 tgcaggaagg catgctcttc gaacgggcct tcgacgagga cggcgtcgac gtctaccaga + 21241 cgcagcggat cctggacctc gacgggccgc tcgacgaacc ccggctgcgc gcggcctgga + 21301 accaggtcct cgcccggcac gcctcgctgc ggaccggctt ccaccagctg gggtccggcg + 21361 ccacggtgca ggtcgtcgtg cgcgaggccg acatcccgtg gcgggtggcg gatctgtcgc + 21421 acctcgatgc ggcggaggcg gccgcggagg tcgagcggct gctcgccgag gaccagggcc + 21481 ggcggttcga cgtgacccgg ccgccgctgc tgcggctgct gctgatccgg ctcggtgcgg + 21541 acgagcaccg actcgtcgtg acctcgcacc acgtactcct cgacggctgg tcgaccccgc + 21601 tcgtcgtggg ggagatgtcg gacggctacg cgggcggccg cagctcctcg aagccgccgt + 21661 cctaccagga ctacctggcg tggctgagcc gtcaggacgc ggaggcgacc cgatcggcgt + 21721 ggcgggccga gctcgccggc gcggacgaac cgaccttggt cgacgccgac gcgggcaaga + 21781 cgctcgtgat gccggacgag cacgccgaat ggctgcccga gccggcgacg cgggcactcg + 21841 ccggcttcgc ccgtggccac gggctgacgg tgagcacgat cgtgctgggc gcgtgggcgc + 21901 tggtgctggc gcggctggcc ggccggaccg acgtggtgtt cggctcggtg gtgtcggggc + 21961 gtccggcgga cgtgccggat gtcgagcgca tggtgggcat gttcatcaac accgtcccgg + 22021 cccgggtgcg gctcgacggc cgccggccgt tgctggagat gctcgaagac ctgcaggcgc + 22081 gccaggcggc gttgaccgag caccagtacc tggggctgcc ggagatccag aaggtggcgg + 22141 ggaccggcgc gatcttcgac acgatcgtga tggtcgagaa ctacccgcac gacgccgccg + 22201 gtctcggcgg cgacggcggg gtggcgatca gctcggtcgt cacccggacc ggcaccagct + 22261 atccgctgac catgaacgtc agcctcgggg accgcctgcg catcaccgtg tcctaccggc + 22321 ccgaccggat cgacgacgcg acggccgccg aggtcgccag gcaggtcgtg cgggtcctgg + 22381 aacgggtggt ggccgagcct tcgctgccgg tgggccgcct cggcgtgacg agcgaaccga + 22441 cgcgcgcggc ggtggtggaa cgctggaact cgacgggcga agcggccgcc gagacgtccg + 22501 tgctggagct gttccggcgg caggcaggtg cctcgccgga cgcggtggcg gtcgtggcgg + 22561 gggaacgcac cctgtcctac gccgacctcg accgcgagtc cgaccggctg gccgggcacc + 22621 tggccgggat cggcgtgggg cgtggtgacc gcgtcggcgt ggtgatgaca cgcggcgcgg + 22681 acctgttcgt cgccctgctc ggggtctgga aggcaggcgc cgcgcaggta ccggtgaacg + 22741 tggactaccc cgcggaacgg atcgagcgga tgctggccga cgtcggcgcg tcggtcgcgg + 22801 tctgcgtgga agcgacccgc aaggcggtgc cggacggggt cgagccggtg gtcgtggacc + 22861 tgccggtgat cggcggagta cggcccgagg cgccaccggt cacggtcgga gcgcacgacg + 22921 tggcctacgt gatgtacacg tccggctcga ccggcgtgcc gaaggccgtc gcggtgccgc + 22981 acgggagcgt ggcggcgctg gcgagcgacc cgggctggtc gcagggcccc ggcgattgcg + 23041 tgctgctgca cgcgtcgcac gcgttcgacg cgtcgctggt cgagatctgg gtgccgctgg + 23101 tcagcggagc ccgcgtgctg gttgcggaac cgggcacggt cgacgcggaa cggctgcgcg + 23161 aagcggtctc ccgcggcgtg accaccgtcc acctgacggc cggtgccttc cgtgcggtgg + 23221 ccgaggaatc gccggactcc ttcatcgggc tgcgcgagat cctgaccggt ggggacgcgg + 23281 tgccgctcgc gtccgtcgtg cggatgcgcc aggcctgccc ggacgtccgg gtccggcagc + 23341 tgtacggccc caccgagatc accctctgcg ccacctggct cgtcctcgag ccgggggccg + 23401 cgacgggcga cgtcctgccg atcggcaggc cgctggccgg ccggcaggcc tacgtgctcg + 23461 acgcgttcct gcagcccgtg gcgccgaacg tgaccggcga gctctacctc gccggcgctg + 23521 gcctggcgca cggttacctg ggcaacaccg cggcgacctc ggagcggttc gtcgccaacc + 23581 cgttctccgg cggcggccgg atgtaccgca ccggcgacct ggctcgctgg accgaccagg + 23641 gcgagctggt gttcgccggc cgcgccgact cccaggtgaa gatccgcggc taccgcgtcg + 23701 agccgggtga ggtcgaggtg gcgctgaccg aggtgcccca cgtcgcgcag gcggtcgtgg + 23761 tggcgcggga aggccagccc ggcgagaagc gcctgatcgc gtacgtgacc gcggaagcgg + 23821 gatcggcact ggaatccgcc gcggtccgcg cgcacctcgc gacgcggctg ccggagttca + 23881 tggtgccgtc ggtggtggtg gtgctggaga gcttcccgtt gacgctcaac gggaagatcg + 23941 accgcgcggc cctgcccgcc cccgagttcg ccgggaaggc ggccgggcgc gaaccgcgca + 24001 cggaggccga gcgggtgctg tgcggcctgt tcgccgagat cctcgggctg gagcgggtcg + 24061 gcgccgacga cggcttcttc gagctgggcg gcgactcgat cctctcgatg cggctggccg + 24121 cccgcgcgcg tcgcgagaac ttcgtcttcg gcgcgaagca ggtcttcgag cagaagacgc + 24181 ccgcggggat cgcggcggtc gccgagcgtg gcgggcagag ccgcccggcc ggcgtcgccg + 24241 acggcgtcgg cgaggttccg tggacgccgg tggtgcgggc actgctcgaa cgcgatcccg + 24301 ccgggctgac ccgcggtgcc atggcgcagt gggtcagcgt ggcagcgccc cgcgaccttt + 24361 cggtgaccgc gctggtcgcc gggctgggcg cggtgatcga cacgcacgac atgctgcgga + 24421 gccggatcgt cgagagcgag ggcgtggaac cccggctggt cgtggccggg cggggcacgg + 24481 tggacgcggc ggcgctggtc gaacgggtcg aggccggcga cggtgatctc gccgagatcg + 24541 cggaccggtg cgcccacgac acggccgcac gcctggatcc cgtggccggc gtgctggtcc + 24601 gggccgtctg ggtggacgcc ggaccgggcc gcgccggacg gctcgtggtg gccgcgcacc + 24661 acctcgtggt cgacgtcgtg tcgtggcgaa ccctcctgcc ggacctgcag gcggcctgtg + 24721 aagccgtggt cgcgggcggg cagccggcgc tcgatccgcc ggacgtctcg ttccggcgct + 24781 ggtcgcggac gctggacggc gaggcggcga tccggaccgg cgaactggcg gtgtggacgg + 24841 agatcctcga cggggcgcag tcccggctgg gcgagctcga tccgcggcgc gacaccgtgt + 24901 ccaccgcggg acgccggtcc tggaccgtgc cccgggaaca cgcgggcgtg ctcgtggaac + 24961 aggtcacctc ggccttccac tgtggtgtcc acgaggtgct gctggccacc ctggcgggcg + 25021 ccgtggcggg ctggcgcggc ggcacggccg tcgtggtgga cgtcgaaggc cacggccgtc + 25081 agcccctcgg ggaactggac ctgtcgcgga cactcggctg gttcaccgac gtccacccgc + 25141 tccggctgga cgtcaccggg gtcgacccgg ccgaggcggt cgccggcggc gacgcggcgg + 25201 gccggttgct gaagcaggtc aaggagaacg tgcgagccgt gcccgacggc gggctcggct + 25261 acgggatgct gcggtacctc aacgccgaga cggggccggt cctcgccgcg ctgccgaagg + 25321 cggagatcgg gttcaactac ctcggccgct tctcggcggg gtccggcggc gaggcacaac + 25381 cctggcagat cacgggaatc gtcggcggtg cggcggagca ggacacgccc ttgcggcacg + 25441 tcgtggagat cgacgccgtc gtggtggacg gcccggacgg acccgaattc accctgaccg + 25501 tgacctgggc cgggcggatg ctcggcgacg ccgaggcgga gtcgctcgcg caggcgtggc + 25561 tggacatgct ggccggcctg gccgcccacg tggccgccgg tggccccggg gggcacacgc + 25621 cgtccgactt cccgctcacc gcgctgacgc agcgggaggt ggcggagttc gaggccgccg + 25681 tgccgggcct gctcgacatc tggccgcttt ccccgctgca ggaaggcctg ttgttccacg + 25741 ccgccgacga ccgcggcccg gacgtctacg cgagcatgcg caccctcgcc atcgacggcc + 25801 cgctggacgt cgcccggttc cgggcgtcct ggacggtcct gctcgaccgg catcccgccc + 25861 tgcgggcgag tttccaccag ctggaatccg gcgaggccgt gcaggtgatc gcccgggacg + 25921 tgccgccgga ctggcgggag accgacctgt ccgggctgcc cgagagcgaa gcgctcgcgg + 25981 agttcgaccg cctcgcggcg cggatgcacg ccgagcggtt cgacctgacc aaggctccgc + 26041 agctgcgcct gcacctggtg cgcctcggtg accgcaggta ccggctgatc ttcacgtcgc + 26101 accacatcgt ggccgacggc tggtccctgc cgctcatcct ggtcgacgtg ctgacggcgt + 26161 acgaggcagg cggtgacggc cggacgctgc cggccgcgac gtcgtaccgt gacttcctcg + 26221 cctgggtcga ccgccaggac aagggggcgg ccgggcaggc gtggcggacc gagctcgcgg + 26281 ggctcgacga ggcgacccac gtcgtgccgc cgggctcgat catcacgccc ctggagcccg + 26341 aacgcgtcgc gttcgaactc gacgacgaga cgagcaagcg gctggtcgag ttcacccggc + 26401 ggcacggcgt cacggcgaac acgctcttcc agggggtctg ggcactgcac ctggcccggc + 26461 tggccgggcg gaacgacgtg gtcttcggtg ccgcggtcgc ggggcgcccg ccggagatcc + 26521 ccggcgtcga gtccgcggtc ggcctgttca tgaacatgct gccggtccgg gcgcgcctca + 26581 ccggtgccga gccggtcgtc gacatgctga aggacctgca ggagcggcag gtcgcgatga + 26641 tggcgcacca gcacatcggg ctgcccgaga tcaagcagct caccgggccg ggggcggcgt + 26701 tcgacacgat cgtggtgttc gagaactacc cgcccgcgcc gccgaggtcc gacgaccccg + 26761 acgcgctcgt catccgcccg gtggggatcc cgaacgacac cgggcactac ccgctgtcca + 26821 tgcgcgcgtc cgtggcggcg ggccccgtcc gcggtgagtt catctaccgg ccggacgtgg + 26881 tcgaccggac cgaggccggg gagatggtcg cggcgatcct ccgcgcgctc gagcaggtgg + 26941 tggccgagcc gtggacgccg gtgggccagg tcggcctgat cggcccggag cagcgccgtc + 27001 tggtcgtgga cgagtggaac cggaccgacg tgccgctggc ggcggagacg ctgccggtgt + 27061 tgttccgcag gcaggcggag cggtcaccgg atgcggtggc cgtcgaggac ggggcgcgga + 27121 gcctgacgtt cggtgggctg ctcggcgagg tggaagcgct ggcccggctg ctcgtggggg + 27181 cgggcgtgcg gcgcgagcac cgggtgggcg tcctggtcga gcgctcggcc gagctggcgg + 27241 tgaccatgat ggccgtgtcg ttcgccggcg gggtgttcgt gccggtcgac cccgactatc + 27301 cccgtgagcg cgtcgagttc atgctggcga actcggcacc cggggtcatg gtgtgcacga + 27361 agacgacccg ggcggccgtg cccgcggagt tcgcgggcac cgtgctggtg ctggacgagc + 27421 tgcccgccgc ggacccggac gtcgagctgc cgccggtggc accggaagac gcggcgtacg + 27481 tgatctacac gtccgggtcg acgggggtgc ccaagggcgt cctggtgacc cactccgggc + 27541 tcgccaatct ggggtacgcg cacatcgagc ggatggcggt gacgtcgtcc tcgcgggtcc + 27601 tgcagttgtc cgcgaccggc ttcgacgcca tcgtgtccga gctctacatg gccttgctgg + 27661 ccggcgcgac cctggtgctg ccggacgcgg cgagcatgcc gccccgggtg acgctgggcg + 27721 aggcgatccg gcgggcgggc atcacgcacc tgaccgtgtc gccgagtgtg ctggcgagcg + 27781 aggacgacct gccggacacg ctgcggaccg tgctgacggg cggcgaggca ctgccgcccg + 27841 cgctggtgga ccgctggtca ccgggccgcc gggtgatcca ggcctacggg ccgaccgaga + 27901 cgaccatctg ctcgacgatg agtgccccgc tgtccccggg gcacgaccag gtcccgctcg + 27961 gcggcccgat ccacaacgtg cggcactacg tgctcgacgc gttcctgcag ccggtgccgc + 28021 ccggcgtggt cggcgagctc tacatcacgg gtgtcgggct ggcgcgcggc tacctcgggc + 28081 gtcccggcct gaccgcggaa cggttcgtgg ccagcccgtt cgcccccggc gagcggatgt + 28141 accgctcggg cgacctgttc cgctggaccc gggaaggcca gctgctcttc gcgggccgtg + 28201 tcgacgcgca ggtcaaggtg cgggggtacc gggtcgagcc cgccgagatc gaggccgtgc + 28261 tcgcggagca cccgtgggtc ggccaggtgg cggtgtccgt ccgccgggac ggcccgggcg + 28321 acaagcagct ggtggcttac gtcgtgccgt cggccgacgc ggccgccgag aacggcacgc + 28381 tggcctcggc actgcgcgag ctggcggccg aacgcctgcc ggagtacatg atgcccgcgg + 28441 cgttcgtgtc gctggagcag atgccgctca ccccgaacgg caagctcgac caccgggcgc + 28501 tgcaagcccc cgacttcgcc gggatgtcct cgaagcgggc cccccgcacg cccatggagg + 28561 cgaggctgtg cgcgctcttc gcggacgtgc tcggccttga ccaggtgggg cccgacgaca + 28621 gcttcttcga actcggcggc gactcgatca cctcgatgca gctgtcggcc cgggctcggc + 28681 cgacggggct ggaactgacc ccgtggcagg tgttcgacga gaagacgccg gaacggctgg + 28741 cggtgatcgt ccaggaactc gcggccgagg gcgggaccac cccggcgccg gagcccggcg + 28801 agggcacgct cgtcgctctc tcacctgacc agatggacct actcgaggcc gggctggccg + 28861 gcgaatgacc gccataagga gcagattgtg accgttgacg acactcgcgc gaagcgccgc + 28921 tccagcgtcg aggacgtctg gcctctttcg ccgctgcagg agggaatgct ctatcacacc + 28981 gccctcgacg acgacgggcc ggacacctac acggtgcaga ccgtctacgg catcgacggc + 29041 ccgctggacc cggggctctt gcgggcgtcg tggcaggcgc tcgtggaccg gcacgccgcg + 29101 ctgcgggcct gtttccggta cgtctccggg gcgcagatgg tgcaggtcat cgcgcgggag + 29161 gccgaggttc cctggcgcga gacggacctt tccgggctgc cggacgacat cgccgagggc + 29221 gaggttgacc ggctggcggc ggacgaggtg gccgagcggc tgcgcatcga ggccgcgccg + 29281 ctgatgaagc tgcacctgat ccggctcggc ccggaccgcc accggctcgt gcacacgctg + 29341 caccacgtgc tggtggacgg ctggtcgatg ccgatcctgc accgggagct cgccgcgatc + 29401 tacgcggcgg gcggggacgc gtccggcctc ccgcccaccg tctcctaccg ggactacctc + 29461 gcctggctgg gccggcagga caaggaggtg gcgcgggcgg cctggcgggc cgagctcgcc + 29521 gggctggaca cgccgaccac ggtcgccgcg cccgatccgg cccgcgtccc ggacatccac + 29581 acggcggtgg tcgagctgcc ggcggagctg acggacggct tggcgcagtt cgcgcgtggc + 29641 cacgacctca cgctgaacac cgtcgtgcag ggcgcgtggg ccgtcgtgct ggcccagctc + 29701 gcgggccgcg acgacgtcgt gttcggcgcg accgcctccg ggcggcccgc ggacctgccc + 29761 ggggtggagg cgatggtcgg ccagctgctc aacaccctgc cggtgcgggt ccggctcgac + 29821 ggcgggcgcc gcgcggccga gctgttcgcc cggctgcagc gcgaccagtc ggcactcatg + 29881 gcccaccagc acctcggcct gcaggacgtg caggccgtcg tcggacccgg agcggtcttc + 29941 gacacgctcg tcatctacga gaacttcccc cgcaagggac tcggccgggc accgggcggt + 30001 ggcctgagcc tggtcccggt gaagcgcggg cggaactcct cgcactaccc gttcacgctg + 30061 atcaccggac ccggcgagcg gatgccgctg atcctcgact acgaccgggg cctgttcgac + 30121 cccgcggccg ccgaatcggt cgtcggcgcg ctggccaggg tgctggagcg gctggtcgcc + 30181 gagcccgacg tcctcgtcgg caggctgacg ctcgcgagcg aggccgaacg cgcgctggtg + 30241 gtggagggct tcaacgccac cgcgggcccg gtgccggggg agtccgtcct cgagctgttc + 30301 gcccggcggg tggccgccgc gccggacgcg gtggcgatca ccggcgccgc cggcgcgaac + 30361 ctgacctacg ccgaggtcga ccaggcgtcg aaccggctgg cgggctacct cgccgtccgg + 30421 ggcgtgggcc gtggcgaccg cgtcggggtg gccatggaac ggtcgccgga tctgctgatc + 30481 gcgttcctgg cgatctggaa ggcgggtgcc gcctacgttc cggtggacgt cgagtacccg + 30541 gccgagcgga tctcgttcgt cttcgacgac tccggcgtct cgaccgtcct gtgcaccctg + 30601 gccaccagcg cggtcgcgcc gggcaacgcg atcgtgctcg acgcgcccga aacacgcgtg + 30661 gccgtgcggg actgcgccgc gccggaaatc cggccgcacg cggacgacct ggcgtacgtc + 30721 atgtacacct ccggctccac cggcctgccg aagggcgtgg ccatcccgca cggggccgtg + 30781 gccggcctgg cgggcgacgc gggctggcag atcggtcccg gcgacggcgt gctgatgcac + 30841 gcgacgcacg tcttcgaccc ttcgctctac gcgatgtggg tgccgctcgt ctcgggcgcc + 30901 cgggtcctgc tcaccgagcc gggggtgctg gacgcggccg gggtacggca ggccgtgcac + 30961 cggggcgcga ccttcgtcca cctcaccgcc ggcaccttcc gcgcgctggc ggagacggca + 31021 ccggagtgct tcgaaggcct ggtcgagatc gggaccggcg gcgacgtggt tccgctgcag + 31081 tcggtggaga acctgcggcg ggcccagccc ggcctgcggg tgcgcaacac ctacgggccg + 31141 accgagacca ccctgtgcgc gacgtggctg ccgatcgagc ccggtgaggt gctcggccgg + 31201 gagctgccga tcggccatcc gatgaccaac cgccggatct acctcctcga cgccttcctg + 31261 cgcccggttc cgccgggcgt ggccggcgag ctgtacatcg cgggcacggg cctggcccac + 31321 gggtacctga agagccccgg cctgacggcc ggccggttcg tggcctgccc gttcgccgcc + 31381 ggtgaacgca tgtaccgcac cggcgaccgg gcgcgctgga cccgcgacgg cgaggtggtg + 31441 ttcctcggcc gcgccgacga ccaggtgaag atccgcggct accgggtcga gctcggcgaa + 31501 gtggaggctg cgctggcggc ccagccgggc gtggtcgagg ccgtcgtcac ggcgcgggag + 31561 gaccagcccg gcgagaagcg cctggtcggc tacttcgtct ccgacggcgg cgacgcgggg + 31621 ccggtggaga tccggcggca gctggccctg gtgctgcccg actacctggt ccccatcgcc + 31681 gtggtcgccc tgcccggcct gcccgtcacc cccaacggca aggtcgatcg ccgggccctg + 31741 cccgccccgg atctcgcggg acactcgccg gagaaggcac ccgagaacga gaccgagaag + 31801 gtgctgtgcg cgctgttcgc cgagatcctc agcatcgacc aggtgggggt cgacgacacc + 31861 ttccacgacc tcggcggcag ttcggcgctg gccatgcggc tcgtcgcgcg gatccgtgag + 31921 gagctcggcg cggacctgcc catccggcag ctgttctcct cgccgacccc cgcgggcctg + 31981 gccagggcgc tggccgcgaa gtcacgcccc gcgctggaag ccgcccagcg gccggaccgg + 32041 gtgcccgtca ccgcccggca gctgcgtgcc tggctgctgg ccgatcccgg cggggagacg + 32101 gccggcctgc acacctccgt cgccctgcgc ctgcacggcc gggtggacgt gcccgcgctg + 32161 gcggcggcgc tcggcgacgt cgcggcccgg cacgagatcc tccgcacgac cttcccgggt + 32221 gacgcgcaga gcgttcacca gcacgtccac gacgccttgg cggtcgagct gactccggtc + 32281 ggagtcaccg aggaagacct cccggggctg ctcgccgagc ggcgtgacct gctcttcgac + 32341 ctcaccaggg acgtgccgtg gcggtgtgac ctcttcgcgc tctcggacaa cgagcacgtg + 32401 ctgcacctgc aggtccaccg gatcctcgcc gacgacgact cgctcgacgt gttcttccgc + 32461 gacctggcgg ccgcctatgg tgcgcgccgc gaaggccggg tcccggagcg cgcgcccctg + 32521 gcgttgcagt tcgccgacta cgcgctctgg gagcagcgcc tgctcacgga cgagaacgag + 32581 ccgggcagcc tgatcaacga gcaggtggcc ttctggcggg acaacctggc cggcctcgac + 32641 ggggagacgg tgctgccgtt cgaccgcccg cgcccggccg tcccgtcgcg gcgcgccgga + 32701 acggtcgcgc tgcggctgga ggccggcccg cacgcccggt tgacggaggc ggcggagccg + 32761 ccgggcgcgg acacgctcga gatggtgcac gccgcgctcg cgatgctgct ggccaagctc + 32821 ggagcgggcc acgacgtggt gatcggcacg gcgctgccgc gggacgagga gctcttcgac + 32881 ctcgagccga tgatcgggcc gttcacccgg gcgctcgccc tgcgcaccga cgtctcgggc + 32941 gatccgacct tcctcgaggt cgtcgccagg gtgcaggagg cgggccaagc cacgggcgag + 33001 cacctggacc tgcccttcga acggatcgtc gagctgctcg atctgccggc ctcgctcgcc + 33061 cgccaccccg tgttccaggt gggacttcag gtggacgagg aggacatcga cggatgggcc + 33121 gcggcggaac tgcccgccct gcgcaccgcc gtcgaacccg gcgggaccgc ggccatggag + 33181 ctggacctcg cggtcaagct caccgagcgc ttcgacgacg acgacaacgc cggcggcctc + 33241 gagggcgcgc tgcactacgc caccgacctg ttcgacgagg ccacggcgga gtcggtggcc + 33301 cggcggctgg tccgcgtcct cgagcaggtg gcggaggatc ccgggcggcg gatcagcgac + 33361 ctggatgtct tcctggacga cttcgaacgc ggccgtccgc ccatcgctcc ggcgcggtgg + 33421 gccggggccg tgcccccggt ggtcgccgaa ctggccgggg acggcccgct cggcgcgctc + 33481 ctgctcgacg agcagctgcg cccggtcgct cccggagccg tcggcgatct gtacgtcacc + 33541 ggcccggccg tggacgcggg aacggccacc ctggcgaccg tgccctgccc gttcggggac + 33601 gaggggcacc ggatgctgca cacgggcctg ctcgcccgca aaacgcccgc caagaccctg + 33661 gtcgtcgtgg gcgagcggag gcggtcgagc gcttcggtga agacgggtga cttcgagatc + 33721 ctgctgccgc tgcgcgccgg cggtgaccgc ccgcccctgt tctgcgtcca cgcgagcggt + 33781 ggcctgagct ggaactacga gccgttgctg cggtacctcc cgccgaacca gccggtctac + 33841 ggcgtgcagg ctcgcggcct ggcccggacc gaaccgctgc cgggcagcgt cgaggagatg + 33901 gcggccgact acctcgagca gatccgtgcc gtgcagccgg ccgggccgta ccacctcctc + 33961 ggctggtccc tcggcggccg gatcgcgcag gcgatggcca ggttgctcga ggcggacggg + 34021 gagcggctcg gcctgctcgc cctgctcgac gcctatcccg tctacatggg acgcaagacg + 34081 accggcgccg cgagcgaaga agcggctctc gaacagcgga accagcagga tctggacctc + 34141 gcggggcaac tggtcaaggg tgtggccgcc cggtcgcgcc tcgaggcggt catgcgcaac + 34201 ctctggaagg tcgggccacg gcacacacgt tcgcccttcg ccggcgacgt cctgcttttc + 34261 gtggccactg tggaccgtcc cgcgcatttg cccgtcccag tggcgaaggc cagctggaag + 34321 gaattcacca gtggggcggt agaggcccac gaaatcccgt ccaaccacta cgacatggtg + 34381 caatccgcgg cgctgggcca gattggtgcc atcgtcgccg agaaactccg gtcccggccg + 34441 gagggtgaaa ggacacaacg atgagcaatc cgttcgacaa cgaagacggc tcctttttcg + 34501 tgctggtcaa cgacgagggc cagcactccc tctggccgac cttcgcggag gtgcccgccg + 34561 gctggacccg cgtgcacggc gaagcgggcc gtcaggagtg cctcgcctac gtcgaggaga + 34621 actggacgga cctccggccg aagagcctca tccgggaagc gagcgcctga gtgtccagcc + 34681 gctcaacgcg gacggttgga cgagcccggc gctcgtccga ccggctccgg gcggcttccg + 34741 cttgagtcca ccggctggat gaggcgaaag gaccttgtgc agtgttcgag gagagcaacg + 34801 ccctccgggg cacggaaata caccggagag accggttcgc tccggggccg gaactgcgct + 34861 ccctgatggg cgagggcacc atgtccatcc tgcagccccc ggattccccc ggcgggcgga + 34921 ccgggtggct ggccaccggg cacgacgagg tccggcaggt cctcggctcg gacaagttca + 34981 gcgccaagct gctctacggc gggaccgtgg ccggccgcat ctggccgggc ttcctcaacc + 35041 agtacgaccc cccggagcac acgcgcctgc gccggatggt gacgtcggcg ttcaccgtcc + 35101 ggcggatgca ggacttccgg ccgcggatcg agcagatcgt ccaggcgagc ctggacgcca + 35161 tcgaggccgc cggtggcccg gtggacttcg tcccccggtt cgcctggtcc gtggcgacga + 35221 cggtgacgtg cgacttcctc ggcatcccgc gtgacgatca ggcggacttg tcgcgcgccc + 35281 tgcacgccag ccggtccgaa cggtcgggca agcggcgggt ggcggcgggg aacaagtact + 35341 ggacgtacat gaccgagatc gcggcccgcg cgcgccgcga tcccggtgac gacatgttcg + 35401 gcgcggtggt gcgcgaccac ggcgacgcga tcaccgacgc ggaactgctg ggcgtggccg + 35461 cgttcgtcat gggcgcgggc ggggaccagg tggcccggtt tctcgcggcg ggcgcgtggc + 35521 tgatggtcga gcaccccgat cagttcgcgc tgctgcggga aaagccggac accgtcccgg + 35581 actggctgaa cgaggtggag cggtacctca ccagcgacga gaagaccact ccgcgcatcg + 35641 cgcaggagga cgtgcgcatc ggtgatcagc tcgtcaaggc cggcgatgcc gtcacctgct + 35701 cgctgctggc ggcgaaccgc aggaagttcc ccgccccgga ggacgagttc gacatcaccc + 35761 gggaacggcc ggtgcacgtc acgttcggcc acggcatcca ccactgcctc ggcaggccac + 35821 tggccgagat ggtgttccgg gcggcgattc cggcgctggc acaacgcttt cccaagctga + 35881 ggctggccga gccggaccgc gagatcaagc tggggccgcc gccgttcgac gtggaagccc + 35941 tgctgctgga gtggtgacgc cgggccggac acgaaatcgt cgggagcaaa agagggggtt + 36001 ttcccgttga atgatgacga cccgcggccg ctgcacattc gccggcaggg cctggacccg + 36061 gcggacgagc tgctcgccgc cggatcgctg acgagggtca ccatcggatc cggagcggat + 36121 gccgagaccc attggatggc caccgcgcac gccctcgtcc ggcaggtgat gggcgaccac + 36181 cagcggttca gcacccggcg ccgctgggac ccgcgggacg agatcggcgg gacgggcacc + 36241 ttccggccgc gtgaactggt cggcaacctg atggactacg acccgcccga gcacacgcgg + 36301 ctgcgccaga agctgacccc cgggttcacg ctgcgcaaga tgcagcggct gcagccgtac + 36361 atcgaacaga tcgtcaacga gcgactcgac gagatggcgc gggcgggatc gcccgcggat + 36421 ctggtcgcgt tcgtcgccga caaggtgccc ggcgccgtgc tgtgcgagct gatcggcgtg + 36481 ccgagggacg accgggccac gttcatgcag ctgtgccacg cgcatctcga cgcctcgcga + 36541 agccagaaac ggcgggcggc ggcgggagag gcgttctccc gctacctgct ggcgatgatc + 36601 gccagggaac gcaaggaccc gggcgagggg ctcatcggag cggtcgtcgc cgaatacggc + 36661 gacgaagcca cggacgagga gctgcgcggc ttctgcgtgc aggtgatgct ggctggcgac + 36721 gacaacatct ccggcatgat cgggctcggc gtgctggcgc tgctgcggca ccccgagcag + 36781 atcgacgcgt tgcgcggcgg cgaacagccg gcgcaacgag ccgtcgacga gctgatccgg + 36841 tacctgaccg tgccctacgg cccgacaccc cgcatcgcga agcaggacgt caccgtcggg + 36901 gaccaggtga tcaaggcggg cgagagcgtc atctgctcgc tcccggcggc caaccgcgac + 36961 cccgccctcg tgccggacgc ggaccggctc gatgtcacgc gcgaccccgt cccgcacgtc + 37021 gcgttcgggc acgggatcca ccactgcctg ggagccgcac tggcccgcct cgaactgcgc + 37081 acggtcttca ccgcgctgtg gcggcggttt cccgacctgc ggctcgcgga tcccgcccag + 37141 gagaccaagt tccgcctcac cacccccgct tacgggctga ccgagctgat ggtcgcctgg + 37201 tgaccggggg cccgcgacgt ccacgcgagg ccgctggacg tcctgcctga ttccgggtgg + 37261 aattgggacc gtcggcgggt tcgggccgaa aaaaccgaac gaccaagaca gagggacatt + 37321 tcttcccggt cgaccaagga gtttccacgc ggatggggca cgatatcggt cagctcgcgc + 37381 cgctcttgcc ggagccggcg aacttccagc tgaggacgaa ctgcgatccg catgcggaca + 37441 acttcgacct gagggcgcac ggcccgctgg tccggatagc cggggactcc tccgctcagc + 37501 tgggcaggga atatgtctgg caggcccacg gctacgacgt cgtgcgccgg atattgggcg + 37561 accacgagaa tttcacgacg cggccgcaat tcacccaagc gaaatccggg gcgcacgtcg + 37621 aggcccagtt cgtcgggcag atatcgacct acgacccacc cgagcacacc cggctgcgga + 37681 agatgctcac gccggagttc acggtccggc ggatccgccg gatggagccc gcgatccaag + 37741 ccctcgtcga cgatcggctc gaccgggtgg cggccgaggg accgcccgcc gacctccagg + 37801 cgctgttcgc cgacccggtc ggcgcgctcg ctctgtgcga actgctcggc atcccccgag + 37861 acgaccagcg cgagttcgtc cggcggatca ggcggaacac cgatctgagc cgcgggctca + 37921 aggcgcgggc ggcggacagc gcggcgttca accggtacct ggacaacctc atcgcccggc + 37981 agcgccggga cgccgacgac gggttcctcg gcatgatcgt gcgagagcac ggggacaccg + 38041 tcacggacga ggagctgaag ggcctgtgca cggcgctgat cctcggcggc gtcgagaccg + 38101 tcgccgggat gatcggcttc ggggtgctcg ccctgctcga gaaccccggc caggtgccgt + 38161 tgctgttcgc gggccccgag caggccgacc gcgtggtcaa cgagctgctg cgttacctgt + 38221 ctccggtgca ggcgccgaat cccagcctcg ccgtcaagga tgtgatcatc gacggacagc + 38281 tgatcaaagc gggagattat gtcctgtgct cggtcctcat ggccaaccgg gacgaagcgc + 38341 tgacgccgaa ccccaacgtc ttcgacgcga atcgcgccgc ggtatcggac gtcggtttcg + 38401 ggcacggcat ccactactgc gtgggcgcgg cgctggccag gtcgatgctg cggatggcgt + 38461 accaggccct gtggcagcga ttccccgggc tccggctggc cgtgcccatc gcggaagtga + 38521 agtaccgaag cgcgttcgtc gactgccctg atcgggttcc ggtcacctgg tagcgcaatc + 38581 cgggttgaaa accagcctcg gcaatttgac actcgacaga ggaatggtgg gagatgtcgg + 38641 tcgaagactt cgacgtggtg gtggcgggcg gcgggccggg tggttcgacg gtggccacgc + 38701 tggtggccat gcagggacac cgggtgctgc tgctggagaa agaggttttc ccgcggtatc + 38761 agatcggtga gtcgctgctg cccgccacgg tgcacggcgt gtgccggatg ctcggcatct + 38821 ccgacgagct ggccaatgcc gggttcccga tcaagcgcgg cggcacgttc cgctggggcg + 38881 cccggccgga gccgtggacg ttccacttcg gcatctcggc caagatggcc ggctcgacgt + 38941 cgcacgccta ccaggtcgag cgggcgcggt tcgacgagat gctgctgaac aacgccaagc + 39001 gcaagggcgt ggtcgtgcgg gaggggtgcg cggtcaccga tgtggtggaa gacggcgagc + 39061 gggtcaccgg tgcgcggtac accgatcccg acggcaccga gcgggaagtg tcggcgcggt + 39121 tcgtgatcga cgcgtcgggc aacaagagcc ggctctacac caaggtcggc ggttcgcgga + 39181 actattcgga gttcttccgc agcctcgcgc tgttcggtta cttcgagggt ggcaagcggc + 39241 tgcccgagcc ggtctccggg aacatcctga gtgtggcctt cgacagcggc tggttctggt + 39301 acatcccgct gagcgacacg ctgaccagcg tcggcgcggt ggtgcgccgg gaggacgccg + 39361 agaagatcca gggtgaccgg gagaaggccc tcaacacgct gatcgccgag tgcccgctga + 39421 tctcggaata cctcgcggac gcgacccggg tgacgaccgg ccggtacggg gaactgcgcg + 39481 tccgcaagga ctactcctac cagcaggaga cctactggcg gccgggcatg atcctggtcg + 39541 gcgacgccgc gtgtttcgtg gacccggtgt tctcctccgg tgtgcacctg gcgacctaca + 39601 gcgcgctgct cgcggcccgg tcgatcaaca gcgtcctcgc cggcgacctg gacgagaaga + 39661 ccgcgctgaa cgagttcgag ctgcggtatc gccgtgagta cggcgtgttc tacgagttcc + 39721 tcgtgtcctt ctaccagatg aacgtgaacg aggagtcgta cttctggcag gccaagaagg + 39781 tcacgcagaa ccagagcacc gacgtcgagt cgttcgtcga gctgatcggc ggagtgtcgt + 39841 ccggggagac cgcgctgacg gccgccgacc gcatcgccgc gcgcagtgcc gagttcgccg + 39901 cggcggtgga cgagatggcg ggcggggacg gcgacaacat ggtgccgatg ttcaagtcga + 39961 cggtggtcca gcaggcgatg caggaagcgg gccaggtgca gatgaaggcg ctgctcggcg + 40021 aggacgccga acccgagctg cccctgttcc ccggtggcct ggtgacctcg cccgaacgga + 40081 tgaagtggct gcctcaccac cctgcgtgaa gcctgtgcgc gccggccgtt cgcgggtggc + 40141 cgggacctgc ggaacaacct atggaaaaac ctacggaaca gagggtgcga aatgcgcgtg + 40201 ttgatctcgg ggtgcggatc gcgcggggac accgaaccgc tgatcgcctt ggcggtccgg + 40261 ttgcgggaac tcggtgtaga cgtccggatg tgcctgccgc cggactacgt ggagcggtgc + 40321 gccgaggtcg gggtgtcgat ggtggcggtc ggcccggcga tgcgcgccgg ggcacgcggg + 40381 ccgggagaac cgccgccggg agcacccgaa atcgtgtccg aggtggtcgc ggactggttc + 40441 gacaaggtgc cggcggccgc cgaagggtgt gacgtggtgg tggcgaccgg cttgctgccc + 40501 gccgcggtcg tcgtgcggtc ggtcgccgag aagctgggca tcccttacct ctacaccgtg + 40561 ctgtcgccgg accacctgcc gtcggtgctc agccaggcgg agcgcgacga atacgaccag + 40621 ggcgccgacc ggctgttcgg tgcggtggtc accagcgggc gggccgcgat cggcctgccg + 40681 ccggtggcga acctcttcac ctacggctac accgaacagc cctggctggg ggcggaccag + 40741 atcctcgccc cgccgccacc gggagacttg gacaccgtgc agaccggtgc gtggatcctg + 40801 cccgacgaac ggccccttcc cgcggagctg gagacgttcc tcgcggccgg gtcgccgccc + 40861 gtgtacgtgg gtttcggcag ctcgtccgga ccccggaccg ccggcgccgc caaggcggcc + 40921 atcgaggcga tccgcgcccg gggccaccgg gtcgtcctct cccgcggctg ggccgacctg + 40981 gccgcgcccg acgactcggc cgactgcttc accgtcggcg aagtgaacct ccaggtgctg + 41041 ttccgccggg tggccgccgc cgtccaccac gacagcgcgg gcacgacact cctggccatc + 41101 cgggcaggca ccccccagat cgtcgtccgc cgcgtgatag acaacgtggt ggagcaggcg + 41161 taccacgccg accgggtggc cgaactgggg gtcggtgtgg cactcgaagg tccgatcccg + 41221 gcctccgagg ccatgtcgga cgcgctcgag acggcgctgg caccggaaac ccgcgcgcga + 41281 gcggcggagg tggcgggcac ggtccgcacc gacgggacga cggtggccgc ggaactgctg + 41341 ttcgccgcgg tcagccggga aaagcccgcc gttcccgcat gacccgcacc gagccggcgt + 41401 gccgggaacc ggccggcggg ccagaaccca ggaaacgggg aaatacgtga agcgtgtgct + 41461 gttgtcgacg ctcggaagcc gcggagacgt cgaaccactg gtggccttgg cggtccggct + 41521 gcgcgacctc ggcgcggagc cgctgatgtg cgcaccgccg gactgcgcgg accggctgga + 41581 agaggtcggc gtgccgcacg tgcccgtcgg cccgtcggcg cgcgcgccga tccatcggga + 41641 gaagccgttg acgcccgagg acatgcgccg gctcatggcc gaagcgatcg ccatgccgtt + 41701 cgaccggata ccggcggccg ccgaggggtg tgccgcagtg gtgacgaccg ggctgctggc + 41761 cgccgcgatc ggcgtgcggt cggtggccga gaagctgggc atcccctact tctacgcctt + 41821 ccactgcccg agctacgtgc cgtcaccgta ctatccgccg ccaccgcccc tcggcgagcc + 41881 gcccgccgag gacgtgaccg acatccgggc gctgtgggag cggaacaacc ggagcgctta + 41941 ccagcggtac gggggtccgc tcaacagcca ccgggccgcg atcggcctgc ctccggtgga + 42001 ggacatcttc accttcggct acaccgatca cccgtgggtg gcggcggatt cggtcctggc + 42061 cccgatgcag ccgaccgacc tcggtgccgt gcagaccggc gcgtggatcc tgcccgacga + 42121 acggccgctt tccccggagc tggaagcttt cctggacacc ggcaccccgc cggtgtacct + 42181 cgggttcggc agcctgcgcg ccccggccga cgccgtccgg gtgtccatcg acgcgatccg + 42241 ggcccaaggc cgccgggtaa tcctttcccg gggctgggcc gacctggtcc tgcccgacga + 42301 ccgggaagac tgcttcgcca ccggcgaggt gaaccagcag gtgctgttcg gccgggtggc + 42361 cgccgtcatc caccacggcg gcgcgggcac gacgcacgtg gccatgcagg ccggggcacc + 42421 ccaggtcctg gtgccccaga tggcggacca gccgtactac gccggccggg tggccgagct + 42481 ggggatcggg gtggcccacg acggtccggt cccgaccttc gactcgctgt cggccgcgct + 42541 cgtcacggcg ctggccccgg aaacccgcgc acgagcggag gccgtggcgc gcacggccgg + 42601 tgccgacggg gcggcggtgg ccgcgaaact gctgctcgac gcggtcagcc gggaaaagcc + 42661 ggctgttccc gcgtaaacca caccgggtcg gcgccgccgg aaagtggcgc atgcggtgac + 42721 ccgggtcctg tccattcttt gaccgttccg gacataaatg cgctcggata gcattccgcc + 42781 tcattatcgc agggggacag aaccgatcaa attggggtgc gggatgcgtg tgttgctgtc + 42841 gacggcggga agccgtggag acgtcgaacc gttgctggcc ttggcggtcc ggttgcaggg + 42901 actcggcgcg gaggtgctga tgtgcgcgtc gcctgcttcc gcggagcggc tggccgaggt + 42961 cggggtgccg cacgtgccgg tcggcctgca gctggacggc atgttgttgc aggaaggaat + 43021 gccgccgccg tcggccgagg acgagcgcag actcgcggcc atggcgatcg acatgcagtt + 43081 cgacgcggtc cccgcggccg ccgaagggtg tgccgcggtc gtggcgaccg gagagctggc + 43141 cgccgcggcc gccgtgcggt cggtggccga gaagctgggc atcccgtact tctacggcgc + 43201 atacagcccg aactacctgg cgtcgccgca ctatccgccg cccgacgacg agcggaccac + 43261 cccgggcgtg accgacaacg gggtgctgtg ggccgagcgt gccgagcgtt tcgccaagcg + 43321 gtacggggaa acgctcaaca gcagacgggc ggcgatcggc ctgcccccgg tggcggacgt + 43381 cttcggctac ggctacaccg agcagccctg gctggcggcg gacccggtcc tggccccgct + 43441 ggatccggat ctcgacgcgg tgcagaccgg cgcgtggatc ctgcgtgacg atcggccgct + 43501 ttcccctgag ctggcggcgt ttctcgctgc cgggtcaccg ccggtgtacg tgggtttcgg + 43561 cagcgcgtcc gggccgggaa tcgaggacgc cgcgaaggtg gccatcgagg cgatccgggc + 43621 cctcggccgc cgggcgatcc tttcccgcgg ctgggccgat ctggtcctgc ccgacgaccg + 43681 ggaggactgc ttcgccgtcg acgaggcgaa tctccaggtg ctgttcgagc agtcggccgc + 43741 cgtcgtccac cacggcagcg cgggcaccga gcacctggcc acgcgggccg gcgtccccca + 43801 gatcgcgata ccccggcaca cggatcaggc gtactacgcc ggccgggtgg ccgagctggg + 43861 ggtcggtgtg gcactcgaag gtccggtccc gtccttcgcg gcgatgtcgg cggagctcgc + 43921 gacggccctg gccccggaaa cccgtgcgcg agcggcggag gtggcgggca cggtccgcac + 43981 cgacgggacg acgatggccg cggagctgct cttccaggcg gccgaacagg gcaaactgac + 44041 cgttcccgcg tgaatttctt cgaagacaaa gcaaagagga gactgcatgt cgaccacgtc + 44101 ccagtgccgt atctgtgacg gcactgtcca cgagttcatc gacttcggac gccagccgct + 44161 ctcggacgcg ttcgtggctc ccggcgcgga aaagggtgag ttcttcttcc gccttgccac + 44221 cggcatctgc gattcctgca cgatggtgca gctgatggag gaagtcccgc gggacctgat + 44281 gttccacgag gcctacccct acctgtcgtc gggttcggcc gtcatgcgca cgcacttcca + 44341 cgagctggcc aagcacctgc tggccacgga gctgaccggc gaggacccgt tcatcgtcga + 44401 gctcggctgc aacgacggca tcatgctcaa ggccgtggcc gacgcggggg tgcgccagct + 44461 cggcgtcgaa ccctccggca gtgtcgcgga tctggcggca gccaagggga tccgcgtccg + 44521 caaggacttc ttcgaagagg cgacggccgc cgacatccgc gagaacgacg gccccgcgga + 44581 cgtgatctac gcggccaaca cactgtgcca catcccttac atggactcga tcctgaaggg + 44641 cgtcaccaag ctgctcggcc cgaacggcgt gttcgtcttc gaggacccgt acctcggcga + 44701 catcgtggag cgcacgtcgt tcgaccagat ctacgacgag catttcttcc tcttcacggc + 44761 gcgctcggtc caggagatgg cccggcgcaa cggcctcgag ctcgtggacg tcgagcgcat + 44821 tccggtgcac ggcggcgagg tccgctacac cctggccctg gccggcgctc gcaagccgtc + 44881 cgaggccgtg gcggagctcc tggcctggga ggcggagcgc aagctggcgg agtacgccac + 44941 gctggaacgt ttcgccaccg acgtgaagaa gatcaaggaa gacctgatcg cgctgctgac + 45001 caagctccgt gccgaaggca agcgcgtcgt cggctacggc gcgacggcca agagtgccac + 45061 ggtgaccaac ttctgcggca tcaccccgga cctggtcgag ttcatctcgg acacgacacc + 45121 ggccaagcag ggcaagctca gcccgggaca gcacatcccg gtccgcgagt acggggaatt + 45181 cgccggcaac cacccggact acgccctgct gttcgcctgg aaccacgccg acgagatcat + 45241 gaacgtggaa caggcctttc gtgacgccgg cggtcagtgg atcctttacg tgccgaacgt + 45301 gcacgtgagc tgaccggcca tgcgaatcct cgcgtcgagc cctttcggcg ggcactgtcc + 45361 agggttcacc agttgtcttg tggcagcgct gtcggcaccg tgattgccat gcctcaagac + 45421 ctcgacgcgg accggattct ggcgatatcc ccgcatttgg acgacgcggt tctttccttc + 45481 ggtgccggcc tcgcccgtgc ggcgcaggcc ggcgcgaagg tgaccgttca cacggtgttc + 45541 gccgggaccg cggcgccccc ttattcgccg gcggcggagc ggctgcacgc gatctgggag + 45601 ctctcaccgg atcaagacgc gtcgctccgc cgccgggacg aagacatcgc cgcgctcgac + 45661 cacctgggcg tcgactaccg gcacggccgg ttcctcgacg ccatctaccg caagctgccg + 45721 gacggccgat ggctggccga caacgtgccg ggccgccaga agctggccat cggacggcaa + 45781 tcgccgcagg gcgatccgga gctgttctcc gcggtccggg cggacatcga gtcgatcgtc + 45841 gaagagtacg ccccggcgct gatcctgacc tgcgcggcag gcaacggtca tgtcgacaac + 45901 gagatcgcgc gggatgccgc gctgttcgtc gcgtacgaga agggcatccg ggttcggctg + 45961 tgggaagacc ttccgcacgc gatgttcgcg gagggcgccg ccgaactgcc ggacggattc + 46021 cggctggggc cgcccgattt cggttccgtc gaaccggagg cacgggcgcg gaaattcgaa + 46081 gcgctgcggc tctactcgtc gcagatgctg atgctgcacg ggccggaaaa ggatttcttc + 46141 gctcagctgg acgggcatgc ccggaagagt gcaccgggtg gtggatacgg cgaaacgacc + 46201 tggccggttg tctctcgcga agacaacggc tgaatccagg gctgaaccca gggaggttgt + 46261 cattgtgagc ggtcaactcg agcgtggtcc ggtgcggacc acgcacgccg acgtcctgct + 46321 ggcctcggtg ggtgagcgag gcgttctgtg cgacttctac gacgaggagg gctcgaacac + 46381 ctatcgggac ctgatccagg acgcggacgg taccccggaa gcgcgggagt tcgccactcg + 46441 cgtcggcccg gtgcccggac ccgtgctgga gctcgcggcc ggcacgggcc ggctgacctt + 46501 cccgttcctg gagctcggct gggaggtgac cgccctggaa ctgtcggccc cggtggtcga + 46561 cggcttccgg atgcggctgg cggaagcacc ggcggacctg cgggaccgct gcacagtggt + 46621 tcaggcggac atgagcgctt tctcggtgga ccggcgcttc ggggcagcgg tcatcagctc + 46681 gggttcggtc aacgaactgg acgaagccgg ccggcagggc ctgtacgcgt cggttcgcga + 46741 gcacctcgag cccggcggga agttcctgct cagcctggcc ttgtcggagg tcgccgagtc + 46801 acagccgccg gagcgccggc aagagttgcc aggccagagc ggccggctgt acgtgttgca + 46861 cgtgagtgtg cagccggcgg aggagaccca ggacatcacg atctaccccg ccgacgaaac + 46921 agcggatccc ttcgtcgtct gcacgcatcg ccgccggctc gtcccggcgg accggatagt + 46981 gcgggaactt cttcgggccg gcttcgacgt gatcgcgcgg acgccgttcg cgtccggtgc + 47041 gtccggccgg gcgggccatg aagacatgtt gctggtggaa gcggtgaagc aggagggcgc + 47101 tatcccagcc gcgcggtgat gagcgcggcg agccgggcga cgccctcttc gatcagttcc + 47161 ggggtgagca ggctgatcga cagccgcagc tggttgaacc cgcccttgcc gccgtagaag + 47221 tggtgcatcg gggtgaacag cacgccgtgg tcgcgggcgg cgagggcgag caggtcgtcg + 47281 tcgacggtga aggggacggt gacggtgacg aagaacccac cggtcggcgt gttccagcgg + 47341 accccggcgc gcccgccgag ccgtcgctcg agctcgccca gcacgagccg caggttgcgc + 47401 tggtagaccg cgatctcgcg cgcgttggcc ttggtcaggc tgaagtcgtt gaggagcagc + 47461 ttcccggcga tcaccgactg ggctatcggg gacgtgttca ccgtgagcat gcccttgagc + 47521 ttggagagct ggtcggcgag caggccgccc ccggccattc gctggtccgc cacggtgaag + 47581 ccgacccggg caccgggcat gccggtcttg gcgaaggagc cgatgtagac cacggtcccc + 47641 gaccggtcga gggctttcag ggtggggagg cgttcggcgc cgaagagccc gtacgcgttg + 47701 tcctccagga gcaggatgcc gttggcctcg gcgacctcga ggagccggtg ccgggcggcc + 47761 aggtccatgc tggtcccggt cgggttggcg aagttcggtg tcacgtaaca ggcccggacc + 47821 cgcttgccct gttcgtcggc ccgcttcagc tgcaggacca ggtcgtccgg gtcgatgccg + 47881 ttctcggtcg actgcaccgg ccagacgggc gtgtcggtga gcagcgccgc ccccgtcagg + 47941 ccgacgtagg tgggggcggg ggcgagcagc acgtcgtgtt cggtcgcccg cagcgtgcga + 48001 agcaccagga acatcgcctc ctgggcgccc acggtgacca ccacggattc cggggcggcg + 48061 tcgatgttct cgtcctcggc gaggttgcgg gcgatgaggt cggcgatgac gcctttcgtg + 48121 gtgccgtact ggaagagcgt gcgggtgacc cccgcttcgt cgagcttccg gtcgcggcgg + 48181 aggtggtcgc agtaggcgtc gatgtactcg tggatgaggc ggatgtcgaa gaattcttcg + 48241 tacgggcggc ctgccgccat ggaaatagcc accgggtatt cgtcgatcag ctcgttgagc + 48301 aagttcatcg acgagatggc cggatcggtg agcgatccgt gcagggtttc cacgctcaat + 48361 ggggtggaca gaccgaagga atccataaat actaggattt ccatacgccg ccgaggtgtc + 48421 aagcggcggc ggtggacgcg atcgcgtggc gattcccgac gatttcccgg ctcggtaccg + 48481 cgcgcggaac aaaagccgtc cgagactgtc gatgtccatt tctcgctttt ccggacactc + 48541 gatcttcgaa ggtacggtca cacgtgtcgc cgcgcgccgc ggatgggcgg cgggcagggg + 48601 aggaccttca tgctgatgac gactgagcac gggatccggc tgtcgtacca cgaccagggc + 48661 cgtggtgcgc cggttctgct gctgaccggc accggggcgc cgagctcggt gtgggacctg + 48721 caccaggtgc ccgcgctccg cgccgccggg ttccgggtga tcaccatgga caaccgcggg + 48781 atcccgccca gcgacgacgg cgcggacggg ttcaccgtcg acgacctcgt cgcggacgtg + 48841 gccgcgctgc tcgaccacct cgacgcgtcg ccgtgccgcg tcgtcggcac gtcgatgggc + 48901 tcgtacatcg cgcaggagct ggcgctggcc cgcccggaac tggtggacgc cgtcgtgctg + 48961 atggcggcct gcggccggag cagtctcgtc cagcgcgtgc tcgcggaggc cgaggcggac + 49021 ctgatcggac gggggaccga gctgccgccg gggtaccgcg ccgccgttcg cgcgatgcac + 49081 aacctggggc ccgcgacgct cgccgacgac gacctcgctg ccgactggct cgacctgttc + 49141 gcggcgtcgg agaactgggg gccgggcgtc cgggcgcagc tgctgctgag cgcgttgccc + 49201 gaccgtcgcg aggcctaccg ggcgatcaag gtgccctgcc acgtcgtttc gttcgagcac + 49261 gacctcgtgg cgccgccgtc cgccgggcag gagctggccg ccgtgatccc cggcgccacg + 49321 caccgcacga tcccggggtg cgggcacttc ggctacctgg agaagccgga agcggtgaac + 49381 cgcgagctgc tccggttcct ccgcacggaa tccggcgtgg ctgtgacatc cggggcttcg + 49441 ccccggaccc ccgaagaact gtgacagccg gggctcgccc cgggccgggg gctccgccac + 49501 ccggaccccc gaaacctgga ggagaccgca tgaccggcgc gatcgtgccc ccgtccacgg + 49561 cacccgccct gttcgaggcg gccgccgccg cggtgccgga ccggccggcc gtggcgatgg + 49621 ggaccaccac gctgacctac gccgagctga atacccaggc caaccggctc gcgcgccggc + 49681 tcgtggcgca cggcgtgggc ccggaacggc tggtcgcact ggcgatgccg cggtcgatcg + 49741 agttcgccgt cgcgatgctg gccgtgcaca aggccggcgg tgcgtacgtg ccgatcgacc + 49801 cggactatcc cgcggaacgc cggcagcaca tgctggccgg tgcggcggcg cagtgcctgc + 49861 tgtgcctgcc cgggcaggac gtcgccggcg ctccggtcgt gctgagcgtg gcgctggcgg + 49921 agccgggccg tcccgagccg gacctggacg actccgaccg gctcgccccg ctgctgccca + 49981 gccaccccgc gtacgtcatc ttcacctcgg gctcgaccgg gcagccgaag ggcgtcgtgg + 50041 tcacgcaccg gggaatcccg aacctggccg ccgactacgt gcaccgccag aacctgctgc + 50101 ccgacagccg gttgctggct ttcgcgtccc ccagcttcga cgccgccgtc gccgagttct + 50161 ggccgatctg gctggccggt gcctgcctgg tgctggcgcc cgcgccggac ctgatccccg + 50221 gggagccgct cgcccggctg gtccgggacc ggcacatcac ccacgtgacg ctgccgccgt + 50281 ccgccctggc cccgctggaa gaagccggcg gcctgccgcc ggggctgacc ctcctggtcg + 50341 ccggcgaggc gggcccggct ccggtcgcga agcgctgggc cgccggccgc gtcatgatca + 50401 acgcgtacgg ccccaccgaa gccacggtcg cggtgaccgc gagcgacccg ctgaccggcg + 50461 aagacacgcc gccgatcggc aggccgatca ccggtgtcca cacctacgtc ctggacgacc + 50521 ggctggtccc cgtcccggac gggaccgtgg gggagctgta catgaccggc ccgggcctcg + 50581 cccgcggtta cctgcaccgg ccggccgcga ccgcggaacg gttcctgccg gacccgttcg + 50641 gcggtccggg gcagcgcatg taccgcacgg gtgaccgggt gcgggcgcgc ccggacggtc + 50701 agctcgtctt cgtcggccgg gccgacgacc agctgaaggt gcgtggtcac cggatcgagc + 50761 cggcggaggt cgaatccgcg ctgctcgcgg tggacggggt ggcccaggcg gtggtgaccg + 50821 aacacgacaa ccggctcgtg gcgtacgtgg tcggcgccgg gggcgcgcgg gtgcccgccg + 50881 aagacctcct gccgccgctg aggaagcagc tgcccgccta cctggtcccc gacgtggtcg + 50941 tcggcctgcc gcacctgccg accaccccga acggcaaggt cgaccgggcc gcgctgcccg + 51001 cgcccgaggc ggaggacacc gggcgcgcga tctccgggcg ggcgccgagc acgcccacgg + 51061 aaatccacct ggccgccttg ttcgcggaag tgctcggtgt cagcagcgtc ggcgtggagg + 51121 acagcttctt cgaggtcggc ggccactcgc tgctcgccac ccggctggtt tcccgcatcc + 51181 gcgaaagcct gcgggtccgg ctgcgggtgc aggccttctt cgacgcgccg accgtggccg + 51241 aactcgccaa ggtgctcgac gccgccctga cgtgacctgg agaccctgat gcagacgacg + 51301 aacgccgtcg acctcggcaa ccccgacctg tacacgaccc tggaacggca cgcccgctgg + 51361 cgcgagctcg cggcggaaga cgcgatggtg tggagtgacc cgggcagttc cccctccggc + 51421 ttctggtcgg tgttctcgca ccgggcgtgc gccgcggtcc tcgcgccgtc ggcgccgctc + 51481 acctccgaat acgggatgat gatcgggttc gaccgcgacc acccggacaa ctccggcggc + 51541 cggatgatgg tggtctccga acacgagcag caccgcaagc tgcgcaagct cgtcgggccg + 51601 ctgctgtccc gggcggccgc gcgcaagctg gccgagcggg tgcggatcga ggtcggcgac + 51661 gtgctcggcc gggtcctcga cggcgaggtc tgcgacgcgg ccacggcgat cggcccccgc + 51721 atccccgccg cggtcgtgtg cgagatcctc ggcgtgcccg ccgaggacga agacatgctc + 51781 atcgacctga ccaaccacgc cttcggcggc gaggacgagc tgttcgacgg gatgaccccg + 51841 cggcaggcgc acaccgagat cctcgtctac ttcgacgaac tgatcaccgc gcgccgcaag + 51901 gaacccggcg acgacctcgt cagcacgctg gtgaccgacg acgacctcac gatcgacgac + 51961 gtgctgctca actgcgacaa cgtgctcatc ggcggcaacg agaccacgcg gcacgcgatc + 52021 accggcgcgg tgcacgcgct ggcgacggtg cccggcctgc tgacggcgct gcgggacggg + 52081 agcgcggacg tcgacaccgt cgtggaagag gtgctgcgct ggacctcgcc cgcgatgcac + 52141 gtgctccggg tgacgaccgc cgacgtcacg atcaacggcc gcgacctgcc gtccggcacc + 52201 ccggtggtgg cgtggctgcc cgcggcgaac cgggaccccg ccgagttcga cgacccggac + 52261 accttcctgc ccgggcggaa acccaaccgg cacatcacct tcggccacgg catgcaccac + 52321 tgcctcgggt ccgcgctcgc gcggatcgag ctgtcggtcg tgctgcgggt gctggccgag + 52381 cgggtgtccc gggtggacct ggaacgggag ccggcctggt tgcgggcgat cgtcgtgcag + 52441 gggtaccggg aactcccggt gcggttcacc gggcgctgac ccgcgcgcgg tgccccggtg + 52501 agggtgcggc tgccccgcgc ccattttgtc cactgtggac tccggcgccc gccgcggcgg + 52561 gtgtcaagct gacaccgttg atgcggaatt ggcttggagc catcctgggg aatgagcgtt + 52621 acacctattt gacggaggaa tgtcttgact tccgattcga ctgtccagaa tttcgagatc + 52681 gactacgtcg aaatgtatgt ggaaaacctc gaggcggcca cgttcacctg ggtcgacaag + 52741 tatgctttcg ccgtcgccgg taccgaccgg tcggcggacc accggagcgt cacgctgcgg + 52801 cagggcccga tcaagctggt cctcaccgaa ccgacgtcgg accggcaccc ggcggccgcc + 52861 tacctccagt cgcacggcga cggcgtggcc gacatcgcgc tgcgcacgcc ggacgtgacc + 52921 gccgctttcg aagccgcggt gcggggcggg gccgccgccg tgcgcgaacc ggtgcggctc + 52981 gccggcgggc cgatcgtcac ggccaccatc ggcgggttcg gcgacgtcgt gcacaccctg + 53041 atccagagcg gcgaagccac cgcggccgcg ccggagacca ccggccaggg cgggggagac + 53101 gtgaacctgc tcgggctcga ccacttcgcg gtctgcctga actcgggtga cctcggtccc + 53161 acggtggcgt tctacgagcg ggccttcggg ttccggcaga tcttcgagga gcacatcgtg + 53221 gtcggcaggc aggcgatgaa ctccaccgtg gtgcagagcg cgtcggggga ggtcaccctc + 53281 accctgatcg agcccgacag caacgccgac cccggccaga tcgacgagtt cctcaaggcc + 53341 caccagggag ccggcgtcca gcacatcgcc ttcaacgccg acgacgcggt ccgcgcggtc + 53401 cgggcgctgt ccggccgcgg ggtggagttc ctgaagactc cggggaccta ttacgacatg + 53461 ctcggcgagc ggatcacgct ggagacgcac acgctggacg acctgcggtc gacgaacgtg + 53521 ctcgccgacg aggaccacgg cggccagctg ttccagatct tcgccgcttc cacccacccg + 53581 cgtcacacca tcttcttcga gatcatcgag cggcagggcg cgggaacctt cggcagctcc + 53641 aacatcaagg ccctgtacga ggccgtggag ctggagcgga ccgggcagag cgagttcggc + 53701 gccgcccggc gatgacgtac gtttccctgg gcgacctcga acgtgccgct cgcgacgtcc + 53761 tccccggcga gatctgggac ttcctcgccg gggggagcgg cgccgaggca tcgctgacgg + 53821 ccaaccgcac cgcgctcgac cgggttttcg tggttccccg gatgctgtgc gacctgaccg + 53881 gcagcaccac cgaggccgag ctcctgggcc ggcgcgccgc gctcccgatg gcggtcgcgc + 53941 cggtcgcgta ccagcggttg ttccaccccg agggcgagct ggcggccgct cgcgcggctc + 54001 gcgacgccgg cgtgccgtac accatctgca ccttgagcag cgtcccgctc gaggaggtcg + 54061 cggccgtcgg cggccggccg tggttccagc tgtactggct gcgtgacgag aagcggtcgc + 54121 tggagctcgt gcgccgcgcg gaagacgccg ggtgcgaagc gatcgtgttc accgtggacg + 54181 tgccgtggat gggacggcgg tggcgggaca tgcgcaacgg cttcgcgttg ccggaatcgg + 54241 tgacagcggc caacttcgac gccggatcgg ccgcgcaccg ccgcacgcgc ggggcctcgg + 54301 ccgtggccga ccacaccgcg cgcgagttcg cccccgccac ctgggagtcg gtggcgacgg + 54361 tccgcgcgca cacggacctg ccggtggtgc tcaagggcat cctcgccgcc gaggacgccc + 54421 gccgtgccgt cgaggccggg gccgacggga tcgtggtgtc caaccacgga ggtcgtcagt + 54481 tggacggcgc ggtgcccggg atcgaggtgc tgggcgagat cgccgccgag gtctccggcc + 54541 gctgcgaagt gctgctggac ggcggaatcc ggaccggcgg ggacatcctc aaggcggccg + 54601 cgctgggcgc gtcgggcgtg ctggtcgggc ggcccgtgat gtgggggctg gccgcggcgg + 54661 gccaggaggg cgtccggcag gtgttcgaac tgctcgccgc cgaactccgg aacgcgctgg + 54721 gcctggcggg ctgtgactcg gtgagcgcgg ccggccggct gggcacgagg gtcccccgct + 54781 acggctgatt ccccgcccca cgcccgattt cgacgtgaac ccgatccgcc cgcgcgtgcc + 54841 gggctcgact ggagcggggc ctttcccgga ggagaaaaat gctgcacacc tttgccgcgg + 54901 cggtcgcgcc ggtcgcaccg atcgccgcgc acagtctcct ggtcttcctg ctgcagatcg + 54961 gcttgctgct cctgctcgcc gtcgtgctcg gccggctggc cggccggttc gggatgcccg + 55021 cggtcgtcgg tgagctgttc gtcggggtga tcctcggtcc gtcgctgctg ggctgggcgg + 55081 cgccgggcct gcacagctgg ctgttcccgg ccgtcgccga gcagtaccac ctgctcgacg + 55141 ccgtcggcca ggtcggcgtc ctgctgctgg tcggcctcac cggcgtgcag atggacatgg + 55201 ggctggcccg caagcgcggc ctcaccgcgg ccggggtcag catcggcggc ctggtccttc + 55261 cgctcggcct ggggatcggc gcgggttacc tgctgccgaa ggtgctcgtt ccggagggca + 55321 ccgacgtcac cgtcttcgcg atgttcctcg gcgtggccct gtgcgtcagc gccatcccgg + 55381 tcatcgccaa gaccctcatc gacatgaaac tgctgcaccg caacatcggg cagctcacgc + 55441 tcaccgccgg catggtcgac gacgtgttcg gctggttcat gctgtccgtc gtcagcgcga + 55501 tggcggtcaa cgcggtctcc gccggcaccg tgctcacttc gctggcctac ctggtcgcca + 55561 tcctcgcctt ctgtttcacc ctcggccgtc cgctggcccg gggtgtgctc cgcgtcgcgg + 55621 ccaagtccga cggtcccggg ctcaccgtcg ccaccgtcgt cgtcctgatc ttcctcgccg + 55681 cggccggtac gcaggcgctc ggcctggagg cggtcttcgg cgccttcctc tgcggcatcc + 55741 tgctcgggac ggcgggcaag gtggatccgg ccaagctcgc ccccctgcgc acggtcgtcc + 55801 tgtcgggact cgcccccctc ttcttcgcca cggccgggtt gcggatggac ctcaccgcgc + 55861 tgacccaccc ggtggtcctg ctcaccggtc tggtggtgct cgccctggcc atcgccggca + 55921 agttcgccgg cgcgttcgcc ggcgcgcggc tgagcgggtt gaacaagtgg gaagggctgg + 55981 cgctcggcgc cgggctgaac gcgcggggag tcatccaggt cgtggtggcc atggtcggcc + 56041 tgcggctggg tatcctcagc gtggaggtct acacgatcat catcctcgtc gcgatcgtca + 56101 cttccctgat ggcgtcgccg atcctgcggt tcgcgatgtc cagagtggag cagaccgccg + 56161 aagaacaggt tcgcgagaac gaacaccggg cgtggaacac gcacccggcg gcgaacccgc + 56221 aggagcaaag tctctaggcg caggccggta ctgctcgggg cgacgggacg aaccgcgggt + 56281 gtccaaccgc ggaattcgcc ggtcggacgg gaaatcgctt tctcgtgcca cggcggccgt + 56341 tgaccaatcc acggcgtgga acagtgcggt gcctgccgct atcttggcgg cacgaggaac + 56401 gaaaagactt cctcgacagc gtcttcggcc tgacccgacg ccggttccgg agcagcgatg + 56461 acgcagcctt cgcacgacgg tcatgacaag gagtcgtccg atgctgcctg acctcgttcc + 56521 cccggtcgtg gtgcgccccc gcgacggccg cgaccacgcg gaccgcatcg cgttgtcggc + 56581 ggcgaccacc gacggggtgc acatgcggac cgaggacgtc cgcgcctgga tcgccgaacg + 56641 ccgtgaggcc aacgacttcc acgtcgaacg cgtcccgttc cgggacctcg accagtggtc + 56701 gttcgaggag gtgaccggca acctcgtgca ccacagcgga cggttcttca ccatcgaggg + 56761 cctgcacgtg atcgagcacg acggcccgaa cggcgacggc ccctaccgcg agtggcagca + 56821 accggtcatc aagcagcccg aagtcggcat cctcggcatc ctgggcaagg agttcggcgg + 56881 cgtcctgcac ttcctgatgc aggccaagat ggagccgggg aaccccaatc tggtgcagct + 56941 ctcgccgacc gtgcaggcca cccgcagcaa ctacaccaag gcgcacggcg gcacgaacgt + 57001 caagctgatc gagtacttcg ccccgcccga ccccgagcac gtcatcgtcg acgttctcca + 57061 ggccgagcaa ggctcgtggt tcttccgcaa gtccaatcgc aacatgatcg tcgagaccgt + 57121 cgacgacgtg ccgctgtggg acgacttctg ctggctcacc ctcggccaga tcgcggagct + 57181 gatgcacgag gacgagacga tcaacatgaa cgccaggagc gtgttgtcgt gcctgcctta + 57241 ccacgacgcg gctcccggcg cgcggttctc cgacgtccag ctcctgtcgt ggttcacgaa + 57301 cgagcgttcg cggcacgacg tgcgtgcccg ccgcatcccg ctcgcggacg tgtgcggctg + 57361 gaagcagggc gacgaagcga tcgagcacga ggacggccgt tatttccggg tcctcgcggt + 57421 cgccgtgcgg gggagcaacc gcgagcggat cagctggacc cagccgctgc tcgaatccgt + 57481 cgacctgggt gtcgtcgcgt tcctcgtgcg cgagatcggc ggtgtgcccc acgttctggt + 57541 gcacgcccgc gccgacggtg gtttcctgga cacggtcgag ctggcaccga ccgtccagtg + 57601 cactccccaa aactacgcgc acctgcccgc ggagaaccgc ccgcccttcc tcgacgtcgt + 57661 cctcaacgct ccggagtcgc gcattcgtta cgaggcaata cattccgaag agggcgggcg + 57721 cttcctcaac gtccgggcgc gctacctcgc gatcgaagcg gacgacacgg tcgagccccc + 57781 tcccggctac acctgggtca cgccggccca gctcaccgcg ctcacccggc acgggcacta + 57841 cgtcaacgtc gaggcccgca cgctgctcgc ctgcctcaac gccgcgacgg cccagcctcg + 57901 aggcggtgcc tgacatgaag acggtcaccg tcctcggcgc ctcgggtttc gccggctcgg + 57961 ccgtccaccg gctgggcgaa gtcttccggc tcgtggcacg ggaggtcgcc gggcacaccg + 58021 gacgcggccc ggtggacgtg ccctgcgtgg cacccccgtc gcacgcgccc gagacggatt + 58081 tccggagcgt cacggtcggt tccacgccgt tccggtcgat caccggccgg cgcccggaga + 58141 tgtcgcggcc cgagggagtg cgccgcactg tcgccgcttt gccgtcatca gatcagggaa + 58201 aggttcgcac atgaccacgc gtgtatggga ctaccaggcc gaataccgga acgagcggct + 58261 cgacctgctg gacgcggtcg agacggtctt cgactcgggg cagctcgtgc tcggggcgag + 58321 cgtgcgtggc ttcgaggcgg aattcgccgc gtaccacggg gtcgggcact gcgttggcct + 58381 cgacaacggg acgaacgcga tcaagctcgg cctgcaggcg ctgggtgtcg ggccgggcga + 58441 cgaggtgatc acggtgtcca acaccgccgc cccgaccgtg gtcgccatcg acggcaccgg + 58501 cgccacgccg gtcttcgtcg acgtccgcga ggacgacttc ctgatggaca ccggccaggt + 58561 cgcggccgcg atcaccgagc gcaccaagtg cctgctgccc gtgcacctat acggacagtg + 58621 cgtggacatg gctccgctga aggacctcgc cgcgaagcat ggactgtcca ttttggagga + 58681 ctgtgcccag gcgcacgggg cccggcagaa cggaacggtc gcgggctcga ccggtgacgc + 58741 ggccgcgttc tccttctacc cgaccaaggt gctcggggcg tacggcgacg gcggcgcgac + 58801 catcacctcc gacgaatccg tggaccggcg gctgcggcgg ctgcgctact acggcatgga + 58861 caagcagtac tacacgctgg aaacgccggc ccacaacagc cggctggacg aggtccaggc + 58921 cgagatcctg cggcgcaagc tcaagcggct cgacacctac gtcgccgccc gccaggccat + 58981 cgcccagcgc tacgtcgacg gactgggcga cacggagctg aagctgccgc ggaccgtccc + 59041 cggcaacgag cacgtgtact acgtgtacgt cgtgcgccac ccgcgacgtg acgacatcat + 59101 cgagcgcctc aaggcgtacg acatccactt gaacatcagc tatccgtggc cggtgcacac + 59161 catgaccggt ttcgcccacc tcggctacgc gaccggcgcg ttcccggtca ccgaaaaact + 59221 ggccggcgag atcttctcgc tgccgatgta ccccgcgctt tccgccgacc tgcaggacaa + 59281 ggtcatccat gcggtgcgcg aggtggtgtc caccctctga ccactccacc aacaggagta + 59341 gccgtgcaag cacgcaaact cgccgtcgac ggcgcgatcg agttcacccc ccgggtcttc + 59401 cccgacgacc ggggcctgtt cgtctcgccg ttccaggaag aggccttcgc cgaggcccgc + 59461 ggcggcccgc tgttccgggt ggcgcagacg aaccacagca tgtccaagcg cggcgtggtg + 59521 cgtggcatcc actacacgat gacgccaccg ggcacggcca agtacgtcta ctgtgcccgc + 59581 ggcaaggcgt tggacatcgt ggtcgacatc cgggtcggct cgccgacgtt cggccggtgg + 59641 gacgcggtcc tgctggacca gcgggaccac cgggcgatgt acttcccggt gggggtcggc + 59701 cacgcgttcg tggccctcga ggacgacacc gccatgtggt acctgctctc cacggcctac + 59761 gtggcgcgga acgagctcgc cctctcggtc ctggatcccg cgctgggcct gcccatcgac + 59821 gccgacgtcg acccgatcct gtccgaacgg gaccaggtgg ccgtcacgct cgccgaggcg + 59881 ggacggcagg ggttgctgcc ggactacgcc acctgcctgg agctcgaccg gcagctgtcc + 59941 gaagtctccc tttccgcctg acctcacgac cgatcgggcc gaaggcgtcc ttcaccacgt + 60001 ccgaacgcgg tgaaggacgc cttcgacgga aaccaatcac gaactccgcg ccttggacga + 60061 cattgaccgc cgagttcggc cgagcctact ttcggaatgt ccggtccgct ctttcgcgaa + 60121 aggtgagatc catgcccgct gcgcaggtca agcagctgct tcgaagcaag ttgagaacgt + 60181 gggggtggat gtatcgatga cgaccagcat cgaacccgcc gaagaccttt cggtcctctc + 60241 cggcctgacc gagatcactc gattcgccgg cgtgggaaca gcggtttccg cgtcgtccta + 60301 ttcgcagtcc gaggtcctcg acatcctcga cgtcgaggac cccaaaatcc gctcggtctt + 60361 cctgaacagc gccatcgacc ggcgttttct caccctgccg ccggagagtc ccggtggggg + 60421 ccgcgtgtcc gaaccgcagg gcgacctcct ggacaagcac aaggagctcg cggtcgacat + 60481 ggggtgccgg gccctcgagg cctgcctgaa gtcggcggga gcgacgcttt cggacctgcg + 60541 tcacctgtgc tgcgtcacct cgaccgggtt cctgaccccc ggcctcagcg cactgatcat + 60601 ccgcgaactg gggatcgacc cgcactgcag ccgctcggac atcgtgggca tggggtgcaa + 60661 cgccggcctg aacgcgctca acgtcgtcgc cggctggtcc gcggcgcacc cgggtgaact + 60721 cggcgtcgtc ctgtgcagcg aggcgtgttc cgcggcctac gccctggacg gcaccatgcg + 60781 gaccgcggtg gtcaacagcc tcttcggcga cggatccgcc gcactcgccg tgatttccgg + 60841 tgacggccgc gtgcccggcc ctcgggtcct caagttcgcg agctacatca tcaccgacgc + 60901 gctggacgcc atgcgctacg actgggaccg tgaccaggac cggttcagct tcttcctcga + 60961 cccgcagatt ccgtacgtgg tcggggcgca cgcggagatc gtcgccgacc ggctgctgtc + 61021 cggcacgggc ctgcggcgca gcgacatcgg gcactggctg gtgcattccg gcggcaagaa + 61081 ggtgatcgac tccgtcgtcg tcaacctcgg cctgagccgc cacgacgtcc gccacaccac + 61141 cggagttctc cgtgactacg ggaacctttc cagcggctcc ttcctcttct cctacgagcg + 61201 gctcgccgaa gaaggcgtca cccggcccgg agactacggc gtactcatga ccatggggcc + 61261 tggctccaca atcgaaatgg cgctgatcca atggtgaacg gtgaactggt gctccggctc + 61321 gacggcaccc ggcccctgtc ggccgcgtcg gtcgaggaac tggacgccct ctgcgatcgc + 61381 gtggaagacc accgggaacc cggcccggtc accgtccacg tcacgggtgt cccggccgcc + 61441 ggctggacgg cggaggtgac ggtcggcctg gtctccaagt gggaacgggt ggtgcgccgg + 61501 ttcgagcggc tcggcaggct caccatcgcc gtggcggcgg gtgactgcgc cggaacggca + 61561 ctggacgtcc tcctcgcggc cgacgtccgg atcgccgcgc cgggcacccg gctgctgctc + 61621 gcccgggccg gcggcgcgcc gtggcccggg atgaccgtgc accggctcac ccggcaggcc + 61681 ggggcggccg gcatccggcg ggcggtgctg ctcggcgccc cgatcgaggc cggtcgcgcg + 61741 ctggccctga acctggtcga cgaggtctcg gaggacccgg cggccgcgct ggcggagctc + 61801 gccgggacgg ccggtgccgt ggacggcaag gagctggcga tccgccgtca gctggtcttc + 61861 gaagccggct cgaccgcctt cgaggacgca ctcggcgccc acctggccgc ggcggaccgg + 61921 gccctgcgca gggaaaccgc gtcgtgacgg ccgcaccccc gacgtctccg ccggggccgc + 61981 ggctcgaccg cccggccctg gcggaggcag ccggccgcgt cgacgacctg ctcgccgaac + 62041 tgccgccgcc gtccgcccgg acccccgggc aacgcgaggc cgcgtcttcg gcgctggacg + 62101 ggatccgggc gatgcgcgcg gactacgtcg gggcgcacgc cgaagcgatc tacgacgaac + 62161 tcaccgacgg ccggtcccgg tccctgcgca tcgacgagct cgtccgggcc gccgcccggg + 62221 cctttcccgg cctggtgccc acggacgagc agatggcggc cgagcgcgcg cggccgcagg + 62281 cggagaagga cgggcgggag atcgaccagg gcatcttcct gcgcgggatc ctgcgggcgg + 62341 agcgggccgg cccgcacctg ctcgacgcca tgctccagcc caccccgagg gcgctgaagc + 62401 tgctcccggg attcaccgag tccggtgtcg tgcagatgga ggcggtccgg ctggaacgcc + 62461 gggacggcgt cgcgtacctg accctgtgcc gggacgactg cctcaacgcc gaggacgccc + 62521 agcaggtcga cgacatggag accgcggtcg acctggcgct gctcgacccg gccgtccggg + 62581 tggggctgct gcgcggcggg gagatgagcc atccccgcta ccgggggcgc cgcgtgttct + 62641 gcgccggcat caacctcaag aagctgagct cgggcggcat cccgctggtc gatttcctgc + 62701 tgcggcggga gctggggtac atccacaaga tcgtgcgcgg cgtggtcacc gaaggttcgt + 62761 ggcattcgcg gctgaccgac aagccgtgga tcgcggccgt cgactccttc gccatcggcg + 62821 gcggggccca gctgctcctc gtcttcgacc acgtgctggc cgcgtccgac gcctacttca + 62881 gcctgcccgc ggcgaaggag gggatcatcc ccggcgcgtc gaacttccgg ctctcccggt + 62941 tcgccgggcc ccgcgtggcc cggcaggtga tcctcggcgg ccgccggatc cgggcggacg + 63001 agccggatgc ccgactgctc gtcgacgagg tcgtcccgcc ggcggagctg gacgcggcga + 63061 tcgacgccgc gctggcccgc ctggacgggg aggcggtgct ggccaaccgg cgcatgctga + 63121 acctggccga ggaaccgccg gacgaattcc gccggtacat ggccgagttc gccctgcagc + 63181 aggcgctgcg gatctacggc gaagacgtga tcggcaaggt cggccggttc gcggcgggct + 63241 cgtcgtgagc ggcgaccggg tgcggtacga gaagaaggac cacgtcgcct acgtgacgct + 63301 ggaccggccc ggcgtgctga acgccatgga ccggcggacg cacgaggagc tcgccggaat + 63361 ctgggacgac gccgaggccg acgacgaagt ccgggtggtg gtgctgaccg gcgccgggaa + 63421 ccgcgcgttc tccgtcggcc aggacctcaa ggaacgcgcc cggctgaacg aagcgggtgc + 63481 gcgggccacg acgttcggca gccggggcca gccggggcat ccccggctga ccgaccggtt + 63541 caccctgtcc aagccggtgg tcgcccgggt gcacggctac gcgctgggcg gtggcttcga + 63601 gctggtgctc gcctgcgaca tcgtcatcgc ctccgacgat tcggtgttcg ccctgccgga + 63661 ggtccgcctc ggcctgatcc ccggggcggg cggggtgttc cggctgccgc ggcagctgcc + 63721 gcagaaggtg gcgatgggct acctgctgac cggccgccgg atggacgcgg cgacggcgct + 63781 gcggtacgga ttggtcaacg aggtcgtgcc accggaggaa ctggaccggt gcgtcgccga + 63841 atggacggac agcctcgtgc gcgccgctcc gctttcggtt cgcgcgatca aggaggccgc + 63901 gctacggtcg ctcgacctcc ccctggagga ggcgttcacc gcttcctaca cctgggaaga + 63961 gcgccgtcgg cggagcgaag acgcgatcga gggtccccgg gccttcgccg cgaaacggga + 64021 tccggtctgg accggggaat accggccggg ttgaccaggc tgttcggtgg tttcgagtga + 64081 ggatggtgcg gagatgtcgg tgacggaatt cgctgtgacg gcgcgaaggg gaccggtcgc + 64141 ggccgggccg gggcaccggg tgtggccgcg atgacccaca ccgtcgccac gaccgacctc + 64201 gacaaccagc gcatcgagcg gatcgtcccc ctggtcaccc ccgccctgct gcatcacgaa + 64261 ctgccgctca gcgccaccgc ggccgagacg gtgcgaaagg gccgcgagag cgtcgtccgc + 64321 gtcctcgacg gcacggacga ccggctgctc gtgatcaccg ggccgtgctc catccacgac + 64381 cccgccgcgg cgctcgacta cgccggccac ctcgccgcca tcgccggcga ggtcgccggc + 64441 gacctgctcg tcgtcatgcg cgtgtacttc gagaaacccc ggacgatcgg cggctggaag + 64501 gggctcatca acgaccccca cctcgacggc accggcgacg tcaaccacgg gctgcgcacg + 64561 gcccggcacc tcctgctgga gctcgccgaa cgcggcctgc ccgccgcgtg cgaatggctg + 64621 gacaccacca ttcccgcgta cttcgcggac acggtctcgt ggggcgccat cggcgcccgc + 64681 accgtggaaa gccagaacca ccgcatgctc gccagcggcc tgtccatgcc cgtcggcttc + 64741 aagaaccgcc gcgacggcga catcaccgtc gccatcgacg cgatccgggc cgccgcggtc + 64801 cgccacgtgg tccccggcgt cgaccccggc gggttgcccg ccatcctgca cacggcgggc + 64861 aacccggact gccacgtcgt cctgcgcggt ggtgacggcg cgcccaacca cgactccgcg + 64921 tccgtccaca agacactgac cgcgctggag gccgcgggcc tgcccggccg ggtggtgatc + 64981 gacgccagcc acgacaacag cggcaaggac caccaccgcc agcccctcgt cgcggccgag + 65041 atcgcgggcc aggtcgagaa cgggcggaac ggcatcgtcg gggtgatgct cgagtccaac + 65101 ctccgcgccg gccgccagga cctccagccg ggccgtccgc cggcatacgg ccagtccatc + 65161 accgacgcct gcatcgacgt ccccaccacc cggacggtcc tccacggcct cgccgcggcg + 65221 gccgcggccc ggcgaaagct cggcaagcaa gcaagctgag caagtccgtg aatggcacat + 65281 tgccggacat agagtccctc gatgtgccat tcacggactt ggccctgacg gcggccggca + 65341 ccgacacgga tcccccggat ggggtatacc cgaaccggtg gaggaaatcc ggacagagtt + 65401 catccgaccg ctgctgacat cgttgtccgc gcacgccgcg gaccgccccg cctactccga + 65461 cgaccggcga acgctgacct acggcgggct ggcccacgcc gccgcggagc tcgccgccgg + 65521 gctcggggtg gcccggggcg accgcgtgct ggtgcacgtc ggcagccggg tcgagttcgc + 65581 cgtcgccctg ctggcggtgc tgcgggcggc ggccgtggga gtcccggtga gcgtgcgctc + 65641 gaccgacgcc gaactcgccc acctggcggc cgattcgggg gcgacgctcc tggtcacgga + 65701 ggcgcggcac gccgccgcgg ccgaacggct gcgccgcgac cggcccggtc tgcgggttct + 65761 cttcgtcgac gatccgccgc ccgcgcgggt gggcgagccg cgcgacgacc tcggactgga + 65821 cgagccggct tggctgctct acacctccgg caccaccggc cggcccaagg gcgtcctgct + 65881 ctcgcagcgc gcgatgctgt ggtcgacggc cgcgtactac gtccccatgc tcgggctcga + 65941 cgccgaagac accgtgctgt ggccgttgcc gacgcaccac gcgtacgccc tgtcgctggc + 66001 gttcgtcacc acgatcgcgc tgggggcgca cacccggctg gccgacgggt gcacgccgga + 66061 cctgctcgcc cggtaccccg gcagtgtgct cgccggtgtt cccgcgctct acctccggct + 66121 ccgccaggag tccggcggtc ccctcgccgc gccgcggctg tgcctgagcg gcggcgcgcc + 66181 gtgcacgccg gcgacccggg ccgcggtccg ggacctgttc gggctcccgg tggccgacgg + 66241 ctacgggagc accgagacgg gcgggaaggt cgccgccgag cttcccggtg aagcgggcct + 66301 ggtcccggtg cccggcttgg agatccggat cgacgcgggg gaggtgctcg tccgcggtcc + 66361 cgggctgatg ctgggctacc acgggcgaac cgaatcaccg ctgcgggacg gctggtaccg + 66421 cacgggcgac gccggccggt tcgagggcgg ccggctcgtg ctcgagggcc gcgtggacga + 66481 cgtgatcgtc tgcggtggcc agaacgtcca ccctgccgaa atcgaggcgg tgctcgaaga + 66541 gtcgccttcg gtgcgggacg tcctcgtgct cggccgtccc gacgacgtcc tgggcgaggt + 66601 gccggtggcg ttcgtggtcg ccgggcccgg cggcttcgac gccgaggagc tgcgtggccg + 66661 gtgtctaga +// diff --git a/tests/data/Y16952.3.final.minimal.gbk b/tests/data/Y16952.3.final.minimal.gbk new file mode 100644 index 0000000..8871ebf --- /dev/null +++ b/tests/data/Y16952.3.final.minimal.gbk @@ -0,0 +1,2270 @@ +LOCUS Y16952 66669 bp DNA linear BCT 14-DEC-2008 +DEFINITION Amycolatopsis balhimycina biosynthetic gene cluster for balhimycin, + strain DSM 5908. +ACCESSION Y16952 +VERSION Y16952.3 GI:46275283 +KEYWORDS abc gene; bbr gene; bgtfA gene; bgtfB gene; bgtfC gene; bhaA gene; + bhp gene; bmt gene; bpsA gene; bpsB gene; bpsC gene; bpsD gene; + dihydroxyphenylacetic acid synthase; dpgA gene; dpgB gene; dpgC + gene; dpgD gene; enoyl-CoA hydratase; enoyl-CoA-isomerase; glycosyl + transferase; halogenase; hydrolase; hydroxyacyl-dehydrogenase; ORF1; + ORF10; ORF11; ORF2; ORF3; ORF5; ORF6; ORF7; ORF8; ORF9; orfX; oxyA + gene; oxyB gene; oxyC gene; oxyD gene; P450 monooxygenase; pdh gene; + peptide synthetase; pgat gene; phenylglycine amino transferase; + putative ABC transporter ATP-binding protein; putative prephenate + dehydrogenase; putative two-component system respons; putative + two-component system sensor kinase; putative VanY-type + carboxypeptidase; StrR family transcriptional regulator; vanR gene; + vanS gene; vanY gene. +SOURCE Amycolatopsis balhimycina DSM 5908 + ORGANISM Amycolatopsis balhimycina DSM 5908 + Bacteria; Actinobacteria; Actinobacteridae; Actinomycetales; + Pseudonocardineae; Pseudonocardiaceae; Amycolatopsis. +REFERENCE 1 + AUTHORS Pelzer,S., Sussmuth,R., Heckmann,D., Recktenwald,J., Huber,P., + Jung,G. and Wohlleben,W. + TITLE Identification and analysis of the balhimycin biosynthetic gene + cluster and its use for manipulating glycopeptide biosynthesis in + Amycolatopsis mediterranei DSM5908 + JOURNAL Antimicrob. Agents Chemother. 43 (7), 1565-1573 (1999) + PUBMED 10390204 +REFERENCE 2 + AUTHORS Pfeifer,V., Nicholson,G.J., Ries,J., Recktenwald,J., Schefer,A.B., + Shawky,R.M., Schroder,J., Wohlleben,W. and Pelzer,S. + TITLE A polyketide synthase in glycopeptide biosynthesis: the biosynthesis + of the non-proteinogenic amino acid (S)-3,5-dihydroxyphenylglycine + JOURNAL J. Biol. Chem. 276 (42), 38370-38377 (2001) + PUBMED 11495926 +REFERENCE 3 + AUTHORS Bischoff,D., Pelzer,S., Bister,B., Nicholson,G.J., Stockert,S., + Schirle,M., Wohlleben,W., Jung,G. and Sussmuth,R.D. + TITLE The Biosynthesis of Vancomycin-Type Glycopeptide Antibiotics-The + Order of the Cyclization Steps This work was supported by the + Deutsche Forschungsgemeinschaft (SFB 323) and by a grant of the EU + (MEGATOP, QLK3-1999-00650). R. D. S. gratefully acknowledges the + support of a Feodor-Lynen Fellowship granted by the + Alexander-von-Humboldt Stiftung. We thank Corina Bihlmaier and + Volker Pfeifer for help with transformation and Southern + hybridization, J. A. Moss (La Jolla (USA)) for critical comments on + the manuscript and Prof. Dr. M. E. Maier and Prof. Dr. H.-P. Fiedler + (Tubingen) for generous support + JOURNAL Angew. Chem. Int. Ed. Engl. 40 (24), 4688-4691 (2001) + PUBMED 12404385 +REFERENCE 4 + AUTHORS Puk,O., Huber,P., Bischoff,D., Recktenwald,J., Jung,G., + Sussmuth,R.D., van Pee,K.H., Wohlleben,W. and Pelzer,S. + TITLE Glycopeptide biosynthesis in Amycolatopsis mediterranei DSM5908: + function of a halogenase and a haloperoxidase/perhydrolase + JOURNAL Chem. Biol. 9 (2), 225-235 (2002) + PUBMED 11880037 +REFERENCE 5 + AUTHORS Recktenwald,J., Shawky,R., Puk,O., Pfennig,F., Keller,U., + Wohlleben,W. and Pelzer,S. + TITLE Nonribosomal biosynthesis of vancomycin-type antibiotics: a + heptapeptide backbone and eight peptide synthetase modules + JOURNAL Microbiology (Reading, Engl.) 148 (PT 4), 1105-1118 (2002) + PUBMED 11932455 +REFERENCE 6 + AUTHORS Stegmann,E., Bischoff,D., Kittel,C., Pelzer,S., Puk,O., + Recktenwald,J., Weist,S., Sussmuth,R. and Wohlleben,W. + TITLE Precursor-directed biosynthesis for the generation of novel + glycopetides + JOURNAL Ernst Schering Res. Found. Workshop -(51), 215-232 (2005) +REFERENCE 7 + AUTHORS Pelzer,S. + TITLE Direct Submission + JOURNAL Submitted (24-MAR-1998) S. Pelzer, Universitaet Tuebingen, Lehrstuhl + Mikrobiologie-Biotechnologie, Auf der Morgenstelle 28, D- 72076 + Tuebingen, FRG + REMARK revised by [6] +REFERENCE 8 + AUTHORS Pelzer,S. + TITLE Direct Submission + JOURNAL Submitted (03-AUG-2001) S. Pelzer, Universitaet Tuebingen, Lehrstuhl + Mikrobiologie-Biotechnologie, Auf der Morgenstelle 28, D- 72076 + Tuebingen, FRG + REMARK revised by [10] +REFERENCE 9 (bases 1 to 66669) + AUTHORS Pelzer,S. + TITLE Direct Submission + JOURNAL Submitted (07-APR-2004) S. Pelzer, Universitaet Tuebingen, Lehrstuhl + Mikrobiologie-Biotechnologie, Auf der Morgenstelle 28, D- 72076 + Tuebingen, FRG +COMMENT On Apr 7, 2004 this sequence version replaced gi:15131491. +FEATURES Location/Qualifiers + gene complement(<1..759) + /gene="vanS" + CDS complement(<1..759) + /codon_start=1 + /db_xref="GI:46275284" + /db_xref="GOA:Q799B6" + /db_xref="InterPro:IPR003660" + /db_xref="InterPro:IPR003661" + /db_xref="InterPro:IPR009082" + /db_xref="UniProtKB/TrEMBL:Q799B6" + /gene="vanS" + /product="putative two-component system sensor kinase" + /protein_id="CAG25751.1" + /transl_table=11 + /translation="MDRAAGMSVRLKLTLSYACFLVLAGVLLLASVWLFLLRDVPDVLA + KPPPGGVLERSVLVRNFLPAAGSVLFFLLLFGLLGGWILAGRMLAPLTRITDAARMAAN + GSLSHRIRLEGTEDEFRELADAFDAMLARLEAHVAAQRRFAANASHELRTPLAITQALL + EVARNDPAKDPLLVFDRLHAVNARAIDLTEALLVLSRADQRAFTREPVDLSLLVEEAIE + TLLPIAEKRRVVIIASGHISRVVGSATLLLQ" + source 1..66669 + /db_xref="taxon:1081091" + /mol_type="genomic DNA" + /organism="Amycolatopsis balhimycina DSM 5908" + /strain="DSM 5908" + cluster 1..66669 + /contig_edge="True" + /cutoff=20000 + /extension=20000 + /note="Cluster number: 1" + /note="Detection rule(s) for this cluster type: t3pks: + (Chal_sti_synt_C or Chal_sti_synt_N); nrps: ((Condensation + & AMP-binding) or (Condensation & A-OX) or + cluster(Condensation,AMP-binding));" + /product="t3pks-nrps" + gene complement(770..1447) + /gene="vanR" + CDS complement(770..1447) + /codon_start=1 + /db_xref="GI:46275285" + /db_xref="GOA:Q799B5" + /db_xref="HSSP:1KGS" + /db_xref="InterPro:IPR001789" + /db_xref="InterPro:IPR001867" + /db_xref="InterPro:IPR011006" + /db_xref="InterPro:IPR011991" + /db_xref="UniProtKB/TrEMBL:Q799B5" + /gene="vanR" + /product="putative two-component system response regulator" + /protein_id="CAG25752.1" + /transl_table=11 + /translation="MRVLIVEDEPYLAEAIRDGLRLEAIAADTAGNGDTALELLSLNTY + DIAVLDRDIPGPSGDEIAKRIVASGSGLPILMLTAADRLDDKITGFELGADDYLTKPFE + LRELVLRLRALDRRRAHNRPPVLEIAGLRLNPFRREVYRDDRYIALTRKQFAVLEVLVS + ADGGVVSAEELLERAWDKNADPFTNAVRITVSALRKRLGEPWIITTVAGVGYRIGAAPG + AGR" + gene 1537..2157 + /gene="vanY" + CDS 1537..2157 + /codon_start=1 + /db_xref="GI:46275286" + /db_xref="GOA:Q799B4" + /db_xref="InterPro:IPR003709" + /db_xref="InterPro:IPR009045" + /db_xref="UniProtKB/TrEMBL:Q799B4" + /gene="vanY" + /product="putative VanY-type carboxypeptidase" + /protein_id="CAG25753.1" + /transl_table=11 + /translation="MTYRESARTTTRRIPGAVVPVARRIRGVLLAGLRAVGTRIARSPG + RPVRPQDRAGLGKTHGAVPAGVTVFDDDVPAVTRLDPALLSALRRAATAAADGGVELCV + NSGWRSPEYQSRLLREAVAKYGSAAAAARWVATPETSIHVAGKAVDIGPPASASWLSEH + GADYGLCRVYRNEPWHFELRPEAIEHGCPPLYADPSHDPRLRR" + gene 3011..3976 + /gene="bbr" + CDS 3011..3976 + /codon_start=1 + /db_xref="GI:46275287" + /db_xref="GOA:Q799B3" + /db_xref="InterPro:IPR003115" + /db_xref="InterPro:IPR009057" + /db_xref="UniProtKB/TrEMBL:Q799B3" + /gene="bbr" + /product="StrR family transcriptional regulator" + /protein_id="CAG25754.1" + /transl_table=11 + /translation="MDPTRVDIFALPAVEIELSRLSSASSPRTSGEDPEHVETLLSAEG + ELPPILVHRPTMQVLDGLHRLKVARVRGDTKILARLVDATESDAFVLAVEANIRHGLPL + SLADRKRAAVQIIGTHPQWSDRRVASATGISAGTVADLRRRAGEDGTEARIGRDGRVRP + SDGSERRRLAAELIRSDPGLSLRQVAKQVGISPETVRDVRGRLERGESPTPDGTRRLPA + KPHPLRLSEPDFGRAVDQDRLALLERLKSDPALRLNEVGRILLRMLTMHSMDGQEWERI + LQGVPPHLHGVIAGFARDHARVWAEFADHLESRATELAAG" + gene 4106..4981 + /gene="pdh" + CDS 4106..4981 + /codon_start=1 + /db_xref="GI:46275288" + /db_xref="GOA:Q799B2" + /db_xref="InterPro:IPR003099" + /db_xref="InterPro:IPR008927" + /db_xref="InterPro:IPR016040" + /db_xref="UniProtKB/TrEMBL:Q799B2" + /gene="pdh" + /product="putative prephenate dehydrogenase" + /protein_id="CAG25755.1" + /transl_table=11 + /translation="MTIEKALVVGTGLIGTSVALALREKGVAVFLSDVDTEAARLAQVL + GAGREWAGEGVDLAVIAVPPHLVGDRLADLQKQGAARVYTDVASVKADPIADAERLGCD + LASYVPGHPLAGRERSGPAAARAELFSGRPWALCPGPETDAEALRRVRELVSLCGATAV + VVGAGEHDSAVALVSHAPHVVASAVAASLASGDDVALGLAGQGLRDVTRIAAGDPLLWR + RILSGNTRPVAGVLERIAADLAAAASALRSGDLDEVTDLLRRGVDGHGRIPGQRGGSLP + GRNPAGSPGR" + gene 5180..7135 + /gene="abc" + CDS 5180..7135 + /codon_start=1 + /db_xref="GI:46275289" + /db_xref="GOA:Q799B1" + /db_xref="InterPro:IPR001140" + /db_xref="InterPro:IPR003439" + /db_xref="InterPro:IPR003593" + /db_xref="InterPro:IPR011527" + /db_xref="InterPro:IPR017871" + /db_xref="InterPro:IPR017940" + /db_xref="UniProtKB/TrEMBL:Q799B1" + /gene="abc" + /product="putative ABC transporter ATP-binding protein" + /protein_id="CAG25756.1" + /transl_table=11 + /translation="MDMVLRFEGVDKSPDDPDPWVTKVRKGTLRRVLAYFRPHVGKVAL + FCLVAVLESLIVVATPLLLKELIDNGIVKNDLGVVILMAGLTAVLAVLGAGLTMVSGYI + SGRIGEGITYDLRVQALGHVRRLPIAFFTRTQTGVLVGRLHTELIMAQQHFTGLLMAAT + SVVMVVVVLAELIYLSWIVAIVSLVLIPIFLVPWIRVGRAIQRRSIRLMDANTGLGGLL + QERFNVQGAMLSKLFGRPAEEMAEYEERAGEIRKIGVSLSVWGRMAFVMMALMASLATA + LVYGIGGGLVLAGAFELGTLVAIATLLQRLFGPITQLSGMQELAQTVVVSFSRVFELLD + LKPLIQERPDAIALKKKVVPDVEFEHVSFRYPTADEVSLASLEHLRAERERSEVTPDVL + RDVSFHAQAGTLTALVGPSGAGKSTITHLVSRLYDPNGGTVRLGGHDLRDLTFESLREA + VGVVSQDAYLFHDTIRENLLYARPTATEDELMEACKGAQIRDLIDSLPLGLDTVTGDRG + YRMSGGEKQRLAIARLLLKEPSIVVLDEATAHLDSESEAAVQRALKTALHGRTSLVIAH + RLSTIREADQILVIDGGRVRERGTHDELLAQGGLYAELYHTQFANPAANDPKPEIEDEL + DDIEPEPVIQHMGYGG" + gene 7138..16635 + /gene="bpsA" + CDS 7138..16635 + /citation=[5] + /codon_start=1 + /db_xref="GI:15131492" + /db_xref="GOA:Q939Z1" + /db_xref="HSSP:1AMU" + /db_xref="InterPro:IPR000873" + /db_xref="InterPro:IPR001242" + /db_xref="InterPro:IPR006162" + /db_xref="InterPro:IPR006163" + /db_xref="InterPro:IPR009081" + /db_xref="InterPro:IPR010060" + /db_xref="InterPro:IPR010071" + /db_xref="InterPro:IPR020806" + /db_xref="InterPro:IPR020845" + /db_xref="UniProtKB/TrEMBL:Q939Z1" + /function="involved in the biosynthesis of the balhimycin + heptapeptide backbone" + /gene="bpsA" + /product="peptide synthetase" + /protein_id="CAC48360.1" + /sec_met="Type: nrps" + /sec_met="Domains detected: Condensation (E-value: 7.4e-70, + bitscore: 226.6, seeds: 42); AMP-binding (E-value: 4e-111, + bitscore: 362.7, seeds: 400); PP-binding (E-value: 1.6e-13, + bitscore: 42.3, seeds: 164)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MNSAARTTPTMLDLFASHVDRTPDAVAVAGGDGVLTYRQLDERAG + RLAGRLASRGIRRGDRVAVVMDRSADLVVALLAVWKAGAAYVPVDAGYPAPRVAFMVAD + SAAKLVVCSAASRGAVPAGVESLEPAAAAEEGASDAPAATVRPGDPAYVMYTSGSTGTP + KGVTISQGCVAELTMDAGWAMEPGEAVLMHSPHAFDASLFELWMPLASGVRVVLAEPGS + VDARRLREAAAAGVTRVYLTAGSLRAVAEEAPESFAEFREVLTGGDVVPAHAVERVRTA + APRARFRNMYGPTEATMCATWHLLQPGDVVGPVVPIGRPLTGRRVQVLDASLRPVGPGV + VGDLYLSGALAEGYFNRAALTAERFVADPSAPGQRMYWTGDLAQWTADGELVFAGRADD + QVKIRGFRIEPGEIEAALIAQPDVHDAVVAAVDGRLIGYVVTEGDADPRVIRERLGAVL + PEHLVPAAVLALDALPLTGNGKVDRSALPAPEFAASAAGRAPSTDAERVLCGLFAEVLG + VARAGVDDGFFELGGDSIGAMRLAARAAKAGLLVTPAQIFEEPTPARLAAVARPVPAGG + PVDGPLLTLTAAEEAELALAAPGAEEIWPLAPLQEGLLFESILDDQGSDIYQVQVILEL + NGPVDAPRLRAAWDAVVRRHPELRLSFHRLASGKTVQAVHGDVTPPWRVVDLTGAGDVD + AAVAALVAEEQQQRFELATAPLVRLVLVRIAADRYRLLFVIHHILVDGWSVAVILNDVS + EAYEAGEPVPEQRGGATFRDYLAWLDRQDDDAARAAWRAELAGLDEPALIATSGVETEY + DYRATHLTPALHTRLLGFAREHGLTPSTVVHAAWAMVLARLTRRTDVVFGTMVATRPPE + LAGIESMPGLLMTAVPVRVPLDGGQSVLDMLTDLHSRQTALKRHQYLGLPEIQKAAGPG + ATFDTMLVVENYPREYARRYTHLRTIEGTHYPVTLGITPGDRFKIQLGYWPGQVPDTVA + ESLLEWFVGAIGALVADPAGLVGRIGMGAADVRRWDPPLQAGEPLPALVGRMAARPPDN + VAVVDGDGALSYADLWERSLKFAAVLRAHGVRSEDRVGLVVGRSAWWTVGMLGVLLAGG + TFVPVDPAYPAERKEWIFRSANPMLVVCAGATRGAVPAEFADRLVVIDEVDPAAGSAGD + LPRVDPRSAAYVIYTSGSTGTPKGVVVTHAGLGNLALAHIDRFGVSPSSRVLQFAALGF + DTIVSEVMMALLSGATLVVPPERDLPPRASFTDALERWDITHVKAPPSVLGTADVLPST + VETVVAAGELCPPGLVDRLSADRRMINAYGPTETTICATMSMPLSPGQHPIPFGKPVPG + VRGYLLDSFLRPLPPGVTGELYLAGIGVARGYLGRSALTAERFVADPFVPGERMYRTGD + LAYWTEQGELVSAGRADDQVKIRGFRVEPREIEFALSGYPRVTQAAVAVRDDRLVAYVT + PGDIDTQAVRAHLASRMPQYMVPAAVVALDALPLTAHGKIDRRALPDPDFTAGKQAREP + ATETERVLCELFAGVLGLARVGVDDSFFELGGDSILSMQLAARARRSGLTFTAADVFDG + KTPERIAQLAAESSVPEPGRSPKPDGVGDVAWTPVMWMLGDGVAGPAFAQWMVVGTPSD + LTEKALAAGFAAVVDTHDMLRARVVADEGGRRLVVGERGSVDVAGAVTRIRADGRSLDE + AVADAARAAVTRLDPSAGVMAQAVWVDAGPDQVGRLVVVAHHLSVDGVSWRILLSDLQA + ACEAAVAGREPVLEPVGASFKRWAGLLAEWAVSAERAGELAAWKAILGPGDRPAGAQAT + SRAAEGAVRSRSWVVPKVETAALAGRAPVAFHCGVNEVLLAGLAGAVARWRGGDAVLVD + VESHGRHPVDGTDLSRTVGWFTSAHPVRLDVAGTDLADVLAGGPAAGRLLKAVKEQSRA + VPGDGLGYGLLRYLNGTTGPVLADLPSPQIGFNYMGRFAAGEKSGVRAWQPVGDIGSSL + EPGMGLPHALEVNAIVQDLPDGPELTLMLEWQDGLLGEDEIDRLGRAWLDMLSGVARQA + ADPAAGGHTASDFDLVTLDQAEIEALEAEFAAAGGLAEVLPLSPLQHGLAFHAGYAGDG + VDVYTAQAVLELAGPLDVPLLRKSVRALLDRHANLRAGFRHGADGTAYQVVPGAVAVPV + TLVDVTESADPAAEAAAVAAAERARPFELARPPLLRVMVVVLGPDRHRLVLTNHHILLD + GWSTPLLLDELLTLYRNGAAPAALAPVTPYRDYLAWVRETDREAATEAWRDALAGLPEP + TLVAADRPVPVEVPEQIWTTLDETFAQALGARARECGVTVSTVLQAVWGMVLAALTGRD + DVVFGSVVSGRPAELPGIETMVGLFINTVPVRVRMRPQDTFAELVRGLQNEQVALLAHH + HVGLTDIQQAAGLGRLFDTIIVYENYPRPAEIGDESADADRVRVQGLTAADATHYPLAL + AVVPGTDLRLRLEHQPALFTAEQAGAVLERFTLVLEAVVADPRLPLAVVPILSDAERRQ + LQAGNDTALPVPDRTLPELFAAQAAATPEATAVVFEDRSLTYAELDARANQLARWLIDQ + GAGPEGLVAVLLPRSLELVVALLAVTKTGGAWLPIDPGYPADRIAFMLDDAGPALVITT + AVLSASPIGDVLAARSRTVVLDEPAAAGQLAGRDRAPVTDTDRARALDPRHPAYLIYTS + GSTGRPKAVVVTHRNLTNYLLHCGRMYPGLRGRSVLHSSIAFDLTVTATFTPLIVGGEI + HVGALEDLIGVVEAAPSIFLKATPSHLLTLDTASRGSAGSGDLLLGGEQLPADTVVQWR + RKYPNIVVVNEYGPTEATVGCVEYRLEPGQECPPGGVVPIGTPLANMRAFVLDSWLRLV + PPGAVGELYVAGAGLARGYLGRAGLTATRFVADPFGSGERMYRTGDLVQWNPDGQLVFA + GRVDDQVKVRGFRIEPGEIEAALVAQESVGQAVVVARDSEIGTRLIGYVTAAGESGVDE + AAVREGVAARLPQYMVPAALVVLGALPLTANGKVDRAALPDPDFGARAGGREPVTEAER + LLCALFAEVLGLERAGADDSFFELGGDSILSMRLAARAHREGMSFGAREVFEQRTPAGI + AAIVERVAGDRPVAAVHAVSDVALLDLDQGELDEFKAEFDDDSQPFADPGRY" + gene 16635..28868 + /gene="bpsB" + CDS 16635..28868 + /citation=[5] + /codon_start=1 + /db_xref="GI:15131493" + /db_xref="GOA:Q939Z0" + /db_xref="HSSP:1AMU" + /db_xref="InterPro:IPR000873" + /db_xref="InterPro:IPR001242" + /db_xref="InterPro:IPR006162" + /db_xref="InterPro:IPR006163" + /db_xref="InterPro:IPR009081" + /db_xref="InterPro:IPR010060" + /db_xref="InterPro:IPR010071" + /db_xref="InterPro:IPR020806" + /db_xref="InterPro:IPR020845" + /db_xref="UniProtKB/TrEMBL:Q939Z0" + /function="involved in the biosynthesis of the balhimycin + heptapeptide backbone" + /gene="bpsB" + /product="peptide synthetase" + /protein_id="CAC48361.1" + /sec_met="Type: nrps" + /sec_met="Domains detected: Condensation (E-value: 7.9e-66, + bitscore: 213.3, seeds: 42); AMP-binding (E-value: + 5.8e-116, bitscore: 378.6, seeds: 400); PP-binding + (E-value: 7e-17, bitscore: 53.1, seeds: 164)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MSQSRIEEIWPLSPLQAGLLFHAVYDGEGPDVYIGHWILDLAGPV + DAAGLRAAWETLLARHAPLRACFRQRKSGETVQIIARQVELPWREVDLSHLDDPEEAVR + ELAEQDRTTRFDLAQAPLLRLTLIRLGADAHRLVVTCHHTIMDGWSLPIVIDELSVLYP + AGGDASALPDVPSYREYLAWLSRQDKERALSAWTAELSGAEEPTLVVPADPGRAPAEPE + SVEAHLPEHLTRSLAELARRHGLTLNTVVQGAWALVLAQLAGRPDVVFGAAVSARPPDL + PGVEGMVGLFLNTVPVRVRLRGSTPVVELLAELQKRQSALIPDQFVGLADIQQAAGPAA + VFDTLLVFEKFHHGPAGSDSAGTFRIHVNQGRVAAHYPLTLVAVPGESMYLKLDYLTEL + FDRETAFAILERFTGVLRQLTGAGELTVAGVEVTTAAERALVAGEWGASTSAPPSLPAL + DLFGHQVAHRRDEPAVVDGDRTVSYGELAERAERLAGYLNGRGVRRGDRVAVVLDRSPD + LIATLLAVWKAGAAYVPVDPAYPVERRKFMLADSGPAAVVCAEAYRAAVPDTCPEPIVL + DDPRTRQAVAESPRLSAGTSADDLAYVMYTSGSTGTPKGVAVSHGNVAALAGEPGWRVG + PGDAVLLHASHAFDISLFEMWVPLLSGARVVLAGPGAVDGAALAAYVAGGVTAAHLTAG + AFRVLADESPEAVAGLREVLTGGDAVPLAAVERVRGRVRNVRVRHLYGPTEATLCATWW + LLEPGDETGSVLPIGRPLAGRRVHVLDAFLRPVPPGVAGELYVAGAGVAQGYSSRPALT + AERFVADPSGSGARMYRTGDLAYWTEQGALAFAGRADDQVKIRGYRVEPGEIEVVLAGL + PGVGQAVVTPRGEHLIGYVVAEAGHDADPVRLREQLAGTLPEFMVPAAVLVLDELPLTV + NGKVDRRALPEPDFAAKSAGREPVTEAERVLCGVFADVLGLDHVGVDDSFFELGGDSIS + SMQVAARARREGISLTPRLVFEHRTPERLAALAQEAGATPRAEVVTGVGEIPWTPVMRA + LGDDAMRPGFAQVRVVVTPAGVNPDALVSALQAVLDAHDLLRARVEPDGRLIVPERGAV + AAAGLLTRVAAGTGGLDEIAEREVRTATGTLDPSAGIMARVVWIDAGDAEPGRLAFVAH + HLSVDAVSWGILLPDLRAAYDEVISGGTPALEPPVTSYRQWARRLTARALSESTVAELE + KWAAVVEGAEPALPQDTGQHTGQSHSWSTSLSGTEVRDLVTVLPGAFHCGIQDVLLAGL + AGAVARVRGSGAALLVDVEGHGREAADGEDLLRTVGWFTSVHPVRLELSDVDLAGAADG + ERPAGQLLKAVKEQIRAVPGDGSGYGLLRHLNPGTGARLAELPSAQIGFNYLGRTVLAP + EDTAWQPNGGGPLGGGPDMVLAHAVEVSAELQDTPAGPRLGLAIDTRDFDLATVERLGE + AWLEMLTGLAAVARGSGAGGHTPADFALVDLTQRDVAELEAAAPGLTDIWPLSPLQEGM + LFERAFDEDGVDVYQTQRILDLDGPLDEPRLRAAWNQVLARHASLRTGFHQLGSGATVQ + VVVREADIPWRVADLSHLDAAEAAAEVERLLAEDQGRRFDVTRPPLLRLLLIRLGADEH + RLVVTSHHVLLDGWSTPLVVGEMSDGYAGGRSSSKPPSYQDYLAWLSRQDAEATRSAWR + AELAGADEPTLVDADAGKTLVMPDEHAEWLPEPATRALAGFARGHGLTVSTIVLGAWAL + VLARLAGRTDVVFGSVVSGRPADVPDVERMVGMFINTVPARVRLDGRRPLLEMLEDLQA + RQAALTEHQYLGLPEIQKVAGTGAIFDTIVMVENYPHDAAGLGGDGGVAISSVVTRTGT + SYPLTMNVSLGDRLRITVSYRPDRIDDATAAEVARQVVRVLERVVAEPSLPVGRLGVTS + EPTRAAVVERWNSTGEAAAETSVLELFRRQAGASPDAVAVVAGERTLSYADLDRESDRL + AGHLAGIGVGRGDRVGVVMTRGADLFVALLGVWKAGAAQVPVNVDYPAERIERMLADVG + ASVAVCVEATRKAVPDGVEPVVVDLPVIGGVRPEAPPVTVGAHDVAYVMYTSGSTGVPK + AVAVPHGSVAALASDPGWSQGPGDCVLLHASHAFDASLVEIWVPLVSGARVLVAEPGTV + DAERLREAVSRGVTTVHLTAGAFRAVAEESPDSFIGLREILTGGDAVPLASVVRMRQAC + PDVRVRQLYGPTEITLCATWLVLEPGAATGDVLPIGRPLAGRQAYVLDAFLQPVAPNVT + GELYLAGAGLAHGYLGNTAATSERFVANPFSGGGRMYRTGDLARWTDQGELVFAGRADS + QVKIRGYRVEPGEVEVALTEVPHVAQAVVVAREGQPGEKRLIAYVTAEAGSALESAAVR + AHLATRLPEFMVPSVVVVLESFPLTLNGKIDRAALPAPEFAGKAAGREPRTEAERVLCG + LFAEILGLERVGADDGFFELGGDSILSMRLAARARRENFVFGAKQVFEQKTPAGIAAVA + ERGGQSRPAGVADGVGEVPWTPVVRALLERDPAGLTRGAMAQWVSVAAPRDLSVTALVA + GLGAVIDTHDMLRSRIVESEGVEPRLVVAGRGTVDAAALVERVEAGDGDLAEIADRCAH + DTAARLDPVAGVLVRAVWVDAGPGRAGRLVVAAHHLVVDVVSWRTLLPDLQAACEAVVA + GGQPALDPPDVSFRRWSRTLDGEAAIRTGELAVWTEILDGAQSRLGELDPRRDTVSTAG + RRSWTVPREHAGVLVEQVTSAFHCGVHEVLLATLAGAVAGWRGGTAVVVDVEGHGRQPL + GELDLSRTLGWFTDVHPLRLDVTGVDPAEAVAGGDAAGRLLKQVKENVRAVPDGGLGYG + MLRYLNAETGPVLAALPKAEIGFNYLGRFSAGSGGEAQPWQITGIVGGAAEQDTPLRHV + VEIDAVVVDGPDGPEFTLTVTWAGRMLGDAEAESLAQAWLDMLAGLAAHVAAGGPGGHT + PSDFPLTALTQREVAEFEAAVPGLLDIWPLSPLQEGLLFHAADDRGPDVYASMRTLAID + GPLDVARFRASWTVLLDRHPALRASFHQLESGEAVQVIARDVPPDWRETDLSGLPESEA + LAEFDRLAARMHAERFDLTKAPQLRLHLVRLGDRRYRLIFTSHHIVADGWSLPLILVDV + LTAYEAGGDGRTLPAATSYRDFLAWVDRQDKGAAGQAWRTELAGLDEATHVVPPGSIIT + PLEPERVAFELDDETSKRLVEFTRRHGVTANTLFQGVWALHLARLAGRNDVVFGAAVAG + RPPEIPGVESAVGLFMNMLPVRARLTGAEPVVDMLKDLQERQVAMMAHQHIGLPEIKQL + TGPGAAFDTIVVFENYPPAPPRSDDPDALVIRPVGIPNDTGHYPLSMRASVAAGPVRGE + FIYRPDVVDRTEAGEMVAAILRALEQVVAEPWTPVGQVGLIGPEQRRLVVDEWNRTDVP + LAAETLPVLFRRQAERSPDAVAVEDGARSLTFGGLLGEVEALARLLVGAGVRREHRVGV + LVERSAELAVTMMAVSFAGGVFVPVDPDYPRERVEFMLANSAPGVMVCTKTTRAAVPAE + FAGTVLVLDELPAADPDVELPPVAPEDAAYVIYTSGSTGVPKGVLVTHSGLANLGYAHI + ERMAVTSSSRVLQLSATGFDAIVSELYMALLAGATLVLPDAASMPPRVTLGEAIRRAGI + THLTVSPSVLASEDDLPDTLRTVLTGGEALPPALVDRWSPGRRVIQAYGPTETTICSTM + SAPLSPGHDQVPLGGPIHNVRHYVLDAFLQPVPPGVVGELYITGVGLARGYLGRPGLTA + ERFVASPFAPGERMYRSGDLFRWTREGQLLFAGRVDAQVKVRGYRVEPAEIEAVLAEHP + WVGQVAVSVRRDGPGDKQLVAYVVPSADAAAENGTLASALRELAAERLPEYMMPAAFVS + LEQMPLTPNGKLDHRALQAPDFAGMSSKRAPRTPMEARLCALFADVLGLDQVGPDDSFF + ELGGDSITSMQLSARARPTGLELTPWQVFDEKTPERLAVIVQELAAEGGTTPAPEPGEG + TLVALSPDQMDLLEAGLAGE" + gene 28888..34464 + /gene="bpsC" + CDS 28888..34464 + /codon_start=1 + /db_xref="GI:15131494" + /db_xref="GOA:Q939Y9" + /db_xref="HSSP:1AMU" + /db_xref="InterPro:IPR000873" + /db_xref="InterPro:IPR001031" + /db_xref="InterPro:IPR001242" + /db_xref="InterPro:IPR006163" + /db_xref="InterPro:IPR009081" + /db_xref="InterPro:IPR010071" + /db_xref="InterPro:IPR020845" + /db_xref="UniProtKB/TrEMBL:Q939Y9" + /function="involved in the biosynthesis of the balhimycin + heptapeptide backbone" + /gene="bpsC" + /product="peptide synthetase" + /protein_id="CAC48362.1" + /sec_met="Type: nrps" + /sec_met="Domains detected: Condensation (E-value: 1.7e-67, + bitscore: 218.8, seeds: 42); AMP-binding (E-value: + 5.2e-109, bitscore: 355.7, seeds: 400); PP-binding + (E-value: 2.5e-16, bitscore: 51.3, seeds: 164); PF00561 + (E-value: 4.1e-08, bitscore: 24.4, seeds: 48)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MTVDDTRAKRRSSVEDVWPLSPLQEGMLYHTALDDDGPDTYTVQT + VYGIDGPLDPGLLRASWQALVDRHAALRACFRYVSGAQMVQVIAREAEVPWRETDLSGL + PDDIAEGEVDRLAADEVAERLRIEAAPLMKLHLIRLGPDRHRLVHTLHHVLVDGWSMPI + LHRELAAIYAAGGDASGLPPTVSYRDYLAWLGRQDKEVARAAWRAELAGLDTPTTVAAP + DPARVPDIHTAVVELPAELTDGLAQFARGHDLTLNTVVQGAWAVVLAQLAGRDDVVFGA + TASGRPADLPGVEAMVGQLLNTLPVRVRLDGGRRAAELFARLQRDQSALMAHQHLGLQD + VQAVVGPGAVFDTLVIYENFPRKGLGRAPGGGLSLVPVKRGRNSSHYPFTLITGPGERM + PLILDYDRGLFDPAAAESVVGALARVLERLVAEPDVLVGRLTLASEAERALVVEGFNAT + AGPVPGESVLELFARRVAAAPDAVAITGAAGANLTYAEVDQASNRLAGYLAVRGVGRGD + RVGVAMERSPDLLIAFLAIWKAGAAYVPVDVEYPAERISFVFDDSGVSTVLCTLATSAV + APGNAIVLDAPETRVAVRDCAAPEIRPHADDLAYVMYTSGSTGLPKGVAIPHGAVAGLA + GDAGWQIGPGDGVLMHATHVFDPSLYAMWVPLVSGARVLLTEPGVLDAAGVRQAVHRGA + TFVHLTAGTFRALAETAPECFEGLVEIGTGGDVVPLQSVENLRRAQPGLRVRNTYGPTE + TTLCATWLPIEPGEVLGRELPIGHPMTNRRIYLLDAFLRPVPPGVAGELYIAGTGLAHG + YLKSPGLTAGRFVACPFAAGERMYRTGDRARWTRDGEVVFLGRADDQVKIRGYRVELGE + VEAALAAQPGVVEAVVTAREDQPGEKRLVGYFVSDGGDAGPVEIRRQLALVLPDYLVPI + AVVALPGLPVTPNGKVDRRALPAPDLAGHSPEKAPENETEKVLCALFAEILSIDQVGVD + DTFHDLGGSSALAMRLVARIREELGADLPIRQLFSSPTPAGLARALAAKSRPALEAAQR + PDRVPVTARQLRAWLLADPGGETAGLHTSVALRLHGRVDVPALAAALGDVAARHEILRT + TFPGDAQSVHQHVHDALAVELTPVGVTEEDLPGLLAERRDLLFDLTRDVPWRCDLFALS + DNEHVLHLQVHRILADDDSLDVFFRDLAAAYGARREGRVPERAPLALQFADYALWEQRL + LTDENEPGSLINEQVAFWRDNLAGLDGETVLPFDRPRPAVPSRRAGTVALRLEAGPHAR + LTEAAEPPGADTLEMVHAALAMLLAKLGAGHDVVIGTALPRDEELFDLEPMIGPFTRAL + ALRTDVSGDPTFLEVVARVQEAGQATGEHLDLPFERIVELLDLPASLARHPVFQVGLQV + DEEDIDGWAAAELPALRTAVEPGGTAAMELDLAVKLTERFDDDDNAGGLEGALHYATDL + FDEATAESVARRLVRVLEQVAEDPGRRISDLDVFLDDFERGRPPIAPARWAGAVPPVVA + ELAGDGPLGALLLDEQLRPVAPGAVGDLYVTGPAVDAGTATLATVPCPFGDEGHRMLHT + GLLARKTPAKTLVVVGERRRSSASVKTGDFEILLPLRAGGDRPPLFCVHASGGLSWNYE + PLLRYLPPNQPVYGVQARGLARTEPLPGSVEEMAADYLEQIRAVQPAGPYHLLGWSLGG + RIAQAMARLLEADGERLGLLALLDAYPVYMGRKTTGAASEEAALEQRNQQDLDLAGQLV + KGVAARSRLEAVMRNLWKVGPRHTRSPFAGDVLLFVATVDRPAHLPVPVAKASWKEFTS + GAVEAHEIPSNHYDMVQSAALGQIGAIVAEKLRSRPEGERTQR" + CDS 34461..34670 + /codon_start=1 + /db_xref="GI:15131495" + /db_xref="InterPro:IPR005153" + /db_xref="InterPro:IPR015166" + /db_xref="UniProtKB/TrEMBL:Q939Y8" + /note="orf1" + /product="hypothetical protein" + /protein_id="CAC48363.1" + /transl_table=11 + /translation="MSNPFDNEDGSFFVLVNDEGQHSLWPTFAEVPAGWTRVHGEAGRQ + ECLAYVEENWTDLRPKSLIREASA" + gene 34782..35957 + /gene="oxyA" + CDS 34782..35957 + /codon_start=1 + /db_xref="GI:3688114" + /db_xref="GOA:O87673" + /db_xref="HSSP:1LFK" + /db_xref="InterPro:IPR001128" + /db_xref="InterPro:IPR002397" + /db_xref="InterPro:IPR017972" + /db_xref="UniProtKB/TrEMBL:O87673" + /function="involved in the coupling of the aromatic side + chains of the heptapeptide" + /gene="oxyA" + /note="already deposited under Y16952" + /product="P450 monooxygenase" + /protein_id="CAA76547.1" + /sec_met="Type: none" + /sec_met="Domains detected: p450 (E-value: 3.8e-11, + bitscore: 33.6, seeds: 50)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MFEESNALRGTEIHRRDRFAPGPELRSLMGEGTMSILQPPDSPGG + RTGWLATGHDEVRQVLGSDKFSAKLLYGGTVAGRIWPGFLNQYDPPEHTRLRRMVTSAF + TVRRMQDFRPRIEQIVQASLDAIEAAGGPVDFVPRFAWSVATTVTCDFLGIPRDDQADL + SRALHASRSERSGKRRVAAGNKYWTYMTEIAARARRDPGDDMFGAVVRDHGDAITDAEL + LGVAAFVMGAGGDQVARFLAAGAWLMVEHPDQFALLREKPDTVPDWLNEVERYLTSDEK + TTPRIAQEDVRIGDQLVKAGDAVTCSLLAANRRKFPAPEDEFDITRERPVHVTFGHGIH + HCLGRPLAEMVFRAAIPALAQRFPKLRLAEPDREIKLGPPPFDVEALLLEW" + gene 36007..37203 + /gene="oxyB" + CDS 36007..37203 + /codon_start=1 + /db_xref="GI:3688115" + /db_xref="GOA:O87674" + /db_xref="HSSP:1LFK" + /db_xref="InterPro:IPR001128" + /db_xref="InterPro:IPR002397" + /db_xref="InterPro:IPR017972" + /db_xref="UniProtKB/TrEMBL:O87674" + /function="involved in the coupling of the aromatic side + chains of the heptapeptide" + /gene="oxyB" + /product="P450 monooxygenase" + /protein_id="CAA76548.1" + /sec_met="Type: none" + /sec_met="Domains detected: p450 (E-value: 3.3e-16, + bitscore: 50.2, seeds: 50)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MNDDDPRPLHIRRQGLDPADELLAAGSLTRVTIGSGADAETHWMA + TAHALVRQVMGDHQRFSTRRRWDPRDEIGGTGTFRPRELVGNLMDYDPPEHTRLRQKLT + PGFTLRKMQRLQPYIEQIVNERLDEMARAGSPADLVAFVADKVPGAVLCELIGVPRDDR + ATFMQLCHAHLDASRSQKRRAAAGEAFSRYLLAMIARERKDPGEGLIGAVVAEYGDEAT + DEELRGFCVQVMLAGDDNISGMIGLGVLALLRHPEQIDALRGGEQPAQRAVDELIRYLT + VPYGPTPRIAKQDVTVGDQVIKAGESVICSLPAANRDPALVPDADRLDVTRDPVPHVAF + GHGIHHCLGAALARLELRTVFTALWRRFPDLRLADPAQETKFRLTTPAYGLTELMVAW" + gene 37353..38573 + /gene="oxyC" + CDS 37353..38573 + /codon_start=1 + /db_xref="GI:3688116" + /db_xref="GOA:O87675" + /db_xref="HSSP:1UED" + /db_xref="InterPro:IPR001128" + /db_xref="InterPro:IPR002397" + /db_xref="InterPro:IPR017972" + /db_xref="UniProtKB/TrEMBL:O87675" + /function="involved in the coupling of the aromatic side + chains of the heptapeptide" + /gene="oxyC" + /product="P450 monooxygenase" + /protein_id="CAA76549.1" + /sec_met="Type: none" + /sec_met="Domains detected: p450 (E-value: 2.6e-15, + bitscore: 47.3, seeds: 50)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MGHDIGQLAPLLPEPANFQLRTNCDPHADNFDLRAHGPLVRIAGD + SSAQLGREYVWQAHGYDVVRRILGDHENFTTRPQFTQAKSGAHVEAQFVGQISTYDPPE + HTRLRKMLTPEFTVRRIRRMEPAIQALVDDRLDRVAAEGPPADLQALFADPVGALALCE + LLGIPRDDQREFVRRIRRNTDLSRGLKARAADSAAFNRYLDNLIARQRRDADDGFLGMI + VREHGDTVTDEELKGLCTALILGGVETVAGMIGFGVLALLENPGQVPLLFAGPEQADRV + VNELLRYLSPVQAPNPSLAVKDVIIDGQLIKAGDYVLCSVLMANRDEALTPNPNVFDAN + RAAVSDVGFGHGIHYCVGAALARSMLRMAYQALWQRFPGLRLAVPIAEVKYRSAFVDCP + DRVPVTW" + gene 38634..40109 + /gene="bhaA" + CDS 38634..40109 + /codon_start=1 + /db_xref="GI:3688117" + /db_xref="GOA:O87676" + /db_xref="InterPro:IPR003042" + /db_xref="InterPro:IPR006905" + /db_xref="UniProtKB/TrEMBL:O87676" + /function="involved in the halogenation of balhimycin" + /gene="bhaA" + /product="halogenase" + /protein_id="CAA76550.1" + /sec_met="Type: none" + /sec_met="Domains detected: Trp_halogenase (E-value: + 4.7e-77, bitscore: 250.9, seeds: 23)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MSVEDFDVVVAGGGPGGSTVATLVAMQGHRVLLLEKEVFPRYQIG + ESLLPATVHGVCRMLGISDELANAGFPIKRGGTFRWGARPEPWTFHFGISAKMAGSTSH + AYQVERARFDEMLLNNAKRKGVVVREGCAVTDVVEDGERVTGARYTDPDGTEREVSARF + VIDASGNKSRLYTKVGGSRNYSEFFRSLALFGYFEGGKRLPEPVSGNILSVAFDSGWFW + YIPLSDTLTSVGAVVRREDAEKIQGDREKALNTLIAECPLISEYLADATRVTTGRYGEL + RVRKDYSYQQETYWRPGMILVGDAACFVDPVFSSGVHLATYSALLAARSINSVLAGDLD + EKTALNEFELRYRREYGVFYEFLVSFYQMNVNEESYFWQAKKVTQNQSTDVESFVELIG + GVSSGETALTAADRIAARSAEFAAAVDEMAGGDGDNMVPMFKSTVVQQAMQEAGQVQMK + ALLGEDAEPELPLFPGGLVTSPERMKWLPHHPA" + gene 40192..41382 + /gene="bgtfA" + CDS 40192..41382 + /codon_start=1 + /db_xref="GI:3688118" + /db_xref="GOA:O87677" + /db_xref="HSSP:1PN3" + /db_xref="InterPro:IPR004276" + /db_xref="UniProtKB/TrEMBL:O87677" + /function="involved in glycosylation of balhimycin" + /gene="bgtfA" + /product="glycosyltransferase" + /protein_id="CAA76551.1" + /sec_met="Type: none" + /sec_met="Domains detected: Glyco_transf_28 (E-value: + 2.5e-28, bitscore: 90.1, seeds: 43)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MRVLISGCGSRGDTEPLIALAVRLRELGVDVRMCLPPDYVERCAE + VGVSMVAVGPAMRAGARGPGEPPPGAPEIVSEVVADWFDKVPAAAEGCDVVVATGLLPA + AVVVRSVAEKLGIPYLYTVLSPDHLPSVLSQAERDEYDQGADRLFGAVVTSGRAAIGLP + PVANLFTYGYTEQPWLGADQILAPPPPGDLDTVQTGAWILPDERPLPAELETFLAAGSP + PVYVGFGSSSGPRTAGAAKAAIEAIRARGHRVVLSRGWADLAAPDDSADCFTVGEVNLQ + VLFRRVAAAVHHDSAGTTLLAIRAGTPQIVVRRVIDNVVEQAYHADRVAELGVGVALEG + PIPASEAMSDALETALAPETRARAAEVAGTVRTDGTTVAAELLFAAVSREKPAVPA" + gene 41447..42676 + /gene="bgtfB" + CDS 41447..42676 + /codon_start=1 + /db_xref="GI:3688119" + /db_xref="GOA:O87678" + /db_xref="HSSP:1IIR" + /db_xref="InterPro:IPR002213" + /db_xref="InterPro:IPR004276" + /db_xref="UniProtKB/TrEMBL:O87678" + /function="involved in glycosylation of balhimycin" + /gene="bgtfB" + /product="glycosyltransferase" + /protein_id="CAA76552.1" + /sec_met="Type: none" + /sec_met="Domains detected: Glyco_transf_28 (E-value: + 2.9e-29, bitscore: 93.1, seeds: 43)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MKRVLLSTLGSRGDVEPLVALAVRLRDLGAEPLMCAPPDCADRLE + EVGVPHVPVGPSARAPIHREKPLTPEDMRRLMAEAIAMPFDRIPAAAEGCAAVVTTGLL + AAAIGVRSVAEKLGIPYFYAFHCPSYVPSPYYPPPPPLGEPPAEDVTDIRALWERNNRS + AYQRYGGPLNSHRAAIGLPPVEDIFTFGYTDHPWVAADSVLAPMQPTDLGAVQTGAWIL + PDERPLSPELEAFLDTGTPPVYLGFGSLRAPADAVRVSIDAIRAQGRRVILSRGWADLV + LPDDREDCFATGEVNQQVLFGRVAAVIHHGGAGTTHVAMQAGAPQVLVPQMADQPYYAG + RVAELGIGVAHDGPVPTFDSLSAALVTALAPETRARAEAVARTAGADGAAVAAKLLLDA + VSREKPAVPA" + gene 42824..44053 + /gene="bgtfC" + CDS 42824..44053 + /codon_start=1 + /db_xref="GI:3688120" + /db_xref="GOA:O87679" + /db_xref="HSSP:1IIR" + /db_xref="InterPro:IPR004276" + /db_xref="UniProtKB/TrEMBL:O87679" + /function="involved in the glycosylation of balhimycin" + /gene="bgtfC" + /product="glycosyltransferase" + /protein_id="CAA76553.1" + /sec_met="Type: none" + /sec_met="Domains detected: Glyco_transf_28 (E-value: + 3.6e-31, bitscore: 99.3, seeds: 43)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MRVLLSTAGSRGDVEPLLALAVRLQGLGAEVLMCASPASAERLAE + VGVPHVPVGLQLDGMLLQEGMPPPSAEDERRLAAMAIDMQFDAVPAAAEGCAAVVATGE + LAAAAAVRSVAEKLGIPYFYGAYSPNYLASPHYPPPDDERTTPGVTDNGVLWAERAERF + AKRYGETLNSRRAAIGLPPVADVFGYGYTEQPWLAADPVLAPLDPDLDAVQTGAWILRD + DRPLSPELAAFLAAGSPPVYVGFGSASGPGIEDAAKVAIEAIRALGRRAILSRGWADLV + LPDDREDCFAVDEANLQVLFEQSAAVVHHGSAGTEHLATRAGVPQIAIPRHTDQAYYAG + RVAELGVGVALEGPVPSFAAMSAELATALAPETRARAAEVAGTVRTDGTTMAAELLFQA + AEQGKLTVPA" + gene 44087..45313 + /gene="dvaC" + CDS 44087..45313 + /codon_start=1 + /db_xref="GI:15131496" + /db_xref="GOA:Q939Y7" + /db_xref="InterPro:IPR013217" + /db_xref="InterPro:IPR013630" + /db_xref="InterPro:IPR013691" + /db_xref="UniProtKB/TrEMBL:Q939Y7" + /function="probably involved in the C-3 methylation of + dehydrovancosamine" + /gene="dvaC" + /product="putative C-3 methyl transferase" + /protein_id="CAC48364.1" + /transl_table=11 + /translation="MSTTSQCRICDGTVHEFIDFGRQPLSDAFVAPGAEKGEFFFRLAT + GICDSCTMVQLMEEVPRDLMFHEAYPYLSSGSAVMRTHFHELAKHLLATELTGEDPFIV + ELGCNDGIMLKAVADAGVRQLGVEPSGSVADLAAAKGIRVRKDFFEEATAADIRENDGP + ADVIYAANTLCHIPYMDSILKGVTKLLGPNGVFVFEDPYLGDIVERTSFDQIYDEHFFL + FTARSVQEMARRNGLELVDVERIPVHGGEVRYTLALAGARKPSEAVAELLAWEAERKLA + EYATLERFATDVKKIKEDLIALLTKLRAEGKRVVGYGATAKSATVTNFCGITPDLVEFI + SDTTPAKQGKLSPGQHIPVREYGEFAGNHPDYALLFAWNHADEIMNVEQAFRDAGGQWI + LYVPNVHVS" + CDS 45409..46233 + /codon_start=1 + /db_xref="GI:15131497" + /db_xref="InterPro:IPR003737" + /db_xref="UniProtKB/TrEMBL:Q939Y6" + /note="orf2" + /product="hypothetical protein" + /protein_id="CAC48365.1" + /sec_met="Type: none" + /sec_met="Domains detected: PIG-L (E-value: 2.7e-23, + bitscore: 74.4, seeds: ?)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MPQDLDADRILAISPHLDDAVLSFGAGLARAAQAGAKVTVHTVFA + GTAAPPYSPAAERLHAIWELSPDQDASLRRRDEDIAALDHLGVDYRHGRFLDAIYRKLP + DGRWLADNVPGRQKLAIGRQSPQGDPELFSAVRADIESIVEEYAPALILTCAAGNGHVD + NEIARDAALFVAYEKGIRVRLWEDLPHAMFAEGAAELPDGFRLGPPDFGSVEPEARARK + FEALRLYSSQMLMLHGPEKDFFAQLDGHARKSAPGGGYGETTWPVVSREDNG" + gene 46265..47119 + /gene="bmt" + CDS 46265..47119 + /codon_start=1 + /db_xref="GI:15131498" + /db_xref="GOA:Q939Y5" + /db_xref="UniProtKB/TrEMBL:Q939Y5" + /function="probably involved in the methylation of the + D-Leu residue of the heptapeptide" + /gene="bmt" + /product="putative N-methyl transferase" + /protein_id="CAC48366.1" + /transl_table=11 + /translation="MSGQLERGPVRTTHADVLLASVGERGVLCDFYDEEGSNTYRDLIQ + DADGTPEAREFATRVGPVPGPVLELAAGTGRLTFPFLELGWEVTALELSAPVVDGFRMR + LAEAPADLRDRCTVVQADMSAFSVDRRFGAAVISSGSVNELDEAGRQGLYASVREHLEP + GGKFLLSLALSEVAESQPPERRQELPGQSGRLYVLHVSVQPAEETQDITIYPADETADP + FVVCTHRRRLVPADRIVRELLRAGFDVIARTPFASGASGRAGHEDMLLVEAVKQEGAIP + AAR" + gene complement(47100..48404) + /gene="pgat" + CDS complement(47100..48404) + /codon_start=1 + /db_xref="GI:15131499" + /db_xref="GOA:Q939Y4" + /db_xref="InterPro:IPR004839" + /db_xref="InterPro:IPR015421" + /db_xref="InterPro:IPR015422" + /db_xref="InterPro:IPR015424" + /db_xref="UniProtKB/TrEMBL:Q939Y4" + /function="transamination of 4-hydroxy- and + 3,5-dihydroxyphenylglycine" + /gene="pgat" + /product="phenylglycine amino transferase" + /protein_id="CAC48367.1" + /transl_table=11 + /translation="MEILVFMDSFGLSTPLSVETLHGSLTDPAISSMNLLNELIDEYPV + AISMAAGRPYEEFFDIRLIHEYIDAYCDHLRRDRKLDEAGVTRTLFQYGTTKGVIADLI + ARNLAEDENIDAAPESVVVTVGAQEAMFLVLRTLRATEHDVLLAPAPTYVGLTGAALLT + DTPVWPVQSTENGIDPDDLVLQLKRADEQGKRVRACYVTPNFANPTGTSMDLAARHRLL + EVAEANGILLLEDNAYGLFGAERLPTLKALDRSGTVVYIGSFAKTGMPGARVGFTVADQ + RMAGGGLLADQLSKLKGMLTVNTSPIAQSVIAGKLLLNDFSLTKANAREIAVYQRNLRL + VLGELERRLGGRAGVRWNTPTGGFFVTVTVPFTVDDDLLALAARDHGVLFTPMHHFYGG + KGGFNQLRLSISLLTPELIEEGVARLAALITARLG" + gene 48610..49464 + /gene="bhp" + CDS 48610..49464 + /codon_start=1 + /db_xref="GI:15131500" + /db_xref="GOA:Q939Y3" + /db_xref="HSSP:1A88" + /db_xref="InterPro:IPR000073" + /db_xref="InterPro:IPR000639" + /db_xref="UniProtKB/TrEMBL:Q939Y3" + /function="involved in the beta-hydroxytyrosine + biosynthesis" + /gene="bhp" + /product="putative hydrolase" + /protein_id="CAC48368.1" + /sec_met="Type: none" + /sec_met="Domains detected: PF12697 (E-value: 2.5e-23, + bitscore: 75.1, seeds: 465); PF00561 (E-value: 5.1e-19, + bitscore: 60.1, seeds: 48)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MLMTTEHGIRLSYHDQGRGAPVLLLTGTGAPSSVWDLHQVPALRA + AGFRVITMDNRGIPPSDDGADGFTVDDLVADVAALLDHLDASPCRVVGTSMGSYIAQEL + ALARPELVDAVVLMAACGRSSLVQRVLAEAEADLIGRGTELPPGYRAAVRAMHNLGPAT + LADDDLAADWLDLFAASENWGPGVRAQLLLSALPDRREAYRAIKVPCHVVSFEHDLVAP + PSAGQELAAVIPGATHRTIPGCGHFGYLEKPEAVNRELLRFLRTESGVAVTSGASPRTP + EEL" + gene 49530..51275 + /gene="bpsD" + CDS 49530..51275 + /codon_start=1 + /db_xref="GI:15131501" + /db_xref="GOA:Q939Y2" + /db_xref="HSSP:1AMU" + /db_xref="InterPro:IPR000873" + /db_xref="InterPro:IPR006162" + /db_xref="InterPro:IPR006163" + /db_xref="InterPro:IPR009081" + /db_xref="InterPro:IPR010071" + /db_xref="InterPro:IPR020845" + /db_xref="UniProtKB/TrEMBL:Q939Y2" + /function="involved in the beta-hydroxytyrosine + biosynthesis" + /gene="bpsD" + /product="peptide synthetase" + /protein_id="CAC48369.1" + /sec_met="Type: other" + /sec_met="Domains detected: AMP-binding (E-value: 6.5e-114, + bitscore: 371.9, seeds: 400); PP-binding (E-value: 1.4e-15, + bitscore: 48.9, seeds: 164)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MTGAIVPPSTAPALFEAAAAAVPDRPAVAMGTTTLTYAELNTQAN + RLARRLVAHGVGPERLVALAMPRSIEFAVAMLAVHKAGGAYVPIDPDYPAERRQHMLAG + AAAQCLLCLPGQDVAGAPVVLSVALAEPGRPEPDLDDSDRLAPLLPSHPAYVIFTSGST + GQPKGVVVTHRGIPNLAADYVHRQNLLPDSRLLAFASPSFDAAVAEFWPIWLAGACLVL + APAPDLIPGEPLARLVRDRHITHVTLPPSALAPLEEAGGLPPGLTLLVAGEAGPAPVAK + RWAAGRVMINAYGPTEATVAVTASDPLTGEDTPPIGRPITGVHTYVLDDRLVPVPDGTV + GELYMTGPGLARGYLHRPAATAERFLPDPFGGPGQRMYRTGDRVRARPDGQLVFVGRAD + DQLKVRGHRIEPAEVESALLAVDGVAQAVVTEHDNRLVAYVVGAGGARVPAEDLLPPLR + KQLPAYLVPDVVVGLPHLPTTPNGKVDRAALPAPEAEDTGRAISGRAPSTPTEIHLAAL + FAEVLGVSSVGVEDSFFEVGGHSLLATRLVSRIRESLRVRLRVQAFFDAPTVAELAKVL + DAALT" + gene 51289..52479 + /gene="oxyD" + CDS 51289..52479 + /codon_start=1 + /db_xref="GI:15131502" + /db_xref="GOA:Q939Y1" + /db_xref="InterPro:IPR001128" + /db_xref="InterPro:IPR002397" + /db_xref="InterPro:IPR017972" + /db_xref="PDB:3MGX" + /db_xref="UniProtKB/TrEMBL:Q939Y1" + /function="probably involved in the biosynthesis of + beta-hydroxytyrosine" + /gene="oxyD" + /product="putative P450 monooxygenase" + /protein_id="CAC48370.1" + /sec_met="Type: none" + /sec_met="Domains detected: p450 (E-value: 3.9e-22, + bitscore: 69.8, seeds: 50)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MQTTNAVDLGNPDLYTTLERHARWRELAAEDAMVWSDPGSSPSGF + WSVFSHRACAAVLAPSAPLTSEYGMMIGFDRDHPDNSGGRMMVVSEHEQHRKLRKLVGP + LLSRAAARKLAERVRIEVGDVLGRVLDGEVCDAATAIGPRIPAAVVCEILGVPAEDEDM + LIDLTNHAFGGEDELFDGMTPRQAHTEILVYFDELITARRKEPGDDLVSTLVTDDDLTI + DDVLLNCDNVLIGGNETTRHAITGAVHALATVPGLLTALRDGSADVDTVVEEVLRWTSP + AMHVLRVTTADVTINGRDLPSGTPVVAWLPAANRDPAEFDDPDTFLPGRKPNRHITFGH + GMHHCLGSALARIELSVVLRVLAERVSRVDLEREPAWLRAIVVQGYRELPVRFTGR" + gene 52645..53715 + /gene="hmaS" + CDS 52645..53715 + /codon_start=1 + /db_xref="GI:15131503" + /db_xref="GOA:Q939Y0" + /db_xref="HSSP:1CJX" + /db_xref="InterPro:IPR004360" + /db_xref="InterPro:IPR005956" + /db_xref="UniProtKB/TrEMBL:Q939Y0" + /function="probably involved in the 4-hydroxyphenylglycine + biosynthesis" + /gene="hmaS" + /product="putative hydroxyphenyl pyruvate dioxygenase" + /protein_id="CAC48371.1" + /transl_table=11 + /translation="MTSDSTVQNFEIDYVEMYVENLEAATFTWVDKYAFAVAGTDRSAD + HRSVTLRQGPIKLVLTEPTSDRHPAAAYLQSHGDGVADIALRTPDVTAAFEAAVRGGAA + AVREPVRLAGGPIVTATIGGFGDVVHTLIQSGEATAAAPETTGQGGGDVNLLGLDHFAV + CLNSGDLGPTVAFYERAFGFRQIFEEHIVVGRQAMNSTVVQSASGEVTLTLIEPDSNAD + PGQIDEFLKAHQGAGVQHIAFNADDAVRAVRALSGRGVEFLKTPGTYYDMLGERITLET + HTLDDLRSTNVLADEDHGGQLFQIFAASTHPRHTIFFEIIERQGAGTFGSSNIKALYEA + VELERTGQSEFGAARR" + gene 53712..54788 + /gene="hmo" + CDS 53712..54788 + /codon_start=1 + /db_xref="GI:15131504" + /db_xref="GOA:Q939X9" + /db_xref="HSSP:1GOX" + /db_xref="InterPro:IPR000262" + /db_xref="InterPro:IPR008259" + /db_xref="InterPro:IPR012133" + /db_xref="InterPro:IPR013785" + /db_xref="UniProtKB/TrEMBL:Q939X9" + /function="probably involved in the 4-hydroxyphenylglycine + biosynthesis" + /gene="hmo" + /product="putative phenylglycolate oxidase" + /protein_id="CAC48372.1" + /transl_table=11 + /translation="MTYVSLGDLERAARDVLPGEIWDFLAGGSGAEASLTANRTALDRV + FVVPRMLCDLTGSTTEAELLGRRAALPMAVAPVAYQRLFHPEGELAAARAARDAGVPYT + ICTLSSVPLEEVAAVGGRPWFQLYWLRDEKRSLELVRRAEDAGCEAIVFTVDVPWMGRR + WRDMRNGFALPESVTAANFDAGSAAHRRTRGASAVADHTAREFAPATWESVATVRAHTD + LPVVLKGILAAEDARRAVEAGADGIVVSNHGGRQLDGAVPGIEVLGEIAAEVSGRCEVL + LDGGIRTGGDILKAAALGASGVLVGRPVMWGLAAAGQEGVRQVFELLAAELRNALGLAG + CDSVSAAGRLGTRVPRYG" + CDS 54879..56237 + /codon_start=1 + /db_xref="GI:15131505" + /db_xref="GOA:Q939X8" + /db_xref="InterPro:IPR006153" + /db_xref="UniProtKB/TrEMBL:Q939X8" + /note="orf7" + /product="putative antiporter" + /protein_id="CAC48373.1" + /transl_table=11 + /translation="MLHTFAAAVAPVAPIAAHSLLVFLLQIGLLLLLAVVLGRLAGRFG + MPAVVGELFVGVILGPSLLGWAAPGLHSWLFPAVAEQYHLLDAVGQVGVLLLVGLTGVQ + MDMGLARKRGLTAAGVSIGGLVLPLGLGIGAGYLLPKVLVPEGTDVTVFAMFLGVALCV + SAIPVIAKTLIDMKLLHRNIGQLTLTAGMVDDVFGWFMLSVVSAMAVNAVSAGTVLTSL + AYLVAILAFCFTLGRPLARGVLRVAAKSDGPGLTVATVVVLIFLAAAGTQALGLEAVFG + AFLCGILLGTAGKVDPAKLAPLRTVVLSGLAPLFFATAGLRMDLTALTHPVVLLTGLVV + LALAIAGKFAGAFAGARLSGLNKWEGLALGAGLNARGVIQVVVAMVGLRLGILSVEVYT + IIILVAIVTSLMASPILRFAMSRVEQTAEEQVRENEHRAWNTHPAANPQEQSL" + gene 56501..57913 + /gene="dvaA" + CDS 56501..57913 + /codon_start=1 + /db_xref="GI:15131506" + /db_xref="InterPro:IPR005212" + /db_xref="UniProtKB/TrEMBL:Q939X7" + /function="probably involved in the biosynthesis of + dehydrovancosamine" + /gene="dvaA" + /product="putative NDP-hexose 2,3-dehydratase" + /protein_id="CAC48374.1" + /transl_table=11 + /translation="MLPDLVPPVVVRPRDGRDHADRIALSAATTDGVHMRTEDVRAWIA + ERREANDFHVERVPFRDLDQWSFEEVTGNLVHHSGRFFTIEGLHVIEHDGPNGDGPYRE + WQQPVIKQPEVGILGILGKEFGGVLHFLMQAKMEPGNPNLVQLSPTVQATRSNYTKAHG + GTNVKLIEYFAPPDPEHVIVDVLQAEQGSWFFRKSNRNMIVETVDDVPLWDDFCWLTLG + QIAELMHEDETINMNARSVLSCLPYHDAAPGARFSDVQLLSWFTNERSRHDVRARRIPL + ADVCGWKQGDEAIEHEDGRYFRVLAVAVRGSNRERISWTQPLLESVDLGVVAFLVREIG + GVPHVLVHARADGGFLDTVELAPTVQCTPQNYAHLPAENRPPFLDVVLNAPESRIRYEA + IHSEEGGRFLNVRARYLAIEADDTVEPPPGYTWVTPAQLTALTRHGHYVNVEARTLLAC + LNAATAQPRGGA" + gene 57915..58214 + /gene="dvaE" + CDS 57915..58214 + /codon_start=1 + /db_xref="GI:15131507" + /db_xref="UniProtKB/TrEMBL:Q939X6" + /gene="dvaE" + /note="probably not active because of an in frame deletion + of 226 aa" + /product="putative 4-ketoreductase" + /protein_id="CAC48375.1" + /transl_table=11 + /translation="MKTVTVLGASGFAGSAVHRLGEVFRLVAREVAGHTGRGPVDVPCV + APPSHAPETDFRSVTVGSTPFRSITGRRPEMSRPEGVRRTVAALPSSDQGKVRT" + gene 58211..59320 + /gene="dvaB" + CDS 58211..59320 + /codon_start=1 + /db_xref="GI:15131508" + /db_xref="GOA:Q939X5" + /db_xref="HSSP:1MDO" + /db_xref="InterPro:IPR000653" + /db_xref="InterPro:IPR015421" + /db_xref="InterPro:IPR015422" + /db_xref="InterPro:IPR015424" + /db_xref="UniProtKB/TrEMBL:Q939X5" + /function="probably involved in the biosynthesis of + dehydrovancosamine" + /gene="dvaB" + /product="putative C-3 amino transferase" + /protein_id="CAC48376.1" + /sec_met="Type: none" + /sec_met="Domains detected: DegT_DnrJ_EryC1 (E-value: + 1.8e-118, bitscore: 387.1, seeds: ?)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MTTRVWDYQAEYRNERLDLLDAVETVFDSGQLVLGASVRGFEAEF + AAYHGVGHCVGLDNGTNAIKLGLQALGVGPGDEVITVSNTAAPTVVAIDGTGATPVFVD + VREDDFLMDTGQVAAAITERTKCLLPVHLYGQCVDMAPLKDLAAKHGLSILEDCAQAHG + ARQNGTVAGSTGDAAAFSFYPTKVLGAYGDGGATITSDESVDRRLRRLRYYGMDKQYYT + LETPAHNSRLDEVQAEILRRKLKRLDTYVAARQAIAQRYVDGLGDTELKLPRTVPGNEH + VYYVYVVRHPRRDDIIERLKAYDIHLNISYPWPVHTMTGFAHLGYATGAFPVTEKLAGE + IFSLPMYPALSADLQDKVIHAVREVVSTL" + gene 59344..59961 + /gene="dvaD" + CDS 59344..59961 + /codon_start=1 + /db_xref="GI:15131509" + /db_xref="GOA:Q939X4" + /db_xref="HSSP:1EP0" + /db_xref="InterPro:IPR000888" + /db_xref="InterPro:IPR011051" + /db_xref="InterPro:IPR014710" + /db_xref="UniProtKB/TrEMBL:Q939X4" + /function="probably involved in the biosynthesis of + dehydrovancosamine" + /gene="dvaD" + /product="putative 3,5 epimerase" + /protein_id="CAC48377.1" + /sec_met="Type: none" + /sec_met="Domains detected: dTDP_sugar_isom (E-value: + 1.2e-60, bitscore: 195.4, seeds: ?)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MQARKLAVDGAIEFTPRVFPDDRGLFVSPFQEEAFAEARGGPLFR + VAQTNHSMSKRGVVRGIHYTMTPPGTAKYVYCARGKALDIVVDIRVGSPTFGRWDAVLL + DQRDHRAMYFPVGVGHAFVALEDDTAMWYLLSTAYVARNELALSVLDPALGLPIDADVD + PILSERDQVAVTLAEAGRQGLLPDYATCLELDRQLSEVSLSA" + gene 60179..61297 + /gene="dpgA" + CDS 60179..61297 + /citation=[2] + /codon_start=1 + /db_xref="GI:15131510" + /db_xref="GOA:Q939X3" + /db_xref="InterPro:IPR001099" + /db_xref="InterPro:IPR011141" + /db_xref="InterPro:IPR012328" + /db_xref="InterPro:IPR016038" + /db_xref="InterPro:IPR016039" + /db_xref="UniProtKB/TrEMBL:Q939X3" + /function="involved in the biosynthesis of + 3,5-dihydroxyphenylglycine" + /gene="dpgA" + /product="dihydroxyphenylacetic acid synthase" + /protein_id="CAC48378.1" + /sec_met="Type: t3pks" + /sec_met="Domains detected: Chal_sti_synt_C (E-value: + 1.2e-16, bitscore: 52.4, seeds: 21)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MGVDVSMTTSIEPAEDLSVLSGLTEITRFAGVGTAVSASSYSQSE + VLDILDVEDPKIRSVFLNSAIDRRFLTLPPESPGGGRVSEPQGDLLDKHKELAVDMGCR + ALEACLKSAGATLSDLRHLCCVTSTGFLTPGLSALIIRELGIDPHCSRSDIVGMGCNAG + LNALNVVAGWSAAHPGELGVVLCSEACSAAYALDGTMRTAVVNSLFGDGSAALAVISGD + GRVPGPRVLKFASYIITDALDAMRYDWDRDQDRFSFFLDPQIPYVVGAHAEIVADRLLS + GTGLRRSDIGHWLVHSGGKKVIDSVVVNLGLSRHDVRHTTGVLRDYGNLSSGSFLFSYE + RLAEEGVTRPGDYGVLMTMGPGSTIEMALIQW" + gene 61294..61947 + /gene="dpgB" + CDS 61294..61947 + /citation=[2] + /codon_start=1 + /db_xref="GI:15131511" + /db_xref="GOA:Q939X2" + /db_xref="InterPro:IPR001753" + /db_xref="UniProtKB/TrEMBL:Q939X2" + /function="involved in the biosynthesis of + 3,5-dihydroxyphenylglycine" + /gene="dpgB" + /product="putative enoyl-CoA hydratase" + /protein_id="CAC48379.1" + /transl_table=11 + /translation="MNGELVLRLDGTRPLSAASVEELDALCDRVEDHREPGPVTVHVTG + VPAAGWTAEVTVGLVSKWERVVRRFERLGRLTIAVAAGDCAGTALDVLLAADVRIAAPG + TRLLLARAGGAPWPGMTVHRLTRQAGAAGIRRAVLLGAPIEAGRALALNLVDEVSEDPA + AALAELAGTAGAVDGKELAIRRQLVFEAGSTAFEDALGAHLAAADRALRRETAS" + gene 61944..63248 + /gene="dpgC" + CDS 61944..63248 + /codon_start=1 + /db_xref="GI:15131512" + /db_xref="GOA:Q939X1" + /db_xref="HSSP:1MJ3" + /db_xref="InterPro:IPR001753" + /db_xref="UniProtKB/TrEMBL:Q939X1" + /function="involved in the biosynthesis of + 3,5-dihydroxyphenylglycine" + /gene="dpgC" + /product="hydroxyacyl-dehydrogenase" + /protein_id="CAC48380.1" + /transl_table=11 + /translation="MTAAPPTSPPGPRLDRPALAEAAGRVDDLLAELPPPSARTPGQRE + AASSALDGIRAMRADYVGAHAEAIYDELTDGRSRSLRIDELVRAAARAFPGLVPTDEQM + AAERARPQAEKDGREIDQGIFLRGILRAERAGPHLLDAMLQPTPRALKLLPGFTESGVV + QMEAVRLERRDGVAYLTLCRDDCLNAEDAQQVDDMETAVDLALLDPAVRVGLLRGGEMS + HPRYRGRRVFCAGINLKKLSSGGIPLVDFLLRRELGYIHKIVRGVVTEGSWHSRLTDKP + WIAAVDSFAIGGGAQLLLVFDHVLAASDAYFSLPAAKEGIIPGASNFRLSRFAGPRVAR + QVILGGRRIRADEPDARLLVDEVVPPAELDAAIDAALARLDGEAVLANRRMLNLAEEPP + DEFRRYMAEFALQQALRIYGEDVIGKVGRFAAGSS" + gene 63245..64054 + /gene="dpgD" + CDS 63245..64054 + /codon_start=1 + /db_xref="GI:15131513" + /db_xref="GOA:Q939X0" + /db_xref="HSSP:1MJ3" + /db_xref="InterPro:IPR001753" + /db_xref="InterPro:IPR018376" + /db_xref="UniProtKB/TrEMBL:Q939X0" + /function="involved in the biosynthesis of + 3,5-dihydroxyphenylglycine" + /gene="dpgD" + /product="putative enoyl-CoA-isomerase" + /protein_id="CAC48381.1" + /transl_table=11 + /translation="MSGDRVRYEKKDHVAYVTLDRPGVLNAMDRRTHEELAGIWDDAEA + DDEVRVVVLTGAGNRAFSVGQDLKERARLNEAGARATTFGSRGQPGHPRLTDRFTLSKP + VVARVHGYALGGGFELVLACDIVIASDDSVFALPEVRLGLIPGAGGVFRLPRQLPQKVA + MGYLLTGRRMDAATALRYGLVNEVVPPEELDRCVAEWTDSLVRAAPLSVRAIKEAALRS + LDLPLEEAFTASYTWEERRRRSEDAIEGPRAFAAKRDPVWTGEYRPG" + gene 64162..65259 + /gene="ald" + CDS 64162..65259 + /codon_start=1 + /db_xref="GI:46275290" + /db_xref="GOA:Q799B0" + /db_xref="HSSP:1KFL" + /db_xref="InterPro:IPR006218" + /db_xref="InterPro:IPR006219" + /db_xref="InterPro:IPR013785" + /db_xref="UniProtKB/TrEMBL:Q799B0" + /gene="ald" + /product="putative alodlase" + /protein_id="CAG25757.1" + /transl_table=11 + /translation="MAAMTHTVATTDLDNQRIERIVPLVTPALLHHELPLSATAAETVR + KGRESVVRVLDGTDDRLLVITGPCSIHDPAAALDYAGHLAAIAGEVAGDLLVVMRVYFE + KPRTIGGWKGLINDPHLDGTGDVNHGLRTARHLLLELAERGLPAACEWLDTTIPAYFAD + TVSWGAIGARTVESQNHRMLASGLSMPVGFKNRRDGDITVAIDAIRAAAVRHVVPGVDP + GGLPAILHTAGNPDCHVVLRGGDGAPNHDSASVHKTLTALEAAGLPGRVVIDASHDNSG + KDHHRQPLVAAEIAGQVENGRNGIVGVMLESNLRAGRQDLQPGRPPAYGQSITDACIDV + PTTRTVLHGLAAAAAARRKLGKQAS" + gene 65378..>66669 + /gene="pks" + CDS 65378..>66669 + /codon_start=1 + /db_xref="GI:46275291" + /db_xref="GOA:Q799A9" + /db_xref="HSSP:1BA3" + /db_xref="InterPro:IPR000873" + /db_xref="InterPro:IPR020845" + /db_xref="UniProtKB/TrEMBL:Q799A9" + /gene="pks" + /product="putative type I polyketide synthase" + /protein_id="CAG25758.1" + /sec_met="Type: none" + /sec_met="Domains detected: AMP-binding (E-value: 2.1e-75, + bitscore: 245.0, seeds: 400); AMP-binding (E-value: + 2.1e-75, bitscore: 245.0, seeds: 400)" + /sec_met="Kind: biosynthetic" + /transl_table=11 + /translation="MEEIRTEFIRPLLTSLSAHAADRPAYSDDRRTLTYGGLAHAAAEL + AAGLGVARGDRVLVHVGSRVEFAVALLAVLRAAAVGVPVSVRSTDAELAHLAADSGATL + LVTEARHAAAAERLRRDRPGLRVLFVDDPPPARVGEPRDDLGLDEPAWLLYTSGTTGRP + KGVLLSQRAMLWSTAAYYVPMLGLDAEDTVLWPLPTHHAYALSLAFVTTIALGAHTRLA + DGCTPDLLARYPGSVLAGVPALYLRLRQESGGPLAAPRLCLSGGAPCTPATRAAVRDLF + GLPVADGYGSTETGGKVAAELPGEAGLVPVPGLEIRIDAGEVLVRGPGLMLGYHGRTES + PLRDGWYRTGDAGRFEGGRLVLEGRVDDVIVCGGQNVHPAEIEAVLEESPSVRDVLVLG + RPDDVLGEVPVAFVVAGPGGFDAEELRGRCL" +ORIGIN + 1 ctgcagcagc agcgtcgccg accccaccac ccggctgatg tgaccggagg cgatgatgac + 61 gacacggcgt ttctcggcga tcgggagcag cgtttcgatg gcctcttcca caagcaggga + 121 caggtcgacc ggttcccggg tgaacgcgcg ctgatcggcg cggctgagca ccagcagcgc + 181 ctcggtgagg tcgatcgccc gggcgttcac cgcgtggagg cggtcgaaga ccagcagcgg + 241 gtctttcgcc ggatcgttgc gggccacttc gagaagcgcc tgcgtgatcg ccagcggggt + 301 gcgcagctcg tgcgaggcgt tcgcggcgaa cctccgctgc gcggcgacgt gggcttcgag + 361 ccgggcgagc atggcgtcga aggcatcggc gagttcgcgg aactcgtctt cggtgccttc + 421 cagccggatc cggtgggaga gcgacccgtt cgcggccatg cgcgcggcgt cggtgatccg + 481 ggtcagcggg gcgagcatgc ggccggcgag gatccagccg cccagcaggc cgaacagcag + 541 caggaagaac agcactgagc ccgcggccgg caggaagttg cgcaccagga cggaccgctc + 601 cagcaccccg ccggggggcg gtttggccag cacgtcgggg acatcacgca gcaggaacag + 661 ccacacggaa gcgagcagca gtacaccggc gagcacgagg aagcaggcgt agctgagggt + 721 gagtttgagg cggacgctca tcccggccgc tctgtccacc gcctgctccc tagcgcccgg + 781 cgccgggcgc cgcgccgatg cggtacccga cgccggccac cgtggtgatg atccagggct + 841 cgccgagccg cttgcgcaga gccgaaaccg tgatgcgcac agcgttggtg aacgggtcgg + 901 cgttcttgtc ccaggcccgt tccagcagct cttcggcgct gacgacaccg ccgtcggcgg + 961 aaacgaggac ttcgagcacg gcgaactgct tcctggtcag cgcgatgtag cggtcgtcgc + 1021 ggtagacctc gcggcggaac gggttcagcc gcaggccggc gatttccagt accggcggcc + 1081 ggttgtgggc acgccggcga tcgagcgccc gcagcctgag cacgagctcc cgcagttcga + 1141 acggcttcgt gaggtagtcg tcggcaccga gctcgaaccc ggtgatcttg tcgtcgagcc + 1201 ggtcggcggc ggtgagcatg aggatcggca ggccgctgcc ggaagcgacg atccgtttgg + 1261 cgatctcgtc cccgctgggt ccgggaatat cccggtcgag gacggcgatg tcgtaggtgt + 1321 tgaggctcag cagttcgaga gcggtgtcgc cgttgcccgc ggtgtccgcc gcgatcgctt + 1381 ccaaacgcag gccgtcgcgg atggcttcgg ccagataggg ctcgtcctcg acgatcagca + 1441 cgcgcatgcc ccgatggtac gagaggccac tacatatcgt cggcatatgg aaaatcgcat + 1501 acgtgccggc aacacatcgc cgacttgaat ggacacatga cctaccgcga gtcggcccgg + 1561 acgacgaccc gccggattcc cggcgccgtc gtgccggtgg cccgccggat tcgcggggtc + 1621 cttctcgccg gcctgcgcgc cgtcggcacg aggattgccc ggtcgcccgg tcgcccggtc + 1681 cgcccccagg accgtgccgg cctcggcaag acccacggtg ccgtccccgc cggggtgacg + 1741 gtcttcgacg acgacgtccc ggctgtgact cgcctcgacc cggcgcttct gagtgcactg + 1801 cgccgggccg cgaccgcggc cgccgacggc ggggtcgaac tgtgcgtgaa cagcggctgg + 1861 cggtctccgg aataccagag ccggcttctt cgcgaggcgg tggcgaaata cgggtcggcg + 1921 gcggcggccg cccggtgggt ggccaccccg gagacgtcga tccacgtggc ggggaaggcg + 1981 gtcgacatcg ggccacccgc gtccgcgtcg tggttgtccg agcacggcgc cgattacggg + 2041 ttgtgccgcg tctaccgcaa cgaaccctgg cacttcgaat tgcgtcccga agcgatcgag + 2101 cacggctgcc cgcccctgta tgccgacccg agtcacgacc cgcggctgcg ccggtgacca + 2161 gggtcgcccg gcgtgctccg cagacggccc ggaaatttgc ccctaccatc catggatatg + 2221 gcggtcaagg ggcagtcggg tgagcccggc tcgcggacgg aatgcgagcg gtggacacta + 2281 taatcatttc tcgcggacaa tcaaggctcg caatcggccc aatgtgatcc actgaggagt + 2341 ccagccggtg acattccggc tcaatgtgtc cggcgcggcg catcatacta atggttgacg + 2401 atcgcgtcaa tacaacatta gtgaaagagt cgctcctcga tcccggccga tggcctgcgc + 2461 cttggacatc acgattgttc gtcaattgcc gacaacgatc ggtcatgcca tgctgacgtc + 2521 gtgtgcactc actggggatc aagggagggc gcggcggtga ccggggacca cgaggcaatc + 2581 ggggttccat cgggtcgaac cgctgagccg cgcccggcat gtactgggaa ttcgttgtcg + 2641 aacatgccct gtggcctcgc gttacccgac cgccggcctt ggtcgggctg acggtcgtcg + 2701 gtctcggccg ggtacaagcc accgaccggt agttccgcgg cgatcatcgc ccggggggat + 2761 tgcaagctgc atcgagttcc ggccgatggc cggaacgcgg gctgctgcct gccgtccggg + 2821 gaatcgcgga tcgttcgcgc acaggcctgt ccgcggcgtt catggggatc catcagcgtc + 2881 gaggacatcc gccgcggccg aggccggccc ggacaccggt cgtgtgtcca accaccgagg + 2941 ctgagcagtt ggacggtgcg gttctctgcc gtgccgcaaa gttgacgaag tagatccaaa + 3001 tgggggctag gtggatccga cgagagttga catattcgct ctccctgccg tcgaaatcga + 3061 gctgtcccgg ctgtcttccg cgagctcacc gcgaacatcg ggtgaggatc cggagcacgt + 3121 cgagacgctg ttgtcggcag agggagaact tccgcccatt ctcgttcacc gtccgacgat + 3181 gcaggtgctc gacggcctgc accggttgaa ggtggcgcga gtccggggtg acacgaaaat + 3241 cctggccaga ctggtcgacg ccaccgaatc ggatgcgttc gtcctggcgg tcgaggcgaa + 3301 catccggcac ggtctgccgc tgtccctcgc cgatcgcaag cgtgcggccg tccagatcat + 3361 cgggacgcat ccgcagtggt ccgatcggcg ggtggcctcg gcgaccggga tctccgcggg + 3421 cacggtggcc gacctgcgca ggcgcgcggg agaggacggg accgaggccc ggatcgggcg + 3481 ggacgggcgt gtccgcccgt ccgacggttc ggagcggaga agactcgccg ccgagctcat + 3541 ccgcagcgat ccgggtctgt ctctgcggca ggtcgccaag caggtcggca tctccccgga + 3601 gacggtgcgt gacgtgcggg gccggctgga gcgcggggag agcccgactc cggacgggac + 3661 aaggagattg ccggccaagc cgcacccgct gcggttgtcg gagcccgact tcggccgtgc + 3721 cgtggaccag gatcggctcg cgctgctgga aaggctcaag agcgacccgg cactgcggct + 3781 gaacgaggtc ggccggatcc tgctgcgcat gctcaccatg cactccatgg acgggcagga + 3841 gtgggaacgg atcctgcagg gtgttccacc acacctgcac ggcgtgatcg ccgggttcgc + 3901 ccgggaccac gcccgggtct gggcggagtt cgccgaccac ctggagagcc gggcgaccga + 3961 gctggccgcg ggatgatcgc gtgaccacgg cggagcccag tggcgtcgcc ggttccggtg + 4021 cgggccgccg caccggaacc ggcgtcgccg tccgcccaag tcccgcggct ggacttcgtc + 4081 cggtgagccg ttttaagaag gaacggtgac catcgagaaa gcgcttgtcg tcggtaccgg + 4141 gctgatcggc acctcggtgg cgctggccct ccgggagaag ggcgtcgcgg tcttcctctc + 4201 cgacgtcgac accgaggccg cccggctggc gcaggtactc ggcgccgggc gggagtgggc + 4261 gggagaaggt gtggatctgg cggtgatcgc cgtgccgccg cacctggtgg gggaccggct + 4321 ggccgacctg cagaagcaag gtgcggcccg ggtgtacacc gacgtggcca gtgtgaaggc + 4381 cgatccgatc gccgacgcgg agcggctcgg gtgtgacctg gcctcctatg tgccgggcca + 4441 cccgcttgcc ggccgggaac gctcgggccc ggccgccgcc cgcgccgagc tgttctcggg + 4501 ccggccgtgg gcactgtgcc ccggccccga gacggacgcg gaagccctgc gacgggtgcg + 4561 ggagctggtg tccctgtgcg gggcgacggc cgtcgtcgtg ggtgcgggcg agcacgactc + 4621 ggccgtggcg ctggtgtcgc acgccccgca cgtggtggcg tcggcggtgg cggccagcct + 4681 ggcgagcggc gacgacgtcg cgctgggcct ggcggggcag ggactccgtg atgtgacgcg + 4741 catcgcagcc ggggatccct tgctgtggcg gaggattctc tccgggaaca cccggccggt + 4801 ggccggggtg ctcgaacgga tcgcggccga cctcgccgcg gcggcctcgg cgttgcggtc + 4861 cggcgacctg gacgaggtga cggatctgct gcggcgcggc gtggacggtc acggccggat + 4921 ccctggtcag cgcggcggat cccttcccgg ccgcaacccg gcgggttccc cggggcgtta + 4981 ggccggccgc aaaaacgatt gccgaaggtg gccggaacgt ccgtcgtgat tgtacggtta + 5041 tccgtgcggc gcggcacggg ggcgtcggca aaaaaatgcg tccaagtgcc gaaagcgctt + 5101 gcttggaccc actcgtggac atcgactcga ttcagcacga ttgagatcgc cgactttggc + 5161 gtgtgagaga ggtgaccgga tggacatggt gttgcgtttc gagggggtgg acaagagccc + 5221 tgacgacccc gacccctggg tgaccaaggt ccgcaagggg acgctgcgcc gcgtgctcgc + 5281 ctacttccgc ccgcacgtcg ggaaggtggc gctcttctgt ctcgtcgccg tgctggagtc + 5341 gctcatcgtc gtggcaactc cgttgctgtt gaaggaactc atcgacaacg gcatcgtcaa + 5401 gaacgatctc ggggtcgtga tcctgatggc cggcctcacc gcggtgctcg ccgtgctggg + 5461 cgccgggctg acgatggtgt ccggctacat ctccgggcgg atcggggagg ggatcaccta + 5521 cgatctccgg gtccaggcgc tcggccacgt ccggcggctg ccgatcgcgt tcttcacccg + 5581 tacccagacg ggggtgctgg tcggcaggct gcacacggaa ctgatcatgg cgcagcagca + 5641 tttcaccggc ttgctcatgg cggccaccag cgtggtcatg gtcgtggtgg tgctggccga + 5701 gctgatctac ctttcgtgga tcgtcgccat cgtctcgctg gtgctgattc cgatattcct + 5761 cgtgccctgg attcgcgtgg ggcgggcgat ccagcggcgc agtatccggc tcatggacgc + 5821 gaataccggc ctcggcgggc ttctccagga gcggttcaac gtccaggggg ccatgctctc + 5881 caagctcttc ggccgtcccg ccgaggaaat ggccgagtac gaggagcgtg ccggggagat + 5941 ccgcaagatc ggcgtgagcc tttccgtgtg gggccggatg gccttcgtca tgatggcgct + 6001 gatggcctcg ctcgccacgg ccctcgtcta cgggatcggg ggcgggctcg tgctcgccgg + 6061 tgcgttcgag ctcggcacgc tggtcgccat cgccaccctg ctccagcggc tgttcgggcc + 6121 gatcacccag ctgtccggga tgcaggagct cgcgcagacg gtcgtggtga gcttttcccg + 6181 ggtcttcgag ctgctcgacc tcaagccact gatccaggaa cgccccgacg cgatcgcgct + 6241 gaagaagaag gtggtgccgg acgtcgagtt cgagcacgtg tcgttccgct accccaccgc + 6301 ggacgaggtc tcgctggcgt cgctggagca cctgcgggcc gagcgggagc gcagcgaagt + 6361 gacgccggat gtcctgcgcg acgtgagctt ccacgcgcag gccggaaccc tcaccgcgct + 6421 cgtcggcccg tccggcgcgg ggaagagcac catcacccac ctggtctccc ggctgtacga + 6481 cccgaacggc gggaccgtcc gcctcggcgg ccacgatctg cgcgatctca ccttcgaatc + 6541 gctccgcgaa gcggtcgggg tggtcagcca ggacgcctac ctcttccacg acacgatccg + 6601 ggagaacctc ctctacgccc gcccgaccgc caccgaggac gagctgatgg aggcgtgcaa + 6661 gggggcccag atccgggacc tgatcgactc cctcccgctc gggctggaca ccgtcacggg + 6721 cgatcgcggc taccgcatgt cgggcgggga gaagcaacga ctggccatcg cccggctgct + 6781 gctgaaggag ccgtcgatcg tcgtcctcga cgaagccacc gcccacctgg actccgagtc + 6841 ggaggccgcc gtccagcggg cgctcaagac ggccctgcac ggccggacct cgctggtgat + 6901 cgcccaccgg ttgtccacga tccgcgaggc cgaccagatc ctcgtgatcg acggcggcag + 6961 ggtgcgggag cgcgggacac acgacgagct gctggcccag ggcggcctgt acgcggagct + 7021 ctaccacacg cagttcgcca acccggccgc caacgacccc aagccggaga tcgaggacga + 7081 gctcgacgac atcgagcccg agccggtgat ccaacacatg ggctacggag gatgacgatg + 7141 aattccgcag cgcggaccac gccgacgatg ctggatctgt tcgcttcgca cgtggaccgg + 7201 acacctgacg cggtggccgt ggccggcggt gacggggttc tgacgtaccg gcagctcgac + 7261 gagcgcgcgg gccggttggc ggggcggctg gcgagtcgcg gcattcgccg tggcgaccgc + 7321 gtcgcggtgg tgatggaccg ttcggcggac ctggtggtgg cgctgctcgc cgtgtggaag + 7381 gcgggggcgg cgtacgtgcc ggtggacgcc ggctaccccg cgccgcgagt ggccttcatg + 7441 gtggcggact cggcggccaa gctcgtggtg tgctcggccg cgtcgcgcgg cgccgtaccg + 7501 gccggggtcg agtcgctcga gccggccgcc gccgccgagg agggcgcgtc cgacgcgccg + 7561 gcggccacgg tgcgaccggg ggatccggcg tacgtgatgt acacgtccgg ctcgacgggc + 7621 acaccgaagg gcgtgaccat ttcgcagggc tgcgtcgcgg agctgacgat ggacgccggg + 7681 tgggcgatgg agcccggcga ggcggtgctc atgcattcgc cgcacgcctt cgacgcgtca + 7741 ctgttcgaac tctggatgcc gctggcgtcg ggggtccggg tggtgctcgc cgaaccgggt + 7801 tcggtggacg cccggcggct gcgggaagcg gccgcggccg gggtgacgag ggtgtacctc + 7861 accgcgggga gcctgcgcgc ggtggcggag gaggcgccgg aatcgttcgc ggagttccgt + 7921 gaggtgctga ccggcggtga cgtggtaccc gcgcacgcgg tggagcgggt gcggacggcc + 7981 gcaccccggg cgcggttccg gaacatgtac ggcccgacgg aagcgacgat gtgcgcgacg + 8041 tggcacctgc tgcagccggg tgacgtggtg ggcccggtcg tgccgatcgg ccgtccgctg + 8101 accggccgcc gggtgcaggt gctcgacgcg tcgctgcggc ccgtggggcc gggtgtggtc + 8161 ggcgacctgt acctctccgg ggcgctggcg gagggctact tcaaccgggc ggcgctgacg + 8221 gcggagcggt tcgtggcgga tccgtccgca ccggggcagc ggatgtactg gaccggggac + 8281 ctcgcccagt ggaccgcgga cggtgagctg gtgttcgcgg gccgggccga cgaccaggtg + 8341 aagatccgcg ggttccggat cgagcccggc gagatcgagg ccgcgctgat cgctcagccg + 8401 gacgtgcacg acgccgtcgt ggcggcggtc gacggacggc tgatcgggta tgtggtgacc + 8461 gagggggacg ccgatccccg ggtcatccgc gaacgcctcg gtgcggtgct gccggagcac + 8521 ctggtcccgg ccgccgtgct cgcactggac gcactgccgc tgaccggcaa cggcaaggtg + 8581 gaccggtccg cgctgccggc gcccgagttc gcggcgagtg ccgccgggcg ggcaccgagc + 8641 accgatgcgg aacgtgtcct ctgtggactc ttcgccgagg tgctcggcgt ggcacgagcc + 8701 ggcgtcgacg acggtttctt cgagctgggc ggggattcga tcggcgcgat gcggctggcg + 8761 gcccgggccg ccaaggcggg cctgctggtg acgcccgccc agatcttcga ggagccgacc + 8821 cccgcccggc tggccgccgt ggcgcggccg gtcccggccg gcgggcccgt cgacggcccc + 8881 ctgctcaccc tgaccgcggc cgaggaggcg gagctggcgc tcgccgctcc gggcgccgag + 8941 gagatctggc cgctggcccc gttgcaggag gggctgctct tcgaatcgat cctcgacgac + 9001 cagggctccg acatctacca ggtgcaggtg atcctggagc tgaacgggcc ggtggacgcg + 9061 ccccggctgc gggccgcgtg ggacgcggtc gtccggcggc accccgagct ccggctgagc + 9121 ttccaccgcc tcgcctcggg caagacggtg caggccgtcc acggggacgt caccccgccg + 9181 tggcgggtgg tggacctgac gggtgccggc gacgtcgacg cggccgtcgc ggccctcgtc + 9241 gccgaggaac agcagcagcg gttcgaactc gccacggcgc cgctggtccg gctggtgctg + 9301 gtccggatcg cggcggaccg gtaccgcctg ctgttcgtca tccaccacat cctcgtcgac + 9361 ggctggtcgg tggcggtcat cctcaacgac gtctccgagg cgtacgaagc cggcgagccg + 9421 gtgccggaac agcggggcgg cgccaccttc cgggactacc tggcctggct ggaccggcag + 9481 gacgacgacg cggcccgggc ggcctggcgg gcggagctgg ccggtctcga cgagcccgcg + 9541 ctgatcgcga cttcgggcgt cgagacggag tacgactacc gcgccacgca cctgacgccg + 9601 gccctgcaca ccaggctgct ggggttcgcc cgcgagcacg ggctgacgcc gagcacggtg + 9661 gtgcacgccg cctgggcgat ggtgctggcg cggctcacgc ggcggaccga cgtcgtgttc + 9721 ggcaccatgg tcgcgacccg tcccccggaa ctggcgggga tcgagtcgat gccgggcctg + 9781 ctgatgaccg cggtgccggt ccgggtgccg ctggacggcg ggcaatcggt cctggacatg + 9841 ctcaccgacc tgcacagcag gcagacggcc ctcaaacgac accagtacct ggggctgccg + 9901 gagatccaga aggcggcggg accgggcgcg acgttcgaca cgatgctggt ggtcgagaac + 9961 tacccgcggg agtacgcccg ccggtacacg catctgcgca cgatcgaggg gacccactac + 10021 ccggtgaccc tgggcatcac cccgggcgac cggttcaaga tccagctcgg ctactggccg + 10081 ggccaggtcc cggacaccgt cgccgagtcg ctgctggagt ggttcgtcgg cgccatcggc + 10141 gcgctggtcg ccgatcccgc cggcctggtg gggcggatcg ggatgggcgc ggccgacgtg + 10201 cgccgctggg acccgccgct gcaggcgggg gagccgctgc cggccctggt ggggcggatg + 10261 gcggcgcggc cgccggacaa cgtggcggtc gtggacggcg acggtgcgct gtcctatgcg + 10321 gacttgtggg agcggtcgct gaagttcgcg gccgtcctgc gggcccacgg agtccggtcc + 10381 gaggaccggg tcggcctggt ggtggggcgc tcggcctggt ggacggtcgg catgctgggc + 10441 gtcctgctgg cgggcggcac gttcgtgccg gtggacccgg cctatccggc cgagcgcaag + 10501 gaatggatct tccggagcgc gaacccgatg ctggtggtgt gcgcgggcgc gacacggggg + 10561 gcggtgcccg cggagttcgc ggaccggctg gtggtgatcg acgaggtcga tccggccgcg + 10621 ggctcggcgg gggacctgcc gcgggtggat ccgcgcagtg ccgcgtacgt gatctacacg + 10681 tcggggtcga cgggaacccc gaagggggtc gtcgtcaccc atgccgggct gggaaacctg + 10741 gcgctggcgc acatcgaccg gttcggggtg tccccgtcgt cacgggtgct gcagttcgcg + 10801 gcgctcgggt tcgacaccat cgtctccgag gtgatgatgg cgttgctctc gggagcgacg + 10861 ctggtggtgc cgccggagcg ggacctgccg ccgcgggcgt cgttcaccga cgccctggaa + 10921 cggtgggaca tcacgcacgt gaaggcgccg ccgtcggtgc tgggcacggc cgacgtgttg + 10981 ccgtcgacgg tggagacggt ggtggcggcg ggcgagctct gcccgccggg cctggtggac + 11041 cggctgtccg cggaccggcg gatgatcaac gcctacgggc cgaccgaaac cacgatctgc + 11101 gcgacgatga gcatgccgtt gtcgcccggc cagcacccga tcccgttcgg caagccggtg + 11161 ccgggggtgc gcggatatct gctggactcg ttcctgcgcc cgttgccgcc cggggtcacc + 11221 ggtgagctct acctggccgg gatcggcgtg gcccgcggct acctcggccg ttcggcgctg + 11281 acggccgagc ggttcgtcgc cgatccgttc gtgcccggtg agcggatgta ccggaccggg + 11341 gacctggcgt actggaccga acagggcgag ctggtgtccg ccgggcgcgc cgacgaccag + 11401 gtcaagatcc gcggcttccg tgtcgaaccc cgcgagatcg agttcgcctt gtccggctac + 11461 ccccgggtca cccaggccgc ggtcgccgtc cgcgacgacc gcctggtcgc ctacgtgaca + 11521 ccaggcgaca tcgacacgca ggcggtgcgg gcgcacctcg cgtcccggat gccccagtac + 11581 atggtccccg cggcggtggt ggcgctggac gccctgccgc tgacggcgca cgggaagatc + 11641 gatcggcgcg cactgcccga ccccgacttc accgccggga agcaggccag ggagccggcc + 11701 accgagaccg agcgggtgct gtgcgagttg ttcgccggcg tgctcggcct ggcgcgggtc + 11761 ggggtggacg acagcttctt cgagctcggc ggggactcca tcctctcgat gcagctggcg + 11821 gcgcgggcgc ggcggtcggg gctgacgttc accgcggcgg acgtcttcga cgggaagacg + 11881 cccgagcgga tcgcgcagct ggcggcggag tcgtcggtgc cggagcccgg tcgttccccg + 11941 aaacccgatg gcgtcggtga cgtcgcgtgg acgccggtga tgtggatgct gggagacggc + 12001 gtcgcgggac cggcgttcgc gcagtggatg gtggtcggga cgccttcgga cctgacggag + 12061 aaggcgctgg cggcgggctt tgcggccgtg gtggatacgc acgacatgct gcgggcgcgg + 12121 gtcgtcgccg acgagggcgg ccggcgcctg gtggtgggtg agcgtgggtc ggtggatgtc + 12181 gccggggcgg tcacccgcat ccgcgccgat ggccgctcgc tggacgaagc cgtggcggac + 12241 gcggcgcgcg cggccgtgac ccggttggac ccgtcggcgg gcgtgatggc ccaggcggtg + 12301 tgggtcgacg ccggaccgga ccaggtgggg cggctggtgg tggtggcgca ccacctgtcg + 12361 gtcgatggcg tgtcgtggcg gattctgctg tcggatctgc aggcggcctg cgaagccgcg + 12421 gtcgcggggc gggagccggt gctggagccg gtcggtgcgt cgttcaagcg gtgggcgggc + 12481 ttgctggccg agtgggcggt ttccgcggag cgggccggtg agctggccgc gtggaaggcg + 12541 attctcggac cgggggaccg gccggccggt gcgcaggcca cgagccgggc cgcggaaggt + 12601 gccgtgcgct cgcggtcgtg ggtcgtgccg aaggtggaga cggcggcgtt ggcaggccgg + 12661 gctccggtgg cgttccactg cggggtgaac gaggtcctgc tcgccgggct ggcgggcgcg + 12721 gtcgcgcggt ggcgcggcgg ggacgccgtg ctggtggacg tggaaagcca tggccgccac + 12781 ccggtggacg ggacggacct gtcccggacg gtgggctggt tcaccagcgc acatccggta + 12841 cggctggacg tggccggcac cgatctggcg gacgtgctcg ccggcggtcc ggcggccggg + 12901 cgtttgctga aggccgtcaa ggagcagtca cgggccgtgc ccggcgacgg gctcggatac + 12961 ggcttgctgc ggtacctcaa cggcacgacg gggccggtgc tggcggacct gccgtcgccg + 13021 cagatcgggt tcaactacat gggccggttc gccgccggcg agaagagcgg ggtgcgggcg + 13081 tggcagccgg tcggtgacat cggcagttcg ctggaacccg gtatgggcct gccgcacgcg + 13141 ctcgaggtca acgcgatcgt ccaggacctg ccggacggtc ccgagctgac gctcatgctg + 13201 gaatggcagg acggcctgct cggcgaggac gagatcgacc ggctgggccg ggcctggctg + 13261 gacatgctgt ccggggtggc ccgccaggcg gctgatcccg ccgcgggcgg gcacaccgcg + 13321 tccgacttcg acctcgtcac cctggaccag gcggagatcg aggccctcga ggccgaattc + 13381 gcggccgccg gcggactggc cgaggtgctg ccgctgtcgc cgctgcagca cgggctggcc + 13441 ttccacgccg gttacgccgg cgacggcgtc gacgtctaca ccgcgcaggc ggtgctggag + 13501 ctggccggcc cgctggacgt gccgctgctg cggaagtcgg tgcgcgcgct gctggacagg + 13561 cacgcgaatc tgcgtgccgg cttccggcac ggcgccgacg ggaccgccta ccaggtggtc + 13621 cccggcgccg tggcggtgcc ggtgaccctg gtggacgtga cggaatcggc ggatccggcg + 13681 gccgaggcgg cggcggtggc cgcggccgaa cgggcgcggc cgttcgagct ggcccggccc + 13741 ccgctgctgc gggtcatggt ggtggtgctc ggcccggacc ggcaccggct ggtgctgacc + 13801 aaccaccaca tcctgctcga cggctggtcg acgccgctgc tgctggacga actgctcacg + 13861 ctttaccgca acggagccgc tccggccgcg ctggcgccgg tcaccccgta tcgggactac + 13921 ctggcctggg tgcgcgaaac cgaccgggag gcggctaccg aagcctggcg cgacgccctg + 13981 gccggcttgc ccgagccgac cctggtggcg gcggaccggc cggtcccggt cgaggtgccc + 14041 gagcagatct ggaccaccct ggacgagacg ttcgcccagg cgctgggggc gcgggcacgc + 14101 gagtgcggtg tcacggtcag caccgtgctg caggcggtgt ggggcatggt gctggcggcg + 14161 ctcaccggac gcgacgacgt ggtgttcggg tcggtggtgt ccgggcgccc ggccgagctg + 14221 ccggggatcg agaccatggt cgggttgttc atcaacaccg tcccggtccg ggtccggatg + 14281 cggccgcagg acaccttcgc cgaactggtg cggggactgc agaacgagca ggtggcgctg + 14341 ctggcccacc accacgtggg tctcaccgac atccagcagg ccgcggggct ggggcggctg + 14401 ttcgacacca tcatcgtcta cgagaactac ccgagaccgg ccgagatcgg cgacgaatcc + 14461 gccgatgccg atcgggtccg ggtgcaggga ctgaccgccg ccgatgccac ccactacccg + 14521 ctggcgctgg cggtcgtgcc gggcaccgac ctgcggctgc ggctggagca ccagcccgcg + 14581 ctgttcaccg ccgagcaggc cggcgccgtg ctcgagcggt tcacgctggt gctcgaagcc + 14641 gtcgtcgccg atccgcggct gccgctcgcg gtggtgccga tcctgtccga tgccgaacgg + 14701 cgacagctgc aggcgggcaa cgacaccgcg ctgccggtgc cggaccggac gttgccggag + 14761 ctgttcgccg cgcaggccgc cgccaccccg gaggcgaccg cggtggtctt cgaggaccgg + 14821 tcgctgacct acgccgagct cgacgcgcgc gccaaccagc tggcgcgctg gctcatcgac + 14881 cagggtgccg ggccggaagg cctggtcgcg gtgctgctgc cccggtcgct ggaactggtc + 14941 gtcgcgttgc tggcggtcac caagaccggc ggcgcgtggc tgccgatcga tccgggctat + 15001 ccggccgacc gcatcgcctt catgctcgac gacgccggac cggcgctggt gatcaccacc + 15061 gcggtgctgt cggcatcgcc gatcggtgac gtgctggccg cccgctcgag gacggtggtg + 15121 ctcgacgagc ccgcggccgc gggccagctg gcggggcggg accgcgcgcc ggtcaccgac + 15181 accgaccgcg ctcgagcgct ggatccgcgc cacccggcgt acctcatcta cacctcgggc + 15241 tccaccggtc gccccaaggc cgtggtcgtc acccatcgga acctgacgaa ctacctgctc + 15301 cactgtggac ggatgtaccc gggtctgcgg gggcggtcgg tgctgcattc gtcgatcgcc + 15361 ttcgatctga cggtcaccgc gacgttcacc ccgctcatcg tggggggaga gattcacgtc + 15421 ggtgccctgg aagacctgat cggggtggtg gaggccgcac cgtcgatctt cctcaaggcc + 15481 acgccgagcc atctgctgac cttggacacc gcttcccggg gcagtgccgg ttcgggtgac + 15541 ctcctgctcg gcggcgaaca attgccggcc gacacggtcg tccaatggcg ccggaagtat + 15601 ccgaacatcg tggtggtcaa tgaatacggg ccgaccgagg cgaccgtcgg gtgcgtcgaa + 15661 taccggctcg aaccggggca ggaatgcccg ccgggcggtg tggtgccgat cggcaccccg + 15721 ctggcgaaca tgcgggcgtt cgtgctggat tcgtggctgc ggctggtgcc gccgggtgcg + 15781 gtgggcgagt tgtacgtggc cggtgcgggc ctggcgcggg gatacctggg ccgggcaggg + 15841 ctgacggcga cgcggttcgt ggccgatccg ttcggctccg gcgagcggat gtaccggacc + 15901 ggggacctgg tgcagtggaa cccggacgga cagctggtgt tcgccggccg ggtcgacgac + 15961 caggtgaagg tgcggggctt ccggatcgag cccggtgaga tcgaggccgc cctggtggcg + 16021 caggagtcag tgggccaggc ggtggtggtg gcccgtgaca gcgagatcgg cacccggctg + 16081 atcgggtacg tgaccgccgc gggggagtcc ggtgtggacg aagccgcggt gcgcgaggga + 16141 gtggcggccc ggttgccgca gtacatggtg ccggcggcgc tggtggtact cggcgcgctg + 16201 ccgttgacgg cgaacggaaa ggtggaccgg gcggcgctgc cggatcccga cttcggcgcc + 16261 cgtgccgggg gccgggagcc ggtcacggag gccgagcggc tgctgtgtgc gctcttcgcc + 16321 gaggtgctcg gcctggagcg cgccggtgcg gacgacagtt tcttcgagct gggtggggat + 16381 tccatccttt cgatgcggct ggcggcccgg gcccaccgcg agggaatgtc cttcggtgcg + 16441 cgcgaggtgt tcgagcagcg cacgcccgcg gggatcgcgg cgatcgtgga acgggttgcg + 16501 ggcgatcgtc ctgtcgcggc ggtacacgcc gtgtccgatg tcgcccttct cgacctggac + 16561 caaggcgagc tcgacgaatt caaggctgag ttcgacgacg attcccagcc ctttgctgat + 16621 ccagggagat attgatgagc cagtcgcgga tcgaggaaat ctggccgctg tcgccactgc + 16681 aggccggttt gctcttccac gcggtttacg acggcgaagg gcccgacgtc tacatcggtc + 16741 actggattct cgacctggcc ggaccggtgg acgcggccgg gctgcgtgcg gcgtgggaga + 16801 cgctgctggc ccggcacgcc ccgctccggg cgtgtttccg gcagcgcaag tcgggcgaga + 16861 cggtgcagat catcgccagg caggtggaac tgccgtggcg ggaggtcgac ctttcccacc + 16921 tcgacgaccc cgaggaggcc gttcgcgagc tggccgagca ggaccggacg acgaggttcg + 16981 acctcgcgca ggcgccgttg ctgcggctga ccctgatccg gctcggcgcc gacgcgcacc + 17041 gcctggtggt gacctgccac cacacgatca tggacggctg gtcgctgccc atcgtgatcg + 17101 acgagctgtc ggtgctgtac ccggcgggcg gtgacgcgtc ggcgctgccg gacgtgccgt + 17161 cctaccggga atacctcgcg tggctgagcc ggcaggacaa ggaacgcgcg ctgtcggcgt + 17221 ggaccgcgga gctcagcggc gccgaggaac cgacgctggt ggtgcccgcc gatccggggc + 17281 gggcacccgc cgagccggag agcgtcgagg cccacctgcc ggagcacctc acgcgctcgc + 17341 tggccgagct ggcccgtcgc cacgggttga cgttgaacac cgtggtgcag ggcgcctggg + 17401 cgctggtgct ggcgcagctg gccggccggc cggacgtggt gttcggggcg gcggtgtcgg + 17461 cgcgcccgcc ggacctgccc ggtgtggagg ggatggtggg gctgttcctc aacaccgttc + 17521 ccgtgcgcgt gcggttgcgc ggctcgacgc cggtcgtcga gctgctggcg gagttgcaga + 17581 aacggcagtc ggcgctcatt cccgaccagt tcgtcgggct ggcggacatc cagcaggcgg + 17641 cgggtcccgc cgcggttttc gacacgctgc tcgtcttcga gaagttccac cacgggcccg + 17701 ccggatcgga ctccgcggga accttccgca ttcacgtgaa ccagggccgg gtggcggccc + 17761 actacccgct gacgctggtc gccgtccccg gcgagtcgat gtacctcaag ctcgactacc + 17821 tgacggagct cttcgaccgg gaaaccgcgt tcgccatcct cgagcggttc accggggtgc + 17881 tgcggcagct gaccggcgcg ggcgagctca cggtggccgg cgtcgaggtg acgaccgcgg + 17941 ccgagcgggc cctggtggcc ggggaatggg gtgcctcgac ctcggcgccg ccgagcctgc + 18001 cggcgctgga tctgttcggg caccaggtgg cgcaccgccg cgacgagccg gcggtcgtcg + 18061 acggcgatcg gacggtgtcg tacggagaac tcgccgagcg cgctgagcgg ctcgccggct + 18121 acctgaacgg ccggggagtc cggcgcggag accgggtggc cgtggtgctc gaccggtcac + 18181 ccgacctgat cgcgacgctg ctcgcggtgt ggaaggcggg cgcggcgtac gttccggtgg + 18241 accccgccta cccggtggaa cgcaggaagt tcatgctggc ggactccggg cccgcggcgg + 18301 tggtgtgcgc ggaagcgtac cgggccgccg tgccggacac ctgccccgag ccgatcgtcc + 18361 tggacgatcc ccggacgcgg caggcggtgg cggagagccc tcgcctgtcg gcaggcacga + 18421 gcgccgacga cctcgcctac gtgatgtaca cgtccggatc gaccgggacg ccgaagggcg + 18481 tcgcggtgtc gcacggcaac gtcgcggcgc tggccgggga gccgggctgg cgggtgggcc + 18541 ccggtgacgc cgtgctgctg cacgcctcgc acgccttcga catctcgttg ttcgaaatgt + 18601 gggtgccgct gctgtcgggt gcccgggtgg tgctggccgg accgggcgcg gtggacggcg + 18661 cggcgctggc ggcctacgtg gccggtggcg tcacggccgc ccacctgacc gcgggggcct + 18721 tccgggtgct ggccgacgag tcgccggagg cggtcgccgg gctgcgcgag gtgctgaccg + 18781 gtggggacgc ggtgccgctg gcggcggtcg agcgggtgcg cggacgtgtc cggaacgtgc + 18841 gggtgcggca cctctacggc ccgaccgagg ccacgctgtg cgcgacgtgg tggctgctcg + 18901 aacccggcga cgagacggga tcggtgctgc cgatcggacg tccgctcgcc gggcggcgcg + 18961 tccacgtcct cgacgcgttc ctgcggcccg tgccgccggg cgtggcgggc gagctgtatg + 19021 tcgccggagc cggtgtggcg cagggctatt cgagccgccc ggcgctgacg gccgagcggt + 19081 tcgtcgccga tccctccggt tccggtgcgc ggatgtaccg caccggggac ctggcgtact + 19141 ggacggagca gggtgcgctg gcgttcgccg ggcgggccga cgaccaggtg aagatccgcg + 19201 ggtaccgcgt ggagcccggc gagatcgagg tggtcctcgc cggcctgccc ggcgtcggcc + 19261 aggccgtggt gaccccgcgg ggtgagcacc tgatcggcta tgtggtcgcc gaagcgggcc + 19321 acgacgccga cccggtgcgg ctgcgcgagc agctcgccgg gacgctgccc gagttcatgg + 19381 tgccggccgc ggtgctggtg ctggacgagt tgccgttgac ggtcaacggg aaggtggacc + 19441 ggcgggcact gcccgaaccg gacttcgcgg cgaagtcggc gggccgggag ccggtcaccg + 19501 aggccgaacg agtcctttgt ggagtgtttg ccgacgtcct cggcctcgac cacgtcggcg + 19561 tcgacgacag cttcttcgag ctgggcggcg actcgatctc gtcgatgcag gtcgccgcgc + 19621 gtgcgcgtcg cgaagggatc tcgctgaccc cgcggctggt gttcgagcac cggacgccgg + 19681 aacgcctcgc ggcactggcg caggaggcag gcgcgacgcc acgcgccgag gtcgtcacgg + 19741 gcgtgggcga gatcccgtgg acgccggtga tgcgtgccct cggggacgac gcgatgcgcc + 19801 ccggcttcgc gcaggtgaga gtcgtcgtca ccccggcggg ggtgaacccg gacgcgctcg + 19861 tgagcgccct gcaggcggtg ctggacgcgc acgacctgct gcgggcccgg gtggagccgg + 19921 acggacggct gatcgtgccc gagcgcggcg cggtggccgc ggccggcctg ctcacgcggg + 19981 tggccgccgg gaccggcggc ctcgacgaga tcgccgagcg cgaggtcagg acggcgacgg + 20041 gcacgctgga cccgtcggcg ggaatcatgg cgcgggtcgt gtggatcgac gccggggacg + 20101 ccgagccggg ccggttggcc ttcgtggcgc accacctctc ggtcgacgcg gtctcctggg + 20161 ggatcctgct gccggatctg cgagcggcct acgacgaggt gatctccggc gggaccccgg + 20221 ccctcgaacc cccggtgacg tcgtatcggc agtgggcgcg ccggctgacc gcgcgggcgc + 20281 tcagcgaaag caccgtggcc gaactcgaaa aatgggctgc cgtcgtggaa ggcgcggaac + 20341 cggcactgcc ccaggacacc gggcagcaca ccgggcagtc gcactcgtgg tccacgagcc + 20401 tgtccggcac cgaggtgcga gacctggtca ctgtcttgcc gggcgcgttc cactgcggga + 20461 tccaggacgt tctgctggcg gggctcgcgg gtgcggtggc gcgtgtgcgc ggttccggcg + 20521 ccgcgctgct ggtcgacgtg gaagggcacg gtcgcgaagc cgccgacggc gaggacctgt + 20581 tgcgcaccgt cggctggttc accagcgttc acccggtccg tctcgaactg tccgatgtgg + 20641 acctcgcggg cgcggcggac ggcgagcggc ctgccgggca gttgctgaag gccgtgaagg + 20701 agcagatccg ggccgtgccc ggcgacggat ccggctacgg gctgctgcgc cacctcaacc + 20761 cgggcaccgg ggcgaggctg gccgagttgc cgtccgcgca gatcggcttc aactacctcg + 20821 gccggactgt cctcgctccc gaggacaccg cgtggcagcc caacggcgga gggccgctcg + 20881 gcggcggtcc ggacatggtc ctcgcgcacg ccgtggaggt cagcgcggaa ctccaggaca + 20941 cgccggccgg cccccggctc gggctggcca tcgacacgcg ggatttcgac ctcgccacgg + 21001 tggagcggct cggcgaggcc tggctggaga tgctgaccgg tctcgcggcg gtggcccgcg + 21061 gatccggcgc gggcgggcac acgcccgccg acttcgctct ggtcgacctg acgcagcggg + 21121 acgtggcgga gctggaggcc gcggcgcccg ggctgacgga catctggccg ttgtcgccgc + 21181 tgcaggaagg catgctcttc gaacgggcct tcgacgagga cggcgtcgac gtctaccaga + 21241 cgcagcggat cctggacctc gacgggccgc tcgacgaacc ccggctgcgc gcggcctgga + 21301 accaggtcct cgcccggcac gcctcgctgc ggaccggctt ccaccagctg gggtccggcg + 21361 ccacggtgca ggtcgtcgtg cgcgaggccg acatcccgtg gcgggtggcg gatctgtcgc + 21421 acctcgatgc ggcggaggcg gccgcggagg tcgagcggct gctcgccgag gaccagggcc + 21481 ggcggttcga cgtgacccgg ccgccgctgc tgcggctgct gctgatccgg ctcggtgcgg + 21541 acgagcaccg actcgtcgtg acctcgcacc acgtactcct cgacggctgg tcgaccccgc + 21601 tcgtcgtggg ggagatgtcg gacggctacg cgggcggccg cagctcctcg aagccgccgt + 21661 cctaccagga ctacctggcg tggctgagcc gtcaggacgc ggaggcgacc cgatcggcgt + 21721 ggcgggccga gctcgccggc gcggacgaac cgaccttggt cgacgccgac gcgggcaaga + 21781 cgctcgtgat gccggacgag cacgccgaat ggctgcccga gccggcgacg cgggcactcg + 21841 ccggcttcgc ccgtggccac gggctgacgg tgagcacgat cgtgctgggc gcgtgggcgc + 21901 tggtgctggc gcggctggcc ggccggaccg acgtggtgtt cggctcggtg gtgtcggggc + 21961 gtccggcgga cgtgccggat gtcgagcgca tggtgggcat gttcatcaac accgtcccgg + 22021 cccgggtgcg gctcgacggc cgccggccgt tgctggagat gctcgaagac ctgcaggcgc + 22081 gccaggcggc gttgaccgag caccagtacc tggggctgcc ggagatccag aaggtggcgg + 22141 ggaccggcgc gatcttcgac acgatcgtga tggtcgagaa ctacccgcac gacgccgccg + 22201 gtctcggcgg cgacggcggg gtggcgatca gctcggtcgt cacccggacc ggcaccagct + 22261 atccgctgac catgaacgtc agcctcgggg accgcctgcg catcaccgtg tcctaccggc + 22321 ccgaccggat cgacgacgcg acggccgccg aggtcgccag gcaggtcgtg cgggtcctgg + 22381 aacgggtggt ggccgagcct tcgctgccgg tgggccgcct cggcgtgacg agcgaaccga + 22441 cgcgcgcggc ggtggtggaa cgctggaact cgacgggcga agcggccgcc gagacgtccg + 22501 tgctggagct gttccggcgg caggcaggtg cctcgccgga cgcggtggcg gtcgtggcgg + 22561 gggaacgcac cctgtcctac gccgacctcg accgcgagtc cgaccggctg gccgggcacc + 22621 tggccgggat cggcgtgggg cgtggtgacc gcgtcggcgt ggtgatgaca cgcggcgcgg + 22681 acctgttcgt cgccctgctc ggggtctgga aggcaggcgc cgcgcaggta ccggtgaacg + 22741 tggactaccc cgcggaacgg atcgagcgga tgctggccga cgtcggcgcg tcggtcgcgg + 22801 tctgcgtgga agcgacccgc aaggcggtgc cggacggggt cgagccggtg gtcgtggacc + 22861 tgccggtgat cggcggagta cggcccgagg cgccaccggt cacggtcgga gcgcacgacg + 22921 tggcctacgt gatgtacacg tccggctcga ccggcgtgcc gaaggccgtc gcggtgccgc + 22981 acgggagcgt ggcggcgctg gcgagcgacc cgggctggtc gcagggcccc ggcgattgcg + 23041 tgctgctgca cgcgtcgcac gcgttcgacg cgtcgctggt cgagatctgg gtgccgctgg + 23101 tcagcggagc ccgcgtgctg gttgcggaac cgggcacggt cgacgcggaa cggctgcgcg + 23161 aagcggtctc ccgcggcgtg accaccgtcc acctgacggc cggtgccttc cgtgcggtgg + 23221 ccgaggaatc gccggactcc ttcatcgggc tgcgcgagat cctgaccggt ggggacgcgg + 23281 tgccgctcgc gtccgtcgtg cggatgcgcc aggcctgccc ggacgtccgg gtccggcagc + 23341 tgtacggccc caccgagatc accctctgcg ccacctggct cgtcctcgag ccgggggccg + 23401 cgacgggcga cgtcctgccg atcggcaggc cgctggccgg ccggcaggcc tacgtgctcg + 23461 acgcgttcct gcagcccgtg gcgccgaacg tgaccggcga gctctacctc gccggcgctg + 23521 gcctggcgca cggttacctg ggcaacaccg cggcgacctc ggagcggttc gtcgccaacc + 23581 cgttctccgg cggcggccgg atgtaccgca ccggcgacct ggctcgctgg accgaccagg + 23641 gcgagctggt gttcgccggc cgcgccgact cccaggtgaa gatccgcggc taccgcgtcg + 23701 agccgggtga ggtcgaggtg gcgctgaccg aggtgcccca cgtcgcgcag gcggtcgtgg + 23761 tggcgcggga aggccagccc ggcgagaagc gcctgatcgc gtacgtgacc gcggaagcgg + 23821 gatcggcact ggaatccgcc gcggtccgcg cgcacctcgc gacgcggctg ccggagttca + 23881 tggtgccgtc ggtggtggtg gtgctggaga gcttcccgtt gacgctcaac gggaagatcg + 23941 accgcgcggc cctgcccgcc cccgagttcg ccgggaaggc ggccgggcgc gaaccgcgca + 24001 cggaggccga gcgggtgctg tgcggcctgt tcgccgagat cctcgggctg gagcgggtcg + 24061 gcgccgacga cggcttcttc gagctgggcg gcgactcgat cctctcgatg cggctggccg + 24121 cccgcgcgcg tcgcgagaac ttcgtcttcg gcgcgaagca ggtcttcgag cagaagacgc + 24181 ccgcggggat cgcggcggtc gccgagcgtg gcgggcagag ccgcccggcc ggcgtcgccg + 24241 acggcgtcgg cgaggttccg tggacgccgg tggtgcgggc actgctcgaa cgcgatcccg + 24301 ccgggctgac ccgcggtgcc atggcgcagt gggtcagcgt ggcagcgccc cgcgaccttt + 24361 cggtgaccgc gctggtcgcc gggctgggcg cggtgatcga cacgcacgac atgctgcgga + 24421 gccggatcgt cgagagcgag ggcgtggaac cccggctggt cgtggccggg cggggcacgg + 24481 tggacgcggc ggcgctggtc gaacgggtcg aggccggcga cggtgatctc gccgagatcg + 24541 cggaccggtg cgcccacgac acggccgcac gcctggatcc cgtggccggc gtgctggtcc + 24601 gggccgtctg ggtggacgcc ggaccgggcc gcgccggacg gctcgtggtg gccgcgcacc + 24661 acctcgtggt cgacgtcgtg tcgtggcgaa ccctcctgcc ggacctgcag gcggcctgtg + 24721 aagccgtggt cgcgggcggg cagccggcgc tcgatccgcc ggacgtctcg ttccggcgct + 24781 ggtcgcggac gctggacggc gaggcggcga tccggaccgg cgaactggcg gtgtggacgg + 24841 agatcctcga cggggcgcag tcccggctgg gcgagctcga tccgcggcgc gacaccgtgt + 24901 ccaccgcggg acgccggtcc tggaccgtgc cccgggaaca cgcgggcgtg ctcgtggaac + 24961 aggtcacctc ggccttccac tgtggtgtcc acgaggtgct gctggccacc ctggcgggcg + 25021 ccgtggcggg ctggcgcggc ggcacggccg tcgtggtgga cgtcgaaggc cacggccgtc + 25081 agcccctcgg ggaactggac ctgtcgcgga cactcggctg gttcaccgac gtccacccgc + 25141 tccggctgga cgtcaccggg gtcgacccgg ccgaggcggt cgccggcggc gacgcggcgg + 25201 gccggttgct gaagcaggtc aaggagaacg tgcgagccgt gcccgacggc gggctcggct + 25261 acgggatgct gcggtacctc aacgccgaga cggggccggt cctcgccgcg ctgccgaagg + 25321 cggagatcgg gttcaactac ctcggccgct tctcggcggg gtccggcggc gaggcacaac + 25381 cctggcagat cacgggaatc gtcggcggtg cggcggagca ggacacgccc ttgcggcacg + 25441 tcgtggagat cgacgccgtc gtggtggacg gcccggacgg acccgaattc accctgaccg + 25501 tgacctgggc cgggcggatg ctcggcgacg ccgaggcgga gtcgctcgcg caggcgtggc + 25561 tggacatgct ggccggcctg gccgcccacg tggccgccgg tggccccggg gggcacacgc + 25621 cgtccgactt cccgctcacc gcgctgacgc agcgggaggt ggcggagttc gaggccgccg + 25681 tgccgggcct gctcgacatc tggccgcttt ccccgctgca ggaaggcctg ttgttccacg + 25741 ccgccgacga ccgcggcccg gacgtctacg cgagcatgcg caccctcgcc atcgacggcc + 25801 cgctggacgt cgcccggttc cgggcgtcct ggacggtcct gctcgaccgg catcccgccc + 25861 tgcgggcgag tttccaccag ctggaatccg gcgaggccgt gcaggtgatc gcccgggacg + 25921 tgccgccgga ctggcgggag accgacctgt ccgggctgcc cgagagcgaa gcgctcgcgg + 25981 agttcgaccg cctcgcggcg cggatgcacg ccgagcggtt cgacctgacc aaggctccgc + 26041 agctgcgcct gcacctggtg cgcctcggtg accgcaggta ccggctgatc ttcacgtcgc + 26101 accacatcgt ggccgacggc tggtccctgc cgctcatcct ggtcgacgtg ctgacggcgt + 26161 acgaggcagg cggtgacggc cggacgctgc cggccgcgac gtcgtaccgt gacttcctcg + 26221 cctgggtcga ccgccaggac aagggggcgg ccgggcaggc gtggcggacc gagctcgcgg + 26281 ggctcgacga ggcgacccac gtcgtgccgc cgggctcgat catcacgccc ctggagcccg + 26341 aacgcgtcgc gttcgaactc gacgacgaga cgagcaagcg gctggtcgag ttcacccggc + 26401 ggcacggcgt cacggcgaac acgctcttcc agggggtctg ggcactgcac ctggcccggc + 26461 tggccgggcg gaacgacgtg gtcttcggtg ccgcggtcgc ggggcgcccg ccggagatcc + 26521 ccggcgtcga gtccgcggtc ggcctgttca tgaacatgct gccggtccgg gcgcgcctca + 26581 ccggtgccga gccggtcgtc gacatgctga aggacctgca ggagcggcag gtcgcgatga + 26641 tggcgcacca gcacatcggg ctgcccgaga tcaagcagct caccgggccg ggggcggcgt + 26701 tcgacacgat cgtggtgttc gagaactacc cgcccgcgcc gccgaggtcc gacgaccccg + 26761 acgcgctcgt catccgcccg gtggggatcc cgaacgacac cgggcactac ccgctgtcca + 26821 tgcgcgcgtc cgtggcggcg ggccccgtcc gcggtgagtt catctaccgg ccggacgtgg + 26881 tcgaccggac cgaggccggg gagatggtcg cggcgatcct ccgcgcgctc gagcaggtgg + 26941 tggccgagcc gtggacgccg gtgggccagg tcggcctgat cggcccggag cagcgccgtc + 27001 tggtcgtgga cgagtggaac cggaccgacg tgccgctggc ggcggagacg ctgccggtgt + 27061 tgttccgcag gcaggcggag cggtcaccgg atgcggtggc cgtcgaggac ggggcgcgga + 27121 gcctgacgtt cggtgggctg ctcggcgagg tggaagcgct ggcccggctg ctcgtggggg + 27181 cgggcgtgcg gcgcgagcac cgggtgggcg tcctggtcga gcgctcggcc gagctggcgg + 27241 tgaccatgat ggccgtgtcg ttcgccggcg gggtgttcgt gccggtcgac cccgactatc + 27301 cccgtgagcg cgtcgagttc atgctggcga actcggcacc cggggtcatg gtgtgcacga + 27361 agacgacccg ggcggccgtg cccgcggagt tcgcgggcac cgtgctggtg ctggacgagc + 27421 tgcccgccgc ggacccggac gtcgagctgc cgccggtggc accggaagac gcggcgtacg + 27481 tgatctacac gtccgggtcg acgggggtgc ccaagggcgt cctggtgacc cactccgggc + 27541 tcgccaatct ggggtacgcg cacatcgagc ggatggcggt gacgtcgtcc tcgcgggtcc + 27601 tgcagttgtc cgcgaccggc ttcgacgcca tcgtgtccga gctctacatg gccttgctgg + 27661 ccggcgcgac cctggtgctg ccggacgcgg cgagcatgcc gccccgggtg acgctgggcg + 27721 aggcgatccg gcgggcgggc atcacgcacc tgaccgtgtc gccgagtgtg ctggcgagcg + 27781 aggacgacct gccggacacg ctgcggaccg tgctgacggg cggcgaggca ctgccgcccg + 27841 cgctggtgga ccgctggtca ccgggccgcc gggtgatcca ggcctacggg ccgaccgaga + 27901 cgaccatctg ctcgacgatg agtgccccgc tgtccccggg gcacgaccag gtcccgctcg + 27961 gcggcccgat ccacaacgtg cggcactacg tgctcgacgc gttcctgcag ccggtgccgc + 28021 ccggcgtggt cggcgagctc tacatcacgg gtgtcgggct ggcgcgcggc tacctcgggc + 28081 gtcccggcct gaccgcggaa cggttcgtgg ccagcccgtt cgcccccggc gagcggatgt + 28141 accgctcggg cgacctgttc cgctggaccc gggaaggcca gctgctcttc gcgggccgtg + 28201 tcgacgcgca ggtcaaggtg cgggggtacc gggtcgagcc cgccgagatc gaggccgtgc + 28261 tcgcggagca cccgtgggtc ggccaggtgg cggtgtccgt ccgccgggac ggcccgggcg + 28321 acaagcagct ggtggcttac gtcgtgccgt cggccgacgc ggccgccgag aacggcacgc + 28381 tggcctcggc actgcgcgag ctggcggccg aacgcctgcc ggagtacatg atgcccgcgg + 28441 cgttcgtgtc gctggagcag atgccgctca ccccgaacgg caagctcgac caccgggcgc + 28501 tgcaagcccc cgacttcgcc gggatgtcct cgaagcgggc cccccgcacg cccatggagg + 28561 cgaggctgtg cgcgctcttc gcggacgtgc tcggccttga ccaggtgggg cccgacgaca + 28621 gcttcttcga actcggcggc gactcgatca cctcgatgca gctgtcggcc cgggctcggc + 28681 cgacggggct ggaactgacc ccgtggcagg tgttcgacga gaagacgccg gaacggctgg + 28741 cggtgatcgt ccaggaactc gcggccgagg gcgggaccac cccggcgccg gagcccggcg + 28801 agggcacgct cgtcgctctc tcacctgacc agatggacct actcgaggcc gggctggccg + 28861 gcgaatgacc gccataagga gcagattgtg accgttgacg acactcgcgc gaagcgccgc + 28921 tccagcgtcg aggacgtctg gcctctttcg ccgctgcagg agggaatgct ctatcacacc + 28981 gccctcgacg acgacgggcc ggacacctac acggtgcaga ccgtctacgg catcgacggc + 29041 ccgctggacc cggggctctt gcgggcgtcg tggcaggcgc tcgtggaccg gcacgccgcg + 29101 ctgcgggcct gtttccggta cgtctccggg gcgcagatgg tgcaggtcat cgcgcgggag + 29161 gccgaggttc cctggcgcga gacggacctt tccgggctgc cggacgacat cgccgagggc + 29221 gaggttgacc ggctggcggc ggacgaggtg gccgagcggc tgcgcatcga ggccgcgccg + 29281 ctgatgaagc tgcacctgat ccggctcggc ccggaccgcc accggctcgt gcacacgctg + 29341 caccacgtgc tggtggacgg ctggtcgatg ccgatcctgc accgggagct cgccgcgatc + 29401 tacgcggcgg gcggggacgc gtccggcctc ccgcccaccg tctcctaccg ggactacctc + 29461 gcctggctgg gccggcagga caaggaggtg gcgcgggcgg cctggcgggc cgagctcgcc + 29521 gggctggaca cgccgaccac ggtcgccgcg cccgatccgg cccgcgtccc ggacatccac + 29581 acggcggtgg tcgagctgcc ggcggagctg acggacggct tggcgcagtt cgcgcgtggc + 29641 cacgacctca cgctgaacac cgtcgtgcag ggcgcgtggg ccgtcgtgct ggcccagctc + 29701 gcgggccgcg acgacgtcgt gttcggcgcg accgcctccg ggcggcccgc ggacctgccc + 29761 ggggtggagg cgatggtcgg ccagctgctc aacaccctgc cggtgcgggt ccggctcgac + 29821 ggcgggcgcc gcgcggccga gctgttcgcc cggctgcagc gcgaccagtc ggcactcatg + 29881 gcccaccagc acctcggcct gcaggacgtg caggccgtcg tcggacccgg agcggtcttc + 29941 gacacgctcg tcatctacga gaacttcccc cgcaagggac tcggccgggc accgggcggt + 30001 ggcctgagcc tggtcccggt gaagcgcggg cggaactcct cgcactaccc gttcacgctg + 30061 atcaccggac ccggcgagcg gatgccgctg atcctcgact acgaccgggg cctgttcgac + 30121 cccgcggccg ccgaatcggt cgtcggcgcg ctggccaggg tgctggagcg gctggtcgcc + 30181 gagcccgacg tcctcgtcgg caggctgacg ctcgcgagcg aggccgaacg cgcgctggtg + 30241 gtggagggct tcaacgccac cgcgggcccg gtgccggggg agtccgtcct cgagctgttc + 30301 gcccggcggg tggccgccgc gccggacgcg gtggcgatca ccggcgccgc cggcgcgaac + 30361 ctgacctacg ccgaggtcga ccaggcgtcg aaccggctgg cgggctacct cgccgtccgg + 30421 ggcgtgggcc gtggcgaccg cgtcggggtg gccatggaac ggtcgccgga tctgctgatc + 30481 gcgttcctgg cgatctggaa ggcgggtgcc gcctacgttc cggtggacgt cgagtacccg + 30541 gccgagcgga tctcgttcgt cttcgacgac tccggcgtct cgaccgtcct gtgcaccctg + 30601 gccaccagcg cggtcgcgcc gggcaacgcg atcgtgctcg acgcgcccga aacacgcgtg + 30661 gccgtgcggg actgcgccgc gccggaaatc cggccgcacg cggacgacct ggcgtacgtc + 30721 atgtacacct ccggctccac cggcctgccg aagggcgtgg ccatcccgca cggggccgtg + 30781 gccggcctgg cgggcgacgc gggctggcag atcggtcccg gcgacggcgt gctgatgcac + 30841 gcgacgcacg tcttcgaccc ttcgctctac gcgatgtggg tgccgctcgt ctcgggcgcc + 30901 cgggtcctgc tcaccgagcc gggggtgctg gacgcggccg gggtacggca ggccgtgcac + 30961 cggggcgcga ccttcgtcca cctcaccgcc ggcaccttcc gcgcgctggc ggagacggca + 31021 ccggagtgct tcgaaggcct ggtcgagatc gggaccggcg gcgacgtggt tccgctgcag + 31081 tcggtggaga acctgcggcg ggcccagccc ggcctgcggg tgcgcaacac ctacgggccg + 31141 accgagacca ccctgtgcgc gacgtggctg ccgatcgagc ccggtgaggt gctcggccgg + 31201 gagctgccga tcggccatcc gatgaccaac cgccggatct acctcctcga cgccttcctg + 31261 cgcccggttc cgccgggcgt ggccggcgag ctgtacatcg cgggcacggg cctggcccac + 31321 gggtacctga agagccccgg cctgacggcc ggccggttcg tggcctgccc gttcgccgcc + 31381 ggtgaacgca tgtaccgcac cggcgaccgg gcgcgctgga cccgcgacgg cgaggtggtg + 31441 ttcctcggcc gcgccgacga ccaggtgaag atccgcggct accgggtcga gctcggcgaa + 31501 gtggaggctg cgctggcggc ccagccgggc gtggtcgagg ccgtcgtcac ggcgcgggag + 31561 gaccagcccg gcgagaagcg cctggtcggc tacttcgtct ccgacggcgg cgacgcgggg + 31621 ccggtggaga tccggcggca gctggccctg gtgctgcccg actacctggt ccccatcgcc + 31681 gtggtcgccc tgcccggcct gcccgtcacc cccaacggca aggtcgatcg ccgggccctg + 31741 cccgccccgg atctcgcggg acactcgccg gagaaggcac ccgagaacga gaccgagaag + 31801 gtgctgtgcg cgctgttcgc cgagatcctc agcatcgacc aggtgggggt cgacgacacc + 31861 ttccacgacc tcggcggcag ttcggcgctg gccatgcggc tcgtcgcgcg gatccgtgag + 31921 gagctcggcg cggacctgcc catccggcag ctgttctcct cgccgacccc cgcgggcctg + 31981 gccagggcgc tggccgcgaa gtcacgcccc gcgctggaag ccgcccagcg gccggaccgg + 32041 gtgcccgtca ccgcccggca gctgcgtgcc tggctgctgg ccgatcccgg cggggagacg + 32101 gccggcctgc acacctccgt cgccctgcgc ctgcacggcc gggtggacgt gcccgcgctg + 32161 gcggcggcgc tcggcgacgt cgcggcccgg cacgagatcc tccgcacgac cttcccgggt + 32221 gacgcgcaga gcgttcacca gcacgtccac gacgccttgg cggtcgagct gactccggtc + 32281 ggagtcaccg aggaagacct cccggggctg ctcgccgagc ggcgtgacct gctcttcgac + 32341 ctcaccaggg acgtgccgtg gcggtgtgac ctcttcgcgc tctcggacaa cgagcacgtg + 32401 ctgcacctgc aggtccaccg gatcctcgcc gacgacgact cgctcgacgt gttcttccgc + 32461 gacctggcgg ccgcctatgg tgcgcgccgc gaaggccggg tcccggagcg cgcgcccctg + 32521 gcgttgcagt tcgccgacta cgcgctctgg gagcagcgcc tgctcacgga cgagaacgag + 32581 ccgggcagcc tgatcaacga gcaggtggcc ttctggcggg acaacctggc cggcctcgac + 32641 ggggagacgg tgctgccgtt cgaccgcccg cgcccggccg tcccgtcgcg gcgcgccgga + 32701 acggtcgcgc tgcggctgga ggccggcccg cacgcccggt tgacggaggc ggcggagccg + 32761 ccgggcgcgg acacgctcga gatggtgcac gccgcgctcg cgatgctgct ggccaagctc + 32821 ggagcgggcc acgacgtggt gatcggcacg gcgctgccgc gggacgagga gctcttcgac + 32881 ctcgagccga tgatcgggcc gttcacccgg gcgctcgccc tgcgcaccga cgtctcgggc + 32941 gatccgacct tcctcgaggt cgtcgccagg gtgcaggagg cgggccaagc cacgggcgag + 33001 cacctggacc tgcccttcga acggatcgtc gagctgctcg atctgccggc ctcgctcgcc + 33061 cgccaccccg tgttccaggt gggacttcag gtggacgagg aggacatcga cggatgggcc + 33121 gcggcggaac tgcccgccct gcgcaccgcc gtcgaacccg gcgggaccgc ggccatggag + 33181 ctggacctcg cggtcaagct caccgagcgc ttcgacgacg acgacaacgc cggcggcctc + 33241 gagggcgcgc tgcactacgc caccgacctg ttcgacgagg ccacggcgga gtcggtggcc + 33301 cggcggctgg tccgcgtcct cgagcaggtg gcggaggatc ccgggcggcg gatcagcgac + 33361 ctggatgtct tcctggacga cttcgaacgc ggccgtccgc ccatcgctcc ggcgcggtgg + 33421 gccggggccg tgcccccggt ggtcgccgaa ctggccgggg acggcccgct cggcgcgctc + 33481 ctgctcgacg agcagctgcg cccggtcgct cccggagccg tcggcgatct gtacgtcacc + 33541 ggcccggccg tggacgcggg aacggccacc ctggcgaccg tgccctgccc gttcggggac + 33601 gaggggcacc ggatgctgca cacgggcctg ctcgcccgca aaacgcccgc caagaccctg + 33661 gtcgtcgtgg gcgagcggag gcggtcgagc gcttcggtga agacgggtga cttcgagatc + 33721 ctgctgccgc tgcgcgccgg cggtgaccgc ccgcccctgt tctgcgtcca cgcgagcggt + 33781 ggcctgagct ggaactacga gccgttgctg cggtacctcc cgccgaacca gccggtctac + 33841 ggcgtgcagg ctcgcggcct ggcccggacc gaaccgctgc cgggcagcgt cgaggagatg + 33901 gcggccgact acctcgagca gatccgtgcc gtgcagccgg ccgggccgta ccacctcctc + 33961 ggctggtccc tcggcggccg gatcgcgcag gcgatggcca ggttgctcga ggcggacggg + 34021 gagcggctcg gcctgctcgc cctgctcgac gcctatcccg tctacatggg acgcaagacg + 34081 accggcgccg cgagcgaaga agcggctctc gaacagcgga accagcagga tctggacctc + 34141 gcggggcaac tggtcaaggg tgtggccgcc cggtcgcgcc tcgaggcggt catgcgcaac + 34201 ctctggaagg tcgggccacg gcacacacgt tcgcccttcg ccggcgacgt cctgcttttc + 34261 gtggccactg tggaccgtcc cgcgcatttg cccgtcccag tggcgaaggc cagctggaag + 34321 gaattcacca gtggggcggt agaggcccac gaaatcccgt ccaaccacta cgacatggtg + 34381 caatccgcgg cgctgggcca gattggtgcc atcgtcgccg agaaactccg gtcccggccg + 34441 gagggtgaaa ggacacaacg atgagcaatc cgttcgacaa cgaagacggc tcctttttcg + 34501 tgctggtcaa cgacgagggc cagcactccc tctggccgac cttcgcggag gtgcccgccg + 34561 gctggacccg cgtgcacggc gaagcgggcc gtcaggagtg cctcgcctac gtcgaggaga + 34621 actggacgga cctccggccg aagagcctca tccgggaagc gagcgcctga gtgtccagcc + 34681 gctcaacgcg gacggttgga cgagcccggc gctcgtccga ccggctccgg gcggcttccg + 34741 cttgagtcca ccggctggat gaggcgaaag gaccttgtgc agtgttcgag gagagcaacg + 34801 ccctccgggg cacggaaata caccggagag accggttcgc tccggggccg gaactgcgct + 34861 ccctgatggg cgagggcacc atgtccatcc tgcagccccc ggattccccc ggcgggcgga + 34921 ccgggtggct ggccaccggg cacgacgagg tccggcaggt cctcggctcg gacaagttca + 34981 gcgccaagct gctctacggc gggaccgtgg ccggccgcat ctggccgggc ttcctcaacc + 35041 agtacgaccc cccggagcac acgcgcctgc gccggatggt gacgtcggcg ttcaccgtcc + 35101 ggcggatgca ggacttccgg ccgcggatcg agcagatcgt ccaggcgagc ctggacgcca + 35161 tcgaggccgc cggtggcccg gtggacttcg tcccccggtt cgcctggtcc gtggcgacga + 35221 cggtgacgtg cgacttcctc ggcatcccgc gtgacgatca ggcggacttg tcgcgcgccc + 35281 tgcacgccag ccggtccgaa cggtcgggca agcggcgggt ggcggcgggg aacaagtact + 35341 ggacgtacat gaccgagatc gcggcccgcg cgcgccgcga tcccggtgac gacatgttcg + 35401 gcgcggtggt gcgcgaccac ggcgacgcga tcaccgacgc ggaactgctg ggcgtggccg + 35461 cgttcgtcat gggcgcgggc ggggaccagg tggcccggtt tctcgcggcg ggcgcgtggc + 35521 tgatggtcga gcaccccgat cagttcgcgc tgctgcggga aaagccggac accgtcccgg + 35581 actggctgaa cgaggtggag cggtacctca ccagcgacga gaagaccact ccgcgcatcg + 35641 cgcaggagga cgtgcgcatc ggtgatcagc tcgtcaaggc cggcgatgcc gtcacctgct + 35701 cgctgctggc ggcgaaccgc aggaagttcc ccgccccgga ggacgagttc gacatcaccc + 35761 gggaacggcc ggtgcacgtc acgttcggcc acggcatcca ccactgcctc ggcaggccac + 35821 tggccgagat ggtgttccgg gcggcgattc cggcgctggc acaacgcttt cccaagctga + 35881 ggctggccga gccggaccgc gagatcaagc tggggccgcc gccgttcgac gtggaagccc + 35941 tgctgctgga gtggtgacgc cgggccggac acgaaatcgt cgggagcaaa agagggggtt + 36001 ttcccgttga atgatgacga cccgcggccg ctgcacattc gccggcaggg cctggacccg + 36061 gcggacgagc tgctcgccgc cggatcgctg acgagggtca ccatcggatc cggagcggat + 36121 gccgagaccc attggatggc caccgcgcac gccctcgtcc ggcaggtgat gggcgaccac + 36181 cagcggttca gcacccggcg ccgctgggac ccgcgggacg agatcggcgg gacgggcacc + 36241 ttccggccgc gtgaactggt cggcaacctg atggactacg acccgcccga gcacacgcgg + 36301 ctgcgccaga agctgacccc cgggttcacg ctgcgcaaga tgcagcggct gcagccgtac + 36361 atcgaacaga tcgtcaacga gcgactcgac gagatggcgc gggcgggatc gcccgcggat + 36421 ctggtcgcgt tcgtcgccga caaggtgccc ggcgccgtgc tgtgcgagct gatcggcgtg + 36481 ccgagggacg accgggccac gttcatgcag ctgtgccacg cgcatctcga cgcctcgcga + 36541 agccagaaac ggcgggcggc ggcgggagag gcgttctccc gctacctgct ggcgatgatc + 36601 gccagggaac gcaaggaccc gggcgagggg ctcatcggag cggtcgtcgc cgaatacggc + 36661 gacgaagcca cggacgagga gctgcgcggc ttctgcgtgc aggtgatgct ggctggcgac + 36721 gacaacatct ccggcatgat cgggctcggc gtgctggcgc tgctgcggca ccccgagcag + 36781 atcgacgcgt tgcgcggcgg cgaacagccg gcgcaacgag ccgtcgacga gctgatccgg + 36841 tacctgaccg tgccctacgg cccgacaccc cgcatcgcga agcaggacgt caccgtcggg + 36901 gaccaggtga tcaaggcggg cgagagcgtc atctgctcgc tcccggcggc caaccgcgac + 36961 cccgccctcg tgccggacgc ggaccggctc gatgtcacgc gcgaccccgt cccgcacgtc + 37021 gcgttcgggc acgggatcca ccactgcctg ggagccgcac tggcccgcct cgaactgcgc + 37081 acggtcttca ccgcgctgtg gcggcggttt cccgacctgc ggctcgcgga tcccgcccag + 37141 gagaccaagt tccgcctcac cacccccgct tacgggctga ccgagctgat ggtcgcctgg + 37201 tgaccggggg cccgcgacgt ccacgcgagg ccgctggacg tcctgcctga ttccgggtgg + 37261 aattgggacc gtcggcgggt tcgggccgaa aaaaccgaac gaccaagaca gagggacatt + 37321 tcttcccggt cgaccaagga gtttccacgc ggatggggca cgatatcggt cagctcgcgc + 37381 cgctcttgcc ggagccggcg aacttccagc tgaggacgaa ctgcgatccg catgcggaca + 37441 acttcgacct gagggcgcac ggcccgctgg tccggatagc cggggactcc tccgctcagc + 37501 tgggcaggga atatgtctgg caggcccacg gctacgacgt cgtgcgccgg atattgggcg + 37561 accacgagaa tttcacgacg cggccgcaat tcacccaagc gaaatccggg gcgcacgtcg + 37621 aggcccagtt cgtcgggcag atatcgacct acgacccacc cgagcacacc cggctgcgga + 37681 agatgctcac gccggagttc acggtccggc ggatccgccg gatggagccc gcgatccaag + 37741 ccctcgtcga cgatcggctc gaccgggtgg cggccgaggg accgcccgcc gacctccagg + 37801 cgctgttcgc cgacccggtc ggcgcgctcg ctctgtgcga actgctcggc atcccccgag + 37861 acgaccagcg cgagttcgtc cggcggatca ggcggaacac cgatctgagc cgcgggctca + 37921 aggcgcgggc ggcggacagc gcggcgttca accggtacct ggacaacctc atcgcccggc + 37981 agcgccggga cgccgacgac gggttcctcg gcatgatcgt gcgagagcac ggggacaccg + 38041 tcacggacga ggagctgaag ggcctgtgca cggcgctgat cctcggcggc gtcgagaccg + 38101 tcgccgggat gatcggcttc ggggtgctcg ccctgctcga gaaccccggc caggtgccgt + 38161 tgctgttcgc gggccccgag caggccgacc gcgtggtcaa cgagctgctg cgttacctgt + 38221 ctccggtgca ggcgccgaat cccagcctcg ccgtcaagga tgtgatcatc gacggacagc + 38281 tgatcaaagc gggagattat gtcctgtgct cggtcctcat ggccaaccgg gacgaagcgc + 38341 tgacgccgaa ccccaacgtc ttcgacgcga atcgcgccgc ggtatcggac gtcggtttcg + 38401 ggcacggcat ccactactgc gtgggcgcgg cgctggccag gtcgatgctg cggatggcgt + 38461 accaggccct gtggcagcga ttccccgggc tccggctggc cgtgcccatc gcggaagtga + 38521 agtaccgaag cgcgttcgtc gactgccctg atcgggttcc ggtcacctgg tagcgcaatc + 38581 cgggttgaaa accagcctcg gcaatttgac actcgacaga ggaatggtgg gagatgtcgg + 38641 tcgaagactt cgacgtggtg gtggcgggcg gcgggccggg tggttcgacg gtggccacgc + 38701 tggtggccat gcagggacac cgggtgctgc tgctggagaa agaggttttc ccgcggtatc + 38761 agatcggtga gtcgctgctg cccgccacgg tgcacggcgt gtgccggatg ctcggcatct + 38821 ccgacgagct ggccaatgcc gggttcccga tcaagcgcgg cggcacgttc cgctggggcg + 38881 cccggccgga gccgtggacg ttccacttcg gcatctcggc caagatggcc ggctcgacgt + 38941 cgcacgccta ccaggtcgag cgggcgcggt tcgacgagat gctgctgaac aacgccaagc + 39001 gcaagggcgt ggtcgtgcgg gaggggtgcg cggtcaccga tgtggtggaa gacggcgagc + 39061 gggtcaccgg tgcgcggtac accgatcccg acggcaccga gcgggaagtg tcggcgcggt + 39121 tcgtgatcga cgcgtcgggc aacaagagcc ggctctacac caaggtcggc ggttcgcgga + 39181 actattcgga gttcttccgc agcctcgcgc tgttcggtta cttcgagggt ggcaagcggc + 39241 tgcccgagcc ggtctccggg aacatcctga gtgtggcctt cgacagcggc tggttctggt + 39301 acatcccgct gagcgacacg ctgaccagcg tcggcgcggt ggtgcgccgg gaggacgccg + 39361 agaagatcca gggtgaccgg gagaaggccc tcaacacgct gatcgccgag tgcccgctga + 39421 tctcggaata cctcgcggac gcgacccggg tgacgaccgg ccggtacggg gaactgcgcg + 39481 tccgcaagga ctactcctac cagcaggaga cctactggcg gccgggcatg atcctggtcg + 39541 gcgacgccgc gtgtttcgtg gacccggtgt tctcctccgg tgtgcacctg gcgacctaca + 39601 gcgcgctgct cgcggcccgg tcgatcaaca gcgtcctcgc cggcgacctg gacgagaaga + 39661 ccgcgctgaa cgagttcgag ctgcggtatc gccgtgagta cggcgtgttc tacgagttcc + 39721 tcgtgtcctt ctaccagatg aacgtgaacg aggagtcgta cttctggcag gccaagaagg + 39781 tcacgcagaa ccagagcacc gacgtcgagt cgttcgtcga gctgatcggc ggagtgtcgt + 39841 ccggggagac cgcgctgacg gccgccgacc gcatcgccgc gcgcagtgcc gagttcgccg + 39901 cggcggtgga cgagatggcg ggcggggacg gcgacaacat ggtgccgatg ttcaagtcga + 39961 cggtggtcca gcaggcgatg caggaagcgg gccaggtgca gatgaaggcg ctgctcggcg + 40021 aggacgccga acccgagctg cccctgttcc ccggtggcct ggtgacctcg cccgaacgga + 40081 tgaagtggct gcctcaccac cctgcgtgaa gcctgtgcgc gccggccgtt cgcgggtggc + 40141 cgggacctgc ggaacaacct atggaaaaac ctacggaaca gagggtgcga aatgcgcgtg + 40201 ttgatctcgg ggtgcggatc gcgcggggac accgaaccgc tgatcgcctt ggcggtccgg + 40261 ttgcgggaac tcggtgtaga cgtccggatg tgcctgccgc cggactacgt ggagcggtgc + 40321 gccgaggtcg gggtgtcgat ggtggcggtc ggcccggcga tgcgcgccgg ggcacgcggg + 40381 ccgggagaac cgccgccggg agcacccgaa atcgtgtccg aggtggtcgc ggactggttc + 40441 gacaaggtgc cggcggccgc cgaagggtgt gacgtggtgg tggcgaccgg cttgctgccc + 40501 gccgcggtcg tcgtgcggtc ggtcgccgag aagctgggca tcccttacct ctacaccgtg + 40561 ctgtcgccgg accacctgcc gtcggtgctc agccaggcgg agcgcgacga atacgaccag + 40621 ggcgccgacc ggctgttcgg tgcggtggtc accagcgggc gggccgcgat cggcctgccg + 40681 ccggtggcga acctcttcac ctacggctac accgaacagc cctggctggg ggcggaccag + 40741 atcctcgccc cgccgccacc gggagacttg gacaccgtgc agaccggtgc gtggatcctg + 40801 cccgacgaac ggccccttcc cgcggagctg gagacgttcc tcgcggccgg gtcgccgccc + 40861 gtgtacgtgg gtttcggcag ctcgtccgga ccccggaccg ccggcgccgc caaggcggcc + 40921 atcgaggcga tccgcgcccg gggccaccgg gtcgtcctct cccgcggctg ggccgacctg + 40981 gccgcgcccg acgactcggc cgactgcttc accgtcggcg aagtgaacct ccaggtgctg + 41041 ttccgccggg tggccgccgc cgtccaccac gacagcgcgg gcacgacact cctggccatc + 41101 cgggcaggca ccccccagat cgtcgtccgc cgcgtgatag acaacgtggt ggagcaggcg + 41161 taccacgccg accgggtggc cgaactgggg gtcggtgtgg cactcgaagg tccgatcccg + 41221 gcctccgagg ccatgtcgga cgcgctcgag acggcgctgg caccggaaac ccgcgcgcga + 41281 gcggcggagg tggcgggcac ggtccgcacc gacgggacga cggtggccgc ggaactgctg + 41341 ttcgccgcgg tcagccggga aaagcccgcc gttcccgcat gacccgcacc gagccggcgt + 41401 gccgggaacc ggccggcggg ccagaaccca ggaaacgggg aaatacgtga agcgtgtgct + 41461 gttgtcgacg ctcggaagcc gcggagacgt cgaaccactg gtggccttgg cggtccggct + 41521 gcgcgacctc ggcgcggagc cgctgatgtg cgcaccgccg gactgcgcgg accggctgga + 41581 agaggtcggc gtgccgcacg tgcccgtcgg cccgtcggcg cgcgcgccga tccatcggga + 41641 gaagccgttg acgcccgagg acatgcgccg gctcatggcc gaagcgatcg ccatgccgtt + 41701 cgaccggata ccggcggccg ccgaggggtg tgccgcagtg gtgacgaccg ggctgctggc + 41761 cgccgcgatc ggcgtgcggt cggtggccga gaagctgggc atcccctact tctacgcctt + 41821 ccactgcccg agctacgtgc cgtcaccgta ctatccgccg ccaccgcccc tcggcgagcc + 41881 gcccgccgag gacgtgaccg acatccgggc gctgtgggag cggaacaacc ggagcgctta + 41941 ccagcggtac gggggtccgc tcaacagcca ccgggccgcg atcggcctgc ctccggtgga + 42001 ggacatcttc accttcggct acaccgatca cccgtgggtg gcggcggatt cggtcctggc + 42061 cccgatgcag ccgaccgacc tcggtgccgt gcagaccggc gcgtggatcc tgcccgacga + 42121 acggccgctt tccccggagc tggaagcttt cctggacacc ggcaccccgc cggtgtacct + 42181 cgggttcggc agcctgcgcg ccccggccga cgccgtccgg gtgtccatcg acgcgatccg + 42241 ggcccaaggc cgccgggtaa tcctttcccg gggctgggcc gacctggtcc tgcccgacga + 42301 ccgggaagac tgcttcgcca ccggcgaggt gaaccagcag gtgctgttcg gccgggtggc + 42361 cgccgtcatc caccacggcg gcgcgggcac gacgcacgtg gccatgcagg ccggggcacc + 42421 ccaggtcctg gtgccccaga tggcggacca gccgtactac gccggccggg tggccgagct + 42481 ggggatcggg gtggcccacg acggtccggt cccgaccttc gactcgctgt cggccgcgct + 42541 cgtcacggcg ctggccccgg aaacccgcgc acgagcggag gccgtggcgc gcacggccgg + 42601 tgccgacggg gcggcggtgg ccgcgaaact gctgctcgac gcggtcagcc gggaaaagcc + 42661 ggctgttccc gcgtaaacca caccgggtcg gcgccgccgg aaagtggcgc atgcggtgac + 42721 ccgggtcctg tccattcttt gaccgttccg gacataaatg cgctcggata gcattccgcc + 42781 tcattatcgc agggggacag aaccgatcaa attggggtgc gggatgcgtg tgttgctgtc + 42841 gacggcggga agccgtggag acgtcgaacc gttgctggcc ttggcggtcc ggttgcaggg + 42901 actcggcgcg gaggtgctga tgtgcgcgtc gcctgcttcc gcggagcggc tggccgaggt + 42961 cggggtgccg cacgtgccgg tcggcctgca gctggacggc atgttgttgc aggaaggaat + 43021 gccgccgccg tcggccgagg acgagcgcag actcgcggcc atggcgatcg acatgcagtt + 43081 cgacgcggtc cccgcggccg ccgaagggtg tgccgcggtc gtggcgaccg gagagctggc + 43141 cgccgcggcc gccgtgcggt cggtggccga gaagctgggc atcccgtact tctacggcgc + 43201 atacagcccg aactacctgg cgtcgccgca ctatccgccg cccgacgacg agcggaccac + 43261 cccgggcgtg accgacaacg gggtgctgtg ggccgagcgt gccgagcgtt tcgccaagcg + 43321 gtacggggaa acgctcaaca gcagacgggc ggcgatcggc ctgcccccgg tggcggacgt + 43381 cttcggctac ggctacaccg agcagccctg gctggcggcg gacccggtcc tggccccgct + 43441 ggatccggat ctcgacgcgg tgcagaccgg cgcgtggatc ctgcgtgacg atcggccgct + 43501 ttcccctgag ctggcggcgt ttctcgctgc cgggtcaccg ccggtgtacg tgggtttcgg + 43561 cagcgcgtcc gggccgggaa tcgaggacgc cgcgaaggtg gccatcgagg cgatccgggc + 43621 cctcggccgc cgggcgatcc tttcccgcgg ctgggccgat ctggtcctgc ccgacgaccg + 43681 ggaggactgc ttcgccgtcg acgaggcgaa tctccaggtg ctgttcgagc agtcggccgc + 43741 cgtcgtccac cacggcagcg cgggcaccga gcacctggcc acgcgggccg gcgtccccca + 43801 gatcgcgata ccccggcaca cggatcaggc gtactacgcc ggccgggtgg ccgagctggg + 43861 ggtcggtgtg gcactcgaag gtccggtccc gtccttcgcg gcgatgtcgg cggagctcgc + 43921 gacggccctg gccccggaaa cccgtgcgcg agcggcggag gtggcgggca cggtccgcac + 43981 cgacgggacg acgatggccg cggagctgct cttccaggcg gccgaacagg gcaaactgac + 44041 cgttcccgcg tgaatttctt cgaagacaaa gcaaagagga gactgcatgt cgaccacgtc + 44101 ccagtgccgt atctgtgacg gcactgtcca cgagttcatc gacttcggac gccagccgct + 44161 ctcggacgcg ttcgtggctc ccggcgcgga aaagggtgag ttcttcttcc gccttgccac + 44221 cggcatctgc gattcctgca cgatggtgca gctgatggag gaagtcccgc gggacctgat + 44281 gttccacgag gcctacccct acctgtcgtc gggttcggcc gtcatgcgca cgcacttcca + 44341 cgagctggcc aagcacctgc tggccacgga gctgaccggc gaggacccgt tcatcgtcga + 44401 gctcggctgc aacgacggca tcatgctcaa ggccgtggcc gacgcggggg tgcgccagct + 44461 cggcgtcgaa ccctccggca gtgtcgcgga tctggcggca gccaagggga tccgcgtccg + 44521 caaggacttc ttcgaagagg cgacggccgc cgacatccgc gagaacgacg gccccgcgga + 44581 cgtgatctac gcggccaaca cactgtgcca catcccttac atggactcga tcctgaaggg + 44641 cgtcaccaag ctgctcggcc cgaacggcgt gttcgtcttc gaggacccgt acctcggcga + 44701 catcgtggag cgcacgtcgt tcgaccagat ctacgacgag catttcttcc tcttcacggc + 44761 gcgctcggtc caggagatgg cccggcgcaa cggcctcgag ctcgtggacg tcgagcgcat + 44821 tccggtgcac ggcggcgagg tccgctacac cctggccctg gccggcgctc gcaagccgtc + 44881 cgaggccgtg gcggagctcc tggcctggga ggcggagcgc aagctggcgg agtacgccac + 44941 gctggaacgt ttcgccaccg acgtgaagaa gatcaaggaa gacctgatcg cgctgctgac + 45001 caagctccgt gccgaaggca agcgcgtcgt cggctacggc gcgacggcca agagtgccac + 45061 ggtgaccaac ttctgcggca tcaccccgga cctggtcgag ttcatctcgg acacgacacc + 45121 ggccaagcag ggcaagctca gcccgggaca gcacatcccg gtccgcgagt acggggaatt + 45181 cgccggcaac cacccggact acgccctgct gttcgcctgg aaccacgccg acgagatcat + 45241 gaacgtggaa caggcctttc gtgacgccgg cggtcagtgg atcctttacg tgccgaacgt + 45301 gcacgtgagc tgaccggcca tgcgaatcct cgcgtcgagc cctttcggcg ggcactgtcc + 45361 agggttcacc agttgtcttg tggcagcgct gtcggcaccg tgattgccat gcctcaagac + 45421 ctcgacgcgg accggattct ggcgatatcc ccgcatttgg acgacgcggt tctttccttc + 45481 ggtgccggcc tcgcccgtgc ggcgcaggcc ggcgcgaagg tgaccgttca cacggtgttc + 45541 gccgggaccg cggcgccccc ttattcgccg gcggcggagc ggctgcacgc gatctgggag + 45601 ctctcaccgg atcaagacgc gtcgctccgc cgccgggacg aagacatcgc cgcgctcgac + 45661 cacctgggcg tcgactaccg gcacggccgg ttcctcgacg ccatctaccg caagctgccg + 45721 gacggccgat ggctggccga caacgtgccg ggccgccaga agctggccat cggacggcaa + 45781 tcgccgcagg gcgatccgga gctgttctcc gcggtccggg cggacatcga gtcgatcgtc + 45841 gaagagtacg ccccggcgct gatcctgacc tgcgcggcag gcaacggtca tgtcgacaac + 45901 gagatcgcgc gggatgccgc gctgttcgtc gcgtacgaga agggcatccg ggttcggctg + 45961 tgggaagacc ttccgcacgc gatgttcgcg gagggcgccg ccgaactgcc ggacggattc + 46021 cggctggggc cgcccgattt cggttccgtc gaaccggagg cacgggcgcg gaaattcgaa + 46081 gcgctgcggc tctactcgtc gcagatgctg atgctgcacg ggccggaaaa ggatttcttc + 46141 gctcagctgg acgggcatgc ccggaagagt gcaccgggtg gtggatacgg cgaaacgacc + 46201 tggccggttg tctctcgcga agacaacggc tgaatccagg gctgaaccca gggaggttgt + 46261 cattgtgagc ggtcaactcg agcgtggtcc ggtgcggacc acgcacgccg acgtcctgct + 46321 ggcctcggtg ggtgagcgag gcgttctgtg cgacttctac gacgaggagg gctcgaacac + 46381 ctatcgggac ctgatccagg acgcggacgg taccccggaa gcgcgggagt tcgccactcg + 46441 cgtcggcccg gtgcccggac ccgtgctgga gctcgcggcc ggcacgggcc ggctgacctt + 46501 cccgttcctg gagctcggct gggaggtgac cgccctggaa ctgtcggccc cggtggtcga + 46561 cggcttccgg atgcggctgg cggaagcacc ggcggacctg cgggaccgct gcacagtggt + 46621 tcaggcggac atgagcgctt tctcggtgga ccggcgcttc ggggcagcgg tcatcagctc + 46681 gggttcggtc aacgaactgg acgaagccgg ccggcagggc ctgtacgcgt cggttcgcga + 46741 gcacctcgag cccggcggga agttcctgct cagcctggcc ttgtcggagg tcgccgagtc + 46801 acagccgccg gagcgccggc aagagttgcc aggccagagc ggccggctgt acgtgttgca + 46861 cgtgagtgtg cagccggcgg aggagaccca ggacatcacg atctaccccg ccgacgaaac + 46921 agcggatccc ttcgtcgtct gcacgcatcg ccgccggctc gtcccggcgg accggatagt + 46981 gcgggaactt cttcgggccg gcttcgacgt gatcgcgcgg acgccgttcg cgtccggtgc + 47041 gtccggccgg gcgggccatg aagacatgtt gctggtggaa gcggtgaagc aggagggcgc + 47101 tatcccagcc gcgcggtgat gagcgcggcg agccgggcga cgccctcttc gatcagttcc + 47161 ggggtgagca ggctgatcga cagccgcagc tggttgaacc cgcccttgcc gccgtagaag + 47221 tggtgcatcg gggtgaacag cacgccgtgg tcgcgggcgg cgagggcgag caggtcgtcg + 47281 tcgacggtga aggggacggt gacggtgacg aagaacccac cggtcggcgt gttccagcgg + 47341 accccggcgc gcccgccgag ccgtcgctcg agctcgccca gcacgagccg caggttgcgc + 47401 tggtagaccg cgatctcgcg cgcgttggcc ttggtcaggc tgaagtcgtt gaggagcagc + 47461 ttcccggcga tcaccgactg ggctatcggg gacgtgttca ccgtgagcat gcccttgagc + 47521 ttggagagct ggtcggcgag caggccgccc ccggccattc gctggtccgc cacggtgaag + 47581 ccgacccggg caccgggcat gccggtcttg gcgaaggagc cgatgtagac cacggtcccc + 47641 gaccggtcga gggctttcag ggtggggagg cgttcggcgc cgaagagccc gtacgcgttg + 47701 tcctccagga gcaggatgcc gttggcctcg gcgacctcga ggagccggtg ccgggcggcc + 47761 aggtccatgc tggtcccggt cgggttggcg aagttcggtg tcacgtaaca ggcccggacc + 47821 cgcttgccct gttcgtcggc ccgcttcagc tgcaggacca ggtcgtccgg gtcgatgccg + 47881 ttctcggtcg actgcaccgg ccagacgggc gtgtcggtga gcagcgccgc ccccgtcagg + 47941 ccgacgtagg tgggggcggg ggcgagcagc acgtcgtgtt cggtcgcccg cagcgtgcga + 48001 agcaccagga acatcgcctc ctgggcgccc acggtgacca ccacggattc cggggcggcg + 48061 tcgatgttct cgtcctcggc gaggttgcgg gcgatgaggt cggcgatgac gcctttcgtg + 48121 gtgccgtact ggaagagcgt gcgggtgacc cccgcttcgt cgagcttccg gtcgcggcgg + 48181 aggtggtcgc agtaggcgtc gatgtactcg tggatgaggc ggatgtcgaa gaattcttcg + 48241 tacgggcggc ctgccgccat ggaaatagcc accgggtatt cgtcgatcag ctcgttgagc + 48301 aagttcatcg acgagatggc cggatcggtg agcgatccgt gcagggtttc cacgctcaat + 48361 ggggtggaca gaccgaagga atccataaat actaggattt ccatacgccg ccgaggtgtc + 48421 aagcggcggc ggtggacgcg atcgcgtggc gattcccgac gatttcccgg ctcggtaccg + 48481 cgcgcggaac aaaagccgtc cgagactgtc gatgtccatt tctcgctttt ccggacactc + 48541 gatcttcgaa ggtacggtca cacgtgtcgc cgcgcgccgc ggatgggcgg cgggcagggg + 48601 aggaccttca tgctgatgac gactgagcac gggatccggc tgtcgtacca cgaccagggc + 48661 cgtggtgcgc cggttctgct gctgaccggc accggggcgc cgagctcggt gtgggacctg + 48721 caccaggtgc ccgcgctccg cgccgccggg ttccgggtga tcaccatgga caaccgcggg + 48781 atcccgccca gcgacgacgg cgcggacggg ttcaccgtcg acgacctcgt cgcggacgtg + 48841 gccgcgctgc tcgaccacct cgacgcgtcg ccgtgccgcg tcgtcggcac gtcgatgggc + 48901 tcgtacatcg cgcaggagct ggcgctggcc cgcccggaac tggtggacgc cgtcgtgctg + 48961 atggcggcct gcggccggag cagtctcgtc cagcgcgtgc tcgcggaggc cgaggcggac + 49021 ctgatcggac gggggaccga gctgccgccg gggtaccgcg ccgccgttcg cgcgatgcac + 49081 aacctggggc ccgcgacgct cgccgacgac gacctcgctg ccgactggct cgacctgttc + 49141 gcggcgtcgg agaactgggg gccgggcgtc cgggcgcagc tgctgctgag cgcgttgccc + 49201 gaccgtcgcg aggcctaccg ggcgatcaag gtgccctgcc acgtcgtttc gttcgagcac + 49261 gacctcgtgg cgccgccgtc cgccgggcag gagctggccg ccgtgatccc cggcgccacg + 49321 caccgcacga tcccggggtg cgggcacttc ggctacctgg agaagccgga agcggtgaac + 49381 cgcgagctgc tccggttcct ccgcacggaa tccggcgtgg ctgtgacatc cggggcttcg + 49441 ccccggaccc ccgaagaact gtgacagccg gggctcgccc cgggccgggg gctccgccac + 49501 ccggaccccc gaaacctgga ggagaccgca tgaccggcgc gatcgtgccc ccgtccacgg + 49561 cacccgccct gttcgaggcg gccgccgccg cggtgccgga ccggccggcc gtggcgatgg + 49621 ggaccaccac gctgacctac gccgagctga atacccaggc caaccggctc gcgcgccggc + 49681 tcgtggcgca cggcgtgggc ccggaacggc tggtcgcact ggcgatgccg cggtcgatcg + 49741 agttcgccgt cgcgatgctg gccgtgcaca aggccggcgg tgcgtacgtg ccgatcgacc + 49801 cggactatcc cgcggaacgc cggcagcaca tgctggccgg tgcggcggcg cagtgcctgc + 49861 tgtgcctgcc cgggcaggac gtcgccggcg ctccggtcgt gctgagcgtg gcgctggcgg + 49921 agccgggccg tcccgagccg gacctggacg actccgaccg gctcgccccg ctgctgccca + 49981 gccaccccgc gtacgtcatc ttcacctcgg gctcgaccgg gcagccgaag ggcgtcgtgg + 50041 tcacgcaccg gggaatcccg aacctggccg ccgactacgt gcaccgccag aacctgctgc + 50101 ccgacagccg gttgctggct ttcgcgtccc ccagcttcga cgccgccgtc gccgagttct + 50161 ggccgatctg gctggccggt gcctgcctgg tgctggcgcc cgcgccggac ctgatccccg + 50221 gggagccgct cgcccggctg gtccgggacc ggcacatcac ccacgtgacg ctgccgccgt + 50281 ccgccctggc cccgctggaa gaagccggcg gcctgccgcc ggggctgacc ctcctggtcg + 50341 ccggcgaggc gggcccggct ccggtcgcga agcgctgggc cgccggccgc gtcatgatca + 50401 acgcgtacgg ccccaccgaa gccacggtcg cggtgaccgc gagcgacccg ctgaccggcg + 50461 aagacacgcc gccgatcggc aggccgatca ccggtgtcca cacctacgtc ctggacgacc + 50521 ggctggtccc cgtcccggac gggaccgtgg gggagctgta catgaccggc ccgggcctcg + 50581 cccgcggtta cctgcaccgg ccggccgcga ccgcggaacg gttcctgccg gacccgttcg + 50641 gcggtccggg gcagcgcatg taccgcacgg gtgaccgggt gcgggcgcgc ccggacggtc + 50701 agctcgtctt cgtcggccgg gccgacgacc agctgaaggt gcgtggtcac cggatcgagc + 50761 cggcggaggt cgaatccgcg ctgctcgcgg tggacggggt ggcccaggcg gtggtgaccg + 50821 aacacgacaa ccggctcgtg gcgtacgtgg tcggcgccgg gggcgcgcgg gtgcccgccg + 50881 aagacctcct gccgccgctg aggaagcagc tgcccgccta cctggtcccc gacgtggtcg + 50941 tcggcctgcc gcacctgccg accaccccga acggcaaggt cgaccgggcc gcgctgcccg + 51001 cgcccgaggc ggaggacacc gggcgcgcga tctccgggcg ggcgccgagc acgcccacgg + 51061 aaatccacct ggccgccttg ttcgcggaag tgctcggtgt cagcagcgtc ggcgtggagg + 51121 acagcttctt cgaggtcggc ggccactcgc tgctcgccac ccggctggtt tcccgcatcc + 51181 gcgaaagcct gcgggtccgg ctgcgggtgc aggccttctt cgacgcgccg accgtggccg + 51241 aactcgccaa ggtgctcgac gccgccctga cgtgacctgg agaccctgat gcagacgacg + 51301 aacgccgtcg acctcggcaa ccccgacctg tacacgaccc tggaacggca cgcccgctgg + 51361 cgcgagctcg cggcggaaga cgcgatggtg tggagtgacc cgggcagttc cccctccggc + 51421 ttctggtcgg tgttctcgca ccgggcgtgc gccgcggtcc tcgcgccgtc ggcgccgctc + 51481 acctccgaat acgggatgat gatcgggttc gaccgcgacc acccggacaa ctccggcggc + 51541 cggatgatgg tggtctccga acacgagcag caccgcaagc tgcgcaagct cgtcgggccg + 51601 ctgctgtccc gggcggccgc gcgcaagctg gccgagcggg tgcggatcga ggtcggcgac + 51661 gtgctcggcc gggtcctcga cggcgaggtc tgcgacgcgg ccacggcgat cggcccccgc + 51721 atccccgccg cggtcgtgtg cgagatcctc ggcgtgcccg ccgaggacga agacatgctc + 51781 atcgacctga ccaaccacgc cttcggcggc gaggacgagc tgttcgacgg gatgaccccg + 51841 cggcaggcgc acaccgagat cctcgtctac ttcgacgaac tgatcaccgc gcgccgcaag + 51901 gaacccggcg acgacctcgt cagcacgctg gtgaccgacg acgacctcac gatcgacgac + 51961 gtgctgctca actgcgacaa cgtgctcatc ggcggcaacg agaccacgcg gcacgcgatc + 52021 accggcgcgg tgcacgcgct ggcgacggtg cccggcctgc tgacggcgct gcgggacggg + 52081 agcgcggacg tcgacaccgt cgtggaagag gtgctgcgct ggacctcgcc cgcgatgcac + 52141 gtgctccggg tgacgaccgc cgacgtcacg atcaacggcc gcgacctgcc gtccggcacc + 52201 ccggtggtgg cgtggctgcc cgcggcgaac cgggaccccg ccgagttcga cgacccggac + 52261 accttcctgc ccgggcggaa acccaaccgg cacatcacct tcggccacgg catgcaccac + 52321 tgcctcgggt ccgcgctcgc gcggatcgag ctgtcggtcg tgctgcgggt gctggccgag + 52381 cgggtgtccc gggtggacct ggaacgggag ccggcctggt tgcgggcgat cgtcgtgcag + 52441 gggtaccggg aactcccggt gcggttcacc gggcgctgac ccgcgcgcgg tgccccggtg + 52501 agggtgcggc tgccccgcgc ccattttgtc cactgtggac tccggcgccc gccgcggcgg + 52561 gtgtcaagct gacaccgttg atgcggaatt ggcttggagc catcctgggg aatgagcgtt + 52621 acacctattt gacggaggaa tgtcttgact tccgattcga ctgtccagaa tttcgagatc + 52681 gactacgtcg aaatgtatgt ggaaaacctc gaggcggcca cgttcacctg ggtcgacaag + 52741 tatgctttcg ccgtcgccgg taccgaccgg tcggcggacc accggagcgt cacgctgcgg + 52801 cagggcccga tcaagctggt cctcaccgaa ccgacgtcgg accggcaccc ggcggccgcc + 52861 tacctccagt cgcacggcga cggcgtggcc gacatcgcgc tgcgcacgcc ggacgtgacc + 52921 gccgctttcg aagccgcggt gcggggcggg gccgccgccg tgcgcgaacc ggtgcggctc + 52981 gccggcgggc cgatcgtcac ggccaccatc ggcgggttcg gcgacgtcgt gcacaccctg + 53041 atccagagcg gcgaagccac cgcggccgcg ccggagacca ccggccaggg cgggggagac + 53101 gtgaacctgc tcgggctcga ccacttcgcg gtctgcctga actcgggtga cctcggtccc + 53161 acggtggcgt tctacgagcg ggccttcggg ttccggcaga tcttcgagga gcacatcgtg + 53221 gtcggcaggc aggcgatgaa ctccaccgtg gtgcagagcg cgtcggggga ggtcaccctc + 53281 accctgatcg agcccgacag caacgccgac cccggccaga tcgacgagtt cctcaaggcc + 53341 caccagggag ccggcgtcca gcacatcgcc ttcaacgccg acgacgcggt ccgcgcggtc + 53401 cgggcgctgt ccggccgcgg ggtggagttc ctgaagactc cggggaccta ttacgacatg + 53461 ctcggcgagc ggatcacgct ggagacgcac acgctggacg acctgcggtc gacgaacgtg + 53521 ctcgccgacg aggaccacgg cggccagctg ttccagatct tcgccgcttc cacccacccg + 53581 cgtcacacca tcttcttcga gatcatcgag cggcagggcg cgggaacctt cggcagctcc + 53641 aacatcaagg ccctgtacga ggccgtggag ctggagcgga ccgggcagag cgagttcggc + 53701 gccgcccggc gatgacgtac gtttccctgg gcgacctcga acgtgccgct cgcgacgtcc + 53761 tccccggcga gatctgggac ttcctcgccg gggggagcgg cgccgaggca tcgctgacgg + 53821 ccaaccgcac cgcgctcgac cgggttttcg tggttccccg gatgctgtgc gacctgaccg + 53881 gcagcaccac cgaggccgag ctcctgggcc ggcgcgccgc gctcccgatg gcggtcgcgc + 53941 cggtcgcgta ccagcggttg ttccaccccg agggcgagct ggcggccgct cgcgcggctc + 54001 gcgacgccgg cgtgccgtac accatctgca ccttgagcag cgtcccgctc gaggaggtcg + 54061 cggccgtcgg cggccggccg tggttccagc tgtactggct gcgtgacgag aagcggtcgc + 54121 tggagctcgt gcgccgcgcg gaagacgccg ggtgcgaagc gatcgtgttc accgtggacg + 54181 tgccgtggat gggacggcgg tggcgggaca tgcgcaacgg cttcgcgttg ccggaatcgg + 54241 tgacagcggc caacttcgac gccggatcgg ccgcgcaccg ccgcacgcgc ggggcctcgg + 54301 ccgtggccga ccacaccgcg cgcgagttcg cccccgccac ctgggagtcg gtggcgacgg + 54361 tccgcgcgca cacggacctg ccggtggtgc tcaagggcat cctcgccgcc gaggacgccc + 54421 gccgtgccgt cgaggccggg gccgacggga tcgtggtgtc caaccacgga ggtcgtcagt + 54481 tggacggcgc ggtgcccggg atcgaggtgc tgggcgagat cgccgccgag gtctccggcc + 54541 gctgcgaagt gctgctggac ggcggaatcc ggaccggcgg ggacatcctc aaggcggccg + 54601 cgctgggcgc gtcgggcgtg ctggtcgggc ggcccgtgat gtgggggctg gccgcggcgg + 54661 gccaggaggg cgtccggcag gtgttcgaac tgctcgccgc cgaactccgg aacgcgctgg + 54721 gcctggcggg ctgtgactcg gtgagcgcgg ccggccggct gggcacgagg gtcccccgct + 54781 acggctgatt ccccgcccca cgcccgattt cgacgtgaac ccgatccgcc cgcgcgtgcc + 54841 gggctcgact ggagcggggc ctttcccgga ggagaaaaat gctgcacacc tttgccgcgg + 54901 cggtcgcgcc ggtcgcaccg atcgccgcgc acagtctcct ggtcttcctg ctgcagatcg + 54961 gcttgctgct cctgctcgcc gtcgtgctcg gccggctggc cggccggttc gggatgcccg + 55021 cggtcgtcgg tgagctgttc gtcggggtga tcctcggtcc gtcgctgctg ggctgggcgg + 55081 cgccgggcct gcacagctgg ctgttcccgg ccgtcgccga gcagtaccac ctgctcgacg + 55141 ccgtcggcca ggtcggcgtc ctgctgctgg tcggcctcac cggcgtgcag atggacatgg + 55201 ggctggcccg caagcgcggc ctcaccgcgg ccggggtcag catcggcggc ctggtccttc + 55261 cgctcggcct ggggatcggc gcgggttacc tgctgccgaa ggtgctcgtt ccggagggca + 55321 ccgacgtcac cgtcttcgcg atgttcctcg gcgtggccct gtgcgtcagc gccatcccgg + 55381 tcatcgccaa gaccctcatc gacatgaaac tgctgcaccg caacatcggg cagctcacgc + 55441 tcaccgccgg catggtcgac gacgtgttcg gctggttcat gctgtccgtc gtcagcgcga + 55501 tggcggtcaa cgcggtctcc gccggcaccg tgctcacttc gctggcctac ctggtcgcca + 55561 tcctcgcctt ctgtttcacc ctcggccgtc cgctggcccg gggtgtgctc cgcgtcgcgg + 55621 ccaagtccga cggtcccggg ctcaccgtcg ccaccgtcgt cgtcctgatc ttcctcgccg + 55681 cggccggtac gcaggcgctc ggcctggagg cggtcttcgg cgccttcctc tgcggcatcc + 55741 tgctcgggac ggcgggcaag gtggatccgg ccaagctcgc ccccctgcgc acggtcgtcc + 55801 tgtcgggact cgcccccctc ttcttcgcca cggccgggtt gcggatggac ctcaccgcgc + 55861 tgacccaccc ggtggtcctg ctcaccggtc tggtggtgct cgccctggcc atcgccggca + 55921 agttcgccgg cgcgttcgcc ggcgcgcggc tgagcgggtt gaacaagtgg gaagggctgg + 55981 cgctcggcgc cgggctgaac gcgcggggag tcatccaggt cgtggtggcc atggtcggcc + 56041 tgcggctggg tatcctcagc gtggaggtct acacgatcat catcctcgtc gcgatcgtca + 56101 cttccctgat ggcgtcgccg atcctgcggt tcgcgatgtc cagagtggag cagaccgccg + 56161 aagaacaggt tcgcgagaac gaacaccggg cgtggaacac gcacccggcg gcgaacccgc + 56221 aggagcaaag tctctaggcg caggccggta ctgctcgggg cgacgggacg aaccgcgggt + 56281 gtccaaccgc ggaattcgcc ggtcggacgg gaaatcgctt tctcgtgcca cggcggccgt + 56341 tgaccaatcc acggcgtgga acagtgcggt gcctgccgct atcttggcgg cacgaggaac + 56401 gaaaagactt cctcgacagc gtcttcggcc tgacccgacg ccggttccgg agcagcgatg + 56461 acgcagcctt cgcacgacgg tcatgacaag gagtcgtccg atgctgcctg acctcgttcc + 56521 cccggtcgtg gtgcgccccc gcgacggccg cgaccacgcg gaccgcatcg cgttgtcggc + 56581 ggcgaccacc gacggggtgc acatgcggac cgaggacgtc cgcgcctgga tcgccgaacg + 56641 ccgtgaggcc aacgacttcc acgtcgaacg cgtcccgttc cgggacctcg accagtggtc + 56701 gttcgaggag gtgaccggca acctcgtgca ccacagcgga cggttcttca ccatcgaggg + 56761 cctgcacgtg atcgagcacg acggcccgaa cggcgacggc ccctaccgcg agtggcagca + 56821 accggtcatc aagcagcccg aagtcggcat cctcggcatc ctgggcaagg agttcggcgg + 56881 cgtcctgcac ttcctgatgc aggccaagat ggagccgggg aaccccaatc tggtgcagct + 56941 ctcgccgacc gtgcaggcca cccgcagcaa ctacaccaag gcgcacggcg gcacgaacgt + 57001 caagctgatc gagtacttcg ccccgcccga ccccgagcac gtcatcgtcg acgttctcca + 57061 ggccgagcaa ggctcgtggt tcttccgcaa gtccaatcgc aacatgatcg tcgagaccgt + 57121 cgacgacgtg ccgctgtggg acgacttctg ctggctcacc ctcggccaga tcgcggagct + 57181 gatgcacgag gacgagacga tcaacatgaa cgccaggagc gtgttgtcgt gcctgcctta + 57241 ccacgacgcg gctcccggcg cgcggttctc cgacgtccag ctcctgtcgt ggttcacgaa + 57301 cgagcgttcg cggcacgacg tgcgtgcccg ccgcatcccg ctcgcggacg tgtgcggctg + 57361 gaagcagggc gacgaagcga tcgagcacga ggacggccgt tatttccggg tcctcgcggt + 57421 cgccgtgcgg gggagcaacc gcgagcggat cagctggacc cagccgctgc tcgaatccgt + 57481 cgacctgggt gtcgtcgcgt tcctcgtgcg cgagatcggc ggtgtgcccc acgttctggt + 57541 gcacgcccgc gccgacggtg gtttcctgga cacggtcgag ctggcaccga ccgtccagtg + 57601 cactccccaa aactacgcgc acctgcccgc ggagaaccgc ccgcccttcc tcgacgtcgt + 57661 cctcaacgct ccggagtcgc gcattcgtta cgaggcaata cattccgaag agggcgggcg + 57721 cttcctcaac gtccgggcgc gctacctcgc gatcgaagcg gacgacacgg tcgagccccc + 57781 tcccggctac acctgggtca cgccggccca gctcaccgcg ctcacccggc acgggcacta + 57841 cgtcaacgtc gaggcccgca cgctgctcgc ctgcctcaac gccgcgacgg cccagcctcg + 57901 aggcggtgcc tgacatgaag acggtcaccg tcctcggcgc ctcgggtttc gccggctcgg + 57961 ccgtccaccg gctgggcgaa gtcttccggc tcgtggcacg ggaggtcgcc gggcacaccg + 58021 gacgcggccc ggtggacgtg ccctgcgtgg cacccccgtc gcacgcgccc gagacggatt + 58081 tccggagcgt cacggtcggt tccacgccgt tccggtcgat caccggccgg cgcccggaga + 58141 tgtcgcggcc cgagggagtg cgccgcactg tcgccgcttt gccgtcatca gatcagggaa + 58201 aggttcgcac atgaccacgc gtgtatggga ctaccaggcc gaataccgga acgagcggct + 58261 cgacctgctg gacgcggtcg agacggtctt cgactcgggg cagctcgtgc tcggggcgag + 58321 cgtgcgtggc ttcgaggcgg aattcgccgc gtaccacggg gtcgggcact gcgttggcct + 58381 cgacaacggg acgaacgcga tcaagctcgg cctgcaggcg ctgggtgtcg ggccgggcga + 58441 cgaggtgatc acggtgtcca acaccgccgc cccgaccgtg gtcgccatcg acggcaccgg + 58501 cgccacgccg gtcttcgtcg acgtccgcga ggacgacttc ctgatggaca ccggccaggt + 58561 cgcggccgcg atcaccgagc gcaccaagtg cctgctgccc gtgcacctat acggacagtg + 58621 cgtggacatg gctccgctga aggacctcgc cgcgaagcat ggactgtcca ttttggagga + 58681 ctgtgcccag gcgcacgggg cccggcagaa cggaacggtc gcgggctcga ccggtgacgc + 58741 ggccgcgttc tccttctacc cgaccaaggt gctcggggcg tacggcgacg gcggcgcgac + 58801 catcacctcc gacgaatccg tggaccggcg gctgcggcgg ctgcgctact acggcatgga + 58861 caagcagtac tacacgctgg aaacgccggc ccacaacagc cggctggacg aggtccaggc + 58921 cgagatcctg cggcgcaagc tcaagcggct cgacacctac gtcgccgccc gccaggccat + 58981 cgcccagcgc tacgtcgacg gactgggcga cacggagctg aagctgccgc ggaccgtccc + 59041 cggcaacgag cacgtgtact acgtgtacgt cgtgcgccac ccgcgacgtg acgacatcat + 59101 cgagcgcctc aaggcgtacg acatccactt gaacatcagc tatccgtggc cggtgcacac + 59161 catgaccggt ttcgcccacc tcggctacgc gaccggcgcg ttcccggtca ccgaaaaact + 59221 ggccggcgag atcttctcgc tgccgatgta ccccgcgctt tccgccgacc tgcaggacaa + 59281 ggtcatccat gcggtgcgcg aggtggtgtc caccctctga ccactccacc aacaggagta + 59341 gccgtgcaag cacgcaaact cgccgtcgac ggcgcgatcg agttcacccc ccgggtcttc + 59401 cccgacgacc ggggcctgtt cgtctcgccg ttccaggaag aggccttcgc cgaggcccgc + 59461 ggcggcccgc tgttccgggt ggcgcagacg aaccacagca tgtccaagcg cggcgtggtg + 59521 cgtggcatcc actacacgat gacgccaccg ggcacggcca agtacgtcta ctgtgcccgc + 59581 ggcaaggcgt tggacatcgt ggtcgacatc cgggtcggct cgccgacgtt cggccggtgg + 59641 gacgcggtcc tgctggacca gcgggaccac cgggcgatgt acttcccggt gggggtcggc + 59701 cacgcgttcg tggccctcga ggacgacacc gccatgtggt acctgctctc cacggcctac + 59761 gtggcgcgga acgagctcgc cctctcggtc ctggatcccg cgctgggcct gcccatcgac + 59821 gccgacgtcg acccgatcct gtccgaacgg gaccaggtgg ccgtcacgct cgccgaggcg + 59881 ggacggcagg ggttgctgcc ggactacgcc acctgcctgg agctcgaccg gcagctgtcc + 59941 gaagtctccc tttccgcctg acctcacgac cgatcgggcc gaaggcgtcc ttcaccacgt + 60001 ccgaacgcgg tgaaggacgc cttcgacgga aaccaatcac gaactccgcg ccttggacga + 60061 cattgaccgc cgagttcggc cgagcctact ttcggaatgt ccggtccgct ctttcgcgaa + 60121 aggtgagatc catgcccgct gcgcaggtca agcagctgct tcgaagcaag ttgagaacgt + 60181 gggggtggat gtatcgatga cgaccagcat cgaacccgcc gaagaccttt cggtcctctc + 60241 cggcctgacc gagatcactc gattcgccgg cgtgggaaca gcggtttccg cgtcgtccta + 60301 ttcgcagtcc gaggtcctcg acatcctcga cgtcgaggac cccaaaatcc gctcggtctt + 60361 cctgaacagc gccatcgacc ggcgttttct caccctgccg ccggagagtc ccggtggggg + 60421 ccgcgtgtcc gaaccgcagg gcgacctcct ggacaagcac aaggagctcg cggtcgacat + 60481 ggggtgccgg gccctcgagg cctgcctgaa gtcggcggga gcgacgcttt cggacctgcg + 60541 tcacctgtgc tgcgtcacct cgaccgggtt cctgaccccc ggcctcagcg cactgatcat + 60601 ccgcgaactg gggatcgacc cgcactgcag ccgctcggac atcgtgggca tggggtgcaa + 60661 cgccggcctg aacgcgctca acgtcgtcgc cggctggtcc gcggcgcacc cgggtgaact + 60721 cggcgtcgtc ctgtgcagcg aggcgtgttc cgcggcctac gccctggacg gcaccatgcg + 60781 gaccgcggtg gtcaacagcc tcttcggcga cggatccgcc gcactcgccg tgatttccgg + 60841 tgacggccgc gtgcccggcc ctcgggtcct caagttcgcg agctacatca tcaccgacgc + 60901 gctggacgcc atgcgctacg actgggaccg tgaccaggac cggttcagct tcttcctcga + 60961 cccgcagatt ccgtacgtgg tcggggcgca cgcggagatc gtcgccgacc ggctgctgtc + 61021 cggcacgggc ctgcggcgca gcgacatcgg gcactggctg gtgcattccg gcggcaagaa + 61081 ggtgatcgac tccgtcgtcg tcaacctcgg cctgagccgc cacgacgtcc gccacaccac + 61141 cggagttctc cgtgactacg ggaacctttc cagcggctcc ttcctcttct cctacgagcg + 61201 gctcgccgaa gaaggcgtca cccggcccgg agactacggc gtactcatga ccatggggcc + 61261 tggctccaca atcgaaatgg cgctgatcca atggtgaacg gtgaactggt gctccggctc + 61321 gacggcaccc ggcccctgtc ggccgcgtcg gtcgaggaac tggacgccct ctgcgatcgc + 61381 gtggaagacc accgggaacc cggcccggtc accgtccacg tcacgggtgt cccggccgcc + 61441 ggctggacgg cggaggtgac ggtcggcctg gtctccaagt gggaacgggt ggtgcgccgg + 61501 ttcgagcggc tcggcaggct caccatcgcc gtggcggcgg gtgactgcgc cggaacggca + 61561 ctggacgtcc tcctcgcggc cgacgtccgg atcgccgcgc cgggcacccg gctgctgctc + 61621 gcccgggccg gcggcgcgcc gtggcccggg atgaccgtgc accggctcac ccggcaggcc + 61681 ggggcggccg gcatccggcg ggcggtgctg ctcggcgccc cgatcgaggc cggtcgcgcg + 61741 ctggccctga acctggtcga cgaggtctcg gaggacccgg cggccgcgct ggcggagctc + 61801 gccgggacgg ccggtgccgt ggacggcaag gagctggcga tccgccgtca gctggtcttc + 61861 gaagccggct cgaccgcctt cgaggacgca ctcggcgccc acctggccgc ggcggaccgg + 61921 gccctgcgca gggaaaccgc gtcgtgacgg ccgcaccccc gacgtctccg ccggggccgc + 61981 ggctcgaccg cccggccctg gcggaggcag ccggccgcgt cgacgacctg ctcgccgaac + 62041 tgccgccgcc gtccgcccgg acccccgggc aacgcgaggc cgcgtcttcg gcgctggacg + 62101 ggatccgggc gatgcgcgcg gactacgtcg gggcgcacgc cgaagcgatc tacgacgaac + 62161 tcaccgacgg ccggtcccgg tccctgcgca tcgacgagct cgtccgggcc gccgcccggg + 62221 cctttcccgg cctggtgccc acggacgagc agatggcggc cgagcgcgcg cggccgcagg + 62281 cggagaagga cgggcgggag atcgaccagg gcatcttcct gcgcgggatc ctgcgggcgg + 62341 agcgggccgg cccgcacctg ctcgacgcca tgctccagcc caccccgagg gcgctgaagc + 62401 tgctcccggg attcaccgag tccggtgtcg tgcagatgga ggcggtccgg ctggaacgcc + 62461 gggacggcgt cgcgtacctg accctgtgcc gggacgactg cctcaacgcc gaggacgccc + 62521 agcaggtcga cgacatggag accgcggtcg acctggcgct gctcgacccg gccgtccggg + 62581 tggggctgct gcgcggcggg gagatgagcc atccccgcta ccgggggcgc cgcgtgttct + 62641 gcgccggcat caacctcaag aagctgagct cgggcggcat cccgctggtc gatttcctgc + 62701 tgcggcggga gctggggtac atccacaaga tcgtgcgcgg cgtggtcacc gaaggttcgt + 62761 ggcattcgcg gctgaccgac aagccgtgga tcgcggccgt cgactccttc gccatcggcg + 62821 gcggggccca gctgctcctc gtcttcgacc acgtgctggc cgcgtccgac gcctacttca + 62881 gcctgcccgc ggcgaaggag gggatcatcc ccggcgcgtc gaacttccgg ctctcccggt + 62941 tcgccgggcc ccgcgtggcc cggcaggtga tcctcggcgg ccgccggatc cgggcggacg + 63001 agccggatgc ccgactgctc gtcgacgagg tcgtcccgcc ggcggagctg gacgcggcga + 63061 tcgacgccgc gctggcccgc ctggacgggg aggcggtgct ggccaaccgg cgcatgctga + 63121 acctggccga ggaaccgccg gacgaattcc gccggtacat ggccgagttc gccctgcagc + 63181 aggcgctgcg gatctacggc gaagacgtga tcggcaaggt cggccggttc gcggcgggct + 63241 cgtcgtgagc ggcgaccggg tgcggtacga gaagaaggac cacgtcgcct acgtgacgct + 63301 ggaccggccc ggcgtgctga acgccatgga ccggcggacg cacgaggagc tcgccggaat + 63361 ctgggacgac gccgaggccg acgacgaagt ccgggtggtg gtgctgaccg gcgccgggaa + 63421 ccgcgcgttc tccgtcggcc aggacctcaa ggaacgcgcc cggctgaacg aagcgggtgc + 63481 gcgggccacg acgttcggca gccggggcca gccggggcat ccccggctga ccgaccggtt + 63541 caccctgtcc aagccggtgg tcgcccgggt gcacggctac gcgctgggcg gtggcttcga + 63601 gctggtgctc gcctgcgaca tcgtcatcgc ctccgacgat tcggtgttcg ccctgccgga + 63661 ggtccgcctc ggcctgatcc ccggggcggg cggggtgttc cggctgccgc ggcagctgcc + 63721 gcagaaggtg gcgatgggct acctgctgac cggccgccgg atggacgcgg cgacggcgct + 63781 gcggtacgga ttggtcaacg aggtcgtgcc accggaggaa ctggaccggt gcgtcgccga + 63841 atggacggac agcctcgtgc gcgccgctcc gctttcggtt cgcgcgatca aggaggccgc + 63901 gctacggtcg ctcgacctcc ccctggagga ggcgttcacc gcttcctaca cctgggaaga + 63961 gcgccgtcgg cggagcgaag acgcgatcga gggtccccgg gccttcgccg cgaaacggga + 64021 tccggtctgg accggggaat accggccggg ttgaccaggc tgttcggtgg tttcgagtga + 64081 ggatggtgcg gagatgtcgg tgacggaatt cgctgtgacg gcgcgaaggg gaccggtcgc + 64141 ggccgggccg gggcaccggg tgtggccgcg atgacccaca ccgtcgccac gaccgacctc + 64201 gacaaccagc gcatcgagcg gatcgtcccc ctggtcaccc ccgccctgct gcatcacgaa + 64261 ctgccgctca gcgccaccgc ggccgagacg gtgcgaaagg gccgcgagag cgtcgtccgc + 64321 gtcctcgacg gcacggacga ccggctgctc gtgatcaccg ggccgtgctc catccacgac + 64381 cccgccgcgg cgctcgacta cgccggccac ctcgccgcca tcgccggcga ggtcgccggc + 64441 gacctgctcg tcgtcatgcg cgtgtacttc gagaaacccc ggacgatcgg cggctggaag + 64501 gggctcatca acgaccccca cctcgacggc accggcgacg tcaaccacgg gctgcgcacg + 64561 gcccggcacc tcctgctgga gctcgccgaa cgcggcctgc ccgccgcgtg cgaatggctg + 64621 gacaccacca ttcccgcgta cttcgcggac acggtctcgt ggggcgccat cggcgcccgc + 64681 accgtggaaa gccagaacca ccgcatgctc gccagcggcc tgtccatgcc cgtcggcttc + 64741 aagaaccgcc gcgacggcga catcaccgtc gccatcgacg cgatccgggc cgccgcggtc + 64801 cgccacgtgg tccccggcgt cgaccccggc gggttgcccg ccatcctgca cacggcgggc + 64861 aacccggact gccacgtcgt cctgcgcggt ggtgacggcg cgcccaacca cgactccgcg + 64921 tccgtccaca agacactgac cgcgctggag gccgcgggcc tgcccggccg ggtggtgatc + 64981 gacgccagcc acgacaacag cggcaaggac caccaccgcc agcccctcgt cgcggccgag + 65041 atcgcgggcc aggtcgagaa cgggcggaac ggcatcgtcg gggtgatgct cgagtccaac + 65101 ctccgcgccg gccgccagga cctccagccg ggccgtccgc cggcatacgg ccagtccatc + 65161 accgacgcct gcatcgacgt ccccaccacc cggacggtcc tccacggcct cgccgcggcg + 65221 gccgcggccc ggcgaaagct cggcaagcaa gcaagctgag caagtccgtg aatggcacat + 65281 tgccggacat agagtccctc gatgtgccat tcacggactt ggccctgacg gcggccggca + 65341 ccgacacgga tcccccggat ggggtatacc cgaaccggtg gaggaaatcc ggacagagtt + 65401 catccgaccg ctgctgacat cgttgtccgc gcacgccgcg gaccgccccg cctactccga + 65461 cgaccggcga acgctgacct acggcgggct ggcccacgcc gccgcggagc tcgccgccgg + 65521 gctcggggtg gcccggggcg accgcgtgct ggtgcacgtc ggcagccggg tcgagttcgc + 65581 cgtcgccctg ctggcggtgc tgcgggcggc ggccgtggga gtcccggtga gcgtgcgctc + 65641 gaccgacgcc gaactcgccc acctggcggc cgattcgggg gcgacgctcc tggtcacgga + 65701 ggcgcggcac gccgccgcgg ccgaacggct gcgccgcgac cggcccggtc tgcgggttct + 65761 cttcgtcgac gatccgccgc ccgcgcgggt gggcgagccg cgcgacgacc tcggactgga + 65821 cgagccggct tggctgctct acacctccgg caccaccggc cggcccaagg gcgtcctgct + 65881 ctcgcagcgc gcgatgctgt ggtcgacggc cgcgtactac gtccccatgc tcgggctcga + 65941 cgccgaagac accgtgctgt ggccgttgcc gacgcaccac gcgtacgccc tgtcgctggc + 66001 gttcgtcacc acgatcgcgc tgggggcgca cacccggctg gccgacgggt gcacgccgga + 66061 cctgctcgcc cggtaccccg gcagtgtgct cgccggtgtt cccgcgctct acctccggct + 66121 ccgccaggag tccggcggtc ccctcgccgc gccgcggctg tgcctgagcg gcggcgcgcc + 66181 gtgcacgccg gcgacccggg ccgcggtccg ggacctgttc gggctcccgg tggccgacgg + 66241 ctacgggagc accgagacgg gcgggaaggt cgccgccgag cttcccggtg aagcgggcct + 66301 ggtcccggtg cccggcttgg agatccggat cgacgcgggg gaggtgctcg tccgcggtcc + 66361 cgggctgatg ctgggctacc acgggcgaac cgaatcaccg ctgcgggacg gctggtaccg + 66421 cacgggcgac gccggccggt tcgagggcgg ccggctcgtg ctcgagggcc gcgtggacga + 66481 cgtgatcgtc tgcggtggcc agaacgtcca ccctgccgaa atcgaggcgg tgctcgaaga + 66541 gtcgccttcg gtgcgggacg tcctcgtgct cggccgtccc gacgacgtcc tgggcgaggt + 66601 gccggtggcg ttcgtggtcg ccgggcccgg cggcttcgac gccgaggagc tgcgtggccg + 66661 gtgtctaga +// diff --git a/tests/data/balh.embl b/tests/data/balh.embl new file mode 100644 index 0000000..6ebbd3d --- /dev/null +++ b/tests/data/balh.embl @@ -0,0 +1,2033 @@ +ID AME16952 standard; genomic DNA; PRO; 66669 BP. +XX +AC Y16952; +XX +SV Y16952.3 +XX +DT 01-OCT-1998 (Rel. 57, Created) +DT 18-APR-2005 (Rel. 83, Last updated, Version 7) +XX +DE Amycolatopsis balhimycina biosynthetic gene cluster for balhimycin, strain +DE DSM 5908 +XX +KW abc gene; bbr gene; bgtfA gene; bgtfB gene; bgtfC gene; bhaA gene; +KW bhp gene; bmt gene; bpsA gene; bpsB gene; bpsC gene; bpsD gene; +KW dihydroxyphenylacetic acid synthase; dpgA gene; dpgB gene; dpgC gene; +KW dpgD gene; enoyl-CoA hydratase; enoyl-CoA-isomerase; glycosyl transferase; +KW halogenase; hydrolase; hydroxyacyl-dehydrogenase; ORF1; ORF10; ORF11; ORF2; +KW ORF3; ORF5; ORF6; ORF7; ORF8; ORF9; orfX; oxyA gene; oxyB gene; oxyC gene; +KW oxyD gene; P450 monooxygenase; pdh gene; peptide synthetase; pgat gene; +KW phenylglycine amino transferase; +KW putative ABC transporter ATP-binding protein; +KW putative prephenate dehydrogenase; putative two-component system respons; +KW putative two-component system sensor kinase; +KW putative VanY-type carboxypeptidase; StrR family transcriptional regulator; +KW vanR gene; vanS gene; vanY gene. +XX +OS Amycolatopsis balhimycina +OC Bacteria; Actinobacteria; Actinobacteridae; Actinomycetales; +OC Pseudonocardineae; Pseudonocardiaceae; Amycolatopsis. +XX +RN [1] +RC revised by [6] +RA Pelzer S.; +RT ; +RL Submitted (24-MAR-1998) to the EMBL/GenBank/DDBJ databases. +RL S. Pelzer, Universitaet Tuebingen, Lehrstuhl Mikrobiologie-Biotechnologie, +RL Auf der Morgenstelle 28, D- 72076 Tuebingen, FRG +XX +RN [2] +RX PUBMED; 10390204. +RA Pelzer S., Suessmuth R., Heckmann D., Recktenwald J., Huber P., Jung G., +RA Wohlleben W.; +RT "Identification and analysis of the balhimycin biosynthetic gene cluster +RT and its use for manipulating glycopeptide biosynthesis in Amycolatopsis +RT mediterranei DSM5908"; +RL Antimicrob. Agents Chemother. 43(7):1565-1573(1999). +XX +RN [3] +RX PUBMED; 11932455. +RA Recktenwald J., Shawky R.M., Puk O., Pfennig F., Keller U., Wohlleben W., +RA Pelzer S.; +RT "The nonribosomal biosynthesis of vancomycin-type antibiotics: A +RT heptapeptide backbone and eight peptide synthetase modules"; +RL Microbiology (Reading, Engl.) 148(Pt 4):1105-1118(2002). +XX +RN [4] +RX DOI; 10.1074/jbc.M106580200. +RX PUBMED; 11495926. +RA Pfeifer V., Nicholson G.J., Ries J., Recktenwald J., Schefer A.B., +RA Shawky R.M., Schroeder J., Wohlleben W., Pelzer S.; +RT "A polyketide synthase in glycopeptide biosynthesis: the biosynthesis of +RT the non-proteinogenic amino acid (S)-3,5-dihydroxyphenylglycine."; +RL J. Biol. Chem. 276(42):38370-38377(2001). +XX +RN [6] +RC revised by [10] +RA Pelzer S.; +RT ; +RL Submitted (03-AUG-2001) to the EMBL/GenBank/DDBJ databases. +RL S. Pelzer, Universitaet Tuebingen, Lehrstuhl Mikrobiologie-Biotechnologie, +RL Auf der Morgenstelle 28, D- 72076 Tuebingen, FRG +XX +RN [7] +RX DOI; 10.1002/1521-3773(20011217)40:24<4688::AID-ANIE4688>3.0.CO;2-M. +RX PUBMED; 12404385. +RA Bischoff D., Pelzer S., Bister B., Nicholson G.J., Stockert S., Schirle M., +RA Wohlleben W., Jung G., Suessmuth R.D.; +RT "The biosynthesis of vancomycin-type glycopeptide antibiotics - the order +RT of cyclization steps"; +RL Angew. Chem. Int. Ed. Engl. 40(24):4688-4691(2001). +XX +RN [8] +RX DOI; 10.1016/S1074-5521(02)00101-1. +RX PUBMED; 11880037. +RA Puk O., Huber P., Bischoff D., Recktenwald J., Jung G., Suessmuth R.D., +RA van Pee K.H., Wohlleben W., Pelzer S.; +RT "Gylcopeptide biosynthesis in Amycolatopsis mediterranei: Function of a +RT halogenase and a haloperoxidase/perhydrolase"; +RL Chem. Biol. 9(2):225-235(2002). +XX +RN [9] +RA Stegmann E., Bischoff D., Kittel C., Pelzer S., Puk O., Recktenwald J., +RA Weist S., Suessmuth R.D., Wohlleben W.; +RT "Precursor-directed biosynthesis for the generation of novel +RT glycopeptides"; +RL (in) Unknown A. (eds.); +RL SCHERING-PROCEEDINGS BIOCOMBINATORIAL APPROACHES FOR DRUG FINDING:0-0; +RL Springer Verlag, Berlin, Heidelberg +XX +RN [10] +RP 1-66669 +RA Pelzer S.; +RT ; +RL Submitted (07-APR-2004) to the EMBL/GenBank/DDBJ databases. +RL S. Pelzer, Universitaet Tuebingen, Lehrstuhl Mikrobiologie-Biotechnologie, +RL Auf der Morgenstelle 28, D- 72076 Tuebingen, FRG +XX +XX +FH Key Location/Qualifiers +FH +FT source 1..66669 +FT /organism="Amycolatopsis balhimycina" +FT /strain="DSM 5908" +FT /mol_type="genomic DNA" +FT /db_xref="taxon:208443" +FT CDS complement(<1..759) +FT /transl_table=11 +FT /gene="vanS" +FT /product="putative two-component system sensor kinase" +FT /db_xref="GOA:Q799B6" +FT /db_xref="InterPro:IPR003660" +FT /db_xref="InterPro:IPR003661" +FT /db_xref="UniProtKB/TrEMBL:Q799B6" +FT /protein_id="CAG25751.1" +FT /translation="MDRAAGMSVRLKLTLSYACFLVLAGVLLLASVWLFLLRDVPDVLA +FT KPPPGGVLERSVLVRNFLPAAGSVLFFLLLFGLLGGWILAGRMLAPLTRITDAARMAAN +FT GSLSHRIRLEGTEDEFRELADAFDAMLARLEAHVAAQRRFAANASHELRTPLAITQALL +FT EVARNDPAKDPLLVFDRLHAVNARAIDLTEALLVLSRADQRAFTREPVDLSLLVEEAIE +FT TLLPIAEKRRVVIIASGHISRVVGSATLLLQ" +FT CDS complement(770..1447) +FT /transl_table=11 +FT /gene="vanR" +FT /product="putative two-component system response +FT regulator" +FT /db_xref="GOA:Q799B5" +FT /db_xref="HSSP:1KGS" +FT /db_xref="InterPro:IPR001789" +FT /db_xref="InterPro:IPR001867" +FT /db_xref="UniProtKB/TrEMBL:Q799B5" +FT /protein_id="CAG25752.1" +FT /translation="MRVLIVEDEPYLAEAIRDGLRLEAIAADTAGNGDTALELLSLNTY +FT DIAVLDRDIPGPSGDEIAKRIVASGSGLPILMLTAADRLDDKITGFELGADDYLTKPFE +FT LRELVLRLRALDRRRAHNRPPVLEIAGLRLNPFRREVYRDDRYIALTRKQFAVLEVLVS +FT ADGGVVSAEELLERAWDKNADPFTNAVRITVSALRKRLGEPWIITTVAGVGYRIGAAPG +FT AGR" +FT CDS 1537..2157 +FT /transl_table=11 +FT /gene="vanY" +FT /product="putative VanY-type carboxypeptidase" +FT /db_xref="GOA:Q799B4" +FT /db_xref="InterPro:IPR003709" +FT /db_xref="UniProtKB/TrEMBL:Q799B4" +FT /protein_id="CAG25753.1" +FT /translation="MTYRESARTTTRRIPGAVVPVARRIRGVLLAGLRAVGTRIARSPG +FT RPVRPQDRAGLGKTHGAVPAGVTVFDDDVPAVTRLDPALLSALRRAATAAADGGVELCV +FT NSGWRSPEYQSRLLREAVAKYGSAAAAARWVATPETSIHVAGKAVDIGPPASASWLSEH +FT GADYGLCRVYRNEPWHFELRPEAIEHGCPPLYADPSHDPRLRR" +FT CDS 3011..3976 +FT /transl_table=11 +FT /gene="bbr" +FT /product="StrR family transcriptional regulator" +FT /db_xref="GOA:Q799B3" +FT /db_xref="InterPro:IPR003115" +FT /db_xref="UniProtKB/TrEMBL:Q799B3" +FT /protein_id="CAG25754.1" +FT /translation="MDPTRVDIFALPAVEIELSRLSSASSPRTSGEDPEHVETLLSAEG +FT ELPPILVHRPTMQVLDGLHRLKVARVRGDTKILARLVDATESDAFVLAVEANIRHGLPL +FT SLADRKRAAVQIIGTHPQWSDRRVASATGISAGTVADLRRRAGEDGTEARIGRDGRVRP +FT SDGSERRRLAAELIRSDPGLSLRQVAKQVGISPETVRDVRGRLERGESPTPDGTRRLPA +FT KPHPLRLSEPDFGRAVDQDRLALLERLKSDPALRLNEVGRILLRMLTMHSMDGQEWERI +FT LQGVPPHLHGVIAGFARDHARVWAEFADHLESRATELAAG" +FT CDS 4106..4981 +FT /transl_table=11 +FT /gene="pdh" +FT /product="putative prephenate dehydrogenase" +FT /db_xref="GOA:Q799B2" +FT /db_xref="InterPro:IPR000205" +FT /db_xref="InterPro:IPR003042" +FT /db_xref="InterPro:IPR003099" +FT /db_xref="InterPro:IPR008235" +FT /db_xref="UniProtKB/TrEMBL:Q799B2" +FT /protein_id="CAG25755.1" +FT /translation="MTIEKALVVGTGLIGTSVALALREKGVAVFLSDVDTEAARLAQVL +FT GAGREWAGEGVDLAVIAVPPHLVGDRLADLQKQGAARVYTDVASVKADPIADAERLGCD +FT LASYVPGHPLAGRERSGPAAARAELFSGRPWALCPGPETDAEALRRVRELVSLCGATAV +FT VVGAGEHDSAVALVSHAPHVVASAVAASLASGDDVALGLAGQGLRDVTRIAAGDPLLWR +FT RILSGNTRPVAGVLERIAADLAAAASALRSGDLDEVTDLLRRGVDGHGRIPGQRGGSLP +FT GRNPAGSPGR" +FT CDS 5180..7135 +FT /transl_table=11 +FT /gene="abc" +FT /product="putative ABC transporter ATP-binding protein" +FT /db_xref="GOA:Q799B1" +FT /db_xref="InterPro:IPR001140" +FT /db_xref="InterPro:IPR003439" +FT /db_xref="InterPro:IPR003593" +FT /db_xref="InterPro:IPR010916" +FT /db_xref="InterPro:IPR011527" +FT /db_xref="UniProtKB/TrEMBL:Q799B1" +FT /protein_id="CAG25756.1" +FT /translation="MDMVLRFEGVDKSPDDPDPWVTKVRKGTLRRVLAYFRPHVGKVAL +FT FCLVAVLESLIVVATPLLLKELIDNGIVKNDLGVVILMAGLTAVLAVLGAGLTMVSGYI +FT SGRIGEGITYDLRVQALGHVRRLPIAFFTRTQTGVLVGRLHTELIMAQQHFTGLLMAAT +FT SVVMVVVVLAELIYLSWIVAIVSLVLIPIFLVPWIRVGRAIQRRSIRLMDANTGLGGLL +FT QERFNVQGAMLSKLFGRPAEEMAEYEERAGEIRKIGVSLSVWGRMAFVMMALMASLATA +FT LVYGIGGGLVLAGAFELGTLVAIATLLQRLFGPITQLSGMQELAQTVVVSFSRVFELLD +FT LKPLIQERPDAIALKKKVVPDVEFEHVSFRYPTADEVSLASLEHLRAERERSEVTPDVL +FT RDVSFHAQAGTLTALVGPSGAGKSTITHLVSRLYDPNGGTVRLGGHDLRDLTFESLREA +FT VGVVSQDAYLFHDTIRENLLYARPTATEDELMEACKGAQIRDLIDSLPLGLDTVTGDRG +FT YRMSGGEKQRLAIARLLLKEPSIVVLDEATAHLDSESEAAVQRALKTALHGRTSLVIAH +FT RLSTIREADQILVIDGGRVRERGTHDELLAQGGLYAELYHTQFANPAANDPKPEIEDEL +FT DDIEPEPVIQHMGYGG" +FT CDS 7138..16635 +FT /transl_table=11 +FT /gene="bpsA" +FT /product="peptide synthetase" +FT /function="involved in the biosynthesis of the balhimycin +FT heptapeptide backbone" +FT /db_xref="GOA:Q939Z1" +FT /db_xref="HSSP:1AMU" +FT /db_xref="InterPro:IPR000873" +FT /db_xref="InterPro:IPR001242" +FT /db_xref="InterPro:IPR006162" +FT /db_xref="InterPro:IPR006163" +FT /db_xref="InterPro:IPR009081" +FT /db_xref="InterPro:IPR010060" +FT /db_xref="InterPro:IPR010071" +FT /db_xref="UniProtKB/TrEMBL:Q939Z1" +FT /citation=[3] +FT /protein_id="CAC48360.1" +FT /translation="MNSAARTTPTMLDLFASHVDRTPDAVAVAGGDGVLTYRQLDERAG +FT RLAGRLASRGIRRGDRVAVVMDRSADLVVALLAVWKAGAAYVPVDAGYPAPRVAFMVAD +FT SAAKLVVCSAASRGAVPAGVESLEPAAAAEEGASDAPAATVRPGDPAYVMYTSGSTGTP +FT KGVTISQGCVAELTMDAGWAMEPGEAVLMHSPHAFDASLFELWMPLASGVRVVLAEPGS +FT VDARRLREAAAAGVTRVYLTAGSLRAVAEEAPESFAEFREVLTGGDVVPAHAVERVRTA +FT APRARFRNMYGPTEATMCATWHLLQPGDVVGPVVPIGRPLTGRRVQVLDASLRPVGPGV +FT VGDLYLSGALAEGYFNRAALTAERFVADPSAPGQRMYWTGDLAQWTADGELVFAGRADD +FT QVKIRGFRIEPGEIEAALIAQPDVHDAVVAAVDGRLIGYVVTEGDADPRVIRERLGAVL +FT PEHLVPAAVLALDALPLTGNGKVDRSALPAPEFAASAAGRAPSTDAERVLCGLFAEVLG +FT VARAGVDDGFFELGGDSIGAMRLAARAAKAGLLVTPAQIFEEPTPARLAAVARPVPAGG +FT PVDGPLLTLTAAEEAELALAAPGAEEIWPLAPLQEGLLFESILDDQGSDIYQVQVILEL +FT NGPVDAPRLRAAWDAVVRRHPELRLSFHRLASGKTVQAVHGDVTPPWRVVDLTGAGDVD +FT AAVAALVAEEQQQRFELATAPLVRLVLVRIAADRYRLLFVIHHILVDGWSVAVILNDVS +FT EAYEAGEPVPEQRGGATFRDYLAWLDRQDDDAARAAWRAELAGLDEPALIATSGVETEY +FT DYRATHLTPALHTRLLGFAREHGLTPSTVVHAAWAMVLARLTRRTDVVFGTMVATRPPE +FT LAGIESMPGLLMTAVPVRVPLDGGQSVLDMLTDLHSRQTALKRHQYLGLPEIQKAAGPG +FT ATFDTMLVVENYPREYARRYTHLRTIEGTHYPVTLGITPGDRFKIQLGYWPGQVPDTVA +FT ESLLEWFVGAIGALVADPAGLVGRIGMGAADVRRWDPPLQAGEPLPALVGRMAARPPDN +FT VAVVDGDGALSYADLWERSLKFAAVLRAHGVRSEDRVGLVVGRSAWWTVGMLGVLLAGG +FT TFVPVDPAYPAERKEWIFRSANPMLVVCAGATRGAVPAEFADRLVVIDEVDPAAGSAGD +FT LPRVDPRSAAYVIYTSGSTGTPKGVVVTHAGLGNLALAHIDRFGVSPSSRVLQFAALGF +FT DTIVSEVMMALLSGATLVVPPERDLPPRASFTDALERWDITHVKAPPSVLGTADVLPST +FT VETVVAAGELCPPGLVDRLSADRRMINAYGPTETTICATMSMPLSPGQHPIPFGKPVPG +FT VRGYLLDSFLRPLPPGVTGELYLAGIGVARGYLGRSALTAERFVADPFVPGERMYRTGD +FT LAYWTEQGELVSAGRADDQVKIRGFRVEPREIEFALSGYPRVTQAAVAVRDDRLVAYVT +FT PGDIDTQAVRAHLASRMPQYMVPAAVVALDALPLTAHGKIDRRALPDPDFTAGKQAREP +FT ATETERVLCELFAGVLGLARVGVDDSFFELGGDSILSMQLAARARRSGLTFTAADVFDG +FT KTPERIAQLAAESSVPEPGRSPKPDGVGDVAWTPVMWMLGDGVAGPAFAQWMVVGTPSD +FT LTEKALAAGFAAVVDTHDMLRARVVADEGGRRLVVGERGSVDVAGAVTRIRADGRSLDE +FT AVADAARAAVTRLDPSAGVMAQAVWVDAGPDQVGRLVVVAHHLSVDGVSWRILLSDLQA +FT ACEAAVAGREPVLEPVGASFKRWAGLLAEWAVSAERAGELAAWKAILGPGDRPAGAQAT +FT SRAAEGAVRSRSWVVPKVETAALAGRAPVAFHCGVNEVLLAGLAGAVARWRGGDAVLVD +FT VESHGRHPVDGTDLSRTVGWFTSAHPVRLDVAGTDLADVLAGGPAAGRLLKAVKEQSRA +FT VPGDGLGYGLLRYLNGTTGPVLADLPSPQIGFNYMGRFAAGEKSGVRAWQPVGDIGSSL +FT EPGMGLPHALEVNAIVQDLPDGPELTLMLEWQDGLLGEDEIDRLGRAWLDMLSGVARQA +FT ADPAAGGHTASDFDLVTLDQAEIEALEAEFAAAGGLAEVLPLSPLQHGLAFHAGYAGDG +FT VDVYTAQAVLELAGPLDVPLLRKSVRALLDRHANLRAGFRHGADGTAYQVVPGAVAVPV +FT TLVDVTESADPAAEAAAVAAAERARPFELARPPLLRVMVVVLGPDRHRLVLTNHHILLD +FT GWSTPLLLDELLTLYRNGAAPAALAPVTPYRDYLAWVRETDREAATEAWRDALAGLPEP +FT TLVAADRPVPVEVPEQIWTTLDETFAQALGARARECGVTVSTVLQAVWGMVLAALTGRD +FT DVVFGSVVSGRPAELPGIETMVGLFINTVPVRVRMRPQDTFAELVRGLQNEQVALLAHH +FT HVGLTDIQQAAGLGRLFDTIIVYENYPRPAEIGDESADADRVRVQGLTAADATHYPLAL +FT AVVPGTDLRLRLEHQPALFTAEQAGAVLERFTLVLEAVVADPRLPLAVVPILSDAERRQ +FT LQAGNDTALPVPDRTLPELFAAQAAATPEATAVVFEDRSLTYAELDARANQLARWLIDQ +FT GAGPEGLVAVLLPRSLELVVALLAVTKTGGAWLPIDPGYPADRIAFMLDDAGPALVITT +FT AVLSASPIGDVLAARSRTVVLDEPAAAGQLAGRDRAPVTDTDRARALDPRHPAYLIYTS +FT GSTGRPKAVVVTHRNLTNYLLHCGRMYPGLRGRSVLHSSIAFDLTVTATFTPLIVGGEI +FT HVGALEDLIGVVEAAPSIFLKATPSHLLTLDTASRGSAGSGDLLLGGEQLPADTVVQWR +FT RKYPNIVVVNEYGPTEATVGCVEYRLEPGQECPPGGVVPIGTPLANMRAFVLDSWLRLV +FT PPGAVGELYVAGAGLARGYLGRAGLTATRFVADPFGSGERMYRTGDLVQWNPDGQLVFA +FT GRVDDQVKVRGFRIEPGEIEAALVAQESVGQAVVVARDSEIGTRLIGYVTAAGESGVDE +FT AAVREGVAARLPQYMVPAALVVLGALPLTANGKVDRAALPDPDFGARAGGREPVTEAER +FT LLCALFAEVLGLERAGADDSFFELGGDSILSMRLAARAHREGMSFGAREVFEQRTPAGI +FT AAIVERVAGDRPVAAVHAVSDVALLDLDQGELDEFKAEFDDDSQPFADPGRY" +FT CDS 16635..28868 +FT /transl_table=11 +FT /gene="bpsB" +FT /product="peptide synthetase" +FT /function="involved in the biosynthesis of the balhimycin +FT heptapeptide backbone" +FT /db_xref="GOA:Q939Z0" +FT /db_xref="HSSP:1AMU" +FT /db_xref="InterPro:IPR000873" +FT /db_xref="InterPro:IPR001242" +FT /db_xref="InterPro:IPR006162" +FT /db_xref="InterPro:IPR006163" +FT /db_xref="InterPro:IPR009081" +FT /db_xref="InterPro:IPR010060" +FT /db_xref="InterPro:IPR010071" +FT /db_xref="UniProtKB/TrEMBL:Q939Z0" +FT /citation=[3] +FT /protein_id="CAC48361.1" +FT /translation="MSQSRIEEIWPLSPLQAGLLFHAVYDGEGPDVYIGHWILDLAGPV +FT DAAGLRAAWETLLARHAPLRACFRQRKSGETVQIIARQVELPWREVDLSHLDDPEEAVR +FT ELAEQDRTTRFDLAQAPLLRLTLIRLGADAHRLVVTCHHTIMDGWSLPIVIDELSVLYP +FT AGGDASALPDVPSYREYLAWLSRQDKERALSAWTAELSGAEEPTLVVPADPGRAPAEPE +FT SVEAHLPEHLTRSLAELARRHGLTLNTVVQGAWALVLAQLAGRPDVVFGAAVSARPPDL +FT PGVEGMVGLFLNTVPVRVRLRGSTPVVELLAELQKRQSALIPDQFVGLADIQQAAGPAA +FT VFDTLLVFEKFHHGPAGSDSAGTFRIHVNQGRVAAHYPLTLVAVPGESMYLKLDYLTEL +FT FDRETAFAILERFTGVLRQLTGAGELTVAGVEVTTAAERALVAGEWGASTSAPPSLPAL +FT DLFGHQVAHRRDEPAVVDGDRTVSYGELAERAERLAGYLNGRGVRRGDRVAVVLDRSPD +FT LIATLLAVWKAGAAYVPVDPAYPVERRKFMLADSGPAAVVCAEAYRAAVPDTCPEPIVL +FT DDPRTRQAVAESPRLSAGTSADDLAYVMYTSGSTGTPKGVAVSHGNVAALAGEPGWRVG +FT PGDAVLLHASHAFDISLFEMWVPLLSGARVVLAGPGAVDGAALAAYVAGGVTAAHLTAG +FT AFRVLADESPEAVAGLREVLTGGDAVPLAAVERVRGRVRNVRVRHLYGPTEATLCATWW +FT LLEPGDETGSVLPIGRPLAGRRVHVLDAFLRPVPPGVAGELYVAGAGVAQGYSSRPALT +FT AERFVADPSGSGARMYRTGDLAYWTEQGALAFAGRADDQVKIRGYRVEPGEIEVVLAGL +FT PGVGQAVVTPRGEHLIGYVVAEAGHDADPVRLREQLAGTLPEFMVPAAVLVLDELPLTV +FT NGKVDRRALPEPDFAAKSAGREPVTEAERVLCGVFADVLGLDHVGVDDSFFELGGDSIS +FT SMQVAARARREGISLTPRLVFEHRTPERLAALAQEAGATPRAEVVTGVGEIPWTPVMRA +FT LGDDAMRPGFAQVRVVVTPAGVNPDALVSALQAVLDAHDLLRARVEPDGRLIVPERGAV +FT AAAGLLTRVAAGTGGLDEIAEREVRTATGTLDPSAGIMARVVWIDAGDAEPGRLAFVAH +FT HLSVDAVSWGILLPDLRAAYDEVISGGTPALEPPVTSYRQWARRLTARALSESTVAELE +FT KWAAVVEGAEPALPQDTGQHTGQSHSWSTSLSGTEVRDLVTVLPGAFHCGIQDVLLAGL +FT AGAVARVRGSGAALLVDVEGHGREAADGEDLLRTVGWFTSVHPVRLELSDVDLAGAADG +FT ERPAGQLLKAVKEQIRAVPGDGSGYGLLRHLNPGTGARLAELPSAQIGFNYLGRTVLAP +FT EDTAWQPNGGGPLGGGPDMVLAHAVEVSAELQDTPAGPRLGLAIDTRDFDLATVERLGE +FT AWLEMLTGLAAVARGSGAGGHTPADFALVDLTQRDVAELEAAAPGLTDIWPLSPLQEGM +FT LFERAFDEDGVDVYQTQRILDLDGPLDEPRLRAAWNQVLARHASLRTGFHQLGSGATVQ +FT VVVREADIPWRVADLSHLDAAEAAAEVERLLAEDQGRRFDVTRPPLLRLLLIRLGADEH +FT RLVVTSHHVLLDGWSTPLVVGEMSDGYAGGRSSSKPPSYQDYLAWLSRQDAEATRSAWR +FT AELAGADEPTLVDADAGKTLVMPDEHAEWLPEPATRALAGFARGHGLTVSTIVLGAWAL +FT VLARLAGRTDVVFGSVVSGRPADVPDVERMVGMFINTVPARVRLDGRRPLLEMLEDLQA +FT RQAALTEHQYLGLPEIQKVAGTGAIFDTIVMVENYPHDAAGLGGDGGVAISSVVTRTGT +FT SYPLTMNVSLGDRLRITVSYRPDRIDDATAAEVARQVVRVLERVVAEPSLPVGRLGVTS +FT EPTRAAVVERWNSTGEAAAETSVLELFRRQAGASPDAVAVVAGERTLSYADLDRESDRL +FT AGHLAGIGVGRGDRVGVVMTRGADLFVALLGVWKAGAAQVPVNVDYPAERIERMLADVG +FT ASVAVCVEATRKAVPDGVEPVVVDLPVIGGVRPEAPPVTVGAHDVAYVMYTSGSTGVPK +FT AVAVPHGSVAALASDPGWSQGPGDCVLLHASHAFDASLVEIWVPLVSGARVLVAEPGTV +FT DAERLREAVSRGVTTVHLTAGAFRAVAEESPDSFIGLREILTGGDAVPLASVVRMRQAC +FT PDVRVRQLYGPTEITLCATWLVLEPGAATGDVLPIGRPLAGRQAYVLDAFLQPVAPNVT +FT GELYLAGAGLAHGYLGNTAATSERFVANPFSGGGRMYRTGDLARWTDQGELVFAGRADS +FT QVKIRGYRVEPGEVEVALTEVPHVAQAVVVAREGQPGEKRLIAYVTAEAGSALESAAVR +FT AHLATRLPEFMVPSVVVVLESFPLTLNGKIDRAALPAPEFAGKAAGREPRTEAERVLCG +FT LFAEILGLERVGADDGFFELGGDSILSMRLAARARRENFVFGAKQVFEQKTPAGIAAVA +FT ERGGQSRPAGVADGVGEVPWTPVVRALLERDPAGLTRGAMAQWVSVAAPRDLSVTALVA +FT GLGAVIDTHDMLRSRIVESEGVEPRLVVAGRGTVDAAALVERVEAGDGDLAEIADRCAH +FT DTAARLDPVAGVLVRAVWVDAGPGRAGRLVVAAHHLVVDVVSWRTLLPDLQAACEAVVA +FT GGQPALDPPDVSFRRWSRTLDGEAAIRTGELAVWTEILDGAQSRLGELDPRRDTVSTAG +FT RRSWTVPREHAGVLVEQVTSAFHCGVHEVLLATLAGAVAGWRGGTAVVVDVEGHGRQPL +FT GELDLSRTLGWFTDVHPLRLDVTGVDPAEAVAGGDAAGRLLKQVKENVRAVPDGGLGYG +FT MLRYLNAETGPVLAALPKAEIGFNYLGRFSAGSGGEAQPWQITGIVGGAAEQDTPLRHV +FT VEIDAVVVDGPDGPEFTLTVTWAGRMLGDAEAESLAQAWLDMLAGLAAHVAAGGPGGHT +FT PSDFPLTALTQREVAEFEAAVPGLLDIWPLSPLQEGLLFHAADDRGPDVYASMRTLAID +FT GPLDVARFRASWTVLLDRHPALRASFHQLESGEAVQVIARDVPPDWRETDLSGLPESEA +FT LAEFDRLAARMHAERFDLTKAPQLRLHLVRLGDRRYRLIFTSHHIVADGWSLPLILVDV +FT LTAYEAGGDGRTLPAATSYRDFLAWVDRQDKGAAGQAWRTELAGLDEATHVVPPGSIIT +FT PLEPERVAFELDDETSKRLVEFTRRHGVTANTLFQGVWALHLARLAGRNDVVFGAAVAG +FT RPPEIPGVESAVGLFMNMLPVRARLTGAEPVVDMLKDLQERQVAMMAHQHIGLPEIKQL +FT TGPGAAFDTIVVFENYPPAPPRSDDPDALVIRPVGIPNDTGHYPLSMRASVAAGPVRGE +FT FIYRPDVVDRTEAGEMVAAILRALEQVVAEPWTPVGQVGLIGPEQRRLVVDEWNRTDVP +FT LAAETLPVLFRRQAERSPDAVAVEDGARSLTFGGLLGEVEALARLLVGAGVRREHRVGV +FT LVERSAELAVTMMAVSFAGGVFVPVDPDYPRERVEFMLANSAPGVMVCTKTTRAAVPAE +FT FAGTVLVLDELPAADPDVELPPVAPEDAAYVIYTSGSTGVPKGVLVTHSGLANLGYAHI +FT ERMAVTSSSRVLQLSATGFDAIVSELYMALLAGATLVLPDAASMPPRVTLGEAIRRAGI +FT THLTVSPSVLASEDDLPDTLRTVLTGGEALPPALVDRWSPGRRVIQAYGPTETTICSTM +FT SAPLSPGHDQVPLGGPIHNVRHYVLDAFLQPVPPGVVGELYITGVGLARGYLGRPGLTA +FT ERFVASPFAPGERMYRSGDLFRWTREGQLLFAGRVDAQVKVRGYRVEPAEIEAVLAEHP +FT WVGQVAVSVRRDGPGDKQLVAYVVPSADAAAENGTLASALRELAAERLPEYMMPAAFVS +FT LEQMPLTPNGKLDHRALQAPDFAGMSSKRAPRTPMEARLCALFADVLGLDQVGPDDSFF +FT ELGGDSITSMQLSARARPTGLELTPWQVFDEKTPERLAVIVQELAAEGGTTPAPEPGEG +FT TLVALSPDQMDLLEAGLAGE" +FT CDS 28888..34464 +FT /transl_table=11 +FT /gene="bpsC" +FT /product="peptide synthetase" +FT /function="involved in the biosynthesis of the balhimycin +FT heptapeptide backbone" +FT /db_xref="GOA:Q939Y9" +FT /db_xref="HSSP:1AMU" +FT /db_xref="InterPro:IPR000379" +FT /db_xref="InterPro:IPR000873" +FT /db_xref="InterPro:IPR001031" +FT /db_xref="InterPro:IPR001242" +FT /db_xref="InterPro:IPR006163" +FT /db_xref="InterPro:IPR009081" +FT /db_xref="InterPro:IPR010071" +FT /db_xref="UniProtKB/TrEMBL:Q939Y9" +FT /protein_id="CAC48362.1" +FT /translation="MTVDDTRAKRRSSVEDVWPLSPLQEGMLYHTALDDDGPDTYTVQT +FT VYGIDGPLDPGLLRASWQALVDRHAALRACFRYVSGAQMVQVIAREAEVPWRETDLSGL +FT PDDIAEGEVDRLAADEVAERLRIEAAPLMKLHLIRLGPDRHRLVHTLHHVLVDGWSMPI +FT LHRELAAIYAAGGDASGLPPTVSYRDYLAWLGRQDKEVARAAWRAELAGLDTPTTVAAP +FT DPARVPDIHTAVVELPAELTDGLAQFARGHDLTLNTVVQGAWAVVLAQLAGRDDVVFGA +FT TASGRPADLPGVEAMVGQLLNTLPVRVRLDGGRRAAELFARLQRDQSALMAHQHLGLQD +FT VQAVVGPGAVFDTLVIYENFPRKGLGRAPGGGLSLVPVKRGRNSSHYPFTLITGPGERM +FT PLILDYDRGLFDPAAAESVVGALARVLERLVAEPDVLVGRLTLASEAERALVVEGFNAT +FT AGPVPGESVLELFARRVAAAPDAVAITGAAGANLTYAEVDQASNRLAGYLAVRGVGRGD +FT RVGVAMERSPDLLIAFLAIWKAGAAYVPVDVEYPAERISFVFDDSGVSTVLCTLATSAV +FT APGNAIVLDAPETRVAVRDCAAPEIRPHADDLAYVMYTSGSTGLPKGVAIPHGAVAGLA +FT GDAGWQIGPGDGVLMHATHVFDPSLYAMWVPLVSGARVLLTEPGVLDAAGVRQAVHRGA +FT TFVHLTAGTFRALAETAPECFEGLVEIGTGGDVVPLQSVENLRRAQPGLRVRNTYGPTE +FT TTLCATWLPIEPGEVLGRELPIGHPMTNRRIYLLDAFLRPVPPGVAGELYIAGTGLAHG +FT YLKSPGLTAGRFVACPFAAGERMYRTGDRARWTRDGEVVFLGRADDQVKIRGYRVELGE +FT VEAALAAQPGVVEAVVTAREDQPGEKRLVGYFVSDGGDAGPVEIRRQLALVLPDYLVPI +FT AVVALPGLPVTPNGKVDRRALPAPDLAGHSPEKAPENETEKVLCALFAEILSIDQVGVD +FT DTFHDLGGSSALAMRLVARIREELGADLPIRQLFSSPTPAGLARALAAKSRPALEAAQR +FT PDRVPVTARQLRAWLLADPGGETAGLHTSVALRLHGRVDVPALAAALGDVAARHEILRT +FT TFPGDAQSVHQHVHDALAVELTPVGVTEEDLPGLLAERRDLLFDLTRDVPWRCDLFALS +FT DNEHVLHLQVHRILADDDSLDVFFRDLAAAYGARREGRVPERAPLALQFADYALWEQRL +FT LTDENEPGSLINEQVAFWRDNLAGLDGETVLPFDRPRPAVPSRRAGTVALRLEAGPHAR +FT LTEAAEPPGADTLEMVHAALAMLLAKLGAGHDVVIGTALPRDEELFDLEPMIGPFTRAL +FT ALRTDVSGDPTFLEVVARVQEAGQATGEHLDLPFERIVELLDLPASLARHPVFQVGLQV +FT DEEDIDGWAAAELPALRTAVEPGGTAAMELDLAVKLTERFDDDDNAGGLEGALHYATDL +FT FDEATAESVARRLVRVLEQVAEDPGRRISDLDVFLDDFERGRPPIAPARWAGAVPPVVA +FT ELAGDGPLGALLLDEQLRPVAPGAVGDLYVTGPAVDAGTATLATVPCPFGDEGHRMLHT +FT GLLARKTPAKTLVVVGERRRSSASVKTGDFEILLPLRAGGDRPPLFCVHASGGLSWNYE +FT PLLRYLPPNQPVYGVQARGLARTEPLPGSVEEMAADYLEQIRAVQPAGPYHLLGWSLGG +FT RIAQAMARLLEADGERLGLLALLDAYPVYMGRKTTGAASEEAALEQRNQQDLDLAGQLV +FT KGVAARSRLEAVMRNLWKVGPRHTRSPFAGDVLLFVATVDRPAHLPVPVAKASWKEFTS +FT GAVEAHEIPSNHYDMVQSAALGQIGAIVAEKLRSRPEGERTQR" +FT CDS 34461..34670 +FT /transl_table=11 +FT /product="hypothetical protein" +FT /note="orf1" +FT /db_xref="InterPro:IPR005153" +FT /db_xref="UniProtKB/TrEMBL:Q939Y8" +FT /protein_id="CAC48363.1" +FT /translation="MSNPFDNEDGSFFVLVNDEGQHSLWPTFAEVPAGWTRVHGEAGRQ +FT ECLAYVEENWTDLRPKSLIREASA" +FT CDS 34782..35957 +FT /transl_table=11 +FT /gene="oxyA" +FT /product="P450 monooxygenase" +FT /function="involved in the coupling of the aromatic side +FT chains of the heptapeptide" +FT /note="already deposited under Y16952" +FT /db_xref="GOA:O87673" +FT /db_xref="HSSP:1LFK" +FT /db_xref="InterPro:IPR001128" +FT /db_xref="InterPro:IPR002397" +FT /db_xref="UniProtKB/TrEMBL:O87673" +FT /protein_id="CAA76547.1" +FT /translation="MFEESNALRGTEIHRRDRFAPGPELRSLMGEGTMSILQPPDSPGG +FT RTGWLATGHDEVRQVLGSDKFSAKLLYGGTVAGRIWPGFLNQYDPPEHTRLRRMVTSAF +FT TVRRMQDFRPRIEQIVQASLDAIEAAGGPVDFVPRFAWSVATTVTCDFLGIPRDDQADL +FT SRALHASRSERSGKRRVAAGNKYWTYMTEIAARARRDPGDDMFGAVVRDHGDAITDAEL +FT LGVAAFVMGAGGDQVARFLAAGAWLMVEHPDQFALLREKPDTVPDWLNEVERYLTSDEK +FT TTPRIAQEDVRIGDQLVKAGDAVTCSLLAANRRKFPAPEDEFDITRERPVHVTFGHGIH +FT HCLGRPLAEMVFRAAIPALAQRFPKLRLAEPDREIKLGPPPFDVEALLLEW" +FT CDS 36007..37203 +FT /transl_table=11 +FT /gene="oxyB" +FT /product="P450 monooxygenase" +FT /function="involved in the coupling of the aromatic side +FT chains of the heptapeptide" +FT /db_xref="GOA:O87674" +FT /db_xref="HSSP:1LFK" +FT /db_xref="InterPro:IPR001128" +FT /db_xref="InterPro:IPR002397" +FT /db_xref="UniProtKB/TrEMBL:O87674" +FT /protein_id="CAA76548.1" +FT /translation="MNDDDPRPLHIRRQGLDPADELLAAGSLTRVTIGSGADAETHWMA +FT TAHALVRQVMGDHQRFSTRRRWDPRDEIGGTGTFRPRELVGNLMDYDPPEHTRLRQKLT +FT PGFTLRKMQRLQPYIEQIVNERLDEMARAGSPADLVAFVADKVPGAVLCELIGVPRDDR +FT ATFMQLCHAHLDASRSQKRRAAAGEAFSRYLLAMIARERKDPGEGLIGAVVAEYGDEAT +FT DEELRGFCVQVMLAGDDNISGMIGLGVLALLRHPEQIDALRGGEQPAQRAVDELIRYLT +FT VPYGPTPRIAKQDVTVGDQVIKAGESVICSLPAANRDPALVPDADRLDVTRDPVPHVAF +FT GHGIHHCLGAALARLELRTVFTALWRRFPDLRLADPAQETKFRLTTPAYGLTELMVAW" +FT CDS 37353..38573 +FT /transl_table=11 +FT /gene="oxyC" +FT /product="P450 monooxygenase" +FT /function="involved in the coupling of the aromatic side +FT chains of the heptapeptide" +FT /db_xref="GOA:O87675" +FT /db_xref="HSSP:1UED" +FT /db_xref="InterPro:IPR001128" +FT /db_xref="InterPro:IPR002397" +FT /db_xref="UniProtKB/TrEMBL:O87675" +FT /protein_id="CAA76549.1" +FT /translation="MGHDIGQLAPLLPEPANFQLRTNCDPHADNFDLRAHGPLVRIAGD +FT SSAQLGREYVWQAHGYDVVRRILGDHENFTTRPQFTQAKSGAHVEAQFVGQISTYDPPE +FT HTRLRKMLTPEFTVRRIRRMEPAIQALVDDRLDRVAAEGPPADLQALFADPVGALALCE +FT LLGIPRDDQREFVRRIRRNTDLSRGLKARAADSAAFNRYLDNLIARQRRDADDGFLGMI +FT VREHGDTVTDEELKGLCTALILGGVETVAGMIGFGVLALLENPGQVPLLFAGPEQADRV +FT VNELLRYLSPVQAPNPSLAVKDVIIDGQLIKAGDYVLCSVLMANRDEALTPNPNVFDAN +FT RAAVSDVGFGHGIHYCVGAALARSMLRMAYQALWQRFPGLRLAVPIAEVKYRSAFVDCP +FT DRVPVTW" +FT CDS 38634..40109 +FT /transl_table=11 +FT /gene="bhaA" +FT /product="halogenase" +FT /function="involved in the halogenation of balhimycin" +FT /db_xref="GOA:O87676" +FT /db_xref="InterPro:IPR001100" +FT /db_xref="InterPro:IPR003042" +FT /db_xref="InterPro:IPR006905" +FT /db_xref="InterPro:IPR013027" +FT /db_xref="UniProtKB/TrEMBL:O87676" +FT /protein_id="CAA76550.1" +FT /translation="MSVEDFDVVVAGGGPGGSTVATLVAMQGHRVLLLEKEVFPRYQIG +FT ESLLPATVHGVCRMLGISDELANAGFPIKRGGTFRWGARPEPWTFHFGISAKMAGSTSH +FT AYQVERARFDEMLLNNAKRKGVVVREGCAVTDVVEDGERVTGARYTDPDGTEREVSARF +FT VIDASGNKSRLYTKVGGSRNYSEFFRSLALFGYFEGGKRLPEPVSGNILSVAFDSGWFW +FT YIPLSDTLTSVGAVVRREDAEKIQGDREKALNTLIAECPLISEYLADATRVTTGRYGEL +FT RVRKDYSYQQETYWRPGMILVGDAACFVDPVFSSGVHLATYSALLAARSINSVLAGDLD +FT EKTALNEFELRYRREYGVFYEFLVSFYQMNVNEESYFWQAKKVTQNQSTDVESFVELIG +FT GVSSGETALTAADRIAARSAEFAAAVDEMAGGDGDNMVPMFKSTVVQQAMQEAGQVQMK +FT ALLGEDAEPELPLFPGGLVTSPERMKWLPHHPA" +FT CDS 40192..41382 +FT /transl_table=11 +FT /gene="bgtfA" +FT /product="glycosyltransferase" +FT /function="involved in glycosylation of balhimycin" +FT /db_xref="GOA:O87677" +FT /db_xref="HSSP:1PN3" +FT /db_xref="InterPro:IPR004276" +FT /db_xref="UniProtKB/TrEMBL:O87677" +FT /protein_id="CAA76551.1" +FT /translation="MRVLISGCGSRGDTEPLIALAVRLRELGVDVRMCLPPDYVERCAE +FT VGVSMVAVGPAMRAGARGPGEPPPGAPEIVSEVVADWFDKVPAAAEGCDVVVATGLLPA +FT AVVVRSVAEKLGIPYLYTVLSPDHLPSVLSQAERDEYDQGADRLFGAVVTSGRAAIGLP +FT PVANLFTYGYTEQPWLGADQILAPPPPGDLDTVQTGAWILPDERPLPAELETFLAAGSP +FT PVYVGFGSSSGPRTAGAAKAAIEAIRARGHRVVLSRGWADLAAPDDSADCFTVGEVNLQ +FT VLFRRVAAAVHHDSAGTTLLAIRAGTPQIVVRRVIDNVVEQAYHADRVAELGVGVALEG +FT PIPASEAMSDALETALAPETRARAAEVAGTVRTDGTTVAAELLFAAVSREKPAVPA" +FT CDS 41447..42676 +FT /transl_table=11 +FT /gene="bgtfB" +FT /product="glycosyltransferase" +FT /function="involved in glycosylation of balhimycin" +FT /db_xref="GOA:O87678" +FT /db_xref="HSSP:1IIR" +FT /db_xref="InterPro:IPR002213" +FT /db_xref="InterPro:IPR004276" +FT /db_xref="UniProtKB/TrEMBL:O87678" +FT /protein_id="CAA76552.1" +FT /translation="MKRVLLSTLGSRGDVEPLVALAVRLRDLGAEPLMCAPPDCADRLE +FT EVGVPHVPVGPSARAPIHREKPLTPEDMRRLMAEAIAMPFDRIPAAAEGCAAVVTTGLL +FT AAAIGVRSVAEKLGIPYFYAFHCPSYVPSPYYPPPPPLGEPPAEDVTDIRALWERNNRS +FT AYQRYGGPLNSHRAAIGLPPVEDIFTFGYTDHPWVAADSVLAPMQPTDLGAVQTGAWIL +FT PDERPLSPELEAFLDTGTPPVYLGFGSLRAPADAVRVSIDAIRAQGRRVILSRGWADLV +FT LPDDREDCFATGEVNQQVLFGRVAAVIHHGGAGTTHVAMQAGAPQVLVPQMADQPYYAG +FT RVAELGIGVAHDGPVPTFDSLSAALVTALAPETRARAEAVARTAGADGAAVAAKLLLDA +FT VSREKPAVPA" +FT CDS 42824..44053 +FT /transl_table=11 +FT /gene="bgtfC" +FT /product="glycosyltransferase" +FT /function="involved in the glycosylation of balhimycin" +FT /db_xref="GOA:O87679" +FT /db_xref="HSSP:1IIR" +FT /db_xref="InterPro:IPR004276" +FT /db_xref="UniProtKB/TrEMBL:O87679" +FT /protein_id="CAA76553.1" +FT /translation="MRVLLSTAGSRGDVEPLLALAVRLQGLGAEVLMCASPASAERLAE +FT VGVPHVPVGLQLDGMLLQEGMPPPSAEDERRLAAMAIDMQFDAVPAAAEGCAAVVATGE +FT LAAAAAVRSVAEKLGIPYFYGAYSPNYLASPHYPPPDDERTTPGVTDNGVLWAERAERF +FT AKRYGETLNSRRAAIGLPPVADVFGYGYTEQPWLAADPVLAPLDPDLDAVQTGAWILRD +FT DRPLSPELAAFLAAGSPPVYVGFGSASGPGIEDAAKVAIEAIRALGRRAILSRGWADLV +FT LPDDREDCFAVDEANLQVLFEQSAAVVHHGSAGTEHLATRAGVPQIAIPRHTDQAYYAG +FT RVAELGVGVALEGPVPSFAAMSAELATALAPETRARAAEVAGTVRTDGTTMAAELLFQA +FT AEQGKLTVPA" +FT CDS 44087..45313 +FT /transl_table=11 +FT /gene="dvaC" +FT /product="putative C-3 methyl transferase" +FT /function="probably involved in the C-3 methylation of +FT dehydrovancosamine" +FT /db_xref="GOA:Q939Y7" +FT /db_xref="InterPro:IPR000051" +FT /db_xref="InterPro:IPR001601" +FT /db_xref="InterPro:IPR013217" +FT /db_xref="UniProtKB/TrEMBL:Q939Y7" +FT /protein_id="CAC48364.1" +FT /translation="MSTTSQCRICDGTVHEFIDFGRQPLSDAFVAPGAEKGEFFFRLAT +FT GICDSCTMVQLMEEVPRDLMFHEAYPYLSSGSAVMRTHFHELAKHLLATELTGEDPFIV +FT ELGCNDGIMLKAVADAGVRQLGVEPSGSVADLAAAKGIRVRKDFFEEATAADIRENDGP +FT ADVIYAANTLCHIPYMDSILKGVTKLLGPNGVFVFEDPYLGDIVERTSFDQIYDEHFFL +FT FTARSVQEMARRNGLELVDVERIPVHGGEVRYTLALAGARKPSEAVAELLAWEAERKLA +FT EYATLERFATDVKKIKEDLIALLTKLRAEGKRVVGYGATAKSATVTNFCGITPDLVEFI +FT SDTTPAKQGKLSPGQHIPVREYGEFAGNHPDYALLFAWNHADEIMNVEQAFRDAGGQWI +FT LYVPNVHVS" +FT CDS 45409..46233 +FT /transl_table=11 +FT /product="hypothetical protein" +FT /note="orf2" +FT /db_xref="InterPro:IPR003737" +FT /db_xref="UniProtKB/TrEMBL:Q939Y6" +FT /protein_id="CAC48365.1" +FT /translation="MPQDLDADRILAISPHLDDAVLSFGAGLARAAQAGAKVTVHTVFA +FT GTAAPPYSPAAERLHAIWELSPDQDASLRRRDEDIAALDHLGVDYRHGRFLDAIYRKLP +FT DGRWLADNVPGRQKLAIGRQSPQGDPELFSAVRADIESIVEEYAPALILTCAAGNGHVD +FT NEIARDAALFVAYEKGIRVRLWEDLPHAMFAEGAAELPDGFRLGPPDFGSVEPEARARK +FT FEALRLYSSQMLMLHGPEKDFFAQLDGHARKSAPGGGYGETTWPVVSREDNG" +FT CDS 46265..47119 +FT /transl_table=11 +FT /gene="bmt" +FT /product="putative N-methyl transferase" +FT /function="probably involved in the methylation of the +FT D-Leu residue of the heptapeptide" +FT /db_xref="GOA:Q939Y5" +FT /db_xref="InterPro:IPR000051" +FT /db_xref="InterPro:IPR001737" +FT /db_xref="InterPro:IPR013217" +FT /db_xref="UniProtKB/TrEMBL:Q939Y5" +FT /protein_id="CAC48366.1" +FT /translation="MSGQLERGPVRTTHADVLLASVGERGVLCDFYDEEGSNTYRDLIQ +FT DADGTPEAREFATRVGPVPGPVLELAAGTGRLTFPFLELGWEVTALELSAPVVDGFRMR +FT LAEAPADLRDRCTVVQADMSAFSVDRRFGAAVISSGSVNELDEAGRQGLYASVREHLEP +FT GGKFLLSLALSEVAESQPPERRQELPGQSGRLYVLHVSVQPAEETQDITIYPADETADP +FT FVVCTHRRRLVPADRIVRELLRAGFDVIARTPFASGASGRAGHEDMLLVEAVKQEGAIP +FT AAR" +FT CDS complement(47100..48404) +FT /transl_table=11 +FT /gene="pgat" +FT /product="phenylglycine amino transferase" +FT /function="transamination of 4-hydroxy- and +FT 3,5-dihydroxyphenylglycine" +FT /db_xref="GOA:Q939Y4" +FT /db_xref="InterPro:IPR004839" +FT /db_xref="UniProtKB/TrEMBL:Q939Y4" +FT /protein_id="CAC48367.1" +FT /translation="MEILVFMDSFGLSTPLSVETLHGSLTDPAISSMNLLNELIDEYPV +FT AISMAAGRPYEEFFDIRLIHEYIDAYCDHLRRDRKLDEAGVTRTLFQYGTTKGVIADLI +FT ARNLAEDENIDAAPESVVVTVGAQEAMFLVLRTLRATEHDVLLAPAPTYVGLTGAALLT +FT DTPVWPVQSTENGIDPDDLVLQLKRADEQGKRVRACYVTPNFANPTGTSMDLAARHRLL +FT EVAEANGILLLEDNAYGLFGAERLPTLKALDRSGTVVYIGSFAKTGMPGARVGFTVADQ +FT RMAGGGLLADQLSKLKGMLTVNTSPIAQSVIAGKLLLNDFSLTKANAREIAVYQRNLRL +FT VLGELERRLGGRAGVRWNTPTGGFFVTVTVPFTVDDDLLALAARDHGVLFTPMHHFYGG +FT KGGFNQLRLSISLLTPELIEEGVARLAALITARLG" +FT CDS 48610..49464 +FT /transl_table=11 +FT /gene="bhp" +FT /product="putative hydrolase" +FT /function="involved in the beta-hydroxytyrosine +FT biosynthesis" +FT /db_xref="GOA:Q939Y3" +FT /db_xref="HSSP:1A88" +FT /db_xref="InterPro:IPR000073" +FT /db_xref="InterPro:IPR000379" +FT /db_xref="InterPro:IPR000639" +FT /db_xref="InterPro:IPR003089" +FT /db_xref="UniProtKB/TrEMBL:Q939Y3" +FT /protein_id="CAC48368.1" +FT /translation="MLMTTEHGIRLSYHDQGRGAPVLLLTGTGAPSSVWDLHQVPALRA +FT AGFRVITMDNRGIPPSDDGADGFTVDDLVADVAALLDHLDASPCRVVGTSMGSYIAQEL +FT ALARPELVDAVVLMAACGRSSLVQRVLAEAEADLIGRGTELPPGYRAAVRAMHNLGPAT +FT LADDDLAADWLDLFAASENWGPGVRAQLLLSALPDRREAYRAIKVPCHVVSFEHDLVAP +FT PSAGQELAAVIPGATHRTIPGCGHFGYLEKPEAVNRELLRFLRTESGVAVTSGASPRTP +FT EEL" +FT CDS 49530..51275 +FT /transl_table=11 +FT /gene="bpsD" +FT /product="peptide synthetase" +FT /function="involved in the beta-hydroxytyrosine +FT biosynthesis" +FT /db_xref="GOA:Q939Y2" +FT /db_xref="HSSP:1AMU" +FT /db_xref="InterPro:IPR000873" +FT /db_xref="InterPro:IPR006162" +FT /db_xref="InterPro:IPR006163" +FT /db_xref="InterPro:IPR009081" +FT /db_xref="InterPro:IPR010071" +FT /db_xref="UniProtKB/TrEMBL:Q939Y2" +FT /protein_id="CAC48369.1" +FT /translation="MTGAIVPPSTAPALFEAAAAAVPDRPAVAMGTTTLTYAELNTQAN +FT RLARRLVAHGVGPERLVALAMPRSIEFAVAMLAVHKAGGAYVPIDPDYPAERRQHMLAG +FT AAAQCLLCLPGQDVAGAPVVLSVALAEPGRPEPDLDDSDRLAPLLPSHPAYVIFTSGST +FT GQPKGVVVTHRGIPNLAADYVHRQNLLPDSRLLAFASPSFDAAVAEFWPIWLAGACLVL +FT APAPDLIPGEPLARLVRDRHITHVTLPPSALAPLEEAGGLPPGLTLLVAGEAGPAPVAK +FT RWAAGRVMINAYGPTEATVAVTASDPLTGEDTPPIGRPITGVHTYVLDDRLVPVPDGTV +FT GELYMTGPGLARGYLHRPAATAERFLPDPFGGPGQRMYRTGDRVRARPDGQLVFVGRAD +FT DQLKVRGHRIEPAEVESALLAVDGVAQAVVTEHDNRLVAYVVGAGGARVPAEDLLPPLR +FT KQLPAYLVPDVVVGLPHLPTTPNGKVDRAALPAPEAEDTGRAISGRAPSTPTEIHLAAL +FT FAEVLGVSSVGVEDSFFEVGGHSLLATRLVSRIRESLRVRLRVQAFFDAPTVAELAKVL +FT DAALT" +FT CDS 51289..52479 +FT /transl_table=11 +FT /gene="oxyD" +FT /product="putative P450 monooxygenase" +FT /function="probably involved in the biosynthesis of +FT beta-hydroxytyrosine" +FT /db_xref="GOA:Q939Y1" +FT /db_xref="HSSP:1CPT" +FT /db_xref="InterPro:IPR001128" +FT /db_xref="InterPro:IPR002397" +FT /db_xref="UniProtKB/TrEMBL:Q939Y1" +FT /protein_id="CAC48370.1" +FT /translation="MQTTNAVDLGNPDLYTTLERHARWRELAAEDAMVWSDPGSSPSGF +FT WSVFSHRACAAVLAPSAPLTSEYGMMIGFDRDHPDNSGGRMMVVSEHEQHRKLRKLVGP +FT LLSRAAARKLAERVRIEVGDVLGRVLDGEVCDAATAIGPRIPAAVVCEILGVPAEDEDM +FT LIDLTNHAFGGEDELFDGMTPRQAHTEILVYFDELITARRKEPGDDLVSTLVTDDDLTI +FT DDVLLNCDNVLIGGNETTRHAITGAVHALATVPGLLTALRDGSADVDTVVEEVLRWTSP +FT AMHVLRVTTADVTINGRDLPSGTPVVAWLPAANRDPAEFDDPDTFLPGRKPNRHITFGH +FT GMHHCLGSALARIELSVVLRVLAERVSRVDLEREPAWLRAIVVQGYRELPVRFTGR" +FT CDS 52645..53715 +FT /transl_table=11 +FT /gene="hmaS" +FT /product="putative hydroxyphenyl pyruvate dioxygenase" +FT /function="probably involved in the 4-hydroxyphenylglycine +FT biosynthesis" +FT /db_xref="GOA:Q939Y0" +FT /db_xref="HSSP:1CJX" +FT /db_xref="InterPro:IPR004360" +FT /db_xref="InterPro:IPR005956" +FT /db_xref="UniProtKB/TrEMBL:Q939Y0" +FT /protein_id="CAC48371.1" +FT /translation="MTSDSTVQNFEIDYVEMYVENLEAATFTWVDKYAFAVAGTDRSAD +FT HRSVTLRQGPIKLVLTEPTSDRHPAAAYLQSHGDGVADIALRTPDVTAAFEAAVRGGAA +FT AVREPVRLAGGPIVTATIGGFGDVVHTLIQSGEATAAAPETTGQGGGDVNLLGLDHFAV +FT CLNSGDLGPTVAFYERAFGFRQIFEEHIVVGRQAMNSTVVQSASGEVTLTLIEPDSNAD +FT PGQIDEFLKAHQGAGVQHIAFNADDAVRAVRALSGRGVEFLKTPGTYYDMLGERITLET +FT HTLDDLRSTNVLADEDHGGQLFQIFAASTHPRHTIFFEIIERQGAGTFGSSNIKALYEA +FT VELERTGQSEFGAARR" +FT CDS 53712..54788 +FT /transl_table=11 +FT /gene="hmo" +FT /product="putative phenylglycolate oxidase" +FT /function="probably involved in the 4-hydroxyphenylglycine +FT biosynthesis" +FT /db_xref="GOA:Q939X9" +FT /db_xref="HSSP:1GOX" +FT /db_xref="InterPro:IPR000262" +FT /db_xref="InterPro:IPR003009" +FT /db_xref="InterPro:IPR008259" +FT /db_xref="InterPro:IPR012133" +FT /db_xref="UniProtKB/TrEMBL:Q939X9" +FT /protein_id="CAC48372.1" +FT /translation="MTYVSLGDLERAARDVLPGEIWDFLAGGSGAEASLTANRTALDRV +FT FVVPRMLCDLTGSTTEAELLGRRAALPMAVAPVAYQRLFHPEGELAAARAARDAGVPYT +FT ICTLSSVPLEEVAAVGGRPWFQLYWLRDEKRSLELVRRAEDAGCEAIVFTVDVPWMGRR +FT WRDMRNGFALPESVTAANFDAGSAAHRRTRGASAVADHTAREFAPATWESVATVRAHTD +FT LPVVLKGILAAEDARRAVEAGADGIVVSNHGGRQLDGAVPGIEVLGEIAAEVSGRCEVL +FT LDGGIRTGGDILKAAALGASGVLVGRPVMWGLAAAGQEGVRQVFELLAAELRNALGLAG +FT CDSVSAAGRLGTRVPRYG" +FT CDS 54879..56237 +FT /transl_table=11 +FT /product="putative antiporter" +FT /note="orf7" +FT /db_xref="GOA:Q939X8" +FT /db_xref="InterPro:IPR006153" +FT /db_xref="UniProtKB/TrEMBL:Q939X8" +FT /protein_id="CAC48373.1" +FT /translation="MLHTFAAAVAPVAPIAAHSLLVFLLQIGLLLLLAVVLGRLAGRFG +FT MPAVVGELFVGVILGPSLLGWAAPGLHSWLFPAVAEQYHLLDAVGQVGVLLLVGLTGVQ +FT MDMGLARKRGLTAAGVSIGGLVLPLGLGIGAGYLLPKVLVPEGTDVTVFAMFLGVALCV +FT SAIPVIAKTLIDMKLLHRNIGQLTLTAGMVDDVFGWFMLSVVSAMAVNAVSAGTVLTSL +FT AYLVAILAFCFTLGRPLARGVLRVAAKSDGPGLTVATVVVLIFLAAAGTQALGLEAVFG +FT AFLCGILLGTAGKVDPAKLAPLRTVVLSGLAPLFFATAGLRMDLTALTHPVVLLTGLVV +FT LALAIAGKFAGAFAGARLSGLNKWEGLALGAGLNARGVIQVVVAMVGLRLGILSVEVYT +FT IIILVAIVTSLMASPILRFAMSRVEQTAEEQVRENEHRAWNTHPAANPQEQSL" +FT CDS 56501..57913 +FT /transl_table=11 +FT /gene="dvaA" +FT /product="putative NDP-hexose 2,3-dehydratase" +FT /function="probably involved in the biosynthesis of +FT dehydrovancosamine" +FT /db_xref="InterPro:IPR005212" +FT /db_xref="UniProtKB/TrEMBL:Q939X7" +FT /protein_id="CAC48374.1" +FT /translation="MLPDLVPPVVVRPRDGRDHADRIALSAATTDGVHMRTEDVRAWIA +FT ERREANDFHVERVPFRDLDQWSFEEVTGNLVHHSGRFFTIEGLHVIEHDGPNGDGPYRE +FT WQQPVIKQPEVGILGILGKEFGGVLHFLMQAKMEPGNPNLVQLSPTVQATRSNYTKAHG +FT GTNVKLIEYFAPPDPEHVIVDVLQAEQGSWFFRKSNRNMIVETVDDVPLWDDFCWLTLG +FT QIAELMHEDETINMNARSVLSCLPYHDAAPGARFSDVQLLSWFTNERSRHDVRARRIPL +FT ADVCGWKQGDEAIEHEDGRYFRVLAVAVRGSNRERISWTQPLLESVDLGVVAFLVREIG +FT GVPHVLVHARADGGFLDTVELAPTVQCTPQNYAHLPAENRPPFLDVVLNAPESRIRYEA +FT IHSEEGGRFLNVRARYLAIEADDTVEPPPGYTWVTPAQLTALTRHGHYVNVEARTLLAC +FT LNAATAQPRGGA" +FT CDS 57915..58214 +FT /transl_table=11 +FT /gene="dvaE" +FT /product="putative 4-ketoreductase" +FT /note="probably not active because of an in frame deletion +FT of 226 aa" +FT /db_xref="UniProtKB/TrEMBL:Q939X6" +FT /protein_id="CAC48375.1" +FT /translation="MKTVTVLGASGFAGSAVHRLGEVFRLVAREVAGHTGRGPVDVPCV +FT APPSHAPETDFRSVTVGSTPFRSITGRRPEMSRPEGVRRTVAALPSSDQGKVRT" +FT CDS 58211..59320 +FT /transl_table=11 +FT /gene="dvaB" +FT /product="putative C-3 amino transferase" +FT /function="probably involved in the biosynthesis of +FT dehydrovancosamine" +FT /db_xref="GOA:Q939X5" +FT /db_xref="HSSP:1MDO" +FT /db_xref="InterPro:IPR000653" +FT /db_xref="UniProtKB/TrEMBL:Q939X5" +FT /protein_id="CAC48376.1" +FT /translation="MTTRVWDYQAEYRNERLDLLDAVETVFDSGQLVLGASVRGFEAEF +FT AAYHGVGHCVGLDNGTNAIKLGLQALGVGPGDEVITVSNTAAPTVVAIDGTGATPVFVD +FT VREDDFLMDTGQVAAAITERTKCLLPVHLYGQCVDMAPLKDLAAKHGLSILEDCAQAHG +FT ARQNGTVAGSTGDAAAFSFYPTKVLGAYGDGGATITSDESVDRRLRRLRYYGMDKQYYT +FT LETPAHNSRLDEVQAEILRRKLKRLDTYVAARQAIAQRYVDGLGDTELKLPRTVPGNEH +FT VYYVYVVRHPRRDDIIERLKAYDIHLNISYPWPVHTMTGFAHLGYATGAFPVTEKLAGE +FT IFSLPMYPALSADLQDKVIHAVREVVSTL" +FT CDS 59344..59961 +FT /transl_table=11 +FT /gene="dvaD" +FT /product="putative 3,5 epimerase" +FT /function="probably involved in the biosynthesis of +FT dehydrovancosamine" +FT /db_xref="GOA:Q939X4" +FT /db_xref="HSSP:1EP0" +FT /db_xref="InterPro:IPR000888" +FT /db_xref="UniProtKB/TrEMBL:Q939X4" +FT /protein_id="CAC48377.1" +FT /translation="MQARKLAVDGAIEFTPRVFPDDRGLFVSPFQEEAFAEARGGPLFR +FT VAQTNHSMSKRGVVRGIHYTMTPPGTAKYVYCARGKALDIVVDIRVGSPTFGRWDAVLL +FT DQRDHRAMYFPVGVGHAFVALEDDTAMWYLLSTAYVARNELALSVLDPALGLPIDADVD +FT PILSERDQVAVTLAEAGRQGLLPDYATCLELDRQLSEVSLSA" +FT CDS 60179..61297 +FT /transl_table=11 +FT /gene="dpgA" +FT /product="dihydroxyphenylacetic acid synthase" +FT /function="involved in the biosynthesis of +FT 3,5-dihydroxyphenylglycine" +FT /db_xref="GOA:Q939X3" +FT /db_xref="InterPro:IPR001099" +FT /db_xref="InterPro:IPR011141" +FT /db_xref="InterPro:IPR012328" +FT /db_xref="UniProtKB/TrEMBL:Q939X3" +FT /citation=[4] +FT /protein_id="CAC48378.1" +FT /translation="MGVDVSMTTSIEPAEDLSVLSGLTEITRFAGVGTAVSASSYSQSE +FT VLDILDVEDPKIRSVFLNSAIDRRFLTLPPESPGGGRVSEPQGDLLDKHKELAVDMGCR +FT ALEACLKSAGATLSDLRHLCCVTSTGFLTPGLSALIIRELGIDPHCSRSDIVGMGCNAG +FT LNALNVVAGWSAAHPGELGVVLCSEACSAAYALDGTMRTAVVNSLFGDGSAALAVISGD +FT GRVPGPRVLKFASYIITDALDAMRYDWDRDQDRFSFFLDPQIPYVVGAHAEIVADRLLS +FT GTGLRRSDIGHWLVHSGGKKVIDSVVVNLGLSRHDVRHTTGVLRDYGNLSSGSFLFSYE +FT RLAEEGVTRPGDYGVLMTMGPGSTIEMALIQW" +FT CDS 61294..61947 +FT /transl_table=11 +FT /gene="dpgB" +FT /product="putative enoyl-CoA hydratase" +FT /function="involved in the biosynthesis of +FT 3,5-dihydroxyphenylglycine" +FT /db_xref="GOA:Q939X2" +FT /db_xref="InterPro:IPR001753" +FT /db_xref="UniProtKB/TrEMBL:Q939X2" +FT /citation=[4] +FT /protein_id="CAC48379.1" +FT /translation="MNGELVLRLDGTRPLSAASVEELDALCDRVEDHREPGPVTVHVTG +FT VPAAGWTAEVTVGLVSKWERVVRRFERLGRLTIAVAAGDCAGTALDVLLAADVRIAAPG +FT TRLLLARAGGAPWPGMTVHRLTRQAGAAGIRRAVLLGAPIEAGRALALNLVDEVSEDPA +FT AALAELAGTAGAVDGKELAIRRQLVFEAGSTAFEDALGAHLAAADRALRRETAS" +FT CDS 61944..63248 +FT /transl_table=11 +FT /gene="dpgC" +FT /product="hydroxyacyl-dehydrogenase" +FT /function="involved in the biosynthesis of +FT 3,5-dihydroxyphenylglycine" +FT /db_xref="GOA:Q939X1" +FT /db_xref="HSSP:1MJ3" +FT /db_xref="InterPro:IPR001753" +FT /db_xref="UniProtKB/TrEMBL:Q939X1" +FT /protein_id="CAC48380.1" +FT /translation="MTAAPPTSPPGPRLDRPALAEAAGRVDDLLAELPPPSARTPGQRE +FT AASSALDGIRAMRADYVGAHAEAIYDELTDGRSRSLRIDELVRAAARAFPGLVPTDEQM +FT AAERARPQAEKDGREIDQGIFLRGILRAERAGPHLLDAMLQPTPRALKLLPGFTESGVV +FT QMEAVRLERRDGVAYLTLCRDDCLNAEDAQQVDDMETAVDLALLDPAVRVGLLRGGEMS +FT HPRYRGRRVFCAGINLKKLSSGGIPLVDFLLRRELGYIHKIVRGVVTEGSWHSRLTDKP +FT WIAAVDSFAIGGGAQLLLVFDHVLAASDAYFSLPAAKEGIIPGASNFRLSRFAGPRVAR +FT QVILGGRRIRADEPDARLLVDEVVPPAELDAAIDAALARLDGEAVLANRRMLNLAEEPP +FT DEFRRYMAEFALQQALRIYGEDVIGKVGRFAAGSS" +FT CDS 63245..64054 +FT /transl_table=11 +FT /gene="dpgD" +FT /product="putative enoyl-CoA-isomerase" +FT /function="involved in the biosynthesis of +FT 3,5-dihydroxyphenylglycine" +FT /db_xref="GOA:Q939X0" +FT /db_xref="HSSP:1MJ3" +FT /db_xref="InterPro:IPR001753" +FT /db_xref="UniProtKB/TrEMBL:Q939X0" +FT /protein_id="CAC48381.1" +FT /translation="MSGDRVRYEKKDHVAYVTLDRPGVLNAMDRRTHEELAGIWDDAEA +FT DDEVRVVVLTGAGNRAFSVGQDLKERARLNEAGARATTFGSRGQPGHPRLTDRFTLSKP +FT VVARVHGYALGGGFELVLACDIVIASDDSVFALPEVRLGLIPGAGGVFRLPRQLPQKVA +FT MGYLLTGRRMDAATALRYGLVNEVVPPEELDRCVAEWTDSLVRAAPLSVRAIKEAALRS +FT LDLPLEEAFTASYTWEERRRRSEDAIEGPRAFAAKRDPVWTGEYRPG" +FT CDS 64162..65259 +FT /transl_table=11 +FT /gene="ald" +FT /product="putative alodlase" +FT /db_xref="GOA:Q799B0" +FT /db_xref="HSSP:1KFL" +FT /db_xref="InterPro:IPR006218" +FT /db_xref="InterPro:IPR006219" +FT /db_xref="UniProtKB/TrEMBL:Q799B0" +FT /protein_id="CAG25757.1" +FT /translation="MAAMTHTVATTDLDNQRIERIVPLVTPALLHHELPLSATAAETVR +FT KGRESVVRVLDGTDDRLLVITGPCSIHDPAAALDYAGHLAAIAGEVAGDLLVVMRVYFE +FT KPRTIGGWKGLINDPHLDGTGDVNHGLRTARHLLLELAERGLPAACEWLDTTIPAYFAD +FT TVSWGAIGARTVESQNHRMLASGLSMPVGFKNRRDGDITVAIDAIRAAAVRHVVPGVDP +FT GGLPAILHTAGNPDCHVVLRGGDGAPNHDSASVHKTLTALEAAGLPGRVVIDASHDNSG +FT KDHHRQPLVAAEIAGQVENGRNGIVGVMLESNLRAGRQDLQPGRPPAYGQSITDACIDV +FT PTTRTVLHGLAAAAAARRKLGKQAS" +FT CDS 65378..>66669 +FT /transl_table=11 +FT /gene="pks" +FT /product="putative type I polyketide synthase" +FT /db_xref="GOA:Q799A9" +FT /db_xref="HSSP:1BA3" +FT /db_xref="InterPro:IPR000873" +FT /db_xref="InterPro:IPR001969" +FT /db_xref="UniProtKB/TrEMBL:Q799A9" +FT /protein_id="CAG25758.1" +FT /translation="MEEIRTEFIRPLLTSLSAHAADRPAYSDDRRTLTYGGLAHAAAEL +FT AAGLGVARGDRVLVHVGSRVEFAVALLAVLRAAAVGVPVSVRSTDAELAHLAADSGATL +FT LVTEARHAAAAERLRRDRPGLRVLFVDDPPPARVGEPRDDLGLDEPAWLLYTSGTTGRP +FT KGVLLSQRAMLWSTAAYYVPMLGLDAEDTVLWPLPTHHAYALSLAFVTTIALGAHTRLA +FT DGCTPDLLARYPGSVLAGVPALYLRLRQESGGPLAAPRLCLSGGAPCTPATRAAVRDLF +FT GLPVADGYGSTETGGKVAAELPGEAGLVPVPGLEIRIDAGEVLVRGPGLMLGYHGRTES +FT PLRDGWYRTGDAGRFEGGRLVLEGRVDDVIVCGGQNVHPAEIEAVLEESPSVRDVLVLG +FT RPDDVLGEVPVAFVVAGPGGFDAEELRGRCL" +SQ Sequence 66669 BP; 9264 A; 22986 C; 24686 G; 9733 T; 0 other; + ctgcagcagc agcgtcgccg accccaccac ccggctgatg tgaccggagg cgatgatgac 60 + gacacggcgt ttctcggcga tcgggagcag cgtttcgatg gcctcttcca caagcaggga 120 + caggtcgacc ggttcccggg tgaacgcgcg ctgatcggcg cggctgagca ccagcagcgc 180 + ctcggtgagg tcgatcgccc gggcgttcac cgcgtggagg cggtcgaaga ccagcagcgg 240 + gtctttcgcc ggatcgttgc gggccacttc gagaagcgcc tgcgtgatcg ccagcggggt 300 + gcgcagctcg tgcgaggcgt tcgcggcgaa cctccgctgc gcggcgacgt gggcttcgag 360 + ccgggcgagc atggcgtcga aggcatcggc gagttcgcgg aactcgtctt cggtgccttc 420 + cagccggatc cggtgggaga gcgacccgtt cgcggccatg cgcgcggcgt cggtgatccg 480 + ggtcagcggg gcgagcatgc ggccggcgag gatccagccg cccagcaggc cgaacagcag 540 + caggaagaac agcactgagc ccgcggccgg caggaagttg cgcaccagga cggaccgctc 600 + cagcaccccg ccggggggcg gtttggccag cacgtcgggg acatcacgca gcaggaacag 660 + ccacacggaa gcgagcagca gtacaccggc gagcacgagg aagcaggcgt agctgagggt 720 + gagtttgagg cggacgctca tcccggccgc tctgtccacc gcctgctccc tagcgcccgg 780 + cgccgggcgc cgcgccgatg cggtacccga cgccggccac cgtggtgatg atccagggct 840 + cgccgagccg cttgcgcaga gccgaaaccg tgatgcgcac agcgttggtg aacgggtcgg 900 + cgttcttgtc ccaggcccgt tccagcagct cttcggcgct gacgacaccg ccgtcggcgg 960 + aaacgaggac ttcgagcacg gcgaactgct tcctggtcag cgcgatgtag cggtcgtcgc 1020 + ggtagacctc gcggcggaac gggttcagcc gcaggccggc gatttccagt accggcggcc 1080 + ggttgtgggc acgccggcga tcgagcgccc gcagcctgag cacgagctcc cgcagttcga 1140 + acggcttcgt gaggtagtcg tcggcaccga gctcgaaccc ggtgatcttg tcgtcgagcc 1200 + ggtcggcggc ggtgagcatg aggatcggca ggccgctgcc ggaagcgacg atccgtttgg 1260 + cgatctcgtc cccgctgggt ccgggaatat cccggtcgag gacggcgatg tcgtaggtgt 1320 + tgaggctcag cagttcgaga gcggtgtcgc cgttgcccgc ggtgtccgcc gcgatcgctt 1380 + ccaaacgcag gccgtcgcgg atggcttcgg ccagataggg ctcgtcctcg acgatcagca 1440 + cgcgcatgcc ccgatggtac gagaggccac tacatatcgt cggcatatgg aaaatcgcat 1500 + acgtgccggc aacacatcgc cgacttgaat ggacacatga cctaccgcga gtcggcccgg 1560 + acgacgaccc gccggattcc cggcgccgtc gtgccggtgg cccgccggat tcgcggggtc 1620 + cttctcgccg gcctgcgcgc cgtcggcacg aggattgccc ggtcgcccgg tcgcccggtc 1680 + cgcccccagg accgtgccgg cctcggcaag acccacggtg ccgtccccgc cggggtgacg 1740 + gtcttcgacg acgacgtccc ggctgtgact cgcctcgacc cggcgcttct gagtgcactg 1800 + cgccgggccg cgaccgcggc cgccgacggc ggggtcgaac tgtgcgtgaa cagcggctgg 1860 + cggtctccgg aataccagag ccggcttctt cgcgaggcgg tggcgaaata cgggtcggcg 1920 + gcggcggccg cccggtgggt ggccaccccg gagacgtcga tccacgtggc ggggaaggcg 1980 + gtcgacatcg ggccacccgc gtccgcgtcg tggttgtccg agcacggcgc cgattacggg 2040 + ttgtgccgcg tctaccgcaa cgaaccctgg cacttcgaat tgcgtcccga agcgatcgag 2100 + cacggctgcc cgcccctgta tgccgacccg agtcacgacc cgcggctgcg ccggtgacca 2160 + gggtcgcccg gcgtgctccg cagacggccc ggaaatttgc ccctaccatc catggatatg 2220 + gcggtcaagg ggcagtcggg tgagcccggc tcgcggacgg aatgcgagcg gtggacacta 2280 + taatcatttc tcgcggacaa tcaaggctcg caatcggccc aatgtgatcc actgaggagt 2340 + ccagccggtg acattccggc tcaatgtgtc cggcgcggcg catcatacta atggttgacg 2400 + atcgcgtcaa tacaacatta gtgaaagagt cgctcctcga tcccggccga tggcctgcgc 2460 + cttggacatc acgattgttc gtcaattgcc gacaacgatc ggtcatgcca tgctgacgtc 2520 + gtgtgcactc actggggatc aagggagggc gcggcggtga ccggggacca cgaggcaatc 2580 + ggggttccat cgggtcgaac cgctgagccg cgcccggcat gtactgggaa ttcgttgtcg 2640 + aacatgccct gtggcctcgc gttacccgac cgccggcctt ggtcgggctg acggtcgtcg 2700 + gtctcggccg ggtacaagcc accgaccggt agttccgcgg cgatcatcgc ccggggggat 2760 + tgcaagctgc atcgagttcc ggccgatggc cggaacgcgg gctgctgcct gccgtccggg 2820 + gaatcgcgga tcgttcgcgc acaggcctgt ccgcggcgtt catggggatc catcagcgtc 2880 + gaggacatcc gccgcggccg aggccggccc ggacaccggt cgtgtgtcca accaccgagg 2940 + ctgagcagtt ggacggtgcg gttctctgcc gtgccgcaaa gttgacgaag tagatccaaa 3000 + tgggggctag gtggatccga cgagagttga catattcgct ctccctgccg tcgaaatcga 3060 + gctgtcccgg ctgtcttccg cgagctcacc gcgaacatcg ggtgaggatc cggagcacgt 3120 + cgagacgctg ttgtcggcag agggagaact tccgcccatt ctcgttcacc gtccgacgat 3180 + gcaggtgctc gacggcctgc accggttgaa ggtggcgcga gtccggggtg acacgaaaat 3240 + cctggccaga ctggtcgacg ccaccgaatc ggatgcgttc gtcctggcgg tcgaggcgaa 3300 + catccggcac ggtctgccgc tgtccctcgc cgatcgcaag cgtgcggccg tccagatcat 3360 + cgggacgcat ccgcagtggt ccgatcggcg ggtggcctcg gcgaccggga tctccgcggg 3420 + cacggtggcc gacctgcgca ggcgcgcggg agaggacggg accgaggccc ggatcgggcg 3480 + ggacgggcgt gtccgcccgt ccgacggttc ggagcggaga agactcgccg ccgagctcat 3540 + ccgcagcgat ccgggtctgt ctctgcggca ggtcgccaag caggtcggca tctccccgga 3600 + gacggtgcgt gacgtgcggg gccggctgga gcgcggggag agcccgactc cggacgggac 3660 + aaggagattg ccggccaagc cgcacccgct gcggttgtcg gagcccgact tcggccgtgc 3720 + cgtggaccag gatcggctcg cgctgctgga aaggctcaag agcgacccgg cactgcggct 3780 + gaacgaggtc ggccggatcc tgctgcgcat gctcaccatg cactccatgg acgggcagga 3840 + gtgggaacgg atcctgcagg gtgttccacc acacctgcac ggcgtgatcg ccgggttcgc 3900 + ccgggaccac gcccgggtct gggcggagtt cgccgaccac ctggagagcc gggcgaccga 3960 + gctggccgcg ggatgatcgc gtgaccacgg cggagcccag tggcgtcgcc ggttccggtg 4020 + cgggccgccg caccggaacc ggcgtcgccg tccgcccaag tcccgcggct ggacttcgtc 4080 + cggtgagccg ttttaagaag gaacggtgac catcgagaaa gcgcttgtcg tcggtaccgg 4140 + gctgatcggc acctcggtgg cgctggccct ccgggagaag ggcgtcgcgg tcttcctctc 4200 + cgacgtcgac accgaggccg cccggctggc gcaggtactc ggcgccgggc gggagtgggc 4260 + gggagaaggt gtggatctgg cggtgatcgc cgtgccgccg cacctggtgg gggaccggct 4320 + ggccgacctg cagaagcaag gtgcggcccg ggtgtacacc gacgtggcca gtgtgaaggc 4380 + cgatccgatc gccgacgcgg agcggctcgg gtgtgacctg gcctcctatg tgccgggcca 4440 + cccgcttgcc ggccgggaac gctcgggccc ggccgccgcc cgcgccgagc tgttctcggg 4500 + ccggccgtgg gcactgtgcc ccggccccga gacggacgcg gaagccctgc gacgggtgcg 4560 + ggagctggtg tccctgtgcg gggcgacggc cgtcgtcgtg ggtgcgggcg agcacgactc 4620 + ggccgtggcg ctggtgtcgc acgccccgca cgtggtggcg tcggcggtgg cggccagcct 4680 + ggcgagcggc gacgacgtcg cgctgggcct ggcggggcag ggactccgtg atgtgacgcg 4740 + catcgcagcc ggggatccct tgctgtggcg gaggattctc tccgggaaca cccggccggt 4800 + ggccggggtg ctcgaacgga tcgcggccga cctcgccgcg gcggcctcgg cgttgcggtc 4860 + cggcgacctg gacgaggtga cggatctgct gcggcgcggc gtggacggtc acggccggat 4920 + ccctggtcag cgcggcggat cccttcccgg ccgcaacccg gcgggttccc cggggcgtta 4980 + ggccggccgc aaaaacgatt gccgaaggtg gccggaacgt ccgtcgtgat tgtacggtta 5040 + tccgtgcggc gcggcacggg ggcgtcggca aaaaaatgcg tccaagtgcc gaaagcgctt 5100 + gcttggaccc actcgtggac atcgactcga ttcagcacga ttgagatcgc cgactttggc 5160 + gtgtgagaga ggtgaccgga tggacatggt gttgcgtttc gagggggtgg acaagagccc 5220 + tgacgacccc gacccctggg tgaccaaggt ccgcaagggg acgctgcgcc gcgtgctcgc 5280 + ctacttccgc ccgcacgtcg ggaaggtggc gctcttctgt ctcgtcgccg tgctggagtc 5340 + gctcatcgtc gtggcaactc cgttgctgtt gaaggaactc atcgacaacg gcatcgtcaa 5400 + gaacgatctc ggggtcgtga tcctgatggc cggcctcacc gcggtgctcg ccgtgctggg 5460 + cgccgggctg acgatggtgt ccggctacat ctccgggcgg atcggggagg ggatcaccta 5520 + cgatctccgg gtccaggcgc tcggccacgt ccggcggctg ccgatcgcgt tcttcacccg 5580 + tacccagacg ggggtgctgg tcggcaggct gcacacggaa ctgatcatgg cgcagcagca 5640 + tttcaccggc ttgctcatgg cggccaccag cgtggtcatg gtcgtggtgg tgctggccga 5700 + gctgatctac ctttcgtgga tcgtcgccat cgtctcgctg gtgctgattc cgatattcct 5760 + cgtgccctgg attcgcgtgg ggcgggcgat ccagcggcgc agtatccggc tcatggacgc 5820 + gaataccggc ctcggcgggc ttctccagga gcggttcaac gtccaggggg ccatgctctc 5880 + caagctcttc ggccgtcccg ccgaggaaat ggccgagtac gaggagcgtg ccggggagat 5940 + ccgcaagatc ggcgtgagcc tttccgtgtg gggccggatg gccttcgtca tgatggcgct 6000 + gatggcctcg ctcgccacgg ccctcgtcta cgggatcggg ggcgggctcg tgctcgccgg 6060 + tgcgttcgag ctcggcacgc tggtcgccat cgccaccctg ctccagcggc tgttcgggcc 6120 + gatcacccag ctgtccggga tgcaggagct cgcgcagacg gtcgtggtga gcttttcccg 6180 + ggtcttcgag ctgctcgacc tcaagccact gatccaggaa cgccccgacg cgatcgcgct 6240 + gaagaagaag gtggtgccgg acgtcgagtt cgagcacgtg tcgttccgct accccaccgc 6300 + ggacgaggtc tcgctggcgt cgctggagca cctgcgggcc gagcgggagc gcagcgaagt 6360 + gacgccggat gtcctgcgcg acgtgagctt ccacgcgcag gccggaaccc tcaccgcgct 6420 + cgtcggcccg tccggcgcgg ggaagagcac catcacccac ctggtctccc ggctgtacga 6480 + cccgaacggc gggaccgtcc gcctcggcgg ccacgatctg cgcgatctca ccttcgaatc 6540 + gctccgcgaa gcggtcgggg tggtcagcca ggacgcctac ctcttccacg acacgatccg 6600 + ggagaacctc ctctacgccc gcccgaccgc caccgaggac gagctgatgg aggcgtgcaa 6660 + gggggcccag atccgggacc tgatcgactc cctcccgctc gggctggaca ccgtcacggg 6720 + cgatcgcggc taccgcatgt cgggcgggga gaagcaacga ctggccatcg cccggctgct 6780 + gctgaaggag ccgtcgatcg tcgtcctcga cgaagccacc gcccacctgg actccgagtc 6840 + ggaggccgcc gtccagcggg cgctcaagac ggccctgcac ggccggacct cgctggtgat 6900 + cgcccaccgg ttgtccacga tccgcgaggc cgaccagatc ctcgtgatcg acggcggcag 6960 + ggtgcgggag cgcgggacac acgacgagct gctggcccag ggcggcctgt acgcggagct 7020 + ctaccacacg cagttcgcca acccggccgc caacgacccc aagccggaga tcgaggacga 7080 + gctcgacgac atcgagcccg agccggtgat ccaacacatg ggctacggag gatgacgatg 7140 + aattccgcag cgcggaccac gccgacgatg ctggatctgt tcgcttcgca cgtggaccgg 7200 + acacctgacg cggtggccgt ggccggcggt gacggggttc tgacgtaccg gcagctcgac 7260 + gagcgcgcgg gccggttggc ggggcggctg gcgagtcgcg gcattcgccg tggcgaccgc 7320 + gtcgcggtgg tgatggaccg ttcggcggac ctggtggtgg cgctgctcgc cgtgtggaag 7380 + gcgggggcgg cgtacgtgcc ggtggacgcc ggctaccccg cgccgcgagt ggccttcatg 7440 + gtggcggact cggcggccaa gctcgtggtg tgctcggccg cgtcgcgcgg cgccgtaccg 7500 + gccggggtcg agtcgctcga gccggccgcc gccgccgagg agggcgcgtc cgacgcgccg 7560 + gcggccacgg tgcgaccggg ggatccggcg tacgtgatgt acacgtccgg ctcgacgggc 7620 + acaccgaagg gcgtgaccat ttcgcagggc tgcgtcgcgg agctgacgat ggacgccggg 7680 + tgggcgatgg agcccggcga ggcggtgctc atgcattcgc cgcacgcctt cgacgcgtca 7740 + ctgttcgaac tctggatgcc gctggcgtcg ggggtccggg tggtgctcgc cgaaccgggt 7800 + tcggtggacg cccggcggct gcgggaagcg gccgcggccg gggtgacgag ggtgtacctc 7860 + accgcgggga gcctgcgcgc ggtggcggag gaggcgccgg aatcgttcgc ggagttccgt 7920 + gaggtgctga ccggcggtga cgtggtaccc gcgcacgcgg tggagcgggt gcggacggcc 7980 + gcaccccggg cgcggttccg gaacatgtac ggcccgacgg aagcgacgat gtgcgcgacg 8040 + tggcacctgc tgcagccggg tgacgtggtg ggcccggtcg tgccgatcgg ccgtccgctg 8100 + accggccgcc gggtgcaggt gctcgacgcg tcgctgcggc ccgtggggcc gggtgtggtc 8160 + ggcgacctgt acctctccgg ggcgctggcg gagggctact tcaaccgggc ggcgctgacg 8220 + gcggagcggt tcgtggcgga tccgtccgca ccggggcagc ggatgtactg gaccggggac 8280 + ctcgcccagt ggaccgcgga cggtgagctg gtgttcgcgg gccgggccga cgaccaggtg 8340 + aagatccgcg ggttccggat cgagcccggc gagatcgagg ccgcgctgat cgctcagccg 8400 + gacgtgcacg acgccgtcgt ggcggcggtc gacggacggc tgatcgggta tgtggtgacc 8460 + gagggggacg ccgatccccg ggtcatccgc gaacgcctcg gtgcggtgct gccggagcac 8520 + ctggtcccgg ccgccgtgct cgcactggac gcactgccgc tgaccggcaa cggcaaggtg 8580 + gaccggtccg cgctgccggc gcccgagttc gcggcgagtg ccgccgggcg ggcaccgagc 8640 + accgatgcgg aacgtgtcct ctgtggactc ttcgccgagg tgctcggcgt ggcacgagcc 8700 + ggcgtcgacg acggtttctt cgagctgggc ggggattcga tcggcgcgat gcggctggcg 8760 + gcccgggccg ccaaggcggg cctgctggtg acgcccgccc agatcttcga ggagccgacc 8820 + cccgcccggc tggccgccgt ggcgcggccg gtcccggccg gcgggcccgt cgacggcccc 8880 + ctgctcaccc tgaccgcggc cgaggaggcg gagctggcgc tcgccgctcc gggcgccgag 8940 + gagatctggc cgctggcccc gttgcaggag gggctgctct tcgaatcgat cctcgacgac 9000 + cagggctccg acatctacca ggtgcaggtg atcctggagc tgaacgggcc ggtggacgcg 9060 + ccccggctgc gggccgcgtg ggacgcggtc gtccggcggc accccgagct ccggctgagc 9120 + ttccaccgcc tcgcctcggg caagacggtg caggccgtcc acggggacgt caccccgccg 9180 + tggcgggtgg tggacctgac gggtgccggc gacgtcgacg cggccgtcgc ggccctcgtc 9240 + gccgaggaac agcagcagcg gttcgaactc gccacggcgc cgctggtccg gctggtgctg 9300 + gtccggatcg cggcggaccg gtaccgcctg ctgttcgtca tccaccacat cctcgtcgac 9360 + ggctggtcgg tggcggtcat cctcaacgac gtctccgagg cgtacgaagc cggcgagccg 9420 + gtgccggaac agcggggcgg cgccaccttc cgggactacc tggcctggct ggaccggcag 9480 + gacgacgacg cggcccgggc ggcctggcgg gcggagctgg ccggtctcga cgagcccgcg 9540 + ctgatcgcga cttcgggcgt cgagacggag tacgactacc gcgccacgca cctgacgccg 9600 + gccctgcaca ccaggctgct ggggttcgcc cgcgagcacg ggctgacgcc gagcacggtg 9660 + gtgcacgccg cctgggcgat ggtgctggcg cggctcacgc ggcggaccga cgtcgtgttc 9720 + ggcaccatgg tcgcgacccg tcccccggaa ctggcgggga tcgagtcgat gccgggcctg 9780 + ctgatgaccg cggtgccggt ccgggtgccg ctggacggcg ggcaatcggt cctggacatg 9840 + ctcaccgacc tgcacagcag gcagacggcc ctcaaacgac accagtacct ggggctgccg 9900 + gagatccaga aggcggcggg accgggcgcg acgttcgaca cgatgctggt ggtcgagaac 9960 + tacccgcggg agtacgcccg ccggtacacg catctgcgca cgatcgaggg gacccactac 10020 + ccggtgaccc tgggcatcac cccgggcgac cggttcaaga tccagctcgg ctactggccg 10080 + ggccaggtcc cggacaccgt cgccgagtcg ctgctggagt ggttcgtcgg cgccatcggc 10140 + gcgctggtcg ccgatcccgc cggcctggtg gggcggatcg ggatgggcgc ggccgacgtg 10200 + cgccgctggg acccgccgct gcaggcgggg gagccgctgc cggccctggt ggggcggatg 10260 + gcggcgcggc cgccggacaa cgtggcggtc gtggacggcg acggtgcgct gtcctatgcg 10320 + gacttgtggg agcggtcgct gaagttcgcg gccgtcctgc gggcccacgg agtccggtcc 10380 + gaggaccggg tcggcctggt ggtggggcgc tcggcctggt ggacggtcgg catgctgggc 10440 + gtcctgctgg cgggcggcac gttcgtgccg gtggacccgg cctatccggc cgagcgcaag 10500 + gaatggatct tccggagcgc gaacccgatg ctggtggtgt gcgcgggcgc gacacggggg 10560 + gcggtgcccg cggagttcgc ggaccggctg gtggtgatcg acgaggtcga tccggccgcg 10620 + ggctcggcgg gggacctgcc gcgggtggat ccgcgcagtg ccgcgtacgt gatctacacg 10680 + tcggggtcga cgggaacccc gaagggggtc gtcgtcaccc atgccgggct gggaaacctg 10740 + gcgctggcgc acatcgaccg gttcggggtg tccccgtcgt cacgggtgct gcagttcgcg 10800 + gcgctcgggt tcgacaccat cgtctccgag gtgatgatgg cgttgctctc gggagcgacg 10860 + ctggtggtgc cgccggagcg ggacctgccg ccgcgggcgt cgttcaccga cgccctggaa 10920 + cggtgggaca tcacgcacgt gaaggcgccg ccgtcggtgc tgggcacggc cgacgtgttg 10980 + ccgtcgacgg tggagacggt ggtggcggcg ggcgagctct gcccgccggg cctggtggac 11040 + cggctgtccg cggaccggcg gatgatcaac gcctacgggc cgaccgaaac cacgatctgc 11100 + gcgacgatga gcatgccgtt gtcgcccggc cagcacccga tcccgttcgg caagccggtg 11160 + ccgggggtgc gcggatatct gctggactcg ttcctgcgcc cgttgccgcc cggggtcacc 11220 + ggtgagctct acctggccgg gatcggcgtg gcccgcggct acctcggccg ttcggcgctg 11280 + acggccgagc ggttcgtcgc cgatccgttc gtgcccggtg agcggatgta ccggaccggg 11340 + gacctggcgt actggaccga acagggcgag ctggtgtccg ccgggcgcgc cgacgaccag 11400 + gtcaagatcc gcggcttccg tgtcgaaccc cgcgagatcg agttcgcctt gtccggctac 11460 + ccccgggtca cccaggccgc ggtcgccgtc cgcgacgacc gcctggtcgc ctacgtgaca 11520 + ccaggcgaca tcgacacgca ggcggtgcgg gcgcacctcg cgtcccggat gccccagtac 11580 + atggtccccg cggcggtggt ggcgctggac gccctgccgc tgacggcgca cgggaagatc 11640 + gatcggcgcg cactgcccga ccccgacttc accgccggga agcaggccag ggagccggcc 11700 + accgagaccg agcgggtgct gtgcgagttg ttcgccggcg tgctcggcct ggcgcgggtc 11760 + ggggtggacg acagcttctt cgagctcggc ggggactcca tcctctcgat gcagctggcg 11820 + gcgcgggcgc ggcggtcggg gctgacgttc accgcggcgg acgtcttcga cgggaagacg 11880 + cccgagcgga tcgcgcagct ggcggcggag tcgtcggtgc cggagcccgg tcgttccccg 11940 + aaacccgatg gcgtcggtga cgtcgcgtgg acgccggtga tgtggatgct gggagacggc 12000 + gtcgcgggac cggcgttcgc gcagtggatg gtggtcggga cgccttcgga cctgacggag 12060 + aaggcgctgg cggcgggctt tgcggccgtg gtggatacgc acgacatgct gcgggcgcgg 12120 + gtcgtcgccg acgagggcgg ccggcgcctg gtggtgggtg agcgtgggtc ggtggatgtc 12180 + gccggggcgg tcacccgcat ccgcgccgat ggccgctcgc tggacgaagc cgtggcggac 12240 + gcggcgcgcg cggccgtgac ccggttggac ccgtcggcgg gcgtgatggc ccaggcggtg 12300 + tgggtcgacg ccggaccgga ccaggtgggg cggctggtgg tggtggcgca ccacctgtcg 12360 + gtcgatggcg tgtcgtggcg gattctgctg tcggatctgc aggcggcctg cgaagccgcg 12420 + gtcgcggggc gggagccggt gctggagccg gtcggtgcgt cgttcaagcg gtgggcgggc 12480 + ttgctggccg agtgggcggt ttccgcggag cgggccggtg agctggccgc gtggaaggcg 12540 + attctcggac cgggggaccg gccggccggt gcgcaggcca cgagccgggc cgcggaaggt 12600 + gccgtgcgct cgcggtcgtg ggtcgtgccg aaggtggaga cggcggcgtt ggcaggccgg 12660 + gctccggtgg cgttccactg cggggtgaac gaggtcctgc tcgccgggct ggcgggcgcg 12720 + gtcgcgcggt ggcgcggcgg ggacgccgtg ctggtggacg tggaaagcca tggccgccac 12780 + ccggtggacg ggacggacct gtcccggacg gtgggctggt tcaccagcgc acatccggta 12840 + cggctggacg tggccggcac cgatctggcg gacgtgctcg ccggcggtcc ggcggccggg 12900 + cgtttgctga aggccgtcaa ggagcagtca cgggccgtgc ccggcgacgg gctcggatac 12960 + ggcttgctgc ggtacctcaa cggcacgacg gggccggtgc tggcggacct gccgtcgccg 13020 + cagatcgggt tcaactacat gggccggttc gccgccggcg agaagagcgg ggtgcgggcg 13080 + tggcagccgg tcggtgacat cggcagttcg ctggaacccg gtatgggcct gccgcacgcg 13140 + ctcgaggtca acgcgatcgt ccaggacctg ccggacggtc ccgagctgac gctcatgctg 13200 + gaatggcagg acggcctgct cggcgaggac gagatcgacc ggctgggccg ggcctggctg 13260 + gacatgctgt ccggggtggc ccgccaggcg gctgatcccg ccgcgggcgg gcacaccgcg 13320 + tccgacttcg acctcgtcac cctggaccag gcggagatcg aggccctcga ggccgaattc 13380 + gcggccgccg gcggactggc cgaggtgctg ccgctgtcgc cgctgcagca cgggctggcc 13440 + ttccacgccg gttacgccgg cgacggcgtc gacgtctaca ccgcgcaggc ggtgctggag 13500 + ctggccggcc cgctggacgt gccgctgctg cggaagtcgg tgcgcgcgct gctggacagg 13560 + cacgcgaatc tgcgtgccgg cttccggcac ggcgccgacg ggaccgccta ccaggtggtc 13620 + cccggcgccg tggcggtgcc ggtgaccctg gtggacgtga cggaatcggc ggatccggcg 13680 + gccgaggcgg cggcggtggc cgcggccgaa cgggcgcggc cgttcgagct ggcccggccc 13740 + ccgctgctgc gggtcatggt ggtggtgctc ggcccggacc ggcaccggct ggtgctgacc 13800 + aaccaccaca tcctgctcga cggctggtcg acgccgctgc tgctggacga actgctcacg 13860 + ctttaccgca acggagccgc tccggccgcg ctggcgccgg tcaccccgta tcgggactac 13920 + ctggcctggg tgcgcgaaac cgaccgggag gcggctaccg aagcctggcg cgacgccctg 13980 + gccggcttgc ccgagccgac cctggtggcg gcggaccggc cggtcccggt cgaggtgccc 14040 + gagcagatct ggaccaccct ggacgagacg ttcgcccagg cgctgggggc gcgggcacgc 14100 + gagtgcggtg tcacggtcag caccgtgctg caggcggtgt ggggcatggt gctggcggcg 14160 + ctcaccggac gcgacgacgt ggtgttcggg tcggtggtgt ccgggcgccc ggccgagctg 14220 + ccggggatcg agaccatggt cgggttgttc atcaacaccg tcccggtccg ggtccggatg 14280 + cggccgcagg acaccttcgc cgaactggtg cggggactgc agaacgagca ggtggcgctg 14340 + ctggcccacc accacgtggg tctcaccgac atccagcagg ccgcggggct ggggcggctg 14400 + ttcgacacca tcatcgtcta cgagaactac ccgagaccgg ccgagatcgg cgacgaatcc 14460 + gccgatgccg atcgggtccg ggtgcaggga ctgaccgccg ccgatgccac ccactacccg 14520 + ctggcgctgg cggtcgtgcc gggcaccgac ctgcggctgc ggctggagca ccagcccgcg 14580 + ctgttcaccg ccgagcaggc cggcgccgtg ctcgagcggt tcacgctggt gctcgaagcc 14640 + gtcgtcgccg atccgcggct gccgctcgcg gtggtgccga tcctgtccga tgccgaacgg 14700 + cgacagctgc aggcgggcaa cgacaccgcg ctgccggtgc cggaccggac gttgccggag 14760 + ctgttcgccg cgcaggccgc cgccaccccg gaggcgaccg cggtggtctt cgaggaccgg 14820 + tcgctgacct acgccgagct cgacgcgcgc gccaaccagc tggcgcgctg gctcatcgac 14880 + cagggtgccg ggccggaagg cctggtcgcg gtgctgctgc cccggtcgct ggaactggtc 14940 + gtcgcgttgc tggcggtcac caagaccggc ggcgcgtggc tgccgatcga tccgggctat 15000 + ccggccgacc gcatcgcctt catgctcgac gacgccggac cggcgctggt gatcaccacc 15060 + gcggtgctgt cggcatcgcc gatcggtgac gtgctggccg cccgctcgag gacggtggtg 15120 + ctcgacgagc ccgcggccgc gggccagctg gcggggcggg accgcgcgcc ggtcaccgac 15180 + accgaccgcg ctcgagcgct ggatccgcgc cacccggcgt acctcatcta cacctcgggc 15240 + tccaccggtc gccccaaggc cgtggtcgtc acccatcgga acctgacgaa ctacctgctc 15300 + cactgtggac ggatgtaccc gggtctgcgg gggcggtcgg tgctgcattc gtcgatcgcc 15360 + ttcgatctga cggtcaccgc gacgttcacc ccgctcatcg tggggggaga gattcacgtc 15420 + ggtgccctgg aagacctgat cggggtggtg gaggccgcac cgtcgatctt cctcaaggcc 15480 + acgccgagcc atctgctgac cttggacacc gcttcccggg gcagtgccgg ttcgggtgac 15540 + ctcctgctcg gcggcgaaca attgccggcc gacacggtcg tccaatggcg ccggaagtat 15600 + ccgaacatcg tggtggtcaa tgaatacggg ccgaccgagg cgaccgtcgg gtgcgtcgaa 15660 + taccggctcg aaccggggca ggaatgcccg ccgggcggtg tggtgccgat cggcaccccg 15720 + ctggcgaaca tgcgggcgtt cgtgctggat tcgtggctgc ggctggtgcc gccgggtgcg 15780 + gtgggcgagt tgtacgtggc cggtgcgggc ctggcgcggg gatacctggg ccgggcaggg 15840 + ctgacggcga cgcggttcgt ggccgatccg ttcggctccg gcgagcggat gtaccggacc 15900 + ggggacctgg tgcagtggaa cccggacgga cagctggtgt tcgccggccg ggtcgacgac 15960 + caggtgaagg tgcggggctt ccggatcgag cccggtgaga tcgaggccgc cctggtggcg 16020 + caggagtcag tgggccaggc ggtggtggtg gcccgtgaca gcgagatcgg cacccggctg 16080 + atcgggtacg tgaccgccgc gggggagtcc ggtgtggacg aagccgcggt gcgcgaggga 16140 + gtggcggccc ggttgccgca gtacatggtg ccggcggcgc tggtggtact cggcgcgctg 16200 + ccgttgacgg cgaacggaaa ggtggaccgg gcggcgctgc cggatcccga cttcggcgcc 16260 + cgtgccgggg gccgggagcc ggtcacggag gccgagcggc tgctgtgtgc gctcttcgcc 16320 + gaggtgctcg gcctggagcg cgccggtgcg gacgacagtt tcttcgagct gggtggggat 16380 + tccatccttt cgatgcggct ggcggcccgg gcccaccgcg agggaatgtc cttcggtgcg 16440 + cgcgaggtgt tcgagcagcg cacgcccgcg gggatcgcgg cgatcgtgga acgggttgcg 16500 + ggcgatcgtc ctgtcgcggc ggtacacgcc gtgtccgatg tcgcccttct cgacctggac 16560 + caaggcgagc tcgacgaatt caaggctgag ttcgacgacg attcccagcc ctttgctgat 16620 + ccagggagat attgatgagc cagtcgcgga tcgaggaaat ctggccgctg tcgccactgc 16680 + aggccggttt gctcttccac gcggtttacg acggcgaagg gcccgacgtc tacatcggtc 16740 + actggattct cgacctggcc ggaccggtgg acgcggccgg gctgcgtgcg gcgtgggaga 16800 + cgctgctggc ccggcacgcc ccgctccggg cgtgtttccg gcagcgcaag tcgggcgaga 16860 + cggtgcagat catcgccagg caggtggaac tgccgtggcg ggaggtcgac ctttcccacc 16920 + tcgacgaccc cgaggaggcc gttcgcgagc tggccgagca ggaccggacg acgaggttcg 16980 + acctcgcgca ggcgccgttg ctgcggctga ccctgatccg gctcggcgcc gacgcgcacc 17040 + gcctggtggt gacctgccac cacacgatca tggacggctg gtcgctgccc atcgtgatcg 17100 + acgagctgtc ggtgctgtac ccggcgggcg gtgacgcgtc ggcgctgccg gacgtgccgt 17160 + cctaccggga atacctcgcg tggctgagcc ggcaggacaa ggaacgcgcg ctgtcggcgt 17220 + ggaccgcgga gctcagcggc gccgaggaac cgacgctggt ggtgcccgcc gatccggggc 17280 + gggcacccgc cgagccggag agcgtcgagg cccacctgcc ggagcacctc acgcgctcgc 17340 + tggccgagct ggcccgtcgc cacgggttga cgttgaacac cgtggtgcag ggcgcctggg 17400 + cgctggtgct ggcgcagctg gccggccggc cggacgtggt gttcggggcg gcggtgtcgg 17460 + cgcgcccgcc ggacctgccc ggtgtggagg ggatggtggg gctgttcctc aacaccgttc 17520 + ccgtgcgcgt gcggttgcgc ggctcgacgc cggtcgtcga gctgctggcg gagttgcaga 17580 + aacggcagtc ggcgctcatt cccgaccagt tcgtcgggct ggcggacatc cagcaggcgg 17640 + cgggtcccgc cgcggttttc gacacgctgc tcgtcttcga gaagttccac cacgggcccg 17700 + ccggatcgga ctccgcggga accttccgca ttcacgtgaa ccagggccgg gtggcggccc 17760 + actacccgct gacgctggtc gccgtccccg gcgagtcgat gtacctcaag ctcgactacc 17820 + tgacggagct cttcgaccgg gaaaccgcgt tcgccatcct cgagcggttc accggggtgc 17880 + tgcggcagct gaccggcgcg ggcgagctca cggtggccgg cgtcgaggtg acgaccgcgg 17940 + ccgagcgggc cctggtggcc ggggaatggg gtgcctcgac ctcggcgccg ccgagcctgc 18000 + cggcgctgga tctgttcggg caccaggtgg cgcaccgccg cgacgagccg gcggtcgtcg 18060 + acggcgatcg gacggtgtcg tacggagaac tcgccgagcg cgctgagcgg ctcgccggct 18120 + acctgaacgg ccggggagtc cggcgcggag accgggtggc cgtggtgctc gaccggtcac 18180 + ccgacctgat cgcgacgctg ctcgcggtgt ggaaggcggg cgcggcgtac gttccggtgg 18240 + accccgccta cccggtggaa cgcaggaagt tcatgctggc ggactccggg cccgcggcgg 18300 + tggtgtgcgc ggaagcgtac cgggccgccg tgccggacac ctgccccgag ccgatcgtcc 18360 + tggacgatcc ccggacgcgg caggcggtgg cggagagccc tcgcctgtcg gcaggcacga 18420 + gcgccgacga cctcgcctac gtgatgtaca cgtccggatc gaccgggacg ccgaagggcg 18480 + tcgcggtgtc gcacggcaac gtcgcggcgc tggccgggga gccgggctgg cgggtgggcc 18540 + ccggtgacgc cgtgctgctg cacgcctcgc acgccttcga catctcgttg ttcgaaatgt 18600 + gggtgccgct gctgtcgggt gcccgggtgg tgctggccgg accgggcgcg gtggacggcg 18660 + cggcgctggc ggcctacgtg gccggtggcg tcacggccgc ccacctgacc gcgggggcct 18720 + tccgggtgct ggccgacgag tcgccggagg cggtcgccgg gctgcgcgag gtgctgaccg 18780 + gtggggacgc ggtgccgctg gcggcggtcg agcgggtgcg cggacgtgtc cggaacgtgc 18840 + gggtgcggca cctctacggc ccgaccgagg ccacgctgtg cgcgacgtgg tggctgctcg 18900 + aacccggcga cgagacggga tcggtgctgc cgatcggacg tccgctcgcc gggcggcgcg 18960 + tccacgtcct cgacgcgttc ctgcggcccg tgccgccggg cgtggcgggc gagctgtatg 19020 + tcgccggagc cggtgtggcg cagggctatt cgagccgccc ggcgctgacg gccgagcggt 19080 + tcgtcgccga tccctccggt tccggtgcgc ggatgtaccg caccggggac ctggcgtact 19140 + ggacggagca gggtgcgctg gcgttcgccg ggcgggccga cgaccaggtg aagatccgcg 19200 + ggtaccgcgt ggagcccggc gagatcgagg tggtcctcgc cggcctgccc ggcgtcggcc 19260 + aggccgtggt gaccccgcgg ggtgagcacc tgatcggcta tgtggtcgcc gaagcgggcc 19320 + acgacgccga cccggtgcgg ctgcgcgagc agctcgccgg gacgctgccc gagttcatgg 19380 + tgccggccgc ggtgctggtg ctggacgagt tgccgttgac ggtcaacggg aaggtggacc 19440 + ggcgggcact gcccgaaccg gacttcgcgg cgaagtcggc gggccgggag ccggtcaccg 19500 + aggccgaacg agtcctttgt ggagtgtttg ccgacgtcct cggcctcgac cacgtcggcg 19560 + tcgacgacag cttcttcgag ctgggcggcg actcgatctc gtcgatgcag gtcgccgcgc 19620 + gtgcgcgtcg cgaagggatc tcgctgaccc cgcggctggt gttcgagcac cggacgccgg 19680 + aacgcctcgc ggcactggcg caggaggcag gcgcgacgcc acgcgccgag gtcgtcacgg 19740 + gcgtgggcga gatcccgtgg acgccggtga tgcgtgccct cggggacgac gcgatgcgcc 19800 + ccggcttcgc gcaggtgaga gtcgtcgtca ccccggcggg ggtgaacccg gacgcgctcg 19860 + tgagcgccct gcaggcggtg ctggacgcgc acgacctgct gcgggcccgg gtggagccgg 19920 + acggacggct gatcgtgccc gagcgcggcg cggtggccgc ggccggcctg ctcacgcggg 19980 + tggccgccgg gaccggcggc ctcgacgaga tcgccgagcg cgaggtcagg acggcgacgg 20040 + gcacgctgga cccgtcggcg ggaatcatgg cgcgggtcgt gtggatcgac gccggggacg 20100 + ccgagccggg ccggttggcc ttcgtggcgc accacctctc ggtcgacgcg gtctcctggg 20160 + ggatcctgct gccggatctg cgagcggcct acgacgaggt gatctccggc gggaccccgg 20220 + ccctcgaacc cccggtgacg tcgtatcggc agtgggcgcg ccggctgacc gcgcgggcgc 20280 + tcagcgaaag caccgtggcc gaactcgaaa aatgggctgc cgtcgtggaa ggcgcggaac 20340 + cggcactgcc ccaggacacc gggcagcaca ccgggcagtc gcactcgtgg tccacgagcc 20400 + tgtccggcac cgaggtgcga gacctggtca ctgtcttgcc gggcgcgttc cactgcggga 20460 + tccaggacgt tctgctggcg gggctcgcgg gtgcggtggc gcgtgtgcgc ggttccggcg 20520 + ccgcgctgct ggtcgacgtg gaagggcacg gtcgcgaagc cgccgacggc gaggacctgt 20580 + tgcgcaccgt cggctggttc accagcgttc acccggtccg tctcgaactg tccgatgtgg 20640 + acctcgcggg cgcggcggac ggcgagcggc ctgccgggca gttgctgaag gccgtgaagg 20700 + agcagatccg ggccgtgccc ggcgacggat ccggctacgg gctgctgcgc cacctcaacc 20760 + cgggcaccgg ggcgaggctg gccgagttgc cgtccgcgca gatcggcttc aactacctcg 20820 + gccggactgt cctcgctccc gaggacaccg cgtggcagcc caacggcgga gggccgctcg 20880 + gcggcggtcc ggacatggtc ctcgcgcacg ccgtggaggt cagcgcggaa ctccaggaca 20940 + cgccggccgg cccccggctc gggctggcca tcgacacgcg ggatttcgac ctcgccacgg 21000 + tggagcggct cggcgaggcc tggctggaga tgctgaccgg tctcgcggcg gtggcccgcg 21060 + gatccggcgc gggcgggcac acgcccgccg acttcgctct ggtcgacctg acgcagcggg 21120 + acgtggcgga gctggaggcc gcggcgcccg ggctgacgga catctggccg ttgtcgccgc 21180 + tgcaggaagg catgctcttc gaacgggcct tcgacgagga cggcgtcgac gtctaccaga 21240 + cgcagcggat cctggacctc gacgggccgc tcgacgaacc ccggctgcgc gcggcctgga 21300 + accaggtcct cgcccggcac gcctcgctgc ggaccggctt ccaccagctg gggtccggcg 21360 + ccacggtgca ggtcgtcgtg cgcgaggccg acatcccgtg gcgggtggcg gatctgtcgc 21420 + acctcgatgc ggcggaggcg gccgcggagg tcgagcggct gctcgccgag gaccagggcc 21480 + ggcggttcga cgtgacccgg ccgccgctgc tgcggctgct gctgatccgg ctcggtgcgg 21540 + acgagcaccg actcgtcgtg acctcgcacc acgtactcct cgacggctgg tcgaccccgc 21600 + tcgtcgtggg ggagatgtcg gacggctacg cgggcggccg cagctcctcg aagccgccgt 21660 + cctaccagga ctacctggcg tggctgagcc gtcaggacgc ggaggcgacc cgatcggcgt 21720 + ggcgggccga gctcgccggc gcggacgaac cgaccttggt cgacgccgac gcgggcaaga 21780 + cgctcgtgat gccggacgag cacgccgaat ggctgcccga gccggcgacg cgggcactcg 21840 + ccggcttcgc ccgtggccac gggctgacgg tgagcacgat cgtgctgggc gcgtgggcgc 21900 + tggtgctggc gcggctggcc ggccggaccg acgtggtgtt cggctcggtg gtgtcggggc 21960 + gtccggcgga cgtgccggat gtcgagcgca tggtgggcat gttcatcaac accgtcccgg 22020 + cccgggtgcg gctcgacggc cgccggccgt tgctggagat gctcgaagac ctgcaggcgc 22080 + gccaggcggc gttgaccgag caccagtacc tggggctgcc ggagatccag aaggtggcgg 22140 + ggaccggcgc gatcttcgac acgatcgtga tggtcgagaa ctacccgcac gacgccgccg 22200 + gtctcggcgg cgacggcggg gtggcgatca gctcggtcgt cacccggacc ggcaccagct 22260 + atccgctgac catgaacgtc agcctcgggg accgcctgcg catcaccgtg tcctaccggc 22320 + ccgaccggat cgacgacgcg acggccgccg aggtcgccag gcaggtcgtg cgggtcctgg 22380 + aacgggtggt ggccgagcct tcgctgccgg tgggccgcct cggcgtgacg agcgaaccga 22440 + cgcgcgcggc ggtggtggaa cgctggaact cgacgggcga agcggccgcc gagacgtccg 22500 + tgctggagct gttccggcgg caggcaggtg cctcgccgga cgcggtggcg gtcgtggcgg 22560 + gggaacgcac cctgtcctac gccgacctcg accgcgagtc cgaccggctg gccgggcacc 22620 + tggccgggat cggcgtgggg cgtggtgacc gcgtcggcgt ggtgatgaca cgcggcgcgg 22680 + acctgttcgt cgccctgctc ggggtctgga aggcaggcgc cgcgcaggta ccggtgaacg 22740 + tggactaccc cgcggaacgg atcgagcgga tgctggccga cgtcggcgcg tcggtcgcgg 22800 + tctgcgtgga agcgacccgc aaggcggtgc cggacggggt cgagccggtg gtcgtggacc 22860 + tgccggtgat cggcggagta cggcccgagg cgccaccggt cacggtcgga gcgcacgacg 22920 + tggcctacgt gatgtacacg tccggctcga ccggcgtgcc gaaggccgtc gcggtgccgc 22980 + acgggagcgt ggcggcgctg gcgagcgacc cgggctggtc gcagggcccc ggcgattgcg 23040 + tgctgctgca cgcgtcgcac gcgttcgacg cgtcgctggt cgagatctgg gtgccgctgg 23100 + tcagcggagc ccgcgtgctg gttgcggaac cgggcacggt cgacgcggaa cggctgcgcg 23160 + aagcggtctc ccgcggcgtg accaccgtcc acctgacggc cggtgccttc cgtgcggtgg 23220 + ccgaggaatc gccggactcc ttcatcgggc tgcgcgagat cctgaccggt ggggacgcgg 23280 + tgccgctcgc gtccgtcgtg cggatgcgcc aggcctgccc ggacgtccgg gtccggcagc 23340 + tgtacggccc caccgagatc accctctgcg ccacctggct cgtcctcgag ccgggggccg 23400 + cgacgggcga cgtcctgccg atcggcaggc cgctggccgg ccggcaggcc tacgtgctcg 23460 + acgcgttcct gcagcccgtg gcgccgaacg tgaccggcga gctctacctc gccggcgctg 23520 + gcctggcgca cggttacctg ggcaacaccg cggcgacctc ggagcggttc gtcgccaacc 23580 + cgttctccgg cggcggccgg atgtaccgca ccggcgacct ggctcgctgg accgaccagg 23640 + gcgagctggt gttcgccggc cgcgccgact cccaggtgaa gatccgcggc taccgcgtcg 23700 + agccgggtga ggtcgaggtg gcgctgaccg aggtgcccca cgtcgcgcag gcggtcgtgg 23760 + tggcgcggga aggccagccc ggcgagaagc gcctgatcgc gtacgtgacc gcggaagcgg 23820 + gatcggcact ggaatccgcc gcggtccgcg cgcacctcgc gacgcggctg ccggagttca 23880 + tggtgccgtc ggtggtggtg gtgctggaga gcttcccgtt gacgctcaac gggaagatcg 23940 + accgcgcggc cctgcccgcc cccgagttcg ccgggaaggc ggccgggcgc gaaccgcgca 24000 + cggaggccga gcgggtgctg tgcggcctgt tcgccgagat cctcgggctg gagcgggtcg 24060 + gcgccgacga cggcttcttc gagctgggcg gcgactcgat cctctcgatg cggctggccg 24120 + cccgcgcgcg tcgcgagaac ttcgtcttcg gcgcgaagca ggtcttcgag cagaagacgc 24180 + ccgcggggat cgcggcggtc gccgagcgtg gcgggcagag ccgcccggcc ggcgtcgccg 24240 + acggcgtcgg cgaggttccg tggacgccgg tggtgcgggc actgctcgaa cgcgatcccg 24300 + ccgggctgac ccgcggtgcc atggcgcagt gggtcagcgt ggcagcgccc cgcgaccttt 24360 + cggtgaccgc gctggtcgcc gggctgggcg cggtgatcga cacgcacgac atgctgcgga 24420 + gccggatcgt cgagagcgag ggcgtggaac cccggctggt cgtggccggg cggggcacgg 24480 + tggacgcggc ggcgctggtc gaacgggtcg aggccggcga cggtgatctc gccgagatcg 24540 + cggaccggtg cgcccacgac acggccgcac gcctggatcc cgtggccggc gtgctggtcc 24600 + gggccgtctg ggtggacgcc ggaccgggcc gcgccggacg gctcgtggtg gccgcgcacc 24660 + acctcgtggt cgacgtcgtg tcgtggcgaa ccctcctgcc ggacctgcag gcggcctgtg 24720 + aagccgtggt cgcgggcggg cagccggcgc tcgatccgcc ggacgtctcg ttccggcgct 24780 + ggtcgcggac gctggacggc gaggcggcga tccggaccgg cgaactggcg gtgtggacgg 24840 + agatcctcga cggggcgcag tcccggctgg gcgagctcga tccgcggcgc gacaccgtgt 24900 + ccaccgcggg acgccggtcc tggaccgtgc cccgggaaca cgcgggcgtg ctcgtggaac 24960 + aggtcacctc ggccttccac tgtggtgtcc acgaggtgct gctggccacc ctggcgggcg 25020 + ccgtggcggg ctggcgcggc ggcacggccg tcgtggtgga cgtcgaaggc cacggccgtc 25080 + agcccctcgg ggaactggac ctgtcgcgga cactcggctg gttcaccgac gtccacccgc 25140 + tccggctgga cgtcaccggg gtcgacccgg ccgaggcggt cgccggcggc gacgcggcgg 25200 + gccggttgct gaagcaggtc aaggagaacg tgcgagccgt gcccgacggc gggctcggct 25260 + acgggatgct gcggtacctc aacgccgaga cggggccggt cctcgccgcg ctgccgaagg 25320 + cggagatcgg gttcaactac ctcggccgct tctcggcggg gtccggcggc gaggcacaac 25380 + cctggcagat cacgggaatc gtcggcggtg cggcggagca ggacacgccc ttgcggcacg 25440 + tcgtggagat cgacgccgtc gtggtggacg gcccggacgg acccgaattc accctgaccg 25500 + tgacctgggc cgggcggatg ctcggcgacg ccgaggcgga gtcgctcgcg caggcgtggc 25560 + tggacatgct ggccggcctg gccgcccacg tggccgccgg tggccccggg gggcacacgc 25620 + cgtccgactt cccgctcacc gcgctgacgc agcgggaggt ggcggagttc gaggccgccg 25680 + tgccgggcct gctcgacatc tggccgcttt ccccgctgca ggaaggcctg ttgttccacg 25740 + ccgccgacga ccgcggcccg gacgtctacg cgagcatgcg caccctcgcc atcgacggcc 25800 + cgctggacgt cgcccggttc cgggcgtcct ggacggtcct gctcgaccgg catcccgccc 25860 + tgcgggcgag tttccaccag ctggaatccg gcgaggccgt gcaggtgatc gcccgggacg 25920 + tgccgccgga ctggcgggag accgacctgt ccgggctgcc cgagagcgaa gcgctcgcgg 25980 + agttcgaccg cctcgcggcg cggatgcacg ccgagcggtt cgacctgacc aaggctccgc 26040 + agctgcgcct gcacctggtg cgcctcggtg accgcaggta ccggctgatc ttcacgtcgc 26100 + accacatcgt ggccgacggc tggtccctgc cgctcatcct ggtcgacgtg ctgacggcgt 26160 + acgaggcagg cggtgacggc cggacgctgc cggccgcgac gtcgtaccgt gacttcctcg 26220 + cctgggtcga ccgccaggac aagggggcgg ccgggcaggc gtggcggacc gagctcgcgg 26280 + ggctcgacga ggcgacccac gtcgtgccgc cgggctcgat catcacgccc ctggagcccg 26340 + aacgcgtcgc gttcgaactc gacgacgaga cgagcaagcg gctggtcgag ttcacccggc 26400 + ggcacggcgt cacggcgaac acgctcttcc agggggtctg ggcactgcac ctggcccggc 26460 + tggccgggcg gaacgacgtg gtcttcggtg ccgcggtcgc ggggcgcccg ccggagatcc 26520 + ccggcgtcga gtccgcggtc ggcctgttca tgaacatgct gccggtccgg gcgcgcctca 26580 + ccggtgccga gccggtcgtc gacatgctga aggacctgca ggagcggcag gtcgcgatga 26640 + tggcgcacca gcacatcggg ctgcccgaga tcaagcagct caccgggccg ggggcggcgt 26700 + tcgacacgat cgtggtgttc gagaactacc cgcccgcgcc gccgaggtcc gacgaccccg 26760 + acgcgctcgt catccgcccg gtggggatcc cgaacgacac cgggcactac ccgctgtcca 26820 + tgcgcgcgtc cgtggcggcg ggccccgtcc gcggtgagtt catctaccgg ccggacgtgg 26880 + tcgaccggac cgaggccggg gagatggtcg cggcgatcct ccgcgcgctc gagcaggtgg 26940 + tggccgagcc gtggacgccg gtgggccagg tcggcctgat cggcccggag cagcgccgtc 27000 + tggtcgtgga cgagtggaac cggaccgacg tgccgctggc ggcggagacg ctgccggtgt 27060 + tgttccgcag gcaggcggag cggtcaccgg atgcggtggc cgtcgaggac ggggcgcgga 27120 + gcctgacgtt cggtgggctg ctcggcgagg tggaagcgct ggcccggctg ctcgtggggg 27180 + cgggcgtgcg gcgcgagcac cgggtgggcg tcctggtcga gcgctcggcc gagctggcgg 27240 + tgaccatgat ggccgtgtcg ttcgccggcg gggtgttcgt gccggtcgac cccgactatc 27300 + cccgtgagcg cgtcgagttc atgctggcga actcggcacc cggggtcatg gtgtgcacga 27360 + agacgacccg ggcggccgtg cccgcggagt tcgcgggcac cgtgctggtg ctggacgagc 27420 + tgcccgccgc ggacccggac gtcgagctgc cgccggtggc accggaagac gcggcgtacg 27480 + tgatctacac gtccgggtcg acgggggtgc ccaagggcgt cctggtgacc cactccgggc 27540 + tcgccaatct ggggtacgcg cacatcgagc ggatggcggt gacgtcgtcc tcgcgggtcc 27600 + tgcagttgtc cgcgaccggc ttcgacgcca tcgtgtccga gctctacatg gccttgctgg 27660 + ccggcgcgac cctggtgctg ccggacgcgg cgagcatgcc gccccgggtg acgctgggcg 27720 + aggcgatccg gcgggcgggc atcacgcacc tgaccgtgtc gccgagtgtg ctggcgagcg 27780 + aggacgacct gccggacacg ctgcggaccg tgctgacggg cggcgaggca ctgccgcccg 27840 + cgctggtgga ccgctggtca ccgggccgcc gggtgatcca ggcctacggg ccgaccgaga 27900 + cgaccatctg ctcgacgatg agtgccccgc tgtccccggg gcacgaccag gtcccgctcg 27960 + gcggcccgat ccacaacgtg cggcactacg tgctcgacgc gttcctgcag ccggtgccgc 28020 + ccggcgtggt cggcgagctc tacatcacgg gtgtcgggct ggcgcgcggc tacctcgggc 28080 + gtcccggcct gaccgcggaa cggttcgtgg ccagcccgtt cgcccccggc gagcggatgt 28140 + accgctcggg cgacctgttc cgctggaccc gggaaggcca gctgctcttc gcgggccgtg 28200 + tcgacgcgca ggtcaaggtg cgggggtacc gggtcgagcc cgccgagatc gaggccgtgc 28260 + tcgcggagca cccgtgggtc ggccaggtgg cggtgtccgt ccgccgggac ggcccgggcg 28320 + acaagcagct ggtggcttac gtcgtgccgt cggccgacgc ggccgccgag aacggcacgc 28380 + tggcctcggc actgcgcgag ctggcggccg aacgcctgcc ggagtacatg atgcccgcgg 28440 + cgttcgtgtc gctggagcag atgccgctca ccccgaacgg caagctcgac caccgggcgc 28500 + tgcaagcccc cgacttcgcc gggatgtcct cgaagcgggc cccccgcacg cccatggagg 28560 + cgaggctgtg cgcgctcttc gcggacgtgc tcggccttga ccaggtgggg cccgacgaca 28620 + gcttcttcga actcggcggc gactcgatca cctcgatgca gctgtcggcc cgggctcggc 28680 + cgacggggct ggaactgacc ccgtggcagg tgttcgacga gaagacgccg gaacggctgg 28740 + cggtgatcgt ccaggaactc gcggccgagg gcgggaccac cccggcgccg gagcccggcg 28800 + agggcacgct cgtcgctctc tcacctgacc agatggacct actcgaggcc gggctggccg 28860 + gcgaatgacc gccataagga gcagattgtg accgttgacg acactcgcgc gaagcgccgc 28920 + tccagcgtcg aggacgtctg gcctctttcg ccgctgcagg agggaatgct ctatcacacc 28980 + gccctcgacg acgacgggcc ggacacctac acggtgcaga ccgtctacgg catcgacggc 29040 + ccgctggacc cggggctctt gcgggcgtcg tggcaggcgc tcgtggaccg gcacgccgcg 29100 + ctgcgggcct gtttccggta cgtctccggg gcgcagatgg tgcaggtcat cgcgcgggag 29160 + gccgaggttc cctggcgcga gacggacctt tccgggctgc cggacgacat cgccgagggc 29220 + gaggttgacc ggctggcggc ggacgaggtg gccgagcggc tgcgcatcga ggccgcgccg 29280 + ctgatgaagc tgcacctgat ccggctcggc ccggaccgcc accggctcgt gcacacgctg 29340 + caccacgtgc tggtggacgg ctggtcgatg ccgatcctgc accgggagct cgccgcgatc 29400 + tacgcggcgg gcggggacgc gtccggcctc ccgcccaccg tctcctaccg ggactacctc 29460 + gcctggctgg gccggcagga caaggaggtg gcgcgggcgg cctggcgggc cgagctcgcc 29520 + gggctggaca cgccgaccac ggtcgccgcg cccgatccgg cccgcgtccc ggacatccac 29580 + acggcggtgg tcgagctgcc ggcggagctg acggacggct tggcgcagtt cgcgcgtggc 29640 + cacgacctca cgctgaacac cgtcgtgcag ggcgcgtggg ccgtcgtgct ggcccagctc 29700 + gcgggccgcg acgacgtcgt gttcggcgcg accgcctccg ggcggcccgc ggacctgccc 29760 + ggggtggagg cgatggtcgg ccagctgctc aacaccctgc cggtgcgggt ccggctcgac 29820 + ggcgggcgcc gcgcggccga gctgttcgcc cggctgcagc gcgaccagtc ggcactcatg 29880 + gcccaccagc acctcggcct gcaggacgtg caggccgtcg tcggacccgg agcggtcttc 29940 + gacacgctcg tcatctacga gaacttcccc cgcaagggac tcggccgggc accgggcggt 30000 + ggcctgagcc tggtcccggt gaagcgcggg cggaactcct cgcactaccc gttcacgctg 30060 + atcaccggac ccggcgagcg gatgccgctg atcctcgact acgaccgggg cctgttcgac 30120 + cccgcggccg ccgaatcggt cgtcggcgcg ctggccaggg tgctggagcg gctggtcgcc 30180 + gagcccgacg tcctcgtcgg caggctgacg ctcgcgagcg aggccgaacg cgcgctggtg 30240 + gtggagggct tcaacgccac cgcgggcccg gtgccggggg agtccgtcct cgagctgttc 30300 + gcccggcggg tggccgccgc gccggacgcg gtggcgatca ccggcgccgc cggcgcgaac 30360 + ctgacctacg ccgaggtcga ccaggcgtcg aaccggctgg cgggctacct cgccgtccgg 30420 + ggcgtgggcc gtggcgaccg cgtcggggtg gccatggaac ggtcgccgga tctgctgatc 30480 + gcgttcctgg cgatctggaa ggcgggtgcc gcctacgttc cggtggacgt cgagtacccg 30540 + gccgagcgga tctcgttcgt cttcgacgac tccggcgtct cgaccgtcct gtgcaccctg 30600 + gccaccagcg cggtcgcgcc gggcaacgcg atcgtgctcg acgcgcccga aacacgcgtg 30660 + gccgtgcggg actgcgccgc gccggaaatc cggccgcacg cggacgacct ggcgtacgtc 30720 + atgtacacct ccggctccac cggcctgccg aagggcgtgg ccatcccgca cggggccgtg 30780 + gccggcctgg cgggcgacgc gggctggcag atcggtcccg gcgacggcgt gctgatgcac 30840 + gcgacgcacg tcttcgaccc ttcgctctac gcgatgtggg tgccgctcgt ctcgggcgcc 30900 + cgggtcctgc tcaccgagcc gggggtgctg gacgcggccg gggtacggca ggccgtgcac 30960 + cggggcgcga ccttcgtcca cctcaccgcc ggcaccttcc gcgcgctggc ggagacggca 31020 + ccggagtgct tcgaaggcct ggtcgagatc gggaccggcg gcgacgtggt tccgctgcag 31080 + tcggtggaga acctgcggcg ggcccagccc ggcctgcggg tgcgcaacac ctacgggccg 31140 + accgagacca ccctgtgcgc gacgtggctg ccgatcgagc ccggtgaggt gctcggccgg 31200 + gagctgccga tcggccatcc gatgaccaac cgccggatct acctcctcga cgccttcctg 31260 + cgcccggttc cgccgggcgt ggccggcgag ctgtacatcg cgggcacggg cctggcccac 31320 + gggtacctga agagccccgg cctgacggcc ggccggttcg tggcctgccc gttcgccgcc 31380 + ggtgaacgca tgtaccgcac cggcgaccgg gcgcgctgga cccgcgacgg cgaggtggtg 31440 + ttcctcggcc gcgccgacga ccaggtgaag atccgcggct accgggtcga gctcggcgaa 31500 + gtggaggctg cgctggcggc ccagccgggc gtggtcgagg ccgtcgtcac ggcgcgggag 31560 + gaccagcccg gcgagaagcg cctggtcggc tacttcgtct ccgacggcgg cgacgcgggg 31620 + ccggtggaga tccggcggca gctggccctg gtgctgcccg actacctggt ccccatcgcc 31680 + gtggtcgccc tgcccggcct gcccgtcacc cccaacggca aggtcgatcg ccgggccctg 31740 + cccgccccgg atctcgcggg acactcgccg gagaaggcac ccgagaacga gaccgagaag 31800 + gtgctgtgcg cgctgttcgc cgagatcctc agcatcgacc aggtgggggt cgacgacacc 31860 + ttccacgacc tcggcggcag ttcggcgctg gccatgcggc tcgtcgcgcg gatccgtgag 31920 + gagctcggcg cggacctgcc catccggcag ctgttctcct cgccgacccc cgcgggcctg 31980 + gccagggcgc tggccgcgaa gtcacgcccc gcgctggaag ccgcccagcg gccggaccgg 32040 + gtgcccgtca ccgcccggca gctgcgtgcc tggctgctgg ccgatcccgg cggggagacg 32100 + gccggcctgc acacctccgt cgccctgcgc ctgcacggcc gggtggacgt gcccgcgctg 32160 + gcggcggcgc tcggcgacgt cgcggcccgg cacgagatcc tccgcacgac cttcccgggt 32220 + gacgcgcaga gcgttcacca gcacgtccac gacgccttgg cggtcgagct gactccggtc 32280 + ggagtcaccg aggaagacct cccggggctg ctcgccgagc ggcgtgacct gctcttcgac 32340 + ctcaccaggg acgtgccgtg gcggtgtgac ctcttcgcgc tctcggacaa cgagcacgtg 32400 + ctgcacctgc aggtccaccg gatcctcgcc gacgacgact cgctcgacgt gttcttccgc 32460 + gacctggcgg ccgcctatgg tgcgcgccgc gaaggccggg tcccggagcg cgcgcccctg 32520 + gcgttgcagt tcgccgacta cgcgctctgg gagcagcgcc tgctcacgga cgagaacgag 32580 + ccgggcagcc tgatcaacga gcaggtggcc ttctggcggg acaacctggc cggcctcgac 32640 + ggggagacgg tgctgccgtt cgaccgcccg cgcccggccg tcccgtcgcg gcgcgccgga 32700 + acggtcgcgc tgcggctgga ggccggcccg cacgcccggt tgacggaggc ggcggagccg 32760 + ccgggcgcgg acacgctcga gatggtgcac gccgcgctcg cgatgctgct ggccaagctc 32820 + ggagcgggcc acgacgtggt gatcggcacg gcgctgccgc gggacgagga gctcttcgac 32880 + ctcgagccga tgatcgggcc gttcacccgg gcgctcgccc tgcgcaccga cgtctcgggc 32940 + gatccgacct tcctcgaggt cgtcgccagg gtgcaggagg cgggccaagc cacgggcgag 33000 + cacctggacc tgcccttcga acggatcgtc gagctgctcg atctgccggc ctcgctcgcc 33060 + cgccaccccg tgttccaggt gggacttcag gtggacgagg aggacatcga cggatgggcc 33120 + gcggcggaac tgcccgccct gcgcaccgcc gtcgaacccg gcgggaccgc ggccatggag 33180 + ctggacctcg cggtcaagct caccgagcgc ttcgacgacg acgacaacgc cggcggcctc 33240 + gagggcgcgc tgcactacgc caccgacctg ttcgacgagg ccacggcgga gtcggtggcc 33300 + cggcggctgg tccgcgtcct cgagcaggtg gcggaggatc ccgggcggcg gatcagcgac 33360 + ctggatgtct tcctggacga cttcgaacgc ggccgtccgc ccatcgctcc ggcgcggtgg 33420 + gccggggccg tgcccccggt ggtcgccgaa ctggccgggg acggcccgct cggcgcgctc 33480 + ctgctcgacg agcagctgcg cccggtcgct cccggagccg tcggcgatct gtacgtcacc 33540 + ggcccggccg tggacgcggg aacggccacc ctggcgaccg tgccctgccc gttcggggac 33600 + gaggggcacc ggatgctgca cacgggcctg ctcgcccgca aaacgcccgc caagaccctg 33660 + gtcgtcgtgg gcgagcggag gcggtcgagc gcttcggtga agacgggtga cttcgagatc 33720 + ctgctgccgc tgcgcgccgg cggtgaccgc ccgcccctgt tctgcgtcca cgcgagcggt 33780 + ggcctgagct ggaactacga gccgttgctg cggtacctcc cgccgaacca gccggtctac 33840 + ggcgtgcagg ctcgcggcct ggcccggacc gaaccgctgc cgggcagcgt cgaggagatg 33900 + gcggccgact acctcgagca gatccgtgcc gtgcagccgg ccgggccgta ccacctcctc 33960 + ggctggtccc tcggcggccg gatcgcgcag gcgatggcca ggttgctcga ggcggacggg 34020 + gagcggctcg gcctgctcgc cctgctcgac gcctatcccg tctacatggg acgcaagacg 34080 + accggcgccg cgagcgaaga agcggctctc gaacagcgga accagcagga tctggacctc 34140 + gcggggcaac tggtcaaggg tgtggccgcc cggtcgcgcc tcgaggcggt catgcgcaac 34200 + ctctggaagg tcgggccacg gcacacacgt tcgcccttcg ccggcgacgt cctgcttttc 34260 + gtggccactg tggaccgtcc cgcgcatttg cccgtcccag tggcgaaggc cagctggaag 34320 + gaattcacca gtggggcggt agaggcccac gaaatcccgt ccaaccacta cgacatggtg 34380 + caatccgcgg cgctgggcca gattggtgcc atcgtcgccg agaaactccg gtcccggccg 34440 + gagggtgaaa ggacacaacg atgagcaatc cgttcgacaa cgaagacggc tcctttttcg 34500 + tgctggtcaa cgacgagggc cagcactccc tctggccgac cttcgcggag gtgcccgccg 34560 + gctggacccg cgtgcacggc gaagcgggcc gtcaggagtg cctcgcctac gtcgaggaga 34620 + actggacgga cctccggccg aagagcctca tccgggaagc gagcgcctga gtgtccagcc 34680 + gctcaacgcg gacggttgga cgagcccggc gctcgtccga ccggctccgg gcggcttccg 34740 + cttgagtcca ccggctggat gaggcgaaag gaccttgtgc agtgttcgag gagagcaacg 34800 + ccctccgggg cacggaaata caccggagag accggttcgc tccggggccg gaactgcgct 34860 + ccctgatggg cgagggcacc atgtccatcc tgcagccccc ggattccccc ggcgggcgga 34920 + ccgggtggct ggccaccggg cacgacgagg tccggcaggt cctcggctcg gacaagttca 34980 + gcgccaagct gctctacggc gggaccgtgg ccggccgcat ctggccgggc ttcctcaacc 35040 + agtacgaccc cccggagcac acgcgcctgc gccggatggt gacgtcggcg ttcaccgtcc 35100 + ggcggatgca ggacttccgg ccgcggatcg agcagatcgt ccaggcgagc ctggacgcca 35160 + tcgaggccgc cggtggcccg gtggacttcg tcccccggtt cgcctggtcc gtggcgacga 35220 + cggtgacgtg cgacttcctc ggcatcccgc gtgacgatca ggcggacttg tcgcgcgccc 35280 + tgcacgccag ccggtccgaa cggtcgggca agcggcgggt ggcggcgggg aacaagtact 35340 + ggacgtacat gaccgagatc gcggcccgcg cgcgccgcga tcccggtgac gacatgttcg 35400 + gcgcggtggt gcgcgaccac ggcgacgcga tcaccgacgc ggaactgctg ggcgtggccg 35460 + cgttcgtcat gggcgcgggc ggggaccagg tggcccggtt tctcgcggcg ggcgcgtggc 35520 + tgatggtcga gcaccccgat cagttcgcgc tgctgcggga aaagccggac accgtcccgg 35580 + actggctgaa cgaggtggag cggtacctca ccagcgacga gaagaccact ccgcgcatcg 35640 + cgcaggagga cgtgcgcatc ggtgatcagc tcgtcaaggc cggcgatgcc gtcacctgct 35700 + cgctgctggc ggcgaaccgc aggaagttcc ccgccccgga ggacgagttc gacatcaccc 35760 + gggaacggcc ggtgcacgtc acgttcggcc acggcatcca ccactgcctc ggcaggccac 35820 + tggccgagat ggtgttccgg gcggcgattc cggcgctggc acaacgcttt cccaagctga 35880 + ggctggccga gccggaccgc gagatcaagc tggggccgcc gccgttcgac gtggaagccc 35940 + tgctgctgga gtggtgacgc cgggccggac acgaaatcgt cgggagcaaa agagggggtt 36000 + ttcccgttga atgatgacga cccgcggccg ctgcacattc gccggcaggg cctggacccg 36060 + gcggacgagc tgctcgccgc cggatcgctg acgagggtca ccatcggatc cggagcggat 36120 + gccgagaccc attggatggc caccgcgcac gccctcgtcc ggcaggtgat gggcgaccac 36180 + cagcggttca gcacccggcg ccgctgggac ccgcgggacg agatcggcgg gacgggcacc 36240 + ttccggccgc gtgaactggt cggcaacctg atggactacg acccgcccga gcacacgcgg 36300 + ctgcgccaga agctgacccc cgggttcacg ctgcgcaaga tgcagcggct gcagccgtac 36360 + atcgaacaga tcgtcaacga gcgactcgac gagatggcgc gggcgggatc gcccgcggat 36420 + ctggtcgcgt tcgtcgccga caaggtgccc ggcgccgtgc tgtgcgagct gatcggcgtg 36480 + ccgagggacg accgggccac gttcatgcag ctgtgccacg cgcatctcga cgcctcgcga 36540 + agccagaaac ggcgggcggc ggcgggagag gcgttctccc gctacctgct ggcgatgatc 36600 + gccagggaac gcaaggaccc gggcgagggg ctcatcggag cggtcgtcgc cgaatacggc 36660 + gacgaagcca cggacgagga gctgcgcggc ttctgcgtgc aggtgatgct ggctggcgac 36720 + gacaacatct ccggcatgat cgggctcggc gtgctggcgc tgctgcggca ccccgagcag 36780 + atcgacgcgt tgcgcggcgg cgaacagccg gcgcaacgag ccgtcgacga gctgatccgg 36840 + tacctgaccg tgccctacgg cccgacaccc cgcatcgcga agcaggacgt caccgtcggg 36900 + gaccaggtga tcaaggcggg cgagagcgtc atctgctcgc tcccggcggc caaccgcgac 36960 + cccgccctcg tgccggacgc ggaccggctc gatgtcacgc gcgaccccgt cccgcacgtc 37020 + gcgttcgggc acgggatcca ccactgcctg ggagccgcac tggcccgcct cgaactgcgc 37080 + acggtcttca ccgcgctgtg gcggcggttt cccgacctgc ggctcgcgga tcccgcccag 37140 + gagaccaagt tccgcctcac cacccccgct tacgggctga ccgagctgat ggtcgcctgg 37200 + tgaccggggg cccgcgacgt ccacgcgagg ccgctggacg tcctgcctga ttccgggtgg 37260 + aattgggacc gtcggcgggt tcgggccgaa aaaaccgaac gaccaagaca gagggacatt 37320 + tcttcccggt cgaccaagga gtttccacgc ggatggggca cgatatcggt cagctcgcgc 37380 + cgctcttgcc ggagccggcg aacttccagc tgaggacgaa ctgcgatccg catgcggaca 37440 + acttcgacct gagggcgcac ggcccgctgg tccggatagc cggggactcc tccgctcagc 37500 + tgggcaggga atatgtctgg caggcccacg gctacgacgt cgtgcgccgg atattgggcg 37560 + accacgagaa tttcacgacg cggccgcaat tcacccaagc gaaatccggg gcgcacgtcg 37620 + aggcccagtt cgtcgggcag atatcgacct acgacccacc cgagcacacc cggctgcgga 37680 + agatgctcac gccggagttc acggtccggc ggatccgccg gatggagccc gcgatccaag 37740 + ccctcgtcga cgatcggctc gaccgggtgg cggccgaggg accgcccgcc gacctccagg 37800 + cgctgttcgc cgacccggtc ggcgcgctcg ctctgtgcga actgctcggc atcccccgag 37860 + acgaccagcg cgagttcgtc cggcggatca ggcggaacac cgatctgagc cgcgggctca 37920 + aggcgcgggc ggcggacagc gcggcgttca accggtacct ggacaacctc atcgcccggc 37980 + agcgccggga cgccgacgac gggttcctcg gcatgatcgt gcgagagcac ggggacaccg 38040 + tcacggacga ggagctgaag ggcctgtgca cggcgctgat cctcggcggc gtcgagaccg 38100 + tcgccgggat gatcggcttc ggggtgctcg ccctgctcga gaaccccggc caggtgccgt 38160 + tgctgttcgc gggccccgag caggccgacc gcgtggtcaa cgagctgctg cgttacctgt 38220 + ctccggtgca ggcgccgaat cccagcctcg ccgtcaagga tgtgatcatc gacggacagc 38280 + tgatcaaagc gggagattat gtcctgtgct cggtcctcat ggccaaccgg gacgaagcgc 38340 + tgacgccgaa ccccaacgtc ttcgacgcga atcgcgccgc ggtatcggac gtcggtttcg 38400 + ggcacggcat ccactactgc gtgggcgcgg cgctggccag gtcgatgctg cggatggcgt 38460 + accaggccct gtggcagcga ttccccgggc tccggctggc cgtgcccatc gcggaagtga 38520 + agtaccgaag cgcgttcgtc gactgccctg atcgggttcc ggtcacctgg tagcgcaatc 38580 + cgggttgaaa accagcctcg gcaatttgac actcgacaga ggaatggtgg gagatgtcgg 38640 + tcgaagactt cgacgtggtg gtggcgggcg gcgggccggg tggttcgacg gtggccacgc 38700 + tggtggccat gcagggacac cgggtgctgc tgctggagaa agaggttttc ccgcggtatc 38760 + agatcggtga gtcgctgctg cccgccacgg tgcacggcgt gtgccggatg ctcggcatct 38820 + ccgacgagct ggccaatgcc gggttcccga tcaagcgcgg cggcacgttc cgctggggcg 38880 + cccggccgga gccgtggacg ttccacttcg gcatctcggc caagatggcc ggctcgacgt 38940 + cgcacgccta ccaggtcgag cgggcgcggt tcgacgagat gctgctgaac aacgccaagc 39000 + gcaagggcgt ggtcgtgcgg gaggggtgcg cggtcaccga tgtggtggaa gacggcgagc 39060 + gggtcaccgg tgcgcggtac accgatcccg acggcaccga gcgggaagtg tcggcgcggt 39120 + tcgtgatcga cgcgtcgggc aacaagagcc ggctctacac caaggtcggc ggttcgcgga 39180 + actattcgga gttcttccgc agcctcgcgc tgttcggtta cttcgagggt ggcaagcggc 39240 + tgcccgagcc ggtctccggg aacatcctga gtgtggcctt cgacagcggc tggttctggt 39300 + acatcccgct gagcgacacg ctgaccagcg tcggcgcggt ggtgcgccgg gaggacgccg 39360 + agaagatcca gggtgaccgg gagaaggccc tcaacacgct gatcgccgag tgcccgctga 39420 + tctcggaata cctcgcggac gcgacccggg tgacgaccgg ccggtacggg gaactgcgcg 39480 + tccgcaagga ctactcctac cagcaggaga cctactggcg gccgggcatg atcctggtcg 39540 + gcgacgccgc gtgtttcgtg gacccggtgt tctcctccgg tgtgcacctg gcgacctaca 39600 + gcgcgctgct cgcggcccgg tcgatcaaca gcgtcctcgc cggcgacctg gacgagaaga 39660 + ccgcgctgaa cgagttcgag ctgcggtatc gccgtgagta cggcgtgttc tacgagttcc 39720 + tcgtgtcctt ctaccagatg aacgtgaacg aggagtcgta cttctggcag gccaagaagg 39780 + tcacgcagaa ccagagcacc gacgtcgagt cgttcgtcga gctgatcggc ggagtgtcgt 39840 + ccggggagac cgcgctgacg gccgccgacc gcatcgccgc gcgcagtgcc gagttcgccg 39900 + cggcggtgga cgagatggcg ggcggggacg gcgacaacat ggtgccgatg ttcaagtcga 39960 + cggtggtcca gcaggcgatg caggaagcgg gccaggtgca gatgaaggcg ctgctcggcg 40020 + aggacgccga acccgagctg cccctgttcc ccggtggcct ggtgacctcg cccgaacgga 40080 + tgaagtggct gcctcaccac cctgcgtgaa gcctgtgcgc gccggccgtt cgcgggtggc 40140 + cgggacctgc ggaacaacct atggaaaaac ctacggaaca gagggtgcga aatgcgcgtg 40200 + ttgatctcgg ggtgcggatc gcgcggggac accgaaccgc tgatcgcctt ggcggtccgg 40260 + ttgcgggaac tcggtgtaga cgtccggatg tgcctgccgc cggactacgt ggagcggtgc 40320 + gccgaggtcg gggtgtcgat ggtggcggtc ggcccggcga tgcgcgccgg ggcacgcggg 40380 + ccgggagaac cgccgccggg agcacccgaa atcgtgtccg aggtggtcgc ggactggttc 40440 + gacaaggtgc cggcggccgc cgaagggtgt gacgtggtgg tggcgaccgg cttgctgccc 40500 + gccgcggtcg tcgtgcggtc ggtcgccgag aagctgggca tcccttacct ctacaccgtg 40560 + ctgtcgccgg accacctgcc gtcggtgctc agccaggcgg agcgcgacga atacgaccag 40620 + ggcgccgacc ggctgttcgg tgcggtggtc accagcgggc gggccgcgat cggcctgccg 40680 + ccggtggcga acctcttcac ctacggctac accgaacagc cctggctggg ggcggaccag 40740 + atcctcgccc cgccgccacc gggagacttg gacaccgtgc agaccggtgc gtggatcctg 40800 + cccgacgaac ggccccttcc cgcggagctg gagacgttcc tcgcggccgg gtcgccgccc 40860 + gtgtacgtgg gtttcggcag ctcgtccgga ccccggaccg ccggcgccgc caaggcggcc 40920 + atcgaggcga tccgcgcccg gggccaccgg gtcgtcctct cccgcggctg ggccgacctg 40980 + gccgcgcccg acgactcggc cgactgcttc accgtcggcg aagtgaacct ccaggtgctg 41040 + ttccgccggg tggccgccgc cgtccaccac gacagcgcgg gcacgacact cctggccatc 41100 + cgggcaggca ccccccagat cgtcgtccgc cgcgtgatag acaacgtggt ggagcaggcg 41160 + taccacgccg accgggtggc cgaactgggg gtcggtgtgg cactcgaagg tccgatcccg 41220 + gcctccgagg ccatgtcgga cgcgctcgag acggcgctgg caccggaaac ccgcgcgcga 41280 + gcggcggagg tggcgggcac ggtccgcacc gacgggacga cggtggccgc ggaactgctg 41340 + ttcgccgcgg tcagccggga aaagcccgcc gttcccgcat gacccgcacc gagccggcgt 41400 + gccgggaacc ggccggcggg ccagaaccca ggaaacgggg aaatacgtga agcgtgtgct 41460 + gttgtcgacg ctcggaagcc gcggagacgt cgaaccactg gtggccttgg cggtccggct 41520 + gcgcgacctc ggcgcggagc cgctgatgtg cgcaccgccg gactgcgcgg accggctgga 41580 + agaggtcggc gtgccgcacg tgcccgtcgg cccgtcggcg cgcgcgccga tccatcggga 41640 + gaagccgttg acgcccgagg acatgcgccg gctcatggcc gaagcgatcg ccatgccgtt 41700 + cgaccggata ccggcggccg ccgaggggtg tgccgcagtg gtgacgaccg ggctgctggc 41760 + cgccgcgatc ggcgtgcggt cggtggccga gaagctgggc atcccctact tctacgcctt 41820 + ccactgcccg agctacgtgc cgtcaccgta ctatccgccg ccaccgcccc tcggcgagcc 41880 + gcccgccgag gacgtgaccg acatccgggc gctgtgggag cggaacaacc ggagcgctta 41940 + ccagcggtac gggggtccgc tcaacagcca ccgggccgcg atcggcctgc ctccggtgga 42000 + ggacatcttc accttcggct acaccgatca cccgtgggtg gcggcggatt cggtcctggc 42060 + cccgatgcag ccgaccgacc tcggtgccgt gcagaccggc gcgtggatcc tgcccgacga 42120 + acggccgctt tccccggagc tggaagcttt cctggacacc ggcaccccgc cggtgtacct 42180 + cgggttcggc agcctgcgcg ccccggccga cgccgtccgg gtgtccatcg acgcgatccg 42240 + ggcccaaggc cgccgggtaa tcctttcccg gggctgggcc gacctggtcc tgcccgacga 42300 + ccgggaagac tgcttcgcca ccggcgaggt gaaccagcag gtgctgttcg gccgggtggc 42360 + cgccgtcatc caccacggcg gcgcgggcac gacgcacgtg gccatgcagg ccggggcacc 42420 + ccaggtcctg gtgccccaga tggcggacca gccgtactac gccggccggg tggccgagct 42480 + ggggatcggg gtggcccacg acggtccggt cccgaccttc gactcgctgt cggccgcgct 42540 + cgtcacggcg ctggccccgg aaacccgcgc acgagcggag gccgtggcgc gcacggccgg 42600 + tgccgacggg gcggcggtgg ccgcgaaact gctgctcgac gcggtcagcc gggaaaagcc 42660 + ggctgttccc gcgtaaacca caccgggtcg gcgccgccgg aaagtggcgc atgcggtgac 42720 + ccgggtcctg tccattcttt gaccgttccg gacataaatg cgctcggata gcattccgcc 42780 + tcattatcgc agggggacag aaccgatcaa attggggtgc gggatgcgtg tgttgctgtc 42840 + gacggcggga agccgtggag acgtcgaacc gttgctggcc ttggcggtcc ggttgcaggg 42900 + actcggcgcg gaggtgctga tgtgcgcgtc gcctgcttcc gcggagcggc tggccgaggt 42960 + cggggtgccg cacgtgccgg tcggcctgca gctggacggc atgttgttgc aggaaggaat 43020 + gccgccgccg tcggccgagg acgagcgcag actcgcggcc atggcgatcg acatgcagtt 43080 + cgacgcggtc cccgcggccg ccgaagggtg tgccgcggtc gtggcgaccg gagagctggc 43140 + cgccgcggcc gccgtgcggt cggtggccga gaagctgggc atcccgtact tctacggcgc 43200 + atacagcccg aactacctgg cgtcgccgca ctatccgccg cccgacgacg agcggaccac 43260 + cccgggcgtg accgacaacg gggtgctgtg ggccgagcgt gccgagcgtt tcgccaagcg 43320 + gtacggggaa acgctcaaca gcagacgggc ggcgatcggc ctgcccccgg tggcggacgt 43380 + cttcggctac ggctacaccg agcagccctg gctggcggcg gacccggtcc tggccccgct 43440 + ggatccggat ctcgacgcgg tgcagaccgg cgcgtggatc ctgcgtgacg atcggccgct 43500 + ttcccctgag ctggcggcgt ttctcgctgc cgggtcaccg ccggtgtacg tgggtttcgg 43560 + cagcgcgtcc gggccgggaa tcgaggacgc cgcgaaggtg gccatcgagg cgatccgggc 43620 + cctcggccgc cgggcgatcc tttcccgcgg ctgggccgat ctggtcctgc ccgacgaccg 43680 + ggaggactgc ttcgccgtcg acgaggcgaa tctccaggtg ctgttcgagc agtcggccgc 43740 + cgtcgtccac cacggcagcg cgggcaccga gcacctggcc acgcgggccg gcgtccccca 43800 + gatcgcgata ccccggcaca cggatcaggc gtactacgcc ggccgggtgg ccgagctggg 43860 + ggtcggtgtg gcactcgaag gtccggtccc gtccttcgcg gcgatgtcgg cggagctcgc 43920 + gacggccctg gccccggaaa cccgtgcgcg agcggcggag gtggcgggca cggtccgcac 43980 + cgacgggacg acgatggccg cggagctgct cttccaggcg gccgaacagg gcaaactgac 44040 + cgttcccgcg tgaatttctt cgaagacaaa gcaaagagga gactgcatgt cgaccacgtc 44100 + ccagtgccgt atctgtgacg gcactgtcca cgagttcatc gacttcggac gccagccgct 44160 + ctcggacgcg ttcgtggctc ccggcgcgga aaagggtgag ttcttcttcc gccttgccac 44220 + cggcatctgc gattcctgca cgatggtgca gctgatggag gaagtcccgc gggacctgat 44280 + gttccacgag gcctacccct acctgtcgtc gggttcggcc gtcatgcgca cgcacttcca 44340 + cgagctggcc aagcacctgc tggccacgga gctgaccggc gaggacccgt tcatcgtcga 44400 + gctcggctgc aacgacggca tcatgctcaa ggccgtggcc gacgcggggg tgcgccagct 44460 + cggcgtcgaa ccctccggca gtgtcgcgga tctggcggca gccaagggga tccgcgtccg 44520 + caaggacttc ttcgaagagg cgacggccgc cgacatccgc gagaacgacg gccccgcgga 44580 + cgtgatctac gcggccaaca cactgtgcca catcccttac atggactcga tcctgaaggg 44640 + cgtcaccaag ctgctcggcc cgaacggcgt gttcgtcttc gaggacccgt acctcggcga 44700 + catcgtggag cgcacgtcgt tcgaccagat ctacgacgag catttcttcc tcttcacggc 44760 + gcgctcggtc caggagatgg cccggcgcaa cggcctcgag ctcgtggacg tcgagcgcat 44820 + tccggtgcac ggcggcgagg tccgctacac cctggccctg gccggcgctc gcaagccgtc 44880 + cgaggccgtg gcggagctcc tggcctggga ggcggagcgc aagctggcgg agtacgccac 44940 + gctggaacgt ttcgccaccg acgtgaagaa gatcaaggaa gacctgatcg cgctgctgac 45000 + caagctccgt gccgaaggca agcgcgtcgt cggctacggc gcgacggcca agagtgccac 45060 + ggtgaccaac ttctgcggca tcaccccgga cctggtcgag ttcatctcgg acacgacacc 45120 + ggccaagcag ggcaagctca gcccgggaca gcacatcccg gtccgcgagt acggggaatt 45180 + cgccggcaac cacccggact acgccctgct gttcgcctgg aaccacgccg acgagatcat 45240 + gaacgtggaa caggcctttc gtgacgccgg cggtcagtgg atcctttacg tgccgaacgt 45300 + gcacgtgagc tgaccggcca tgcgaatcct cgcgtcgagc cctttcggcg ggcactgtcc 45360 + agggttcacc agttgtcttg tggcagcgct gtcggcaccg tgattgccat gcctcaagac 45420 + ctcgacgcgg accggattct ggcgatatcc ccgcatttgg acgacgcggt tctttccttc 45480 + ggtgccggcc tcgcccgtgc ggcgcaggcc ggcgcgaagg tgaccgttca cacggtgttc 45540 + gccgggaccg cggcgccccc ttattcgccg gcggcggagc ggctgcacgc gatctgggag 45600 + ctctcaccgg atcaagacgc gtcgctccgc cgccgggacg aagacatcgc cgcgctcgac 45660 + cacctgggcg tcgactaccg gcacggccgg ttcctcgacg ccatctaccg caagctgccg 45720 + gacggccgat ggctggccga caacgtgccg ggccgccaga agctggccat cggacggcaa 45780 + tcgccgcagg gcgatccgga gctgttctcc gcggtccggg cggacatcga gtcgatcgtc 45840 + gaagagtacg ccccggcgct gatcctgacc tgcgcggcag gcaacggtca tgtcgacaac 45900 + gagatcgcgc gggatgccgc gctgttcgtc gcgtacgaga agggcatccg ggttcggctg 45960 + tgggaagacc ttccgcacgc gatgttcgcg gagggcgccg ccgaactgcc ggacggattc 46020 + cggctggggc cgcccgattt cggttccgtc gaaccggagg cacgggcgcg gaaattcgaa 46080 + gcgctgcggc tctactcgtc gcagatgctg atgctgcacg ggccggaaaa ggatttcttc 46140 + gctcagctgg acgggcatgc ccggaagagt gcaccgggtg gtggatacgg cgaaacgacc 46200 + tggccggttg tctctcgcga agacaacggc tgaatccagg gctgaaccca gggaggttgt 46260 + cattgtgagc ggtcaactcg agcgtggtcc ggtgcggacc acgcacgccg acgtcctgct 46320 + ggcctcggtg ggtgagcgag gcgttctgtg cgacttctac gacgaggagg gctcgaacac 46380 + ctatcgggac ctgatccagg acgcggacgg taccccggaa gcgcgggagt tcgccactcg 46440 + cgtcggcccg gtgcccggac ccgtgctgga gctcgcggcc ggcacgggcc ggctgacctt 46500 + cccgttcctg gagctcggct gggaggtgac cgccctggaa ctgtcggccc cggtggtcga 46560 + cggcttccgg atgcggctgg cggaagcacc ggcggacctg cgggaccgct gcacagtggt 46620 + tcaggcggac atgagcgctt tctcggtgga ccggcgcttc ggggcagcgg tcatcagctc 46680 + gggttcggtc aacgaactgg acgaagccgg ccggcagggc ctgtacgcgt cggttcgcga 46740 + gcacctcgag cccggcggga agttcctgct cagcctggcc ttgtcggagg tcgccgagtc 46800 + acagccgccg gagcgccggc aagagttgcc aggccagagc ggccggctgt acgtgttgca 46860 + cgtgagtgtg cagccggcgg aggagaccca ggacatcacg atctaccccg ccgacgaaac 46920 + agcggatccc ttcgtcgtct gcacgcatcg ccgccggctc gtcccggcgg accggatagt 46980 + gcgggaactt cttcgggccg gcttcgacgt gatcgcgcgg acgccgttcg cgtccggtgc 47040 + gtccggccgg gcgggccatg aagacatgtt gctggtggaa gcggtgaagc aggagggcgc 47100 + tatcccagcc gcgcggtgat gagcgcggcg agccgggcga cgccctcttc gatcagttcc 47160 + ggggtgagca ggctgatcga cagccgcagc tggttgaacc cgcccttgcc gccgtagaag 47220 + tggtgcatcg gggtgaacag cacgccgtgg tcgcgggcgg cgagggcgag caggtcgtcg 47280 + tcgacggtga aggggacggt gacggtgacg aagaacccac cggtcggcgt gttccagcgg 47340 + accccggcgc gcccgccgag ccgtcgctcg agctcgccca gcacgagccg caggttgcgc 47400 + tggtagaccg cgatctcgcg cgcgttggcc ttggtcaggc tgaagtcgtt gaggagcagc 47460 + ttcccggcga tcaccgactg ggctatcggg gacgtgttca ccgtgagcat gcccttgagc 47520 + ttggagagct ggtcggcgag caggccgccc ccggccattc gctggtccgc cacggtgaag 47580 + ccgacccggg caccgggcat gccggtcttg gcgaaggagc cgatgtagac cacggtcccc 47640 + gaccggtcga gggctttcag ggtggggagg cgttcggcgc cgaagagccc gtacgcgttg 47700 + tcctccagga gcaggatgcc gttggcctcg gcgacctcga ggagccggtg ccgggcggcc 47760 + aggtccatgc tggtcccggt cgggttggcg aagttcggtg tcacgtaaca ggcccggacc 47820 + cgcttgccct gttcgtcggc ccgcttcagc tgcaggacca ggtcgtccgg gtcgatgccg 47880 + ttctcggtcg actgcaccgg ccagacgggc gtgtcggtga gcagcgccgc ccccgtcagg 47940 + ccgacgtagg tgggggcggg ggcgagcagc acgtcgtgtt cggtcgcccg cagcgtgcga 48000 + agcaccagga acatcgcctc ctgggcgccc acggtgacca ccacggattc cggggcggcg 48060 + tcgatgttct cgtcctcggc gaggttgcgg gcgatgaggt cggcgatgac gcctttcgtg 48120 + gtgccgtact ggaagagcgt gcgggtgacc cccgcttcgt cgagcttccg gtcgcggcgg 48180 + aggtggtcgc agtaggcgtc gatgtactcg tggatgaggc ggatgtcgaa gaattcttcg 48240 + tacgggcggc ctgccgccat ggaaatagcc accgggtatt cgtcgatcag ctcgttgagc 48300 + aagttcatcg acgagatggc cggatcggtg agcgatccgt gcagggtttc cacgctcaat 48360 + ggggtggaca gaccgaagga atccataaat actaggattt ccatacgccg ccgaggtgtc 48420 + aagcggcggc ggtggacgcg atcgcgtggc gattcccgac gatttcccgg ctcggtaccg 48480 + cgcgcggaac aaaagccgtc cgagactgtc gatgtccatt tctcgctttt ccggacactc 48540 + gatcttcgaa ggtacggtca cacgtgtcgc cgcgcgccgc ggatgggcgg cgggcagggg 48600 + aggaccttca tgctgatgac gactgagcac gggatccggc tgtcgtacca cgaccagggc 48660 + cgtggtgcgc cggttctgct gctgaccggc accggggcgc cgagctcggt gtgggacctg 48720 + caccaggtgc ccgcgctccg cgccgccggg ttccgggtga tcaccatgga caaccgcggg 48780 + atcccgccca gcgacgacgg cgcggacggg ttcaccgtcg acgacctcgt cgcggacgtg 48840 + gccgcgctgc tcgaccacct cgacgcgtcg ccgtgccgcg tcgtcggcac gtcgatgggc 48900 + tcgtacatcg cgcaggagct ggcgctggcc cgcccggaac tggtggacgc cgtcgtgctg 48960 + atggcggcct gcggccggag cagtctcgtc cagcgcgtgc tcgcggaggc cgaggcggac 49020 + ctgatcggac gggggaccga gctgccgccg gggtaccgcg ccgccgttcg cgcgatgcac 49080 + aacctggggc ccgcgacgct cgccgacgac gacctcgctg ccgactggct cgacctgttc 49140 + gcggcgtcgg agaactgggg gccgggcgtc cgggcgcagc tgctgctgag cgcgttgccc 49200 + gaccgtcgcg aggcctaccg ggcgatcaag gtgccctgcc acgtcgtttc gttcgagcac 49260 + gacctcgtgg cgccgccgtc cgccgggcag gagctggccg ccgtgatccc cggcgccacg 49320 + caccgcacga tcccggggtg cgggcacttc ggctacctgg agaagccgga agcggtgaac 49380 + cgcgagctgc tccggttcct ccgcacggaa tccggcgtgg ctgtgacatc cggggcttcg 49440 + ccccggaccc ccgaagaact gtgacagccg gggctcgccc cgggccgggg gctccgccac 49500 + ccggaccccc gaaacctgga ggagaccgca tgaccggcgc gatcgtgccc ccgtccacgg 49560 + cacccgccct gttcgaggcg gccgccgccg cggtgccgga ccggccggcc gtggcgatgg 49620 + ggaccaccac gctgacctac gccgagctga atacccaggc caaccggctc gcgcgccggc 49680 + tcgtggcgca cggcgtgggc ccggaacggc tggtcgcact ggcgatgccg cggtcgatcg 49740 + agttcgccgt cgcgatgctg gccgtgcaca aggccggcgg tgcgtacgtg ccgatcgacc 49800 + cggactatcc cgcggaacgc cggcagcaca tgctggccgg tgcggcggcg cagtgcctgc 49860 + tgtgcctgcc cgggcaggac gtcgccggcg ctccggtcgt gctgagcgtg gcgctggcgg 49920 + agccgggccg tcccgagccg gacctggacg actccgaccg gctcgccccg ctgctgccca 49980 + gccaccccgc gtacgtcatc ttcacctcgg gctcgaccgg gcagccgaag ggcgtcgtgg 50040 + tcacgcaccg gggaatcccg aacctggccg ccgactacgt gcaccgccag aacctgctgc 50100 + ccgacagccg gttgctggct ttcgcgtccc ccagcttcga cgccgccgtc gccgagttct 50160 + ggccgatctg gctggccggt gcctgcctgg tgctggcgcc cgcgccggac ctgatccccg 50220 + gggagccgct cgcccggctg gtccgggacc ggcacatcac ccacgtgacg ctgccgccgt 50280 + ccgccctggc cccgctggaa gaagccggcg gcctgccgcc ggggctgacc ctcctggtcg 50340 + ccggcgaggc gggcccggct ccggtcgcga agcgctgggc cgccggccgc gtcatgatca 50400 + acgcgtacgg ccccaccgaa gccacggtcg cggtgaccgc gagcgacccg ctgaccggcg 50460 + aagacacgcc gccgatcggc aggccgatca ccggtgtcca cacctacgtc ctggacgacc 50520 + ggctggtccc cgtcccggac gggaccgtgg gggagctgta catgaccggc ccgggcctcg 50580 + cccgcggtta cctgcaccgg ccggccgcga ccgcggaacg gttcctgccg gacccgttcg 50640 + gcggtccggg gcagcgcatg taccgcacgg gtgaccgggt gcgggcgcgc ccggacggtc 50700 + agctcgtctt cgtcggccgg gccgacgacc agctgaaggt gcgtggtcac cggatcgagc 50760 + cggcggaggt cgaatccgcg ctgctcgcgg tggacggggt ggcccaggcg gtggtgaccg 50820 + aacacgacaa ccggctcgtg gcgtacgtgg tcggcgccgg gggcgcgcgg gtgcccgccg 50880 + aagacctcct gccgccgctg aggaagcagc tgcccgccta cctggtcccc gacgtggtcg 50940 + tcggcctgcc gcacctgccg accaccccga acggcaaggt cgaccgggcc gcgctgcccg 51000 + cgcccgaggc ggaggacacc gggcgcgcga tctccgggcg ggcgccgagc acgcccacgg 51060 + aaatccacct ggccgccttg ttcgcggaag tgctcggtgt cagcagcgtc ggcgtggagg 51120 + acagcttctt cgaggtcggc ggccactcgc tgctcgccac ccggctggtt tcccgcatcc 51180 + gcgaaagcct gcgggtccgg ctgcgggtgc aggccttctt cgacgcgccg accgtggccg 51240 + aactcgccaa ggtgctcgac gccgccctga cgtgacctgg agaccctgat gcagacgacg 51300 + aacgccgtcg acctcggcaa ccccgacctg tacacgaccc tggaacggca cgcccgctgg 51360 + cgcgagctcg cggcggaaga cgcgatggtg tggagtgacc cgggcagttc cccctccggc 51420 + ttctggtcgg tgttctcgca ccgggcgtgc gccgcggtcc tcgcgccgtc ggcgccgctc 51480 + acctccgaat acgggatgat gatcgggttc gaccgcgacc acccggacaa ctccggcggc 51540 + cggatgatgg tggtctccga acacgagcag caccgcaagc tgcgcaagct cgtcgggccg 51600 + ctgctgtccc gggcggccgc gcgcaagctg gccgagcggg tgcggatcga ggtcggcgac 51660 + gtgctcggcc gggtcctcga cggcgaggtc tgcgacgcgg ccacggcgat cggcccccgc 51720 + atccccgccg cggtcgtgtg cgagatcctc ggcgtgcccg ccgaggacga agacatgctc 51780 + atcgacctga ccaaccacgc cttcggcggc gaggacgagc tgttcgacgg gatgaccccg 51840 + cggcaggcgc acaccgagat cctcgtctac ttcgacgaac tgatcaccgc gcgccgcaag 51900 + gaacccggcg acgacctcgt cagcacgctg gtgaccgacg acgacctcac gatcgacgac 51960 + gtgctgctca actgcgacaa cgtgctcatc ggcggcaacg agaccacgcg gcacgcgatc 52020 + accggcgcgg tgcacgcgct ggcgacggtg cccggcctgc tgacggcgct gcgggacggg 52080 + agcgcggacg tcgacaccgt cgtggaagag gtgctgcgct ggacctcgcc cgcgatgcac 52140 + gtgctccggg tgacgaccgc cgacgtcacg atcaacggcc gcgacctgcc gtccggcacc 52200 + ccggtggtgg cgtggctgcc cgcggcgaac cgggaccccg ccgagttcga cgacccggac 52260 + accttcctgc ccgggcggaa acccaaccgg cacatcacct tcggccacgg catgcaccac 52320 + tgcctcgggt ccgcgctcgc gcggatcgag ctgtcggtcg tgctgcgggt gctggccgag 52380 + cgggtgtccc gggtggacct ggaacgggag ccggcctggt tgcgggcgat cgtcgtgcag 52440 + gggtaccggg aactcccggt gcggttcacc gggcgctgac ccgcgcgcgg tgccccggtg 52500 + agggtgcggc tgccccgcgc ccattttgtc cactgtggac tccggcgccc gccgcggcgg 52560 + gtgtcaagct gacaccgttg atgcggaatt ggcttggagc catcctgggg aatgagcgtt 52620 + acacctattt gacggaggaa tgtcttgact tccgattcga ctgtccagaa tttcgagatc 52680 + gactacgtcg aaatgtatgt ggaaaacctc gaggcggcca cgttcacctg ggtcgacaag 52740 + tatgctttcg ccgtcgccgg taccgaccgg tcggcggacc accggagcgt cacgctgcgg 52800 + cagggcccga tcaagctggt cctcaccgaa ccgacgtcgg accggcaccc ggcggccgcc 52860 + tacctccagt cgcacggcga cggcgtggcc gacatcgcgc tgcgcacgcc ggacgtgacc 52920 + gccgctttcg aagccgcggt gcggggcggg gccgccgccg tgcgcgaacc ggtgcggctc 52980 + gccggcgggc cgatcgtcac ggccaccatc ggcgggttcg gcgacgtcgt gcacaccctg 53040 + atccagagcg gcgaagccac cgcggccgcg ccggagacca ccggccaggg cgggggagac 53100 + gtgaacctgc tcgggctcga ccacttcgcg gtctgcctga actcgggtga cctcggtccc 53160 + acggtggcgt tctacgagcg ggccttcggg ttccggcaga tcttcgagga gcacatcgtg 53220 + gtcggcaggc aggcgatgaa ctccaccgtg gtgcagagcg cgtcggggga ggtcaccctc 53280 + accctgatcg agcccgacag caacgccgac cccggccaga tcgacgagtt cctcaaggcc 53340 + caccagggag ccggcgtcca gcacatcgcc ttcaacgccg acgacgcggt ccgcgcggtc 53400 + cgggcgctgt ccggccgcgg ggtggagttc ctgaagactc cggggaccta ttacgacatg 53460 + ctcggcgagc ggatcacgct ggagacgcac acgctggacg acctgcggtc gacgaacgtg 53520 + ctcgccgacg aggaccacgg cggccagctg ttccagatct tcgccgcttc cacccacccg 53580 + cgtcacacca tcttcttcga gatcatcgag cggcagggcg cgggaacctt cggcagctcc 53640 + aacatcaagg ccctgtacga ggccgtggag ctggagcgga ccgggcagag cgagttcggc 53700 + gccgcccggc gatgacgtac gtttccctgg gcgacctcga acgtgccgct cgcgacgtcc 53760 + tccccggcga gatctgggac ttcctcgccg gggggagcgg cgccgaggca tcgctgacgg 53820 + ccaaccgcac cgcgctcgac cgggttttcg tggttccccg gatgctgtgc gacctgaccg 53880 + gcagcaccac cgaggccgag ctcctgggcc ggcgcgccgc gctcccgatg gcggtcgcgc 53940 + cggtcgcgta ccagcggttg ttccaccccg agggcgagct ggcggccgct cgcgcggctc 54000 + gcgacgccgg cgtgccgtac accatctgca ccttgagcag cgtcccgctc gaggaggtcg 54060 + cggccgtcgg cggccggccg tggttccagc tgtactggct gcgtgacgag aagcggtcgc 54120 + tggagctcgt gcgccgcgcg gaagacgccg ggtgcgaagc gatcgtgttc accgtggacg 54180 + tgccgtggat gggacggcgg tggcgggaca tgcgcaacgg cttcgcgttg ccggaatcgg 54240 + tgacagcggc caacttcgac gccggatcgg ccgcgcaccg ccgcacgcgc ggggcctcgg 54300 + ccgtggccga ccacaccgcg cgcgagttcg cccccgccac ctgggagtcg gtggcgacgg 54360 + tccgcgcgca cacggacctg ccggtggtgc tcaagggcat cctcgccgcc gaggacgccc 54420 + gccgtgccgt cgaggccggg gccgacggga tcgtggtgtc caaccacgga ggtcgtcagt 54480 + tggacggcgc ggtgcccggg atcgaggtgc tgggcgagat cgccgccgag gtctccggcc 54540 + gctgcgaagt gctgctggac ggcggaatcc ggaccggcgg ggacatcctc aaggcggccg 54600 + cgctgggcgc gtcgggcgtg ctggtcgggc ggcccgtgat gtgggggctg gccgcggcgg 54660 + gccaggaggg cgtccggcag gtgttcgaac tgctcgccgc cgaactccgg aacgcgctgg 54720 + gcctggcggg ctgtgactcg gtgagcgcgg ccggccggct gggcacgagg gtcccccgct 54780 + acggctgatt ccccgcccca cgcccgattt cgacgtgaac ccgatccgcc cgcgcgtgcc 54840 + gggctcgact ggagcggggc ctttcccgga ggagaaaaat gctgcacacc tttgccgcgg 54900 + cggtcgcgcc ggtcgcaccg atcgccgcgc acagtctcct ggtcttcctg ctgcagatcg 54960 + gcttgctgct cctgctcgcc gtcgtgctcg gccggctggc cggccggttc gggatgcccg 55020 + cggtcgtcgg tgagctgttc gtcggggtga tcctcggtcc gtcgctgctg ggctgggcgg 55080 + cgccgggcct gcacagctgg ctgttcccgg ccgtcgccga gcagtaccac ctgctcgacg 55140 + ccgtcggcca ggtcggcgtc ctgctgctgg tcggcctcac cggcgtgcag atggacatgg 55200 + ggctggcccg caagcgcggc ctcaccgcgg ccggggtcag catcggcggc ctggtccttc 55260 + cgctcggcct ggggatcggc gcgggttacc tgctgccgaa ggtgctcgtt ccggagggca 55320 + ccgacgtcac cgtcttcgcg atgttcctcg gcgtggccct gtgcgtcagc gccatcccgg 55380 + tcatcgccaa gaccctcatc gacatgaaac tgctgcaccg caacatcggg cagctcacgc 55440 + tcaccgccgg catggtcgac gacgtgttcg gctggttcat gctgtccgtc gtcagcgcga 55500 + tggcggtcaa cgcggtctcc gccggcaccg tgctcacttc gctggcctac ctggtcgcca 55560 + tcctcgcctt ctgtttcacc ctcggccgtc cgctggcccg gggtgtgctc cgcgtcgcgg 55620 + ccaagtccga cggtcccggg ctcaccgtcg ccaccgtcgt cgtcctgatc ttcctcgccg 55680 + cggccggtac gcaggcgctc ggcctggagg cggtcttcgg cgccttcctc tgcggcatcc 55740 + tgctcgggac ggcgggcaag gtggatccgg ccaagctcgc ccccctgcgc acggtcgtcc 55800 + tgtcgggact cgcccccctc ttcttcgcca cggccgggtt gcggatggac ctcaccgcgc 55860 + tgacccaccc ggtggtcctg ctcaccggtc tggtggtgct cgccctggcc atcgccggca 55920 + agttcgccgg cgcgttcgcc ggcgcgcggc tgagcgggtt gaacaagtgg gaagggctgg 55980 + cgctcggcgc cgggctgaac gcgcggggag tcatccaggt cgtggtggcc atggtcggcc 56040 + tgcggctggg tatcctcagc gtggaggtct acacgatcat catcctcgtc gcgatcgtca 56100 + cttccctgat ggcgtcgccg atcctgcggt tcgcgatgtc cagagtggag cagaccgccg 56160 + aagaacaggt tcgcgagaac gaacaccggg cgtggaacac gcacccggcg gcgaacccgc 56220 + aggagcaaag tctctaggcg caggccggta ctgctcgggg cgacgggacg aaccgcgggt 56280 + gtccaaccgc ggaattcgcc ggtcggacgg gaaatcgctt tctcgtgcca cggcggccgt 56340 + tgaccaatcc acggcgtgga acagtgcggt gcctgccgct atcttggcgg cacgaggaac 56400 + gaaaagactt cctcgacagc gtcttcggcc tgacccgacg ccggttccgg agcagcgatg 56460 + acgcagcctt cgcacgacgg tcatgacaag gagtcgtccg atgctgcctg acctcgttcc 56520 + cccggtcgtg gtgcgccccc gcgacggccg cgaccacgcg gaccgcatcg cgttgtcggc 56580 + ggcgaccacc gacggggtgc acatgcggac cgaggacgtc cgcgcctgga tcgccgaacg 56640 + ccgtgaggcc aacgacttcc acgtcgaacg cgtcccgttc cgggacctcg accagtggtc 56700 + gttcgaggag gtgaccggca acctcgtgca ccacagcgga cggttcttca ccatcgaggg 56760 + cctgcacgtg atcgagcacg acggcccgaa cggcgacggc ccctaccgcg agtggcagca 56820 + accggtcatc aagcagcccg aagtcggcat cctcggcatc ctgggcaagg agttcggcgg 56880 + cgtcctgcac ttcctgatgc aggccaagat ggagccgggg aaccccaatc tggtgcagct 56940 + ctcgccgacc gtgcaggcca cccgcagcaa ctacaccaag gcgcacggcg gcacgaacgt 57000 + caagctgatc gagtacttcg ccccgcccga ccccgagcac gtcatcgtcg acgttctcca 57060 + ggccgagcaa ggctcgtggt tcttccgcaa gtccaatcgc aacatgatcg tcgagaccgt 57120 + cgacgacgtg ccgctgtggg acgacttctg ctggctcacc ctcggccaga tcgcggagct 57180 + gatgcacgag gacgagacga tcaacatgaa cgccaggagc gtgttgtcgt gcctgcctta 57240 + ccacgacgcg gctcccggcg cgcggttctc cgacgtccag ctcctgtcgt ggttcacgaa 57300 + cgagcgttcg cggcacgacg tgcgtgcccg ccgcatcccg ctcgcggacg tgtgcggctg 57360 + gaagcagggc gacgaagcga tcgagcacga ggacggccgt tatttccggg tcctcgcggt 57420 + cgccgtgcgg gggagcaacc gcgagcggat cagctggacc cagccgctgc tcgaatccgt 57480 + cgacctgggt gtcgtcgcgt tcctcgtgcg cgagatcggc ggtgtgcccc acgttctggt 57540 + gcacgcccgc gccgacggtg gtttcctgga cacggtcgag ctggcaccga ccgtccagtg 57600 + cactccccaa aactacgcgc acctgcccgc ggagaaccgc ccgcccttcc tcgacgtcgt 57660 + cctcaacgct ccggagtcgc gcattcgtta cgaggcaata cattccgaag agggcgggcg 57720 + cttcctcaac gtccgggcgc gctacctcgc gatcgaagcg gacgacacgg tcgagccccc 57780 + tcccggctac acctgggtca cgccggccca gctcaccgcg ctcacccggc acgggcacta 57840 + cgtcaacgtc gaggcccgca cgctgctcgc ctgcctcaac gccgcgacgg cccagcctcg 57900 + aggcggtgcc tgacatgaag acggtcaccg tcctcggcgc ctcgggtttc gccggctcgg 57960 + ccgtccaccg gctgggcgaa gtcttccggc tcgtggcacg ggaggtcgcc gggcacaccg 58020 + gacgcggccc ggtggacgtg ccctgcgtgg cacccccgtc gcacgcgccc gagacggatt 58080 + tccggagcgt cacggtcggt tccacgccgt tccggtcgat caccggccgg cgcccggaga 58140 + tgtcgcggcc cgagggagtg cgccgcactg tcgccgcttt gccgtcatca gatcagggaa 58200 + aggttcgcac atgaccacgc gtgtatggga ctaccaggcc gaataccgga acgagcggct 58260 + cgacctgctg gacgcggtcg agacggtctt cgactcgggg cagctcgtgc tcggggcgag 58320 + cgtgcgtggc ttcgaggcgg aattcgccgc gtaccacggg gtcgggcact gcgttggcct 58380 + cgacaacggg acgaacgcga tcaagctcgg cctgcaggcg ctgggtgtcg ggccgggcga 58440 + cgaggtgatc acggtgtcca acaccgccgc cccgaccgtg gtcgccatcg acggcaccgg 58500 + cgccacgccg gtcttcgtcg acgtccgcga ggacgacttc ctgatggaca ccggccaggt 58560 + cgcggccgcg atcaccgagc gcaccaagtg cctgctgccc gtgcacctat acggacagtg 58620 + cgtggacatg gctccgctga aggacctcgc cgcgaagcat ggactgtcca ttttggagga 58680 + ctgtgcccag gcgcacgggg cccggcagaa cggaacggtc gcgggctcga ccggtgacgc 58740 + ggccgcgttc tccttctacc cgaccaaggt gctcggggcg tacggcgacg gcggcgcgac 58800 + catcacctcc gacgaatccg tggaccggcg gctgcggcgg ctgcgctact acggcatgga 58860 + caagcagtac tacacgctgg aaacgccggc ccacaacagc cggctggacg aggtccaggc 58920 + cgagatcctg cggcgcaagc tcaagcggct cgacacctac gtcgccgccc gccaggccat 58980 + cgcccagcgc tacgtcgacg gactgggcga cacggagctg aagctgccgc ggaccgtccc 59040 + cggcaacgag cacgtgtact acgtgtacgt cgtgcgccac ccgcgacgtg acgacatcat 59100 + cgagcgcctc aaggcgtacg acatccactt gaacatcagc tatccgtggc cggtgcacac 59160 + catgaccggt ttcgcccacc tcggctacgc gaccggcgcg ttcccggtca ccgaaaaact 59220 + ggccggcgag atcttctcgc tgccgatgta ccccgcgctt tccgccgacc tgcaggacaa 59280 + ggtcatccat gcggtgcgcg aggtggtgtc caccctctga ccactccacc aacaggagta 59340 + gccgtgcaag cacgcaaact cgccgtcgac ggcgcgatcg agttcacccc ccgggtcttc 59400 + cccgacgacc ggggcctgtt cgtctcgccg ttccaggaag aggccttcgc cgaggcccgc 59460 + ggcggcccgc tgttccgggt ggcgcagacg aaccacagca tgtccaagcg cggcgtggtg 59520 + cgtggcatcc actacacgat gacgccaccg ggcacggcca agtacgtcta ctgtgcccgc 59580 + ggcaaggcgt tggacatcgt ggtcgacatc cgggtcggct cgccgacgtt cggccggtgg 59640 + gacgcggtcc tgctggacca gcgggaccac cgggcgatgt acttcccggt gggggtcggc 59700 + cacgcgttcg tggccctcga ggacgacacc gccatgtggt acctgctctc cacggcctac 59760 + gtggcgcgga acgagctcgc cctctcggtc ctggatcccg cgctgggcct gcccatcgac 59820 + gccgacgtcg acccgatcct gtccgaacgg gaccaggtgg ccgtcacgct cgccgaggcg 59880 + ggacggcagg ggttgctgcc ggactacgcc acctgcctgg agctcgaccg gcagctgtcc 59940 + gaagtctccc tttccgcctg acctcacgac cgatcgggcc gaaggcgtcc ttcaccacgt 60000 + ccgaacgcgg tgaaggacgc cttcgacgga aaccaatcac gaactccgcg ccttggacga 60060 + cattgaccgc cgagttcggc cgagcctact ttcggaatgt ccggtccgct ctttcgcgaa 60120 + aggtgagatc catgcccgct gcgcaggtca agcagctgct tcgaagcaag ttgagaacgt 60180 + gggggtggat gtatcgatga cgaccagcat cgaacccgcc gaagaccttt cggtcctctc 60240 + cggcctgacc gagatcactc gattcgccgg cgtgggaaca gcggtttccg cgtcgtccta 60300 + ttcgcagtcc gaggtcctcg acatcctcga cgtcgaggac cccaaaatcc gctcggtctt 60360 + cctgaacagc gccatcgacc ggcgttttct caccctgccg ccggagagtc ccggtggggg 60420 + ccgcgtgtcc gaaccgcagg gcgacctcct ggacaagcac aaggagctcg cggtcgacat 60480 + ggggtgccgg gccctcgagg cctgcctgaa gtcggcggga gcgacgcttt cggacctgcg 60540 + tcacctgtgc tgcgtcacct cgaccgggtt cctgaccccc ggcctcagcg cactgatcat 60600 + ccgcgaactg gggatcgacc cgcactgcag ccgctcggac atcgtgggca tggggtgcaa 60660 + cgccggcctg aacgcgctca acgtcgtcgc cggctggtcc gcggcgcacc cgggtgaact 60720 + cggcgtcgtc ctgtgcagcg aggcgtgttc cgcggcctac gccctggacg gcaccatgcg 60780 + gaccgcggtg gtcaacagcc tcttcggcga cggatccgcc gcactcgccg tgatttccgg 60840 + tgacggccgc gtgcccggcc ctcgggtcct caagttcgcg agctacatca tcaccgacgc 60900 + gctggacgcc atgcgctacg actgggaccg tgaccaggac cggttcagct tcttcctcga 60960 + cccgcagatt ccgtacgtgg tcggggcgca cgcggagatc gtcgccgacc ggctgctgtc 61020 + cggcacgggc ctgcggcgca gcgacatcgg gcactggctg gtgcattccg gcggcaagaa 61080 + ggtgatcgac tccgtcgtcg tcaacctcgg cctgagccgc cacgacgtcc gccacaccac 61140 + cggagttctc cgtgactacg ggaacctttc cagcggctcc ttcctcttct cctacgagcg 61200 + gctcgccgaa gaaggcgtca cccggcccgg agactacggc gtactcatga ccatggggcc 61260 + tggctccaca atcgaaatgg cgctgatcca atggtgaacg gtgaactggt gctccggctc 61320 + gacggcaccc ggcccctgtc ggccgcgtcg gtcgaggaac tggacgccct ctgcgatcgc 61380 + gtggaagacc accgggaacc cggcccggtc accgtccacg tcacgggtgt cccggccgcc 61440 + ggctggacgg cggaggtgac ggtcggcctg gtctccaagt gggaacgggt ggtgcgccgg 61500 + ttcgagcggc tcggcaggct caccatcgcc gtggcggcgg gtgactgcgc cggaacggca 61560 + ctggacgtcc tcctcgcggc cgacgtccgg atcgccgcgc cgggcacccg gctgctgctc 61620 + gcccgggccg gcggcgcgcc gtggcccggg atgaccgtgc accggctcac ccggcaggcc 61680 + ggggcggccg gcatccggcg ggcggtgctg ctcggcgccc cgatcgaggc cggtcgcgcg 61740 + ctggccctga acctggtcga cgaggtctcg gaggacccgg cggccgcgct ggcggagctc 61800 + gccgggacgg ccggtgccgt ggacggcaag gagctggcga tccgccgtca gctggtcttc 61860 + gaagccggct cgaccgcctt cgaggacgca ctcggcgccc acctggccgc ggcggaccgg 61920 + gccctgcgca gggaaaccgc gtcgtgacgg ccgcaccccc gacgtctccg ccggggccgc 61980 + ggctcgaccg cccggccctg gcggaggcag ccggccgcgt cgacgacctg ctcgccgaac 62040 + tgccgccgcc gtccgcccgg acccccgggc aacgcgaggc cgcgtcttcg gcgctggacg 62100 + ggatccgggc gatgcgcgcg gactacgtcg gggcgcacgc cgaagcgatc tacgacgaac 62160 + tcaccgacgg ccggtcccgg tccctgcgca tcgacgagct cgtccgggcc gccgcccggg 62220 + cctttcccgg cctggtgccc acggacgagc agatggcggc cgagcgcgcg cggccgcagg 62280 + cggagaagga cgggcgggag atcgaccagg gcatcttcct gcgcgggatc ctgcgggcgg 62340 + agcgggccgg cccgcacctg ctcgacgcca tgctccagcc caccccgagg gcgctgaagc 62400 + tgctcccggg attcaccgag tccggtgtcg tgcagatgga ggcggtccgg ctggaacgcc 62460 + gggacggcgt cgcgtacctg accctgtgcc gggacgactg cctcaacgcc gaggacgccc 62520 + agcaggtcga cgacatggag accgcggtcg acctggcgct gctcgacccg gccgtccggg 62580 + tggggctgct gcgcggcggg gagatgagcc atccccgcta ccgggggcgc cgcgtgttct 62640 + gcgccggcat caacctcaag aagctgagct cgggcggcat cccgctggtc gatttcctgc 62700 + tgcggcggga gctggggtac atccacaaga tcgtgcgcgg cgtggtcacc gaaggttcgt 62760 + ggcattcgcg gctgaccgac aagccgtgga tcgcggccgt cgactccttc gccatcggcg 62820 + gcggggccca gctgctcctc gtcttcgacc acgtgctggc cgcgtccgac gcctacttca 62880 + gcctgcccgc ggcgaaggag gggatcatcc ccggcgcgtc gaacttccgg ctctcccggt 62940 + tcgccgggcc ccgcgtggcc cggcaggtga tcctcggcgg ccgccggatc cgggcggacg 63000 + agccggatgc ccgactgctc gtcgacgagg tcgtcccgcc ggcggagctg gacgcggcga 63060 + tcgacgccgc gctggcccgc ctggacgggg aggcggtgct ggccaaccgg cgcatgctga 63120 + acctggccga ggaaccgccg gacgaattcc gccggtacat ggccgagttc gccctgcagc 63180 + aggcgctgcg gatctacggc gaagacgtga tcggcaaggt cggccggttc gcggcgggct 63240 + cgtcgtgagc ggcgaccggg tgcggtacga gaagaaggac cacgtcgcct acgtgacgct 63300 + ggaccggccc ggcgtgctga acgccatgga ccggcggacg cacgaggagc tcgccggaat 63360 + ctgggacgac gccgaggccg acgacgaagt ccgggtggtg gtgctgaccg gcgccgggaa 63420 + ccgcgcgttc tccgtcggcc aggacctcaa ggaacgcgcc cggctgaacg aagcgggtgc 63480 + gcgggccacg acgttcggca gccggggcca gccggggcat ccccggctga ccgaccggtt 63540 + caccctgtcc aagccggtgg tcgcccgggt gcacggctac gcgctgggcg gtggcttcga 63600 + gctggtgctc gcctgcgaca tcgtcatcgc ctccgacgat tcggtgttcg ccctgccgga 63660 + ggtccgcctc ggcctgatcc ccggggcggg cggggtgttc cggctgccgc ggcagctgcc 63720 + gcagaaggtg gcgatgggct acctgctgac cggccgccgg atggacgcgg cgacggcgct 63780 + gcggtacgga ttggtcaacg aggtcgtgcc accggaggaa ctggaccggt gcgtcgccga 63840 + atggacggac agcctcgtgc gcgccgctcc gctttcggtt cgcgcgatca aggaggccgc 63900 + gctacggtcg ctcgacctcc ccctggagga ggcgttcacc gcttcctaca cctgggaaga 63960 + gcgccgtcgg cggagcgaag acgcgatcga gggtccccgg gccttcgccg cgaaacggga 64020 + tccggtctgg accggggaat accggccggg ttgaccaggc tgttcggtgg tttcgagtga 64080 + ggatggtgcg gagatgtcgg tgacggaatt cgctgtgacg gcgcgaaggg gaccggtcgc 64140 + ggccgggccg gggcaccggg tgtggccgcg atgacccaca ccgtcgccac gaccgacctc 64200 + gacaaccagc gcatcgagcg gatcgtcccc ctggtcaccc ccgccctgct gcatcacgaa 64260 + ctgccgctca gcgccaccgc ggccgagacg gtgcgaaagg gccgcgagag cgtcgtccgc 64320 + gtcctcgacg gcacggacga ccggctgctc gtgatcaccg ggccgtgctc catccacgac 64380 + cccgccgcgg cgctcgacta cgccggccac ctcgccgcca tcgccggcga ggtcgccggc 64440 + gacctgctcg tcgtcatgcg cgtgtacttc gagaaacccc ggacgatcgg cggctggaag 64500 + gggctcatca acgaccccca cctcgacggc accggcgacg tcaaccacgg gctgcgcacg 64560 + gcccggcacc tcctgctgga gctcgccgaa cgcggcctgc ccgccgcgtg cgaatggctg 64620 + gacaccacca ttcccgcgta cttcgcggac acggtctcgt ggggcgccat cggcgcccgc 64680 + accgtggaaa gccagaacca ccgcatgctc gccagcggcc tgtccatgcc cgtcggcttc 64740 + aagaaccgcc gcgacggcga catcaccgtc gccatcgacg cgatccgggc cgccgcggtc 64800 + cgccacgtgg tccccggcgt cgaccccggc gggttgcccg ccatcctgca cacggcgggc 64860 + aacccggact gccacgtcgt cctgcgcggt ggtgacggcg cgcccaacca cgactccgcg 64920 + tccgtccaca agacactgac cgcgctggag gccgcgggcc tgcccggccg ggtggtgatc 64980 + gacgccagcc acgacaacag cggcaaggac caccaccgcc agcccctcgt cgcggccgag 65040 + atcgcgggcc aggtcgagaa cgggcggaac ggcatcgtcg gggtgatgct cgagtccaac 65100 + ctccgcgccg gccgccagga cctccagccg ggccgtccgc cggcatacgg ccagtccatc 65160 + accgacgcct gcatcgacgt ccccaccacc cggacggtcc tccacggcct cgccgcggcg 65220 + gccgcggccc ggcgaaagct cggcaagcaa gcaagctgag caagtccgtg aatggcacat 65280 + tgccggacat agagtccctc gatgtgccat tcacggactt ggccctgacg gcggccggca 65340 + ccgacacgga tcccccggat ggggtatacc cgaaccggtg gaggaaatcc ggacagagtt 65400 + catccgaccg ctgctgacat cgttgtccgc gcacgccgcg gaccgccccg cctactccga 65460 + cgaccggcga acgctgacct acggcgggct ggcccacgcc gccgcggagc tcgccgccgg 65520 + gctcggggtg gcccggggcg accgcgtgct ggtgcacgtc ggcagccggg tcgagttcgc 65580 + cgtcgccctg ctggcggtgc tgcgggcggc ggccgtggga gtcccggtga gcgtgcgctc 65640 + gaccgacgcc gaactcgccc acctggcggc cgattcgggg gcgacgctcc tggtcacgga 65700 + ggcgcggcac gccgccgcgg ccgaacggct gcgccgcgac cggcccggtc tgcgggttct 65760 + cttcgtcgac gatccgccgc ccgcgcgggt gggcgagccg cgcgacgacc tcggactgga 65820 + cgagccggct tggctgctct acacctccgg caccaccggc cggcccaagg gcgtcctgct 65880 + ctcgcagcgc gcgatgctgt ggtcgacggc cgcgtactac gtccccatgc tcgggctcga 65940 + cgccgaagac accgtgctgt ggccgttgcc gacgcaccac gcgtacgccc tgtcgctggc 66000 + gttcgtcacc acgatcgcgc tgggggcgca cacccggctg gccgacgggt gcacgccgga 66060 + cctgctcgcc cggtaccccg gcagtgtgct cgccggtgtt cccgcgctct acctccggct 66120 + ccgccaggag tccggcggtc ccctcgccgc gccgcggctg tgcctgagcg gcggcgcgcc 66180 + gtgcacgccg gcgacccggg ccgcggtccg ggacctgttc gggctcccgg tggccgacgg 66240 + ctacgggagc accgagacgg gcgggaaggt cgccgccgag cttcccggtg aagcgggcct 66300 + ggtcccggtg cccggcttgg agatccggat cgacgcgggg gaggtgctcg tccgcggtcc 66360 + cgggctgatg ctgggctacc acgggcgaac cgaatcaccg ctgcgggacg gctggtaccg 66420 + cacgggcgac gccggccggt tcgagggcgg ccggctcgtg ctcgagggcc gcgtggacga 66480 + cgtgatcgtc tgcggtggcc agaacgtcca ccctgccgaa atcgaggcgg tgctcgaaga 66540 + gtcgccttcg gtgcgggacg tcctcgtgct cggccgtccc gacgacgtcc tgggcgaggt 66600 + gccggtggcg ttcgtggtcg ccgggcccgg cggcttcgac gccgaggagc tgcgtggccg 66660 + gtgtctaga 66669 +// diff --git a/tests/test_cluster.py b/tests/test_cluster.py index 7f28449..9cc82c2 100644 --- a/tests/test_cluster.py +++ b/tests/test_cluster.py @@ -2,9 +2,10 @@ from Bio import SeqIO from Bio.SeqFeature import FeatureLocation from secmet.record import Record -from secmet.record import ClusterFeature, CDSFeature +from secmet.record import ClusterFeature filename = 'nisin.gbk' +filetype = 'genbank' def get_testfile(): """File path for testing""" @@ -13,60 +14,62 @@ def get_testfile(): def test_add_new_cluster(): """Test for adding a new cluster to record""" testfile = get_testfile() - rec = Record.from_file(testfile, 'genbank') + rec = Record.from_file(testfile) new_cluster = ClusterFeature() - new_cluster.location = FeatureLocation(15100, 15500) + new_cluster.location = FeatureLocation(100, 500) try: - new_cluster.cutoff = 15300 + new_cluster.cutoff = 300 except: raise ValueError('Error assigning cutoff value') try: - new_cluster.extension = 15300 + new_cluster.extension = 300 except: raise ValueError('Error assiging extension value') new_cluster.contig_edge = True new_cluster.detection = 'Detection rules...' new_cluster.add_product('product_info') - assert len(rec.get_clusters()) == 1 + no_clusters_initial = len(rec.get_clusters()) rec.add_feature(new_cluster) - assert len(rec.get_clusters()) == 2 + no_clusters_final = len(rec.get_clusters()) + assert no_clusters_initial+1 == no_clusters_final return new_cluster def test_add_existing_cluster(): """Test for accessing the existing cluster from record""" testfile = get_testfile() - rec = Record.from_file(testfile, 'genbank') - assert len(rec.get_clusters()) == 1 - new_cluster = rec.get_clusters()[0] - assert isinstance(new_cluster, ClusterFeature) - new_cluster.location = FeatureLocation(100, 15106) - try: - new_cluster.cutoff = 5000 - except: - raise ValueError('Error assigning cutoff value') - try: - new_cluster.extension = 5000 - except: - raise ValueError('Error assiging extension value') - rec.add_feature(new_cluster) - return new_cluster + rec = Record.from_file(testfile) + if len(rec.get_clusters()) >= 1: + new_cluster = rec.get_clusters()[0] + assert isinstance(new_cluster, ClusterFeature) + new_cluster.location = FeatureLocation(100, 500) + try: + new_cluster.cutoff = 300 + except: + raise ValueError('Error assigning cutoff value') + try: + new_cluster.extension = 300 + except: + raise ValueError('Error assiging extension value') + rec.add_feature(new_cluster) + return new_cluster def write_to_genbank_file(): """Write data from test_add_new_cluster()""" testfile = get_testfile() - rec = Record.from_file(testfile, 'genbank') + rec = Record.from_file(testfile) new_cluster_feature = test_add_new_cluster() rec.add_feature(new_cluster_feature) record_1 = rec.to_biopython() + with open('test_'+filename, 'w') as handle: + SeqIO.write([record_1], handle, filetype) - with open('test_new_cluster.gbk', 'w') as handle: - SeqIO.write([record_1], handle, "genbank") - - #Write data from test_add_existing_cluster( - rec = Record.from_file(testfile, 'genbank') - new_cluster_feature = test_add_existing_cluster() - rec.add_feature(new_cluster_feature) + #Write data from test_add_existing_cluster() + rec = Record.from_file(testfile) + try: + new_cluster_feature = test_add_existing_cluster() + rec.add_feature(new_cluster_feature) + except TypeError: #To return if no clusters are already present in the file + return record_2 = rec.to_biopython() - - with open('test_existing_cluster.gbk', 'w') as handle: - SeqIO.write([record_2], handle, "genbank") + with open('test_'+filename, 'w') as handle: + SeqIO.write([record_2], handle, filetype) diff --git a/tests/test_record.py b/tests/test_record.py index 0ca576b..e020f02 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -5,8 +5,9 @@ from secmet.record import Record from secmet.record import GenericFeature, ClusterFeature, CDSFeature - -filename = 'nisin.gbk' +#Global variables for test file name and its type +filename = 'balh.embl' +filetype = 'embl' def get_testfile(): """File path for testing""" @@ -15,8 +16,8 @@ def get_testfile(): def test_from_file(): """Test file operations in Record""" testfile = get_testfile() - bp_rec = SeqIO.read(testfile, 'genbank') - rec = Record.from_file(testfile, 'genbank') + bp_rec = SeqIO.read(testfile, filetype) + rec = Record.from_file(testfile) assert isinstance(rec, Record) assert rec.id == bp_rec.id assert rec.seq == bp_rec.seq @@ -30,20 +31,20 @@ def test_from_file(): def test_from_biopython(): """Test from_biopython() in Record""" testfile = get_testfile() - rec = Record.from_file(testfile, 'genbank') + rec = Record.from_file(testfile) assert isinstance(rec.from_biopython(rec._record), Record) def test_to_biopython(): """Test to_biopython() in Record""" testfile = get_testfile() - rec = Record.from_file(testfile, 'genbank') + rec = Record.from_file(testfile) assert isinstance(rec.to_biopython(), Bio.SeqRecord.SeqRecord) def test_get_clusters(): """Test get_clusters() in Record""" testfile = get_testfile() - rec = Record.from_file(testfile, 'genbank') - bp_rec = SeqIO.read(testfile, 'genbank') + rec = Record.from_file(testfile) + bp_rec = SeqIO.read(testfile, filetype) bp_clusters = [i for i in bp_rec.features if i.type == 'cluster'] mod_clusters = rec.get_clusters() assert len(mod_clusters) == len(bp_clusters) @@ -53,8 +54,8 @@ def test_get_clusters(): def test_get_CDSs(): """Test get_CDSs() in Record""" testfile = get_testfile() - rec = Record.from_file(testfile, 'genbank') - bp_rec = SeqIO.read(testfile, 'genbank') + rec = Record.from_file(testfile) + bp_rec = SeqIO.read(testfile, filetype) bp_CDSs = [i for i in bp_rec.features if i.type == 'CDS'] mod_CDSs = rec.get_CDSs() assert len(mod_CDSs) == len(bp_CDSs) @@ -64,14 +65,15 @@ def test_get_CDSs(): def test_get_cluster_number(): """Test get_cluster_number() in Record""" testfile = get_testfile() - rec = Record.from_file(testfile, 'genbank') - cluster = rec.get_clusters()[0] - assert rec.get_cluster_number(cluster) == 1 + rec = Record.from_file(testfile) + clusters = rec.get_clusters() + if len(clusters) is not 0: + assert rec.get_cluster_number(clusters[0]) == 1 def test_add_feature(): """Test add_feature() in Record""" testfile = get_testfile() - rec = Record.from_file(testfile, 'genbank') + rec = Record.from_file(testfile) no_of_clusters = len(rec.get_clusters()) no_of_cdss = len(rec.get_CDSs()) no_of_generics = len(rec._modified_generic) @@ -84,8 +86,9 @@ def test_add_feature(): rec.add_feature(new_cluster) rec.add_feature(new_cds) rec.add_feature(new_generic) - assert no_of_clusters+1 == len(rec.get_clusters()) + clusters = rec.get_clusters() + assert no_of_clusters+1 == len(clusters) assert no_of_cdss+1 == len(rec.get_CDSs()) assert no_of_generics+1 == len(rec._modified_generic) - assert rec.get_clusters()[0].get_cluster_number() == 1 - assert new_cluster.get_cluster_number() == 2 + for index, cluster in enumerate(clusters): + assert cluster.get_cluster_number() == index+1 From 2c549b38d830b49c397a0f108ef8bb947fb31ef2 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Mon, 19 Jun 2017 20:44:10 +0530 Subject: [PATCH 11/71] Updated testfiles path tests/data/ -> data/ --- tests/test_cluster.py | 2 +- tests/test_record.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_cluster.py b/tests/test_cluster.py index 9cc82c2..b0ff877 100644 --- a/tests/test_cluster.py +++ b/tests/test_cluster.py @@ -9,7 +9,7 @@ def get_testfile(): """File path for testing""" - return path.join(path.dirname(__file__), 'tests/data', filename) + return path.join(path.dirname(__file__), 'data', filename) def test_add_new_cluster(): """Test for adding a new cluster to record""" diff --git a/tests/test_record.py b/tests/test_record.py index e020f02..c708a07 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -11,7 +11,7 @@ def get_testfile(): """File path for testing""" - return path.join(path.dirname(__file__), 'tests/data', filename) + return path.join(path.dirname(__file__), 'data', filename) def test_from_file(): """Test file operations in Record""" From 5bd44869746b48523f40595210bdbd59f3995697 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Tue, 20 Jun 2017 22:34:03 +0530 Subject: [PATCH 12/71] SOLVED: complement(location); Add sort_feature() and cmp_feature_location() in record.py, Update test files --- secmet/record.py | 52 ++++++++++++++++++++++++++++++++----------- tests/test_cluster.py | 6 ++--- tests/test_record.py | 43 ++++++++++++++++++++++++++--------- 3 files changed, 75 insertions(+), 26 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index ef69957..f693a43 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -4,6 +4,30 @@ from Bio import SeqIO, SeqRecord from Bio.SeqFeature import SeqFeature, FeatureLocation +import sys + +logging = [] +def cmp_feature_location(a, b): + "Compare two features by their start/end locations" + ret = cmp(a.location.start, b.location.start) + if ret != 0: + return ret + return cmp(a.location.end, b.location.end) + +def sort_features(seq_record): + "Sort features in a seq_record by their position" + #Check if all features have a proper location assigned + for feature in seq_record.features: + if feature.location is None: + if feature.id != "": + logging.append("Feature '%s' has no proper location assigned", feature.id) + elif "locus_tag" in feature.qualifiers: + logging.append("Feature '%s' has no proper location assigned", feature.qualifiers["locus_tag"][0]) + else: + logging.append("File contains feature without proper location assignment") + sys.exit(0) #FIXME: is sys.exit(0) really what we want to do here? + #Sort features by location + seq_record.features.sort(cmp=cmp_feature_location) class Feature(object): """A Feature super class that expands to different subclasses""" @@ -48,6 +72,7 @@ def to_biopython(self): if not isinstance(self.location, FeatureLocation): raise ValueError("location should be an instance of Bio.SeqFeature.FeatureLocation") new_Generic = SeqFeature(FeatureLocation(self.location.start, self.location.end), type=self.type) + new_Generic.location = self.location new_Generic.qualifiers = self._qualifiers.copy() return [new_Generic] @@ -107,6 +132,7 @@ def to_biopython(self): self._qualifiers['protein_id'] = [str(self.protein_id)] self._qualifiers['gene'] = [str(self.gene)] new_CDS = SeqFeature(FeatureLocation(self.location.start, self.location.end), type=self.type) + new_CDS.location = self.location new_CDS.qualifiers = self._qualifiers.copy() return [new_CDS] @@ -124,6 +150,7 @@ def __init__(self, feature=None): self._qualifiers = {} self.parent_record = None self.type = 'cluster' + self.note = [] if feature is not None: self._qualifiers = feature.qualifiers @@ -137,8 +164,12 @@ def __init__(self, feature=None): self.contig_edge = self._qualifiers['contig_edge'][0] if 'note' in self._qualifiers: - self.detection = self._qualifiers['note'][1] - self.clusternumber = int(self._qualifiers['note'][0].split(':')[1]) + note_list = self._qualifiers['note'] + self.detection = note_list[1] + self.clusternumber = int(note_list[0].split(':')[1]) + if len(note_list) > 2: + for i in range(2, len(note_list)): + self.note.append(note_list[i]) if 'product' in self._qualifiers: self.products = self._qualifiers['product'] @@ -194,11 +225,13 @@ def to_biopython(self): raise ValueError("location should be an instance of Bio.SeqFeature.FeatureLocation") self._qualifiers['note'] = ["Cluster number: " + str(self.get_cluster_number())] self._qualifiers['note'].append(self.detection) + self._qualifiers['note'].extend(self.note) self._qualifiers['cutoff'] = [str(self.cutoff)] self._qualifiers['extension'] = [str(self.extension)] self._qualifiers['product'] = self.products self._qualifiers['contig_edge'] = [str(self.contig_edge)] new_Cluster = SeqFeature(FeatureLocation(self.location.start, self.location.end), type=self.type) + new_Cluster.location = self.location new_Cluster.qualifiers = self._qualifiers.copy() return [new_Cluster] @@ -213,7 +246,6 @@ def __init__(self, seq_record=None): :type seq_record: :class:`Bio.SeqRecord.SeqRecord` """ self._record = seq_record - self._features = [] #A list containing all Feature instances self._modified_cds = [] #A list containing instances of CDSFeature self._modified_cluster = [] #A list containing instances of ClusterFeature self._modified_generic = [] #A list containing instances of GenericFeature @@ -287,18 +319,14 @@ def get_CDSs(self): def to_biopython(self): """Returns a Bio.SeqRecord instance of the record""" new_record = self._record - features = self._features - for index, feature in enumerate(features): - if feature.type == 'cluster': - features.pop(index) - if feature.type == 'source': - source_index = index - for cluster in self.get_clusters(): - features.insert(source_index+1, cluster) + features = self._modified_generic + features.extend(self.get_clusters()) + features.extend(self.get_CDSs()) record_features = [] for feature in features: record_features.append(feature.to_biopython()[0]) new_record.features = record_features #A new_record with all the modified features + sort_features(new_record) return new_record def get_cluster_number(self, clusterfeature): @@ -336,7 +364,6 @@ def add_feature(self, feature): self._modified_cds.append(feature) else: self._modified_generic.append(feature) - self._features.append(feature) def from_biopython(self, record): """Modifies _modified_features list with new Feature instances""" @@ -353,5 +380,4 @@ def from_biopython(self, record): else: feature = GenericFeature(feature) self._modified_generic.append(feature) - self._features.append(feature) return self diff --git a/tests/test_cluster.py b/tests/test_cluster.py index b0ff877..d4077ba 100644 --- a/tests/test_cluster.py +++ b/tests/test_cluster.py @@ -53,14 +53,14 @@ def test_add_existing_cluster(): rec.add_feature(new_cluster) return new_cluster -def write_to_genbank_file(): +def write_to_file(): """Write data from test_add_new_cluster()""" testfile = get_testfile() rec = Record.from_file(testfile) new_cluster_feature = test_add_new_cluster() rec.add_feature(new_cluster_feature) record_1 = rec.to_biopython() - with open('test_'+filename, 'w') as handle: + with open('test_new_'+filename, 'w') as handle: SeqIO.write([record_1], handle, filetype) #Write data from test_add_existing_cluster() @@ -71,5 +71,5 @@ def write_to_genbank_file(): except TypeError: #To return if no clusters are already present in the file return record_2 = rec.to_biopython() - with open('test_'+filename, 'w') as handle: + with open('test_existing_'+filename, 'w') as handle: SeqIO.write([record_2], handle, filetype) diff --git a/tests/test_record.py b/tests/test_record.py index c708a07..0044f56 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -6,8 +6,8 @@ from secmet.record import GenericFeature, ClusterFeature, CDSFeature #Global variables for test file name and its type -filename = 'balh.embl' -filetype = 'embl' +filename = 'nisin.gbk' +filetype = 'genbank' def get_testfile(): """File path for testing""" @@ -46,10 +46,14 @@ def test_get_clusters(): rec = Record.from_file(testfile) bp_rec = SeqIO.read(testfile, filetype) bp_clusters = [i for i in bp_rec.features if i.type == 'cluster'] - mod_clusters = rec.get_clusters() + mod_clusters = [i.to_biopython()[0] for i in rec.get_clusters()] assert len(mod_clusters) == len(bp_clusters) - for cluster in mod_clusters: - assert isinstance(cluster, ClusterFeature) + for bcluster, mcluster in zip(bp_clusters, mod_clusters): + assert isinstance(mcluster, Bio.SeqFeature.SeqFeature) + assert bcluster.type == mcluster.type + assert bcluster.location.__str__() == mcluster.location.__str__() + for key, value in bcluster.qualifiers.items(): + assert value == mcluster.qualifiers[key] def test_get_CDSs(): """Test get_CDSs() in Record""" @@ -57,18 +61,37 @@ def test_get_CDSs(): rec = Record.from_file(testfile) bp_rec = SeqIO.read(testfile, filetype) bp_CDSs = [i for i in bp_rec.features if i.type == 'CDS'] - mod_CDSs = rec.get_CDSs() + mod_CDSs = [i.to_biopython()[0] for i in rec.get_CDSs()] assert len(mod_CDSs) == len(bp_CDSs) - for cds in mod_CDSs: - assert isinstance(cds, CDSFeature) + for bcds, mcds in zip(bp_CDSs, mod_CDSs): + assert isinstance(mcds, Bio.SeqFeature.SeqFeature) + assert bcds.type == mcds.type + assert bcds.location.__str__() == mcds.location.__str__() + for key, value in bcds.qualifiers.items(): + assert value == mcds.qualifiers[key] + +def test_modified_generic(): + """Test _modified_generic list in Record""" + testfile = get_testfile() + rec = Record.from_file(testfile) + bp_rec = SeqIO.read(testfile, filetype) + bp_gens = [i for i in bp_rec.features if i.type != 'CDS' and i.type != 'cluster'] + mod_gens = [i.to_biopython()[0] for i in rec._modified_generic] + assert len(mod_gens) == len(bp_gens) + for bgen, mgen in zip(bp_gens, mod_gens): + assert isinstance(mgen, Bio.SeqFeature.SeqFeature) + assert bgen.type == mgen.type + assert bgen.location.__str__() == mgen.location.__str__() + for key, value in bgen.qualifiers.items(): + assert value == mgen.qualifiers[key] def test_get_cluster_number(): """Test get_cluster_number() in Record""" testfile = get_testfile() rec = Record.from_file(testfile) clusters = rec.get_clusters() - if len(clusters) is not 0: - assert rec.get_cluster_number(clusters[0]) == 1 + for index, cluster in enumerate(clusters): + assert rec.get_cluster_number(cluster) == index+1 def test_add_feature(): """Test add_feature() in Record""" From 976d300e19e5c657217cddb7bd528ab81bd4dc86 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Wed, 21 Jun 2017 08:07:04 +0530 Subject: [PATCH 13/71] Update new SeqFeature location --- secmet/record.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index f693a43..8c50471 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -71,8 +71,7 @@ def to_biopython(self): """Returns a Bio.SeqFeature.SeqFeature of given type of feature""" if not isinstance(self.location, FeatureLocation): raise ValueError("location should be an instance of Bio.SeqFeature.FeatureLocation") - new_Generic = SeqFeature(FeatureLocation(self.location.start, self.location.end), type=self.type) - new_Generic.location = self.location + new_Generic = SeqFeature(self.location, type=self.type) new_Generic.qualifiers = self._qualifiers.copy() return [new_Generic] @@ -131,8 +130,7 @@ def to_biopython(self): self._qualifiers['product'] = [str(self.product)] self._qualifiers['protein_id'] = [str(self.protein_id)] self._qualifiers['gene'] = [str(self.gene)] - new_CDS = SeqFeature(FeatureLocation(self.location.start, self.location.end), type=self.type) - new_CDS.location = self.location + new_CDS = SeqFeature(self.location, type=self.type) new_CDS.qualifiers = self._qualifiers.copy() return [new_CDS] @@ -230,8 +228,7 @@ def to_biopython(self): self._qualifiers['extension'] = [str(self.extension)] self._qualifiers['product'] = self.products self._qualifiers['contig_edge'] = [str(self.contig_edge)] - new_Cluster = SeqFeature(FeatureLocation(self.location.start, self.location.end), type=self.type) - new_Cluster.location = self.location + new_Cluster = SeqFeature(self.location, type=self.type) new_Cluster.qualifiers = self._qualifiers.copy() return [new_Cluster] From 0db0c676c66ef916f198760826886d5cd29365d9 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Wed, 21 Jun 2017 23:24:43 +0530 Subject: [PATCH 14/71] Update FeatureLocation(), Delete logging(list), Update sort_features() --- secmet/record.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index 8c50471..c35fbe3 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -6,7 +6,6 @@ from Bio.SeqFeature import SeqFeature, FeatureLocation import sys -logging = [] def cmp_feature_location(a, b): "Compare two features by their start/end locations" ret = cmp(a.location.start, b.location.start) @@ -18,14 +17,7 @@ def sort_features(seq_record): "Sort features in a seq_record by their position" #Check if all features have a proper location assigned for feature in seq_record.features: - if feature.location is None: - if feature.id != "": - logging.append("Feature '%s' has no proper location assigned", feature.id) - elif "locus_tag" in feature.qualifiers: - logging.append("Feature '%s' has no proper location assigned", feature.qualifiers["locus_tag"][0]) - else: - logging.append("File contains feature without proper location assignment") - sys.exit(0) #FIXME: is sys.exit(0) really what we want to do here? + assert feature.location is not None #Sort features by location seq_record.features.sort(cmp=cmp_feature_location) @@ -71,7 +63,7 @@ def to_biopython(self): """Returns a Bio.SeqFeature.SeqFeature of given type of feature""" if not isinstance(self.location, FeatureLocation): raise ValueError("location should be an instance of Bio.SeqFeature.FeatureLocation") - new_Generic = SeqFeature(self.location, type=self.type) + new_Generic = SeqFeature(FeatureLocation(self.location.start, self.location.end, self.location.strand), type=self.type) new_Generic.qualifiers = self._qualifiers.copy() return [new_Generic] @@ -130,7 +122,7 @@ def to_biopython(self): self._qualifiers['product'] = [str(self.product)] self._qualifiers['protein_id'] = [str(self.protein_id)] self._qualifiers['gene'] = [str(self.gene)] - new_CDS = SeqFeature(self.location, type=self.type) + new_CDS = SeqFeature(FeatureLocation(self.location.start, self.location.end, self.location.strand), type=self.type) new_CDS.qualifiers = self._qualifiers.copy() return [new_CDS] @@ -228,7 +220,7 @@ def to_biopython(self): self._qualifiers['extension'] = [str(self.extension)] self._qualifiers['product'] = self.products self._qualifiers['contig_edge'] = [str(self.contig_edge)] - new_Cluster = SeqFeature(self.location, type=self.type) + new_Cluster = SeqFeature(FeatureLocation(self.location.start, self.location.end, self.location.strand), type=self.type) new_Cluster.qualifiers = self._qualifiers.copy() return [new_Cluster] @@ -378,3 +370,7 @@ def from_biopython(self, record): feature = GenericFeature(feature) self._modified_generic.append(feature) return self +""" +rec = Record.from_file("../tests/data/nisin.gbk") +print rec.get_clusters()[0].to_biopython()[0].location.parts +""" From d21aebe7833d931cda752967896635fc7b6fcc52 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Wed, 21 Jun 2017 23:49:51 +0530 Subject: [PATCH 15/71] Remove comments --- secmet/record.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index c35fbe3..0d8dd80 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -370,7 +370,3 @@ def from_biopython(self, record): feature = GenericFeature(feature) self._modified_generic.append(feature) return self -""" -rec = Record.from_file("../tests/data/nisin.gbk") -print rec.get_clusters()[0].to_biopython()[0].location.parts -""" From 2b2e57de8951f4bc08f195e74ae98d8ec66cecc7 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Sun, 25 Jun 2017 23:57:30 +0530 Subject: [PATCH 16/71] Add name and record property in Record --- secmet/record.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/secmet/record.py b/secmet/record.py index 0d8dd80..4cdf097 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -240,6 +240,7 @@ def __init__(self, seq_record=None): self._modified_generic = [] #A list containing instances of GenericFeature self._cluster_number_dict = {} #A dictionary to map clusters and their numbers + if not isinstance(self._record, SeqRecord.SeqRecord): raise ValueError("SeqRecord should be an instance of 'Bio.SeqRecord.SeqRecord'") self.from_biopython(self._record) @@ -297,6 +298,19 @@ def description(self): else: return "" + @property + def name(self): + """Pass through to seq_record object if available""" + if self._record is not None: + return self._record.name + else: + return "NO_NAME_ASSIGNED" + + @property + def record(self): + """Return the seq_record object""" + return self._record + def get_clusters(self): """A list of secondary metabolite clusters present in the record""" return self._modified_cluster From f3577bc83098d55871913b0930620085ec73086a Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Tue, 27 Jun 2017 12:32:10 +0530 Subject: [PATCH 17/71] Remove record property, add translation in CDSFeature, add setter for seq in Record --- secmet/record.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index 4cdf097..92cd21b 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -2,7 +2,7 @@ # Licensed under the APL2, see LICENSE for details """Secondary Metabolite Record Objects""" -from Bio import SeqIO, SeqRecord +from Bio import SeqIO, SeqRecord, Seq from Bio.SeqFeature import SeqFeature, FeatureLocation import sys @@ -81,6 +81,7 @@ def __init__(self, feature=None): self.product = None self.protein_id = None self.gene = None + self.translation = None self.cluster = None #At present we are manually assigning it for checking self._qualifiers = {} self.type = 'CDS' @@ -103,6 +104,9 @@ def __init__(self, feature=None): if 'gene' in self._qualifiers: self.gene = self._qualifiers['gene'][0] + + if 'translation' in self._qualifiers: + self.translation = self._qualifiers['translation'][0] self.location = feature.location def get_id(self): @@ -122,6 +126,7 @@ def to_biopython(self): self._qualifiers['product'] = [str(self.product)] self._qualifiers['protein_id'] = [str(self.protein_id)] self._qualifiers['gene'] = [str(self.gene)] + self._qualifiers['translation'] = [str(self.translation)] new_CDS = SeqFeature(FeatureLocation(self.location.start, self.location.end, self.location.strand), type=self.type) new_CDS.qualifiers = self._qualifiers.copy() return [new_CDS] @@ -281,6 +286,12 @@ def seq(self): return self._record.seq else: return None + @seq.setter + def seq(self, value): + """Setter for seq in Record""" + if not isinstance(value, Seq.Seq): + raise ValueError('Sequence should of type "Bio.Seq.Seq"') + self._record.seq = value @property def annotations(self): @@ -306,11 +317,6 @@ def name(self): else: return "NO_NAME_ASSIGNED" - @property - def record(self): - """Return the seq_record object""" - return self._record - def get_clusters(self): """A list of secondary metabolite clusters present in the record""" return self._modified_cluster @@ -384,3 +390,4 @@ def from_biopython(self, record): feature = GenericFeature(feature) self._modified_generic.append(feature) return self + From 510d1380088fb36ae56f9a755069d61a82376378 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Tue, 27 Jun 2017 18:11:29 +0530 Subject: [PATCH 18/71] Add setter for id --- secmet/record.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/secmet/record.py b/secmet/record.py index 92cd21b..19a53f2 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -278,6 +278,12 @@ def id(self): return self._record.id else: return "NO_ID_ASSIGNED" + @id.setter + def id(self, value): + """Setter for id in Record""" + if not isinstance(value, str): + raise ValueError('ID should be of type "str"') + self._record.id = value @property def seq(self): @@ -290,7 +296,7 @@ def seq(self): def seq(self, value): """Setter for seq in Record""" if not isinstance(value, Seq.Seq): - raise ValueError('Sequence should of type "Bio.Seq.Seq"') + raise ValueError('Sequence should be of type "Bio.Seq.Seq"') self._record.seq = value @property From 7748c6f558038451a4f5655a87f67a375e2e791c Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Thu, 29 Jun 2017 02:36:14 +0530 Subject: [PATCH 19/71] Add setter's and getter's for generics and clusters lists --- secmet/record.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index 19a53f2..f2fabdd 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -160,8 +160,9 @@ def __init__(self, feature=None): if 'note' in self._qualifiers: note_list = self._qualifiers['note'] - self.detection = note_list[1] self.clusternumber = int(note_list[0].split(':')[1]) + if len(note_list) > 1: + self.detection = note_list[1] if len(note_list) > 2: for i in range(2, len(note_list)): self.note.append(note_list[i]) @@ -326,15 +327,25 @@ def name(self): def get_clusters(self): """A list of secondary metabolite clusters present in the record""" return self._modified_cluster + def set_clusters(self, clusters_list): + """To set the clusters of the seq_record""" + self._modified_cluster = clusters_list def get_CDSs(self): """A list of secondary metabolite clusters present in the record""" return self._modified_cds + def get_generics(self): + """A list of secondary metabolite generics present in the record""" + return self._modified_generic + def set_generics(self, generics_list): + """To set the generic features of the seq_record""" + self._modified_generic = generics_list + def to_biopython(self): """Returns a Bio.SeqRecord instance of the record""" new_record = self._record - features = self._modified_generic + features = self.get_generics()[:] #Clone the private list features.extend(self.get_clusters()) features.extend(self.get_CDSs()) record_features = [] @@ -396,4 +407,3 @@ def from_biopython(self, record): feature = GenericFeature(feature) self._modified_generic.append(feature) return self - From 3c179658d82f54bca91bdc7e12accd437f742754 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Sat, 1 Jul 2017 15:25:14 +0530 Subject: [PATCH 20/71] Add setters for things in Record --- secmet/record.py | 53 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index f2fabdd..0e12fd4 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -2,9 +2,10 @@ # Licensed under the APL2, see LICENSE for details """Secondary Metabolite Record Objects""" -from Bio import SeqIO, SeqRecord, Seq -from Bio.SeqFeature import SeqFeature, FeatureLocation -import sys +from Bio import SeqIO +from Bio.Seq import Seq +from Bio.SeqRecord import SeqRecord +from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation def cmp_feature_location(a, b): "Compare two features by their start/end locations" @@ -246,10 +247,12 @@ def __init__(self, seq_record=None): self._modified_generic = [] #A list containing instances of GenericFeature self._cluster_number_dict = {} #A dictionary to map clusters and their numbers - - if not isinstance(self._record, SeqRecord.SeqRecord): - raise ValueError("SeqRecord should be an instance of 'Bio.SeqRecord.SeqRecord'") - self.from_biopython(self._record) + if self._record is not None: + if not isinstance(self._record, SeqRecord): + raise ValueError("SeqRecord should be an instance of 'Bio.SeqRecord.SeqRecord'") + self.from_biopython(self._record) + else: + self._record = SeqRecord(Seq("")) @classmethod def from_file(cls, filename): @@ -296,18 +299,10 @@ def seq(self): @seq.setter def seq(self, value): """Setter for seq in Record""" - if not isinstance(value, Seq.Seq): + if not isinstance(value, Seq): raise ValueError('Sequence should be of type "Bio.Seq.Seq"') self._record.seq = value - @property - def annotations(self): - """Pass through to seq_record object if available""" - if self._record is not None: - return self._record.annotations - else: - return {} - @property def description(self): """Pass through to seq_record object if available""" @@ -315,6 +310,12 @@ def description(self): return self._record.description else: return "" + @description.setter + def description(self, value): + """Setter for description in Record""" + if not isinstance(value, str): + raise ValueError('Description should be of type "string"') + self._record.description = value @property def name(self): @@ -323,6 +324,26 @@ def name(self): return self._record.name else: return "NO_NAME_ASSIGNED" + @name.setter + def name(self, value): + """Setter for name in Record""" + if not isinstance(value, str): + raise ValueError('Name should be of type "string"') + self._record.name = value + + @property + def annotations(self): + """Pass through to seq_record object if available""" + if self._record is not None: + return self._record.annotations + else: + return {} + def add_annotation(self, key, value): + """Adding annotations in Record""" + if not (isinstance(key, str) and (isinstance(value, str) or isinstance(value, list))): + raise ValueError('Key and Value are not in right format') + self._record.annotations[key] = value + def get_clusters(self): """A list of secondary metabolite clusters present in the record""" From 0e9b1b7bb501b9dcd4a6dbfb8d190118958110f4 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Sat, 1 Jul 2017 17:38:52 +0530 Subject: [PATCH 21/71] Add setter and getter for location in Feature class. --- secmet/record.py | 51 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index 0e12fd4..d6355a7 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -29,6 +29,24 @@ def __init__(self): self.location = None self.type = None + def set_location(self, start, end, strand=None): + """Set feature's location""" + if not isinstance(start, (int, FeatureLocation)) and isinstance(end, (int, FeatureLocation)): + raise ValueError("Start and End location should be either 'int' or 'FeatureLocation'") + if isinstance(start, int) and isinstance(end, int): + self.location = FeatureLocation(start, end) + if strand is not None: + if not isinstance(strand, int): + raise ValueError('Strand should be of type "int"') + self.location.strand = strand + elif isinstance(start, FeatureLocation) and isinstance(end, FeatureLocation): + self.location = CompoundLocation([start, end]) + else: + raise ValueError('Start and End should of same type') + def get_location(self): + """Return feature's location""" + return self.location + class GenericFeature(Feature): """A GenericFeature Feature subclasses Feature @@ -42,8 +60,9 @@ def __init__(self, feature=None): self._qualifiers = {} if feature is not None: self._qualifiers = feature.qualifiers - self.location = feature.location self.type = feature.type + self.set_location(feature.location.start, feature.location.end, feature.location.strand) + def add_qualifier(self, category, info): """Adds a qualifier to qualifiers dictionary""" @@ -62,9 +81,10 @@ def get_qualifier(self, category): def to_biopython(self): """Returns a Bio.SeqFeature.SeqFeature of given type of feature""" - if not isinstance(self.location, FeatureLocation): - raise ValueError("location should be an instance of Bio.SeqFeature.FeatureLocation") - new_Generic = SeqFeature(FeatureLocation(self.location.start, self.location.end, self.location.strand), type=self.type) + location = self.get_location() + if not isinstance(location, (FeatureLocation, CompoundLocation)): + raise ValueError("location should be an instance of FeatureLocation or CompoundLocation") + new_Generic = SeqFeature(location, type=self.type) new_Generic.qualifiers = self._qualifiers.copy() return [new_Generic] @@ -108,7 +128,7 @@ def __init__(self, feature=None): if 'translation' in self._qualifiers: self.translation = self._qualifiers['translation'][0] - self.location = feature.location + self.set_location(feature.location.start, feature.location.end, feature.location.strand) def get_id(self): """Returns the id of the CDSFeature""" @@ -120,15 +140,16 @@ def get_cluster(self): def to_biopython(self): """Returns a Bio.SeqFeature.SeqFeature object with all its members""" - if not isinstance(self.location, FeatureLocation): - raise ValueError("location should be an instance of Bio.SeqFeature.FeatureLocation") + location = self.get_location() + if not isinstance(location, (FeatureLocation, CompoundLocation)): + raise ValueError("location should be an instance of FeatureLocation or CompoundLocation") self._qualifiers['sec_met'] = self.sec_met self._qualifiers['locus_tag'] = [str(self.locus_tag)] self._qualifiers['product'] = [str(self.product)] self._qualifiers['protein_id'] = [str(self.protein_id)] self._qualifiers['gene'] = [str(self.gene)] self._qualifiers['translation'] = [str(self.translation)] - new_CDS = SeqFeature(FeatureLocation(self.location.start, self.location.end, self.location.strand), type=self.type) + new_CDS = SeqFeature(location, type=self.type) new_CDS.qualifiers = self._qualifiers.copy() return [new_CDS] @@ -170,7 +191,7 @@ def __init__(self, feature=None): if 'product' in self._qualifiers: self.products = self._qualifiers['product'] - self.location = feature.location + self.set_location(feature.location.start, feature.location.end, feature.location.strand) self.cdss = [] #At present they are manually assigned for checking @@ -218,8 +239,10 @@ def get_CDSs(self): def to_biopython(self): """Returns a Bio.SeqFeature.SeqFeature object with all its members""" - if not isinstance(self.location, FeatureLocation): - raise ValueError("location should be an instance of Bio.SeqFeature.FeatureLocation") + location = self.get_location() + if not isinstance(location, (FeatureLocation, CompoundLocation)): + raise ValueError("location should be an instance of FeatureLocation or CompoundLocation") + location = self.get_location() self._qualifiers['note'] = ["Cluster number: " + str(self.get_cluster_number())] self._qualifiers['note'].append(self.detection) self._qualifiers['note'].extend(self.note) @@ -227,7 +250,7 @@ def to_biopython(self): self._qualifiers['extension'] = [str(self.extension)] self._qualifiers['product'] = self.products self._qualifiers['contig_edge'] = [str(self.contig_edge)] - new_Cluster = SeqFeature(FeatureLocation(self.location.start, self.location.end, self.location.strand), type=self.type) + new_Cluster = SeqFeature(location, type=self.type) new_Cluster.qualifiers = self._qualifiers.copy() return [new_Cluster] @@ -387,8 +410,8 @@ def add_feature(self, feature): if not isinstance(feature, Feature): raise TypeError("The argument is not an instance of 'Feature'") if feature.type == 'cluster': - if not isinstance(feature.location, FeatureLocation): - raise ValueError("location should be an instance of Bio.SeqFeature.FeatureLocation") + if not isinstance(feature.location, (FeatureLocation, CompoundLocation)): + raise ValueError("location should be an instance of FeatureLocation or CompoundLocation") clusters = self.get_clusters() clusters.append(None) for index, cluster in enumerate(clusters): From 301f2e4494f19c07761d09080cc6fb7768a54f94 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Sun, 2 Jul 2017 15:02:11 +0530 Subject: [PATCH 22/71] Modify location setter --- secmet/record.py | 47 ++++++++++++++++++++++++++------------------ tests/test_record.py | 3 +-- 2 files changed, 29 insertions(+), 21 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index d6355a7..2bb6b3a 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -29,20 +29,28 @@ def __init__(self): self.location = None self.type = None - def set_location(self, start, end, strand=None): + def set_location(self, locations): """Set feature's location""" - if not isinstance(start, (int, FeatureLocation)) and isinstance(end, (int, FeatureLocation)): - raise ValueError("Start and End location should be either 'int' or 'FeatureLocation'") - if isinstance(start, int) and isinstance(end, int): - self.location = FeatureLocation(start, end) - if strand is not None: - if not isinstance(strand, int): - raise ValueError('Strand should be of type "int"') - self.location.strand = strand - elif isinstance(start, FeatureLocation) and isinstance(end, FeatureLocation): - self.location = CompoundLocation([start, end]) - else: - raise ValueError('Start and End should of same type') + if not isinstance(locations, list): + raise ValueError('locations should be a in list format') + if isinstance(locations[0], int): + if len(locations) < 2: + raise ValueError('Location should have atleast start and end positions') + if len(locations) == 2: + locations.append(None) + start, end, strand = locations + self.location = FeatureLocation(start, end, strand) + elif isinstance(locations[0], list): + compound_list = [] + for location in locations: + if len(location) < 2: + raise ValueError('Location should have atleast start and end positions') + if len(location) == 2: + location.append(None) + start, end, strand = location + compound_list.append(FeatureLocation(start, end, strand)) + self.location = CompoundLocation(compound_list) + def get_location(self): """Return feature's location""" return self.location @@ -61,7 +69,7 @@ def __init__(self, feature=None): if feature is not None: self._qualifiers = feature.qualifiers self.type = feature.type - self.set_location(feature.location.start, feature.location.end, feature.location.strand) + self.set_location([feature.location.start, feature.location.end, feature.location.strand]) def add_qualifier(self, category, info): @@ -128,7 +136,7 @@ def __init__(self, feature=None): if 'translation' in self._qualifiers: self.translation = self._qualifiers['translation'][0] - self.set_location(feature.location.start, feature.location.end, feature.location.strand) + self.set_location([feature.location.start, feature.location.end, feature.location.strand]) def get_id(self): """Returns the id of the CDSFeature""" @@ -191,7 +199,7 @@ def __init__(self, feature=None): if 'product' in self._qualifiers: self.products = self._qualifiers['product'] - self.set_location(feature.location.start, feature.location.end, feature.location.strand) + self.set_location([feature.location.start, feature.location.end, feature.location.strand]) self.cdss = [] #At present they are manually assigned for checking @@ -242,7 +250,6 @@ def to_biopython(self): location = self.get_location() if not isinstance(location, (FeatureLocation, CompoundLocation)): raise ValueError("location should be an instance of FeatureLocation or CompoundLocation") - location = self.get_location() self._qualifiers['note'] = ["Cluster number: " + str(self.get_cluster_number())] self._qualifiers['note'].append(self.detection) self._qualifiers['note'].extend(self.note) @@ -410,13 +417,15 @@ def add_feature(self, feature): if not isinstance(feature, Feature): raise TypeError("The argument is not an instance of 'Feature'") if feature.type == 'cluster': - if not isinstance(feature.location, (FeatureLocation, CompoundLocation)): + flocation = feature.get_location() + if not isinstance(flocation, (FeatureLocation, CompoundLocation)): raise ValueError("location should be an instance of FeatureLocation or CompoundLocation") clusters = self.get_clusters() clusters.append(None) for index, cluster in enumerate(clusters): if cluster is not None: - if feature.location.start < cluster.location.start: + clocation = cluster.get_location() + if flocation.start < clocation.start: break else: clusters[index] = feature diff --git a/tests/test_record.py b/tests/test_record.py index 0044f56..363009b 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -2,8 +2,7 @@ import Bio from Bio import SeqIO from Bio.SeqFeature import FeatureLocation -from secmet.record import Record -from secmet.record import GenericFeature, ClusterFeature, CDSFeature +from secmet.record import Record, GenericFeature, ClusterFeature, CDSFeature #Global variables for test file name and its type filename = 'nisin.gbk' From c20f78afae1b38cc07f61d3c899fa02da60f377d Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Sun, 2 Jul 2017 16:14:26 +0530 Subject: [PATCH 23/71] Modify parsing 'note' in ClusterFeature() --- secmet/record.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index 2bb6b3a..ae72be8 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -190,12 +190,15 @@ def __init__(self, feature=None): if 'note' in self._qualifiers: note_list = self._qualifiers['note'] - self.clusternumber = int(note_list[0].split(':')[1]) - if len(note_list) > 1: - self.detection = note_list[1] - if len(note_list) > 2: - for i in range(2, len(note_list)): - self.note.append(note_list[i]) + note_list_copy = note_list[:] + for value in note_list: + if value.startswith('Cluster number'): + self.clusternumber = int(value.split(':')[1]) + note_list_copy.remove(value) + if value.startswith('Detection rule(s)'): + self.detection = value + note_list_copy.remove(value) + self.note.extend(note_list_copy) if 'product' in self._qualifiers: self.products = self._qualifiers['product'] From 0dd107823f9c6838d689ffe9ef88d09bc4d75584 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Sun, 2 Jul 2017 23:21:09 +0530 Subject: [PATCH 24/71] Update 'note' qualifier in ClusterFeature() --- secmet/record.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/secmet/record.py b/secmet/record.py index ae72be8..66bc994 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -105,6 +105,7 @@ def __init__(self, feature=None): param feature: class 'Bio.SeqFeature.SeqFeature' """ super(CDSFeature, self).__init__() + self.id = '' self.sec_met = [] self.locus_tag = None self.product = None @@ -112,6 +113,7 @@ def __init__(self, feature=None): self.gene = None self.translation = None self.cluster = None #At present we are manually assigning it for checking + self.note = [] self._qualifiers = {} self.type = 'CDS' @@ -136,8 +138,13 @@ def __init__(self, feature=None): if 'translation' in self._qualifiers: self.translation = self._qualifiers['translation'][0] + + if 'note' in self._qualifiers: + self.note = self._qualifiers['note'] + self.set_location([feature.location.start, feature.location.end, feature.location.strand]) + def get_id(self): """Returns the id of the CDSFeature""" return self.gene @@ -157,7 +164,8 @@ def to_biopython(self): self._qualifiers['protein_id'] = [str(self.protein_id)] self._qualifiers['gene'] = [str(self.gene)] self._qualifiers['translation'] = [str(self.translation)] - new_CDS = SeqFeature(location, type=self.type) + self._qualifiers['note'] = self.note + new_CDS = SeqFeature(location, type=self.type, id=self.id) new_CDS.qualifiers = self._qualifiers.copy() return [new_CDS] From 762b6c218beae2773e065bfe4ceae702c0fba047 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Mon, 3 Jul 2017 17:28:05 +0530 Subject: [PATCH 25/71] Modify set_location() to take location values instead of lists --- secmet/record.py | 45 +++++++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index 66bc994..90348c7 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -29,33 +29,27 @@ def __init__(self): self.location = None self.type = None - def set_location(self, locations): + def set_location(self, start=None, end=None, strand=None, compound=None): """Set feature's location""" - if not isinstance(locations, list): - raise ValueError('locations should be a in list format') - if isinstance(locations[0], int): - if len(locations) < 2: - raise ValueError('Location should have atleast start and end positions') - if len(locations) == 2: - locations.append(None) - start, end, strand = locations - self.location = FeatureLocation(start, end, strand) - elif isinstance(locations[0], list): - compound_list = [] - for location in locations: - if len(location) < 2: - raise ValueError('Location should have atleast start and end positions') - if len(location) == 2: - location.append(None) - start, end, strand = location - compound_list.append(FeatureLocation(start, end, strand)) - self.location = CompoundLocation(compound_list) + if compound is not None: + if not isinstance(compound, CompoundLocation): + raise ValueError('Expected an instance of "Bio.SeqFeature.CompoundLocation"') + self.location = compound + else: + if start is not None and end is not None: + if not (isinstance(start, int) and isinstance(end, int)): + raise ValueError('Start and End should be of type "int"') + loc = FeatureLocation(start, end) + if strand is not None: + loc.strand = strand + self.location = loc + else: + raise ValueError('Start and End cannot be None') def get_location(self): """Return feature's location""" return self.location - class GenericFeature(Feature): """A GenericFeature Feature subclasses Feature (Features other than CDSFeature and ClusterFeature) @@ -69,7 +63,7 @@ def __init__(self, feature=None): if feature is not None: self._qualifiers = feature.qualifiers self.type = feature.type - self.set_location([feature.location.start, feature.location.end, feature.location.strand]) + self.set_location(feature.location.start, feature.location.end, feature.location.strand) def add_qualifier(self, category, info): @@ -142,7 +136,7 @@ def __init__(self, feature=None): if 'note' in self._qualifiers: self.note = self._qualifiers['note'] - self.set_location([feature.location.start, feature.location.end, feature.location.strand]) + self.set_location(feature.location.start, feature.location.end, feature.location.strand) def get_id(self): @@ -210,7 +204,7 @@ def __init__(self, feature=None): if 'product' in self._qualifiers: self.products = self._qualifiers['product'] - self.set_location([feature.location.start, feature.location.end, feature.location.strand]) + self.set_location(feature.location.start, feature.location.end, feature.location.strand) self.cdss = [] #At present they are manually assigned for checking @@ -385,6 +379,9 @@ def add_annotation(self, key, value): raise ValueError('Key and Value are not in right format') self._record.annotations[key] = value + def __len__(self): + """Return the length of the Biorecord""" + return len(self._record) def get_clusters(self): """A list of secondary metabolite clusters present in the record""" From 95eb608eabe0a0c72c8b1392107561a14ce06d1a Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Thu, 6 Jul 2017 00:32:42 +0530 Subject: [PATCH 26/71] Update set_location() to accept lists as CompoundLoaction() --- secmet/record.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index 90348c7..a218e46 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -32,9 +32,22 @@ def __init__(self): def set_location(self, start=None, end=None, strand=None, compound=None): """Set feature's location""" if compound is not None: - if not isinstance(compound, CompoundLocation): - raise ValueError('Expected an instance of "Bio.SeqFeature.CompoundLocation"') - self.location = compound + if isinstance(compound, CompoundLocation): + self.location = compound + elif isinstance(compound, list): + if not isinstance(compound[0], list): + raise ValueError('Expected a 2D list') + compound_list = [] + for location in compound: + if len(location) < 2: + raise ValueError('Location should have atleast Start and End positions') + if len(location) == 2: + location.append(None) + start, end, strand = location + compound_list.append(FeatureLocation(start, end, strand)) + self.location = CompoundLocation(compound_list) + else: + raise ValueError('Expected an instance of "CompoundLocation" or a list') else: if start is not None and end is not None: if not (isinstance(start, int) and isinstance(end, int)): From 8d3e80e4d418327b5f69289b9535639110ad234d Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Thu, 6 Jul 2017 18:34:55 +0530 Subject: [PATCH 27/71] Add structure, probability in ClusterFeature and EC_number in CDSFeature --- secmet/record.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/secmet/record.py b/secmet/record.py index a218e46..f7641ed 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -93,6 +93,8 @@ def get_qualifier(self, category): """Returns a qualifier of given category""" if category in self._qualifiers: return self._qualifiers[category] + else: + return [] def to_biopython(self): """Returns a Bio.SeqFeature.SeqFeature of given type of feature""" @@ -121,6 +123,7 @@ def __init__(self, feature=None): self.translation = None self.cluster = None #At present we are manually assigning it for checking self.note = [] + self.EC_number = None self._qualifiers = {} self.type = 'CDS' @@ -149,6 +152,9 @@ def __init__(self, feature=None): if 'note' in self._qualifiers: self.note = self._qualifiers['note'] + if 'EC_number' in self._qualifiers: + self.EC_number = self._qualifiers['EC_number'][0] + self.set_location(feature.location.start, feature.location.end, feature.location.strand) @@ -172,6 +178,7 @@ def to_biopython(self): self._qualifiers['gene'] = [str(self.gene)] self._qualifiers['translation'] = [str(self.translation)] self._qualifiers['note'] = self.note + self._qualifiers['EC_number'] = [str(self.EC_number)] new_CDS = SeqFeature(location, type=self.type, id=self.id) new_CDS.qualifiers = self._qualifiers.copy() return [new_CDS] @@ -191,6 +198,8 @@ def __init__(self, feature=None): self.parent_record = None self.type = 'cluster' self.note = [] + self.structure = None + self.probability = None if feature is not None: self._qualifiers = feature.qualifiers @@ -217,6 +226,12 @@ def __init__(self, feature=None): if 'product' in self._qualifiers: self.products = self._qualifiers['product'] + + if 'structure' in self._qualifiers: + self.structure = self._qualifiers['structure'][0] + + if 'probability' in self._qualifiers: + self.probability = self._qualifiers['probability'][0] self.set_location(feature.location.start, feature.location.end, feature.location.strand) self.cdss = [] #At present they are manually assigned for checking @@ -275,6 +290,8 @@ def to_biopython(self): self._qualifiers['extension'] = [str(self.extension)] self._qualifiers['product'] = self.products self._qualifiers['contig_edge'] = [str(self.contig_edge)] + self._qualifiers['structure'] = [str(self.structure)] + self._qualifiers['probability'] = [str(self.probability)] new_Cluster = SeqFeature(location, type=self.type) new_Cluster.qualifiers = self._qualifiers.copy() return [new_Cluster] From d79bf233e80459e92f3d60d24b3323381f4b37a3 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Fri, 7 Jul 2017 02:27:22 +0530 Subject: [PATCH 28/71] Avoid None data in SeqRecord --- secmet/record.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index f7641ed..4111187 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -178,7 +178,8 @@ def to_biopython(self): self._qualifiers['gene'] = [str(self.gene)] self._qualifiers['translation'] = [str(self.translation)] self._qualifiers['note'] = self.note - self._qualifiers['EC_number'] = [str(self.EC_number)] + if self.EC_number is not None: + self._qualifiers['EC_number'] = [str(self.EC_number)] new_CDS = SeqFeature(location, type=self.type, id=self.id) new_CDS.qualifiers = self._qualifiers.copy() return [new_CDS] @@ -290,8 +291,10 @@ def to_biopython(self): self._qualifiers['extension'] = [str(self.extension)] self._qualifiers['product'] = self.products self._qualifiers['contig_edge'] = [str(self.contig_edge)] - self._qualifiers['structure'] = [str(self.structure)] - self._qualifiers['probability'] = [str(self.probability)] + if self.structure is not None: + self._qualifiers['structure'] = [str(self.structure)] + if self.probability is not None: + self._qualifiers['probability'] = [str(self.probability)] new_Cluster = SeqFeature(location, type=self.type) new_Cluster.qualifiers = self._qualifiers.copy() return [new_Cluster] From 3344e1bc2e26546006f06665b1107e6241dac9a1 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Sat, 8 Jul 2017 00:24:46 +0530 Subject: [PATCH 29/71] Add group_cluster_cds() to Record --- secmet/record.py | 81 ++++++++++++++++++------------------------------ 1 file changed, 30 insertions(+), 51 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index 4111187..1b5662e 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -29,39 +29,11 @@ def __init__(self): self.location = None self.type = None - def set_location(self, start=None, end=None, strand=None, compound=None): + def set_location(self, location): """Set feature's location""" - if compound is not None: - if isinstance(compound, CompoundLocation): - self.location = compound - elif isinstance(compound, list): - if not isinstance(compound[0], list): - raise ValueError('Expected a 2D list') - compound_list = [] - for location in compound: - if len(location) < 2: - raise ValueError('Location should have atleast Start and End positions') - if len(location) == 2: - location.append(None) - start, end, strand = location - compound_list.append(FeatureLocation(start, end, strand)) - self.location = CompoundLocation(compound_list) - else: - raise ValueError('Expected an instance of "CompoundLocation" or a list') - else: - if start is not None and end is not None: - if not (isinstance(start, int) and isinstance(end, int)): - raise ValueError('Start and End should be of type "int"') - loc = FeatureLocation(start, end) - if strand is not None: - loc.strand = strand - self.location = loc - else: - raise ValueError('Start and End cannot be None') - - def get_location(self): - """Return feature's location""" - return self.location + if not isinstance(location, (FeatureLocation, CompoundLocation)): + raise ValueError('Location should be an instance of FeatureLocation or CompoundLocation ') + self.location = location class GenericFeature(Feature): """A GenericFeature Feature subclasses Feature @@ -76,7 +48,7 @@ def __init__(self, feature=None): if feature is not None: self._qualifiers = feature.qualifiers self.type = feature.type - self.set_location(feature.location.start, feature.location.end, feature.location.strand) + self.set_location(feature.location) def add_qualifier(self, category, info): @@ -98,10 +70,9 @@ def get_qualifier(self, category): def to_biopython(self): """Returns a Bio.SeqFeature.SeqFeature of given type of feature""" - location = self.get_location() - if not isinstance(location, (FeatureLocation, CompoundLocation)): + if not isinstance(self.location, (FeatureLocation, CompoundLocation)): raise ValueError("location should be an instance of FeatureLocation or CompoundLocation") - new_Generic = SeqFeature(location, type=self.type) + new_Generic = SeqFeature(self.location, type=self.type) new_Generic.qualifiers = self._qualifiers.copy() return [new_Generic] @@ -121,7 +92,7 @@ def __init__(self, feature=None): self.protein_id = None self.gene = None self.translation = None - self.cluster = None #At present we are manually assigning it for checking + self.cluster = None self.note = [] self.EC_number = None self._qualifiers = {} @@ -155,7 +126,7 @@ def __init__(self, feature=None): if 'EC_number' in self._qualifiers: self.EC_number = self._qualifiers['EC_number'][0] - self.set_location(feature.location.start, feature.location.end, feature.location.strand) + self.set_location(feature.location) def get_id(self): @@ -168,8 +139,7 @@ def get_cluster(self): def to_biopython(self): """Returns a Bio.SeqFeature.SeqFeature object with all its members""" - location = self.get_location() - if not isinstance(location, (FeatureLocation, CompoundLocation)): + if not isinstance(self.location, (FeatureLocation, CompoundLocation)): raise ValueError("location should be an instance of FeatureLocation or CompoundLocation") self._qualifiers['sec_met'] = self.sec_met self._qualifiers['locus_tag'] = [str(self.locus_tag)] @@ -180,7 +150,7 @@ def to_biopython(self): self._qualifiers['note'] = self.note if self.EC_number is not None: self._qualifiers['EC_number'] = [str(self.EC_number)] - new_CDS = SeqFeature(location, type=self.type, id=self.id) + new_CDS = SeqFeature(self.location, type=self.type, id=self.id) new_CDS.qualifiers = self._qualifiers.copy() return [new_CDS] @@ -201,6 +171,7 @@ def __init__(self, feature=None): self.note = [] self.structure = None self.probability = None + self.cdss = [] if feature is not None: self._qualifiers = feature.qualifiers @@ -233,9 +204,7 @@ def __init__(self, feature=None): if 'probability' in self._qualifiers: self.probability = self._qualifiers['probability'][0] - self.set_location(feature.location.start, feature.location.end, feature.location.strand) - - self.cdss = [] #At present they are manually assigned for checking + self.set_location(feature.location) def _get_cutoff(self): try: @@ -281,8 +250,7 @@ def get_CDSs(self): def to_biopython(self): """Returns a Bio.SeqFeature.SeqFeature object with all its members""" - location = self.get_location() - if not isinstance(location, (FeatureLocation, CompoundLocation)): + if not isinstance(self.location, (FeatureLocation, CompoundLocation)): raise ValueError("location should be an instance of FeatureLocation or CompoundLocation") self._qualifiers['note'] = ["Cluster number: " + str(self.get_cluster_number())] self._qualifiers['note'].append(self.detection) @@ -295,7 +263,7 @@ def to_biopython(self): self._qualifiers['structure'] = [str(self.structure)] if self.probability is not None: self._qualifiers['probability'] = [str(self.probability)] - new_Cluster = SeqFeature(location, type=self.type) + new_Cluster = SeqFeature(self.location, type=self.type) new_Cluster.qualifiers = self._qualifiers.copy() return [new_Cluster] @@ -458,24 +426,24 @@ def add_feature(self, feature): if not isinstance(feature, Feature): raise TypeError("The argument is not an instance of 'Feature'") if feature.type == 'cluster': - flocation = feature.get_location() - if not isinstance(flocation, (FeatureLocation, CompoundLocation)): + if not isinstance(feature.location, (FeatureLocation, CompoundLocation)): raise ValueError("location should be an instance of FeatureLocation or CompoundLocation") clusters = self.get_clusters() clusters.append(None) for index, cluster in enumerate(clusters): if cluster is not None: - clocation = cluster.get_location() - if flocation.start < clocation.start: + if feature.location.start < cluster.location.start: break else: clusters[index] = feature feature.parent_record = self + self.group_cluster_cds(feature) for index, cluster in enumerate(clusters): self._cluster_number_dict[cluster] = index+1 return clusters.insert(index, feature) feature.parent_record = self + self.group_cluster_cds(feature) for index, cluster in enumerate(clusters): self._cluster_number_dict[cluster] = index+1 return @@ -501,3 +469,14 @@ def from_biopython(self, record): feature = GenericFeature(feature) self._modified_generic.append(feature) return self + + def group_cluster_cds(self, cluster): + """Link cluster and their CDS features""" + clustercdsfeatures = [] + cdss = self.get_CDSs() + for cds in cdss: + if cluster.location.start <= cds.location.start <= cluster.location.end or \ + cluster.location.start <= cds.location.end <= cluster.location.end: + clustercdsfeatures.append(cds) + cds.cluster = cluster + cluster.cdss = clustercdsfeatures From 616e8e251d5f0739aede590a9fc9c3f8f59052ce Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Tue, 11 Jul 2017 23:31:31 +0530 Subject: [PATCH 30/71] Replace set_location() with location property. Add find_cluster_pos() and find_cluster() functions --- secmet/record.py | 128 +++++++++++++++++++++++++++---------------- tests/test_record.py | 3 +- 2 files changed, 83 insertions(+), 48 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index 1b5662e..301e7dd 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -22,18 +22,63 @@ def sort_features(seq_record): #Sort features by location seq_record.features.sort(cmp=cmp_feature_location) + +def find_cluster_pos(array, start, end, target): + """Search for appropriate position in array to add cluster""" + if not array: + return 0 + #stopping condition + try: + if array[start].location.start <= target.location.start and array[start+1].location.start >= target.location.start: + return start+1 + except IndexError: + return start+1 + + mid = start+(end-start)/2 + if start == end or mid == start: + if target.location.start < array[0].location.start: + return 0 + else: + return len(array) + if array[mid].location.start > target.location.start: + return find_cluster_pos(array, start, mid, target) + elif array[mid].location.start <= target.location.start: + return find_cluster_pos(array, mid, end, target) + +def find_cluster(array, start, end, target): + if not array: + return + #Stopping condition + if array[start].location.start <= target.location.start <= array[start].location.end or \ + array[start].location.start <= target.location.end <= array[start].location.end: + array[start].cdss.append(target) + target.cluster = array[start] + return + + mid = start+(end-start)/2 + if start == end or mid == start: + return + if array[mid].location.start > target.location.start: + return find_cluster(array, start, mid, target) + elif array[mid].location.start <= target.location.start: + return find_cluster(array, mid, end, target) + class Feature(object): """A Feature super class that expands to different subclasses""" def __init__(self): """ Initialise a feature object""" - self.location = None self.type = None - def set_location(self, location): - """Set feature's location""" - if not isinstance(location, (FeatureLocation, CompoundLocation)): - raise ValueError('Location should be an instance of FeatureLocation or CompoundLocation ') - self.location = location + def _get_location(self): + try: + return self.__location + except: + raise ValueError('Unassigned location') + def _set_location(self, value): + if not isinstance(value, (FeatureLocation, CompoundLocation)): + raise TypeError("Location must be of type FeatureLocation or CompoundLocation") + self.__location = value + location = property(_get_location, _set_location) class GenericFeature(Feature): """A GenericFeature Feature subclasses Feature @@ -48,7 +93,7 @@ def __init__(self, feature=None): if feature is not None: self._qualifiers = feature.qualifiers self.type = feature.type - self.set_location(feature.location) + self.location = feature.location def add_qualifier(self, category, info): @@ -70,8 +115,6 @@ def get_qualifier(self, category): def to_biopython(self): """Returns a Bio.SeqFeature.SeqFeature of given type of feature""" - if not isinstance(self.location, (FeatureLocation, CompoundLocation)): - raise ValueError("location should be an instance of FeatureLocation or CompoundLocation") new_Generic = SeqFeature(self.location, type=self.type) new_Generic.qualifiers = self._qualifiers.copy() return [new_Generic] @@ -95,6 +138,7 @@ def __init__(self, feature=None): self.cluster = None self.note = [] self.EC_number = None + self.transl_table = None self._qualifiers = {} self.type = 'CDS' @@ -126,7 +170,10 @@ def __init__(self, feature=None): if 'EC_number' in self._qualifiers: self.EC_number = self._qualifiers['EC_number'][0] - self.set_location(feature.location) + if 'transl_table' in self._qualifiers: + self.transl_table = self._qualifiers['transl_table'][0] + + self.location = feature.location def get_id(self): @@ -139,8 +186,7 @@ def get_cluster(self): def to_biopython(self): """Returns a Bio.SeqFeature.SeqFeature object with all its members""" - if not isinstance(self.location, (FeatureLocation, CompoundLocation)): - raise ValueError("location should be an instance of FeatureLocation or CompoundLocation") + new_CDS = SeqFeature(self.location, type=self.type, id=self.id) self._qualifiers['sec_met'] = self.sec_met self._qualifiers['locus_tag'] = [str(self.locus_tag)] self._qualifiers['product'] = [str(self.product)] @@ -150,7 +196,8 @@ def to_biopython(self): self._qualifiers['note'] = self.note if self.EC_number is not None: self._qualifiers['EC_number'] = [str(self.EC_number)] - new_CDS = SeqFeature(self.location, type=self.type, id=self.id) + if self.transl_table is not None: + self._qualifiers['transl_table'] = [str(self.transl_table)] new_CDS.qualifiers = self._qualifiers.copy() return [new_CDS] @@ -204,7 +251,7 @@ def __init__(self, feature=None): if 'probability' in self._qualifiers: self.probability = self._qualifiers['probability'][0] - self.set_location(feature.location) + self.location = feature.location def _get_cutoff(self): try: @@ -250,8 +297,7 @@ def get_CDSs(self): def to_biopython(self): """Returns a Bio.SeqFeature.SeqFeature object with all its members""" - if not isinstance(self.location, (FeatureLocation, CompoundLocation)): - raise ValueError("location should be an instance of FeatureLocation or CompoundLocation") + new_Cluster = SeqFeature(self.location, type=self.type) self._qualifiers['note'] = ["Cluster number: " + str(self.get_cluster_number())] self._qualifiers['note'].append(self.detection) self._qualifiers['note'].extend(self.note) @@ -263,7 +309,6 @@ def to_biopython(self): self._qualifiers['structure'] = [str(self.structure)] if self.probability is not None: self._qualifiers['probability'] = [str(self.probability)] - new_Cluster = SeqFeature(self.location, type=self.type) new_Cluster.qualifiers = self._qualifiers.copy() return [new_Cluster] @@ -425,31 +470,18 @@ def add_feature(self, feature): """Adds features to appropriate lists""" if not isinstance(feature, Feature): raise TypeError("The argument is not an instance of 'Feature'") - if feature.type == 'cluster': - if not isinstance(feature.location, (FeatureLocation, CompoundLocation)): - raise ValueError("location should be an instance of FeatureLocation or CompoundLocation") + if isinstance(feature, ClusterFeature): clusters = self.get_clusters() - clusters.append(None) - for index, cluster in enumerate(clusters): - if cluster is not None: - if feature.location.start < cluster.location.start: - break - else: - clusters[index] = feature - feature.parent_record = self - self.group_cluster_cds(feature) - for index, cluster in enumerate(clusters): - self._cluster_number_dict[cluster] = index+1 - return + index = find_cluster_pos(clusters, 0, len(clusters)-1, feature) clusters.insert(index, feature) feature.parent_record = self - self.group_cluster_cds(feature) - for index, cluster in enumerate(clusters): - self._cluster_number_dict[cluster] = index+1 - return + self._update_cluster_cds_links(feature) + for i, cluster in enumerate(clusters): + self._cluster_number_dict[cluster] = i+1 - elif feature.type == 'CDS': + elif isinstance(feature, CDSFeature): self._modified_cds.append(feature) + self._update_cluster_cds_links(feature) else: self._modified_generic.append(feature) @@ -470,13 +502,17 @@ def from_biopython(self, record): self._modified_generic.append(feature) return self - def group_cluster_cds(self, cluster): + def _update_cluster_cds_links(self, feature): """Link cluster and their CDS features""" - clustercdsfeatures = [] - cdss = self.get_CDSs() - for cds in cdss: - if cluster.location.start <= cds.location.start <= cluster.location.end or \ - cluster.location.start <= cds.location.end <= cluster.location.end: - clustercdsfeatures.append(cds) - cds.cluster = cluster - cluster.cdss = clustercdsfeatures + if isinstance(feature, ClusterFeature): + clustercdsfeatures = [] + cdss = self.get_CDSs() + for cds in cdss: + if feature.location.start <= cds.location.start <= feature.location.end or \ + feature.location.start <= cds.location.end <= feature.location.end: + clustercdsfeatures.append(cds) + cds.cluster = feature + feature.cdss = clustercdsfeatures + else: + clusters = self.get_clusters() + find_cluster(clusters, 0, len(clusters)-1, feature) diff --git a/tests/test_record.py b/tests/test_record.py index 363009b..a1f2a6e 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -100,10 +100,9 @@ def test_add_feature(): no_of_cdss = len(rec.get_CDSs()) no_of_generics = len(rec._modified_generic) new_cluster = ClusterFeature() - - #ClusterFeature should have valid location for adding new_cluster.location = FeatureLocation(15100, 15200) new_cds = CDSFeature() + new_cds.location = FeatureLocation(200, 300) new_generic = GenericFeature() rec.add_feature(new_cluster) rec.add_feature(new_cds) From 7b55371adda61615c83b142d9fe9931a70752d61 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Tue, 15 Aug 2017 04:43:37 +0530 Subject: [PATCH 31/71] Update record to integrate with antiSMASH -Introduced members to GenericFeature() -Add CDS_motifFeature() -Add aSDomain() -Add PFAM_domain() -Update Record() -Replace search functions from recursion to loop -Add members to CDSFeature() and ClusterFeature() -Add SecMetQualifier() --- secmet/record.py | 758 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 686 insertions(+), 72 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index 301e7dd..88278f5 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -23,52 +23,58 @@ def sort_features(seq_record): seq_record.features.sort(cmp=cmp_feature_location) -def find_cluster_pos(array, start, end, target): +def find_new_cluster_pos(clusters, target_cluster): """Search for appropriate position in array to add cluster""" - if not array: + if not clusters: return 0 - #stopping condition - try: - if array[start].location.start <= target.location.start and array[start+1].location.start >= target.location.start: + start = 0 + end = len(clusters)-1 + while True: + try: + #Stopping condition + if clusters[start].location.start <= target_cluster.location.start and \ + clusters[start+1].location.start >= target_cluster.location.start: + return start+1 + except IndexError: return start+1 - except IndexError: - return start+1 - - mid = start+(end-start)/2 - if start == end or mid == start: - if target.location.start < array[0].location.start: - return 0 + mid = start+(end-start)/2 + if start == end or mid == start: + if target_cluster.location.start < clusters[0].location.start: + return 0 + else: + return len(clusters) + if clusters[mid].location.start > target_cluster.location.start: + end = mid else: - return len(array) - if array[mid].location.start > target.location.start: - return find_cluster_pos(array, start, mid, target) - elif array[mid].location.start <= target.location.start: - return find_cluster_pos(array, mid, end, target) - -def find_cluster(array, start, end, target): - if not array: - return - #Stopping condition - if array[start].location.start <= target.location.start <= array[start].location.end or \ - array[start].location.start <= target.location.end <= array[start].location.end: - array[start].cdss.append(target) - target.cluster = array[start] - return + start = mid - mid = start+(end-start)/2 - if start == end or mid == start: +def find_cluster_of_new_cds(clusters, new_cds): + if not clusters: return - if array[mid].location.start > target.location.start: - return find_cluster(array, start, mid, target) - elif array[mid].location.start <= target.location.start: - return find_cluster(array, mid, end, target) + start = 0 + end = len(clusters)-1 + while True: + #Stopping condition + if clusters[start].location.start <= new_cds.location.start <= clusters[start].location.end or \ + clusters[start].location.start <= new_cds.location.end <= clusters[start].location.end: + clusters[start].cdss.append(new_cds) + new_cds.cluster = clusters[start] + return + mid = start+(end-start)/2 + if start == end or mid == start: + return + if clusters[mid].location.start > new_cds.location.start: + end = mid + elif clusters[mid].location.start <= new_cds.location.start: + start = mid + class Feature(object): """A Feature super class that expands to different subclasses""" def __init__(self): """ Initialise a feature object""" self.type = None - + self.notes = [] def _get_location(self): try: return self.__location @@ -76,79 +82,163 @@ def _get_location(self): raise ValueError('Unassigned location') def _set_location(self, value): if not isinstance(value, (FeatureLocation, CompoundLocation)): - raise TypeError("Location must be of type FeatureLocation or CompoundLocation") + raise TypeError("Location must be an instance of 'FeatureLocation' or 'CompoundLocation'") self.__location = value location = property(_get_location, _set_location) + def extract(self, parent_seq): + """Return Feature's seq from its parent's seq""" + if self.location is None: + raise ValueError("Location is None. Extracting Failed") + return self.location.extract(parent_seq) + + class GenericFeature(Feature): """A GenericFeature Feature subclasses Feature (Features other than CDSFeature and ClusterFeature) """ - def __init__(self, feature=None): + def __init__(self, f_location=None, f_type=None, feature=None): """Initialise a GenericFeature param feature: class 'Bio.SeqFeature.SeqFeature' """ super(GenericFeature, self).__init__() self._qualifiers = {} + self.locus_tag = None + self.translation = None + self.gene = None + self.name = None + self.seq = None + self.description = None + self.sec_met = [] + if feature is not None: self._qualifiers = feature.qualifiers self.type = feature.type self.location = feature.location - + if 'locus_tag' in self._qualifiers: + self.locus_tag = self._qualifiers['locus_tag'][0] + del self._qualifiers['locus_tag'] + if 'gene' in self._qualifiers: + self.gene = self._qualifiers['gene'][0] + del self._qualifiers['gene'] + if 'translation' in self._qualifiers: + self.translation = self._qualifiers['translation'][0] + del self._qualifiers['translation'] + if 'name' in self._qualifiers: + self.translation = self._qualifiers['name'][0] + del self._qualifiers['name'] + if 'seq' in self._qualifiers: + self.seq = self._qualifiers['seq'][0] + del self._qualifiers['seq'] + if 'description' in self._qualifiers: + self.description = self._qualifiers['description'][0] + del self._qualifiers['description'] + if 'sec_met' in self._qualifiers: + self.sec_met.extend(self._qualifiers['sec_met']) + del self._qualifiers['sec_met'] + else: + self.location = f_location + if not isinstance(f_type, str): + raise ValueError('Type of the feature should be a string') + self.type = f_type def add_qualifier(self, category, info): """Adds a qualifier to qualifiers dictionary""" - if not isinstance(category, str) and isinstance(info, str): + if not isinstance(category, str) and isinstance(info, (str, list)): raise TypeError("Type of qualifiers should be 'str'") - if category not in self._qualifiers: - self._qualifiers[category] = [info] + if category in ['evalue', 'score', 'probability']: + if not (info.replace('.', '')).replace('E-', '').isdigit(): + raise ValueError('%s should be a number'% category) + if hasattr(self, category): + if isinstance(getattr(self, category), list): + if isinstance(info, list): + getattr(self, category).extend(info) + else: + getattr(self, category).append(info) + else: + setattr(self, category, info) else: - self._qualifiers[category].append(info) + if category not in self._qualifiers: + if isinstance(info, list): + self._qualifiers[category] = info + else: + self._qualifiers[category] = [info] + else: + self._qualifiers[category].append(info) return None def get_qualifier(self, category): """Returns a qualifier of given category""" if category in self._qualifiers: return self._qualifiers[category] + elif category.lower() in self._qualifiers: + return self._qualifiers[category.lower()] + elif category.upper() in self._qualifiers: + return self._qualifiers[category.upper()] else: - return [] + if hasattr(self, category): + if getattr(self, category): + return getattr(self, category) + return [] def to_biopython(self): """Returns a Bio.SeqFeature.SeqFeature of given type of feature""" new_Generic = SeqFeature(self.location, type=self.type) - new_Generic.qualifiers = self._qualifiers.copy() + if self.locus_tag is not None: + new_Generic.qualifiers['locus_tag'] = [str(self.locus_tag)] + if self.translation is not None: + new_Generic.qualifiers['translation'] = [str(self.translation)] + if self.gene is not None: + new_Generic.qualifiers['gene'] = [str(self.gene)] + if self.name is not None: + new_Generic.qualifiers['name'] = [str(self.name)] + if self.seq is not None: + new_Generic.qualifiers['seq'] = [str(self.seq)] + if self.description is not None: + new_Generic.qualifiers['description'] = [str(self.description)] + if self.sec_met: + new_Generic.qualifiers['sec_met'] = self.sec_met + for key, value in self._qualifiers.items(): + new_Generic.qualifiers[key] = value return [new_Generic] + def __repr__(self): + return repr(self.to_biopython()[0]) + class CDSFeature(Feature): """A CDSFeature subclasses Feature""" - def __init__(self, feature=None): + def __init__(self, f_location=None, feature=None): """Initialise a CDSFeature param feature: class 'Bio.SeqFeature.SeqFeature' """ super(CDSFeature, self).__init__() self.id = '' - self.sec_met = [] + self.sec_met = SecMetQualifier() self.locus_tag = None self.product = None self.protein_id = None self.gene = None self.translation = None self.cluster = None - self.note = [] self.EC_number = None self.transl_table = None + self.source = None + self.aSProdPred = [] + self.aSASF_choice = [] + self.aSASF_note = [] + self.aSASF_prediction = [] + self.aSASF_scaffold = [] + self._qualifiers = {} + self.sec_met_predictions = [] + self.other_qualifiers = {} self.type = 'CDS' if feature is not None: - self._qualifiers = feature.qualifiers - if 'sec_met' in self._qualifiers: - self.sec_met = self._qualifiers['sec_met'] - if 'locus_tag' in self._qualifiers: self.locus_tag = self._qualifiers['locus_tag'][0] @@ -165,7 +255,7 @@ def __init__(self, feature=None): self.translation = self._qualifiers['translation'][0] if 'note' in self._qualifiers: - self.note = self._qualifiers['note'] + self.notes = self._qualifiers['note'] if 'EC_number' in self._qualifiers: self.EC_number = self._qualifiers['EC_number'][0] @@ -173,7 +263,29 @@ def __init__(self, feature=None): if 'transl_table' in self._qualifiers: self.transl_table = self._qualifiers['transl_table'][0] + if 'source' in self._qualifiers: + self.source = self._qualifiers['source'][0] + + if 'aSASF_choice' in self._qualifiers: + self.aSProdPred = self._qualifiers['aSASF_choiceS'] + + if 'aSASF_note' in self._qualifiers: + self.aSProdPred = self._qualifiers['aSASF_note'] + + if 'aSASF_prediction' in self._qualifiers: + self.aSProdPred = self._qualifiers['aSASF_prediction'] + + if 'aSASF_scaffold' in self._qualifiers: + self.aSProdPred = self._qualifiers['aSASF_scaffold'] + + if 'aSProdPred' in self._qualifiers: + self.aSProdPred = self._qualifiers['aSProdPred'] + + if 'sec_met_predictions' in self._qualifiers: + self.sec_met_predictions = self._qualifiers['sec_met_predictions'] self.location = feature.location + else: + self.location = f_location def get_id(self): @@ -187,25 +299,402 @@ def get_cluster(self): def to_biopython(self): """Returns a Bio.SeqFeature.SeqFeature object with all its members""" new_CDS = SeqFeature(self.location, type=self.type, id=self.id) - self._qualifiers['sec_met'] = self.sec_met - self._qualifiers['locus_tag'] = [str(self.locus_tag)] - self._qualifiers['product'] = [str(self.product)] - self._qualifiers['protein_id'] = [str(self.protein_id)] - self._qualifiers['gene'] = [str(self.gene)] - self._qualifiers['translation'] = [str(self.translation)] - self._qualifiers['note'] = self.note + if not isinstance(self.sec_met, SecMetQualifier): + raise ValueError('Invalid sec_met type') + self._qualifiers['sec_met'] = self.sec_met.as_list() + if self.locus_tag is not None: + self._qualifiers['locus_tag'] = [str(self.locus_tag)] + if self.product is not None: + self._qualifiers['product'] = [str(self.product)] + if self.protein_id is not None: + self._qualifiers['protein_id'] = [str(self.protein_id)] + if self.gene is not None: + self._qualifiers['gene'] = [str(self.gene)] + if self.translation is not None: + self._qualifiers['translation'] = [str(self.translation)] + if self.notes: + self._qualifiers['note'] = self.notes if self.EC_number is not None: self._qualifiers['EC_number'] = [str(self.EC_number)] if self.transl_table is not None: self._qualifiers['transl_table'] = [str(self.transl_table)] + if self.source is not None: + self._qualifiers['source'] = [str(self.source)] + if self.aSASF_choice: + self._qualifiers['aSASF_choice'] = self.aSASF_choice + if self.aSASF_note: + self._qualifiers['aSASF_note'] = self.aSASF_note + if self.aSASF_prediction: + self._qualifiers['aSASF_prediction'] = self.aSASF_prediction + if self.aSASF_scaffold: + self._qualifiers['aSASF_scaffold'] = self.aSASF_scaffold + if self.aSProdPred: + self._qualifiers['aSProdPred'] = self.aSProdPred + if self.sec_met_predictions: + self._qualifiers['sec_met_predictions'] = self.sec_met_predictions new_CDS.qualifiers = self._qualifiers.copy() return [new_CDS] + def __repr__(self): + return repr(self.to_biopython()[0]) + + +class CDS_motifFeature(Feature): + """A CDS_motifFeature which subclasses CDSFeature""" + def __init__(self, f_location=None, feature=None): + """Initialise a CDS_motifFeature + param f_location: class 'Bio.SeqFeature.FeatureLocation/CompoundLocation' + param feature: class 'Bio.SeqFeature.SeqFeature' + """ + super(CDS_motifFeature, self).__init__() + self.label = None + self.motif = None + self.aSDomain_id = None + self.aSTool = None + self.detection = None + self.database = None + self.translation = None + self.locus_tag = None + self.type = 'CDS_motif' + self._qualifiers = {} + + if feature is not None: + self._qualifiers = feature.qualifiers + + if 'locus_tag' in self._qualifiers: + self.locus_tag = self._qualifiers['locus_tag'][0] + + if 'translation' in self._qualifiers: + self.translation = self._qualifiers['translation'][0] + + if 'label' in self._qualifiers: + self.label = self._qualifiers['label'][0] + + if 'motif' in self._qualifiers: + self.motif = self._qualifiers['motif'][0] + + if 'aSDomain_id' in self._qualifiers: + self.aSDomain_id = self._qualifiers['aSDomain_id'][0] + + if 'evalue' in self._qualifiers: + self.evalue = self._qualifiers['evalue'][0] + + if 'score' in self._qualifiers: + self.score = self._qualifiers['score'][0] + + if 'aSTool' in self._qualifiers: + self.aSTool = self._qualifiers['aSTool'][0] + + if 'detection' in self._qualifiers: + self.detection = self._qualifiers['detection'][0] + + if 'database' in self._qualifiers: + self.database = self._qualifiers['database'][0] + + if 'note' in self._qualifiers: + self.notes = self._qualifiers['note'] + self.location = feature.location + else: + self.location = f_location + def _get_score(self): + try: + return self.__score + except: + return None + def _set_score(self, value): + if not ((value.replace('.', '')).replace('-', '')).isdigit(): + raise TypeError("score must be a number") + self.__score = value + score = property(_get_score, _set_score) + + def _get_evalue(self): + try: + return self.__evalue + except: + return None + def _set_evalue(self, value): + if not ((value.replace('.', '')).replace('E-', '').replace('E+', '')).isdigit(): + raise TypeError("evalue must be an number") + self.__evalue = value + evalue = property(_get_evalue, _set_evalue) + + def to_biopython(self): + """Returns a Bio.SeqFeature.SeqFeature object with all its members""" + new_CDS_motif = SeqFeature(self.location, type=self.type) + if self.locus_tag is not None: + self._qualifiers['locus_tag'] = [str(self.locus_tag)] + if self.translation is not None: + self._qualifiers['translation'] = [str(self.translation)] + if self.label is not None: + self._qualifiers['label'] = [str(self.label)] + if self.motif is not None: + self._qualifiers['motif'] = [str(self.motif)] + if self.database is not None: + self._qualifiers['database'] = [str(self.database)] + if self.evalue is not None: + self._qualifiers['evalue'] = [str(self.evalue)] + if self.aSDomain_id is not None: + self._qualifiers['aSDomain_id'] = [str(self.aSDomain_id)] + if self.detection is not None: + self._qualifiers['detection'] = [str(self.detection)] + if self.score is not None: + self._qualifiers['score'] = [str(self.score)] + if self.aSTool is not None: + self._qualifiers['aSTool'] = [str(self.aSTool)] + if self.notes: + self._qualifiers['note'] = self.notes + new_CDS_motif.qualifiers = self._qualifiers.copy() + return [new_CDS_motif] + + def __repr__(self): + return repr(self.to_biopython()[0]) + + +class PFAM_domain(Feature): + """A PHAM_domain feature which subclasses Feature""" + def __init__(self, f_location=None, feature=None): + """Initialise a ClusterFeature + param f_location: class 'Bio.SeqFeature.FeatureLocation/CompoundLocation' + param feature: class 'Bio.SeqFeature.SeqFeature' + """ + super(PFAM_domain, self).__init__() + self.domain = None + self.aSDomain_id = None + self.locus_tag = None + self.aSTool = None + self.detection = None + self.database = None + self.translation = None + self.description = None + self.db_xref = [] + self.label = [] + self.type = 'PFAM_domain' + self._qualifiers = {} + + if feature is not None: + self._qualifiers = feature.qualifiers + + if 'locus_tag' in self._qualifiers: + self.locus_tag = self._qualifiers['locus_tag'][0] + + if 'domain' in self._qualifiers: + self.domain = self._qualifiers['domain'][0] + + if 'translation' in self._qualifiers: + self.translation = self._qualifiers['translation'][0] + + if 'label' in self._qualifiers: + self.label = self._qualifiers['label'] + + if 'aSDomain_id' in self._qualifiers: + self.aSDomain_id = self._qualifiers['aSDomain_id'][0] + + if 'evalue' in self._qualifiers: + self.evalue = self._qualifiers['evalue'][0] + + if 'score' in self._qualifiers: + self.score = self._qualifiers['score'][0] + + if 'aSTool' in self._qualifiers: + self.aSTool = self._qualifiers['aSTool'][0] + + if 'detection' in self._qualifiers: + self.detection = self._qualifiers['detection'][0] + + if 'database' in self._qualifiers: + self.database = self._qualifiers['database'][0] + + if 'db_xref' in self._qualifiers: + self.db_xref = self._qualifiers['db_xref'] + + if 'description' in self._qualifiers: + self.description = self._qualifiers['description'][0] + + if 'note' in self._qualifiers: + self.notes = self._qualifiers['note'] + self.location = feature.location + else: + self.location = f_location + def _get_score(self): + try: + return self.__score + except: + return None + def _set_score(self, value): + if not ((value.replace('.', '')).replace('-', '')).isdigit(): + raise TypeError("score must be a number") + self.__score = value + score = property(_get_score, _set_score) + + def _get_evalue(self): + try: + return self.__evalue + except: + return None + def _set_evalue(self, value): + if not ((value.replace('.', '')).replace('E-', '').replace('E+', '')).isdigit(): + raise TypeError("evalue must be an number") + self.__evalue = value + evalue = property(_get_evalue, _set_evalue) + + def to_biopython(self): + """Returns a Bio.SeqFeature.SeqFeature object with all its members""" + new_PFAM_domain = SeqFeature(self.location, type=self.type) + if self.locus_tag is not None: + self._qualifiers['locus_tag'] = [str(self.locus_tag)] + if self.translation is not None: + self._qualifiers['translation'] = [str(self.translation)] + if self.label is not None: + self._qualifiers['label'] = self.label + if self.database is not None: + self._qualifiers['database'] = [str(self.database)] + if self.evalue is not None: + self._qualifiers['evalue'] = [str(self.evalue)] + if self.aSDomain_id is not None: + self._qualifiers['aSDomain_id'] = [str(self.aSDomain_id)] + if self.detection is not None: + self._qualifiers['detection'] = [str(self.detection)] + if self.score is not None: + self._qualifiers['score'] = [str(self.score)] + if self.aSTool is not None: + self._qualifiers['aSTool'] = [str(self.aSTool)] + if self.domain is not None: + self._qualifiers['domain'] = [str(self.domain)] + if self.description is not None: + self._qualifiers['description'] = [str(self.description)] + if self.db_xref is not None: + self._qualifiers['db_xref'] = self.db_xref + if self.notes: + self._qualifiers['note'] = self.notes + new_PFAM_domain.qualifiers = self._qualifiers.copy() + return [new_PFAM_domain] + + def __repr__(self): + return repr(self.to_biopython()[0]) + + +class aSDomain(Feature): + """A aSDomain feature which subclasses Feature""" + def __init__(self, f_location=None, feature=None): + """Initialise a ClusterFeature + param f_location: class 'Bio.SeqFeature.FeatureLocation/CompoundLocation' + param feature: class 'Bio.SeqFeature.SeqFeature' + """ + super(aSDomain, self).__init__() + self.domain = None + self.domain_subtype = None + self.aSDomain_id = None + self.locus_tag = None + self.detection = None + self.database = None + self.translation = None + self.label = [] + self.specificity = [] + self.type = 'aSDomain' + self._qualifiers = {} + + if feature is not None: + self._qualifiers = feature.qualifiers + + if 'locus_tag' in self._qualifiers: + self.locus_tag = self._qualifiers['locus_tag'][0] + + if 'domain' in self._qualifiers: + self.domain = self._qualifiers['domain'][0] + + if 'domain_subtype' in self._qualifiers: + self.domain_subtype = self._qualifiers['domain_subtype'][0] + + if 'translation' in self._qualifiers: + self.translation = self._qualifiers['translation'][0] + + if 'label' in self._qualifiers: + self.label = self._qualifiers['label'] + + if 'aSDomain_id' in self._qualifiers: + self.aSDomain_id = self._qualifiers['aSDomain_id'][0] + + if 'evalue' in self._qualifiers: + self.evalue = self._qualifiers['evalue'][0] + + if 'score' in self._qualifiers: + self.score = self._qualifiers['score'][0] + + if 'detection' in self._qualifiers: + self.detection = self._qualifiers['detection'][0] + + if 'database' in self._qualifiers: + self.database = self._qualifiers['database'][0] + + if 'note' in self._qualifiers: + self.notes = self._qualifiers['note'] + + if 'specificity' in self._qualifiers: + self.notes = self._qualifiers['specificity'] + self.location = feature.location + else: + self.location = f_location + def _get_score(self): + try: + return self.__score + except: + return None + def _set_score(self, value): + if not ((value.replace('.', '')).replace('-', '')).isdigit(): + raise TypeError("score must be a number") + self.__score = value + score = property(_get_score, _set_score) + + def _get_evalue(self): + try: + return self.__evalue + except: + return None + def _set_evalue(self, value): + if not ((value.replace('.', '')).replace('E-', '').replace('E+', '')).isdigit(): + raise TypeError("evalue must be an number") + self.__evalue = value + evalue = property(_get_evalue, _set_evalue) + + def to_biopython(self): + """Returns a Bio.SeqFeature.SeqFeature object with all its members""" + new_aSDomain = SeqFeature(self.location, type=self.type) + if self.locus_tag is not None: + self._qualifiers['locus_tag'] = [str(self.locus_tag)] + if self.translation is not None: + self._qualifiers['translation'] = [str(self.translation)] + if self.label is not None: + self._qualifiers['label'] = self.label + if self.database is not None: + self._qualifiers['database'] = [str(self.database)] + if self.evalue is not None: + self._qualifiers['evalue'] = [str(self.evalue)] + if self.aSDomain_id is not None: + self._qualifiers['aSDomain_id'] = [str(self.aSDomain_id)] + if self.detection is not None: + self._qualifiers['detection'] = [str(self.detection)] + if self.score is not None: + self._qualifiers['score'] = [str(self.score)] + if self.domain_subtype is not None: + self._qualifiers['domain_subtype'] = [str(self.domain_subtype)] + if self.domain is not None: + self._qualifiers['domain'] = [str(self.domain)] + if self.notes: + self._qualifiers['note'] = self.notes + if self.specificity: + self._qualifiers['specificity'] = self.specificity + new_aSDomain.qualifiers = self._qualifiers.copy() + return [new_aSDomain] + + def __repr__(self): + return repr(self.to_biopython()[0]) + class ClusterFeature(Feature): """A ClusterFeature which subclasses Feature""" - def __init__(self, feature=None): + def __init__(self, f_location=None, feature=None): """Initialise a ClusterFeature + param f_location: class 'Bio.SeqFeature.FeatureLocation/CompoundLocation' param feature: class 'Bio.SeqFeature.SeqFeature' """ super(ClusterFeature, self).__init__() @@ -215,13 +704,16 @@ def __init__(self, feature=None): self._qualifiers = {} self.parent_record = None self.type = 'cluster' - self.note = [] self.structure = None self.probability = None + self.subclusterblast = None + self.knownclusterblast = None + self.clusterblast = None self.cdss = [] if feature is not None: self._qualifiers = feature.qualifiers + if 'cutoff' in self._qualifiers: self.cutoff = int(self._qualifiers['cutoff'][0]) @@ -241,7 +733,7 @@ def __init__(self, feature=None): if value.startswith('Detection rule(s)'): self.detection = value note_list_copy.remove(value) - self.note.extend(note_list_copy) + self.notes.extend(note_list_copy) if 'product' in self._qualifiers: self.products = self._qualifiers['product'] @@ -252,6 +744,8 @@ def __init__(self, feature=None): if 'probability' in self._qualifiers: self.probability = self._qualifiers['probability'][0] self.location = feature.location + else: + self.location = f_location def _get_cutoff(self): try: @@ -300,7 +794,7 @@ def to_biopython(self): new_Cluster = SeqFeature(self.location, type=self.type) self._qualifiers['note'] = ["Cluster number: " + str(self.get_cluster_number())] self._qualifiers['note'].append(self.detection) - self._qualifiers['note'].extend(self.note) + self._qualifiers['note'].extend(self.notes) self._qualifiers['cutoff'] = [str(self.cutoff)] self._qualifiers['extension'] = [str(self.extension)] self._qualifiers['product'] = self.products @@ -309,9 +803,17 @@ def to_biopython(self): self._qualifiers['structure'] = [str(self.structure)] if self.probability is not None: self._qualifiers['probability'] = [str(self.probability)] + if self.subclusterblast is not None: + self._qualifiers['subclusterblast'] = self.subclusterblast + if self.knownclusterblast is not None: + self._qualifiers['knownclusterblast'] = self.knownclusterblast + if self.clusterblast is not None: + self._qualifiers['clusterblast'] = self.clusterblast new_Cluster.qualifiers = self._qualifiers.copy() return [new_Cluster] + def __repr__(self): + return repr(self.to_biopython()[0]) class Record(object): """A record containing secondary metabolite clusters""" @@ -323,10 +825,13 @@ def __init__(self, seq_record=None): :type seq_record: :class:`Bio.SeqRecord.SeqRecord` """ self._record = seq_record - self._modified_cds = [] #A list containing instances of CDSFeature - self._modified_cluster = [] #A list containing instances of ClusterFeature - self._modified_generic = [] #A list containing instances of GenericFeature - self._cluster_number_dict = {} #A dictionary to map clusters and their numbers + self._modified_cds = [] #A list containing instances of CDSFeature + self._modified_cluster = [] #A list containing instances of ClusterFeature + self._modified_generic = [] #A list containing instances of GenericFeature + self._modified_cds_motif = [] #A list containing instances of CDS_motifFeature + self._modified_pfam_domain = [] #A list containing instances of PFAM_domain + self._modified_asdomain = [] #A list containing instances of aSDomain + self._cluster_number_dict = {} #A dictionary to map clusters and their numbers if self._record is not None: if not isinstance(self._record, SeqRecord): @@ -439,6 +944,27 @@ def set_clusters(self, clusters_list): def get_CDSs(self): """A list of secondary metabolite clusters present in the record""" return self._modified_cds + def set_CDSs(self, cds_list): + """To set the CDS features of the seq_record""" + self._modified_cds = cds_list + + def get_CDS_motifs(self): + """A list of secondary metabolite CDS_motifs present in the record""" + return self._modified_cds_motif + def set_CDS_motifs(self, cds_motif_list): + """To set the cds_motifs features of the seq_record""" + self._modified_cds_motif = cds_motif_list + + def get_PFAM_domains(self): + """A list of secondary metabolite PFAM_domains present in the record""" + return self._modified_pfam_domain + + def get_aSDomains(self): + """A list of secondary metabolite aSDomains present in the record""" + return self._modified_asdomain + def set_aSDomains(self, asdomains_list): + """To set the asdomains features of the seq_record""" + self._modified_asdomain = asdomains_list def get_generics(self): """A list of secondary metabolite generics present in the record""" @@ -447,12 +973,23 @@ def set_generics(self, generics_list): """To set the generic features of the seq_record""" self._modified_generic = generics_list + def get_secmet_features(self): + """Return all features with sec_met qualifier""" + secmet_features = self.get_CDSs() + for generic in self.get_generics(): + if generic.sec_met: + secmet_features.append(generic) + return secmet_features + def to_biopython(self): """Returns a Bio.SeqRecord instance of the record""" new_record = self._record features = self.get_generics()[:] #Clone the private list features.extend(self.get_clusters()) features.extend(self.get_CDSs()) + features.extend(self.get_CDS_motifs()) + features.extend(self.get_aSDomains()) + features.extend(self.get_PFAM_domains()) record_features = [] for feature in features: record_features.append(feature.to_biopython()[0]) @@ -472,7 +1009,7 @@ def add_feature(self, feature): raise TypeError("The argument is not an instance of 'Feature'") if isinstance(feature, ClusterFeature): clusters = self.get_clusters() - index = find_cluster_pos(clusters, 0, len(clusters)-1, feature) + index = find_new_cluster_pos(clusters, feature) clusters.insert(index, feature) feature.parent_record = self self._update_cluster_cds_links(feature) @@ -482,6 +1019,12 @@ def add_feature(self, feature): elif isinstance(feature, CDSFeature): self._modified_cds.append(feature) self._update_cluster_cds_links(feature) + elif isinstance(feature, CDS_motifFeature): + self._modified_cds_motif.append(feature) + elif isinstance(feature, PFAM_domain): + self._modified_pfam_domain.append(feature) + elif isinstance(feature, aSDomain): + self._modified_asdomain.append(feature) else: self._modified_generic.append(feature) @@ -490,20 +1033,32 @@ def from_biopython(self, record): features = record.features for feature in features: if feature.type == 'CDS': - feature = CDSFeature(feature) + feature = CDSFeature(feature=feature) self._modified_cds.append(feature) elif feature.type == 'cluster': - feature = ClusterFeature(feature) + feature = ClusterFeature(feature=feature) feature.parent_record = self self._modified_cluster.append(feature) self._cluster_number_dict[feature] = self._modified_cluster.index(feature)+1 + elif feature.type == 'CDS_motif': + feature = CDS_motifFeature(feature=feature) + self._modified_cds_motif.append(feature) + elif feature.type == 'PFAM_domain': + feature = PFAM_domain(feature=feature) + self._modified_pfam_domain.append(feature) + elif feature.type == 'aSDomain': + feature = aSDomain(feature=feature) + self._modified_asdomain.append(feature) else: - feature = GenericFeature(feature) + feature = GenericFeature(feature=feature) self._modified_generic.append(feature) + cluster_cds_features = self.get_CDSs() + self.get_clusters() + for feature in cluster_cds_features: + self._update_cluster_cds_links(feature) return self def _update_cluster_cds_links(self, feature): - """Link cluster and their CDS features""" + """Link cluster and their corresponding CDS features""" if isinstance(feature, ClusterFeature): clustercdsfeatures = [] cdss = self.get_CDSs() @@ -515,4 +1070,63 @@ def _update_cluster_cds_links(self, feature): feature.cdss = clustercdsfeatures else: clusters = self.get_clusters() - find_cluster(clusters, 0, len(clusters)-1, feature) + find_cluster_of_new_cds(clusters, feature) + + +class SecMetQualifier(list): + """A Secmet class to store sec_met qualifiers""" + def __init__(self, clustertype=None, domains=None, kind=None): + self.clustertype = clustertype + self.domains = domains + self.kind = kind + self.nrpspks = [] + self.asf_predictions = [] + super(SecMetQualifier, self).__init__() + + def __len__(self): + """Return length of the sec_met qualifier""" + count = 0 + if self.clustertype is not None: + count += 1 + if self.domains is not None: + count += 1 + if self.kind is not None: + count += 1 + if self.nrpspks: + count += len(self.nrpspks) + if self.asf_predictions: + count += len(self.asf_predictions) + return count + + def __repr__(self): + """A string representation of the sec_met qualifier""" + return str(self.as_list()) + + def __nonzero__(self): + """Returns False if sec_met doesn't contain any qualifier""" + if self.clustertype is not None or self.kind is not None or (self.domains is not None and self.domains): + return True + if self.nrpspks or self.asf_predictions: + return True + return False + + def __iter__(self): + if self.clustertype is not None: + yield "Type: %s" % self.clustertype + if self.domains is not None: + yield "Domains detected: " + "; ".join(map(str, self.domains)) + if self.kind is not None: + yield "Kind: %s" % self.kind + if self.nrpspks: + for nrps in self.nrpspks: + yield nrps + if self.asf_predictions: + for asf in self.asf_predictions: + yield asf + + def as_list(self): + """Returns a list of all sec_met qualifiers""" + self._sec_met = [] + for qual in self: + self._sec_met.append(qual) + return self._sec_met From 25e6292ca224566d71ae552649a17dd05dd81cb6 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Tue, 15 Aug 2017 04:45:29 +0530 Subject: [PATCH 32/71] Modify tests to use unittest --- tests/test_record.py | 197 ++++++++++++++++++++++--------------------- 1 file changed, 100 insertions(+), 97 deletions(-) diff --git a/tests/test_record.py b/tests/test_record.py index a1f2a6e..e68517e 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -1,4 +1,5 @@ from os import path +import unittest import Bio from Bio import SeqIO from Bio.SeqFeature import FeatureLocation @@ -8,108 +9,110 @@ filename = 'nisin.gbk' filetype = 'genbank' -def get_testfile(): - """File path for testing""" - return path.join(path.dirname(__file__), 'data', filename) +class TestRecordMethods(unittest.TestCase): -def test_from_file(): - """Test file operations in Record""" - testfile = get_testfile() - bp_rec = SeqIO.read(testfile, filetype) - rec = Record.from_file(testfile) - assert isinstance(rec, Record) - assert rec.id == bp_rec.id - assert rec.seq == bp_rec.seq - # SNAG: Can't compare Reference objects in Biopython :( - # So delete them to make the test work. - del rec.annotations['references'] - del bp_rec.annotations['references'] - assert rec.annotations == bp_rec.annotations - assert rec.description == bp_rec.description + def get_testfile(self): + """File path for testing""" + return path.join(path.dirname(__file__), 'data', filename) -def test_from_biopython(): - """Test from_biopython() in Record""" - testfile = get_testfile() - rec = Record.from_file(testfile) - assert isinstance(rec.from_biopython(rec._record), Record) + def test_from_file(self): + """Test file operations in Record""" + testfile = self.get_testfile() + bp_rec = SeqIO.read(testfile, filetype) + rec = Record.from_file(testfile) + assert isinstance(rec, Record) + self.assertEqual(rec.id, bp_rec.id) + self.assertEqual(rec.seq, bp_rec.seq) + # SNAG: Can't compare Reference objects in Biopython :( + # So delete them to make the test work. + del rec.annotations['references'] + del bp_rec.annotations['references'] + self.assertEqual(rec.annotations, bp_rec.annotations) + self.assertEqual(rec.description, bp_rec.description) -def test_to_biopython(): - """Test to_biopython() in Record""" - testfile = get_testfile() - rec = Record.from_file(testfile) - assert isinstance(rec.to_biopython(), Bio.SeqRecord.SeqRecord) + def test_from_biopython(self): + """Test from_biopython() in Record""" + testfile = self.get_testfile() + rec = Record.from_file(testfile) + self.assertIsInstance(rec.from_biopython(rec._record), Record) -def test_get_clusters(): - """Test get_clusters() in Record""" - testfile = get_testfile() - rec = Record.from_file(testfile) - bp_rec = SeqIO.read(testfile, filetype) - bp_clusters = [i for i in bp_rec.features if i.type == 'cluster'] - mod_clusters = [i.to_biopython()[0] for i in rec.get_clusters()] - assert len(mod_clusters) == len(bp_clusters) - for bcluster, mcluster in zip(bp_clusters, mod_clusters): - assert isinstance(mcluster, Bio.SeqFeature.SeqFeature) - assert bcluster.type == mcluster.type - assert bcluster.location.__str__() == mcluster.location.__str__() - for key, value in bcluster.qualifiers.items(): - assert value == mcluster.qualifiers[key] + def test_to_biopython(self): + """Test to_biopython() in Record""" + testfile = self.get_testfile() + rec = Record.from_file(testfile) + self.assertIsInstance(rec.to_biopython(), Bio.SeqRecord.SeqRecord) -def test_get_CDSs(): - """Test get_CDSs() in Record""" - testfile = get_testfile() - rec = Record.from_file(testfile) - bp_rec = SeqIO.read(testfile, filetype) - bp_CDSs = [i for i in bp_rec.features if i.type == 'CDS'] - mod_CDSs = [i.to_biopython()[0] for i in rec.get_CDSs()] - assert len(mod_CDSs) == len(bp_CDSs) - for bcds, mcds in zip(bp_CDSs, mod_CDSs): - assert isinstance(mcds, Bio.SeqFeature.SeqFeature) - assert bcds.type == mcds.type - assert bcds.location.__str__() == mcds.location.__str__() - for key, value in bcds.qualifiers.items(): - assert value == mcds.qualifiers[key] + def test_get_clusters(self): + """Test get_clusters() in Record""" + testfile = self.get_testfile() + rec = Record.from_file(testfile) + bp_rec = SeqIO.read(testfile, filetype) + bp_clusters = [i for i in bp_rec.features if i.type == 'cluster'] + mod_clusters = [i.to_biopython()[0] for i in rec.get_clusters()] + self.assertEqual(len(mod_clusters), len(bp_clusters)) + for bcluster, mcluster in zip(bp_clusters, mod_clusters): + self.assertIsInstance(mcluster, Bio.SeqFeature.SeqFeature) + self.assertEqual(bcluster.type, mcluster.type) + self.assertEqual(bcluster.location.__str__(), mcluster.location.__str__()) + for key, value in bcluster.qualifiers.items(): + self.assertEqual(value, mcluster.qualifiers[key]) -def test_modified_generic(): - """Test _modified_generic list in Record""" - testfile = get_testfile() - rec = Record.from_file(testfile) - bp_rec = SeqIO.read(testfile, filetype) - bp_gens = [i for i in bp_rec.features if i.type != 'CDS' and i.type != 'cluster'] - mod_gens = [i.to_biopython()[0] for i in rec._modified_generic] - assert len(mod_gens) == len(bp_gens) - for bgen, mgen in zip(bp_gens, mod_gens): - assert isinstance(mgen, Bio.SeqFeature.SeqFeature) - assert bgen.type == mgen.type - assert bgen.location.__str__() == mgen.location.__str__() - for key, value in bgen.qualifiers.items(): - assert value == mgen.qualifiers[key] + def test_get_CDSs(self): + """Test get_CDSs() in Record""" + testfile = self.get_testfile() + rec = Record.from_file(testfile) + bp_rec = SeqIO.read(testfile, filetype) + bp_CDSs = [i for i in bp_rec.features if i.type == 'CDS'] + mod_CDSs = [i.to_biopython()[0] for i in rec.get_CDSs()] + self.assertEqual(len(mod_CDSs), len(bp_CDSs)) + for bcds, mcds in zip(bp_CDSs, mod_CDSs): + self.assertIsInstance(mcds, Bio.SeqFeature.SeqFeature) + self.assertEqual(bcds.type, mcds.type) + self.assertEqual(bcds.location.__str__(), mcds.location.__str__()) + for key, value in bcds.qualifiers.items(): + if key != 'sec_met': + self.assertEqual(value, mcds.qualifiers[key]) -def test_get_cluster_number(): - """Test get_cluster_number() in Record""" - testfile = get_testfile() - rec = Record.from_file(testfile) - clusters = rec.get_clusters() - for index, cluster in enumerate(clusters): - assert rec.get_cluster_number(cluster) == index+1 + def test_get_generics(self): + """Test get_generics() in Record""" + testfile = self.get_testfile() + rec = Record.from_file(testfile) + bp_rec = SeqIO.read(testfile, filetype) + non_generic_features = ['CDS', 'cluster', 'CDS_motif', 'aSDomain', 'PFAM_domain'] + bp_gens = [i for i in bp_rec.features if i.type not in non_generic_features] + mod_gens = [i.to_biopython()[0] for i in rec.get_generics()] + self.assertEqual(len(mod_gens), len(bp_gens)) + for bgen, mgen in zip(bp_gens, mod_gens): + self.assertIsInstance(mgen, Bio.SeqFeature.SeqFeature) + self.assertEqual(bgen.type, mgen.type) + self.assertEqual(bgen.location.__str__(), mgen.location.__str__()) + for key, value in bgen.qualifiers.items(): + self.assertEqual(value, mgen.qualifiers[key]) -def test_add_feature(): - """Test add_feature() in Record""" - testfile = get_testfile() - rec = Record.from_file(testfile) - no_of_clusters = len(rec.get_clusters()) - no_of_cdss = len(rec.get_CDSs()) - no_of_generics = len(rec._modified_generic) - new_cluster = ClusterFeature() - new_cluster.location = FeatureLocation(15100, 15200) - new_cds = CDSFeature() - new_cds.location = FeatureLocation(200, 300) - new_generic = GenericFeature() - rec.add_feature(new_cluster) - rec.add_feature(new_cds) - rec.add_feature(new_generic) - clusters = rec.get_clusters() - assert no_of_clusters+1 == len(clusters) - assert no_of_cdss+1 == len(rec.get_CDSs()) - assert no_of_generics+1 == len(rec._modified_generic) - for index, cluster in enumerate(clusters): - assert cluster.get_cluster_number() == index+1 + def test_get_cluster_number(self): + """Test get_cluster_number() in Record""" + testfile = self.get_testfile() + rec = Record.from_file(testfile) + clusters = rec.get_clusters() + for index, cluster in enumerate(clusters): + self.assertEqual(rec.get_cluster_number(cluster), index+1) + + def test_add_feature(self): + """Test add_feature() in Record""" + testfile = self.get_testfile() + rec = Record.from_file(testfile) + no_of_clusters = len(rec.get_clusters()) + no_of_cdss = len(rec.get_CDSs()) + no_of_generics = len(rec.get_generics()) + new_cluster = ClusterFeature(FeatureLocation(15100, 15200)) + new_cds = CDSFeature(FeatureLocation(200, 300)) + new_generic = GenericFeature(FeatureLocation(350, 450), 'FAKE') + rec.add_feature(new_cluster) + rec.add_feature(new_cds) + rec.add_feature(new_generic) + clusters = rec.get_clusters() + self.assertEqual(no_of_clusters+1, len(clusters)) + self.assertEqual(no_of_cdss+1, len(rec.get_CDSs())) + self.assertEqual(no_of_generics+1, len(rec._modified_generic)) + for index, cluster in enumerate(clusters): + self.assertEqual(cluster.get_cluster_number(), index+1) From 10063aa87414e4fbeb73b0268b6a431af332474e Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Tue, 15 Aug 2017 05:09:54 +0530 Subject: [PATCH 33/71] Add tests for CDS_motif, aSDomain, PFAM_domain --- tests/test_record.py | 47 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/tests/test_record.py b/tests/test_record.py index e68517e..84a1864 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -89,6 +89,53 @@ def test_get_generics(self): for key, value in bgen.qualifiers.items(): self.assertEqual(value, mgen.qualifiers[key]) + def test_get_CDS_motifs(self): + """Test get_CDSs() in Record""" + testfile = self.get_testfile() + rec = Record.from_file(testfile) + bp_rec = SeqIO.read(testfile, filetype) + bp_CDS_motifs = [i for i in bp_rec.features if i.type == 'CDS_motif'] + mod_CDS_motifs = [i.to_biopython()[0] for i in rec.get_CDS_motifs()] + self.assertEqual(len(mod_CDS_motifs), len(bp_CDS_motifs)) + for b_motif, m_motif in zip(mod_CDS_motifs, bp_CDS_motifs): + self.assertIsInstance(m_motif, Bio.SeqFeature.SeqFeature) + self.assertEqual(b_motif.type, m_motif.type) + self.assertEqual(b_motif.location.__str__(), m_motif.location.__str__()) + for key, value in b_motif.qualifiers.items(): + self.assertEqual(value, m_motif.qualifiers[key]) + + def test_get_PFAM_domains(self): + """Test get_CDSs() in Record""" + testfile = self.get_testfile() + rec = Record.from_file(testfile) + bp_rec = SeqIO.read(testfile, filetype) + bp_PFAM_domains = [i for i in bp_rec.features if i.type == 'PFAM_domain'] + mod_PFAM_domains = [i.to_biopython()[0] for i in rec.get_PFAM_domains()] + self.assertEqual(len(mod_PFAM_domains), len(bp_PFAM_domains)) + for b_fam, m_fam in zip(mod_PFAM_domains, bp_PFAM_domains): + self.assertIsInstance(m_fam, Bio.SeqFeature.SeqFeature) + self.assertEqual(b_fam.type, m_fam.type) + self.assertEqual(b_fam.location.__str__(), m_fam.location.__str__()) + for key, value in b_fam.qualifiers.items(): + if value is not None and value: + self.assertEqual(value, m_fam.qualifiers[key]) + + def test_get_aSDomains(self): + """Test get_CDSs() in Record""" + testfile = self.get_testfile() + rec = Record.from_file(testfile) + bp_rec = SeqIO.read(testfile, filetype) + bp_aSDomains = [i for i in bp_rec.features if i.type == 'aSDomain'] + mod_aSDomains = [i.to_biopython()[0] for i in rec.get_aSDomains()] + self.assertEqual(len(mod_aSDomains), len(bp_aSDomains)) + for b_asdomain, m_asdomain in zip(mod_aSDomains, bp_aSDomains): + self.assertIsInstance(m_asdomain, Bio.SeqFeature.SeqFeature) + self.assertEqual(b_asdomain.type, m_asdomain.type) + self.assertEqual(b_asdomain.location.__str__(), m_asdomain.location.__str__()) + for key, value in b_asdomain.qualifiers.items(): + if value is not None and value: + self.assertEqual(value, m_asdomain.qualifiers[key]) + def test_get_cluster_number(self): """Test get_cluster_number() in Record""" testfile = self.get_testfile() From c7b4b802dcc55aa532e3918ac15c990018e2e563 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Tue, 15 Aug 2017 05:34:28 +0530 Subject: [PATCH 34/71] Modify test_add_feature() to test CDS_motif(), aSDomain() and PFAM_domain() Update docstrings --- tests/test_record.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/tests/test_record.py b/tests/test_record.py index 84a1864..dcca15f 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -3,7 +3,8 @@ import Bio from Bio import SeqIO from Bio.SeqFeature import FeatureLocation -from secmet.record import Record, GenericFeature, ClusterFeature, CDSFeature +from secmet.record import Record, GenericFeature, ClusterFeature, CDSFeature \ + , CDS_motifFeature, aSDomain, PFAM_domain #Global variables for test file name and its type filename = 'nisin.gbk' @@ -90,7 +91,7 @@ def test_get_generics(self): self.assertEqual(value, mgen.qualifiers[key]) def test_get_CDS_motifs(self): - """Test get_CDSs() in Record""" + """Test get_CDS_motifs() in Record""" testfile = self.get_testfile() rec = Record.from_file(testfile) bp_rec = SeqIO.read(testfile, filetype) @@ -105,7 +106,7 @@ def test_get_CDS_motifs(self): self.assertEqual(value, m_motif.qualifiers[key]) def test_get_PFAM_domains(self): - """Test get_CDSs() in Record""" + """Test get_PFAM_domains() in Record""" testfile = self.get_testfile() rec = Record.from_file(testfile) bp_rec = SeqIO.read(testfile, filetype) @@ -121,7 +122,7 @@ def test_get_PFAM_domains(self): self.assertEqual(value, m_fam.qualifiers[key]) def test_get_aSDomains(self): - """Test get_CDSs() in Record""" + """Test get_aSDomains() in Record""" testfile = self.get_testfile() rec = Record.from_file(testfile) bp_rec = SeqIO.read(testfile, filetype) @@ -151,15 +152,28 @@ def test_add_feature(self): no_of_clusters = len(rec.get_clusters()) no_of_cdss = len(rec.get_CDSs()) no_of_generics = len(rec.get_generics()) + no_of_cds_motifs = len(rec.get_CDS_motifs()) + no_of_pfam_domains = len(rec.get_PFAM_domains()) + no_of_asdomains = len(rec.get_aSDomains()) + #Create new Feature's with fake identity and fake location new_cluster = ClusterFeature(FeatureLocation(15100, 15200)) new_cds = CDSFeature(FeatureLocation(200, 300)) new_generic = GenericFeature(FeatureLocation(350, 450), 'FAKE') + new_cds_motif = CDS_motifFeature(FeatureLocation(150, 200)) + new_pfam_domain = PFAM_domain(FeatureLocation(500, 600)) + new_asdomain = aSDomain(FeatureLocation(600, 700)) rec.add_feature(new_cluster) rec.add_feature(new_cds) rec.add_feature(new_generic) + rec.add_feature(new_cds_motif) + rec.add_feature(new_pfam_domain) + rec.add_feature(new_asdomain) clusters = rec.get_clusters() self.assertEqual(no_of_clusters+1, len(clusters)) self.assertEqual(no_of_cdss+1, len(rec.get_CDSs())) - self.assertEqual(no_of_generics+1, len(rec._modified_generic)) + self.assertEqual(no_of_generics+1, len(rec.get_generics())) + self.assertEqual(no_of_cds_motifs+1, len(rec.get_CDS_motifs())) + self.assertEqual(no_of_pfam_domains+1, len(rec.get_PFAM_domains())) + self.assertEqual(no_of_asdomains+1, len(rec.get_aSDomains())) for index, cluster in enumerate(clusters): self.assertEqual(cluster.get_cluster_number(), index+1) From 6f0d3dd37880cbb40c7bc4e7e1eb0ccfc77c4c91 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Tue, 15 Aug 2017 06:23:16 +0530 Subject: [PATCH 35/71] Add test_cluster_cds_links() --- tests/test_record.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/test_record.py b/tests/test_record.py index dcca15f..c4c6d88 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -145,6 +145,27 @@ def test_get_cluster_number(self): for index, cluster in enumerate(clusters): self.assertEqual(rec.get_cluster_number(cluster), index+1) + def test_cluster_cds_links(self): + """Test whether cluster(s) and CDS(s) are properly linked""" + testfile = self.get_testfile() + rec = Record.from_file(testfile) + bp_rec = SeqIO.read(testfile, filetype) + bp_clusters = [i for i in bp_rec.features if i.type == 'cluster'] + bp_cdss = [i for i in bp_rec.features if i.type == 'CDS'] + mod_clusters = rec.get_clusters() + self.assertEqual(len(bp_clusters), len(mod_clusters)) + for bp_cluster, mod_cluster in zip(bp_clusters, mod_clusters): + bp_clustercdsfeatures = [] + for cds in bp_cdss: + if bp_cluster.location.start <= cds.location.start <= bp_cluster.location.end or \ + bp_cluster.location.start <= cds.location.end <= bp_cluster.location.end: + bp_clustercdsfeatures.append(cds) + self.assertEqual(len(bp_clustercdsfeatures), len(mod_cluster.get_CDSs())) + for bp_cds, mod_cds in zip(bp_clustercdsfeatures, mod_cluster.get_CDSs()): + self.assertEqual(str(bp_cds.location), str(mod_cds.location)) + self.assertEqual(str(mod_cds.get_cluster().location), str(bp_cluster.location), \ + str(mod_cluster.location)) + def test_add_feature(self): """Test add_feature() in Record""" testfile = self.get_testfile() From c8ede0d58a82cf3d4070142c3b01e37670598836 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Tue, 15 Aug 2017 06:24:54 +0530 Subject: [PATCH 36/71] Replace __str__() with str() Check for 'references' before deleting --- tests/test_record.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/tests/test_record.py b/tests/test_record.py index c4c6d88..c3378b9 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -26,8 +26,10 @@ def test_from_file(self): self.assertEqual(rec.seq, bp_rec.seq) # SNAG: Can't compare Reference objects in Biopython :( # So delete them to make the test work. - del rec.annotations['references'] - del bp_rec.annotations['references'] + if 'references' in rec.annotations: + del rec.annotations['references'] + if 'references' in bp_rec.annotations: + del bp_rec.annotations['references'] self.assertEqual(rec.annotations, bp_rec.annotations) self.assertEqual(rec.description, bp_rec.description) @@ -54,7 +56,7 @@ def test_get_clusters(self): for bcluster, mcluster in zip(bp_clusters, mod_clusters): self.assertIsInstance(mcluster, Bio.SeqFeature.SeqFeature) self.assertEqual(bcluster.type, mcluster.type) - self.assertEqual(bcluster.location.__str__(), mcluster.location.__str__()) + self.assertEqual(str(bcluster.location), str(mcluster.location)) for key, value in bcluster.qualifiers.items(): self.assertEqual(value, mcluster.qualifiers[key]) @@ -69,7 +71,7 @@ def test_get_CDSs(self): for bcds, mcds in zip(bp_CDSs, mod_CDSs): self.assertIsInstance(mcds, Bio.SeqFeature.SeqFeature) self.assertEqual(bcds.type, mcds.type) - self.assertEqual(bcds.location.__str__(), mcds.location.__str__()) + self.assertEqual(str(bcds.location), str(mcds.location)) for key, value in bcds.qualifiers.items(): if key != 'sec_met': self.assertEqual(value, mcds.qualifiers[key]) @@ -86,7 +88,7 @@ def test_get_generics(self): for bgen, mgen in zip(bp_gens, mod_gens): self.assertIsInstance(mgen, Bio.SeqFeature.SeqFeature) self.assertEqual(bgen.type, mgen.type) - self.assertEqual(bgen.location.__str__(), mgen.location.__str__()) + self.assertEqual(str(bgen.location), str(mgen.location)) for key, value in bgen.qualifiers.items(): self.assertEqual(value, mgen.qualifiers[key]) @@ -101,7 +103,7 @@ def test_get_CDS_motifs(self): for b_motif, m_motif in zip(mod_CDS_motifs, bp_CDS_motifs): self.assertIsInstance(m_motif, Bio.SeqFeature.SeqFeature) self.assertEqual(b_motif.type, m_motif.type) - self.assertEqual(b_motif.location.__str__(), m_motif.location.__str__()) + self.assertEqual(str(b_motif.location), str(m_motif.location)) for key, value in b_motif.qualifiers.items(): self.assertEqual(value, m_motif.qualifiers[key]) @@ -116,7 +118,7 @@ def test_get_PFAM_domains(self): for b_fam, m_fam in zip(mod_PFAM_domains, bp_PFAM_domains): self.assertIsInstance(m_fam, Bio.SeqFeature.SeqFeature) self.assertEqual(b_fam.type, m_fam.type) - self.assertEqual(b_fam.location.__str__(), m_fam.location.__str__()) + self.assertEqual(str(b_fam.location), str(m_fam.location)) for key, value in b_fam.qualifiers.items(): if value is not None and value: self.assertEqual(value, m_fam.qualifiers[key]) @@ -132,7 +134,7 @@ def test_get_aSDomains(self): for b_asdomain, m_asdomain in zip(mod_aSDomains, bp_aSDomains): self.assertIsInstance(m_asdomain, Bio.SeqFeature.SeqFeature) self.assertEqual(b_asdomain.type, m_asdomain.type) - self.assertEqual(b_asdomain.location.__str__(), m_asdomain.location.__str__()) + self.assertEqual(str(b_asdomain.location), str(m_asdomain.location)) for key, value in b_asdomain.qualifiers.items(): if value is not None and value: self.assertEqual(value, m_asdomain.qualifiers[key]) From b7829bac5eea4bd6b99d74fea4b86c8346475e03 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Tue, 15 Aug 2017 07:20:58 +0530 Subject: [PATCH 37/71] Add uniitest module in test_cluster.py --- tests/test_cluster.py | 108 +++++++++++++++++++++--------------------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/tests/test_cluster.py b/tests/test_cluster.py index d4077ba..1dd49f4 100644 --- a/tests/test_cluster.py +++ b/tests/test_cluster.py @@ -1,47 +1,22 @@ from os import path +import unittest from Bio import SeqIO from Bio.SeqFeature import FeatureLocation -from secmet.record import Record -from secmet.record import ClusterFeature +from secmet.record import Record, ClusterFeature filename = 'nisin.gbk' filetype = 'genbank' -def get_testfile(): - """File path for testing""" - return path.join(path.dirname(__file__), 'data', filename) +class TestClusterFeature(unittest.TestCase): + def get_testfile(self): + """File path for testing""" + return path.join(path.dirname(__file__), 'data', filename) -def test_add_new_cluster(): - """Test for adding a new cluster to record""" - testfile = get_testfile() - rec = Record.from_file(testfile) - new_cluster = ClusterFeature() - new_cluster.location = FeatureLocation(100, 500) - try: - new_cluster.cutoff = 300 - except: - raise ValueError('Error assigning cutoff value') - try: - new_cluster.extension = 300 - except: - raise ValueError('Error assiging extension value') - new_cluster.contig_edge = True - new_cluster.detection = 'Detection rules...' - new_cluster.add_product('product_info') - no_clusters_initial = len(rec.get_clusters()) - rec.add_feature(new_cluster) - no_clusters_final = len(rec.get_clusters()) - assert no_clusters_initial+1 == no_clusters_final - return new_cluster - -def test_add_existing_cluster(): - """Test for accessing the existing cluster from record""" - testfile = get_testfile() - rec = Record.from_file(testfile) - if len(rec.get_clusters()) >= 1: - new_cluster = rec.get_clusters()[0] - assert isinstance(new_cluster, ClusterFeature) - new_cluster.location = FeatureLocation(100, 500) + def test_add_new_cluster(self): + """Test for adding a new cluster to record""" + testfile = self.get_testfile() + rec = Record.from_file(testfile) + new_cluster = ClusterFeature(FeatureLocation(100, 500)) try: new_cluster.cutoff = 300 except: @@ -50,26 +25,51 @@ def test_add_existing_cluster(): new_cluster.extension = 300 except: raise ValueError('Error assiging extension value') + new_cluster.contig_edge = True + new_cluster.detection = 'Detection rules...' + new_cluster.add_product('product_info') + no_clusters_initial = len(rec.get_clusters()) rec.add_feature(new_cluster) + no_clusters_final = len(rec.get_clusters()) + assert no_clusters_initial+1 == no_clusters_final return new_cluster -def write_to_file(): - """Write data from test_add_new_cluster()""" - testfile = get_testfile() - rec = Record.from_file(testfile) - new_cluster_feature = test_add_new_cluster() - rec.add_feature(new_cluster_feature) - record_1 = rec.to_biopython() - with open('test_new_'+filename, 'w') as handle: - SeqIO.write([record_1], handle, filetype) + def test_add_existing_cluster(self): + """Test for accessing the existing cluster from record""" + testfile = self.get_testfile() + rec = Record.from_file(testfile) + if len(rec.get_clusters()) >= 1: + new_cluster = rec.get_clusters()[0] + assert isinstance(new_cluster, ClusterFeature) + new_cluster.location = FeatureLocation(100, 500) + try: + new_cluster.cutoff = 300 + except: + raise ValueError('Error assigning cutoff value') + try: + new_cluster.extension = 300 + except: + raise ValueError('Error assiging extension value') + rec.add_feature(new_cluster) + return new_cluster - #Write data from test_add_existing_cluster() - rec = Record.from_file(testfile) - try: - new_cluster_feature = test_add_existing_cluster() + def test_write_to_file(self): + """Write data from test_add_new_cluster()""" + testfile = self.get_testfile() + rec = Record.from_file(testfile) + new_cluster_feature = self.test_add_new_cluster() rec.add_feature(new_cluster_feature) - except TypeError: #To return if no clusters are already present in the file - return - record_2 = rec.to_biopython() - with open('test_existing_'+filename, 'w') as handle: - SeqIO.write([record_2], handle, filetype) + record_1 = rec.to_biopython() + with open('test_new_'+filename, 'w') as handle: + SeqIO.write([record_1], handle, filetype) + + #Write data from test_add_existing_cluster() + rec = Record.from_file(testfile) + try: + new_cluster_feature = self.test_add_existing_cluster() + rec.add_feature(new_cluster_feature) + except TypeError: #To return if no clusters are already present in the file + return + record_2 = rec.to_biopython() + with open('test_existing_'+filename, 'w') as handle: + SeqIO.write([record_2], handle, filetype) From c09c00f951b6e5d95a1cc57d558073c723b54518 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Tue, 15 Aug 2017 07:21:58 +0530 Subject: [PATCH 38/71] Add test_ClusterFeature_members() to check the members of ClusterFeature() --- tests/test_cluster.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/test_cluster.py b/tests/test_cluster.py index 1dd49f4..dfa583b 100644 --- a/tests/test_cluster.py +++ b/tests/test_cluster.py @@ -12,6 +12,28 @@ def get_testfile(self): """File path for testing""" return path.join(path.dirname(__file__), 'data', filename) + def test_ClusterFeature_members(self): + testfile = self.get_testfile() + rec = Record.from_file(testfile) + bp_rec = SeqIO.read(testfile, filetype) + bp_clusters = [i for i in bp_rec.features if i.type == 'cluster'] + mod_clusters = rec.get_clusters() + for bp_cluster, mod_cluster in zip(bp_clusters, mod_clusters): + for key, value in bp_cluster.qualifiers.items(): + if value is not None and value: + #clusterblast, subclusterblast and knownclusterblast are lists + if key not in ['clusterblast', 'subclusterblast', 'knownclusterblast']: + if hasattr(mod_cluster, key): + self.assertEqual(str(value[0]), str(getattr(mod_cluster, key))) + else: + self.assertEqual(value, getattr(mod_cluster, key)) + if bp_cluster.qualifiers['product']: + #product is modified to products in secmet + self.assertEqual(bp_cluster.qualifiers['product'], mod_cluster.get_products()) + if bp_cluster.qualifiers['note']: + #notes will not contain 'Cluster number: ' and 'Detection rules: '' + self.assertEqual(len(bp_cluster.qualifiers['note'])-2, len(mod_cluster.notes)) + def test_add_new_cluster(self): """Test for adding a new cluster to record""" testfile = self.get_testfile() From c63285c1a00977c02c35664fb9a9b25fc00aac8a Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Tue, 15 Aug 2017 07:36:21 +0530 Subject: [PATCH 39/71] Bugs Fix in record.py --- secmet/record.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/secmet/record.py b/secmet/record.py index 88278f5..516a17b 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -630,7 +630,7 @@ def __init__(self, f_location=None, feature=None): self.notes = self._qualifiers['note'] if 'specificity' in self._qualifiers: - self.notes = self._qualifiers['specificity'] + self.specificity = self._qualifiers['specificity'] self.location = feature.location else: self.location = f_location @@ -743,6 +743,15 @@ def __init__(self, f_location=None, feature=None): if 'probability' in self._qualifiers: self.probability = self._qualifiers['probability'][0] + + if 'subclusterblast' in self._qualifiers: + self.subclusterblast = self._qualifiers['subclusterblast'] + + if 'clusterblast' in self._qualifiers: + self.clusterblast = self._qualifiers['clusterblast'] + + if 'knownclusterblast' in self._qualifiers: + self.knownclusterblast = self._qualifiers['knownclusterblast'] self.location = feature.location else: self.location = f_location From d5519554ac633d183e09296f3f97bb7212420c3b Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Tue, 15 Aug 2017 07:56:55 +0530 Subject: [PATCH 40/71] Add test_cds.py to tests To verify members of CDSFeature() --- secmet/record.py | 1 - tests/test_cds.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 tests/test_cds.py diff --git a/secmet/record.py b/secmet/record.py index 516a17b..47559de 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -233,7 +233,6 @@ def __init__(self, f_location=None, feature=None): self._qualifiers = {} self.sec_met_predictions = [] - self.other_qualifiers = {} self.type = 'CDS' if feature is not None: diff --git a/tests/test_cds.py b/tests/test_cds.py new file mode 100644 index 0000000..c3af2d3 --- /dev/null +++ b/tests/test_cds.py @@ -0,0 +1,36 @@ +from os import path +import unittest +from Bio import SeqIO +from secmet.record import Record + +filename = 'nisin.gbk' +filetype = 'genbank' + +class TestCDSFeature(unittest.TestCase): + def get_testfile(self): + """File path for testing""" + return path.join(path.dirname(__file__), 'data', filename) + + def test_CDSFeature_members(self): + testfile = self.get_testfile() + rec = Record.from_file(testfile) + bp_rec = SeqIO.read(testfile, filetype) + bp_cdss = [i for i in bp_rec.features if i.type == 'CDS'] + mod_cdss = rec.get_CDSs() + self.assertEqual(len(bp_cdss), len(mod_cdss)) + for bp_cds, mod_cds in zip(bp_cdss, mod_cdss): + for key, value in bp_cds.qualifiers.items(): + if value is not None and value: + #aSProdPred, aSASF_choice, aSASF_choice, aSASF_note, aSASF_prediction + #aSASF_scaffold and sec_met_predictions are lists + if key not in ['aSProdPred', 'aSASF_choice', 'aSASF_note', 'aSASF_prediction', \ + 'aSASF_scaffold', 'sec_met_predictions']: + if key != 'sec_met': #antiSMASH anyways erases all sec_met qualifiers + if hasattr(mod_cds, key): + self.assertEqual(str(value[0]), str(getattr(mod_cds, key))) + else: + self.assertEqual(value, getattr(mod_cds, key)) + if 'note' in bp_cds.qualifiers: + if bp_cds.qualifiers['note']: + #note is modified to notes in secmet + self.assertEqual(bp_cds.qualifiers['note'], mod_cds.notes) From 269b542e188b7a86343037e0fe0c3252563f9bbe Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Tue, 15 Aug 2017 08:19:42 +0530 Subject: [PATCH 41/71] Add test_cds_motif.py To test class members of CDS_motifFeature() --- tests/test_cds_motif.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 tests/test_cds_motif.py diff --git a/tests/test_cds_motif.py b/tests/test_cds_motif.py new file mode 100644 index 0000000..b815e10 --- /dev/null +++ b/tests/test_cds_motif.py @@ -0,0 +1,28 @@ +from os import path +import unittest +from Bio import SeqIO +from secmet.record import Record + +filename = 'nisin.gbk' +filetype = 'genbank' + +class TestCDS_motifFeature(unittest.TestCase): + def get_testfile(self): + """File path for testing""" + return path.join(path.dirname(__file__), 'data', filename) + + def test_CDS_motifFeature_members(self): + """Check if all the qualifiers are properly stored in CDS_motifFeature""" + testfile = self.get_testfile() + rec = Record.from_file(testfile) + bp_rec = SeqIO.read(testfile, filetype) + bp_cds_motifs = [i for i in bp_rec.features if i.type == 'CDS_motif'] + mod_cds_motifs = rec.get_CDS_motifs() + for bp_motif, mod_motif in zip(bp_cds_motifs, mod_cds_motifs): + for key, value in bp_motif.qualifiers.items(): + if value is not None and value: + if hasattr(mod_motif, key): + self.assertEqual(str(value[0]), str(getattr(mod_motif, key))) + if 'note' in bp_motif.qualifiers: + #note is modified to notes in secmet + self.assertEqual(bp_motif.qualifiers['note'], mod_motif.notes) From 827eafabb94e9fceba1c685d4f4633b9e8536ecd Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Tue, 15 Aug 2017 08:39:19 +0530 Subject: [PATCH 42/71] Add test_domains.py to tests To test the members of aSDomain() and PFAM_domain() --- tests/test_domains.py | 52 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 tests/test_domains.py diff --git a/tests/test_domains.py b/tests/test_domains.py new file mode 100644 index 0000000..3123333 --- /dev/null +++ b/tests/test_domains.py @@ -0,0 +1,52 @@ +from os import path +import unittest +from Bio import SeqIO +from secmet.record import Record + +filename = 'nisin.gbk' +filetype = 'genbank' + +class TestDomains(unittest.TestCase): + def get_testfile(self): + """File path for testing""" + return path.join(path.dirname(__file__), 'data', filename) + + def test_aSDomain(self): + """Check if all the qualifiers are properly stored in aSDomain""" + testfile = self.get_testfile() + rec = Record.from_file(testfile) + bp_rec = SeqIO.read(testfile, filetype) + bp_asdomains = [i for i in bp_rec.features if i.type == 'aSDomain'] + mod_asdomains = rec.get_aSDomains() + for bp_asdomain, mod_asdomain in zip(bp_asdomains, mod_asdomains): + for key, value in bp_asdomain.qualifiers.items(): + if value is not None and value: + #label and specificity are lists + if key not in ['label', 'specificity']: + if hasattr(mod_asdomain, key): + self.assertEqual(str(value[0]), str(getattr(mod_asdomain, key))) + else: + self.assertEqual(value, getattr(mod_asdomain, key)) + if 'note' in bp_asdomain.qualifiers: + #note is modified to notes in secmet + self.assertEqual(bp_asdomain.qualifiers['note'], mod_asdomain.notes) + + def test_PFAM_domain(self): + """Check if all the qualifiers are properly stored in PFAM_domain""" + testfile = self.get_testfile() + rec = Record.from_file(testfile) + bp_rec = SeqIO.read(testfile, filetype) + bp_pfams = [i for i in bp_rec.features if i.type == 'PFAM_domain'] + mod_pfams = rec.get_PFAM_domains() + for bp_pfam, mod_pfam in zip(bp_pfams, mod_pfams): + for key, value in bp_pfam.qualifiers.items(): + if value is not None and value: + #label and db_xref are lists + if key not in ['label', 'db_xref']: + if hasattr(mod_pfam, key): + self.assertEqual(str(value[0]), str(getattr(mod_pfam, key))) + else: + self.assertEqual(value, getattr(mod_pfam, key)) + if 'note' in bp_pfam.qualifiers: + #note is modified to notes in secmet + self.assertEqual(bp_pfam.qualifiers['note'], mod_pfam.notes) From 7fa4917e0f27cfbe3e8f4751c02e737b2df8da82 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Tue, 15 Aug 2017 15:45:37 +0530 Subject: [PATCH 43/71] Add test_generic.py to tests Update GenericFeature() in record.py To test the class members of GenericFeature() --- secmet/record.py | 7 ++++++- tests/test_generic.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 tests/test_generic.py diff --git a/secmet/record.py b/secmet/record.py index 47559de..63c414d 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -136,6 +136,9 @@ def __init__(self, f_location=None, f_type=None, feature=None): if 'sec_met' in self._qualifiers: self.sec_met.extend(self._qualifiers['sec_met']) del self._qualifiers['sec_met'] + if 'note' in self._qualifiers: + self.notes = self._qualifiers['note'] + del self._qualifiers['note'] else: self.location = f_location if not isinstance(f_type, str): @@ -178,7 +181,7 @@ def get_qualifier(self, category): else: if hasattr(self, category): if getattr(self, category): - return getattr(self, category) + return [getattr(self, category)] return [] def to_biopython(self): @@ -198,6 +201,8 @@ def to_biopython(self): new_Generic.qualifiers['description'] = [str(self.description)] if self.sec_met: new_Generic.qualifiers['sec_met'] = self.sec_met + if self.notes: + new_Generic.qualifiers['note'] = self.notes for key, value in self._qualifiers.items(): new_Generic.qualifiers[key] = value return [new_Generic] diff --git a/tests/test_generic.py b/tests/test_generic.py new file mode 100644 index 0000000..116ba4e --- /dev/null +++ b/tests/test_generic.py @@ -0,0 +1,30 @@ +from os import path +import unittest +from Bio import SeqIO +from secmet.record import Record + +filename = 'nisin.gbk' +filetype = 'genbank' + +class TestDomains(unittest.TestCase): + def get_testfile(self): + """File path for testing""" + return path.join(path.dirname(__file__), 'data', filename) + + def test_GenericFeature(self): + """Check if all the qualifiers are properly stored in GenericFeature""" + testfile = self.get_testfile() + rec = Record.from_file(testfile) + bp_rec = SeqIO.read(testfile, filetype) + non_generic_features = ['CDS', 'cluster', 'CDS_motif', 'aSDomain', 'PFAM_domain'] + bp_generics = [i for i in bp_rec.features if i.type not in non_generic_features] + mod_generics = rec.get_generics() + self.assertEqual(len(bp_generics), len(mod_generics)) + for bp_generic, mod_generic in zip(bp_generics, mod_generics): + for key, value in bp_generic.qualifiers.items(): + if value is not None and value: + if key != 'note': + self.assertEqual(value, mod_generic.get_qualifier(key)) + else: + #note is modified to notes in secmet + self.assertEqual(bp_generic.qualifiers['note'], mod_generic.notes) From 5dd0a2f8cf4402c5e95b599dfd043242454bf370 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Tue, 15 Aug 2017 16:45:09 +0530 Subject: [PATCH 44/71] Bug fix: Replace aSDomain_id member with asDomain_id --- secmet/record.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index 63c414d..f54d2df 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -353,7 +353,7 @@ def __init__(self, f_location=None, feature=None): super(CDS_motifFeature, self).__init__() self.label = None self.motif = None - self.aSDomain_id = None + self.asDomain_id = None self.aSTool = None self.detection = None self.database = None @@ -377,8 +377,8 @@ def __init__(self, f_location=None, feature=None): if 'motif' in self._qualifiers: self.motif = self._qualifiers['motif'][0] - if 'aSDomain_id' in self._qualifiers: - self.aSDomain_id = self._qualifiers['aSDomain_id'][0] + if 'asDomain_id' in self._qualifiers: + self.asDomain_id = self._qualifiers['asDomain_id'][0] if 'evalue' in self._qualifiers: self.evalue = self._qualifiers['evalue'][0] @@ -437,8 +437,8 @@ def to_biopython(self): self._qualifiers['database'] = [str(self.database)] if self.evalue is not None: self._qualifiers['evalue'] = [str(self.evalue)] - if self.aSDomain_id is not None: - self._qualifiers['aSDomain_id'] = [str(self.aSDomain_id)] + if self.asDomain_id is not None: + self._qualifiers['asDomain_id'] = [str(self.asDomain_id)] if self.detection is not None: self._qualifiers['detection'] = [str(self.detection)] if self.score is not None: @@ -463,7 +463,7 @@ def __init__(self, f_location=None, feature=None): """ super(PFAM_domain, self).__init__() self.domain = None - self.aSDomain_id = None + self.asDomain_id = None self.locus_tag = None self.aSTool = None self.detection = None @@ -490,8 +490,8 @@ def __init__(self, f_location=None, feature=None): if 'label' in self._qualifiers: self.label = self._qualifiers['label'] - if 'aSDomain_id' in self._qualifiers: - self.aSDomain_id = self._qualifiers['aSDomain_id'][0] + if 'asDomain_id' in self._qualifiers: + self.asDomain_id = self._qualifiers['asDomain_id'][0] if 'evalue' in self._qualifiers: self.evalue = self._qualifiers['evalue'][0] @@ -554,8 +554,8 @@ def to_biopython(self): self._qualifiers['database'] = [str(self.database)] if self.evalue is not None: self._qualifiers['evalue'] = [str(self.evalue)] - if self.aSDomain_id is not None: - self._qualifiers['aSDomain_id'] = [str(self.aSDomain_id)] + if self.asDomain_id is not None: + self._qualifiers['asDomain_id'] = [str(self.asDomain_id)] if self.detection is not None: self._qualifiers['detection'] = [str(self.detection)] if self.score is not None: @@ -587,7 +587,7 @@ def __init__(self, f_location=None, feature=None): super(aSDomain, self).__init__() self.domain = None self.domain_subtype = None - self.aSDomain_id = None + self.asDomain_id = None self.locus_tag = None self.detection = None self.database = None @@ -615,8 +615,8 @@ def __init__(self, f_location=None, feature=None): if 'label' in self._qualifiers: self.label = self._qualifiers['label'] - if 'aSDomain_id' in self._qualifiers: - self.aSDomain_id = self._qualifiers['aSDomain_id'][0] + if 'asDomain_id' in self._qualifiers: + self.asDomain_id = self._qualifiers['asDomain_id'][0] if 'evalue' in self._qualifiers: self.evalue = self._qualifiers['evalue'][0] @@ -673,8 +673,8 @@ def to_biopython(self): self._qualifiers['database'] = [str(self.database)] if self.evalue is not None: self._qualifiers['evalue'] = [str(self.evalue)] - if self.aSDomain_id is not None: - self._qualifiers['aSDomain_id'] = [str(self.aSDomain_id)] + if self.asDomain_id is not None: + self._qualifiers['asDomain_id'] = [str(self.asDomain_id)] if self.detection is not None: self._qualifiers['detection'] = [str(self.detection)] if self.score is not None: From 8c16c15ae7ee8488f38d602f885bba5cf2a67971 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Tue, 15 Aug 2017 16:46:27 +0530 Subject: [PATCH 45/71] Modify secmet tests -Raise errors if a qualifier is not a member -Refine the code structure --- tests/test_cds.py | 26 ++++++++++++++++---------- tests/test_cds_motif.py | 12 +++++++----- tests/test_cluster.py | 24 ++++++++++++++---------- tests/test_domains.py | 35 +++++++++++++++++++++-------------- tests/test_generic.py | 6 +++--- 5 files changed, 61 insertions(+), 42 deletions(-) diff --git a/tests/test_cds.py b/tests/test_cds.py index c3af2d3..12af4dc 100644 --- a/tests/test_cds.py +++ b/tests/test_cds.py @@ -18,19 +18,25 @@ def test_CDSFeature_members(self): bp_cdss = [i for i in bp_rec.features if i.type == 'CDS'] mod_cdss = rec.get_CDSs() self.assertEqual(len(bp_cdss), len(mod_cdss)) + #aSProdPred, aSASF_choice, aSASF_choice, aSASF_note, aSASF_prediction + #aSASF_scaffold and sec_met_predictions are lists + qualifiers_as_list = ['note', 'aSProdPred', 'aSASF_choice', 'aSASF_prediction', \ + 'aSASF_note', 'aSASF_scaffold', 'sec_met_predictions'] for bp_cds, mod_cds in zip(bp_cdss, mod_cdss): for key, value in bp_cds.qualifiers.items(): if value is not None and value: - #aSProdPred, aSASF_choice, aSASF_choice, aSASF_note, aSASF_prediction - #aSASF_scaffold and sec_met_predictions are lists - if key not in ['aSProdPred', 'aSASF_choice', 'aSASF_note', 'aSASF_prediction', \ - 'aSASF_scaffold', 'sec_met_predictions']: + if key not in qualifiers_as_list: if key != 'sec_met': #antiSMASH anyways erases all sec_met qualifiers - if hasattr(mod_cds, key): + if not hasattr(mod_cds, key): + if not key in mod_cds._qualifiers: + raise AttributeError('%s is not a member of CDSFeature'%key) + else: + self.assertEqual(bp_cds.qualifiers[key], mod_cds._qualifiers[key]) + else: self.assertEqual(str(value[0]), str(getattr(mod_cds, key))) else: - self.assertEqual(value, getattr(mod_cds, key)) - if 'note' in bp_cds.qualifiers: - if bp_cds.qualifiers['note']: - #note is modified to notes in secmet - self.assertEqual(bp_cds.qualifiers['note'], mod_cds.notes) + if key == 'note': + #note is modified to notes in secmet + self.assertEqual(value, mod_cds.notes) + else: + self.assertEqual(value, getattr(mod_cds, key)) diff --git a/tests/test_cds_motif.py b/tests/test_cds_motif.py index b815e10..4b5dda4 100644 --- a/tests/test_cds_motif.py +++ b/tests/test_cds_motif.py @@ -20,9 +20,11 @@ def test_CDS_motifFeature_members(self): mod_cds_motifs = rec.get_CDS_motifs() for bp_motif, mod_motif in zip(bp_cds_motifs, mod_cds_motifs): for key, value in bp_motif.qualifiers.items(): - if value is not None and value: - if hasattr(mod_motif, key): + if key == 'note': + #note is modified to notes in secmet + self.assertEqual(bp_motif.qualifiers['note'], mod_motif.notes) + else: + if value is not None and value: + if not hasattr(mod_motif, key): + raise AttributeError("%s is not a member of CDS_motifFeature"%key) self.assertEqual(str(value[0]), str(getattr(mod_motif, key))) - if 'note' in bp_motif.qualifiers: - #note is modified to notes in secmet - self.assertEqual(bp_motif.qualifiers['note'], mod_motif.notes) diff --git a/tests/test_cluster.py b/tests/test_cluster.py index dfa583b..17e3a86 100644 --- a/tests/test_cluster.py +++ b/tests/test_cluster.py @@ -18,21 +18,25 @@ def test_ClusterFeature_members(self): bp_rec = SeqIO.read(testfile, filetype) bp_clusters = [i for i in bp_rec.features if i.type == 'cluster'] mod_clusters = rec.get_clusters() + qualifiers_as_list = ['note', 'product', 'clusterblast', 'subclusterblast', \ + 'knownclusterblast'] for bp_cluster, mod_cluster in zip(bp_clusters, mod_clusters): for key, value in bp_cluster.qualifiers.items(): if value is not None and value: #clusterblast, subclusterblast and knownclusterblast are lists - if key not in ['clusterblast', 'subclusterblast', 'knownclusterblast']: - if hasattr(mod_cluster, key): - self.assertEqual(str(value[0]), str(getattr(mod_cluster, key))) + if key not in qualifiers_as_list: + if not hasattr(mod_cluster, key): + raise AttributeError('%s is not a member of ClusterFeature'%key) + self.assertEqual(str(value[0]), str(getattr(mod_cluster, key))) else: - self.assertEqual(value, getattr(mod_cluster, key)) - if bp_cluster.qualifiers['product']: - #product is modified to products in secmet - self.assertEqual(bp_cluster.qualifiers['product'], mod_cluster.get_products()) - if bp_cluster.qualifiers['note']: - #notes will not contain 'Cluster number: ' and 'Detection rules: '' - self.assertEqual(len(bp_cluster.qualifiers['note'])-2, len(mod_cluster.notes)) + if key == 'note': + #notes will not contain 'Cluster number: ' and 'Detection rules: '' + self.assertEqual(len(value)-2, len(mod_cluster.notes)) + elif key == 'product': + #product is modified to products in secmet + self.assertEqual(bp_cluster.qualifiers['product'], mod_cluster.get_products()) + else: + self.assertEqual(value, getattr(mod_cluster, key)) def test_add_new_cluster(self): """Test for adding a new cluster to record""" diff --git a/tests/test_domains.py b/tests/test_domains.py index 3123333..ca208ce 100644 --- a/tests/test_domains.py +++ b/tests/test_domains.py @@ -18,18 +18,22 @@ def test_aSDomain(self): bp_rec = SeqIO.read(testfile, filetype) bp_asdomains = [i for i in bp_rec.features if i.type == 'aSDomain'] mod_asdomains = rec.get_aSDomains() + qualifiers_as_list = ['note', 'label', 'specificity'] for bp_asdomain, mod_asdomain in zip(bp_asdomains, mod_asdomains): for key, value in bp_asdomain.qualifiers.items(): if value is not None and value: #label and specificity are lists - if key not in ['label', 'specificity']: - if hasattr(mod_asdomain, key): - self.assertEqual(str(value[0]), str(getattr(mod_asdomain, key))) + if key not in qualifiers_as_list: + if not hasattr(mod_asdomain, key): + raise AttributeError('%s is not a member of aSDomain'%key) + self.assertEqual(str(value[0]), str(getattr(mod_asdomain, key))) else: - self.assertEqual(value, getattr(mod_asdomain, key)) - if 'note' in bp_asdomain.qualifiers: - #note is modified to notes in secmet - self.assertEqual(bp_asdomain.qualifiers['note'], mod_asdomain.notes) + if key == 'note': + #note is modified to notes in secmet + self.assertEqual(value, mod_asdomain.notes) + else: + self.assertEqual(value, getattr(mod_asdomain, key)) + def test_PFAM_domain(self): """Check if all the qualifiers are properly stored in PFAM_domain""" @@ -38,15 +42,18 @@ def test_PFAM_domain(self): bp_rec = SeqIO.read(testfile, filetype) bp_pfams = [i for i in bp_rec.features if i.type == 'PFAM_domain'] mod_pfams = rec.get_PFAM_domains() + qualifiers_as_list = ['note', 'label', 'db_xref'] for bp_pfam, mod_pfam in zip(bp_pfams, mod_pfams): for key, value in bp_pfam.qualifiers.items(): if value is not None and value: #label and db_xref are lists - if key not in ['label', 'db_xref']: - if hasattr(mod_pfam, key): - self.assertEqual(str(value[0]), str(getattr(mod_pfam, key))) + if key not in qualifiers_as_list: + if not hasattr(mod_pfam, key): + raise AttributeError('%s is not a member of PFAM_domain'%key) + self.assertEqual(str(value[0]), str(getattr(mod_pfam, key))) else: - self.assertEqual(value, getattr(mod_pfam, key)) - if 'note' in bp_pfam.qualifiers: - #note is modified to notes in secmet - self.assertEqual(bp_pfam.qualifiers['note'], mod_pfam.notes) + if key == 'note': + #note is modified to notes in secmet + self.assertEqual(value, mod_pfam.notes) + else: + self.assertEqual(value, getattr(mod_pfam, key)) diff --git a/tests/test_generic.py b/tests/test_generic.py index 116ba4e..e4e5363 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -23,8 +23,8 @@ def test_GenericFeature(self): for bp_generic, mod_generic in zip(bp_generics, mod_generics): for key, value in bp_generic.qualifiers.items(): if value is not None and value: - if key != 'note': - self.assertEqual(value, mod_generic.get_qualifier(key)) - else: + if key == 'note': #note is modified to notes in secmet self.assertEqual(bp_generic.qualifiers['note'], mod_generic.notes) + else: + self.assertEqual(value, mod_generic.get_qualifier(key)) From e2c8c5d3137ac0b1bb93eb0b76600e889aeb0cd4 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Tue, 15 Aug 2017 23:05:53 +0530 Subject: [PATCH 46/71] Update docstrings, uniformise the code Remove redundant code --- secmet/record.py | 103 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 74 insertions(+), 29 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index f54d2df..54308fd 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -15,16 +15,15 @@ def cmp_feature_location(a, b): return cmp(a.location.end, b.location.end) def sort_features(seq_record): - "Sort features in a seq_record by their position" - #Check if all features have a proper location assigned - for feature in seq_record.features: - assert feature.location is not None - #Sort features by location + "Sort features in a seq_record using their locations" seq_record.features.sort(cmp=cmp_feature_location) def find_new_cluster_pos(clusters, target_cluster): - """Search for appropriate position in array to add cluster""" + """Binary search for appropriate position in array to add new cluster + param clusters: A list of all existing ClusterFeature(s) in the record + param target_cluster: An instance of ClusterFeature + """ if not clusters: return 0 start = 0 @@ -49,6 +48,10 @@ def find_new_cluster_pos(clusters, target_cluster): start = mid def find_cluster_of_new_cds(clusters, new_cds): + """Binary search to find the corresponding cluster feature of a cds feature + param clusters: A list of all existing ClusterFeature(s) in the record + param new_cds: An instance of CDSFeature + """ if not clusters: return start = 0 @@ -70,11 +73,13 @@ def find_cluster_of_new_cds(clusters, new_cds): class Feature(object): - """A Feature super class that expands to different subclasses""" + """A Feature super class that extends to different subclasses""" def __init__(self): - """ Initialise a feature object""" + """ Initialise a Feature object""" self.type = None self.notes = [] + + #Check for a valid feature location def _get_location(self): try: return self.__location @@ -95,10 +100,11 @@ def extract(self, parent_seq): class GenericFeature(Feature): """A GenericFeature Feature subclasses Feature - (Features other than CDSFeature and ClusterFeature) + (Features other than CDS, cluster, CDS_motif, PFAM_domain and aSDomin) """ def __init__(self, f_location=None, f_type=None, feature=None): """Initialise a GenericFeature + param f_location: class 'Bio.SeqFeature.FeatureLocation/CompoundLocation' param feature: class 'Bio.SeqFeature.SeqFeature' """ super(GenericFeature, self).__init__() @@ -112,30 +118,39 @@ def __init__(self, f_location=None, f_type=None, feature=None): self.sec_met = [] if feature is not None: + """Initialise class members(qualifiers) using SeqFeature object""" self._qualifiers = feature.qualifiers + self.type = feature.type self.location = feature.location if 'locus_tag' in self._qualifiers: self.locus_tag = self._qualifiers['locus_tag'][0] del self._qualifiers['locus_tag'] + if 'gene' in self._qualifiers: self.gene = self._qualifiers['gene'][0] del self._qualifiers['gene'] + if 'translation' in self._qualifiers: self.translation = self._qualifiers['translation'][0] del self._qualifiers['translation'] + if 'name' in self._qualifiers: self.translation = self._qualifiers['name'][0] del self._qualifiers['name'] + if 'seq' in self._qualifiers: self.seq = self._qualifiers['seq'][0] del self._qualifiers['seq'] + if 'description' in self._qualifiers: self.description = self._qualifiers['description'][0] del self._qualifiers['description'] + if 'sec_met' in self._qualifiers: self.sec_met.extend(self._qualifiers['sec_met']) del self._qualifiers['sec_met'] + if 'note' in self._qualifiers: self.notes = self._qualifiers['note'] del self._qualifiers['note'] @@ -208,6 +223,7 @@ def to_biopython(self): return [new_Generic] def __repr__(self): + """A string representation of biopython generic features""" return repr(self.to_biopython()[0]) @@ -216,6 +232,7 @@ class CDSFeature(Feature): def __init__(self, f_location=None, feature=None): """Initialise a CDSFeature + param f_location: class 'Bio.SeqFeature.FeatureLocation/CompoundLocation' param feature: class 'Bio.SeqFeature.SeqFeature' """ super(CDSFeature, self).__init__() @@ -241,6 +258,7 @@ def __init__(self, f_location=None, feature=None): self.type = 'CDS' if feature is not None: + """Initialise class members(qualifiers) using SeqFeature object""" self._qualifiers = feature.qualifiers if 'locus_tag' in self._qualifiers: @@ -291,13 +309,8 @@ def __init__(self, f_location=None, feature=None): else: self.location = f_location - - def get_id(self): - """Returns the id of the CDSFeature""" - return self.gene - def get_cluster(self): - """Returns a ClusterFeature""" + """Returns the corresponding ClusterFeature""" return self.cluster def to_biopython(self): @@ -340,11 +353,12 @@ def to_biopython(self): return [new_CDS] def __repr__(self): + """A string representation of biopython CDS feature""" return repr(self.to_biopython()[0]) class CDS_motifFeature(Feature): - """A CDS_motifFeature which subclasses CDSFeature""" + """A CDS_motifFeature which subclasses Feature""" def __init__(self, f_location=None, feature=None): """Initialise a CDS_motifFeature param f_location: class 'Bio.SeqFeature.FeatureLocation/CompoundLocation' @@ -363,6 +377,7 @@ def __init__(self, f_location=None, feature=None): self._qualifiers = {} if feature is not None: + """Initialise class members(qualifiers) using SeqFeature object""" self._qualifiers = feature.qualifiers if 'locus_tag' in self._qualifiers: @@ -400,6 +415,8 @@ def __init__(self, f_location=None, feature=None): self.location = feature.location else: self.location = f_location + + #Check for a valid score qualifier before assigning def _get_score(self): try: return self.__score @@ -411,6 +428,7 @@ def _set_score(self, value): self.__score = value score = property(_get_score, _set_score) + #Check for a valid evalue qualifier before assigning def _get_evalue(self): try: return self.__evalue @@ -451,6 +469,7 @@ def to_biopython(self): return [new_CDS_motif] def __repr__(self): + """A string representation of biopython CDS_motif feature""" return repr(self.to_biopython()[0]) @@ -476,6 +495,7 @@ def __init__(self, f_location=None, feature=None): self._qualifiers = {} if feature is not None: + """Initialise class members(qualifiers) using SeqFeature object""" self._qualifiers = feature.qualifiers if 'locus_tag' in self._qualifiers: @@ -519,6 +539,8 @@ def __init__(self, f_location=None, feature=None): self.location = feature.location else: self.location = f_location + + #Check for a valid score qualifier before assigning def _get_score(self): try: return self.__score @@ -530,6 +552,7 @@ def _set_score(self, value): self.__score = value score = property(_get_score, _set_score) + #Check for a valid evalue qualifier before assigning def _get_evalue(self): try: return self.__evalue @@ -574,6 +597,7 @@ def to_biopython(self): return [new_PFAM_domain] def __repr__(self): + """A string representation of biopython PFAM_domain feature""" return repr(self.to_biopython()[0]) @@ -598,6 +622,7 @@ def __init__(self, f_location=None, feature=None): self._qualifiers = {} if feature is not None: + """Initialise class members(qualifiers) using SeqFeature object""" self._qualifiers = feature.qualifiers if 'locus_tag' in self._qualifiers: @@ -638,6 +663,8 @@ def __init__(self, f_location=None, feature=None): self.location = feature.location else: self.location = f_location + + #Check for a valid score qualifier before assigning def _get_score(self): try: return self.__score @@ -649,6 +676,7 @@ def _set_score(self, value): self.__score = value score = property(_get_score, _set_score) + #Check for a valid evalue qualifier before assigning def _get_evalue(self): try: return self.__evalue @@ -691,6 +719,7 @@ def to_biopython(self): return [new_aSDomain] def __repr__(self): + """A string representation of the biopython aSDomain feature""" return repr(self.to_biopython()[0]) @@ -716,6 +745,7 @@ def __init__(self, f_location=None, feature=None): self.cdss = [] if feature is not None: + """Initialise class members(qualifiers) using SeqFeature object""" self._qualifiers = feature.qualifiers if 'cutoff' in self._qualifiers: @@ -760,6 +790,7 @@ def __init__(self, f_location=None, feature=None): else: self.location = f_location + #Check if cutoff is an integer before assigning def _get_cutoff(self): try: return self.__cutoff @@ -771,6 +802,7 @@ def _set_cutoff(self, value): self.__cutoff = value cutoff = property(_get_cutoff, _set_cutoff) + #Check if extension is an integer before assigning def _get_extension(self): try: return self.__extension @@ -799,7 +831,7 @@ def get_cluster_number(self): return self.parent_record.get_cluster_number(self) def get_CDSs(self): - """Retruns a list of CDS objects which belong to this cluster""" + """Retruns a list of CDSFeature(s) which belong to this cluster""" return self.cdss def to_biopython(self): @@ -826,6 +858,7 @@ def to_biopython(self): return [new_Cluster] def __repr__(self): + """A string representation of biopython cluster feature""" return repr(self.to_biopython()[0]) class Record(object): @@ -855,11 +888,9 @@ def __init__(self, seq_record=None): @classmethod def from_file(cls, filename): - - """Initialise a record from a file of specified type + """Initialise a record from a file :param string filename: file name of the file to read - :param string filetype: Type of the inputfile """ filetype = filename.split('.')[-1] if filetype in ['gb', 'gbk', 'genbank']: @@ -955,7 +986,7 @@ def set_clusters(self, clusters_list): self._modified_cluster = clusters_list def get_CDSs(self): - """A list of secondary metabolite clusters present in the record""" + """A list of secondary metabolite CDS features present in the record""" return self._modified_cds def set_CDSs(self, cds_list): """To set the CDS features of the seq_record""" @@ -1017,7 +1048,7 @@ def get_cluster_number(self, clusterfeature): return self._cluster_number_dict[clusterfeature] def add_feature(self, feature): - """Adds features to appropriate lists""" + """Adds feature to appropriate lists""" if not isinstance(feature, Feature): raise TypeError("The argument is not an instance of 'Feature'") if isinstance(feature, ClusterFeature): @@ -1025,12 +1056,14 @@ def add_feature(self, feature): index = find_new_cluster_pos(clusters, feature) clusters.insert(index, feature) feature.parent_record = self + #Link cluster feature with its cds features self._update_cluster_cds_links(feature) for i, cluster in enumerate(clusters): self._cluster_number_dict[cluster] = i+1 elif isinstance(feature, CDSFeature): self._modified_cds.append(feature) + #Link cds feature with its cluster feature self._update_cluster_cds_links(feature) elif isinstance(feature, CDS_motifFeature): self._modified_cds_motif.append(feature) @@ -1042,7 +1075,7 @@ def add_feature(self, feature): self._modified_generic.append(feature) def from_biopython(self, record): - """Modifies _modified_features list with new Feature instances""" + """Modifies _modified_features_* list with new Feature instances""" features = record.features for feature in features: if feature.type == 'CDS': @@ -1071,7 +1104,7 @@ def from_biopython(self, record): return self def _update_cluster_cds_links(self, feature): - """Link cluster and their corresponding CDS features""" + """Link cluster and their CDS features""" if isinstance(feature, ClusterFeature): clustercdsfeatures = [] cdss = self.get_CDSs() @@ -1087,8 +1120,20 @@ def _update_cluster_cds_links(self, feature): class SecMetQualifier(list): - """A Secmet class to store sec_met qualifiers""" + """A SecMetQualifier class for sec_met qualifiers""" + def __init__(self, clustertype=None, domains=None, kind=None): + """Initialise a SecMetQualifier with the given attributes + :param clustertype: an instance of str + :param domains: a list of SecMetResult instance(s) + :param kind: an instance of str + """ + if clustertype is not None and not isinstance(clustertype, str): + raise ValueError('clustertype should be an instance of str') + if domains is not None and not isinstance(domains, list): + raise ValueError('domains should be an instance of list') + if kind is not None and not isinstance(kind, str): + raise ValueError('kind should be an instance of str') self.clustertype = clustertype self.domains = domains self.kind = kind @@ -1097,7 +1142,7 @@ def __init__(self, clustertype=None, domains=None, kind=None): super(SecMetQualifier, self).__init__() def __len__(self): - """Return length of the sec_met qualifier""" + """Return length of the secmet qualifier""" count = 0 if self.clustertype is not None: count += 1 @@ -1112,11 +1157,11 @@ def __len__(self): return count def __repr__(self): - """A string representation of the sec_met qualifier""" + """A string representation of the list of sec_met qualifier""" return str(self.as_list()) def __nonzero__(self): - """Returns False if sec_met doesn't contain any qualifier""" + """Returns False if nothing is initialized""" if self.clustertype is not None or self.kind is not None or (self.domains is not None and self.domains): return True if self.nrpspks or self.asf_predictions: @@ -1138,7 +1183,7 @@ def __iter__(self): yield asf def as_list(self): - """Returns a list of all sec_met qualifiers""" + """Returns sec_met qualifier in a list""" self._sec_met = [] for qual in self: self._sec_met.append(qual) From cb17cf9f84ba882f6aa47a279658f70761426a0a Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Wed, 16 Aug 2017 05:55:56 +0530 Subject: [PATCH 47/71] Bug fix: self.translation -> self.name return 0 if cluster number is not found --- secmet/record.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index 54308fd..e7786a1 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -136,7 +136,7 @@ def __init__(self, f_location=None, f_type=None, feature=None): del self._qualifiers['translation'] if 'name' in self._qualifiers: - self.translation = self._qualifiers['name'][0] + self.name = self._qualifiers['name'][0] del self._qualifiers['name'] if 'seq' in self._qualifiers: @@ -827,7 +827,7 @@ def get_products(self): def get_cluster_number(self): """Returns the clusternumber of the cluster""" if self.parent_record is None: - raise ValueError('Parent record is None') + return 0 return self.parent_record.get_cluster_number(self) def get_CDSs(self): From 0d3ab35d17871420f0c78d8b91134272814d04e0 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Thu, 17 Aug 2017 01:10:32 +0530 Subject: [PATCH 48/71] Add more tests to test_generic.py --- tests/test_generic.py | 123 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 121 insertions(+), 2 deletions(-) diff --git a/tests/test_generic.py b/tests/test_generic.py index e4e5363..9e7fa6d 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -1,16 +1,29 @@ from os import path import unittest from Bio import SeqIO -from secmet.record import Record +from Bio.Seq import Seq +from Bio.SeqFeature import SeqFeature +from Bio.SeqFeature import FeatureLocation +from secmet.record import Record, GenericFeature -filename = 'nisin.gbk' +filename = 'Y16952.3.final.gbk' filetype = 'genbank' class TestDomains(unittest.TestCase): + def get_testfile(self): """File path for testing""" return path.join(path.dirname(__file__), 'data', filename) + def BioFeature(self): + biofeature = SeqFeature(location=FeatureLocation(10, 100)) + biofeature.type = 'FAKE_BIO_FEATURE' + biofeature.qualifiers = {'locus_tag': ['fake_locus_tag'], 'translation': ['fake_translation'], \ + 'gene': ['fake_gene'], 'name': ['fake_name'], 'seq': [Seq('FAKE')], \ + 'description': ['fake_description'], 'sec_met': ['fake_sec_met'], \ + 'note': ['fake_notes'], 'unknown_qualifier': ['fake_qualifier']} + return biofeature + def test_GenericFeature(self): """Check if all the qualifiers are properly stored in GenericFeature""" testfile = self.get_testfile() @@ -28,3 +41,109 @@ def test_GenericFeature(self): self.assertEqual(bp_generic.qualifiers['note'], mod_generic.notes) else: self.assertEqual(value, mod_generic.get_qualifier(key)) + + def test_add_qualifier(self): + """Test adding a new GenericFeature""" + #GenericFeature should be initialised with valid location and type + try: + new_generic = GenericFeature() + except TypeError: + pass + try: + new_generic = GenericFeature(FeatureLocation(10, 100), 20) #Invalid type + except ValueError: + pass + new_generic = GenericFeature(FeatureLocation(1, 100), 'FAKE') + + #qualifiers should be strings and their values should be either strings or list of strings + try: + new_generic.add_qualifier(10, (20, 30)) + except TypeError: + pass + + #If the formats are valid shouldn't get any error + try: + new_generic.add_qualifier('Some string1', 'Fake_value1') + new_generic.add_qualifier('Some string2', ['Fake_value2']) + except: + raise RuntimeError('Secmet unable to add valid qualifiers') + + #score, evalue and probability should be numbers + try: + new_generic.add_qualifier('score', '-a50') + except ValueError: + pass + try: + new_generic.add_qualifier('evalue', 'a5.50E-08') + except ValueError: + pass + try: + new_generic.add_qualifier('probability', 'a0.5') + except ValueError: + pass + + #If valid qualifiers and values are added, We shouldn't get an error + try: + new_generic.add_qualifier('score', '-50') + new_generic.add_qualifier('evalue', '5.50E-08') + new_generic.add_qualifier('probability', '0.5') + except: + raise RuntimeError('Secmet unable to add valid qualifiers') + + #If GenericFeature has the qualifier as member, the member should get initialised + new_generic.add_qualifier('locus_tag', 'FAKE_TAG') + self.assertEqual(new_generic.locus_tag, 'FAKE_TAG') + + #If GenericFeature has the qualifier as memeber and if the memeber is a list, the new qualifier value should get appended + new_generic.add_qualifier('sec_met', 'FAKE_sec_met1') + self.assertEqual(new_generic.sec_met, ['FAKE_sec_met1']) + new_generic.add_qualifier('sec_met', 'FAKE_sec_met2') + self.assertEqual(new_generic.sec_met, ['FAKE_sec_met1', 'FAKE_sec_met2']) + #If the qualifier is a list and value is also a list, qualifier should extend to new values + new_generic.add_qualifier('sec_met', ['FAKE_sec_met3']) + self.assertEqual(new_generic.sec_met, ['FAKE_sec_met1', 'FAKE_sec_met2', 'FAKE_sec_met3']) + + #If GenericFeature doesn't contain the qualifier as its member, the member gets stored in _qualifiers + new_generic.add_qualifier('fake_qualifier1', 'FAKE1') + self.assertEqual(new_generic._qualifiers['fake_qualifier1'], ['FAKE1']) + + #If a new value is added for existing qualifier it should get appended + new_generic.add_qualifier('fake_qualifier1', 'FAKE2') + self.assertEqual(new_generic._qualifiers['fake_qualifier1'], ['FAKE1', 'FAKE2']) + + #If the qualifier value is a list, then it should store it as a list only + new_generic.add_qualifier('fake_qualifier2', ['FAKE1']) + + def test_convert_Biofeature_to_GenericFeature(self): + """Test the convesion of BioFeature to GenericFeature""" + biofeature = self.BioFeature() + generic_feature = GenericFeature(feature=biofeature) + self.assertEqual(str(generic_feature.location), str(FeatureLocation(10, 100))) + self.assertEqual(generic_feature.type, 'FAKE_BIO_FEATURE') + self.assertEqual(generic_feature.locus_tag, 'fake_locus_tag') + self.assertEqual(generic_feature.translation, 'fake_translation') + self.assertEqual(generic_feature.gene, 'fake_gene') + self.assertEqual(generic_feature.name, 'fake_name') + self.assertEqual(generic_feature.seq, Seq('FAKE')) + self.assertEqual(generic_feature.description, 'fake_description') + self.assertEqual(generic_feature.sec_met, ['fake_sec_met']) + self.assertEqual(generic_feature.notes, ['fake_notes']) + self.assertEqual(generic_feature._qualifiers['unknown_qualifier'], ['fake_qualifier']) + self.assertEqual(repr(generic_feature), repr(generic_feature.to_biopython()[0])) + self.assertIsInstance(generic_feature.to_biopython()[0], SeqFeature) + + def test_get_qualifier(self): + """Test the get_qualifier method of GenericFeature""" + biofeature = self.BioFeature() + generic_feature = GenericFeature(feature=biofeature) + #getting qualifiers should be case insensitive + self.assertEqual(generic_feature.get_qualifier('LoCUs_tAg'), ['fake_locus_tag']) + #Upper case + generic_feature.add_qualifier('SOME_UPPER_CASE_QUALIFIER', 'FAKE') + self.assertEqual(generic_feature.get_qualifier('some_upper_case_qualifier'), ['FAKE']) + #Lower case + generic_feature.add_qualifier('some_lower_case_qualifier', 'FAKE') + self.assertEqual(generic_feature.get_qualifier('SOME_LOWER_CASE_QUALIFIER'), ['FAKE']) + + #If qualifier is not present, [] should be returned + self.assertEqual(generic_feature.get_qualifier('Absent_qualifier'), []) From b9a6c01b3301320629d8f68483458c5e318a778c Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Thu, 17 Aug 2017 04:12:31 +0530 Subject: [PATCH 49/71] Refine secmet Bugs fix Remove __nonzero__() Remove get_secmet_features() Add members to Features --- secmet/record.py | 191 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 140 insertions(+), 51 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index e7786a1..0c1cfc2 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -36,7 +36,7 @@ def find_new_cluster_pos(clusters, target_cluster): return start+1 except IndexError: return start+1 - mid = start+(end-start)/2 + mid = start+((end-start)/2+1) if start == end or mid == start: if target_cluster.location.start < clusters[0].location.start: return 0 @@ -63,12 +63,12 @@ def find_cluster_of_new_cds(clusters, new_cds): clusters[start].cdss.append(new_cds) new_cds.cluster = clusters[start] return - mid = start+(end-start)/2 + mid = start+((end-start)/2+1) if start == end or mid == start: return if clusters[mid].location.start > new_cds.location.start: end = mid - elif clusters[mid].location.start <= new_cds.location.start: + else: start = mid @@ -81,10 +81,7 @@ def __init__(self): #Check for a valid feature location def _get_location(self): - try: - return self.__location - except: - raise ValueError('Unassigned location') + return self.__location def _set_location(self, value): if not isinstance(value, (FeatureLocation, CompoundLocation)): raise TypeError("Location must be an instance of 'FeatureLocation' or 'CompoundLocation'") @@ -162,10 +159,10 @@ def __init__(self, f_location=None, f_type=None, feature=None): def add_qualifier(self, category, info): """Adds a qualifier to qualifiers dictionary""" - if not isinstance(category, str) and isinstance(info, (str, list)): + if not isinstance(category, str) or not isinstance(info, (str, list)): raise TypeError("Type of qualifiers should be 'str'") if category in ['evalue', 'score', 'probability']: - if not (info.replace('.', '')).replace('E-', '').isdigit(): + if not (((info.replace('.', '')).replace('E-', '')).replace('-', '')).replace('+', '').isdigit(): raise ValueError('%s should be a number'% category) if hasattr(self, category): if isinstance(getattr(self, category), list): @@ -197,6 +194,9 @@ def get_qualifier(self, category): if hasattr(self, category): if getattr(self, category): return [getattr(self, category)] + elif hasattr(self, category.lower()): + if getattr(self, category.lower()): + return [getattr(self, category.lower())] return [] def to_biopython(self): @@ -244,9 +244,10 @@ def __init__(self, f_location=None, feature=None): self.gene = None self.translation = None self.cluster = None - self.EC_number = None self.transl_table = None self.source = None + self.db_xref = [] + self.EC_number = [] self.aSProdPred = [] self.aSASF_choice = [] self.aSASF_note = [] @@ -280,7 +281,7 @@ def __init__(self, f_location=None, feature=None): self.notes = self._qualifiers['note'] if 'EC_number' in self._qualifiers: - self.EC_number = self._qualifiers['EC_number'][0] + self.EC_number = self._qualifiers['EC_number'] if 'transl_table' in self._qualifiers: self.transl_table = self._qualifiers['transl_table'][0] @@ -289,20 +290,23 @@ def __init__(self, f_location=None, feature=None): self.source = self._qualifiers['source'][0] if 'aSASF_choice' in self._qualifiers: - self.aSProdPred = self._qualifiers['aSASF_choiceS'] + self.aSASF_choice = self._qualifiers['aSASF_choice'] if 'aSASF_note' in self._qualifiers: - self.aSProdPred = self._qualifiers['aSASF_note'] + self.aSASF_note = self._qualifiers['aSASF_note'] if 'aSASF_prediction' in self._qualifiers: - self.aSProdPred = self._qualifiers['aSASF_prediction'] + self.aSASF_prediction = self._qualifiers['aSASF_prediction'] if 'aSASF_scaffold' in self._qualifiers: - self.aSProdPred = self._qualifiers['aSASF_scaffold'] + self.aSASF_scaffold = self._qualifiers['aSASF_scaffold'] if 'aSProdPred' in self._qualifiers: self.aSProdPred = self._qualifiers['aSProdPred'] + if 'db_xref' in self._qualifiers: + self.db_xref = self._qualifiers['db_xref'] + if 'sec_met_predictions' in self._qualifiers: self.sec_met_predictions = self._qualifiers['sec_met_predictions'] self.location = feature.location @@ -332,11 +336,13 @@ def to_biopython(self): if self.notes: self._qualifiers['note'] = self.notes if self.EC_number is not None: - self._qualifiers['EC_number'] = [str(self.EC_number)] + self._qualifiers['EC_number'] = self.EC_number if self.transl_table is not None: self._qualifiers['transl_table'] = [str(self.transl_table)] if self.source is not None: self._qualifiers['source'] = [str(self.source)] + if self.db_xref: + self._qualifiers['db_xref'] = self.db_xref if self.aSASF_choice: self._qualifiers['aSASF_choice'] = self.aSASF_choice if self.aSASF_note: @@ -374,6 +380,11 @@ def __init__(self, f_location=None, feature=None): self.translation = None self.locus_tag = None self.type = 'CDS_motif' + self.aSProdPred = [] + self.aSASF_choice = [] + self.aSASF_note = [] + self.aSASF_prediction = [] + self.aSASF_scaffold = [] self._qualifiers = {} if feature is not None: @@ -410,6 +421,21 @@ def __init__(self, f_location=None, feature=None): if 'database' in self._qualifiers: self.database = self._qualifiers['database'][0] + if 'aSASF_choice' in self._qualifiers: + self.aSASF_choice = self._qualifiers['aSASF_choice'] + + if 'aSASF_note' in self._qualifiers: + self.aSASF_note = self._qualifiers['aSASF_note'] + + if 'aSASF_prediction' in self._qualifiers: + self.aSASF_prediction = self._qualifiers['aSASF_prediction'] + + if 'aSASF_scaffold' in self._qualifiers: + self.aSASF_scaffold = self._qualifiers['aSASF_scaffold'] + + if 'aSProdPred' in self._qualifiers: + self.aSProdPred = self._qualifiers['aSProdPred'] + if 'note' in self._qualifiers: self.notes = self._qualifiers['note'] self.location = feature.location @@ -423,8 +449,8 @@ def _get_score(self): except: return None def _set_score(self, value): - if not ((value.replace('.', '')).replace('-', '')).isdigit(): - raise TypeError("score must be a number") + if not (((value.replace('.', '')).replace('-', ''))).replace('+', '').isdigit(): + raise ValueError("score must be a number") self.__score = value score = property(_get_score, _set_score) @@ -436,7 +462,7 @@ def _get_evalue(self): return None def _set_evalue(self, value): if not ((value.replace('.', '')).replace('E-', '').replace('E+', '')).isdigit(): - raise TypeError("evalue must be an number") + raise ValueError("evalue must be an number") self.__evalue = value evalue = property(_get_evalue, _set_evalue) @@ -463,6 +489,16 @@ def to_biopython(self): self._qualifiers['score'] = [str(self.score)] if self.aSTool is not None: self._qualifiers['aSTool'] = [str(self.aSTool)] + if self.aSASF_choice: + self._qualifiers['aSASF_choice'] = self.aSASF_choice + if self.aSASF_note: + self._qualifiers['aSASF_note'] = self.aSASF_note + if self.aSASF_prediction: + self._qualifiers['aSASF_prediction'] = self.aSASF_prediction + if self.aSASF_scaffold: + self._qualifiers['aSASF_scaffold'] = self.aSASF_scaffold + if self.aSProdPred: + self._qualifiers['aSProdPred'] = self.aSProdPred if self.notes: self._qualifiers['note'] = self.notes new_CDS_motif.qualifiers = self._qualifiers.copy() @@ -491,6 +527,11 @@ def __init__(self, f_location=None, feature=None): self.description = None self.db_xref = [] self.label = [] + self.aSProdPred = [] + self.aSASF_choice = [] + self.aSASF_note = [] + self.aSASF_prediction = [] + self.aSASF_scaffold = [] self.type = 'PFAM_domain' self._qualifiers = {} @@ -534,6 +575,21 @@ def __init__(self, f_location=None, feature=None): if 'description' in self._qualifiers: self.description = self._qualifiers['description'][0] + if 'aSASF_choice' in self._qualifiers: + self.aSASF_choice = self._qualifiers['aSASF_choice'] + + if 'aSASF_note' in self._qualifiers: + self.aSASF_note = self._qualifiers['aSASF_note'] + + if 'aSASF_prediction' in self._qualifiers: + self.aSASF_prediction = self._qualifiers['aSASF_prediction'] + + if 'aSASF_scaffold' in self._qualifiers: + self.aSASF_scaffold = self._qualifiers['aSASF_scaffold'] + + if 'aSProdPred' in self._qualifiers: + self.aSProdPred = self._qualifiers['aSProdPred'] + if 'note' in self._qualifiers: self.notes = self._qualifiers['note'] self.location = feature.location @@ -548,7 +604,7 @@ def _get_score(self): return None def _set_score(self, value): if not ((value.replace('.', '')).replace('-', '')).isdigit(): - raise TypeError("score must be a number") + raise ValueError("score must be a number") self.__score = value score = property(_get_score, _set_score) @@ -560,7 +616,7 @@ def _get_evalue(self): return None def _set_evalue(self, value): if not ((value.replace('.', '')).replace('E-', '').replace('E+', '')).isdigit(): - raise TypeError("evalue must be an number") + raise ValueError("evalue must be an number") self.__evalue = value evalue = property(_get_evalue, _set_evalue) @@ -591,6 +647,16 @@ def to_biopython(self): self._qualifiers['description'] = [str(self.description)] if self.db_xref is not None: self._qualifiers['db_xref'] = self.db_xref + if self.aSASF_choice: + self._qualifiers['aSASF_choice'] = self.aSASF_choice + if self.aSASF_note: + self._qualifiers['aSASF_note'] = self.aSASF_note + if self.aSASF_prediction: + self._qualifiers['aSASF_prediction'] = self.aSASF_prediction + if self.aSASF_scaffold: + self._qualifiers['aSASF_scaffold'] = self.aSASF_scaffold + if self.aSProdPred: + self._qualifiers['aSProdPred'] = self.aSProdPred if self.notes: self._qualifiers['note'] = self.notes new_PFAM_domain.qualifiers = self._qualifiers.copy() @@ -618,6 +684,11 @@ def __init__(self, f_location=None, feature=None): self.translation = None self.label = [] self.specificity = [] + self.aSProdPred = [] + self.aSASF_choice = [] + self.aSASF_note = [] + self.aSASF_prediction = [] + self.aSASF_scaffold = [] self.type = 'aSDomain' self._qualifiers = {} @@ -658,6 +729,21 @@ def __init__(self, f_location=None, feature=None): if 'note' in self._qualifiers: self.notes = self._qualifiers['note'] + if 'aSASF_choice' in self._qualifiers: + self.aSASF_choice = self._qualifiers['aSASF_choice'] + + if 'aSASF_note' in self._qualifiers: + self.aSASF_note = self._qualifiers['aSASF_note'] + + if 'aSASF_prediction' in self._qualifiers: + self.aSASF_prediction = self._qualifiers['aSASF_prediction'] + + if 'aSASF_scaffold' in self._qualifiers: + self.aSASF_scaffold = self._qualifiers['aSASF_scaffold'] + + if 'aSProdPred' in self._qualifiers: + self.aSProdPred = self._qualifiers['aSProdPred'] + if 'specificity' in self._qualifiers: self.specificity = self._qualifiers['specificity'] self.location = feature.location @@ -672,7 +758,7 @@ def _get_score(self): return None def _set_score(self, value): if not ((value.replace('.', '')).replace('-', '')).isdigit(): - raise TypeError("score must be a number") + raise ValueError("score must be a number") self.__score = value score = property(_get_score, _set_score) @@ -684,7 +770,7 @@ def _get_evalue(self): return None def _set_evalue(self, value): if not ((value.replace('.', '')).replace('E-', '').replace('E+', '')).isdigit(): - raise TypeError("evalue must be an number") + raise ValueError("evalue must be an number") self.__evalue = value evalue = property(_get_evalue, _set_evalue) @@ -711,6 +797,16 @@ def to_biopython(self): self._qualifiers['domain_subtype'] = [str(self.domain_subtype)] if self.domain is not None: self._qualifiers['domain'] = [str(self.domain)] + if self.aSASF_choice: + self._qualifiers['aSASF_choice'] = self.aSASF_choice + if self.aSASF_note: + self._qualifiers['aSASF_note'] = self.aSASF_note + if self.aSASF_prediction: + self._qualifiers['aSASF_prediction'] = self.aSASF_prediction + if self.aSASF_scaffold: + self._qualifiers['aSASF_scaffold'] = self.aSASF_scaffold + if self.aSProdPred: + self._qualifiers['aSProdPred'] = self.aSProdPred if self.notes: self._qualifiers['note'] = self.notes if self.specificity: @@ -838,12 +934,18 @@ def to_biopython(self): """Returns a Bio.SeqFeature.SeqFeature object with all its members""" new_Cluster = SeqFeature(self.location, type=self.type) self._qualifiers['note'] = ["Cluster number: " + str(self.get_cluster_number())] - self._qualifiers['note'].append(self.detection) - self._qualifiers['note'].extend(self.notes) - self._qualifiers['cutoff'] = [str(self.cutoff)] - self._qualifiers['extension'] = [str(self.extension)] - self._qualifiers['product'] = self.products - self._qualifiers['contig_edge'] = [str(self.contig_edge)] + if self.detection is not None: + self._qualifiers['note'].append(self.detection) + if self.notes: + self._qualifiers['note'].extend(self.notes) + if self.cutoff is not None: + self._qualifiers['cutoff'] = [str(self.cutoff)] + if self.extension is not None: + self._qualifiers['extension'] = [str(self.extension)] + if self.products: + self._qualifiers['product'] = self.products + if self.contig_edge is not None: + self._qualifiers['contig_edge'] = [str(self.contig_edge)] if self.structure is not None: self._qualifiers['structure'] = [str(self.structure)] if self.probability is not None: @@ -982,7 +1084,7 @@ def get_clusters(self): """A list of secondary metabolite clusters present in the record""" return self._modified_cluster def set_clusters(self, clusters_list): - """To set the clusters of the seq_record""" + """To set the cluster features of the seq_record""" self._modified_cluster = clusters_list def get_CDSs(self): @@ -996,18 +1098,21 @@ def get_CDS_motifs(self): """A list of secondary metabolite CDS_motifs present in the record""" return self._modified_cds_motif def set_CDS_motifs(self, cds_motif_list): - """To set the cds_motifs features of the seq_record""" + """To set the CDS_motif features of the seq_record""" self._modified_cds_motif = cds_motif_list def get_PFAM_domains(self): """A list of secondary metabolite PFAM_domains present in the record""" return self._modified_pfam_domain + def set_PFAM_domains(self, pfam_domains_list): + """To set the PFAM_domain features of the seq_record""" + self._modified_pfam_domain = pfam_domains_list def get_aSDomains(self): """A list of secondary metabolite aSDomains present in the record""" return self._modified_asdomain def set_aSDomains(self, asdomains_list): - """To set the asdomains features of the seq_record""" + """To set the aSDomain features of the seq_record""" self._modified_asdomain = asdomains_list def get_generics(self): @@ -1017,14 +1122,6 @@ def set_generics(self, generics_list): """To set the generic features of the seq_record""" self._modified_generic = generics_list - def get_secmet_features(self): - """Return all features with sec_met qualifier""" - secmet_features = self.get_CDSs() - for generic in self.get_generics(): - if generic.sec_met: - secmet_features.append(generic) - return secmet_features - def to_biopython(self): """Returns a Bio.SeqRecord instance of the record""" new_record = self._record @@ -1129,11 +1226,11 @@ def __init__(self, clustertype=None, domains=None, kind=None): :param kind: an instance of str """ if clustertype is not None and not isinstance(clustertype, str): - raise ValueError('clustertype should be an instance of str') + raise TypeError('clustertype should be an instance of str') if domains is not None and not isinstance(domains, list): - raise ValueError('domains should be an instance of list') + raise TypeError('domains should be an instance of list') if kind is not None and not isinstance(kind, str): - raise ValueError('kind should be an instance of str') + raise TypeError('kind should be an instance of str') self.clustertype = clustertype self.domains = domains self.kind = kind @@ -1160,14 +1257,6 @@ def __repr__(self): """A string representation of the list of sec_met qualifier""" return str(self.as_list()) - def __nonzero__(self): - """Returns False if nothing is initialized""" - if self.clustertype is not None or self.kind is not None or (self.domains is not None and self.domains): - return True - if self.nrpspks or self.asf_predictions: - return True - return False - def __iter__(self): if self.clustertype is not None: yield "Type: %s" % self.clustertype From 4c08f6425b780665eb0ccd12e89b052216d0d1ce Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Thu, 17 Aug 2017 04:12:56 +0530 Subject: [PATCH 50/71] Add more test to test_record.py --- tests/test_record.py | 112 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 104 insertions(+), 8 deletions(-) diff --git a/tests/test_record.py b/tests/test_record.py index c3378b9..1ec78fd 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -2,12 +2,13 @@ import unittest import Bio from Bio import SeqIO +from Bio.Seq import Seq from Bio.SeqFeature import FeatureLocation from secmet.record import Record, GenericFeature, ClusterFeature, CDSFeature \ , CDS_motifFeature, aSDomain, PFAM_domain #Global variables for test file name and its type -filename = 'nisin.gbk' +filename = 'Y16952.3.final.gbk' filetype = 'genbank' class TestRecordMethods(unittest.TestCase): @@ -30,11 +31,77 @@ def test_from_file(self): del rec.annotations['references'] if 'references' in bp_rec.annotations: del bp_rec.annotations['references'] + self.assertEqual(len(rec), 66669) self.assertEqual(rec.annotations, bp_rec.annotations) self.assertEqual(rec.description, bp_rec.description) + def test_empty_Record(self): + """Test the identifiers of empty Record""" + rec = Record() + #seq should be a instance of Bio.Seq.Seq + try: + rec.seq = 'FAKE' + except ValueError: + pass + #description, name and id are strings + try: + rec.description = 123 + except ValueError: + pass + + try: + rec.name = 123 + except ValueError: + pass + try: + rec.id = 123 + except ValueError: + pass + rec.id = "fake_id" + rec.name = 'fake_name' + rec.seq = Seq("FAKE") + rec.description = 'fake_description' + self.assertEqual(rec.id, 'fake_id') + self.assertEqual(rec.name, 'fake_name') + self.assertEqual(rec.seq, Seq("FAKE")) + self.assertEqual(rec.description, 'fake_description') + self.assertEqual(rec.annotations, {}) + try: + rec.add_annotation(12, 34) + except ValueError: + pass + rec.add_annotation('fake_key', 'fake_value') + self.assertEqual(rec.annotations, {'fake_key': 'fake_value'}) + + def test_setters(self): + """Test setters for features lists""" + testfile = self.get_testfile() + rec = Record.from_file(testfile) + self.assertNotEqual(rec.get_CDSs(), []) + self.assertNotEqual(rec.get_clusters(), []) + self.assertNotEqual(rec.get_PFAM_domains(), []) + self.assertNotEqual(rec.get_aSDomains(), []) + self.assertNotEqual(rec.get_generics(), []) + + rec.set_CDSs([]) + rec.set_clusters([]) + rec.set_generics([]) + rec.set_CDS_motifs([]) + rec.set_PFAM_domains([]) + rec.set_aSDomains([]) + + self.assertEqual(rec.get_CDSs(), []) + self.assertEqual(rec.get_clusters(), []) + self.assertEqual(rec.get_PFAM_domains(), []) + self.assertEqual(rec.get_aSDomains(), []) + self.assertEqual(rec.get_generics(), []) + def test_from_biopython(self): """Test from_biopython() in Record""" + try: + rec = Record('fake_record') + except: + pass testfile = self.get_testfile() rec = Record.from_file(testfile) self.assertIsInstance(rec.from_biopython(rec._record), Record) @@ -141,11 +208,19 @@ def test_get_aSDomains(self): def test_get_cluster_number(self): """Test get_cluster_number() in Record""" - testfile = self.get_testfile() - rec = Record.from_file(testfile) - clusters = rec.get_clusters() - for index, cluster in enumerate(clusters): - self.assertEqual(rec.get_cluster_number(cluster), index+1) + rec = Record() + cluster1 = ClusterFeature(FeatureLocation(500, 1500)) + cluster2 = ClusterFeature(FeatureLocation(5000, 6000)) + cluster3 = ClusterFeature(FeatureLocation(2500, 4000)) + rec.add_feature(cluster2) + self.assertEqual(1, cluster2.get_cluster_number()) + rec.add_feature(cluster1) + self.assertEqual(1, cluster1.get_cluster_number()) + self.assertEqual(2, cluster2.get_cluster_number()) + rec.add_feature(cluster3) + self.assertEqual(1, cluster1.get_cluster_number()) + self.assertEqual(3, cluster2.get_cluster_number()) + self.assertEqual(2, cluster3.get_cluster_number()) def test_cluster_cds_links(self): """Test whether cluster(s) and CDS(s) are properly linked""" @@ -168,6 +243,22 @@ def test_cluster_cds_links(self): self.assertEqual(str(mod_cds.get_cluster().location), str(bp_cluster.location), \ str(mod_cluster.location)) + def test_add_feature_cds(self): + rec = Record() + cluster1 = ClusterFeature(FeatureLocation(1, 1000)) + cluster2 = ClusterFeature(FeatureLocation(2000, 3000)) + cluster3 = ClusterFeature(FeatureLocation(4000, 5000)) + cds = CDSFeature(FeatureLocation(4500, 4600)) + rec.add_feature(cds) + #If no clusters are present None should be returned + self.assertEqual(cds.get_cluster(), None) + rec.set_CDSs([]) + rec.add_feature(cluster1) + rec.add_feature(cluster2) + rec.add_feature(cluster3) + rec.add_feature(cds) + self.assertEqual(cds.get_cluster(), cluster3) + def test_add_feature(self): """Test add_feature() in Record""" testfile = self.get_testfile() @@ -179,8 +270,9 @@ def test_add_feature(self): no_of_pfam_domains = len(rec.get_PFAM_domains()) no_of_asdomains = len(rec.get_aSDomains()) #Create new Feature's with fake identity and fake location - new_cluster = ClusterFeature(FeatureLocation(15100, 15200)) - new_cds = CDSFeature(FeatureLocation(200, 300)) + invalid_feature = 'INVALID_FEATURE' + new_cluster = ClusterFeature(FeatureLocation(1000, 2000)) + new_cds = CDSFeature(FeatureLocation(1500, 1700)) new_generic = GenericFeature(FeatureLocation(350, 450), 'FAKE') new_cds_motif = CDS_motifFeature(FeatureLocation(150, 200)) new_pfam_domain = PFAM_domain(FeatureLocation(500, 600)) @@ -192,6 +284,10 @@ def test_add_feature(self): rec.add_feature(new_pfam_domain) rec.add_feature(new_asdomain) clusters = rec.get_clusters() + try: + rec.add_feature(invalid_feature) + except TypeError: + pass self.assertEqual(no_of_clusters+1, len(clusters)) self.assertEqual(no_of_cdss+1, len(rec.get_CDSs())) self.assertEqual(no_of_generics+1, len(rec.get_generics())) From 215c7be82c29cde4fa8ad388536590f9a7fcc290 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Thu, 17 Aug 2017 04:13:11 +0530 Subject: [PATCH 51/71] Add more tests to test_cds.py --- tests/test_cds.py | 99 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 93 insertions(+), 6 deletions(-) diff --git a/tests/test_cds.py b/tests/test_cds.py index 12af4dc..1b059fa 100644 --- a/tests/test_cds.py +++ b/tests/test_cds.py @@ -1,9 +1,10 @@ from os import path import unittest from Bio import SeqIO -from secmet.record import Record +from Bio.SeqFeature import FeatureLocation, SeqFeature +from secmet.record import Record, CDSFeature, SecMetQualifier -filename = 'nisin.gbk' +filename = 'Y16952.3.final.gbk' filetype = 'genbank' class TestCDSFeature(unittest.TestCase): @@ -11,17 +12,28 @@ def get_testfile(self): """File path for testing""" return path.join(path.dirname(__file__), 'data', filename) + def BioFeature(self): + biofeature = SeqFeature(location=FeatureLocation(10, 100)) + biofeature.qualifiers = {'locus_tag': ['fake_locus_tag'], 'translation': ['fake_translation'], \ + 'gene': ['fake_gene'], 'product': ['fake_product'], 'protein_id': ['fake_protein_id'], \ + 'transl_table': ['fake_transl_table'], 'source': ['fake_source'], 'db_xref': ['fake_db_xref'],\ + 'EC_number': ['fake_EC_number'], 'note': ['fake_notes'], 'aSProdPred': ['fake_aSProdPred'], \ + 'aSASF_choice':['fake_aSAF_choice'], 'aSASF_note': ['fake_aSASF_note'], 'aSASF_choice': ['fake_aSASF_choice'], \ + 'aSASF_scaffold': ['fake_aSASF_scaffold'], 'aSASF_prediction': ['fake_aSASF_prediction'], \ + 'sec_met_predictions': ['fake_sec_met_predictions'], 'unknown_qualifier': ['fake_qualifier']} + return biofeature + def test_CDSFeature_members(self): + """Test the members of CDSFeature""" testfile = self.get_testfile() rec = Record.from_file(testfile) bp_rec = SeqIO.read(testfile, filetype) bp_cdss = [i for i in bp_rec.features if i.type == 'CDS'] mod_cdss = rec.get_CDSs() self.assertEqual(len(bp_cdss), len(mod_cdss)) - #aSProdPred, aSASF_choice, aSASF_choice, aSASF_note, aSASF_prediction - #aSASF_scaffold and sec_met_predictions are lists - qualifiers_as_list = ['note', 'aSProdPred', 'aSASF_choice', 'aSASF_prediction', \ - 'aSASF_note', 'aSASF_scaffold', 'sec_met_predictions'] + #Segregate out qualifiers that are stored in list form + qualifiers_as_list = ['note', 'aSProdPred', 'aSASF_choice', 'aSASF_prediction', 'db_xref', \ + 'aSASF_note', 'aSASF_scaffold', 'sec_met_predictions', 'EC_number'] for bp_cds, mod_cds in zip(bp_cdss, mod_cdss): for key, value in bp_cds.qualifiers.items(): if value is not None and value: @@ -40,3 +52,78 @@ def test_CDSFeature_members(self): self.assertEqual(value, mod_cds.notes) else: self.assertEqual(value, getattr(mod_cds, key)) + + def test_BioFeature_to_CDSFeature(self): + biofeature = self.BioFeature() + cds_feature = CDSFeature(feature=biofeature) + self.assertEqual(str(cds_feature.location), str(FeatureLocation(10, 100))) + self.assertEqual(cds_feature.type, 'CDS') + self.assertEqual(cds_feature.locus_tag, 'fake_locus_tag') + self.assertEqual(cds_feature.translation, 'fake_translation') + self.assertEqual(cds_feature.gene, 'fake_gene') + self.assertEqual(cds_feature.product, 'fake_product') + self.assertEqual(cds_feature.protein_id, 'fake_protein_id') + self.assertEqual(cds_feature.transl_table, 'fake_transl_table') + self.assertEqual(cds_feature.source, 'fake_source') + self.assertEqual(cds_feature.EC_number, ['fake_EC_number']) + self.assertEqual(cds_feature.notes, ['fake_notes']) + self.assertEqual(cds_feature.db_xref, ['fake_db_xref']) + self.assertEqual(cds_feature.aSProdPred, ['fake_aSProdPred']) + self.assertEqual(cds_feature.aSASF_note, ['fake_aSASF_note']) + self.assertEqual(cds_feature.aSASF_scaffold, ['fake_aSASF_scaffold']) + self.assertEqual(cds_feature.aSASF_choice, ['fake_aSASF_choice']) + self.assertEqual(cds_feature.aSASF_prediction, ['fake_aSASF_prediction']) + self.assertEqual(cds_feature.sec_met_predictions, ['fake_sec_met_predictions']) + self.assertEqual(cds_feature._qualifiers['unknown_qualifier'], ['fake_qualifier']) + self.assertEqual(repr(cds_feature), repr(cds_feature.to_biopython()[0])) + self.assertIsInstance(cds_feature.to_biopython()[0], SeqFeature) + self.assertIsInstance(cds_feature.sec_met, SecMetQualifier) + + def test_SecMetQualifier(self): + """Test SecMetQualifier""" + try: + #clustertype should be a string instance + SecMetQualifier(clustertype=1) + except TypeError: + pass + try: + #domains should be a list instance + SecMetQualifier(domains='Invalid domains type') + except TypeError: + pass + try: + #kind should be a string instance + SecMetQualifier(kind=1) + except TypeError: + pass + cds = CDSFeature(FeatureLocation(1, 10)) + self.assertEqual(None, cds.sec_met.clustertype) + self.assertEqual(None, cds.sec_met.domains) + self.assertEqual(None, cds.sec_met.kind) + self.assertEqual(0, len(cds.sec_met)) + self.assertEqual([], cds.sec_met) + self.assertEqual([], cds.sec_met.nrpspks) + self.assertEqual([], cds.sec_met.asf_predictions) + self.assertEqual([], cds.sec_met.as_list()) + + cds.sec_met.clustertype = "FAKE" + cds.sec_met.domains = ["FAKE_DOMAIN1", "FAKE_DOMAIN2"] + cds.sec_met.kind = "FAKE" + cds.sec_met.nrpspks = ["FAKE_NRPS/PKS Domain: "] + cds.sec_met.asf_predictions = ['FAKE_ASF_predictions: '] + self.assertEqual("FAKE", cds.sec_met.clustertype) + self.assertEqual(["FAKE_DOMAIN1", "FAKE_DOMAIN2"], cds.sec_met.domains) + self.assertEqual("FAKE", cds.sec_met.kind) + self.assertEqual(5, len(cds.sec_met)) + expected_sec_met = ['Type: FAKE', 'Domains detected: FAKE_DOMAIN1; FAKE_DOMAIN2', 'Kind: FAKE', \ + 'FAKE_NRPS/PKS Domain: ', 'FAKE_ASF_predictions: '] + self.assertEqual(expected_sec_met, cds.sec_met.as_list()) + self.assertEqual(["FAKE_NRPS/PKS Domain: "], cds.sec_met.nrpspks) + self.assertEqual(['FAKE_ASF_predictions: '], cds.sec_met.asf_predictions) + self.assertEqual(str(cds.sec_met), repr(cds.sec_met)) + #sec_met feature should be an instance of SecMetQualifier + cds.sec_met = [] + try: + cds.to_biopython() + except ValueError: + pass From f31ab820aedc5225b9208873ae6751ca8ea6af09 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Thu, 17 Aug 2017 04:13:34 +0530 Subject: [PATCH 52/71] Add more tests to test_cds_motif.py --- tests/test_cds_motif.py | 71 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 64 insertions(+), 7 deletions(-) diff --git a/tests/test_cds_motif.py b/tests/test_cds_motif.py index 4b5dda4..e34c945 100644 --- a/tests/test_cds_motif.py +++ b/tests/test_cds_motif.py @@ -1,9 +1,10 @@ from os import path import unittest from Bio import SeqIO -from secmet.record import Record +from Bio.SeqFeature import SeqFeature, FeatureLocation +from secmet.record import Record, CDS_motifFeature -filename = 'nisin.gbk' +filename = 'Y16952.3.final.gbk' filetype = 'genbank' class TestCDS_motifFeature(unittest.TestCase): @@ -11,6 +12,16 @@ def get_testfile(self): """File path for testing""" return path.join(path.dirname(__file__), 'data', filename) + def BioFeature(self): + biofeature = SeqFeature(location=FeatureLocation(10, 100)) + biofeature.qualifiers = {'locus_tag': ['fake_locus_tag'], 'translation': ['fake_translation'], 'aSTool': ['fake_aSTool'],\ + 'motif': ['fake_motif'], 'asDomain_id': ['fake_asDomain_id'], 'detection': ['fake_detection'],\ + 'database': ['fake_database'], 'label': ['fake_label'], 'unknown_qualifier': ['fake_qualifier'],\ + 'note': ['fake_notes'], 'aSProdPred': ['fake_aSProdPred'], 'aSASF_choice':['fake_aSAF_choice'],\ + 'aSASF_note': ['fake_aSASF_note'], 'aSASF_choice': ['fake_aSASF_choice'], 'aSASF_scaffold': ['fake_aSASF_scaffold'],\ + 'aSASF_prediction': ['fake_aSASF_prediction']} + return biofeature + def test_CDS_motifFeature_members(self): """Check if all the qualifiers are properly stored in CDS_motifFeature""" testfile = self.get_testfile() @@ -18,13 +29,59 @@ def test_CDS_motifFeature_members(self): bp_rec = SeqIO.read(testfile, filetype) bp_cds_motifs = [i for i in bp_rec.features if i.type == 'CDS_motif'] mod_cds_motifs = rec.get_CDS_motifs() + #Segregate out qualifiers that are stored in list form + qualifiers_as_list = ['note', 'aSASF_choice', 'aSASF_note', 'aSASF_scaffold', \ + 'aSASF_prediction', 'aSProdPred'] for bp_motif, mod_motif in zip(bp_cds_motifs, mod_cds_motifs): for key, value in bp_motif.qualifiers.items(): - if key == 'note': - #note is modified to notes in secmet - self.assertEqual(bp_motif.qualifiers['note'], mod_motif.notes) - else: - if value is not None and value: + if value is not None and value: + if key not in qualifiers_as_list: if not hasattr(mod_motif, key): raise AttributeError("%s is not a member of CDS_motifFeature"%key) self.assertEqual(str(value[0]), str(getattr(mod_motif, key))) + else: + if key == 'note': + #note is modified to notes in secmet + self.assertEqual(value, mod_motif.notes) + else: + self.assertEqual(value, getattr(mod_motif, key)) + cdsmotif = CDS_motifFeature(FeatureLocation(1, 10)) + #score, evalue should be numbers + try: + cdsmotif.score = '-a50' + except ValueError: + pass + try: + cdsmotif.evalue = 'a5.50E-08' + except ValueError: + pass + + #If valid qualifiers and values are added, We shouldn't get an error + try: + cdsmotif.score = '-50' + cdsmotif.evalue = '5.50E-08' + except: + raise RuntimeError('Secmet unable to add valid qualifiers') + + def test_BioFeature_to_CDS_motifFeauture(self): + biofeature = self.BioFeature() + cdsmotif_feature = CDS_motifFeature(feature=biofeature) + self.assertEqual(str(cdsmotif_feature.location), str(FeatureLocation(10, 100))) + self.assertEqual(cdsmotif_feature.type, 'CDS_motif') + self.assertEqual(cdsmotif_feature.locus_tag, 'fake_locus_tag') + self.assertEqual(cdsmotif_feature.translation, 'fake_translation') + self.assertEqual(cdsmotif_feature.label, 'fake_label') + self.assertEqual(cdsmotif_feature.aSTool, 'fake_aSTool') + self.assertEqual(cdsmotif_feature.detection, 'fake_detection') + self.assertEqual(cdsmotif_feature.database, 'fake_database') + self.assertEqual(cdsmotif_feature.asDomain_id, 'fake_asDomain_id') + self.assertEqual(cdsmotif_feature.motif, 'fake_motif') + self.assertEqual(cdsmotif_feature.notes, ['fake_notes']) + self.assertEqual(cdsmotif_feature.aSProdPred, ['fake_aSProdPred']) + self.assertEqual(cdsmotif_feature.aSASF_note, ['fake_aSASF_note']) + self.assertEqual(cdsmotif_feature.aSASF_scaffold, ['fake_aSASF_scaffold']) + self.assertEqual(cdsmotif_feature.aSASF_choice, ['fake_aSASF_choice']) + self.assertEqual(cdsmotif_feature.aSASF_prediction, ['fake_aSASF_prediction']) + self.assertEqual(cdsmotif_feature._qualifiers['unknown_qualifier'], ['fake_qualifier']) + self.assertEqual(repr(cdsmotif_feature), repr(cdsmotif_feature.to_biopython()[0])) + self.assertIsInstance(cdsmotif_feature.to_biopython()[0], SeqFeature) From df98d562fd397e8ba687e439aaddfd6497ef576b Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Thu, 17 Aug 2017 04:13:46 +0530 Subject: [PATCH 53/71] Add more tests to test_cluster.py --- tests/test_cluster.py | 60 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 58 insertions(+), 2 deletions(-) diff --git a/tests/test_cluster.py b/tests/test_cluster.py index 17e3a86..5166dc4 100644 --- a/tests/test_cluster.py +++ b/tests/test_cluster.py @@ -1,10 +1,10 @@ from os import path import unittest from Bio import SeqIO -from Bio.SeqFeature import FeatureLocation +from Bio.SeqFeature import SeqFeature, FeatureLocation from secmet.record import Record, ClusterFeature -filename = 'nisin.gbk' +filename = 'Y16952.3.final.gbk' filetype = 'genbank' class TestClusterFeature(unittest.TestCase): @@ -12,12 +12,23 @@ def get_testfile(self): """File path for testing""" return path.join(path.dirname(__file__), 'data', filename) + def BioFeature(self): + biofeature = SeqFeature(location=FeatureLocation(10, 100)) + biofeature.qualifiers = {'contig_edge': ['fake_contig_edge'], 'detection': ['fake_detection'],\ + 'product': ['fake_products'], 'structure': ['fake_structure'],\ + 'note': ['Cluster number: 1', 'Detection rule(s): fake_detection', 'fake_notes'],\ + 'probability': ['fake_probability'], 'subclusterblast': ['fake_subclusterblast'],\ + 'knownclusterblast': ['fake_knownclusterblast'], 'clusterblast': ['fake_clusterblast'],\ + 'unknown_qualifier': ['fake_qualifier']} + return biofeature + def test_ClusterFeature_members(self): testfile = self.get_testfile() rec = Record.from_file(testfile) bp_rec = SeqIO.read(testfile, filetype) bp_clusters = [i for i in bp_rec.features if i.type == 'cluster'] mod_clusters = rec.get_clusters() + #Segregate out qualifiers that are stored in list form qualifiers_as_list = ['note', 'product', 'clusterblast', 'subclusterblast', \ 'knownclusterblast'] for bp_cluster, mod_cluster in zip(bp_clusters, mod_clusters): @@ -37,6 +48,51 @@ def test_ClusterFeature_members(self): self.assertEqual(bp_cluster.qualifiers['product'], mod_cluster.get_products()) else: self.assertEqual(value, getattr(mod_cluster, key)) + cluster = ClusterFeature(FeatureLocation(100, 1000)) + #cutoff, extension should be numbers + try: + cluster.cutoff = '-a5000' + except TypeError: + pass + try: + cluster.extension = 'a5000' + except TypeError: + pass + + #If valid qualifiers and values are added, We shouldn't get an error + try: + cluster.cutoff = 50000 + cluster.extension = 50000 + except: + raise RuntimeError('Secmet unable to add valid qualifiers') + + def test_BioFeature_to_ClsuterFeature(self): + biofeature = self.BioFeature() + cluster_feature = ClusterFeature(feature=biofeature) + self.assertEqual(str(cluster_feature.location), str(FeatureLocation(10, 100))) + self.assertEqual(cluster_feature.type, 'cluster') + self.assertEqual(cluster_feature.contig_edge, 'fake_contig_edge') + self.assertEqual(cluster_feature.detection, 'Detection rule(s): fake_detection') + self.assertEqual(cluster_feature.products, ['fake_products']) + self.assertEqual(cluster_feature.structure, 'fake_structure') + self.assertEqual(cluster_feature.probability, 'fake_probability') + self.assertEqual(cluster_feature.subclusterblast, ['fake_subclusterblast']) + self.assertEqual(cluster_feature.knownclusterblast, ['fake_knownclusterblast']) + self.assertEqual(cluster_feature.clusterblast, ['fake_clusterblast']) + self.assertEqual(cluster_feature.notes, ['fake_notes']) + self.assertEqual(cluster_feature._qualifiers['unknown_qualifier'], ['fake_qualifier']) + self.assertEqual(repr(cluster_feature), repr(cluster_feature.to_biopython()[0])) + self.assertIsInstance(cluster_feature.to_biopython()[0], SeqFeature) + + def test_add_product(self): + cluster = ClusterFeature(FeatureLocation(1000, 10000)) + self.assertEqual([], cluster.get_products()) + try: + cluster.add_product(111) + except TypeError: + pass + cluster.add_product('fake_product') + self.assertEqual(['fake_product'], cluster.get_products()) def test_add_new_cluster(self): """Test for adding a new cluster to record""" From a6fb2a5208481e236cee9f3536d8d7a958ca1bfe Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Thu, 17 Aug 2017 04:13:57 +0530 Subject: [PATCH 54/71] Add more tests to test_domains.py --- tests/test_domains.py | 110 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 104 insertions(+), 6 deletions(-) diff --git a/tests/test_domains.py b/tests/test_domains.py index ca208ce..28f5af0 100644 --- a/tests/test_domains.py +++ b/tests/test_domains.py @@ -1,9 +1,10 @@ from os import path import unittest from Bio import SeqIO -from secmet.record import Record +from Bio.SeqFeature import SeqFeature, FeatureLocation +from secmet.record import Record, aSDomain, PFAM_domain -filename = 'nisin.gbk' +filename = 'Y16952.3.final.gbk' filetype = 'genbank' class TestDomains(unittest.TestCase): @@ -11,6 +12,17 @@ def get_testfile(self): """File path for testing""" return path.join(path.dirname(__file__), 'data', filename) + def BioFeature(self): + biofeature = SeqFeature(location=FeatureLocation(10, 100)) + biofeature.qualifiers = {'locus_tag': ['fake_locus_tag'], 'translation': ['fake_translation'], 'aSTool': ['fake_aSTool'], \ + 'domain': ['fake_domain'], 'asDomain_id': ['fake_asDomain_id'], 'detection': ['fake_detection'], \ + 'database': ['fake_database'], 'label': ['fake_label'], 'unknown_qualifier': ['fake_qualifier'], \ + 'db_xref': ['fake_db_xref'], 'note': ['fake_notes'], 'aSProdPred': ['fake_aSProdPred'], \ + 'domain_subtype': ['fake_domain_subtype'], 'aSASF_choice':['fake_aSAF_choice'], 'aSASF_note': ['fake_aSASF_note'], \ + 'aSASF_choice': ['fake_aSASF_choice'], 'aSASF_scaffold': ['fake_aSASF_scaffold'], 'aSASF_prediction': ['fake_aSASF_prediction'], \ + 'specificity': ['fake_specificity']} + return biofeature + def test_aSDomain(self): """Check if all the qualifiers are properly stored in aSDomain""" testfile = self.get_testfile() @@ -18,11 +30,12 @@ def test_aSDomain(self): bp_rec = SeqIO.read(testfile, filetype) bp_asdomains = [i for i in bp_rec.features if i.type == 'aSDomain'] mod_asdomains = rec.get_aSDomains() - qualifiers_as_list = ['note', 'label', 'specificity'] + #Segregate out qualifiers that are stored in list form + qualifiers_as_list = ['note', 'label', 'specificity', 'aSASF_choice', 'aSASF_note', \ + 'aSASF_scaffold', 'aSASF_prediction', 'aSProdPred'] for bp_asdomain, mod_asdomain in zip(bp_asdomains, mod_asdomains): for key, value in bp_asdomain.qualifiers.items(): if value is not None and value: - #label and specificity are lists if key not in qualifiers_as_list: if not hasattr(mod_asdomain, key): raise AttributeError('%s is not a member of aSDomain'%key) @@ -34,6 +47,47 @@ def test_aSDomain(self): else: self.assertEqual(value, getattr(mod_asdomain, key)) + asdomain = aSDomain(FeatureLocation(1, 10)) + #score, evalue should be numbers + try: + asdomain.score = '-a50' + except ValueError: + pass + try: + asdomain.evalue = 'a5.50E-08' + except ValueError: + pass + + #If valid qualifiers and values are added, We shouldn't get an error + try: + asdomain.score = '-50' + asdomain.evalue = '5.50E-08' + except: + raise RuntimeError('Secmet unable to add valid qualifiers') + + def test_BioFeature_to_aSDomain(self): + biofeature = self.BioFeature() + asdomain_feature = aSDomain(feature=biofeature) + self.assertEqual(str(asdomain_feature.location), str(FeatureLocation(10, 100))) + self.assertEqual(asdomain_feature.type, 'aSDomain') + self.assertEqual(asdomain_feature.locus_tag, 'fake_locus_tag') + self.assertEqual(asdomain_feature.translation, 'fake_translation') + self.assertEqual(asdomain_feature.label, ['fake_label']) + self.assertEqual(asdomain_feature.detection, 'fake_detection') + self.assertEqual(asdomain_feature.database, 'fake_database') + self.assertEqual(asdomain_feature.asDomain_id, 'fake_asDomain_id') + self.assertEqual(asdomain_feature.domain, 'fake_domain') + self.assertEqual(asdomain_feature.domain_subtype, 'fake_domain_subtype') + self.assertEqual(asdomain_feature.specificity, ['fake_specificity']) + self.assertEqual(asdomain_feature.notes, ['fake_notes']) + self.assertEqual(asdomain_feature.aSProdPred, ['fake_aSProdPred']) + self.assertEqual(asdomain_feature.aSASF_note, ['fake_aSASF_note']) + self.assertEqual(asdomain_feature.aSASF_scaffold, ['fake_aSASF_scaffold']) + self.assertEqual(asdomain_feature.aSASF_choice, ['fake_aSASF_choice']) + self.assertEqual(asdomain_feature.aSASF_prediction, ['fake_aSASF_prediction']) + self.assertEqual(asdomain_feature._qualifiers['unknown_qualifier'], ['fake_qualifier']) + self.assertEqual(repr(asdomain_feature), repr(asdomain_feature.to_biopython()[0])) + self.assertIsInstance(asdomain_feature.to_biopython()[0], SeqFeature) def test_PFAM_domain(self): """Check if all the qualifiers are properly stored in PFAM_domain""" @@ -42,11 +96,12 @@ def test_PFAM_domain(self): bp_rec = SeqIO.read(testfile, filetype) bp_pfams = [i for i in bp_rec.features if i.type == 'PFAM_domain'] mod_pfams = rec.get_PFAM_domains() - qualifiers_as_list = ['note', 'label', 'db_xref'] + #Segregate out qualifiers that are stored in list form + qualifiers_as_list = ['note', 'label', 'db_xref', 'aSASF_choice', 'aSASF_note', \ + 'aSASF_scaffold', 'aSASF_prediction', 'aSProdPred'] for bp_pfam, mod_pfam in zip(bp_pfams, mod_pfams): for key, value in bp_pfam.qualifiers.items(): if value is not None and value: - #label and db_xref are lists if key not in qualifiers_as_list: if not hasattr(mod_pfam, key): raise AttributeError('%s is not a member of PFAM_domain'%key) @@ -57,3 +112,46 @@ def test_PFAM_domain(self): self.assertEqual(value, mod_pfam.notes) else: self.assertEqual(value, getattr(mod_pfam, key)) + + pfam = PFAM_domain(FeatureLocation(1, 10)) + #score, evalue should be numbers + try: + pfam.score = '-a50' + except ValueError: + pass + try: + pfam.evalue = 'a5.50E-08' + except ValueError: + pass + + #If valid qualifiers and values are added, We shouldn't get an error + try: + pfam.score = '-50' + pfam.evalue = '5.50E-08' + except: + raise RuntimeError('Secmet unable to add valid qualifiers') + + + def test_BioFeature_to_PFAM_domain(self): + biofeature = self.BioFeature() + pfam_feature = PFAM_domain(feature=biofeature) + self.assertEqual(str(pfam_feature.location), str(FeatureLocation(10, 100))) + self.assertEqual(pfam_feature.type, 'PFAM_domain') + self.assertEqual(pfam_feature.locus_tag, 'fake_locus_tag') + self.assertEqual(pfam_feature.translation, 'fake_translation') + self.assertEqual(pfam_feature.label, ['fake_label']) + self.assertEqual(pfam_feature.aSTool, 'fake_aSTool') + self.assertEqual(pfam_feature.detection, 'fake_detection') + self.assertEqual(pfam_feature.database, 'fake_database') + self.assertEqual(pfam_feature.asDomain_id, 'fake_asDomain_id') + self.assertEqual(pfam_feature.domain, 'fake_domain') + self.assertEqual(pfam_feature.db_xref, ['fake_db_xref']) + self.assertEqual(pfam_feature.notes, ['fake_notes']) + self.assertEqual(pfam_feature.aSProdPred, ['fake_aSProdPred']) + self.assertEqual(pfam_feature.aSASF_note, ['fake_aSASF_note']) + self.assertEqual(pfam_feature.aSASF_scaffold, ['fake_aSASF_scaffold']) + self.assertEqual(pfam_feature.aSASF_choice, ['fake_aSASF_choice']) + self.assertEqual(pfam_feature.aSASF_prediction, ['fake_aSASF_prediction']) + self.assertEqual(pfam_feature._qualifiers['unknown_qualifier'], ['fake_qualifier']) + self.assertEqual(repr(pfam_feature), repr(pfam_feature.to_biopython()[0])) + self.assertIsInstance(pfam_feature.to_biopython()[0], SeqFeature) From a9c936f0de46931313071622a044a702c1cb224f Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Sat, 19 Aug 2017 04:00:48 +0530 Subject: [PATCH 55/71] Add _map_sec_met_list_to_SecMetQualifier() and SecMetResult() --- secmet/record.py | 60 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/secmet/record.py b/secmet/record.py index 0c1cfc2..6a4b2b0 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -262,6 +262,9 @@ def __init__(self, f_location=None, feature=None): """Initialise class members(qualifiers) using SeqFeature object""" self._qualifiers = feature.qualifiers + if 'sec_met' in self._qualifiers: + self.sec_met = self._map_sec_met_list_to_SecMetQualifier(self._qualifiers['sec_met']) + if 'locus_tag' in self._qualifiers: self.locus_tag = self._qualifiers['locus_tag'][0] @@ -317,6 +320,44 @@ def get_cluster(self): """Returns the corresponding ClusterFeature""" return self.cluster + def _map_sec_met_list_to_SecMetQualifier(self, sec_met_as_list): + """Convert sec_met in list form to SecMetQualifier() form""" + self._clustertype = None + self._domains = None + self._kind = None + self._nrpspks = [] + self._asf_predictions = [] + for qualifier in sec_met_as_list: + if qualifier.startswith('Type: '): + self._clustertype = qualifier.split()[-1] + elif qualifier.startswith('Kind: '): + self._kind = qualifier.split()[-1] + elif qualifier.startswith('Domains detected: '): + qualifier = qualifier[18:] + domains = qualifier.split(';') + self._domains = [] + for domain in domains: + domain_name = domain.partition(" (")[0].replace(" ", "") + evalue = domain.partition("E-value: ")[2].partition(",")[0] + bitscore = domain.partition("bitscore: ")[2].partition(",")[0] + nr_seeds = domain.partition("seeds: ")[2].partition(")")[0] + sec_met_result = SecMetResult() + sec_met_result.query_id = domain_name + sec_met_result.evalue = evalue + sec_met_result.bitscore = bitscore + sec_met_result.nseeds = nr_seeds + self._domains.append(sec_met_result) + elif qualifier.startswith('ASF-prediction: '): + self._asf_predictions.append(qualifier) + elif qualifier.startswith('NRPS/PKS '): + self._nrpspks.append(qualifier) + sec_met = SecMetQualifier(self._clustertype, self._domains, self._kind) + if self._nrpspks: + sec_met.nrpspks = self._nrpspks + if self._asf_predictions: + sec_met.asf_prediction = self._asf_predictions + return sec_met + def to_biopython(self): """Returns a Bio.SeqFeature.SeqFeature object with all its members""" new_CDS = SeqFeature(self.location, type=self.type, id=self.id) @@ -1277,3 +1318,22 @@ def as_list(self): for qual in self: self._sec_met.append(qual) return self._sec_met + +class SecMetResult(): + def __init__(self, res=None, nseeds=None): + self.query_id = None + self.evalue = None + self.bitscore = None + self.nseeds = None + if res is not None and nseeds is not None: + self.query_id = res.query_id + self.evalue = res.evalue + self.bitscore = res.bitscore + self.nseeds = nseeds + + def __repr__(self): + return self.__str__() + + def __str__(self): + return "{} (E-value: {}, bitscore: {}, seeds: {})".format( + self.query_id, self.evalue, self.bitscore, self.nseeds) From 493ff04337d7ef69e12c18716f104f367054c8f4 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Sat, 19 Aug 2017 04:42:59 +0530 Subject: [PATCH 56/71] test_cds: Modify tests to test sec_met and SecMetResult() --- tests/test_cds.py | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/tests/test_cds.py b/tests/test_cds.py index 1b059fa..d7534be 100644 --- a/tests/test_cds.py +++ b/tests/test_cds.py @@ -2,11 +2,19 @@ import unittest from Bio import SeqIO from Bio.SeqFeature import FeatureLocation, SeqFeature -from secmet.record import Record, CDSFeature, SecMetQualifier +from secmet.record import Record, CDSFeature, SecMetQualifier, SecMetResult filename = 'Y16952.3.final.gbk' filetype = 'genbank' +class FakeResult(object): + """A FakeResult to test SecMetResult""" + def __init__(self): + """Initialise members with fake values""" + self.query_id = 'fake_id' + self.evalue = 'fake_evalue' + self.bitscore = 'fake_bitscore' + class TestCDSFeature(unittest.TestCase): def get_testfile(self): """File path for testing""" @@ -43,9 +51,11 @@ def test_CDSFeature_members(self): if not key in mod_cds._qualifiers: raise AttributeError('%s is not a member of CDSFeature'%key) else: - self.assertEqual(bp_cds.qualifiers[key], mod_cds._qualifiers[key]) + self.assertEqual(value, mod_cds._qualifiers[key]) else: self.assertEqual(str(value[0]), str(getattr(mod_cds, key))) + else: + self.assertEqual(value, mod_cds.sec_met.as_list()) else: if key == 'note': #note is modified to notes in secmet @@ -127,3 +137,20 @@ def test_SecMetQualifier(self): cds.to_biopython() except ValueError: pass + + def test_SecMetResult(self): + """Test the SecMetResult class""" + empty_result = SecMetResult() + self.assertEqual(None, empty_result.query_id) + self.assertEqual(None, empty_result.evalue) + self.assertEqual(None, empty_result.bitscore) + self.assertEqual(None, empty_result.nseeds) + + result = SecMetResult(FakeResult(), "fake_seeds") + self.assertEqual('fake_id', result.query_id) + self.assertEqual('fake_evalue', result.evalue) + self.assertEqual('fake_bitscore', result.bitscore) + self.assertEqual('fake_seeds', result.nseeds) + + expected = "fake_id (E-value: fake_evalue, bitscore: fake_bitscore, seeds: fake_seeds)" + self.assertEqual(expected, repr(result), str(result)) From de626d31a3281e969239fb95b7d64dda83917363 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Sat, 19 Aug 2017 04:49:35 +0530 Subject: [PATCH 57/71] Modify tests to use looping value for asserting --- tests/test_cluster.py | 2 +- tests/test_generic.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_cluster.py b/tests/test_cluster.py index 5166dc4..8e298ac 100644 --- a/tests/test_cluster.py +++ b/tests/test_cluster.py @@ -45,7 +45,7 @@ def test_ClusterFeature_members(self): self.assertEqual(len(value)-2, len(mod_cluster.notes)) elif key == 'product': #product is modified to products in secmet - self.assertEqual(bp_cluster.qualifiers['product'], mod_cluster.get_products()) + self.assertEqual(value, mod_cluster.get_products()) else: self.assertEqual(value, getattr(mod_cluster, key)) cluster = ClusterFeature(FeatureLocation(100, 1000)) diff --git a/tests/test_generic.py b/tests/test_generic.py index 9e7fa6d..2cd884a 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -38,7 +38,7 @@ def test_GenericFeature(self): if value is not None and value: if key == 'note': #note is modified to notes in secmet - self.assertEqual(bp_generic.qualifiers['note'], mod_generic.notes) + self.assertEqual(value, mod_generic.notes) else: self.assertEqual(value, mod_generic.get_qualifier(key)) From 615024055639ff01631e24d36d38cf54b4b6f8b1 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Sun, 20 Aug 2017 04:10:19 +0530 Subject: [PATCH 58/71] add_qualifier(): Check for the qualifier values for int or float and covert them to str --- secmet/record.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index 6a4b2b0..674a869 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -159,8 +159,12 @@ def __init__(self, f_location=None, f_type=None, feature=None): def add_qualifier(self, category, info): """Adds a qualifier to qualifiers dictionary""" - if not isinstance(category, str) or not isinstance(info, (str, list)): - raise TypeError("Type of qualifiers should be 'str'") + if not isinstance(category, str): + if not isinstance(info, (str, list)): + if not isinstance(info, (int, float)): + raise TypeError("Qualifier category should be str and value should be str or list or number") + else: + info = str(info) if category in ['evalue', 'score', 'probability']: if not (((info.replace('.', '')).replace('E-', '')).replace('-', '')).replace('+', '').isdigit(): raise ValueError('%s should be a number'% category) From 7bd9442ec3dd35bf472531551340408eb94411d9 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Mon, 21 Aug 2017 03:41:50 +0530 Subject: [PATCH 59/71] Replace binary searches with python-inbuilt bisect_left method --- secmet/record.py | 60 ++++++++++++++++++------------------------------ 1 file changed, 22 insertions(+), 38 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index 674a869..3727e97 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -6,6 +6,7 @@ from Bio.Seq import Seq from Bio.SeqRecord import SeqRecord from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation +from bisect import bisect_left def cmp_feature_location(a, b): "Compare two features by their start/end locations" @@ -20,56 +21,38 @@ def sort_features(seq_record): def find_new_cluster_pos(clusters, target_cluster): - """Binary search for appropriate position in array to add new cluster + """Find appropriate position in array to add new cluster using Bisection method param clusters: A list of all existing ClusterFeature(s) in the record param target_cluster: An instance of ClusterFeature """ if not clusters: return 0 - start = 0 - end = len(clusters)-1 - while True: - try: - #Stopping condition - if clusters[start].location.start <= target_cluster.location.start and \ - clusters[start+1].location.start >= target_cluster.location.start: - return start+1 - except IndexError: - return start+1 - mid = start+((end-start)/2+1) - if start == end or mid == start: - if target_cluster.location.start < clusters[0].location.start: - return 0 - else: - return len(clusters) - if clusters[mid].location.start > target_cluster.location.start: - end = mid - else: - start = mid + cluster_start_locations = [cluster.location.start for cluster in clusters] + return bisect_left(cluster_start_locations, target_cluster.location.start) + def find_cluster_of_new_cds(clusters, new_cds): - """Binary search to find the corresponding cluster feature of a cds feature + """Find the corresponding cluster feature of a cds feature using Bisection method param clusters: A list of all existing ClusterFeature(s) in the record param new_cds: An instance of CDSFeature """ if not clusters: return - start = 0 - end = len(clusters)-1 - while True: - #Stopping condition - if clusters[start].location.start <= new_cds.location.start <= clusters[start].location.end or \ - clusters[start].location.start <= new_cds.location.end <= clusters[start].location.end: - clusters[start].cdss.append(new_cds) - new_cds.cluster = clusters[start] - return - mid = start+((end-start)/2+1) - if start == end or mid == start: - return - if clusters[mid].location.start > new_cds.location.start: - end = mid - else: - start = mid + if new_cds.location.end < clusters[0].location.start or \ + new_cds.location.start > clusters[len(clusters)-1].location.end: + return + else: + cluster_starts = [cluster.location.start for cluster in clusters] + cluster_ends = [cluster.location.end for cluster in clusters] + if bisect_left(cluster_starts, new_cds.location.start)-1 == bisect_left(cluster_ends, new_cds.location.start): + index = bisect_left(cluster_ends, new_cds.location.start) + clusters[index].cdss.append(new_cds) + new_cds.cluster = clusters[index] + + if bisect_left(cluster_starts, new_cds.location.end)-1 == bisect_left(cluster_ends, new_cds.location.end): + index = bisect_left(cluster_ends, new_cds.location.end) + clusters[index].cdss.append(new_cds) + new_cds.cluster = clusters[index] class Feature(object): @@ -266,6 +249,7 @@ def __init__(self, f_location=None, feature=None): """Initialise class members(qualifiers) using SeqFeature object""" self._qualifiers = feature.qualifiers + self.id = feature.id if 'sec_met' in self._qualifiers: self.sec_met = self._map_sec_met_list_to_SecMetQualifier(self._qualifiers['sec_met']) From 5d746c5407fefc9c4d183d0a3b5611633b651a56 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Mon, 21 Aug 2017 03:44:35 +0530 Subject: [PATCH 60/71] from_biopython(): Remove updating cluster and cds links(There is no much use here) Update docstings, refine code --- secmet/record.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index 3727e97..b5f3bf0 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -1202,8 +1202,7 @@ def add_feature(self, feature): def from_biopython(self, record): """Modifies _modified_features_* list with new Feature instances""" - features = record.features - for feature in features: + for feature in record.features: if feature.type == 'CDS': feature = CDSFeature(feature=feature) self._modified_cds.append(feature) @@ -1224,17 +1223,13 @@ def from_biopython(self, record): else: feature = GenericFeature(feature=feature) self._modified_generic.append(feature) - cluster_cds_features = self.get_CDSs() + self.get_clusters() - for feature in cluster_cds_features: - self._update_cluster_cds_links(feature) return self def _update_cluster_cds_links(self, feature): - """Link cluster and their CDS features""" + """Link cluster and CDS features""" if isinstance(feature, ClusterFeature): clustercdsfeatures = [] - cdss = self.get_CDSs() - for cds in cdss: + for cds in self.get_CDSs(): if feature.location.start <= cds.location.start <= feature.location.end or \ feature.location.start <= cds.location.end <= feature.location.end: clustercdsfeatures.append(cds) From 01799f19c45932953be95b1e0c1e69bb8687f167 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Mon, 21 Aug 2017 03:46:33 +0530 Subject: [PATCH 61/71] Add code to update cluster and cds links for testing purpose --- tests/test_record.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_record.py b/tests/test_record.py index 1ec78fd..7639930 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -237,6 +237,9 @@ def test_cluster_cds_links(self): if bp_cluster.location.start <= cds.location.start <= bp_cluster.location.end or \ bp_cluster.location.start <= cds.location.end <= bp_cluster.location.end: bp_clustercdsfeatures.append(cds) + cluster_cds_features = rec.get_CDSs()+rec.get_clusters() + for feature in cluster_cds_features: + rec._update_cluster_cds_links(feature) self.assertEqual(len(bp_clustercdsfeatures), len(mod_cluster.get_CDSs())) for bp_cds, mod_cds in zip(bp_clustercdsfeatures, mod_cluster.get_CDSs()): self.assertEqual(str(bp_cds.location), str(mod_cds.location)) From 360052ab60ad7f3ce5d764582f6bfcb263ce222c Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Mon, 21 Aug 2017 22:22:07 +0530 Subject: [PATCH 62/71] Replace set_*() methods with erase_*() methods --- secmet/record.py | 36 ++++++++++++++++++------------------ tests/test_record.py | 14 +++++++------- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index b5f3bf0..b705b1a 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -1112,44 +1112,44 @@ def __len__(self): def get_clusters(self): """A list of secondary metabolite clusters present in the record""" return self._modified_cluster - def set_clusters(self, clusters_list): - """To set the cluster features of the seq_record""" - self._modified_cluster = clusters_list + def erase_clusters(self): + """Erase all cluster features from the Record""" + self._modified_cluster = [] def get_CDSs(self): """A list of secondary metabolite CDS features present in the record""" return self._modified_cds - def set_CDSs(self, cds_list): - """To set the CDS features of the seq_record""" - self._modified_cds = cds_list + def erase_CDSs(self): + """Erase all CDS features from the Record""" + self._modified_cds = [] def get_CDS_motifs(self): """A list of secondary metabolite CDS_motifs present in the record""" return self._modified_cds_motif - def set_CDS_motifs(self, cds_motif_list): - """To set the CDS_motif features of the seq_record""" - self._modified_cds_motif = cds_motif_list + def erase_CDS_motifs(self): + """Erase all CDS_motif features present in the Record""" + self._modified_cds_motif = [] def get_PFAM_domains(self): """A list of secondary metabolite PFAM_domains present in the record""" return self._modified_pfam_domain - def set_PFAM_domains(self, pfam_domains_list): - """To set the PFAM_domain features of the seq_record""" - self._modified_pfam_domain = pfam_domains_list + def erase_PFAM_domains(self): + """Erase all PFAM_domain features present in the Record""" + self._modified_pfam_domain = [] def get_aSDomains(self): """A list of secondary metabolite aSDomains present in the record""" return self._modified_asdomain - def set_aSDomains(self, asdomains_list): - """To set the aSDomain features of the seq_record""" - self._modified_asdomain = asdomains_list + def erase_aSDomains(self): + """Erase all aSDomain features present in the Record""" + self._modified_asdomain = [] def get_generics(self): """A list of secondary metabolite generics present in the record""" return self._modified_generic - def set_generics(self, generics_list): - """To set the generic features of the seq_record""" - self._modified_generic = generics_list + def erase_generics(self): + """Erase all generic features present in the Record""" + self._modified_generic = [] def to_biopython(self): """Returns a Bio.SeqRecord instance of the record""" diff --git a/tests/test_record.py b/tests/test_record.py index 7639930..d7e646e 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -83,12 +83,12 @@ def test_setters(self): self.assertNotEqual(rec.get_aSDomains(), []) self.assertNotEqual(rec.get_generics(), []) - rec.set_CDSs([]) - rec.set_clusters([]) - rec.set_generics([]) - rec.set_CDS_motifs([]) - rec.set_PFAM_domains([]) - rec.set_aSDomains([]) + rec.erase_CDSs() + rec.erase_clusters() + rec.erase_generics() + rec.erase_CDS_motifs() + rec.erase_PFAM_domains() + rec.erase_aSDomains() self.assertEqual(rec.get_CDSs(), []) self.assertEqual(rec.get_clusters(), []) @@ -255,7 +255,7 @@ def test_add_feature_cds(self): rec.add_feature(cds) #If no clusters are present None should be returned self.assertEqual(cds.get_cluster(), None) - rec.set_CDSs([]) + rec.erase_CDSs() rec.add_feature(cluster1) rec.add_feature(cluster2) rec.add_feature(cluster3) From 4e74f1d55b1cf40496aa5d1cdb94c4a21a91819e Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Mon, 21 Aug 2017 23:16:40 +0530 Subject: [PATCH 63/71] Replace mutable lists to immutable tuples for returning features --- secmet/record.py | 28 ++++++++++++++-------------- tests/test_record.py | 10 +++++----- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index b705b1a..23b354a 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -957,7 +957,7 @@ def get_cluster_number(self): def get_CDSs(self): """Retruns a list of CDSFeature(s) which belong to this cluster""" - return self.cdss + return tuple(self.cdss) def to_biopython(self): """Returns a Bio.SeqFeature.SeqFeature object with all its members""" @@ -1111,42 +1111,42 @@ def __len__(self): def get_clusters(self): """A list of secondary metabolite clusters present in the record""" - return self._modified_cluster + return tuple(self._modified_cluster) def erase_clusters(self): """Erase all cluster features from the Record""" self._modified_cluster = [] def get_CDSs(self): """A list of secondary metabolite CDS features present in the record""" - return self._modified_cds + return tuple(self._modified_cds) def erase_CDSs(self): """Erase all CDS features from the Record""" self._modified_cds = [] def get_CDS_motifs(self): """A list of secondary metabolite CDS_motifs present in the record""" - return self._modified_cds_motif + return tuple(self._modified_cds_motif) def erase_CDS_motifs(self): """Erase all CDS_motif features present in the Record""" self._modified_cds_motif = [] def get_PFAM_domains(self): """A list of secondary metabolite PFAM_domains present in the record""" - return self._modified_pfam_domain + return tuple(self._modified_pfam_domain) def erase_PFAM_domains(self): """Erase all PFAM_domain features present in the Record""" self._modified_pfam_domain = [] def get_aSDomains(self): """A list of secondary metabolite aSDomains present in the record""" - return self._modified_asdomain + return tuple(self._modified_asdomain) def erase_aSDomains(self): """Erase all aSDomain features present in the Record""" self._modified_asdomain = [] def get_generics(self): """A list of secondary metabolite generics present in the record""" - return self._modified_generic + return tuple(self._modified_generic) def erase_generics(self): """Erase all generic features present in the Record""" self._modified_generic = [] @@ -1154,12 +1154,12 @@ def erase_generics(self): def to_biopython(self): """Returns a Bio.SeqRecord instance of the record""" new_record = self._record - features = self.get_generics()[:] #Clone the private list - features.extend(self.get_clusters()) - features.extend(self.get_CDSs()) - features.extend(self.get_CDS_motifs()) - features.extend(self.get_aSDomains()) - features.extend(self.get_PFAM_domains()) + features = list(self.get_generics()) + features.extend(list(self.get_clusters())) + features.extend(list(self.get_CDSs())) + features.extend(list(self.get_CDS_motifs())) + features.extend(list(self.get_aSDomains())) + features.extend(list(self.get_PFAM_domains())) record_features = [] for feature in features: record_features.append(feature.to_biopython()[0]) @@ -1178,7 +1178,7 @@ def add_feature(self, feature): if not isinstance(feature, Feature): raise TypeError("The argument is not an instance of 'Feature'") if isinstance(feature, ClusterFeature): - clusters = self.get_clusters() + clusters = self._modified_cluster index = find_new_cluster_pos(clusters, feature) clusters.insert(index, feature) feature.parent_record = self diff --git a/tests/test_record.py b/tests/test_record.py index d7e646e..b65bafc 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -90,11 +90,11 @@ def test_setters(self): rec.erase_PFAM_domains() rec.erase_aSDomains() - self.assertEqual(rec.get_CDSs(), []) - self.assertEqual(rec.get_clusters(), []) - self.assertEqual(rec.get_PFAM_domains(), []) - self.assertEqual(rec.get_aSDomains(), []) - self.assertEqual(rec.get_generics(), []) + self.assertEqual(rec.get_CDSs(), ()) + self.assertEqual(rec.get_clusters(), ()) + self.assertEqual(rec.get_PFAM_domains(), ()) + self.assertEqual(rec.get_aSDomains(), ()) + self.assertEqual(rec.get_generics(), ()) def test_from_biopython(self): """Test from_biopython() in Record""" From 09143d295ccd6e8cdda6579941907be0042007d4 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Tue, 22 Aug 2017 04:37:00 +0530 Subject: [PATCH 64/71] Add SubCDSFeature super class for CDS_motifFeature, aSDomain, PFAM_domain Refine code, remove unnecessary checks for NoneType --- secmet/record.py | 533 ++++++++++------------------------------------- 1 file changed, 113 insertions(+), 420 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index 23b354a..d5aede5 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -103,37 +103,15 @@ def __init__(self, f_location=None, f_type=None, feature=None): self.type = feature.type self.location = feature.location - if 'locus_tag' in self._qualifiers: - self.locus_tag = self._qualifiers['locus_tag'][0] - del self._qualifiers['locus_tag'] + self.locus_tag = self._qualifiers.pop('locus_tag', [None])[0] + self.gene = self._qualifiers.pop('gene', [None])[0] + self.translation = self._qualifiers.pop('translation', [None])[0] + self.name = self._qualifiers.pop('name', [None])[0] + self.seq = self._qualifiers.pop('seq', [None])[0] + self.description = self._qualifiers.pop('description', [None])[0] + self.sec_met = self._qualifiers.pop('sec_met', []) + self.notes = self._qualifiers.pop('note', []) - if 'gene' in self._qualifiers: - self.gene = self._qualifiers['gene'][0] - del self._qualifiers['gene'] - - if 'translation' in self._qualifiers: - self.translation = self._qualifiers['translation'][0] - del self._qualifiers['translation'] - - if 'name' in self._qualifiers: - self.name = self._qualifiers['name'][0] - del self._qualifiers['name'] - - if 'seq' in self._qualifiers: - self.seq = self._qualifiers['seq'][0] - del self._qualifiers['seq'] - - if 'description' in self._qualifiers: - self.description = self._qualifiers['description'][0] - del self._qualifiers['description'] - - if 'sec_met' in self._qualifiers: - self.sec_met.extend(self._qualifiers['sec_met']) - del self._qualifiers['sec_met'] - - if 'note' in self._qualifiers: - self.notes = self._qualifiers['note'] - del self._qualifiers['note'] else: self.location = f_location if not isinstance(f_type, str): @@ -245,62 +223,29 @@ def __init__(self, f_location=None, feature=None): self.sec_met_predictions = [] self.type = 'CDS' - if feature is not None: + if feature: """Initialise class members(qualifiers) using SeqFeature object""" self._qualifiers = feature.qualifiers - self.id = feature.id + self.locus_tag = self._qualifiers.pop('locus_tag', [None])[0] + self.product = self._qualifiers.pop('product', [None])[0] + self.protein_id = self._qualifiers.pop('protein_id', [None])[0] + self.gene = self._qualifiers.pop('gene', [None])[0] + self.translation = self._qualifiers.pop('translation', [None])[0] + self.notes = self._qualifiers.pop('note',[]) + self.EC_number = self._qualifiers.pop('EC_number', []) + self.transl_table = self._qualifiers.pop('transl_table', [None])[0] + self.source = self._qualifiers.pop('source', [None])[0] + self.aSASF_note = self._qualifiers.pop('aSASF_note', []) + self.aSASF_choice = self._qualifiers.pop('aSASF_choice', []) + self.aSASF_scaffold = self._qualifiers.pop('aSASF_scaffold', []) + self.aSASF_prediction = self._qualifiers.pop('aSASF_prediction', []) + self.aSProdPred = self._qualifiers.pop('aSProdPred', []) + self.db_xref = self._qualifiers.pop('db_xref', []) + self.sec_met_predictions = self._qualifiers.pop('sec_met_predictions', []) + self.location = feature.location if 'sec_met' in self._qualifiers: self.sec_met = self._map_sec_met_list_to_SecMetQualifier(self._qualifiers['sec_met']) - - if 'locus_tag' in self._qualifiers: - self.locus_tag = self._qualifiers['locus_tag'][0] - - if 'product' in self._qualifiers: - self.product = self._qualifiers['product'][0] - - if 'protein_id' in self._qualifiers: - self.protein_id = self._qualifiers['protein_id'][0] - - if 'gene' in self._qualifiers: - self.gene = self._qualifiers['gene'][0] - - if 'translation' in self._qualifiers: - self.translation = self._qualifiers['translation'][0] - - if 'note' in self._qualifiers: - self.notes = self._qualifiers['note'] - - if 'EC_number' in self._qualifiers: - self.EC_number = self._qualifiers['EC_number'] - - if 'transl_table' in self._qualifiers: - self.transl_table = self._qualifiers['transl_table'][0] - - if 'source' in self._qualifiers: - self.source = self._qualifiers['source'][0] - - if 'aSASF_choice' in self._qualifiers: - self.aSASF_choice = self._qualifiers['aSASF_choice'] - - if 'aSASF_note' in self._qualifiers: - self.aSASF_note = self._qualifiers['aSASF_note'] - - if 'aSASF_prediction' in self._qualifiers: - self.aSASF_prediction = self._qualifiers['aSASF_prediction'] - - if 'aSASF_scaffold' in self._qualifiers: - self.aSASF_scaffold = self._qualifiers['aSASF_scaffold'] - - if 'aSProdPred' in self._qualifiers: - self.aSProdPred = self._qualifiers['aSProdPred'] - - if 'db_xref' in self._qualifiers: - self.db_xref = self._qualifiers['db_xref'] - - if 'sec_met_predictions' in self._qualifiers: - self.sec_met_predictions = self._qualifiers['sec_met_predictions'] - self.location = feature.location else: self.location = f_location @@ -364,7 +309,7 @@ def to_biopython(self): self._qualifiers['translation'] = [str(self.translation)] if self.notes: self._qualifiers['note'] = self.notes - if self.EC_number is not None: + if self.EC_number: self._qualifiers['EC_number'] = self.EC_number if self.transl_table is not None: self._qualifiers['transl_table'] = [str(self.transl_table)] @@ -392,23 +337,17 @@ def __repr__(self): return repr(self.to_biopython()[0]) -class CDS_motifFeature(Feature): - """A CDS_motifFeature which subclasses Feature""" - def __init__(self, f_location=None, feature=None): - """Initialise a CDS_motifFeature - param f_location: class 'Bio.SeqFeature.FeatureLocation/CompoundLocation' - param feature: class 'Bio.SeqFeature.SeqFeature' - """ - super(CDS_motifFeature, self).__init__() - self.label = None - self.motif = None +class SubCDSFeature(Feature): + """A super class for CDS_motifFeature, PFAM_domain and aSDomain""" + def __init__(self, f_location, feature): + super(SubCDSFeature, self).__init__() self.asDomain_id = None self.aSTool = None self.detection = None self.database = None self.translation = None self.locus_tag = None - self.type = 'CDS_motif' + self.label = None self.aSProdPred = [] self.aSASF_choice = [] self.aSASF_note = [] @@ -416,61 +355,30 @@ def __init__(self, f_location=None, feature=None): self.aSASF_scaffold = [] self._qualifiers = {} - if feature is not None: - """Initialise class members(qualifiers) using SeqFeature object""" + if feature: self._qualifiers = feature.qualifiers - - if 'locus_tag' in self._qualifiers: - self.locus_tag = self._qualifiers['locus_tag'][0] - - if 'translation' in self._qualifiers: - self.translation = self._qualifiers['translation'][0] - - if 'label' in self._qualifiers: - self.label = self._qualifiers['label'][0] - - if 'motif' in self._qualifiers: - self.motif = self._qualifiers['motif'][0] - - if 'asDomain_id' in self._qualifiers: - self.asDomain_id = self._qualifiers['asDomain_id'][0] - - if 'evalue' in self._qualifiers: - self.evalue = self._qualifiers['evalue'][0] - + self.locus_tag = self._qualifiers.pop('locus_tag', [None])[0] + self.translation = self._qualifiers.pop('translation', [None])[0] + self.asDomain_id = self._qualifiers.pop('asDomain_id', [None])[0] + self.aSTool = self._qualifiers.pop('aSTool', [None])[0] + self.detection = self._qualifiers.pop('detection', [None])[0] + self.database = self._qualifiers.pop('database', [None])[0] + self.label = self._qualifiers.pop('label', [None])[0] + self.aSASF_note = self._qualifiers.pop('aSASF_note', []) + self.aSASF_choice = self._qualifiers.pop('aSASF_choice', []) + self.aSASF_scaffold = self._qualifiers.pop('aSASF_scaffold', []) + self.aSASF_prediction = self._qualifiers.pop('aSASF_prediction', []) + self.aSProdPred = self._qualifiers.pop('aSProdPred', []) + self.notes = self._qualifiers.pop('note', []) + self.location = feature.location if 'score' in self._qualifiers: self.score = self._qualifiers['score'][0] - - if 'aSTool' in self._qualifiers: - self.aSTool = self._qualifiers['aSTool'][0] - - if 'detection' in self._qualifiers: - self.detection = self._qualifiers['detection'][0] - - if 'database' in self._qualifiers: - self.database = self._qualifiers['database'][0] - - if 'aSASF_choice' in self._qualifiers: - self.aSASF_choice = self._qualifiers['aSASF_choice'] - - if 'aSASF_note' in self._qualifiers: - self.aSASF_note = self._qualifiers['aSASF_note'] - - if 'aSASF_prediction' in self._qualifiers: - self.aSASF_prediction = self._qualifiers['aSASF_prediction'] - - if 'aSASF_scaffold' in self._qualifiers: - self.aSASF_scaffold = self._qualifiers['aSASF_scaffold'] - - if 'aSProdPred' in self._qualifiers: - self.aSProdPred = self._qualifiers['aSProdPred'] - - if 'note' in self._qualifiers: - self.notes = self._qualifiers['note'] - self.location = feature.location + del self._qualifiers['score'] + if 'evalue' in self._qualifiers: + self.evalue = self._qualifiers['evalue'][0] + del self._qualifiers['evalue'] else: self.location = f_location - #Check for a valid score qualifier before assigning def _get_score(self): try: @@ -478,9 +386,10 @@ def _get_score(self): except: return None def _set_score(self, value): - if not (((value.replace('.', '')).replace('-', ''))).replace('+', '').isdigit(): - raise ValueError("score must be a number") - self.__score = value + try: + self.__score = float(value) + except ValueError: + raise ValueError('Invalid score value') score = property(_get_score, _set_score) #Check for a valid evalue qualifier before assigning @@ -490,33 +399,30 @@ def _get_evalue(self): except: return None def _set_evalue(self, value): - if not ((value.replace('.', '')).replace('E-', '').replace('E+', '')).isdigit(): - raise ValueError("evalue must be an number") - self.__evalue = value + try: + self.__evalue = float(value) + except ValueError: + raise ValueError('Invalid evalue value') evalue = property(_get_evalue, _set_evalue) - def to_biopython(self): - """Returns a Bio.SeqFeature.SeqFeature object with all its members""" - new_CDS_motif = SeqFeature(self.location, type=self.type) - if self.locus_tag is not None: + def _get_feature_qualifiers(self): + if self.locus_tag: self._qualifiers['locus_tag'] = [str(self.locus_tag)] - if self.translation is not None: + if self.translation: self._qualifiers['translation'] = [str(self.translation)] - if self.label is not None: - self._qualifiers['label'] = [str(self.label)] - if self.motif is not None: - self._qualifiers['motif'] = [str(self.motif)] - if self.database is not None: + if self.database: self._qualifiers['database'] = [str(self.database)] - if self.evalue is not None: + if self.evalue: self._qualifiers['evalue'] = [str(self.evalue)] - if self.asDomain_id is not None: + if self.asDomain_id: self._qualifiers['asDomain_id'] = [str(self.asDomain_id)] - if self.detection is not None: + if self.detection: self._qualifiers['detection'] = [str(self.detection)] - if self.score is not None: + if self.score: self._qualifiers['score'] = [str(self.score)] - if self.aSTool is not None: + if self.label: + self._qualifiers['label'] = [str(self.label)] + if self.aSTool: self._qualifiers['aSTool'] = [str(self.aSTool)] if self.aSASF_choice: self._qualifiers['aSASF_choice'] = self.aSASF_choice @@ -530,6 +436,29 @@ def to_biopython(self): self._qualifiers['aSProdPred'] = self.aSProdPred if self.notes: self._qualifiers['note'] = self.notes + return self._qualifiers + +class CDS_motifFeature(SubCDSFeature): + """A CDS_motifFeature which subclasses Feature""" + def __init__(self, f_location=None, feature=None): + """Initialise a CDS_motifFeature + param f_location: class 'Bio.SeqFeature.FeatureLocation/CompoundLocation' + param feature: class 'Bio.SeqFeature.SeqFeature' + """ + super(CDS_motifFeature, self).__init__(f_location, feature) + self.motif = None + self.type = 'CDS_motif' + + if feature: + """Initialise class members(qualifiers) using SeqFeature object""" + self.motif = feature.qualifiers.pop('motif', [None])[0] + + def to_biopython(self): + """Returns a Bio.SeqFeature.SeqFeature object with all its members""" + new_CDS_motif = SeqFeature(self.location, type=self.type) + self._qualifiers = self._get_feature_qualifiers() + if self.motif: + self._qualifiers['motif'] = [str(self.motif)] new_CDS_motif.qualifiers = self._qualifiers.copy() return [new_CDS_motif] @@ -538,156 +467,35 @@ def __repr__(self): return repr(self.to_biopython()[0]) -class PFAM_domain(Feature): +class PFAM_domain(SubCDSFeature): """A PHAM_domain feature which subclasses Feature""" def __init__(self, f_location=None, feature=None): """Initialise a ClusterFeature param f_location: class 'Bio.SeqFeature.FeatureLocation/CompoundLocation' param feature: class 'Bio.SeqFeature.SeqFeature' """ - super(PFAM_domain, self).__init__() + super(PFAM_domain, self).__init__(f_location, feature) self.domain = None - self.asDomain_id = None - self.locus_tag = None - self.aSTool = None - self.detection = None - self.database = None - self.translation = None self.description = None self.db_xref = [] - self.label = [] - self.aSProdPred = [] - self.aSASF_choice = [] - self.aSASF_note = [] - self.aSASF_prediction = [] - self.aSASF_scaffold = [] self.type = 'PFAM_domain' - self._qualifiers = {} - if feature is not None: + if feature: """Initialise class members(qualifiers) using SeqFeature object""" - self._qualifiers = feature.qualifiers - - if 'locus_tag' in self._qualifiers: - self.locus_tag = self._qualifiers['locus_tag'][0] - - if 'domain' in self._qualifiers: - self.domain = self._qualifiers['domain'][0] - - if 'translation' in self._qualifiers: - self.translation = self._qualifiers['translation'][0] - - if 'label' in self._qualifiers: - self.label = self._qualifiers['label'] - - if 'asDomain_id' in self._qualifiers: - self.asDomain_id = self._qualifiers['asDomain_id'][0] - - if 'evalue' in self._qualifiers: - self.evalue = self._qualifiers['evalue'][0] - - if 'score' in self._qualifiers: - self.score = self._qualifiers['score'][0] - - if 'aSTool' in self._qualifiers: - self.aSTool = self._qualifiers['aSTool'][0] - - if 'detection' in self._qualifiers: - self.detection = self._qualifiers['detection'][0] - - if 'database' in self._qualifiers: - self.database = self._qualifiers['database'][0] - - if 'db_xref' in self._qualifiers: - self.db_xref = self._qualifiers['db_xref'] - - if 'description' in self._qualifiers: - self.description = self._qualifiers['description'][0] - - if 'aSASF_choice' in self._qualifiers: - self.aSASF_choice = self._qualifiers['aSASF_choice'] - - if 'aSASF_note' in self._qualifiers: - self.aSASF_note = self._qualifiers['aSASF_note'] - - if 'aSASF_prediction' in self._qualifiers: - self.aSASF_prediction = self._qualifiers['aSASF_prediction'] - - if 'aSASF_scaffold' in self._qualifiers: - self.aSASF_scaffold = self._qualifiers['aSASF_scaffold'] - - if 'aSProdPred' in self._qualifiers: - self.aSProdPred = self._qualifiers['aSProdPred'] - - if 'note' in self._qualifiers: - self.notes = self._qualifiers['note'] - self.location = feature.location - else: - self.location = f_location - - #Check for a valid score qualifier before assigning - def _get_score(self): - try: - return self.__score - except: - return None - def _set_score(self, value): - if not ((value.replace('.', '')).replace('-', '')).isdigit(): - raise ValueError("score must be a number") - self.__score = value - score = property(_get_score, _set_score) - - #Check for a valid evalue qualifier before assigning - def _get_evalue(self): - try: - return self.__evalue - except: - return None - def _set_evalue(self, value): - if not ((value.replace('.', '')).replace('E-', '').replace('E+', '')).isdigit(): - raise ValueError("evalue must be an number") - self.__evalue = value - evalue = property(_get_evalue, _set_evalue) + self.domain = feature.qualifiers.pop('domain', [None])[0] + self.description = feature.qualifiers.pop('description', [None])[0] + self.db_xref = feature.qualifiers.pop('db_xref', []) def to_biopython(self): """Returns a Bio.SeqFeature.SeqFeature object with all its members""" new_PFAM_domain = SeqFeature(self.location, type=self.type) - if self.locus_tag is not None: - self._qualifiers['locus_tag'] = [str(self.locus_tag)] - if self.translation is not None: - self._qualifiers['translation'] = [str(self.translation)] - if self.label is not None: - self._qualifiers['label'] = self.label - if self.database is not None: - self._qualifiers['database'] = [str(self.database)] - if self.evalue is not None: - self._qualifiers['evalue'] = [str(self.evalue)] - if self.asDomain_id is not None: - self._qualifiers['asDomain_id'] = [str(self.asDomain_id)] - if self.detection is not None: - self._qualifiers['detection'] = [str(self.detection)] - if self.score is not None: - self._qualifiers['score'] = [str(self.score)] - if self.aSTool is not None: - self._qualifiers['aSTool'] = [str(self.aSTool)] - if self.domain is not None: + self._qualifiers = self._get_feature_qualifiers() + if self.domain: self._qualifiers['domain'] = [str(self.domain)] - if self.description is not None: + if self.description: self._qualifiers['description'] = [str(self.description)] - if self.db_xref is not None: + if self.db_xref: self._qualifiers['db_xref'] = self.db_xref - if self.aSASF_choice: - self._qualifiers['aSASF_choice'] = self.aSASF_choice - if self.aSASF_note: - self._qualifiers['aSASF_note'] = self.aSASF_note - if self.aSASF_prediction: - self._qualifiers['aSASF_prediction'] = self.aSASF_prediction - if self.aSASF_scaffold: - self._qualifiers['aSASF_scaffold'] = self.aSASF_scaffold - if self.aSProdPred: - self._qualifiers['aSProdPred'] = self.aSProdPred - if self.notes: - self._qualifiers['note'] = self.notes new_PFAM_domain.qualifiers = self._qualifiers.copy() return [new_PFAM_domain] @@ -696,148 +504,33 @@ def __repr__(self): return repr(self.to_biopython()[0]) -class aSDomain(Feature): +class aSDomain(SubCDSFeature): """A aSDomain feature which subclasses Feature""" def __init__(self, f_location=None, feature=None): """Initialise a ClusterFeature param f_location: class 'Bio.SeqFeature.FeatureLocation/CompoundLocation' param feature: class 'Bio.SeqFeature.SeqFeature' """ - super(aSDomain, self).__init__() + super(aSDomain, self).__init__(f_location, feature) self.domain = None self.domain_subtype = None - self.asDomain_id = None - self.locus_tag = None - self.detection = None - self.database = None - self.translation = None - self.label = [] self.specificity = [] - self.aSProdPred = [] - self.aSASF_choice = [] - self.aSASF_note = [] - self.aSASF_prediction = [] - self.aSASF_scaffold = [] self.type = 'aSDomain' - self._qualifiers = {} - if feature is not None: + if feature: """Initialise class members(qualifiers) using SeqFeature object""" - self._qualifiers = feature.qualifiers - - if 'locus_tag' in self._qualifiers: - self.locus_tag = self._qualifiers['locus_tag'][0] - - if 'domain' in self._qualifiers: - self.domain = self._qualifiers['domain'][0] - - if 'domain_subtype' in self._qualifiers: - self.domain_subtype = self._qualifiers['domain_subtype'][0] - - if 'translation' in self._qualifiers: - self.translation = self._qualifiers['translation'][0] - - if 'label' in self._qualifiers: - self.label = self._qualifiers['label'] - - if 'asDomain_id' in self._qualifiers: - self.asDomain_id = self._qualifiers['asDomain_id'][0] - - if 'evalue' in self._qualifiers: - self.evalue = self._qualifiers['evalue'][0] - - if 'score' in self._qualifiers: - self.score = self._qualifiers['score'][0] - - if 'detection' in self._qualifiers: - self.detection = self._qualifiers['detection'][0] - - if 'database' in self._qualifiers: - self.database = self._qualifiers['database'][0] - - if 'note' in self._qualifiers: - self.notes = self._qualifiers['note'] - - if 'aSASF_choice' in self._qualifiers: - self.aSASF_choice = self._qualifiers['aSASF_choice'] - - if 'aSASF_note' in self._qualifiers: - self.aSASF_note = self._qualifiers['aSASF_note'] - - if 'aSASF_prediction' in self._qualifiers: - self.aSASF_prediction = self._qualifiers['aSASF_prediction'] - - if 'aSASF_scaffold' in self._qualifiers: - self.aSASF_scaffold = self._qualifiers['aSASF_scaffold'] - - if 'aSProdPred' in self._qualifiers: - self.aSProdPred = self._qualifiers['aSProdPred'] - - if 'specificity' in self._qualifiers: - self.specificity = self._qualifiers['specificity'] - self.location = feature.location - else: - self.location = f_location - - #Check for a valid score qualifier before assigning - def _get_score(self): - try: - return self.__score - except: - return None - def _set_score(self, value): - if not ((value.replace('.', '')).replace('-', '')).isdigit(): - raise ValueError("score must be a number") - self.__score = value - score = property(_get_score, _set_score) - - #Check for a valid evalue qualifier before assigning - def _get_evalue(self): - try: - return self.__evalue - except: - return None - def _set_evalue(self, value): - if not ((value.replace('.', '')).replace('E-', '').replace('E+', '')).isdigit(): - raise ValueError("evalue must be an number") - self.__evalue = value - evalue = property(_get_evalue, _set_evalue) + self.domain = feature.qualifiers.pop('domain', [None])[0] + self.domain_subtype = feature.qualifiers.pop('domain_subtype', [None])[0] + self.specificity = feature.qualifiers.pop('specificity', []) def to_biopython(self): """Returns a Bio.SeqFeature.SeqFeature object with all its members""" new_aSDomain = SeqFeature(self.location, type=self.type) - if self.locus_tag is not None: - self._qualifiers['locus_tag'] = [str(self.locus_tag)] - if self.translation is not None: - self._qualifiers['translation'] = [str(self.translation)] - if self.label is not None: - self._qualifiers['label'] = self.label - if self.database is not None: - self._qualifiers['database'] = [str(self.database)] - if self.evalue is not None: - self._qualifiers['evalue'] = [str(self.evalue)] - if self.asDomain_id is not None: - self._qualifiers['asDomain_id'] = [str(self.asDomain_id)] - if self.detection is not None: - self._qualifiers['detection'] = [str(self.detection)] - if self.score is not None: - self._qualifiers['score'] = [str(self.score)] - if self.domain_subtype is not None: - self._qualifiers['domain_subtype'] = [str(self.domain_subtype)] - if self.domain is not None: + self._qualifiers = self._get_feature_qualifiers() + if self.domain: self._qualifiers['domain'] = [str(self.domain)] - if self.aSASF_choice: - self._qualifiers['aSASF_choice'] = self.aSASF_choice - if self.aSASF_note: - self._qualifiers['aSASF_note'] = self.aSASF_note - if self.aSASF_prediction: - self._qualifiers['aSASF_prediction'] = self.aSASF_prediction - if self.aSASF_scaffold: - self._qualifiers['aSASF_scaffold'] = self.aSASF_scaffold - if self.aSProdPred: - self._qualifiers['aSProdPred'] = self.aSProdPred - if self.notes: - self._qualifiers['note'] = self.notes + if self.domain_subtype: + self._qualifiers['domain_subtype'] = [str(self.domain_subtype)] if self.specificity: self._qualifiers['specificity'] = self.specificity new_aSDomain.qualifiers = self._qualifiers.copy() From da69795b5e94e41f0cf58089e758fd340cb02a2a Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Tue, 22 Aug 2017 04:38:03 +0530 Subject: [PATCH 65/71] Update secmet tests --- tests/test_cds_motif.py | 6 +++++- tests/test_domains.py | 18 ++++++++++++------ tests/test_record.py | 20 +++++++++++++++----- 3 files changed, 32 insertions(+), 12 deletions(-) diff --git a/tests/test_cds_motif.py b/tests/test_cds_motif.py index e34c945..0f5af73 100644 --- a/tests/test_cds_motif.py +++ b/tests/test_cds_motif.py @@ -38,7 +38,11 @@ def test_CDS_motifFeature_members(self): if key not in qualifiers_as_list: if not hasattr(mod_motif, key): raise AttributeError("%s is not a member of CDS_motifFeature"%key) - self.assertEqual(str(value[0]), str(getattr(mod_motif, key))) + #score and evalue are numbers + if key in ['score', 'evalue']: + self.assertEqual(float(value[0]), float(getattr(mod_motif, key))) + else: + self.assertEqual(str(value[0]), str(getattr(mod_motif, key))) else: if key == 'note': #note is modified to notes in secmet diff --git a/tests/test_domains.py b/tests/test_domains.py index 28f5af0..3e8244f 100644 --- a/tests/test_domains.py +++ b/tests/test_domains.py @@ -31,7 +31,7 @@ def test_aSDomain(self): bp_asdomains = [i for i in bp_rec.features if i.type == 'aSDomain'] mod_asdomains = rec.get_aSDomains() #Segregate out qualifiers that are stored in list form - qualifiers_as_list = ['note', 'label', 'specificity', 'aSASF_choice', 'aSASF_note', \ + qualifiers_as_list = ['note', 'specificity', 'aSASF_choice', 'aSASF_note', \ 'aSASF_scaffold', 'aSASF_prediction', 'aSProdPred'] for bp_asdomain, mod_asdomain in zip(bp_asdomains, mod_asdomains): for key, value in bp_asdomain.qualifiers.items(): @@ -39,7 +39,10 @@ def test_aSDomain(self): if key not in qualifiers_as_list: if not hasattr(mod_asdomain, key): raise AttributeError('%s is not a member of aSDomain'%key) - self.assertEqual(str(value[0]), str(getattr(mod_asdomain, key))) + if key in ['score', 'evalue']: + self.assertEqual(float(value[0]), float(getattr(mod_asdomain, key))) + else: + self.assertEqual(str(value[0]), str(getattr(mod_asdomain, key))) else: if key == 'note': #note is modified to notes in secmet @@ -72,7 +75,7 @@ def test_BioFeature_to_aSDomain(self): self.assertEqual(asdomain_feature.type, 'aSDomain') self.assertEqual(asdomain_feature.locus_tag, 'fake_locus_tag') self.assertEqual(asdomain_feature.translation, 'fake_translation') - self.assertEqual(asdomain_feature.label, ['fake_label']) + self.assertEqual(asdomain_feature.label, 'fake_label') self.assertEqual(asdomain_feature.detection, 'fake_detection') self.assertEqual(asdomain_feature.database, 'fake_database') self.assertEqual(asdomain_feature.asDomain_id, 'fake_asDomain_id') @@ -97,7 +100,7 @@ def test_PFAM_domain(self): bp_pfams = [i for i in bp_rec.features if i.type == 'PFAM_domain'] mod_pfams = rec.get_PFAM_domains() #Segregate out qualifiers that are stored in list form - qualifiers_as_list = ['note', 'label', 'db_xref', 'aSASF_choice', 'aSASF_note', \ + qualifiers_as_list = ['note', 'db_xref', 'aSASF_choice', 'aSASF_note', \ 'aSASF_scaffold', 'aSASF_prediction', 'aSProdPred'] for bp_pfam, mod_pfam in zip(bp_pfams, mod_pfams): for key, value in bp_pfam.qualifiers.items(): @@ -105,7 +108,10 @@ def test_PFAM_domain(self): if key not in qualifiers_as_list: if not hasattr(mod_pfam, key): raise AttributeError('%s is not a member of PFAM_domain'%key) - self.assertEqual(str(value[0]), str(getattr(mod_pfam, key))) + if key in ['score', 'evalue']: + self.assertEqual(float(value[0]), float(getattr(mod_pfam, key))) + else: + self.assertEqual(str(value[0]), str(getattr(mod_pfam, key))) else: if key == 'note': #note is modified to notes in secmet @@ -139,7 +145,7 @@ def test_BioFeature_to_PFAM_domain(self): self.assertEqual(pfam_feature.type, 'PFAM_domain') self.assertEqual(pfam_feature.locus_tag, 'fake_locus_tag') self.assertEqual(pfam_feature.translation, 'fake_translation') - self.assertEqual(pfam_feature.label, ['fake_label']) + self.assertEqual(pfam_feature.label, 'fake_label') self.assertEqual(pfam_feature.aSTool, 'fake_aSTool') self.assertEqual(pfam_feature.detection, 'fake_detection') self.assertEqual(pfam_feature.database, 'fake_database') diff --git a/tests/test_record.py b/tests/test_record.py index b65bafc..64b65cb 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -172,7 +172,11 @@ def test_get_CDS_motifs(self): self.assertEqual(b_motif.type, m_motif.type) self.assertEqual(str(b_motif.location), str(m_motif.location)) for key, value in b_motif.qualifiers.items(): - self.assertEqual(value, m_motif.qualifiers[key]) + if value: + if key in ['score', 'evalue']: + self.assertEqual(float(value[0]), float(m_motif.qualifiers[key][0])) + else: + self.assertEqual(value, m_motif.qualifiers[key]) def test_get_PFAM_domains(self): """Test get_PFAM_domains() in Record""" @@ -187,8 +191,11 @@ def test_get_PFAM_domains(self): self.assertEqual(b_fam.type, m_fam.type) self.assertEqual(str(b_fam.location), str(m_fam.location)) for key, value in b_fam.qualifiers.items(): - if value is not None and value: - self.assertEqual(value, m_fam.qualifiers[key]) + if value: + if key in ['score', 'evalue']: + self.assertEqual(float(value[0]), float(m_fam.qualifiers[key][0])) + else: + self.assertEqual(value, m_fam.qualifiers[key]) def test_get_aSDomains(self): """Test get_aSDomains() in Record""" @@ -203,8 +210,11 @@ def test_get_aSDomains(self): self.assertEqual(b_asdomain.type, m_asdomain.type) self.assertEqual(str(b_asdomain.location), str(m_asdomain.location)) for key, value in b_asdomain.qualifiers.items(): - if value is not None and value: - self.assertEqual(value, m_asdomain.qualifiers[key]) + if value: + if key in ['score', 'evalue']: + self.assertEqual(float(value[0]), float(m_asdomain.qualifiers[key][0])) + else: + self.assertEqual(value, m_asdomain.qualifiers[key]) def test_get_cluster_number(self): """Test get_cluster_number() in Record""" From 384595f5aed8282c19d7c8c2ad3b2a788ec4bf74 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Tue, 22 Aug 2017 05:26:09 +0530 Subject: [PATCH 66/71] Refine secmet, Avoid explicit checking for None --- secmet/record.py | 80 ++++++++++++++++++++--------------------- tests/test_cds.py | 2 +- tests/test_cds_motif.py | 2 +- tests/test_cluster.py | 2 +- tests/test_domains.py | 4 +-- tests/test_generic.py | 2 +- 6 files changed, 45 insertions(+), 47 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index d5aede5..a17370a 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -97,10 +97,9 @@ def __init__(self, f_location=None, f_type=None, feature=None): self.description = None self.sec_met = [] - if feature is not None: + if feature: """Initialise class members(qualifiers) using SeqFeature object""" self._qualifiers = feature.qualifiers - self.type = feature.type self.location = feature.location self.locus_tag = self._qualifiers.pop('locus_tag', [None])[0] @@ -111,7 +110,6 @@ def __init__(self, f_location=None, f_type=None, feature=None): self.description = self._qualifiers.pop('description', [None])[0] self.sec_met = self._qualifiers.pop('sec_met', []) self.notes = self._qualifiers.pop('note', []) - else: self.location = f_location if not isinstance(f_type, str): @@ -167,17 +165,17 @@ def get_qualifier(self, category): def to_biopython(self): """Returns a Bio.SeqFeature.SeqFeature of given type of feature""" new_Generic = SeqFeature(self.location, type=self.type) - if self.locus_tag is not None: + if self.locus_tag: new_Generic.qualifiers['locus_tag'] = [str(self.locus_tag)] - if self.translation is not None: + if self.translation: new_Generic.qualifiers['translation'] = [str(self.translation)] - if self.gene is not None: + if self.gene: new_Generic.qualifiers['gene'] = [str(self.gene)] - if self.name is not None: + if self.name: new_Generic.qualifiers['name'] = [str(self.name)] - if self.seq is not None: + if self.seq: new_Generic.qualifiers['seq'] = [str(self.seq)] - if self.description is not None: + if self.description: new_Generic.qualifiers['description'] = [str(self.description)] if self.sec_met: new_Generic.qualifiers['sec_met'] = self.sec_met @@ -297,23 +295,23 @@ def to_biopython(self): if not isinstance(self.sec_met, SecMetQualifier): raise ValueError('Invalid sec_met type') self._qualifiers['sec_met'] = self.sec_met.as_list() - if self.locus_tag is not None: + if self.locus_tag: self._qualifiers['locus_tag'] = [str(self.locus_tag)] - if self.product is not None: + if self.product: self._qualifiers['product'] = [str(self.product)] - if self.protein_id is not None: + if self.protein_id: self._qualifiers['protein_id'] = [str(self.protein_id)] - if self.gene is not None: + if self.gene: self._qualifiers['gene'] = [str(self.gene)] - if self.translation is not None: + if self.translation: self._qualifiers['translation'] = [str(self.translation)] if self.notes: self._qualifiers['note'] = self.notes if self.EC_number: self._qualifiers['EC_number'] = self.EC_number - if self.transl_table is not None: + if self.transl_table: self._qualifiers['transl_table'] = [str(self.transl_table)] - if self.source is not None: + if self.source: self._qualifiers['source'] = [str(self.source)] if self.db_xref: self._qualifiers['db_xref'] = self.db_xref @@ -656,27 +654,27 @@ def to_biopython(self): """Returns a Bio.SeqFeature.SeqFeature object with all its members""" new_Cluster = SeqFeature(self.location, type=self.type) self._qualifiers['note'] = ["Cluster number: " + str(self.get_cluster_number())] - if self.detection is not None: + if self.detection: self._qualifiers['note'].append(self.detection) if self.notes: self._qualifiers['note'].extend(self.notes) - if self.cutoff is not None: + if self.cutoff: self._qualifiers['cutoff'] = [str(self.cutoff)] - if self.extension is not None: + if self.extension: self._qualifiers['extension'] = [str(self.extension)] if self.products: self._qualifiers['product'] = self.products - if self.contig_edge is not None: + if self.contig_edge: self._qualifiers['contig_edge'] = [str(self.contig_edge)] - if self.structure is not None: + if self.structure: self._qualifiers['structure'] = [str(self.structure)] - if self.probability is not None: + if self.probability: self._qualifiers['probability'] = [str(self.probability)] - if self.subclusterblast is not None: + if self.subclusterblast: self._qualifiers['subclusterblast'] = self.subclusterblast - if self.knownclusterblast is not None: + if self.knownclusterblast: self._qualifiers['knownclusterblast'] = self.knownclusterblast - if self.clusterblast is not None: + if self.clusterblast: self._qualifiers['clusterblast'] = self.clusterblast new_Cluster.qualifiers = self._qualifiers.copy() return [new_Cluster] @@ -703,7 +701,7 @@ def __init__(self, seq_record=None): self._modified_asdomain = [] #A list containing instances of aSDomain self._cluster_number_dict = {} #A dictionary to map clusters and their numbers - if self._record is not None: + if self._record: if not isinstance(self._record, SeqRecord): raise ValueError("SeqRecord should be an instance of 'Bio.SeqRecord.SeqRecord'") self.from_biopython(self._record) @@ -732,7 +730,7 @@ def from_file(cls, filename): @property def id(self): """Pass through to seq_record object if available""" - if self._record is not None: + if self._record: return self._record.id else: return "NO_ID_ASSIGNED" @@ -746,7 +744,7 @@ def id(self, value): @property def seq(self): """Pass through to seq_record object if available""" - if self._record is not None: + if self._record: return self._record.seq else: return None @@ -760,7 +758,7 @@ def seq(self, value): @property def description(self): """Pass through to seq_record object if available""" - if self._record is not None: + if self._record: return self._record.description else: return "" @@ -774,7 +772,7 @@ def description(self, value): @property def name(self): """Pass through to seq_record object if available""" - if self._record is not None: + if self._record: return self._record.name else: return "NO_NAME_ASSIGNED" @@ -788,7 +786,7 @@ def name(self, value): @property def annotations(self): """Pass through to seq_record object if available""" - if self._record is not None: + if self._record: return self._record.annotations else: return {} @@ -942,11 +940,11 @@ def __init__(self, clustertype=None, domains=None, kind=None): :param domains: a list of SecMetResult instance(s) :param kind: an instance of str """ - if clustertype is not None and not isinstance(clustertype, str): + if clustertype and not isinstance(clustertype, str): raise TypeError('clustertype should be an instance of str') - if domains is not None and not isinstance(domains, list): + if domains and not isinstance(domains, list): raise TypeError('domains should be an instance of list') - if kind is not None and not isinstance(kind, str): + if kind and not isinstance(kind, str): raise TypeError('kind should be an instance of str') self.clustertype = clustertype self.domains = domains @@ -958,11 +956,11 @@ def __init__(self, clustertype=None, domains=None, kind=None): def __len__(self): """Return length of the secmet qualifier""" count = 0 - if self.clustertype is not None: + if self.clustertype: count += 1 - if self.domains is not None: + if self.domains: count += 1 - if self.kind is not None: + if self.kind: count += 1 if self.nrpspks: count += len(self.nrpspks) @@ -975,11 +973,11 @@ def __repr__(self): return str(self.as_list()) def __iter__(self): - if self.clustertype is not None: + if self.clustertype: yield "Type: %s" % self.clustertype - if self.domains is not None: + if self.domains: yield "Domains detected: " + "; ".join(map(str, self.domains)) - if self.kind is not None: + if self.kind: yield "Kind: %s" % self.kind if self.nrpspks: for nrps in self.nrpspks: @@ -1001,7 +999,7 @@ def __init__(self, res=None, nseeds=None): self.evalue = None self.bitscore = None self.nseeds = None - if res is not None and nseeds is not None: + if res and nseeds: self.query_id = res.query_id self.evalue = res.evalue self.bitscore = res.bitscore diff --git a/tests/test_cds.py b/tests/test_cds.py index d7534be..5ae4903 100644 --- a/tests/test_cds.py +++ b/tests/test_cds.py @@ -44,7 +44,7 @@ def test_CDSFeature_members(self): 'aSASF_note', 'aSASF_scaffold', 'sec_met_predictions', 'EC_number'] for bp_cds, mod_cds in zip(bp_cdss, mod_cdss): for key, value in bp_cds.qualifiers.items(): - if value is not None and value: + if value: if key not in qualifiers_as_list: if key != 'sec_met': #antiSMASH anyways erases all sec_met qualifiers if not hasattr(mod_cds, key): diff --git a/tests/test_cds_motif.py b/tests/test_cds_motif.py index 0f5af73..ea20704 100644 --- a/tests/test_cds_motif.py +++ b/tests/test_cds_motif.py @@ -34,7 +34,7 @@ def test_CDS_motifFeature_members(self): 'aSASF_prediction', 'aSProdPred'] for bp_motif, mod_motif in zip(bp_cds_motifs, mod_cds_motifs): for key, value in bp_motif.qualifiers.items(): - if value is not None and value: + if value: if key not in qualifiers_as_list: if not hasattr(mod_motif, key): raise AttributeError("%s is not a member of CDS_motifFeature"%key) diff --git a/tests/test_cluster.py b/tests/test_cluster.py index 8e298ac..497b43d 100644 --- a/tests/test_cluster.py +++ b/tests/test_cluster.py @@ -33,7 +33,7 @@ def test_ClusterFeature_members(self): 'knownclusterblast'] for bp_cluster, mod_cluster in zip(bp_clusters, mod_clusters): for key, value in bp_cluster.qualifiers.items(): - if value is not None and value: + if value: #clusterblast, subclusterblast and knownclusterblast are lists if key not in qualifiers_as_list: if not hasattr(mod_cluster, key): diff --git a/tests/test_domains.py b/tests/test_domains.py index 3e8244f..858a90e 100644 --- a/tests/test_domains.py +++ b/tests/test_domains.py @@ -35,7 +35,7 @@ def test_aSDomain(self): 'aSASF_scaffold', 'aSASF_prediction', 'aSProdPred'] for bp_asdomain, mod_asdomain in zip(bp_asdomains, mod_asdomains): for key, value in bp_asdomain.qualifiers.items(): - if value is not None and value: + if value: if key not in qualifiers_as_list: if not hasattr(mod_asdomain, key): raise AttributeError('%s is not a member of aSDomain'%key) @@ -104,7 +104,7 @@ def test_PFAM_domain(self): 'aSASF_scaffold', 'aSASF_prediction', 'aSProdPred'] for bp_pfam, mod_pfam in zip(bp_pfams, mod_pfams): for key, value in bp_pfam.qualifiers.items(): - if value is not None and value: + if value: if key not in qualifiers_as_list: if not hasattr(mod_pfam, key): raise AttributeError('%s is not a member of PFAM_domain'%key) diff --git a/tests/test_generic.py b/tests/test_generic.py index 2cd884a..f86e71e 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -35,7 +35,7 @@ def test_GenericFeature(self): self.assertEqual(len(bp_generics), len(mod_generics)) for bp_generic, mod_generic in zip(bp_generics, mod_generics): for key, value in bp_generic.qualifiers.items(): - if value is not None and value: + if value: if key == 'note': #note is modified to notes in secmet self.assertEqual(value, mod_generic.notes) From 5978267adf2c9b5190b2d3e2aa846d08c4b08bd6 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Tue, 22 Aug 2017 05:56:11 +0530 Subject: [PATCH 67/71] Refine members initilization in ClusterFeature --- secmet/record.py | 60 ++++++++++++++++++------------------------------ 1 file changed, 22 insertions(+), 38 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index a17370a..a0aa76c 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -73,7 +73,7 @@ def _set_location(self, value): def extract(self, parent_seq): """Return Feature's seq from its parent's seq""" - if self.location is None: + if not self.location: raise ValueError("Location is None. Extracting Failed") return self.location.extract(parent_seq) @@ -125,7 +125,9 @@ def add_qualifier(self, category, info): else: info = str(info) if category in ['evalue', 'score', 'probability']: - if not (((info.replace('.', '')).replace('E-', '')).replace('-', '')).replace('+', '').isdigit(): + try: + info = float(info) + except: raise ValueError('%s should be a number'% category) if hasattr(self, category): if isinstance(getattr(self, category), list): @@ -230,7 +232,7 @@ def __init__(self, f_location=None, feature=None): self.protein_id = self._qualifiers.pop('protein_id', [None])[0] self.gene = self._qualifiers.pop('gene', [None])[0] self.translation = self._qualifiers.pop('translation', [None])[0] - self.notes = self._qualifiers.pop('note',[]) + self.notes = self._qualifiers.pop('note', []) self.EC_number = self._qualifiers.pop('EC_number', []) self.transl_table = self._qualifiers.pop('transl_table', [None])[0] self.source = self._qualifiers.pop('source', [None])[0] @@ -560,48 +562,30 @@ def __init__(self, f_location=None, feature=None): self.clusterblast = None self.cdss = [] - if feature is not None: + if feature: """Initialise class members(qualifiers) using SeqFeature object""" self._qualifiers = feature.qualifiers - - if 'cutoff' in self._qualifiers: - self.cutoff = int(self._qualifiers['cutoff'][0]) - - if 'extension' in self._qualifiers: - self.extension = int(self._qualifiers['extension'][0]) - - if 'contig_edge' in self._qualifiers: - self.contig_edge = self._qualifiers['contig_edge'][0] - - if 'note' in self._qualifiers: - note_list = self._qualifiers['note'] - note_list_copy = note_list[:] - for value in note_list: + self.contig_edge = self._qualifiers.pop('contig_edge', [None])[0] + self.products = self._qualifiers.pop('product', []) + self.structure = self._qualifiers.pop('structure', [None])[0] + self.probability = self._qualifiers.pop('probability', [None])[0] + self.subclusterblast = self._qualifiers.pop('subclusterblast', []) + self.knownclusterblast = self._qualifiers.pop('knownclusterblast', []) + self.clusterblast = self._qualifiers.pop('clusterblast', []) + self.notes = self._qualifiers.pop('note', []) + if self.notes: + note_list_copy = self.notes[:] + for value in self.notes: if value.startswith('Cluster number'): - self.clusternumber = int(value.split(':')[1]) note_list_copy.remove(value) if value.startswith('Detection rule(s)'): self.detection = value note_list_copy.remove(value) - self.notes.extend(note_list_copy) - - if 'product' in self._qualifiers: - self.products = self._qualifiers['product'] - - if 'structure' in self._qualifiers: - self.structure = self._qualifiers['structure'][0] - - if 'probability' in self._qualifiers: - self.probability = self._qualifiers['probability'][0] - - if 'subclusterblast' in self._qualifiers: - self.subclusterblast = self._qualifiers['subclusterblast'] - - if 'clusterblast' in self._qualifiers: - self.clusterblast = self._qualifiers['clusterblast'] - - if 'knownclusterblast' in self._qualifiers: - self.knownclusterblast = self._qualifiers['knownclusterblast'] + self.notes = note_list_copy + if 'cutoff' in self._qualifiers: + self.cutoff = int(self._qualifiers['cutoff'][0]) + if 'extension' in self._qualifiers: + self.extension = int(self._qualifiers['extension'][0]) self.location = feature.location else: self.location = f_location From b9d196db78e87464642f3d2e25ece9f5ba14dbcc Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Tue, 22 Aug 2017 19:36:40 +0530 Subject: [PATCH 68/71] SecMetResult: Check for valid float bitscore and evalue before assigning --- secmet/record.py | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index a0aa76c..0f98a7c 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -295,7 +295,7 @@ def to_biopython(self): """Returns a Bio.SeqFeature.SeqFeature object with all its members""" new_CDS = SeqFeature(self.location, type=self.type, id=self.id) if not isinstance(self.sec_met, SecMetQualifier): - raise ValueError('Invalid sec_met type') + raise TypeError('Invalid sec_met type') self._qualifiers['sec_met'] = self.sec_met.as_list() if self.locus_tag: self._qualifiers['locus_tag'] = [str(self.locus_tag)] @@ -977,11 +977,9 @@ def as_list(self): self._sec_met.append(qual) return self._sec_met -class SecMetResult(): +class SecMetResult(object): def __init__(self, res=None, nseeds=None): self.query_id = None - self.evalue = None - self.bitscore = None self.nseeds = None if res and nseeds: self.query_id = res.query_id @@ -989,6 +987,32 @@ def __init__(self, res=None, nseeds=None): self.bitscore = res.bitscore self.nseeds = nseeds + #Check for a valid bitscore qualifier before assigning + def _get_bitscore(self): + try: + return self.__bitscore + except: + return None + def _set_bitscore(self, value): + try: + self.__bitscore = float(value) + except ValueError: + raise ValueError('bitscore should be a number') + bitscore = property(_get_bitscore, _set_bitscore) + + #Check for a valid evalue qualifier before assigning + def _get_evalue(self): + try: + return self.__evalue + except: + return None + def _set_evalue(self, value): + try: + self.__evalue = float(value) + except ValueError: + raise ValueError('evalue should be a number') + evalue = property(_get_evalue, _set_evalue) + def __repr__(self): return self.__str__() From 3151f17993c75770bfcb779852f56c7c4c1bc070 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Tue, 22 Aug 2017 19:37:31 +0530 Subject: [PATCH 69/71] Modify tests for failure cases using assertRaises() of unittest --- tests/test_cds.py | 53 +++++++++++++++++++++-------------------- tests/test_cds_motif.py | 8 ++----- tests/test_cluster.py | 14 ++++------- tests/test_domains.py | 16 ++++--------- tests/test_generic.py | 25 +++++-------------- tests/test_record.py | 30 +++++++---------------- 6 files changed, 52 insertions(+), 94 deletions(-) diff --git a/tests/test_cds.py b/tests/test_cds.py index 5ae4903..26203b9 100644 --- a/tests/test_cds.py +++ b/tests/test_cds.py @@ -12,8 +12,8 @@ class FakeResult(object): def __init__(self): """Initialise members with fake values""" self.query_id = 'fake_id' - self.evalue = 'fake_evalue' - self.bitscore = 'fake_bitscore' + self.evalue = '10000' + self.bitscore = '10000' class TestCDSFeature(unittest.TestCase): def get_testfile(self): @@ -91,21 +91,6 @@ def test_BioFeature_to_CDSFeature(self): def test_SecMetQualifier(self): """Test SecMetQualifier""" - try: - #clustertype should be a string instance - SecMetQualifier(clustertype=1) - except TypeError: - pass - try: - #domains should be a list instance - SecMetQualifier(domains='Invalid domains type') - except TypeError: - pass - try: - #kind should be a string instance - SecMetQualifier(kind=1) - except TypeError: - pass cds = CDSFeature(FeatureLocation(1, 10)) self.assertEqual(None, cds.sec_met.clustertype) self.assertEqual(None, cds.sec_met.domains) @@ -131,12 +116,21 @@ def test_SecMetQualifier(self): self.assertEqual(["FAKE_NRPS/PKS Domain: "], cds.sec_met.nrpspks) self.assertEqual(['FAKE_ASF_predictions: '], cds.sec_met.asf_predictions) self.assertEqual(str(cds.sec_met), repr(cds.sec_met)) - #sec_met feature should be an instance of SecMetQualifier - cds.sec_met = [] - try: + with self.assertRaises(TypeError): + #sec_met feature should be an instance of SecMetQualifier + cds.sec_met = [] cds.to_biopython() - except ValueError: - pass + + #Test the failure cases in SecMetQualifier + with self.assertRaises(TypeError): + #clustertype should be a string + SecMetQualifier(clustertype=1) + with self.assertRaises(TypeError): + #domains should be a list + SecMetQualifier(domains='invalid_domains_type') + with self.assertRaises(TypeError): + #kind should be a str + SecMetQualifier(kind=1) def test_SecMetResult(self): """Test the SecMetResult class""" @@ -145,12 +139,19 @@ def test_SecMetResult(self): self.assertEqual(None, empty_result.evalue) self.assertEqual(None, empty_result.bitscore) self.assertEqual(None, empty_result.nseeds) - result = SecMetResult(FakeResult(), "fake_seeds") self.assertEqual('fake_id', result.query_id) - self.assertEqual('fake_evalue', result.evalue) - self.assertEqual('fake_bitscore', result.bitscore) + self.assertEqual(10000.0, result.evalue) + self.assertEqual(10000.0, result.bitscore) self.assertEqual('fake_seeds', result.nseeds) - expected = "fake_id (E-value: fake_evalue, bitscore: fake_bitscore, seeds: fake_seeds)" + expected = "fake_id (E-value: 10000.0, bitscore: 10000.0, seeds: fake_seeds)" self.assertEqual(expected, repr(result), str(result)) + #Test the failure cases in SecMetResult + result = SecMetResult() + with self.assertRaises(ValueError): + #evalue should be a float + result.evalue = 'invalid_evalue' + with self.assertRaises(ValueError): + #bitscore should be a float + result.bitscore = 'invalid_bitscore' diff --git a/tests/test_cds_motif.py b/tests/test_cds_motif.py index ea20704..c84ffa0 100644 --- a/tests/test_cds_motif.py +++ b/tests/test_cds_motif.py @@ -51,14 +51,10 @@ def test_CDS_motifFeature_members(self): self.assertEqual(value, getattr(mod_motif, key)) cdsmotif = CDS_motifFeature(FeatureLocation(1, 10)) #score, evalue should be numbers - try: + with self.assertRaises(ValueError): cdsmotif.score = '-a50' - except ValueError: - pass - try: + with self.assertRaises(ValueError): cdsmotif.evalue = 'a5.50E-08' - except ValueError: - pass #If valid qualifiers and values are added, We shouldn't get an error try: diff --git a/tests/test_cluster.py b/tests/test_cluster.py index 497b43d..86a8dd9 100644 --- a/tests/test_cluster.py +++ b/tests/test_cluster.py @@ -50,14 +50,10 @@ def test_ClusterFeature_members(self): self.assertEqual(value, getattr(mod_cluster, key)) cluster = ClusterFeature(FeatureLocation(100, 1000)) #cutoff, extension should be numbers - try: + with self.assertRaises(TypeError): cluster.cutoff = '-a5000' - except TypeError: - pass - try: + with self.assertRaises(TypeError): cluster.extension = 'a5000' - except TypeError: - pass #If valid qualifiers and values are added, We shouldn't get an error try: @@ -87,10 +83,9 @@ def test_BioFeature_to_ClsuterFeature(self): def test_add_product(self): cluster = ClusterFeature(FeatureLocation(1000, 10000)) self.assertEqual([], cluster.get_products()) - try: + with self.assertRaises(TypeError): + #product should be an instance of str cluster.add_product(111) - except TypeError: - pass cluster.add_product('fake_product') self.assertEqual(['fake_product'], cluster.get_products()) @@ -99,6 +94,7 @@ def test_add_new_cluster(self): testfile = self.get_testfile() rec = Record.from_file(testfile) new_cluster = ClusterFeature(FeatureLocation(100, 500)) + #Shouldn't throw exceptions if valid values are assigned try: new_cluster.cutoff = 300 except: diff --git a/tests/test_domains.py b/tests/test_domains.py index 858a90e..7ebf305 100644 --- a/tests/test_domains.py +++ b/tests/test_domains.py @@ -52,14 +52,10 @@ def test_aSDomain(self): asdomain = aSDomain(FeatureLocation(1, 10)) #score, evalue should be numbers - try: + with self.assertRaises(ValueError): asdomain.score = '-a50' - except ValueError: - pass - try: + with self.assertRaises(ValueError): asdomain.evalue = 'a5.50E-08' - except ValueError: - pass #If valid qualifiers and values are added, We shouldn't get an error try: @@ -121,14 +117,10 @@ def test_PFAM_domain(self): pfam = PFAM_domain(FeatureLocation(1, 10)) #score, evalue should be numbers - try: + with self.assertRaises(ValueError): pfam.score = '-a50' - except ValueError: - pass - try: + with self.assertRaises(ValueError): pfam.evalue = 'a5.50E-08' - except ValueError: - pass #If valid qualifiers and values are added, We shouldn't get an error try: diff --git a/tests/test_generic.py b/tests/test_generic.py index f86e71e..2df1af2 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -45,21 +45,15 @@ def test_GenericFeature(self): def test_add_qualifier(self): """Test adding a new GenericFeature""" #GenericFeature should be initialised with valid location and type - try: + with self.assertRaises(TypeError): new_generic = GenericFeature() - except TypeError: - pass - try: + with self.assertRaises(ValueError): new_generic = GenericFeature(FeatureLocation(10, 100), 20) #Invalid type - except ValueError: - pass new_generic = GenericFeature(FeatureLocation(1, 100), 'FAKE') #qualifiers should be strings and their values should be either strings or list of strings - try: + with self.assertRaises(TypeError): new_generic.add_qualifier(10, (20, 30)) - except TypeError: - pass #If the formats are valid shouldn't get any error try: @@ -68,19 +62,12 @@ def test_add_qualifier(self): except: raise RuntimeError('Secmet unable to add valid qualifiers') - #score, evalue and probability should be numbers - try: + with self.assertRaises(ValueError): new_generic.add_qualifier('score', '-a50') - except ValueError: - pass - try: + with self.assertRaises(ValueError): new_generic.add_qualifier('evalue', 'a5.50E-08') - except ValueError: - pass - try: + with self.assertRaises(ValueError): new_generic.add_qualifier('probability', 'a0.5') - except ValueError: - pass #If valid qualifiers and values are added, We shouldn't get an error try: diff --git a/tests/test_record.py b/tests/test_record.py index 64b65cb..82d65e2 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -39,24 +39,16 @@ def test_empty_Record(self): """Test the identifiers of empty Record""" rec = Record() #seq should be a instance of Bio.Seq.Seq - try: + with self.assertRaises(ValueError): rec.seq = 'FAKE' - except ValueError: - pass #description, name and id are strings - try: + with self.assertRaises(ValueError): rec.description = 123 - except ValueError: - pass - - try: + with self.assertRaises(ValueError): rec.name = 123 - except ValueError: - pass - try: + with self.assertRaises(ValueError): rec.id = 123 - except ValueError: - pass + rec.id = "fake_id" rec.name = 'fake_name' rec.seq = Seq("FAKE") @@ -66,10 +58,8 @@ def test_empty_Record(self): self.assertEqual(rec.seq, Seq("FAKE")) self.assertEqual(rec.description, 'fake_description') self.assertEqual(rec.annotations, {}) - try: + with self.assertRaises(ValueError): rec.add_annotation(12, 34) - except ValueError: - pass rec.add_annotation('fake_key', 'fake_value') self.assertEqual(rec.annotations, {'fake_key': 'fake_value'}) @@ -98,10 +88,8 @@ def test_setters(self): def test_from_biopython(self): """Test from_biopython() in Record""" - try: + with self.assertRaises(ValueError): rec = Record('fake_record') - except: - pass testfile = self.get_testfile() rec = Record.from_file(testfile) self.assertIsInstance(rec.from_biopython(rec._record), Record) @@ -297,10 +285,8 @@ def test_add_feature(self): rec.add_feature(new_pfam_domain) rec.add_feature(new_asdomain) clusters = rec.get_clusters() - try: + with self.assertRaises(TypeError): rec.add_feature(invalid_feature) - except TypeError: - pass self.assertEqual(no_of_clusters+1, len(clusters)) self.assertEqual(no_of_cdss+1, len(rec.get_CDSs())) self.assertEqual(no_of_generics+1, len(rec.get_generics())) From 647fc12cc440b9b5dd4eb1706632fbd7ad636f52 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Tue, 22 Aug 2017 21:40:12 +0530 Subject: [PATCH 70/71] Use a copy of feature qualifiers instead of original qualifiers dict Explicit check for None for score, evalue, cutoff and extension --- secmet/record.py | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index 0f98a7c..7193e9c 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -99,7 +99,7 @@ def __init__(self, f_location=None, f_type=None, feature=None): if feature: """Initialise class members(qualifiers) using SeqFeature object""" - self._qualifiers = feature.qualifiers + self._qualifiers = feature.qualifiers.copy() self.type = feature.type self.location = feature.location self.locus_tag = self._qualifiers.pop('locus_tag', [None])[0] @@ -225,7 +225,7 @@ def __init__(self, f_location=None, feature=None): if feature: """Initialise class members(qualifiers) using SeqFeature object""" - self._qualifiers = feature.qualifiers + self._qualifiers = feature.qualifiers.copy() self.id = feature.id self.locus_tag = self._qualifiers.pop('locus_tag', [None])[0] self.product = self._qualifiers.pop('product', [None])[0] @@ -356,7 +356,7 @@ def __init__(self, f_location, feature): self._qualifiers = {} if feature: - self._qualifiers = feature.qualifiers + self._qualifiers = feature.qualifiers.copy() self.locus_tag = self._qualifiers.pop('locus_tag', [None])[0] self.translation = self._qualifiers.pop('translation', [None])[0] self.asDomain_id = self._qualifiers.pop('asDomain_id', [None])[0] @@ -412,13 +412,13 @@ def _get_feature_qualifiers(self): self._qualifiers['translation'] = [str(self.translation)] if self.database: self._qualifiers['database'] = [str(self.database)] - if self.evalue: + if self.evalue is not None: self._qualifiers['evalue'] = [str(self.evalue)] if self.asDomain_id: self._qualifiers['asDomain_id'] = [str(self.asDomain_id)] if self.detection: self._qualifiers['detection'] = [str(self.detection)] - if self.score: + if self.score is not None: self._qualifiers['score'] = [str(self.score)] if self.label: self._qualifiers['label'] = [str(self.label)] @@ -451,7 +451,7 @@ def __init__(self, f_location=None, feature=None): if feature: """Initialise class members(qualifiers) using SeqFeature object""" - self.motif = feature.qualifiers.pop('motif', [None])[0] + self.motif = feature.qualifiers.copy().pop('motif', [None])[0] def to_biopython(self): """Returns a Bio.SeqFeature.SeqFeature object with all its members""" @@ -482,9 +482,10 @@ def __init__(self, f_location=None, feature=None): if feature: """Initialise class members(qualifiers) using SeqFeature object""" - self.domain = feature.qualifiers.pop('domain', [None])[0] - self.description = feature.qualifiers.pop('description', [None])[0] - self.db_xref = feature.qualifiers.pop('db_xref', []) + qualifiers = feature.qualifiers.copy() + self.domain = qualifiers.pop('domain', [None])[0] + self.description = qualifiers.pop('description', [None])[0] + self.db_xref = qualifiers.pop('db_xref', []) def to_biopython(self): """Returns a Bio.SeqFeature.SeqFeature object with all its members""" @@ -519,9 +520,10 @@ def __init__(self, f_location=None, feature=None): if feature: """Initialise class members(qualifiers) using SeqFeature object""" - self.domain = feature.qualifiers.pop('domain', [None])[0] - self.domain_subtype = feature.qualifiers.pop('domain_subtype', [None])[0] - self.specificity = feature.qualifiers.pop('specificity', []) + qualifiers = feature.qualifiers.copy() + self.domain = qualifiers.pop('domain', [None])[0] + self.domain_subtype = qualifiers.pop('domain_subtype', [None])[0] + self.specificity = qualifiers.pop('specificity', []) def to_biopython(self): """Returns a Bio.SeqFeature.SeqFeature object with all its members""" @@ -564,7 +566,7 @@ def __init__(self, f_location=None, feature=None): if feature: """Initialise class members(qualifiers) using SeqFeature object""" - self._qualifiers = feature.qualifiers + self._qualifiers = feature.qualifiers.copy() self.contig_edge = self._qualifiers.pop('contig_edge', [None])[0] self.products = self._qualifiers.pop('product', []) self.structure = self._qualifiers.pop('structure', [None])[0] @@ -642,9 +644,9 @@ def to_biopython(self): self._qualifiers['note'].append(self.detection) if self.notes: self._qualifiers['note'].extend(self.notes) - if self.cutoff: + if self.cutoff is not None: self._qualifiers['cutoff'] = [str(self.cutoff)] - if self.extension: + if self.extension is not None: self._qualifiers['extension'] = [str(self.extension)] if self.products: self._qualifiers['product'] = self.products From 32656a7cc9825ebf59680605e058b7a7d680ab54 Mon Sep 17 00:00:00 2001 From: Manikumar1998 Date: Tue, 22 Aug 2017 23:55:24 +0530 Subject: [PATCH 71/71] Refine secmet Update docstrigns and inline comments Bug fixes --- secmet/record.py | 127 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 103 insertions(+), 24 deletions(-) diff --git a/secmet/record.py b/secmet/record.py index 7193e9c..0d48d01 100644 --- a/secmet/record.py +++ b/secmet/record.py @@ -25,8 +25,10 @@ def find_new_cluster_pos(clusters, target_cluster): param clusters: A list of all existing ClusterFeature(s) in the record param target_cluster: An instance of ClusterFeature """ + #If no clusters are available return 0 if not clusters: return 0 + #Search for the index position in clusters list using cluster start positions cluster_start_locations = [cluster.location.start for cluster in clusters] return bisect_left(cluster_start_locations, target_cluster.location.start) @@ -36,24 +38,36 @@ def find_cluster_of_new_cds(clusters, new_cds): param clusters: A list of all existing ClusterFeature(s) in the record param new_cds: An instance of CDSFeature """ + #If no clusters are available return if not clusters: return + #If new_cds is not within the clusters return if new_cds.location.end < clusters[0].location.start or \ new_cds.location.start > clusters[len(clusters)-1].location.end: return + #Search for the cluster feature else: + #Segregate the start locations and end locations of sorted clusters cluster_starts = [cluster.location.start for cluster in clusters] cluster_ends = [cluster.location.end for cluster in clusters] - if bisect_left(cluster_starts, new_cds.location.start)-1 == bisect_left(cluster_ends, new_cds.location.start): - index = bisect_left(cluster_ends, new_cds.location.start) - clusters[index].cdss.append(new_cds) - new_cds.cluster = clusters[index] - - if bisect_left(cluster_starts, new_cds.location.end)-1 == bisect_left(cluster_ends, new_cds.location.end): - index = bisect_left(cluster_ends, new_cds.location.end) - clusters[index].cdss.append(new_cds) - new_cds.cluster = clusters[index] + #Compare the new_cds start location with clusters starts and end locations + cluster_starts_index = bisect_left(cluster_starts, new_cds.location.start) + cluster_ends_index = bisect_left(cluster_ends, new_cds.location.start) + if cluster_starts_index-1 == cluster_ends_index: + #new_cds within start and end positions of a cluster + clusters[cluster_ends_index].cdss.append(new_cds) + new_cds.cluster = clusters[cluster_ends_index] + return + + #Compare using new_cds end location with clusters start and end locations + cluster_starts_index = bisect_left(cluster_starts, new_cds.location.end) + cluster_ends_index = bisect_left(cluster_ends, new_cds.location.end) + if cluster_starts_index-1 == cluster_ends_index: + #new_cds within start and end positions of a cluster + clusters[cluster_ends_index].cdss.append(new_cds) + new_cds.cluster = clusters[cluster_ends_index] + return class Feature(object): """A Feature super class that extends to different subclasses""" @@ -110,51 +124,106 @@ def __init__(self, f_location=None, f_type=None, feature=None): self.description = self._qualifiers.pop('description', [None])[0] self.sec_met = self._qualifiers.pop('sec_met', []) self.notes = self._qualifiers.pop('note', []) + if 'score' in self._qualifiers: + self.score = self._qualifiers('score')[0] + del self._qualifiers['score'] + if 'evalue' in self._qualifiers: + self.evalue = self._qualifiers['evalue'][0] + del self._qualifiers['evalue'] + if 'probability' in self._qualifiers: + self.probability = self._qualifiers['probability'][0] + del self._qualifiers['probability'] else: self.location = f_location if not isinstance(f_type, str): raise ValueError('Type of the feature should be a string') self.type = f_type + #Check for a valid score qualifier before assigning + def _get_score(self): + try: + return self.__score + except: + return None + def _set_score(self, value): + try: + self.__score = float(value) + except ValueError: + raise ValueError('Invalid score value') + score = property(_get_score, _set_score) + + #Check for a valid evalue qualifier before assigning + def _get_evalue(self): + try: + return self.__evalue + except: + return None + def _set_evalue(self, value): + try: + self.__evalue = float(value) + except ValueError: + raise ValueError('Invalid evalue value') + evalue = property(_get_evalue, _set_evalue) + + def _get_probaility(self): + try: + return self.__probability + except: + return None + def _set_probability(self, value): + try: + self.__probability = float(value) + except ValueError: + raise ValueError('Invalid probability value') + probability = property(_get_probaility, _set_probability) + def add_qualifier(self, category, info): - """Adds a qualifier to qualifiers dictionary""" + """Adds a qualifier to qualifiers dictionary + param category: An instance of str + param info: An instance of str or list or a number + """ + #Check for valid category and info type if not isinstance(category, str): if not isinstance(info, (str, list)): if not isinstance(info, (int, float)): raise TypeError("Qualifier category should be str and value should be str or list or number") else: info = str(info) - if category in ['evalue', 'score', 'probability']: - try: - info = float(info) - except: - raise ValueError('%s should be a number'% category) - if hasattr(self, category): - if isinstance(getattr(self, category), list): + + #Check and return if category is present as a member of the class + elif hasattr(self, category): + if category == 'sec_met': if isinstance(info, list): - getattr(self, category).extend(info) + self.sec_met.extend(info) else: - getattr(self, category).append(info) + self.sec_met.append(info) else: setattr(self, category, info) + + #If not a member of class, check in _qualifiers else: - if category not in self._qualifiers: + if category in self._qualifiers: + if isinstance(info, list): + self._qualifiers[category].extend(info) + else: + self._qualifiers[category].append(info) + else: if isinstance(info, list): self._qualifiers[category] = info else: self._qualifiers[category] = [info] - else: - self._qualifiers[category].append(info) return None def get_qualifier(self, category): """Returns a qualifier of given category""" + #Check for the category in _qualifiers if category in self._qualifiers: return self._qualifiers[category] elif category.lower() in self._qualifiers: return self._qualifiers[category.lower()] elif category.upper() in self._qualifiers: return self._qualifiers[category.upper()] + #If not found, the check if it is a member of the class else: if hasattr(self, category): if getattr(self, category): @@ -179,6 +248,12 @@ def to_biopython(self): new_Generic.qualifiers['seq'] = [str(self.seq)] if self.description: new_Generic.qualifiers['description'] = [str(self.description)] + if self.score is not None: + new_Generic.qualifiers['score'] = [str(self.score)] + if self.evalue is not None: + new_Generic.qualifiers['evalue'] = [str(self.evalue)] + if self.probability is not None: + new_Generic.qualifiers['probability'] = [str(self.probability)] if self.sec_met: new_Generic.qualifiers['sec_met'] = self.sec_met if self.notes: @@ -246,6 +321,7 @@ def __init__(self, f_location=None, feature=None): self.location = feature.location if 'sec_met' in self._qualifiers: self.sec_met = self._map_sec_met_list_to_SecMetQualifier(self._qualifiers['sec_met']) + del self._qualifiers['sec_met'] else: self.location = f_location @@ -586,8 +662,10 @@ def __init__(self, f_location=None, feature=None): self.notes = note_list_copy if 'cutoff' in self._qualifiers: self.cutoff = int(self._qualifiers['cutoff'][0]) + del self._qualifiers['cutoff'] if 'extension' in self._qualifiers: self.extension = int(self._qualifiers['extension'][0]) + del self._qualifiers['extension'] self.location = feature.location else: self.location = f_location @@ -697,7 +775,6 @@ def __init__(self, seq_record=None): @classmethod def from_file(cls, filename): """Initialise a record from a file - :param string filename: file name of the file to read """ filetype = filename.split('.')[-1] @@ -778,7 +855,7 @@ def annotations(self): return {} def add_annotation(self, key, value): """Adding annotations in Record""" - if not (isinstance(key, str) and (isinstance(value, str) or isinstance(value, list))): + if not isinstance(key, str) or not isinstance(value, (str, list)): raise ValueError('Key and Value are not in right format') self._record.annotations[key] = value @@ -980,10 +1057,12 @@ def as_list(self): return self._sec_met class SecMetResult(object): + """A SecMetResult class to store a domain qualifier""" def __init__(self, res=None, nseeds=None): self.query_id = None self.nseeds = None if res and nseeds: + #Initialise using arguments self.query_id = res.query_id self.evalue = res.evalue self.bitscore = res.bitscore