From 9a20ce4433239f39a6958c07890bfa8fe79232c0 Mon Sep 17 00:00:00 2001 From: snitz Date: Mon, 6 Jul 2015 16:14:01 +0200 Subject: [PATCH 01/31] OK UPGMA AND NJ --- Makefile | 2 +- Makefile.global | 6 +- Phylo/APPS/Makefile | 55 +++ Phylo/APPS/phylogen.cc | 305 +++++++++++++++++ Phylo/Sources/Makefile | 51 +++ Phylo/Sources/NewickTree.cc | 606 ++++++++++++++++++++++++++++++++++ Phylo/Sources/NewickTree.h | 106 ++++++ Phylo/Sources/PhyloSupport.cc | 267 +++++++++++++++ Phylo/Sources/PhyloSupport.h | 78 +++++ Phylo/Tests/Makefile | 55 +++ Phylo/Tests/TestPhylo.h | 69 ++++ Phylo/Tests/TestPhylogen.cc | 25 ++ 12 files changed, 1621 insertions(+), 4 deletions(-) create mode 100755 Phylo/APPS/Makefile create mode 100755 Phylo/APPS/phylogen.cc create mode 100755 Phylo/Sources/Makefile create mode 100644 Phylo/Sources/NewickTree.cc create mode 100644 Phylo/Sources/NewickTree.h create mode 100644 Phylo/Sources/PhyloSupport.cc create mode 100644 Phylo/Sources/PhyloSupport.h create mode 100644 Phylo/Tests/Makefile create mode 100644 Phylo/Tests/TestPhylo.h create mode 100644 Phylo/Tests/TestPhylogen.cc diff --git a/Makefile b/Makefile index c0f7aa4..7336999 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ # Path to project directory. UPDIR = . # Path to subdirectories. -SUBDIRS = tools Energy/Sources Biopool/Sources Align2/Sources Energy/Sources/TorsionPotential Lobo/Sources Lobo/APPS Energy/APPS Biopool/APPS Align2/APPS +SUBDIRS = tools Energy/Sources Biopool/Sources Align2/Sources Phylo/Sources Energy/Sources/TorsionPotential Lobo/Sources Lobo/APPS Energy/APPS Biopool/APPS Align2/APPS Phylo/APPS # # Libraries and paths (which are not defined globally). diff --git a/Makefile.global b/Makefile.global index 1f78a89..982f852 100644 --- a/Makefile.global +++ b/Makefile.global @@ -106,11 +106,11 @@ SUBDIRS += _dummy_ LIB_PATH += -L$(UPDIR)/lib -INC_PATH += -I$(UPDIR)/tools -I$(UPDIR)/Energy/Sources -I$(UPDIR)/Biopool/Sources -I$(UPDIR)/Energy/Sources/TorsionPotential -I$(UPDIR)/Lobo/Sources -I$(UPDIR)/Align2/Sources -I$(UPDIR)/Biopool/APPS -I$(UPDIR)/Energy/APPS -I$(UPDIR)/Align2/APPS -I$(UPDIR)/Lobo/APPS +INC_PATH += -I$(UPDIR)/tools -I$(UPDIR)/Energy/Sources -I$(UPDIR)/Biopool/Sources -I$(UPDIR)/Energy/Sources/TorsionPotential -I$(UPDIR)/Lobo/Sources -I$(UPDIR)/Align2/Sources -I$(UPDIR)/Phylo/Sources -I$(UPDIR)/Biopool/APPS -I$(UPDIR)/Energy/APPS -I$(UPDIR)/Align2/APPS -I$(UPDIR)/Phylo/APPS -I$(UPDIR)/Lobo/APPS ifdef test - INC_PATH += -I$(UPDIR)/Biopool/Tests -I$(UPDIR)/Energy/Tests -I$(UPDIR)/Align2/Tests -I$(UPDIR)/Lobo/Tests - SUBDIRS = Biopool/Tests Energy/Tests Align2/Tests Lobo/Tests + INC_PATH += -I$(UPDIR)/Biopool/Tests -I$(UPDIR)/Energy/Tests -I$(UPDIR)/Align2/Tests -I$(UPDIR)/Phylo/Tests -I$(UPDIR)/Lobo/Tests + SUBDIRS = Biopool/Tests Energy/Tests Align2/Tests Phylo/Tests Lobo/Tests endif ####### Implicit rules diff --git a/Phylo/APPS/Makefile b/Phylo/APPS/Makefile new file mode 100755 index 0000000..d8b7e98 --- /dev/null +++ b/Phylo/APPS/Makefile @@ -0,0 +1,55 @@ +#--*- makefile -*-------------------------------------------------------------- +# +# Standard makefile +# +#------------------------------------------------------------------------------ + +# Path to project directory +UPDIR = ../.. +# Path to subdirectories +SUBDIR = +# Path to directory for binaries +BINPATH = ../../bin + +# +# Libraries and paths (which are not defined globally) +# + +LIBS = -lAlign2 -lBiopool -lPhylo -ltools + +LIB_PATH = -L. + +INC_PATH = -I. -I ../../Biopool/ -I../../tools/ -I../../Align2/Sources -I../../Phylo/Sources + +# +# Objects and headers +# + +SOURCES = phylogen.cc + +OBJECTS = phylogen.o + +TARGETS = phylogen + + +EXECS = phylogen + + +LIBRARY = APPSlibPhylo.a + +# +# Install rule +# + +compile: all + +all: install +install: $(LIBRARY) $(TARGETS) + mv $(LIBRARY) $(UPDIR)/lib/ + mv $(EXECS) $(BINPATH)/ + +# +# Call global Makefile to do the job +# + +include ../../Makefile.global diff --git a/Phylo/APPS/phylogen.cc b/Phylo/APPS/phylogen.cc new file mode 100755 index 0000000..7ca7246 --- /dev/null +++ b/Phylo/APPS/phylogen.cc @@ -0,0 +1,305 @@ +/* This file is part of Victor. + + Victor is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Victor is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Victor. If not, see . + */ + +// --*- C++ -*------x----------------------------------------------------------- +// +// +// Description: +// +// -----------------x----------------------------------------------------------- +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +using namespace Victor::Align2; +using namespace Victor::Phylo; +using namespace Victor; +using namespace std; + +vector calcAlignmentV(Alignment *aliSec, vector > &distance){ + string seq1Name, seq2Name, seq1, seq2, sec1, sec2; + + Alignment newAli; + string path = getenv("VICTOR_ROOT"); + if (path.length() < 3) + cout << "Warning: environment variable VICTOR_ROOT is not set." << endl; + + string dataPath = path + "data/"; + + //n=4? no idea. Default in APP/subali.cc + int n=4; + + AlignmentData *ad; + + //Default matrix + string matrixFileName="blosum62.dat"; + matrixFileName = dataPath + matrixFileName; + ifstream matrixFile(matrixFileName.c_str()); + if (!matrixFile) + ERROR("Error opening substitution matrix file.", exception); + SubMatrix sub(matrixFile); + + //Default gap function + double openGapPenalty=12; + double extensionGapPenalty=3; + GapFunction *gf = new AGPFunction(openGapPenalty, extensionGapPenalty); + + + //Default Structure + Structure *str; + str = 0; + + //Default csep + double cSeq; + cSeq = 1.00; + + //Default ScoringScheme + ScoringScheme *ss; + + //Global Align + Align *a; + cout << "\nSuboptimal Needleman-Wunsch alignments:\n" << endl; + + double suboptPenaltyMul=1; + double suboptPenaltyAdd=1; + + unsigned int suboptNum=1; + + vector a2; + + Alignment a3; + + vector alignV(aliSec->size()+1);//vector of align + + double score=0; + //size not count target. so +1 + for(unsigned int index=0;indexsize()+1;index++){ + cout<<" index="<size()){ + seq1 = Alignment::getPureSequence(aliSec->getTarget()); + seq1Name = aliSec->getTargetName(); + } + else{ + seq1 = Alignment::getPureSequence(aliSec->getTemplate(index)); + seq1Name = aliSec->getTemplateName(index); + } + for(unsigned int j=0;jsize()+1;j++){ + cout<<" j="<size()){ + seq2 = Alignment::getPureSequence(aliSec->getTarget()); + seq2Name = aliSec->getTargetName(); + }else{ + seq2 = Alignment::getPureSequence(aliSec->getTemplate(j)); + seq2Name = aliSec->getTemplateName(j); + } + + ad = new SequenceData(n, seq1, seq2, seq1Name, seq2Name); + + ss = new ScoringS2S(&sub, ad, str, cSeq); + + a = new NWAlign(ad, gf, ss); + + a->setPenalties(suboptPenaltyMul, suboptPenaltyAdd); + a2 = a->generateMultiMatch(suboptNum); + if (a2.size() == 0) + ERROR("No output alignments generated.", exception); + + a2[0].cutTemplate(1); + score=PhyloTree::distanceCalcTwoSeq(a2[0].getTarget(),a2[0].getTemplate(0)); + distance[index][j]=score; + cout<<"SCORE="<] \t Name of input FASTA file" + << "\n [--out ] \t Name of output file (default = to screen)" + << "\n [-m ] \t Name of substitution matrix file (default = blosum62.dat)" + << "\n" + << "\n [--gf <0|1|2>] \t Gap function (default = 0, i.e. AGP)" + << "\n \t --gf=0: AGP (Affine Gap Penalty) function (default)." + << "\n \t --gf=1: VGP (Variable Gap Penalty) function." + << "\n \t --gf=2: VGP2 (Variable Gap Penalty) function." + << "\n [-o ] \t Open gap penalty (default = 12.00)" + << "\n [-e ] \t Extension gap penalty (default = 3.00)" + << "\n" + << "\n [--cluster <0|1>] \t CLuster function (default = 0, i.e. UPGMA)" + << "\n \t --gf=0: UPGMA (Affine Gap Penalty) Cluster (default)." + << "\n \t --gf=1: NJ (Variable Gap Penalty) Cluster." + << "\n [--cSeq ] \t Coefficient for sequence alignment (default = 0.80)" + << "\n [--cStr ] \t Coefficient for structural alignment (default = 0.20)" + << "\n" + << "\n [--verbose] \t Verbose mode" + << "\n" << endl; +} + + +/// Show command line options and help text. + +int main(int argc, char **argv) { + + string inputFileName, outputFileName, matrixFileName, matrixStrFileName; + double openGapPenalty, extensionGapPenalty; + unsigned int gapFunction, cluster; + double cSeq, cStr; + bool verbose; + struct tm* newtime; + time_t t; + + // -------------------------------------------------- + // 0. Treat options + // -------------------------------------------------- + if (getArg("h", argc, argv)) { + sShowHelp(); + return 1; + } + + + getArg("-in", inputFileName, argc, argv, "cyc1_input.fasta"); + getArg("-out", outputFileName, argc, argv, "!"); + + getArg("m", matrixFileName, argc, argv, "blosum62.dat"); + + getArg("-cluster", cluster, argc, argv, 0); + + getArg("-gf", gapFunction, argc, argv, 0); + getArg("o", openGapPenalty, argc, argv, 12.00); + getArg("e", extensionGapPenalty, argc, argv, 3.00); + + getArg("-cSeq", cSeq, argc, argv, 0.80); + getArg("-cStr", cStr, argc, argv, 0.20); + + verbose = getArg("-verbose", argc, argv); + + + // -------------------------------------------------- + // 1. Load FASTA + // -------------------------------------------------- + Alignment aliSec; + string path = getenv("VICTOR_ROOT"); + if (path.length() < 3) + cout << "Warning: environment variable VICTOR_ROOT is not set." << endl; + string dataPath = path + "data/"; + string finalPath=dataPath+inputFileName; + + ifstream secFile(finalPath.c_str()); + if (!secFile) + ERROR("Error opening secondary structure FASTA file.", exception); + aliSec.loadFasta(secFile); + if (aliSec.size() < 1) + ERROR("Secondary structure FASTA file must contain two sequences.", exception); + + + + // -------------------------------------------------- + // 3. Load data + // -------------------------------------------------- + + NewickTree tree; + string out="Error Tree"; + if(cluster==0){ + cout<<"##########################################Starting PhyloTreeUPGMA#####################################"<]"; + } + + // -------------------------------------------------- + // 2. Output Tree + // -------------------------------------------------- + if(outputFileName=="!"){ + cout<<"stampiamo l'albero creato "<left=NULL; + root->right=NULL; + root->seq="noPURESeq"; + root->allignSeq="noSeq"; + root->branchLength=-1; + root->divergenceR=0; + root->nseq=-1; + root->isLeaf=true; + } + + NewickTree::NewickTree(int position,string name,string pureSeq,double divR){ + root=new iNode; + root->left=NULL; + root->right=NULL; + root->allignSeq="noSeq"; + root->branchLength=-1; + + root->name=name; + root->divergenceR=divR; + root->seq=pureSeq; + root->nseq=position; + root->isLeaf=true; + } + + + NewickTree::NewickTree(int position,string name,string pureSeq){ + root=new iNode; + root->left=NULL; + root->right=NULL; + root->allignSeq="noSeq"; + root->branchLength=-1; + + root->name=name; + root->divergenceR=0; + root->seq=pureSeq; + root->nseq=position; + root->isLeaf=true; + } + + + + /** + *@Description Create new Tree. Used for union two different Tree. + */ + NewickTree::NewickTree( NewickTree *rTree, NewickTree *lTree) { + root=new iNode; + root->allignSeq="noSeq"; + root->branchLength=-1; + root->isLeaf=false;//internal + root->name=""; + root->seq=""; + root->left=rTree->getRoot(); + root->right=lTree->getRoot(); + root->divergenceR=0; + } + + + /** + *@Description Basic destructor + */ + NewickTree::~NewickTree() { + //destroy_tree();//to do + } + + + // PREDICATES: + iNode *NewickTree::getRoot(){ + return root; + } + + iNode *NewickTree::getRightChild(){ + return root->right; + } + + iNode*NewickTree::getLeftChild(){ + return root->left; + } + + string NewickTree::getName(){ + return root->name; + } + + double NewickTree::getRdiv(){ + return root->divergenceR; + } + + + // MODIFIERS: + + + + // OPERATORS: + /** + * */ + void NewickTree::neighborJoining(Align2::Alignment ali,bool kimuraD){ + + int Nseq=ali.size()+1;//number of seq + vector > distance(Nseq, std::vector(Nseq, 0)); + cout<<"---------Creating Distance Matrix--------------/"<vet=PhyloSupport::calcAlignmentV(&ali,distance,kimuraD); + cout<<"---------Print Distance Matrix--------------/"< trees(Nseq); + + //Create all node + for(unsigned int i=0;i > Mij(trees.size(), std::vector(trees.size(), 0)); + + //memorize the nearest seq + unsigned int minj=0; + unsigned int mini=1; + //vector for new distance + vector newMediaD ( trees.size()); + + //for creat new matrix; + unsigned int newi=0; + unsigned int newj=0; + + int count=0; + while(trees.size()!=3){//start 27, end 3 + count=1; + cout<<"----------------WHile num= "< Mij[i][j] && i!=j) + { + minj=j; + mini=i; + } + } + //cout< > tmpdistance(trees.size()-1, std::vector(trees.size()-1, 0)); + newi=0; + newj=0; + cout<<"startcopy"<mini && imini && i>minj){ + cout<<"caso 2,"; + newi=i-2; + } + if(jmini && jmini && j>minj){ + newj=j-2; + cout<<"caso 5 "; + } + cout<<"mini e minj "<mini && imini && i>minj){ + cout<<"caso 2,"; + newi=i-2; + } + tmpdistance[newi][tmpdistance.size()-1]=(distance[mini][i]+distance[minj][i]-distance[mini][minj])/2; + cout<setBranchLength((distance[1][2]/2+distance[0][2]/2)/2-tmpT->getValueOFChildBranchLength()); + tmpT=new NewickTree(tmpT,&trees[2]); + } + else{//min distance 0,2 + trees[0].setBranchLength(distance[0][2]/2-trees[0].getValueOFChildBranchLength()); + trees[2].setBranchLength(distance[0][2]/2-trees[2].getValueOFChildBranchLength()); + tmpT=new NewickTree(&trees[0],&trees[2]); + trees[1].setBranchLength((distance[1][2]/2+distance[1][0]/2)/2-trees[1].getValueOFChildBranchLength()); + tmpT->setBranchLength((distance[1][2]/2+distance[1][0]/2)/2-tmpT->getValueOFChildBranchLength()); + tmpT=new NewickTree(tmpT,&trees[1]); + } + + } + else{//min distance 1,2 + trees[1].setBranchLength(distance[1][2]/2-trees[1].getValueOFChildBranchLength()); + trees[2].setBranchLength(distance[1][2]/2-trees[2].getValueOFChildBranchLength()); + tmpT=new NewickTree(&trees[1],&trees[2]); + trees[0].setBranchLength((distance[0][2]/2+distance[0][1]/2)/2-trees[0].getValueOFChildBranchLength()); + tmpT->setBranchLength((distance[0][1]/2+distance[0][2]/2)/2-tmpT->getValueOFChildBranchLength()); + tmpT=new NewickTree(tmpT,&trees[0]); + } + root=tmpT->getRoot(); + + + }//end method + + + /** + * + * */ + void NewickTree::upgma(Align2::Alignment ali,bool kimuraD){ + + int Nseq=ali.size()+1;//number of seq + vector > distance(Nseq, std::vector(Nseq, 0)); + cout<<"---------Creating Distance Matrix--------------/"<vet=PhyloSupport::calcAlignmentV(&ali,distance,kimuraD); + cout<<"---------Print Distance Matrix--------------/"< trees(Nseq); + + //Create all node + for(unsigned int i=0;i newMediaD ( trees.size()); + + //for creat new matrix; + unsigned int newi=0; + unsigned int newj=0; + + int count=0; + while(trees.size()!=1){//start 27, end 3 + count=1; + cout<<"----------------WHile num= "< distance[i][j] && i!=j) + { + minj=j; + mini=i; + } + } + //cout< > tmpdistance(trees.size()-1, std::vector(trees.size()-1, 0)); + newi=0; + newj=0; + cout<<"startcopy"<mini && imini && i>minj){ + cout<<"caso 2,"; + newi=i-2; + } + if(jmini && jmini && j>minj){ + newj=j-2; + cout<<"caso 5 "; + } + cout<<"mini e minj "<mini && imini && i>minj){ + cout<<"caso 2,"; + newi=i-2; + } + tmpdistance[newi][tmpdistance.size()-1]=(distance[mini][i]+distance[minj][i])/2; + cout<left!=NULL) + { + value1=root->left->branchLength; + } + if(root->right!=NULL) + { + value2=root->right->branchLength; + } + if(value1>value2) + return value1; + return value2; + } + + + /** + * Set the value of node if is -1 its escape during print + * @param val + * + */ + void NewickTree::setBranchLength(double val){ + if(val>0) + root->branchLength=val; + else + root->branchLength=0; + } + + /** + * Set the value of divergence will use in NJ if is -1 its escape during print + * @param val + * + */ + void NewickTree::setRdiv(double val){ + root->divergenceR=val; + } + + +}} // namespace diff --git a/Phylo/Sources/NewickTree.h b/Phylo/Sources/NewickTree.h new file mode 100644 index 0000000..dd0b5a0 --- /dev/null +++ b/Phylo/Sources/NewickTree.h @@ -0,0 +1,106 @@ +/* This file is part of Victor. + + Victor is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Victor is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Victor. If not, see . + */ + + +#ifndef _NewickTree_h_ +#define _NewickTree_h_ + +// Includes: + +#include +#include +#include +#include + + +using std::string; +using std::vector; + +struct iNode +{ + string name; + int nseq; + string seq; + string allignSeq; + iNode *left; + iNode *right; + bool isLeaf; + double branchLength; + double divergenceR; +}; + +// Global constants, typedefs, etc. (to avoid): +namespace Victor { namespace Phylo { + + /**@brief Methods to manages the global statistic data. + * + *@Description Use for create Tree during phylogeny + * */ + class NewickTree { + public: + + // CONSTRUCTORS/DESTRUCTOR: + NewickTree(); + NewickTree(int position,string name,string pureSeq,double divR); + NewickTree(NewickTree *rTree, NewickTree *lTree); + NewickTree(int position,string name,string pureSeq); + + ~NewickTree(); + + + // PREDICATES: + iNode *getRoot(); + + iNode *getRightChild(); + + iNode*getLeftChild(); + + string getName(); + + double getValueOFChildBranchLength(); + + double getRdiv(); + + + // OPERATORS: + void setBranchLength(double val); + void neighborJoining(Align2::Alignment ali,bool kimuraD=false); + void upgma(Align2::Alignment ali,bool kimuraD=false); + void setRdiv(double val); + + + /// Print in a string the tree using Newick format + string printNewickTree(); + + protected: + // HELPERS: + string printNewickTreeNode(iNode node); + + // ATTRIBUTES: + iNode *root; + + + + private: + // HELPERS: + void destroy_tree(iNode *leaf); + + }; + + +}} +#endif + diff --git a/Phylo/Sources/PhyloSupport.cc b/Phylo/Sources/PhyloSupport.cc new file mode 100644 index 0000000..b82e532 --- /dev/null +++ b/Phylo/Sources/PhyloSupport.cc @@ -0,0 +1,267 @@ +/* This file is part of Victor. + + Victor is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Victor is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Victor. If not, see . + */ + + +// Includes: +#include +#include +#include +#include +#include "PhyloSupport.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Global constants, typedefs, etc. (to avoid): +using namespace Victor::Align2; +using namespace Victor; +using std::string; +using std::vector; +using std::ifstream; +using std::cout; +using std::endl; + + +namespace Victor { namespace Phylo{ + + + /** @brief + * + **/ + // CONSTRUCTORS/DESTRUCTOR: + + /** + *@Description Basic contructor + */ + PhyloSupport::PhyloSupport(){ + + } + + + /** + *@Description Basic destructor + */ + PhyloSupport::~PhyloSupport() { + + } + + + vector PhyloSupport::calcAlignmentV(Alignment *aliSec, vector > &distance , bool kimura){ + string seq1Name, seq2Name, seq1, seq2, sec1, sec2; + + Alignment newAli; + string path = getenv("VICTOR_ROOT"); + if (path.length() < 3) + cout << "Warning: environment variable VICTOR_ROOT is not set." << endl; + + string dataPath = path + "data/"; + + //n=4? no idea. Default in APP/subali.cc + int n=4; + + AlignmentData *ad; + + //Default matrix + string matrixFileName="blosum62.dat"; + matrixFileName = dataPath + matrixFileName; + ifstream matrixFile(matrixFileName.c_str()); + if (!matrixFile) + ERROR("Error opening substitution matrix file.", exception); + SubMatrix sub(matrixFile); + + //Default gap function + double openGapPenalty=12; + double extensionGapPenalty=3; + GapFunction *gf = new AGPFunction(openGapPenalty, extensionGapPenalty); + + + //Default Structure + Structure *str; + str = 0; + + //Default csep + double cSeq; + cSeq = 1.00; + + //Default ScoringScheme + ScoringScheme *ss; + + //Global Align + Align *a; + cout << "\nSuboptimal Needleman-Wunsch alignments:\n" << endl; + + double suboptPenaltyMul=1; + double suboptPenaltyAdd=1; + + unsigned int suboptNum=1; + + vector a2; + + Alignment a3; + + vector alignV(aliSec->size()+1);//vector of align + + double score=0; + //size not count target. so +1 + for(unsigned int index=0;indexsize()+1;index++){ + cout<<" index="<size()){ + seq1 = Alignment::getPureSequence(aliSec->getTarget()); + seq1Name = aliSec->getTargetName(); + } + else{ + seq1 = Alignment::getPureSequence(aliSec->getTemplate(index)); + seq1Name = aliSec->getTemplateName(index); + } + for(unsigned int j=0;jsize()+1;j++){ + cout<<" j="<size()){ + seq2 = Alignment::getPureSequence(aliSec->getTarget()); + seq2Name = aliSec->getTargetName(); + }else{ + seq2 = Alignment::getPureSequence(aliSec->getTemplate(j)); + seq2Name = aliSec->getTemplateName(j); + } + + ad = new SequenceData(n, seq1, seq2, seq1Name, seq2Name); + + ss = new ScoringS2S(&sub, ad, str, cSeq); + + a = new NWAlign(ad, gf, ss); + + a->setPenalties(suboptPenaltyMul, suboptPenaltyAdd); + a2 = a->generateMultiMatch(suboptNum); + if (a2.size() == 0) + ERROR("No output alignments generated.", exception); + + a2[0].cutTemplate(1); + if(kimura==true) + score=PhyloSupport::distanceCalcTwoSeqKimura(a2[0].getTarget(),a2[0].getTemplate(0)); + else + score=PhyloSupport::distanceCalcTwoSeq(a2[0].getTarget(),a2[0].getTemplate(0)); + distance[index][j]=score; + cout<<"SCORE="< &PhyloSupport::split(const std::string &s, char delim, std::vector &elems) { + std::stringstream ss(s); + std::string item; + while (std::getline(ss, item, delim)) { + elems.push_back(item); + } + return elems; + } + + std::vector PhyloSupport::split(const std::string &s, char delim) { + std::vector elems; + split(s, delim, elems); + return elems; + } + + + double PhyloSupport::calcDivR(vector vDist){ + double tmpR=0; + for(unsigned int i=0;i > &distance){ + string sMatrix=""; + for(int i=0; i. + */ + + +#ifndef _PhyloSupport_h_ +#define _PhyloSupport_h_ + +// Includes: + +#include +#include +#include +#include + +using namespace Victor::Align2; +using std::string; +using std::vector; + + +// Global constants, typedefs, etc. (to avoid): +namespace Victor { namespace Phylo { + + /**@brief Methods to manages the global statistic data. + * + *@Description Use for create Tree during phylogeny + * */ + class PhyloSupport { + public: + + // CONSTRUCTORS/DESTRUCTOR: + PhyloSupport(); + + + ~PhyloSupport(); + + + //STATIC + //Calc multi Align + static vector calcAlignmentV(Alignment *aliSec, vector > &distance, bool kimura); + //Calc distance from 2 seq of DNA or protein + static double distanceCalcTwoSeq(string seq1,string seq2); + static double distanceCalcTwoSeqKimura(string seq1,string seq2); + static std::vector &split(const std::string &s, char delim, std::vector &elems); + static std::vector split(const std::string &s, char delim); + static double calcDivR(vector vDist); + static void printMatrix(vector > &distance); + + + + protected: + // HELPERS: + + // ATTRIBUTES: + + + + private: + // HELPERS: + + }; + + +}} +#endif + diff --git a/Phylo/Tests/Makefile b/Phylo/Tests/Makefile new file mode 100644 index 0000000..9744d9f --- /dev/null +++ b/Phylo/Tests/Makefile @@ -0,0 +1,55 @@ +#--*- makefile -*-------------------------------------------------------------- +# +# Standard makefile +# +#------------------------------------------------------------------------------ + +# Path to project directory. +UPDIR = ../.. +# Path to subdirectories. +SUBDIR= +# Path to directory for binaries: +BINPATH = ../../bin + + +# +# Libraries and paths (which are not defined globally). +# + +LIBS = -lAlign2 -lBiopool -lPhylo -ltools -L/usr/lib/ -lm -ldl -lcppunit + +LIB_PATH = -L. + +INC_PATH = -I. -I ../../Biopool/ -I../../tools/ -I../../Align2/Sources -I../../Phylo/Sources + +# +# Objects and headers +# + +SOURCES = TestPhylogen.cc TestPhylo.h + +OBJECTS = $(SOURCES:.cpp=.o) + +TARGETS = TestPhylogen + +EXECS = TestPhylogen + +LIBRARY = TESTlibTestPhylogen.a + +# +# Install rule +# + +compile: all + +all: install + +install: $(LIBRARY) $(TARGETS) + mv $(EXECS) $(UPDIR)/bin + mv $(LIBRARY) $(UPDIR)/lib + +# +# Call global Makefile to do the job. +# + +include ../../Makefile.global diff --git a/Phylo/Tests/TestPhylo.h b/Phylo/Tests/TestPhylo.h new file mode 100644 index 0000000..a98a4b2 --- /dev/null +++ b/Phylo/Tests/TestPhylo.h @@ -0,0 +1,69 @@ +/* + * TestPhylo.h + * + * Created on: 02 07 2015 + * Author: Matteo Del Pioluogo + */ + + +#include +#include +#include +#include +#include +#include +#include +using namespace std; +using namespace Victor; + + +class TestPhylo : public CppUnit::TestFixture { +private: +private: + +public: + + TestPhylo(){ + } + + + static CppUnit::Test *suite() { + CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("TestAlign"); + suiteOfTests->addTest(new CppUnit::TestCaller("Test1 - Calculate distance of 2 seq.", + &TestPhylo::testPhylo_A)); + suiteOfTests->addTest(new CppUnit::TestCaller("Test2 - .", + &TestPhylo::testPhylo_B)); + suiteOfTests->addTest(new CppUnit::TestCaller("Test3 - .", + &TestPhylo::testPhylo_C)); + + return suiteOfTests; + } + + /// Setup method + + void setUp() { + } + + /// Teardown method + + void tearDown() { + } + +protected: + + void testPhylo_A() { + CPPUNIT_ASSERT(1==0); + } + + void testPhylo_B() { + + + CPPUNIT_ASSERT(7 == 1); + } + + void testPhylo_C() { + + CPPUNIT_ASSERT(1 == 1); + } + +}; diff --git a/Phylo/Tests/TestPhylogen.cc b/Phylo/Tests/TestPhylogen.cc new file mode 100644 index 0000000..aee0a97 --- /dev/null +++ b/Phylo/Tests/TestPhylogen.cc @@ -0,0 +1,25 @@ +/* + * TestPhylogen.cc + * + * Created on: 02 07 2015 + * Author: Matteo Del Pioluogo + */ + +#include +#include +#include + +#include + +using namespace std; + + +int main() { + CppUnit::TextUi::TestRunner runner; + cout << "Creating Test Suites: OK" << endl; + runner.addTest(TestPhylo::suite()); + cout<< "Running the unit tests."< Date: Mon, 6 Jul 2015 16:37:13 +0200 Subject: [PATCH 02/31] make file modificati --- Phylo/APPS/Makefile | 4 ++-- Phylo/Sources/Makefile | 4 ++-- Phylo/Tests/Makefile | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Phylo/APPS/Makefile b/Phylo/APPS/Makefile index d8b7e98..02a0733 100755 --- a/Phylo/APPS/Makefile +++ b/Phylo/APPS/Makefile @@ -15,11 +15,11 @@ BINPATH = ../../bin # Libraries and paths (which are not defined globally) # -LIBS = -lAlign2 -lBiopool -lPhylo -ltools +LIBS = -lPhylo -lAlign2 -ltools LIB_PATH = -L. -INC_PATH = -I. -I ../../Biopool/ -I../../tools/ -I../../Align2/Sources -I../../Phylo/Sources +INC_PATH = -I. -I../../tools/ -I../../Align2/Sources -I../../Phylo/Sources # # Objects and headers diff --git a/Phylo/Sources/Makefile b/Phylo/Sources/Makefile index dc34d16..e8f0c6e 100755 --- a/Phylo/Sources/Makefile +++ b/Phylo/Sources/Makefile @@ -15,11 +15,11 @@ BINPATH = ../../bin # Libraries and paths (which are not defined globally) # -LIBS = -lAlign2 -lBiopool -ltools -lPhylo +LIBS = -lAlign2 -ltools LIB_PATH = -L. -INC_PATH = -I. -I ../../Align2/Sources -I../../Biopool/Sources -I../../tools/ +INC_PATH = -I. -I../../tools/ -I../../Align2/Sources # # Objects and headers diff --git a/Phylo/Tests/Makefile b/Phylo/Tests/Makefile index 9744d9f..8665980 100644 --- a/Phylo/Tests/Makefile +++ b/Phylo/Tests/Makefile @@ -16,11 +16,11 @@ BINPATH = ../../bin # Libraries and paths (which are not defined globally). # -LIBS = -lAlign2 -lBiopool -lPhylo -ltools -L/usr/lib/ -lm -ldl -lcppunit +LIBS = -lPhylo -lAlign2 -ltools -L/usr/lib/ -lm -ldl -lcppunit LIB_PATH = -L. -INC_PATH = -I. -I ../../Biopool/ -I../../tools/ -I../../Align2/Sources -I../../Phylo/Sources +INC_PATH = -I. -I../../tools/ -I../../Align2/Sources -I../../Phylo/Sources # # Objects and headers @@ -34,7 +34,7 @@ TARGETS = TestPhylogen EXECS = TestPhylogen -LIBRARY = TESTlibTestPhylogen.a +LIBRARY = TESTlibPhylogen.a # # Install rule From f676467cbc3a0180d9266dd7aa8243b84eef7f1d Mon Sep 17 00:00:00 2001 From: snitz Date: Mon, 6 Jul 2015 16:49:30 +0200 Subject: [PATCH 03/31] new delete --- Phylo/APPS/phylogen.cc | 115 +----------------------------------- Phylo/Sources/NewickTree.cc | 2 +- 2 files changed, 2 insertions(+), 115 deletions(-) diff --git a/Phylo/APPS/phylogen.cc b/Phylo/APPS/phylogen.cc index 7ca7246..129537f 100755 --- a/Phylo/APPS/phylogen.cc +++ b/Phylo/APPS/phylogen.cc @@ -30,10 +30,6 @@ #include -#include -#include -#include - #include #include @@ -47,116 +43,6 @@ using namespace Victor::Phylo; using namespace Victor; using namespace std; -vector calcAlignmentV(Alignment *aliSec, vector > &distance){ - string seq1Name, seq2Name, seq1, seq2, sec1, sec2; - - Alignment newAli; - string path = getenv("VICTOR_ROOT"); - if (path.length() < 3) - cout << "Warning: environment variable VICTOR_ROOT is not set." << endl; - - string dataPath = path + "data/"; - - //n=4? no idea. Default in APP/subali.cc - int n=4; - - AlignmentData *ad; - - //Default matrix - string matrixFileName="blosum62.dat"; - matrixFileName = dataPath + matrixFileName; - ifstream matrixFile(matrixFileName.c_str()); - if (!matrixFile) - ERROR("Error opening substitution matrix file.", exception); - SubMatrix sub(matrixFile); - - //Default gap function - double openGapPenalty=12; - double extensionGapPenalty=3; - GapFunction *gf = new AGPFunction(openGapPenalty, extensionGapPenalty); - - - //Default Structure - Structure *str; - str = 0; - - //Default csep - double cSeq; - cSeq = 1.00; - - //Default ScoringScheme - ScoringScheme *ss; - - //Global Align - Align *a; - cout << "\nSuboptimal Needleman-Wunsch alignments:\n" << endl; - - double suboptPenaltyMul=1; - double suboptPenaltyAdd=1; - - unsigned int suboptNum=1; - - vector a2; - - Alignment a3; - - vector alignV(aliSec->size()+1);//vector of align - - double score=0; - //size not count target. so +1 - for(unsigned int index=0;indexsize()+1;index++){ - cout<<" index="<size()){ - seq1 = Alignment::getPureSequence(aliSec->getTarget()); - seq1Name = aliSec->getTargetName(); - } - else{ - seq1 = Alignment::getPureSequence(aliSec->getTemplate(index)); - seq1Name = aliSec->getTemplateName(index); - } - for(unsigned int j=0;jsize()+1;j++){ - cout<<" j="<size()){ - seq2 = Alignment::getPureSequence(aliSec->getTarget()); - seq2Name = aliSec->getTargetName(); - }else{ - seq2 = Alignment::getPureSequence(aliSec->getTemplate(j)); - seq2Name = aliSec->getTemplateName(j); - } - - ad = new SequenceData(n, seq1, seq2, seq1Name, seq2Name); - - ss = new ScoringS2S(&sub, ad, str, cSeq); - - a = new NWAlign(ad, gf, ss); - - a->setPenalties(suboptPenaltyMul, suboptPenaltyAdd); - a2 = a->generateMultiMatch(suboptNum); - if (a2.size() == 0) - ERROR("No output alignments generated.", exception); - - a2[0].cutTemplate(1); - score=PhyloTree::distanceCalcTwoSeq(a2[0].getTarget(),a2[0].getTemplate(0)); - distance[index][j]=score; - cout<<"SCORE="< distance; }//end while root=trees[0].getRoot(); - + cout<<"end UPGMA"< Date: Mon, 6 Jul 2015 17:36:51 +0200 Subject: [PATCH 04/31] insert defaul data fasta input file --- Phylo/APPS/phylogen.cc | 2 +- data/cyc1_input.fasta | 145 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 146 insertions(+), 1 deletion(-) create mode 100644 data/cyc1_input.fasta diff --git a/Phylo/APPS/phylogen.cc b/Phylo/APPS/phylogen.cc index 129537f..a7a9eab 100755 --- a/Phylo/APPS/phylogen.cc +++ b/Phylo/APPS/phylogen.cc @@ -185,7 +185,7 @@ int main(int argc, char **argv) { cout<<"Align2 Victor distance of "<sp|P00125|CY1_BOVIN Cytochrome c1, heme protein, mitochondrial OS=Bos taurus GN=CYC1 PE=1 SV=2 +MAAAAATLRGAMVGPRGAGLPGARARGLLCGARPGQLPLRTPQAVSLSSKSGLSRGRKVI +LSALGMLAAGGAGLAVALHSAVSASDLELHPPSYPWSHRGLLSSLDHTSIRRGFQVYKQV +CSSCHSMDYVAYRHLVGVCYTEDEAKALAEEVEVQDGPNEDGEMFMRPGKLSDYFPKPYP +NPEAARAANNGALPPDLSYIVRARHGGEDYVFSLLTGYCEPPTGVSLREGLYFNPYFPGQ +AIGMAPPIYNEVLEFDDGTPATMSQVAKDVCTFLRWAAEPEHDHRKRMGLKMLLMMGLLL +PLVYAMKRHKWSVLKSRKLAYRPPK +>sp|P08574|CY1_HUMAN Cytochrome c1, heme protein, mitochondrial OS=Homo sapiens GN=CYC1 PE=1 SV=3 +MAAAAASLRGVVLGPRGAGLPGARARGLLCSARPGQLPLRTPQAVALSSKSGLSRGRKVM +LSALGMLAAGGAGLAMALHSAVSASDLELHPPSYPWSHRGLLSSLDHTSIRRGFQVYKQV +CASCHSMDFVAYRHLVGVCYTEDEAKELAAEVEVQDGPNEDGEMFMRPGKLFDYFPKPYP +NSEAARAANNGALPPDLSYIVRARHGGEDYVFSLLTGYCEPPTGVSLREGLYFNPYFPGQ +AIAMAPPIYTDVLEFDDGTPATMSQIAKDVCTFLRWASEPEHDHRKRMGLKMLMMMALLV +PLVYTIKRHKWSVLKSRKLAYRPPK +>sp|P00044|CYC1_YEAST Cytochrome c iso-1 OS=Saccharomyces cerevisiae (strain ATCC 204508 / S288c) GN=CYC1 PE=1 SV=2 +MTEFKAGSAKKGATLFKTRCLQCHTVEKGGPHKVGPNLHGIFGRHSGQAEGYSYTDANIK +KNVLWDENNMSEYLTNPKKYIPGTKMAFGGLKKEKDRNDLITYLKKACE +>sp|P30183|CCB11_ARATH Cyclin-B1-1 OS=Arabidopsis thaliana GN=CYCB1-1 PE=1 SV=2 +MMTSRSIVPQQSTDDVVVVDGKNVAKGRNRQVLGDIGNVVRGNYPKNNEPEKINHRPRTR +SQNPTLLVEDNLKKPVVKRNAVPKPKKVAGKPKVVDVIEISSDSDEELGLVAAREKKATK +KKATTYTSVLTARSKAACGLEKKQKEKIVDIDSADVENDLAAVEYVEDIYSFYKSVESEW +RPRDYMASQPDINEKMRLILVEWLIDVHVRFELNPETFYLTVNILDRFLSVKPVPRKELQ +LVGLSALLMSAKYEEIWPPQVEDLVDIADHAYSHKQILVMEKTILSTLEWYLTVPTHYVF +LARFIKASIADEKMENMVHYLAELGVMHYDTMIMFSPSMVAASAIYAARSSLRQVPIWTS +TLKHHTGYSETQLMDCAKLLAYQQWKQQEEGSESSTKGALRKKYSKDERFAVALIPPAKA +LLTGTESA +>sp|Q0JF02|CPS4_ORYSJ Syn-copalyl diphosphate synthase OS=Oryza sativa subsp. japonica GN=CPS4 PE=2 SV=1 +MPVFTASFQCVTLFGQPASAADAQPLLQGQRPFLHLHARRRRPCGPMLISKSPPYPASEE +TREWEADGQHEHTDELRETTTTMIDGIRTALRSIGEGEISISAYDTSLVALLKRLDGGDG +PQFPSTIDWIVQNQLPDGSWGDASFFMMGDRIMSTLACVVALKSWNIHTDKCERGLLFIQ +ENMWRLAHEEEDWMLVGFEIALPSLLDMAKDLDLDIPYDEPALKAIYAERERKLAKIPRD +VLHSMPTTLLHSLEGMVDLDWEKLLKLRCLDGSFHCSPASTATAFQQTGDQKCFEYLDGI +VKKFNGGVPCIYPLDVYERLWAVDRLTRLGISRHFTSEIEDCLDYIFRNWTPDGLAHTKN +CPVKDIDDTAMGFRLLRLYGYQVDPCVLKKFEKDGKFFCLHGESNPSSVTPMYNTYRASQ +LKFPGDDGVLGRAEVFCRSFLQDRRGSNRMKDKWAIAKDIPGEVEYAMDYPWKASLPRIE +TRLYLDQYGGSGDVWIGKVLHRMTLFCNDLYLKAAKADFSNFQKECRVELNGLRRWYLRS +NLEKFGGTDPQTTLMTSYFLASANIFEANRAAERLGWARVALLADAVSSHFRRIGGPKNS +TSNLEELISLVPFDDAYSGSLREAWKQWLMAWTAKESSQESIEGDTAILLVRAIEIFGGR +HVLTGQRPDLWEYSQLEQLTSSICCKLSRRVLAQENGESTEKVEEIDQQVDLEMQELTRR +VLQGCSAINRLTRETFLHVVKSFCYVAYCSPETIDSHIDKVIFQDVI +>sp|P00048|CYC_NEUCR Cytochrome c OS=Neurospora crassa (strain ATCC 24698 / 74-OR23-1A / CBS 708.71 / DSM 1257 / FGSC 987) GN=cyc-1 PE=1 SV=2 +MGFSAGDSKKGANLFKTRCAQCHTLEEGGGNKIGPALHGLFGRKTGSVDGYAYTDANKQK +GITWDENTLFEYLENPKKYIPGTKMAFGGLKKDKDRNDIITFMKEATA +>sp|Q9D0M3|CY1_MOUSE Cytochrome c1, heme protein, mitochondrial OS=Mus musculus GN=Cyc1 PE=1 SV=1 +MAAAAASLRRTVLGPRGVGLPGASAPGLLGGARSRQLPLRTPQAVSLSSKSGPSRGRKVM +LSALGMLAAGGAGLAVALHSAVSASDLELHPPSYPWSHRGLLSSLDHTSIRRGFQVYKQV +CSSCHSMDYVAYRHLVGVCYTEEEAKALAEEVEVQDGPNDDGEMFMRPGKLSDYFPKPYP +NPEAARAANNGALPPDLSYIVRARHGGEDYVFSLLTGYCEPPTGVSLREGLYFNPYFPGQ +AIGMAPPIYTEVLEYDDGTPATMSQVAKDVATFLRWASEPEHDHRKRMGLKMLLMMGLLL +PLTYAMKRHKWSVLKSRKLAYRPPK +>sp|Q9K499|CYC1_STRCO Epi-isozizaene synthase OS=Streptomyces coelicolor (strain ATCC BAA-471 / A3(2) / M145) GN=cyc1 PE=1 SV=1 +MHAFPHGTTATPTAIAVPPSLRLPVIEAAFPRQLHPYWPKLQETTRTWLLEKRLMPADKV +EEYADGLCYTDLMAGYYLGAPDEVLQAIADYSAWFFVWDDRHDRDIVHGRAGAWRRLRGL +LHTALDSPGDHLHHEDTLVAGFADSVRRLYAFLPATWNARFARHFHTVIEAYDREFHNRT +RGIVPGVEEYLELRRLTFAHWIWTDLLEPSSGCELPDAVRKHPAYRRAALLSQEFAAWYN +DLCSLPKEIAGDEVHNLGISLITHHSLTLEEAIGEVRRRVEECITEFLAVERDALRFADE +LADGTVRGKELSGAVRANVGNMRNWFSSVYWFHHESGRYMVDSWDDRSTPPYVNNEAAGE +K +>sp|Q753F4|CYC_ASHGO Cytochrome c OS=Ashbya gossypii (strain ATCC 10895 / CBS 109.51 / FGSC 9923 / NRRL Y-1056) GN=CYC1 PE=3 SV=2 +MPAPYKPGSEKKGATLFKTRCLQCHTTEKGGPHKVGPNLHGVFSRQSGQVSGYSYTDAMI +NRNVTWDAQSMSDYLENPKKYIPGTKMAFGGLKKEKDRNDLITYMLKACK +>sp|P25400|CYC_CANGA Cytochrome c OS=Candida glabrata (strain ATCC 2001 / CBS 138 / JCM 3761 / NBRC 0622 / NRRL Y-65) GN=CYC1 PE=3 SV=1 +MSEKKGATLFKTRCLQCHTVEKGGPNKVGPNLHGIFGRKSGQAAGYSYTDANIKKNVTWD +EDNMSDYLTNPKKYIPGTKMAFGGLKKEKDRKDLIAYLKKATSD +>sp|P53698|CYC_CANAL Cytochrome c OS=Candida albicans (strain SC5314 / ATCC MYA-2876) GN=CYC1 PE=3 SV=3 +MPAPFEKGSEKKGATLFKTRCLQCHTVEKGGPHKVGPNLHGVFGRKSGLAEGYSYTDANK +KKGVEWTEQTMSDYLENPKKYIPGTKMAFGGLKKPKDRNDLVTYLKKATS +>sp|P15451|CYC_CHLRE Cytochrome c OS=Chlamydomonas reinhardtii GN=CYC1 PE=3 SV=2 +MSTFAEAPAGDLARGEKIFKTKCAQCHVAEKGGGHKQGPNLGGLFGRVSGTAAGFAYSKA +NKEAAVTWGESTLYEYLLNPKKYMPGNKMVFAGLKKPEERADLIAYLKQATA +>sp|P00043|CYC_DEBHA Cytochrome c OS=Debaryomyces hansenii (strain ATCC 36239 / CBS 767 / JCM 1990 / NBRC 0083 / IGC 2968) GN=CYC1 PE=1 SV=3 +MPAPYEKGSEKKGANLFKTRCLQCHTVEEGGPHKVGPNLHGVVGRTSGQAQGFSYTDANK +KKGVEWSEQNLSDYLENPKKYIPGTKMAFGGLKKAKDRNDLISYLVKATK +>sp|P00041|CYC_ISSOR Cytochrome c OS=Issatchenkia orientalis GN=CYC1 PE=1 SV=1 +PAPFEQGSAKKGATLFKTRCAQCHTIEAGGPHKVGPNLHGIFSRHSGQAEGYSYTDANKR +AGVEWAEPTMSDYLENPKKYIPGTKMAFGGLKKAKDRNDLVTYMLEASK +>sp|O13393|CYC_PICST Cytochrome c OS=Scheffersomyces stipitis (strain ATCC 58785 / CBS 6054 / NBRC 10063 / NRRL Y-11545) GN=CYC1 PE=3 SV=3 +MPAPFEKGSEKKGATLFKTRCLQCHTVEEGGPHKVGPNLHGIMGRKSGQAVGYSYTDANK +KKGVEWSEQTMSDYLENPKKYIPGTKMAFGGLKKPKDRNDLVTYLASATK +>sp|P00046|CYC_SCHPO Cytochrome c OS=Schizosaccharomyces pombe (strain 972 / ATCC 24843) GN=cyc1 PE=1 SV=3 +MPYAPGDEKKGASLFKTRCAQCHTVEKGGANKVGPNLHGVFGRKTGQAEGFSYTEANRDK +GITWDEETLFAYLENPKKYIPGTKMAFAGFKKPADRNNVITYLKKATSE +>sp|Q6E7D7|CPS4_ORYSI Syn-copalyl diphosphate synthase OS=Oryza sativa subsp. indica GN=CPS4 PE=2 SV=1 +MPVFTASFQCVTLFGQPASAADAQPLLQGQRPFLHLHARRRRPCGPMLISKSPPYPASEE +TREWEAEGQHEHTDELRETTTTMIDGIRTALRSIGEGEISISAYDTSLVALLKRLDGGDG +PQFPSTIDWIVQNQLPDGSWGDASFFMMGDRIMSTLACVVALKSWNIHTDKCERGLLFIQ +ENMWRLAHEEEDWMLVGFEIALPSLLDMAKDLDLDIPYDEPALKAIYAERERKLAKIPRD +VLHAMPTTLLHSLEGMVDLDWEKLLKLRCLDGSFHCSPASTATAFQQTGDQKCFEYLDGI +VKKFNGGVPCIYPLDVYERLWAVDRLTRLGISRHFTSEIEDCLDYIFRNWTPDGLAHTKN +CPVKDIDDTAMGFRLLRLYGYQVDPCVLKKFEKDGKFFCLHGESNPSSVTPMYNTYRASQ +LKFPGDDGVLGRAEVFCRSFLQDRRGSNRMKDKWAIAKDIPGEVEYAMDYPWKASLPRIE +TRLYLDQYGGSGDVWIGKVLHRMTLFCNDLYLKAAKADFSNFQKECRVELNGLRRWYLRS +NLERFGGTDPQTTLMTSYFLASANIFEPNRAAERLGWARVALLADAVSSHFRRIGGPKNL +TSNLEELISLVPFDDAYSGSLREAWKQWLMAWTAKESSQESIEGDTAILLVRAIEIFGGR +HVLTGQRPDLWEYSQLEQLTSSICRKLYRRVLAQENGKSTEKVEEIDQQLDLEMQELTRR +VLQGCSAINRLTRETFLHVVKSFCYVAYCSPETIDNHIDKVIFQDVI +>sp|Q54D07|CY1_DICDI Cytochrome c1, heme protein, mitochondrial OS=Dictyostelium discoideum GN=cyc1 PE=3 SV=1 +MFSQKLLANGKLLSKLAIVSGVVGTATAITTSTTLLSDDFVQPTSYPWEHRFPWQSYDHA +AIRRGHQVYTQVCSTCHSLNLISYRHLANVAYTPEELKAMAADTTVMDGPDSEGDMFERK +GQVTDFHPKPYPNSNAARFANNGALPPDLSLVIKARGAHEDYVFSLLTGYCEPPAGVRVV +GGQYFNPYFPGTKIAMAPPLADGMVEYDDGTDNSMSQMAKDVSTFLCWASEPEHDDRKKL +GMKVLLGVSIIALPLFYWKRLKWSVIKTRKISFKD +>sp|P74917|CY552_ACIFR Cytochrome c-552 OS=Acidithiobacillus ferrooxidans GN=cyc1 PE=1 SV=2 +MTTYLSQDRLRNKENDTMTYQHSKMYQSRTFLLFSALLLVAGQASAAVGSADAPAPYRVS +SDCMVCHGMTGRDTLYPIVPRLAGQHKSYMEAQLKAYKDHSRADQNGEIYMWPVAQALDS +AKITALADYFNAQKPPMQSSGIKHAGAKEGKAIFNQGVTNEQIPACMECHGSDGQGAGPF +PRLAGQRYGYIIQQLTYFHNGTRVNTLMNQIAKNITVAQMKDVAAYLSSL +>sp|Q9AJE4|CYC1_KITGR Terpentedienyl-diphosphate synthase OS=Kitasatospora griseola GN=cyc1 PE=1 SV=1 +MKDRAADPVTKFSPSPYETGQFLRISERADVGTPQIDYLLATQRPDGLWGSVGFELVPTL +GAVAGLSSRPEYADRAGVTDAVARACEKLWELALGEGGLPKLPDTVASEIIVPSLIDLLS +EVLQRHRPAVGGKAGQEQEFPSPPGANAELWRQLSDRIARGQAIPKTAWHTLEAFHPLPK +QFAATVTPAADGAVTCSPSSTAAWLSAVGTDAGASTRAYLDEAQSRYGGAIPMGSSMPYF +EVLWVLNLVLKYFPDVPIPREIIEEIAAGFSDSGIGGGPGLPPDGDDTAYANLAGDKLGA +PTHPEILMKFWAEDHFVSYPGEQTPSETVNAHALEYLNHLRMRRGITEFGAVEDACAEWV +ISQQTEDGCWYDKWNVSPYYSTAACVEALLDARKQDEPQLDSLRRAREWLLRHQTDSGGW +GMAEPSPEETAYAVLALDLFASRGGEGAEECAAAISRAKEFFTDESRENPPLWMGKDLYT +PFRIVDVTVMCGRAVVGRY +>sp|Q4HVX7|CYC_GIBZE Cytochrome c OS=Gibberella zeae (strain PH-1 / ATCC MYA-4620 / FGSC 9075 / NRRL 31084) GN=CYC1 PE=3 SV=2 +MAGGDIKKGANLFKTRCAQCHTVEKDGGNKIGPALHGLWGRKTGSVEGYSYTDANKQKGI +EWNDDTLFEYLENPKKYIPGTKMAFGGLKKAKDRNDLIAYLKDSTK +>sp|P19681|CYC_SCHOC Cytochrome c OS=Schwanniomyces occidentalis GN=CYC1 PE=3 SV=2 +MPAPYEKGSEKKDANLFKTRCLQCHTVEKGGPHKVGPNLHGIFGRKSGQAAGYSYTDANK +KKGVEWTEQTMSDYLENPKKYIPGTKMAFGGLKKPKDRNDLITYLANATK +>sp|Q6C9Q0|CYC_YARLI Cytochrome c OS=Yarrowia lipolytica (strain CLIB 122 / E 150) GN=CYC1 PE=3 SV=1 +MGYKEGSAKKGATLFKTRCAQCHTTEAGGPHKVGPNLHGVINRHSGEAEGYSYSDANKRK +GIEWTTEHLFEYLENPKKYIPGTKMAFGGLKKPKDRNDLITWMVENC +>sp|P41179|CCN1_TRYBB G2/mitotic-specific cyclin-1 OS=Trypanosoma brucei brucei GN=CYC1 PE=2 SV=1 +MMTNLNVRTIKGCFPGSLPDALNDMSVINHILSLTGRFDEGQAALASTVNVVYVGTAVYD +IPRYREEYTKNFIARGCGISEVCVAEARSTGATPCAAATMVTPDQLQHLAEAHIILLPEV +TPFSLYDAGRKRASMRVSGQVAARGVVLVGGGCCFRAEHSDSANPKTCAQYMLSRENEVD +SGRPVEMEEGGAKWEYLCVHGLSVLPGIFCPQHSSRDATGLLLNESFSKMLKRHPTERGI +GVDCRAVLLLMGDGRYQVLTIANREGRTASVKDINIQIKDVVEGNVQTTTIQQQGSVEEL +LRKPCGPVVRDPFEAYYAMANPTALTEKLLCAPR +>sp|Q06374|CG1_COLGL Putative G1/S-specific cyclin OS=Colletotrichum gloeosporioides GN=CYC1 PE=2 SV=1 +MLKRATVDVSSPADVPQPKRRKAYLTGADQARDEYAEDAIKHMEYIESETTPDKRCIPAS +VLEMRPEVIRSLIKIHKELNLLPQTLLLSVNLLDRFCCKKDFLGQVYYRVFSCAALWIAS +KFVEDKDRVPSVCRVLDYVPSGMYCNSALFFRLEICMLNVLAWRVNHPTPWCFIQLFCPG +EGNKTKVRELALDICTARLPENRWMAIRPSILARHCLLHACNNLFSDTSLVEMYGSMPRD +KPSTH +>sp|O93863|CYC_PACTA Cytochrome c OS=Pachysolen tannophilus GN=CYC1 PE=3 SV=3 +MPAPYEKGSAKKGATLFKTRCLQCHTTEAGGAHKVGPNLNGVFGRHSGQAEGYSYTDANK +QKGALWEAQTMSDYLENPKKYIPGTKMAFGGLKKAKDRNDLVTYLLSATK +>sp|Q6Q4H8|CYC_PICPA Cytochrome c OS=Komagataella pastoris GN=CYC1 PE=3 SV=3 +MPAPFEKGSEKKGATLFKTRCLQCHTVEAGGPHKVGPNLHGVFGRKSGLAEGYSYTDANK +RKGVEWSEQTMSDYLENPKKYIPGTKMAFGGLKKAKDRNDLITYLASATK From 996e55676ef33c62ab1bceb5fbbef13ef6913d71 Mon Sep 17 00:00:00 2001 From: snitz Date: Tue, 7 Jul 2015 13:52:41 +0200 Subject: [PATCH 05/31] improve --verbose --- Phylo/APPS/phylogen.cc | 36 +++++----------- Phylo/Sources/NewickTree.cc | 79 ++++++++++++++--------------------- Phylo/Sources/NewickTree.h | 4 +- Phylo/Sources/PhyloSupport.cc | 32 +++++++------- Phylo/Sources/PhyloSupport.h | 2 +- 5 files changed, 60 insertions(+), 93 deletions(-) diff --git a/Phylo/APPS/phylogen.cc b/Phylo/APPS/phylogen.cc index a7a9eab..3f1958b 100755 --- a/Phylo/APPS/phylogen.cc +++ b/Phylo/APPS/phylogen.cc @@ -65,12 +65,13 @@ sShowHelp() { << "\n [-e ] \t Extension gap penalty (default = 3.00)" << "\n" << "\n [--cluster <0|1>] \t CLuster function (default = 0, i.e. UPGMA)" - << "\n \t --gf=0: UPGMA (Affine Gap Penalty) Cluster (default)." - << "\n \t --gf=1: NJ (Variable Gap Penalty) Cluster." + << "\n \t --gf=0: UPGMA (default)." + << "\n \t --gf=1: NJ." << "\n [--cSeq ] \t Coefficient for sequence alignment (default = 0.80)" << "\n [--cStr ] \t Coefficient for structural alignment (default = 0.20)" << "\n" - << "\n [--verbose] \t Verbose mode" + << "\n [--kimura] \t use kimura formula for calculate distance of pairwise sequence" + << "\n [--verbose] \t Verbose mode" << "\n" << endl; } @@ -83,7 +84,8 @@ int main(int argc, char **argv) { double openGapPenalty, extensionGapPenalty; unsigned int gapFunction, cluster; double cSeq, cStr; - bool verbose; + bool verbose=false; + bool kimura=false; struct tm* newtime; time_t t; @@ -110,9 +112,9 @@ int main(int argc, char **argv) { getArg("-cSeq", cSeq, argc, argv, 0.80); getArg("-cStr", cStr, argc, argv, 0.20); + kimura = getArg("-kimura", argc, argv); verbose = getArg("-verbose", argc, argv); - // -------------------------------------------------- // 1. Load FASTA // -------------------------------------------------- @@ -140,13 +142,13 @@ int main(int argc, char **argv) { string out="Error Tree"; if(cluster==0){ cout<<"##########################################Starting PhyloTreeUPGMA#####################################"< distance; // OPERATORS: /** * */ - void NewickTree::neighborJoining(Align2::Alignment ali,bool kimuraD){ + void NewickTree::neighborJoining(Align2::Alignment ali,bool kimuraD, bool verbose){ int Nseq=ali.size()+1;//number of seq vector > distance(Nseq, std::vector(Nseq, 0)); + cout<<"---------Creating Distance Matrix--------------/"<vet=PhyloSupport::calcAlignmentV(&ali,distance,kimuraD); - cout<<"---------Print Distance Matrix--------------/"<vet=PhyloSupport::calcAlignmentV(&ali,distance,kimuraD,verbose); + cout<<"---------Print Distance Matrix--------------/"< distance; /** * * */ - void NewickTree::upgma(Align2::Alignment ali,bool kimuraD){ + void NewickTree::upgma(Align2::Alignment ali,bool kimuraD, bool verbose){ int Nseq=ali.size()+1;//number of seq vector > distance(Nseq, std::vector(Nseq, 0)); - cout<<"---------Creating Distance Matrix--------------/"<vet=PhyloSupport::calcAlignmentV(&ali,distance,kimuraD); - cout<<"---------Print Distance Matrix--------------/"<vet=PhyloSupport::calcAlignmentV(&ali,distance,kimuraD,verbose); + vector trees(Nseq); + + + if(verbose){ + cout<<"---------Print Distance Matrix--------------/"< trees(Nseq); //Create all node for(unsigned int i=0;i distance; unsigned int newi=0; unsigned int newj=0; - int count=0; - while(trees.size()!=1){//start 27, end 3 - count=1; - cout<<"----------------WHile num= "< distance[i][j] && i!=j) { minj=j; mini=i; } } - //cout< distance; vector > tmpdistance(trees.size()-1, std::vector(trees.size()-1, 0)); newi=0; newj=0; - cout<<"startcopy"<mini && imini && i>minj){ - cout<<"caso 2,"; newi=i-2; } if(jmini && jmini && j>minj){ newj=j-2; - cout<<"caso 5 "; } - cout<<"mini e minj "< distance; if(i!=mini && i!=minj){ if(imini && imini && i>minj){ - cout<<"caso 2,"; newi=i-2; } tmpdistance[newi][tmpdistance.size()-1]=(distance[mini][i]+distance[minj][i])/2; - cout< distance; trees.erase(trees.begin()+mini); //minus 1, because length was decreased trees.erase(trees.begin()+minj-1); - cout<<"the distance of tree vector "< PhyloSupport::calcAlignmentV(Alignment *aliSec, vector > &distance , bool kimura){ + vector PhyloSupport::calcAlignmentV(Alignment *aliSec, vector > &distance , bool kimura,bool verbose){ string seq1Name, seq2Name, seq1, seq2, sec1, sec2; Alignment newAli; @@ -107,7 +107,7 @@ namespace Victor { namespace Phylo{ //Global Align Align *a; - cout << "\nSuboptimal Needleman-Wunsch alignments:\n" << endl; + cout << "Start Suboptimal Needleman-Wunsch alignments:" << endl; double suboptPenaltyMul=1; double suboptPenaltyAdd=1; @@ -123,7 +123,8 @@ namespace Victor { namespace Phylo{ double score=0; //size not count target. so +1 for(unsigned int index=0;indexsize()+1;index++){ - cout<<" index="< calcAlignmentV(Alignment *aliSec, vector > &distance, bool kimura); + static vector calcAlignmentV(Alignment *aliSec, vector > &distance, bool kimura=false, bool verbose=false); //Calc distance from 2 seq of DNA or protein static double distanceCalcTwoSeq(string seq1,string seq2); static double distanceCalcTwoSeqKimura(string seq1,string seq2); From e335aea5290998d2bc2c1ec5fd2d5bfd5afd991c Mon Sep 17 00:00:00 2001 From: snitz Date: Tue, 7 Jul 2015 19:46:12 +0200 Subject: [PATCH 06/31] improve verbose and fix distance formula for NJ --- Phylo/APPS/phylogen.cc | 10 +- Phylo/Sources/NewickTree.cc | 203 +++++++++++++++++++++++++--------- Phylo/Sources/NewickTree.h | 22 +++- Phylo/Sources/PhyloSupport.cc | 38 ++++--- Phylo/Sources/PhyloSupport.h | 4 +- 5 files changed, 195 insertions(+), 82 deletions(-) diff --git a/Phylo/APPS/phylogen.cc b/Phylo/APPS/phylogen.cc index 3f1958b..4859816 100755 --- a/Phylo/APPS/phylogen.cc +++ b/Phylo/APPS/phylogen.cc @@ -70,7 +70,7 @@ sShowHelp() { << "\n [--cSeq ] \t Coefficient for sequence alignment (default = 0.80)" << "\n [--cStr ] \t Coefficient for structural alignment (default = 0.20)" << "\n" - << "\n [--kimura] \t use kimura formula for calculate distance of pairwise sequence" + << "\n [--ktuples] \t use ktuples method formula for calculate distance of pairwise sequence" << "\n [--verbose] \t Verbose mode" << "\n" << endl; } @@ -85,7 +85,7 @@ int main(int argc, char **argv) { unsigned int gapFunction, cluster; double cSeq, cStr; bool verbose=false; - bool kimura=false; + bool ktuples=false; struct tm* newtime; time_t t; @@ -112,7 +112,7 @@ int main(int argc, char **argv) { getArg("-cSeq", cSeq, argc, argv, 0.80); getArg("-cStr", cStr, argc, argv, 0.20); - kimura = getArg("-kimura", argc, argv); + ktuples = getArg("-ktuples", argc, argv); verbose = getArg("-verbose", argc, argv); // -------------------------------------------------- @@ -142,13 +142,13 @@ int main(int argc, char **argv) { string out="Error Tree"; if(cluster==0){ cout<<"##########################################Starting PhyloTreeUPGMA#####################################"< distance; NewickTree::NewickTree() { + + leafList=NULL; root=new iNode; root->name="NULL"; root->left=NULL; root->right=NULL; + root->parent=NULL; root->seq="noPURESeq"; root->allignSeq="noSeq"; root->branchLength=-1; root->divergenceR=0; root->nseq=-1; + root->isLeaf=true; + root->numberOfChildren=0; } NewickTree::NewickTree(int position,string name,string pureSeq,double divR){ + + leafList=NULL; root=new iNode; root->left=NULL; root->right=NULL; + root->parent=NULL; root->allignSeq="noSeq"; root->branchLength=-1; @@ -73,21 +81,26 @@ std::vector distance; root->seq=pureSeq; root->nseq=position; root->isLeaf=true; + root->numberOfChildren=0; } NewickTree::NewickTree(int position,string name,string pureSeq){ + + leafList=NULL; root=new iNode; root->left=NULL; root->right=NULL; + root->parent=NULL; root->allignSeq="noSeq"; - root->branchLength=-1; + root->branchLength=0; root->name=name; root->divergenceR=0; root->seq=pureSeq; root->nseq=position; root->isLeaf=true; + root->numberOfChildren=0; } @@ -96,18 +109,25 @@ std::vector distance; *@Description Create new Tree. Used for union two different Tree. */ NewickTree::NewickTree( NewickTree *rTree, NewickTree *lTree) { + leafList=NULL; root=new iNode; root->allignSeq="noSeq"; root->branchLength=-1; + root->isLeaf=false;//internal + root->numberOfChildren=rTree->getNumOfChildren()+lTree->getNumOfChildren(); + root->name=""; root->seq=""; root->left=rTree->getRoot(); root->right=lTree->getRoot(); + root->parent=NULL; + root->divergenceR=0; } + /** *@Description Basic destructor */ @@ -128,6 +148,9 @@ std::vector distance; iNode*NewickTree::getLeftChild(){ return root->left; } + iNode*NewickTree::getParent(){ + return root->parent; + } string NewickTree::getName(){ return root->name; @@ -138,6 +161,20 @@ std::vector distance; } + bool NewickTree::isLeaf(){ + return root->isLeaf; + } + + int NewickTree::getNumOfChildren(){ + return root->numberOfChildren; + } + + double NewickTree::getWeigth(){ + return root->weigth; + } + + + // MODIFIERS: @@ -145,20 +182,20 @@ std::vector distance; // OPERATORS: /** * */ - void NewickTree::neighborJoining(Align2::Alignment ali,bool kimuraD, bool verbose){ + void NewickTree::neighborJoining(Align2::Alignment ali,bool ktuples, bool verbose){ int Nseq=ali.size()+1;//number of seq vector > distance(Nseq, std::vector(Nseq, 0)); - cout<<"---------Creating Distance Matrix--------------/"<vet=PhyloSupport::calcAlignmentV(&ali,distance,kimuraD,verbose); + if(verbose) + cout<<"---------Creating Distance Matrix--------------/"<vet=PhyloSupport::calcAlignmentV(&ali,distance,ktuples,verbose); - cout<<"---------Print Distance Matrix--------------/"< trees(Nseq); @@ -173,8 +210,10 @@ std::vector distance; } - //matrix Mij for calculate minimun distance - vector > Mij(trees.size(), std::vector(trees.size(), 0)); + + vector seqList(trees.size());//never modify, used after create guide tree + int count=0;//count number of seq insert in seqList + //memorize the nearest seq unsigned int minj=0; @@ -186,85 +225,90 @@ std::vector distance; unsigned int newi=0; unsigned int newj=0; - int count=0; - while(trees.size()!=3){//start 27, end 3 - count=1; - cout<<"----------------WHile num= "< > Mij(trees.size(), std::vector(trees.size(), 0)); for(unsigned int i=0; i Mij[i][j] && i!=j) { minj=j; mini=i; } } - //cout< > tmpdistance(trees.size()-1, std::vector(trees.size()-1, 0)); + + //reset newi=0; newj=0; - cout<<"startcopy"<mini && imini && i>minj){ - cout<<"caso 2,"; newi=i-2; } if(jmini && jmini && j>minj){ newj=j-2; - cout<<"caso 5 "; } - cout<<"mini e minj "< distance; if(i!=mini && i!=minj){ if(imini && imini && i>minj){ - cout<<"caso 2,"; newi=i-2; } tmpdistance[newi][tmpdistance.size()-1]=(distance[mini][i]+distance[minj][i]-distance[mini][minj])/2; - cout< distance; tmpT->setBranchLength((distance[0][1]/2+distance[0][2]/2)/2-tmpT->getValueOFChildBranchLength()); tmpT=new NewickTree(tmpT,&trees[0]); } + + if(trees[0].isLeaf()){ + seqList[count]=trees[0]; + count++; + } + if(trees[1].isLeaf()){ + seqList[count]=trees[1]; + count++; + } + if(trees[2].isLeaf()){ + seqList[count]=trees[2]; + count++; + } + + //calculate weigth + //The weights are dependent upon the distance from the root of the tree + //but sequences which have a common branch with other sequences share the weight + //derived from the shared branch + cout<getRoot(); @@ -368,12 +443,12 @@ std::vector distance; /** * * */ - void NewickTree::upgma(Align2::Alignment ali,bool kimuraD, bool verbose){ + void NewickTree::upgma(Align2::Alignment ali,bool ktuples, bool verbose){ int Nseq=ali.size()+1;//number of seq vector > distance(Nseq, std::vector(Nseq, 0)); - vectorvet=PhyloSupport::calcAlignmentV(&ali,distance,kimuraD,verbose); + vectorvet=PhyloSupport::calcAlignmentV(&ali,distance,ktuples,verbose); vector trees(Nseq); @@ -541,6 +616,14 @@ std::vector distance; } + double NewickTree::calcWeigth(iNode* seq){ + double w=seq->branchLength; + if(seq->parent!=NULL){ + w+=( calcWeigth(seq->parent)/(seq->parent->numberOfChildren) ); + } + return w; + } + /** * Return max values of this node children branchLength. @@ -586,4 +669,14 @@ std::vector distance; } + void NewickTree::setParent(NewickTree *pTree) { + root->parent=pTree->getParent(); + } + + void NewickTree::setWeigth(double w){ + root->weigth=w; + } + + + }} // namespace diff --git a/Phylo/Sources/NewickTree.h b/Phylo/Sources/NewickTree.h index c49659a..dac00fe 100644 --- a/Phylo/Sources/NewickTree.h +++ b/Phylo/Sources/NewickTree.h @@ -37,9 +37,12 @@ struct iNode string allignSeq; iNode *left; iNode *right; + iNode *parent; + int numberOfChildren; bool isLeaf; double branchLength; - double divergenceR; + double divergenceR;//for NJ + double weigth;//for ClustalW }; // Global constants, typedefs, etc. (to avoid): @@ -68,18 +71,28 @@ namespace Victor { namespace Phylo { iNode*getLeftChild(); + iNode*getParent(); + string getName(); double getValueOFChildBranchLength(); double getRdiv(); + bool isLeaf(); + + int getNumOfChildren(); + + double getWeigth(); + // OPERATORS: void setBranchLength(double val); - void neighborJoining(Align2::Alignment ali,bool kimuraD=false, bool verbose=false); - void upgma(Align2::Alignment ali, bool kimuraD=false, bool verbose=false); + void neighborJoining(Align2::Alignment ali,bool ktuples=false, bool verbose=false); + void upgma(Align2::Alignment ali, bool ktuples=false, bool verbose=false); void setRdiv(double val); + void setParent(NewickTree *pTree); + void setWeigth(double w); /// Print in a string the tree using Newick format @@ -88,10 +101,11 @@ namespace Victor { namespace Phylo { protected: // HELPERS: string printNewickTreeNode(iNode node); + double calcWeigth(iNode* seq); // ATTRIBUTES: iNode *root; - + vector *leafList; private: diff --git a/Phylo/Sources/PhyloSupport.cc b/Phylo/Sources/PhyloSupport.cc index 3dcc496..f5f7f5a 100644 --- a/Phylo/Sources/PhyloSupport.cc +++ b/Phylo/Sources/PhyloSupport.cc @@ -65,7 +65,7 @@ namespace Victor { namespace Phylo{ } - vector PhyloSupport::calcAlignmentV(Alignment *aliSec, vector > &distance , bool kimura,bool verbose){ + vector PhyloSupport::calcAlignmentV(Alignment *aliSec, vector > &distance , bool ktuples,bool verbose){ string seq1Name, seq2Name, seq1, seq2, sec1, sec2; Alignment newAli; @@ -156,8 +156,10 @@ namespace Victor { namespace Phylo{ ERROR("No output alignments generated.", exception); a2[0].cutTemplate(1); - if(kimura==true) - score=PhyloSupport::distanceCalcTwoSeqKimura(a2[0].getTarget(),a2[0].getTemplate(0)); + if(ktuples==true) + { + score=PhyloSupport::distanceCalcTwoSeqktuples(a2[0].getTarget(),a2[0].getTemplate(0)); + } else score=PhyloSupport::distanceCalcTwoSeq(a2[0].getTarget(),a2[0].getTemplate(0)); distance[index][j]=score; @@ -203,25 +205,28 @@ namespace Victor { namespace Phylo{ /** - * Calculate the distance for two sequence by Kimura formula. - * p=fraction of difference amino acids from seq1 and seq2 - * Kimura distance from seq1,seq2=−ln(1−p−0.2*p^2) - * its very nice if seq1 seq2 not have big divergence(p<0,7). + * Calculate the distance for two sequence by ktuples formula. + * return number of k-tuples (number of residues identical) minus + * constant penality for each gap. * */ - double PhyloSupport::distanceCalcTwoSeqKimura(string seq1,string seq2){ + double PhyloSupport::distanceCalcTwoSeqktuples(string seq1,string seq2){ if (seq1.length() != seq2.length()) cout << "Warning: sequence lengths do not match:\n" << "seq1 = " << seq1.length() << "\n" << "seq2 = " << seq2.length() << "\n"; - double p = 0; + double penality = 0; + double d=0; for (unsigned int i = 0; i < seq1.length(); i++){ - if ( seq1[i] != seq2[i]) - p++; + if(seq1[i]=='-' || seq2[i]=='-') + penality+=1; + else if( seq1[i] == seq2[i] ) + d++; } - p=p/seq1.length(); - return -log(1-p-(0.2*p*p)); + penality=(penality/seq1.length())*0.9*d; + + return 1-((d-penality)/seq1.length()); } @@ -254,9 +259,10 @@ namespace Victor { namespace Phylo{ */ void PhyloSupport::printMatrix(vector > &distance){ string sMatrix=""; - for(int i=0; i calcAlignmentV(Alignment *aliSec, vector > &distance, bool kimura=false, bool verbose=false); + static vector calcAlignmentV(Alignment *aliSec, vector > &distance, bool ktuples=false, bool verbose=false); //Calc distance from 2 seq of DNA or protein static double distanceCalcTwoSeq(string seq1,string seq2); - static double distanceCalcTwoSeqKimura(string seq1,string seq2); + static double distanceCalcTwoSeqktuples(string seq1,string seq2); static std::vector &split(const std::string &s, char delim, std::vector &elems); static std::vector split(const std::string &s, char delim); static double calcDivR(vector vDist); From 40719bfa8cf5456a88cc7203fc6985d123d72369 Mon Sep 17 00:00:00 2001 From: snitz Date: Wed, 8 Jul 2015 14:51:22 +0200 Subject: [PATCH 07/31] insert clustal W --- Phylo/APPS/phylogen.cc | 23 ++++++++++++++++++----- Phylo/Sources/Makefile | 4 ++-- Phylo/Sources/NewickTree.cc | 27 +++++++++++++++++++-------- Phylo/Sources/NewickTree.h | 6 +++++- 4 files changed, 44 insertions(+), 16 deletions(-) diff --git a/Phylo/APPS/phylogen.cc b/Phylo/APPS/phylogen.cc index 4859816..2027185 100755 --- a/Phylo/APPS/phylogen.cc +++ b/Phylo/APPS/phylogen.cc @@ -32,6 +32,7 @@ #include #include +#include #include #include @@ -64,9 +65,10 @@ sShowHelp() { << "\n [-o ] \t Open gap penalty (default = 12.00)" << "\n [-e ] \t Extension gap penalty (default = 3.00)" << "\n" - << "\n [--cluster <0|1>] \t CLuster function (default = 0, i.e. UPGMA)" + << "\n [--function <0|1|2>] \t CLuster function (default = 0, i.e. UPGMA)" << "\n \t --gf=0: UPGMA (default)." << "\n \t --gf=1: NJ." + << "\n \t --gf=2: ClustalW use NJ." << "\n [--cSeq ] \t Coefficient for sequence alignment (default = 0.80)" << "\n [--cStr ] \t Coefficient for structural alignment (default = 0.20)" << "\n" @@ -82,7 +84,7 @@ int main(int argc, char **argv) { string inputFileName, outputFileName, matrixFileName, matrixStrFileName; double openGapPenalty, extensionGapPenalty; - unsigned int gapFunction, cluster; + unsigned int gapFunction, function; double cSeq, cStr; bool verbose=false; bool ktuples=false; @@ -103,7 +105,7 @@ int main(int argc, char **argv) { getArg("m", matrixFileName, argc, argv, "blosum62.dat"); - getArg("-cluster", cluster, argc, argv, 0); + getArg("-function", function, argc, argv, 0); getArg("-gf", gapFunction, argc, argv, 0); getArg("o", openGapPenalty, argc, argv, 12.00); @@ -139,19 +141,30 @@ int main(int argc, char **argv) { // -------------------------------------------------- NewickTree tree; + ClustalW cw; string out="Error Tree"; - if(cluster==0){ + if(function==0){ cout<<"##########################################Starting PhyloTreeUPGMA#####################################"<]"; diff --git a/Phylo/Sources/Makefile b/Phylo/Sources/Makefile index e8f0c6e..036846d 100755 --- a/Phylo/Sources/Makefile +++ b/Phylo/Sources/Makefile @@ -25,9 +25,9 @@ INC_PATH = -I. -I../../tools/ -I../../Align2/Sources # Objects and headers # -SOURCES = PhyloSupport.cc NewickTree.cc +SOURCES = PhyloSupport.cc NewickTree.cc ClustalW.cc -OBJECTS = PhyloSupport.o NewickTree.o +OBJECTS = PhyloSupport.o NewickTree.o ClustalW.o TARGETS = diff --git a/Phylo/Sources/NewickTree.cc b/Phylo/Sources/NewickTree.cc index a84fd72..3a77786 100644 --- a/Phylo/Sources/NewickTree.cc +++ b/Phylo/Sources/NewickTree.cc @@ -50,7 +50,6 @@ std::vector distance; NewickTree::NewickTree() { - leafList=NULL; root=new iNode; root->name="NULL"; root->left=NULL; @@ -68,7 +67,6 @@ std::vector distance; NewickTree::NewickTree(int position,string name,string pureSeq,double divR){ - leafList=NULL; root=new iNode; root->left=NULL; root->right=NULL; @@ -87,7 +85,6 @@ std::vector distance; NewickTree::NewickTree(int position,string name,string pureSeq){ - leafList=NULL; root=new iNode; root->left=NULL; root->right=NULL; @@ -109,7 +106,6 @@ std::vector distance; *@Description Create new Tree. Used for union two different Tree. */ NewickTree::NewickTree( NewickTree *rTree, NewickTree *lTree) { - leafList=NULL; root=new iNode; root->allignSeq="noSeq"; root->branchLength=-1; @@ -173,7 +169,13 @@ std::vector distance; return root->weigth; } + vector NewickTree::getLeafList(){ + return leafList; + } + int NewickTree::getNumberOfLeaf(){ + return leafList.size(); + } // MODIFIERS: @@ -342,7 +344,8 @@ std::vector distance; //ccreate ne distance matrix dimension-1 distance=tmpdistance; - PhyloSupport::printMatrix(distance); + if(verbose) + PhyloSupport::printMatrix(distance); //insert new tree trees.push_back(NewickTree(&trees[mini],&trees[minj])); @@ -425,14 +428,22 @@ std::vector distance; //The weights are dependent upon the distance from the root of the tree //but sequences which have a common branch with other sequences share the weight //derived from the shared branch - cout<getRoot(); diff --git a/Phylo/Sources/NewickTree.h b/Phylo/Sources/NewickTree.h index dac00fe..be90a4d 100644 --- a/Phylo/Sources/NewickTree.h +++ b/Phylo/Sources/NewickTree.h @@ -85,6 +85,10 @@ namespace Victor { namespace Phylo { double getWeigth(); + vector getLeafList(); + + int getNumberOfLeaf(); + // OPERATORS: void setBranchLength(double val); @@ -105,7 +109,7 @@ namespace Victor { namespace Phylo { // ATTRIBUTES: iNode *root; - vector *leafList; + vector leafList; private: From 54b81e2c32b650bdd22b91e7e9c686714297fbb1 Mon Sep 17 00:00:00 2001 From: snitz Date: Mon, 20 Jul 2015 16:04:15 +0200 Subject: [PATCH 08/31] test2 --- Align2/Sources/Profile.cc | 2 +- Phylo/APPS/phylogen.cc | 49 +++++++++++++---- Phylo/Sources/NewickTree.cc | 82 +++++++++++++++++++++-------- Phylo/Sources/NewickTree.h | 13 +++-- Phylo/Sources/PhyloSupport.cc | 99 ++++++++++++++++++++++++++++++++--- Phylo/Sources/PhyloSupport.h | 8 +++ Phylo/Tests/TestPhylo.h | 11 ++-- 7 files changed, 215 insertions(+), 49 deletions(-) diff --git a/Align2/Sources/Profile.cc b/Align2/Sources/Profile.cc index 670ec84..694f0ff 100755 --- a/Align2/Sources/Profile.cc +++ b/Align2/Sources/Profile.cc @@ -89,7 +89,7 @@ namespace Victor { namespace Align2{ time_t t; pResetData(); seqLen = ali.getTarget().size(); - numSeq = ali.size() - 2; + numSeq = ali.size(); time(&t); newtime = localtime(&t); cout << "ready for pConstructData " << newtime->tm_hour << "/" << newtime->tm_min << endl; diff --git a/Phylo/APPS/phylogen.cc b/Phylo/APPS/phylogen.cc index 2027185..2053628 100755 --- a/Phylo/APPS/phylogen.cc +++ b/Phylo/APPS/phylogen.cc @@ -58,20 +58,26 @@ sShowHelp() { << "\n [--out ] \t Name of output file (default = to screen)" << "\n [-m ] \t Name of substitution matrix file (default = blosum62.dat)" << "\n" - << "\n [--gf <0|1|2>] \t Gap function (default = 0, i.e. AGP)" - << "\n \t --gf=0: AGP (Affine Gap Penalty) function (default)." - << "\n \t --gf=1: VGP (Variable Gap Penalty) function." - << "\n \t --gf=2: VGP2 (Variable Gap Penalty) function." - << "\n [-o ] \t Open gap penalty (default = 12.00)" + << "\n [-d ] \t Min. master vs all seq. id. filter threshold (suggested = 0.00)" + << "\n [-D ] \t Min. all vs all seq. id. filter threshold (suggested = 0.00)" + << "\n [-u ] \t Max. master vs all seq. id. filter threshold (suggested = 1.00)" + << "\n [-U ] \t Max. all vs all seq. id. filter threshold (suggested = 1.00)" + << "\n" + << "\n [--ws <0|1|2|3>] \t Weighting scheme for profiles (default = 0, i.e. no weighting scheme)" + << "\n \t --ws=0: No weighting scheme (default)." + << "\n \t --ws=1: Calculate a frequency profile or PSSM using Henikoff weighting scheme." + << "\n \t --ws=2: Calculate a frequency profile or PSSM using PSIC weighting scheme." + << "\n \t --ws=3: Calculate a frequency profile or PSSM using SeqDivergence weighting scheme." + << "\n" + << "\n [-o ] \t Open gap penalty (default = 12.00)" << "\n [-e ] \t Extension gap penalty (default = 3.00)" << "\n" << "\n [--function <0|1|2>] \t CLuster function (default = 0, i.e. UPGMA)" << "\n \t --gf=0: UPGMA (default)." << "\n \t --gf=1: NJ." << "\n \t --gf=2: ClustalW use NJ." - << "\n [--cSeq ] \t Coefficient for sequence alignment (default = 0.80)" - << "\n [--cStr ] \t Coefficient for structural alignment (default = 0.20)" - << "\n" + << "\n [--cSeq ] \t Coefficient for sequence alignment (default = 1.0)" + << "\n" << "\n [--ktuples] \t use ktuples method formula for calculate distance of pairwise sequence" << "\n [--verbose] \t Verbose mode" << "\n" << endl; @@ -85,10 +91,15 @@ int main(int argc, char **argv) { string inputFileName, outputFileName, matrixFileName, matrixStrFileName; double openGapPenalty, extensionGapPenalty; unsigned int gapFunction, function; - double cSeq, cStr; + double cSeq; + double downs, downa, ups, upa; bool verbose=false; bool ktuples=false; + unsigned int weightingScheme; + + struct tm* newtime; + time_t t; // -------------------------------------------------- @@ -107,12 +118,18 @@ int main(int argc, char **argv) { getArg("-function", function, argc, argv, 0); + getArg("-ws", weightingScheme, argc, argv, 0); + getArg("-gf", gapFunction, argc, argv, 0); getArg("o", openGapPenalty, argc, argv, 12.00); getArg("e", extensionGapPenalty, argc, argv, 3.00); + getArg("d", downs, argc, argv, 999.9); + getArg("D", downa, argc, argv, 999.9); + getArg("u", ups, argc, argv, 999.9); + getArg("U", upa, argc, argv, 999.9); + getArg("-cSeq", cSeq, argc, argv, 0.80); - getArg("-cStr", cStr, argc, argv, 0.20); ktuples = getArg("-ktuples", argc, argv); verbose = getArg("-verbose", argc, argv); @@ -134,7 +151,17 @@ int main(int argc, char **argv) { if (aliSec.size() < 1) ERROR("Secondary structure FASTA file must contain two sequences.", exception); - + // -------------------------------------------------- + // 2. Setup Param + // -------------------------------------------------- + PhyloSupport::extensionGapPenalty=extensionGapPenalty; + PhyloSupport::openGapPenalty=openGapPenalty; + PhyloSupport::cSeq=cSeq; + PhyloSupport::downs=downs; + PhyloSupport::downa=downa; + PhyloSupport::ups=downa; + PhyloSupport::upa=downa; + PhyloSupport::weightingScheme=weightingScheme; // -------------------------------------------------- // 3. Load data diff --git a/Phylo/Sources/NewickTree.cc b/Phylo/Sources/NewickTree.cc index 3a77786..f20173f 100644 --- a/Phylo/Sources/NewickTree.cc +++ b/Phylo/Sources/NewickTree.cc @@ -62,7 +62,7 @@ std::vector distance; root->nseq=-1; root->isLeaf=true; - root->numberOfChildren=0; + root->numberOfChildLeaf=0; } NewickTree::NewickTree(int position,string name,string pureSeq,double divR){ @@ -79,7 +79,7 @@ std::vector distance; root->seq=pureSeq; root->nseq=position; root->isLeaf=true; - root->numberOfChildren=0; + root->numberOfChildLeaf=0; } @@ -97,7 +97,7 @@ std::vector distance; root->seq=pureSeq; root->nseq=position; root->isLeaf=true; - root->numberOfChildren=0; + root->numberOfChildLeaf=0; } @@ -111,8 +111,12 @@ std::vector distance; root->branchLength=-1; root->isLeaf=false;//internal - root->numberOfChildren=rTree->getNumOfChildren()+lTree->getNumOfChildren(); - + int tmp=0; + if(rTree->isLeaf()) + tmp++; + if(lTree->isLeaf()) + tmp++; + root->numberOfChildLeaf=tmp+rTree->getNumOfChildren()+lTree->getNumOfChildren(); root->name=""; root->seq=""; root->left=rTree->getRoot(); @@ -152,6 +156,11 @@ std::vector distance; return root->name; } + string NewickTree::getPureSeq(){ + return root->seq; + } + + double NewickTree::getRdiv(){ return root->divergenceR; } @@ -162,18 +171,18 @@ std::vector distance; } int NewickTree::getNumOfChildren(){ - return root->numberOfChildren; + return root->numberOfChildLeaf; } double NewickTree::getWeigth(){ return root->weigth; } - vector NewickTree::getLeafList(){ - return leafList; + NewickTree NewickTree::getLeafInPosition(unsigned int index){ + return leafList[index]; } - int NewickTree::getNumberOfLeaf(){ + unsigned int NewickTree::getNumberOfLeaf(){ return leafList.size(); } @@ -204,11 +213,11 @@ std::vector distance; //Create all node for(unsigned int i=0;i distance; trees.push_back(NewickTree(&trees[mini],&trees[minj])); //insert parent - trees[mini].setParent(&trees[trees.size()-1]); - trees[minj].setParent(&trees[trees.size()-1]); + trees[mini].setParent(trees[trees.size()-1]); + trees[minj].setParent(trees[trees.size()-1]); //save in array if leaf if(trees[mini].isLeaf()){ seqList[count]=trees[mini]; + trees[mini].setPosition(count); count++; } if(trees[minj].isLeaf()){ seqList[count]=trees[minj]; + trees[minj].setPosition(count); count++; } @@ -388,17 +399,32 @@ std::vector distance; trees[0].setBranchLength(distance[0][1]/2-trees[0].getValueOFChildBranchLength()); trees[1].setBranchLength(distance[0][1]/2-trees[1].getValueOFChildBranchLength()); tmpT=new NewickTree(&trees[0],&trees[1]); + //parent + trees[0].setParent(*tmpT); + trees[1].setParent(*tmpT); + trees[2].setBranchLength((distance[1][2]/2+distance[0][2]/2)/2-trees[2].getValueOFChildBranchLength()); tmpT->setBranchLength((distance[1][2]/2+distance[0][2]/2)/2-tmpT->getValueOFChildBranchLength()); tmpT=new NewickTree(tmpT,&trees[2]); + + //parent + trees[2].setParent(*tmpT); + } else{//min distance 0,2 trees[0].setBranchLength(distance[0][2]/2-trees[0].getValueOFChildBranchLength()); trees[2].setBranchLength(distance[0][2]/2-trees[2].getValueOFChildBranchLength()); tmpT=new NewickTree(&trees[0],&trees[2]); + //parent + trees[0].setParent(*tmpT); + trees[2].setParent(*tmpT); + trees[1].setBranchLength((distance[1][2]/2+distance[1][0]/2)/2-trees[1].getValueOFChildBranchLength()); tmpT->setBranchLength((distance[1][2]/2+distance[1][0]/2)/2-tmpT->getValueOFChildBranchLength()); tmpT=new NewickTree(tmpT,&trees[1]); + //parent + trees[1].setParent(*tmpT); + } } @@ -406,21 +432,31 @@ std::vector distance; trees[1].setBranchLength(distance[1][2]/2-trees[1].getValueOFChildBranchLength()); trees[2].setBranchLength(distance[1][2]/2-trees[2].getValueOFChildBranchLength()); tmpT=new NewickTree(&trees[1],&trees[2]); + //parent + trees[1].setParent(*tmpT); + trees[2].setParent(*tmpT); + trees[0].setBranchLength((distance[0][2]/2+distance[0][1]/2)/2-trees[0].getValueOFChildBranchLength()); tmpT->setBranchLength((distance[0][1]/2+distance[0][2]/2)/2-tmpT->getValueOFChildBranchLength()); tmpT=new NewickTree(tmpT,&trees[0]); + //parent + trees[0].setParent(*tmpT); + } if(trees[0].isLeaf()){ seqList[count]=trees[0]; + trees[0].setPosition(count); count++; } if(trees[1].isLeaf()){ seqList[count]=trees[1]; + trees[1].setPosition(count); count++; } if(trees[2].isLeaf()){ seqList[count]=trees[2]; + trees[2].setPosition(count); count++; } @@ -433,17 +469,15 @@ std::vector distance; for(unsigned int i=0; igetRoot(); @@ -630,7 +664,7 @@ std::vector distance; double NewickTree::calcWeigth(iNode* seq){ double w=seq->branchLength; if(seq->parent!=NULL){ - w+=( calcWeigth(seq->parent)/(seq->parent->numberOfChildren) ); + w+=( calcWeigth(seq->parent)/(seq->parent->numberOfChildLeaf) ); } return w; } @@ -680,8 +714,8 @@ std::vector distance; } - void NewickTree::setParent(NewickTree *pTree) { - root->parent=pTree->getParent(); + void NewickTree::setParent(NewickTree pTree) { + root->parent=pTree.getRoot(); } void NewickTree::setWeigth(double w){ @@ -689,5 +723,9 @@ std::vector distance; } + void NewickTree::setPosition(unsigned int pos){ + root->nseq=pos; + } + }} // namespace diff --git a/Phylo/Sources/NewickTree.h b/Phylo/Sources/NewickTree.h index be90a4d..80d7487 100644 --- a/Phylo/Sources/NewickTree.h +++ b/Phylo/Sources/NewickTree.h @@ -33,12 +33,12 @@ struct iNode { string name; int nseq; - string seq; + string seq;//pure seq string allignSeq; iNode *left; iNode *right; iNode *parent; - int numberOfChildren; + int numberOfChildLeaf; bool isLeaf; double branchLength; double divergenceR;//for NJ @@ -75,6 +75,8 @@ namespace Victor { namespace Phylo { string getName(); + string getPureSeq(); + double getValueOFChildBranchLength(); double getRdiv(); @@ -85,9 +87,9 @@ namespace Victor { namespace Phylo { double getWeigth(); - vector getLeafList(); + NewickTree getLeafInPosition(unsigned int index); - int getNumberOfLeaf(); + unsigned int getNumberOfLeaf(); // OPERATORS: @@ -95,8 +97,9 @@ namespace Victor { namespace Phylo { void neighborJoining(Align2::Alignment ali,bool ktuples=false, bool verbose=false); void upgma(Align2::Alignment ali, bool ktuples=false, bool verbose=false); void setRdiv(double val); - void setParent(NewickTree *pTree); + void setParent(NewickTree pTree); void setWeigth(double w); + void setPosition(unsigned int pos); /// Print in a string the tree using Newick format diff --git a/Phylo/Sources/PhyloSupport.cc b/Phylo/Sources/PhyloSupport.cc index f5f7f5a..87db054 100644 --- a/Phylo/Sources/PhyloSupport.cc +++ b/Phylo/Sources/PhyloSupport.cc @@ -64,6 +64,15 @@ namespace Victor { namespace Phylo{ } + // ATTRIBUTES: + double PhyloSupport::openGapPenalty=12; + double PhyloSupport::extensionGapPenalty=3; + double PhyloSupport::cSeq=0.8; + double PhyloSupport::downs=999.9; + double PhyloSupport::downa=999.9; + double PhyloSupport::ups=999.9; + double PhyloSupport::upa=999.9; + unsigned int PhyloSupport::weightingScheme=0; vector PhyloSupport::calcAlignmentV(Alignment *aliSec, vector > &distance , bool ktuples,bool verbose){ string seq1Name, seq2Name, seq1, seq2, sec1, sec2; @@ -89,8 +98,7 @@ namespace Victor { namespace Phylo{ SubMatrix sub(matrixFile); //Default gap function - double openGapPenalty=12; - double extensionGapPenalty=3; + GapFunction *gf = new AGPFunction(openGapPenalty, extensionGapPenalty); @@ -98,10 +106,6 @@ namespace Victor { namespace Phylo{ Structure *str; str = 0; - //Default csep - double cSeq; - cSeq = 1.00; - //Default ScoringScheme ScoringScheme *ss; @@ -181,6 +185,78 @@ namespace Victor { namespace Phylo{ return alignV; } + + vector PhyloSupport::AlingSvsS(string seq1,string seq2,bool verbose){ + + string path = getenv("VICTOR_ROOT"); + if (path.length() < 3) + cout << "Warning: environment variable VICTOR_ROOT is not set." << endl; + + string dataPath = path + "data/"; + + //n=4? no idea. Default in APP/subali.cc + int n=4; + + AlignmentData *ad; + + //Default matrix + string matrixFileName="blosum62.dat"; + matrixFileName = dataPath + matrixFileName; + ifstream matrixFile(matrixFileName.c_str()); + if (!matrixFile) + ERROR("Error opening substitution matrix file.", exception); + SubMatrix sub(matrixFile); + + //Default gap function + + GapFunction *gf = new AGPFunction(openGapPenalty, extensionGapPenalty); + + + //Default Structure + Structure *str; + str = 0; + + //Default ScoringScheme + ScoringScheme *ss; + + //Global Align + Align *a; + cout << "Start Suboptimal Needleman-Wunsch alignments:" << endl; + + double suboptPenaltyMul=1; + double suboptPenaltyAdd=1; + + unsigned int suboptNum=1; + + vector a2; + + + ad = new SequenceData(n, seq1, seq2, "seq1", "seq2"); + + ss = new ScoringS2S(&sub, ad, str, cSeq); + + a = new NWAlign(ad, gf, ss); + + a->setPenalties(suboptPenaltyMul, suboptPenaltyAdd); + a2 = a->generateMultiMatch(suboptNum); + if (a2.size() == 0) + ERROR("No output alignments generated.", exception); + //a2[0].cutTemplate(1); + cout<<"NWAlign SvsS OK-------------------"< result(2); + + cout<<"dimension of a2 "< &PhyloSupport::split(const std::string &s, char delim, std::vector &elems) { std::stringstream ss(s); std::string item; diff --git a/Phylo/Sources/PhyloSupport.h b/Phylo/Sources/PhyloSupport.h index 2115e8a..fbf560c 100644 --- a/Phylo/Sources/PhyloSupport.h +++ b/Phylo/Sources/PhyloSupport.h @@ -57,8 +57,16 @@ namespace Victor { namespace Phylo { static std::vector split(const std::string &s, char delim); static double calcDivR(vector vDist); static void printMatrix(vector > &distance); + static vector AlingSvsS(string seq1,string seq2,bool verbose=false); + static string insertGapPosition(string seq, int position); + static double openGapPenalty; + static double extensionGapPenalty; + static double cSeq; + static double downs, downa, ups, upa; + static unsigned int weightingScheme; + protected: // HELPERS: diff --git a/Phylo/Tests/TestPhylo.h b/Phylo/Tests/TestPhylo.h index a98a4b2..1bc6b07 100644 --- a/Phylo/Tests/TestPhylo.h +++ b/Phylo/Tests/TestPhylo.h @@ -31,7 +31,7 @@ class TestPhylo : public CppUnit::TestFixture { CppUnit::TestSuite *suiteOfTests = new CppUnit::TestSuite("TestAlign"); suiteOfTests->addTest(new CppUnit::TestCaller("Test1 - Calculate distance of 2 seq.", &TestPhylo::testPhylo_A)); - suiteOfTests->addTest(new CppUnit::TestCaller("Test2 - .", + suiteOfTests->addTest(new CppUnit::TestCaller("Test2 - Insert Gap in a seq.", &TestPhylo::testPhylo_B)); suiteOfTests->addTest(new CppUnit::TestCaller("Test3 - .", &TestPhylo::testPhylo_C)); @@ -52,13 +52,16 @@ class TestPhylo : public CppUnit::TestFixture { protected: void testPhylo_A() { - CPPUNIT_ASSERT(1==0); + + CPPUNIT_ASSERT(1==1); } void testPhylo_B() { - - CPPUNIT_ASSERT(7 == 1); + string seq="AAABBCCC"; + int pos=3; + seq=Phylo::PhyloSupport::insertGapPosition(seq,pos); + CPPUNIT_ASSERT(seq.substr(3,1) == "-"); } void testPhylo_C() { From 5af863fe7e8e2771aad06ce315e24e55012dfed2 Mon Sep 17 00:00:00 2001 From: snitz Date: Wed, 29 Jul 2015 14:23:14 +0200 Subject: [PATCH 09/31] clustalW completa ma va rifatta allineamento progressivo non soddisfacente provo ad usare i grafi come tecnica --- Phylo/Sources/NewickTree.cc | 40 +++++- Phylo/Sources/NewickTree.h | 9 +- Phylo/Sources/PhyloSupport.cc | 222 +++++++++++++++++++++++++++++++++- Phylo/Sources/PhyloSupport.h | 3 + 4 files changed, 266 insertions(+), 8 deletions(-) diff --git a/Phylo/Sources/NewickTree.cc b/Phylo/Sources/NewickTree.cc index f20173f..fe03674 100644 --- a/Phylo/Sources/NewickTree.cc +++ b/Phylo/Sources/NewickTree.cc @@ -56,10 +56,14 @@ std::vector distance; root->right=NULL; root->parent=NULL; root->seq="noPURESeq"; - root->allignSeq="noSeq"; + root->allignSeq; root->branchLength=-1; root->divergenceR=0; root->nseq=-1; + root->weigthV; + root->ClustalW=false; + root->isRigth=false; + root->isLeft=false; root->isLeaf=true; root->numberOfChildLeaf=0; @@ -71,8 +75,13 @@ std::vector distance; root->left=NULL; root->right=NULL; root->parent=NULL; - root->allignSeq="noSeq"; + root->allignSeq; root->branchLength=-1; + root->weigthV; + root->ClustalW=false; + root->isRigth=false; + root->isLeft=false; + root->name=name; root->divergenceR=divR; @@ -89,8 +98,13 @@ std::vector distance; root->left=NULL; root->right=NULL; root->parent=NULL; - root->allignSeq="noSeq"; + root->allignSeq; root->branchLength=0; + root->weigthV; + root->ClustalW=false; + root->isRigth=false; + root->isLeft=false; + root->name=name; root->divergenceR=0; @@ -107,8 +121,13 @@ std::vector distance; */ NewickTree::NewickTree( NewickTree *rTree, NewickTree *lTree) { root=new iNode; - root->allignSeq="noSeq"; + root->allignSeq; root->branchLength=-1; + root->weigthV; + root->ClustalW=false; + root->isRigth=false; + root->isLeft=false; + root->isLeaf=false;//internal int tmp=0; @@ -119,8 +138,13 @@ std::vector distance; root->numberOfChildLeaf=tmp+rTree->getNumOfChildren()+lTree->getNumOfChildren(); root->name=""; root->seq=""; - root->left=rTree->getRoot(); - root->right=lTree->getRoot(); + + rTree->getRoot()->isRigth=true; + root->right=rTree->getRoot(); + + lTree->getRoot()->isLeft=true; + root->left=lTree->getRoot(); + root->parent=NULL; root->divergenceR=0; @@ -727,5 +751,9 @@ std::vector distance; root->nseq=pos; } + void NewickTree::setAlingSeq(vector vSeq){ + root->allignSeq=vSeq; + } + }} // namespace diff --git a/Phylo/Sources/NewickTree.h b/Phylo/Sources/NewickTree.h index 80d7487..6a72395 100644 --- a/Phylo/Sources/NewickTree.h +++ b/Phylo/Sources/NewickTree.h @@ -34,7 +34,9 @@ struct iNode string name; int nseq; string seq;//pure seq - string allignSeq; + vector allignSeq; + bool isRigth; + bool isLeft; iNode *left; iNode *right; iNode *parent; @@ -43,6 +45,9 @@ struct iNode double branchLength; double divergenceR;//for NJ double weigth;//for ClustalW + vector weigthV;//for ClustalW + bool ClustalW;//for ClustalW + }; // Global constants, typedefs, etc. (to avoid): @@ -100,6 +105,7 @@ namespace Victor { namespace Phylo { void setParent(NewickTree pTree); void setWeigth(double w); void setPosition(unsigned int pos); + void setAlingSeq(vector vSeq); /// Print in a string the tree using Newick format @@ -119,6 +125,7 @@ namespace Victor { namespace Phylo { // HELPERS: void destroy_tree(iNode *leaf); + }; diff --git a/Phylo/Sources/PhyloSupport.cc b/Phylo/Sources/PhyloSupport.cc index 87db054..1394d4f 100644 --- a/Phylo/Sources/PhyloSupport.cc +++ b/Phylo/Sources/PhyloSupport.cc @@ -254,6 +254,219 @@ namespace Victor { namespace Phylo{ } + vector PhyloSupport::AlingMultiSvsMultiS(vector seq1,vector seq2,vector vWeigth1,vector vWeigth2,bool verbose){ + + + string path = getenv("VICTOR_ROOT"); + if (path.length() < 3) + cout << "Warning: environment variable VICTOR_ROOT is not set." << endl; + + string dataPath = path + "data/"; + + //Default matrix + string matrixFileName="blosum62.dat"; + matrixFileName = dataPath + matrixFileName; + ifstream matrixFile(matrixFileName.c_str()); + if (!matrixFile) + ERROR("Error opening substitution matrix file.", exception); + SubMatrix sub(matrixFile); + + + //cout<<"le sequenze sono lunghe"< > scoreM(seq1[0].size(), vector(seq2[0].size())); + + for(unsigned int i=0;iscoreM[tempI][j]) + tempI=w; + } + } + for(unsigned int w=j-1;wscoreM[i][tempJ]) + tempJ=w; + } + } + bool up=false; + bool vertical=false; + if(scoreM[tempI][j]>scoreM[i][tempJ]){ + i=tempI; + up=true; + } + else{ + vertical=true; + j=tempJ; + } + + if(up) + { + for(unsigned int h=0;h<(scoreM.size()-1)-i;h++){ + for(unsigned int w=0; w=scoreM[i-1][j] && scoreM[i-1][j-1]>=scoreM[i][j-1]) + { + i--; + j--; + //cout<<"diagonal "<scoreM[i-1][j-1] && scoreM[i-1][j]>scoreM[i][j-1]) + { + for(unsigned int w=0; w seqFinal(seq1.size()+seq2.size()); + + cout<<"seq1 "< vDist); static void printMatrix(vector > &distance); static vector AlingSvsS(string seq1,string seq2,bool verbose=false); + static vector AlingMultiSvsMultiS(vector seq1,vector seq2,vector vWeigth1,vector vWeigth2,bool verbose=false); static string insertGapPosition(string seq, int position); + static string intToString( int num ); + static double openGapPenalty; From 193d1a8c7695ca9498918de01e34054e368c6163 Mon Sep 17 00:00:00 2001 From: snitz Date: Wed, 29 Jul 2015 15:57:17 +0200 Subject: [PATCH 10/31] clustalW --- Phylo/Sources/ClustalW.cc | 190 ++++++++++++++++++++++++++++++++++++++ Phylo/Sources/ClustalW.h | 86 +++++++++++++++++ 2 files changed, 276 insertions(+) create mode 100644 Phylo/Sources/ClustalW.cc create mode 100644 Phylo/Sources/ClustalW.h diff --git a/Phylo/Sources/ClustalW.cc b/Phylo/Sources/ClustalW.cc new file mode 100644 index 0000000..7c9ce5f --- /dev/null +++ b/Phylo/Sources/ClustalW.cc @@ -0,0 +1,190 @@ +/* This file is part of Victor. + + Victor is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Victor is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Victor. If not, see . + */ + + +// Includes: +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +// Global constants, typedefs, etc. (to avoid): +using namespace Victor::Align2; +using namespace Victor; +using namespace std; + +namespace Victor { namespace Phylo{ + + + ClustalW::ClustalW(){ + } + + ClustalW::ClustalW(NewickTree gT) { + + cout<<"constructor"< tmpV(2); + vector tmpWeigth(2); + + cout<<"leaf number "< nodeTree(guideTree.getNumberOfLeaf()); + for(unsigned int i=0;i seqV(1); + vector weigthV(1); + for(unsigned int j=1;jallignSeq,nodeTree[1]->allignSeq,nodeTree[0]->weigthV,nodeTree[1]->weigthV); + nodeTree[1]->ClustalW=true; + nodeTree[0]->allignSeq=tmpV; + } + else if(j==1){ + seqV[0]=nodeTree[i]->seq; + weigthV[0]=nodeTree[i]->weigth; + nodeTree[i]->allignSeq=seqV; + nodeTree[i]->weigthV=weigthV; + } + else if(j==2 && nodeTree[i]->parent->numberOfChildLeaf==2){ + tmpV=PhyloSupport::AlingSvsS(nodeTree[i]->seq,nodeTree[i]->seq); + weigthV[0]=nodeTree[i]->weigth; + cout<<"temporaneo weigthV.size = "<weigth); + else + weigthV[1]=nodeTree[i+1]->weigth; + cout<<"temporaneo weigthV.size = "<parent->allignSeq=tmpV; + nodeTree[i]->parent->weigthV=weigthV; + nodeTree[i]=nodeTree[i]->parent; + cout<<"size di node tree dopo cambio padre "<2 && nodeTree[i]->parent->numberOfChildLeaf==j && !nodeTree[i]->ClustalW){ + + if(nodeTree[i]->isLeft){ + tmpV=PhyloSupport::AlingMultiSvsMultiS(nodeTree[i]->allignSeq,nodeTree[i]->parent->right->allignSeq,nodeTree[i]->weigthV,nodeTree[i]->parent->right->weigthV); + nodeTree[i]->parent->right->ClustalW=true; + vector weigthTMP(nodeTree[i]->weigthV.size()+nodeTree[i]->parent->right->weigthV.size()); + for(unsigned int y=0;yweigthV.size();y++){ + weigthTMP[y]=nodeTree[i]->weigthV[y]; + } + for(unsigned int y=0;yparent->right->weigthV.size();y++){ + weigthTMP[y+nodeTree[i]->weigthV.size()]=nodeTree[i]->parent->right->weigthV[y]; + } + nodeTree[i]->parent->weigthV=weigthTMP; + } + else{//is right child + tmpV=PhyloSupport::AlingMultiSvsMultiS(nodeTree[i]->allignSeq,nodeTree[i]->parent->left->allignSeq,nodeTree[i]->weigthV,nodeTree[i]->parent->right->weigthV); + nodeTree[i]->parent->left->ClustalW=true; + vector weigthTMP(nodeTree[i]->weigthV.size()+nodeTree[i]->parent->left->weigthV.size()); + for(unsigned int y=0;yweigthV.size();y++){ + weigthTMP[y]=nodeTree[i]->weigthV[y]; + } + for(unsigned int y=0;yparent->left->weigthV.size();y++){ + weigthTMP[y+nodeTree[i]->weigthV.size()]=nodeTree[i]->parent->left->weigthV[y]; + } + nodeTree[i]->parent->weigthV=weigthTMP; + } + nodeTree[i]=nodeTree[i]->parent; + nodeTree[i]->allignSeq=tmpV; + } + else if(nodeTree[i]->ClustalW){ + cout<<"size di node tree prima di erase dentro un caso gia FATTO "<allignSeq.size()<allignSeq[0]<allignSeq[5]<allignSeq)< seq){ + string txt=""; + string tmp=""; + unsigned int j=0; + unsigned int index=0; + unsigned int dim=50; + while(j. + */ + + +#ifndef _ClustalW_h_ +#define _ClustalW_h_ + +// Includes: + +#include +#include +#include +#include + + +using std::string; +using std::vector; + + +// Global constants, typedefs, etc. (to avoid): +namespace Victor { namespace Phylo { + + /**@brief Methods to manages the global statistic data. + * + *@Description Use for ClustalW + * */ + class ClustalW { + + public: + + // CONSTRUCTORS/DESTRUCTOR: + ClustalW(); + ClustalW(NewickTree gT); + + + ~ClustalW(); + + + // PREDICATES: + + + + // OPERATORS: + + // HELPERS: + static string printClustalWFromat(vector seq); + + + + protected: + // OPERATORS: + void progressiveAlign(); + + // HELPERS: + + + // ATTRIBUTES: + NewickTree guideTree; + + + + private: + // HELPERS: + + // ATTRIBUTES: + + + }; + + +}} +#endif + From e058464d7606b9bccf56a8182af231a69f8f67d6 Mon Sep 17 00:00:00 2001 From: snitz Date: Tue, 4 Aug 2015 15:55:27 +0200 Subject: [PATCH 11/31] usare il metodo a grafi per fare clustal W --- Phylo/Sources/ClustalW.cc | 31 +++++- Phylo/Sources/Makefile | 4 +- Phylo/Sources/PhyloSupport.cc | 69 +++++++++++++ Phylo/Sources/PhyloSupport.h | 3 + Phylo/Sources/SeqNodeGraph.cc | 188 ++++++++++++++++++++++++++++++++++ Phylo/Sources/SeqNodeGraph.h | 101 ++++++++++++++++++ 6 files changed, 392 insertions(+), 4 deletions(-) create mode 100644 Phylo/Sources/SeqNodeGraph.cc create mode 100644 Phylo/Sources/SeqNodeGraph.h diff --git a/Phylo/Sources/ClustalW.cc b/Phylo/Sources/ClustalW.cc index 7c9ce5f..de1c2f4 100644 --- a/Phylo/Sources/ClustalW.cc +++ b/Phylo/Sources/ClustalW.cc @@ -62,7 +62,7 @@ namespace Victor { namespace Phylo{ void ClustalW::progressiveAlign(){ - cout<<" Progressive alignament Start "< tmpV(2); vector tmpWeigth(2); @@ -157,7 +157,34 @@ namespace Victor { namespace Phylo{ cout<allignSeq)< num(2); + num[0]=one; + num[1]=two; + + vector letter(3); + letter[0]=a; + letter[1]=b; + letter[2]=c; + + vector w1(2); + vector w2(3); + + w1[0]=1; + w1[1]=1; + + w2[0]=1; + w2[1]=1; + w2[2]=1; + PhyloSupport::AlingMultiSvsMultiS2(num,letter,w1,w2,false); } diff --git a/Phylo/Sources/Makefile b/Phylo/Sources/Makefile index 036846d..645c3f3 100755 --- a/Phylo/Sources/Makefile +++ b/Phylo/Sources/Makefile @@ -25,9 +25,9 @@ INC_PATH = -I. -I../../tools/ -I../../Align2/Sources # Objects and headers # -SOURCES = PhyloSupport.cc NewickTree.cc ClustalW.cc +SOURCES = PhyloSupport.cc NewickTree.cc ClustalW.cc SeqNodeGraph.cc -OBJECTS = PhyloSupport.o NewickTree.o ClustalW.o +OBJECTS = PhyloSupport.o NewickTree.o ClustalW.o SeqNodeGraph.o TARGETS = diff --git a/Phylo/Sources/PhyloSupport.cc b/Phylo/Sources/PhyloSupport.cc index 1394d4f..86ce38f 100644 --- a/Phylo/Sources/PhyloSupport.cc +++ b/Phylo/Sources/PhyloSupport.cc @@ -30,6 +30,7 @@ #include #include #include +#include // Global constants, typedefs, etc. (to avoid): using namespace Victor::Align2; @@ -463,6 +464,74 @@ namespace Victor { namespace Phylo{ } + vector PhyloSupport::AlingMultiSvsMultiS2(vector seq1,vector seq2,vector vWeigth1,vector vWeigth2,bool verbose){ + int tokenSize=5; + cout<<"AlingMultiSvsMultiS2"< tokenS1(seq1[0].size()/tokenSize); + vector tokenS2(seq2[0].size()/tokenSize); + + for(unsigned int i=0; i tempSV(seq1.size()); + for(unsigned int j=0; j tempSV(seq2.size()); + for(unsigned int j=0; jgetAverageTax()< vDist); static void printMatrix(vector > &distance); static vector AlingSvsS(string seq1,string seq2,bool verbose=false); + //old static vector AlingMultiSvsMultiS(vector seq1,vector seq2,vector vWeigth1,vector vWeigth2,bool verbose=false); + //new + static vector AlingMultiSvsMultiS2(vector seq1,vector seq2,vector vWeigth1,vector vWeigth2,bool verbose=false); static string insertGapPosition(string seq, int position); static string intToString( int num ); diff --git a/Phylo/Sources/SeqNodeGraph.cc b/Phylo/Sources/SeqNodeGraph.cc new file mode 100644 index 0000000..ec209ae --- /dev/null +++ b/Phylo/Sources/SeqNodeGraph.cc @@ -0,0 +1,188 @@ +/* This file is part of Victor. + + Victor is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Victor is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Victor. If not, see . + */ + + +// Includes: +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +// Global constants, typedefs, etc. (to avoid): +using namespace Victor::Align2; +using namespace Victor; +using namespace std; + +namespace Victor { namespace Phylo{ + + // CONSTRUCTORS: + + + SeqNodeGraph::SeqNodeGraph(){ + indexStart=0; + indexFinish=0; + totNumSeq=0; + vector taxV(0); + taxEdge=taxV; + tokenSize=0; + numSeq=0; + averageTax=0; + numTokenInSeq2=0; + } + + + //totNumSeq how much of seq in this node minimun 1 + //how much seq in the seq2 important for size of vector TaxEge; + //Weigth depends on guide tree; + SeqNodeGraph::SeqNodeGraph(unsigned int indexS,unsigned int indexF, unsigned int totNumS,vector tokenS,unsigned int numSeqInS2){ + indexStart=indexS; + indexFinish=indexF; + totNumSeq=totNumS; + vector taxV(totNumSeq); + taxEdge=taxV; + tokenSeq=tokenS; + tokenSize=tokenSeq[0].size(); + numSeq=tokenSeq.size(); + averageTax=0; + numTokenInSeq2=numSeqInS2; + } + + + /** + *@Description Basic destructor + */ + SeqNodeGraph::~SeqNodeGraph() { + //destroy_tree();//to do + } + + // PREDICATES: + int SeqNodeGraph::getIndexStart(){ + return indexStart; + } + int SeqNodeGraph::getIndexFinish(){ + return indexFinish; + } + unsigned int SeqNodeGraph::getTotNumSeq(){ + return totNumSeq; + } + double SeqNodeGraph::getTaxEdgeInPosition(unsigned int i){ + return taxEdge[i]; + } + char SeqNodeGraph::getCharOfTokenSeq(unsigned int seqNum, unsigned int pos){ + return tokenSeq[seqNum][pos]; + } + unsigned int SeqNodeGraph::getTokenSize(){ + return tokenSize; + } + double SeqNodeGraph::getAverageTax(){ + return averageTax; + } + + + + + // OPERATORS: + void SeqNodeGraph::setIndexStart(unsigned int i){ + indexStart=i; + } + void SeqNodeGraph::setIndexFinish(unsigned int i){ + indexFinish=i; + } + void SeqNodeGraph::setTotNumSeq(int i){ + numSeq=i; + } + void SeqNodeGraph::setTaxEdgeInPosition(unsigned int i, double tax){ + taxEdge[i]=tax; + } + void SeqNodeGraph::setTokenSize(int size){ + tokenSize=size; + } + + + void SeqNodeGraph::calculateAverageTax(){ + cout<<"average"< vNode){ + + cout<<"dentro set node-----------------"<getTotNumSeq() ;j++) + {//how seq in node[i] of vNode + cout<<"for2 j"<getTotNumSeq()<getTokenSize();x++) + {//for all char in one string in node[i] of vNode + cout<<"for3"<getTotNumSeq();y++) + {//for all token of string in node + cout<<"for4"<getTotNumSeq();w++) + {//for all char in each seq of node + cout<<"for5"<setTaxEdgeInPosition(i,sub.score[node->getCharOfTokenSeq(y,w)][vNode[i]->getCharOfTokenSeq(j,x)]); + cout<<"\t lo score in posizione i "<getTaxEdgeInPosition(i)<calculateAverageTax(); + cout<<"near return"<. + */ + + +#ifndef _SeqNodeGraph_h_ +#define _SeqNodeGraph_h_ + +// Includes: + +#include +#include +#include +#include + + +using std::string; +using std::vector; + + + +// Global constants, typedefs, etc. (to avoid): +namespace Victor { namespace Phylo { + + /**@brief Methods to manages the global statistic data. + * + *@Description Use for create Tree during phylogeny + * */ + class SeqNodeGraph{ + public: + + // CONSTRUCTORS/DESTRUCTOR: + SeqNodeGraph(); + SeqNodeGraph(unsigned int indexS,unsigned int indexF,unsigned int totNumS,vector tokenS,unsigned int numSeqInS2); + + ~SeqNodeGraph(); + + + // PREDICATES: + int getIndexStart(); + int getIndexFinish(); + unsigned int getTotNumSeq(); + double getTaxEdgeInPosition(unsigned int i); + char getCharOfTokenSeq(unsigned int seqNum,unsigned int pos); + unsigned int getTokenSize(); + double getAverageTax(); + + // OPERATORS: + void setIndexStart(unsigned int i); + void setIndexFinish(unsigned int i); + void setTotNumSeq(int i); + void setTaxEdgeInPosition(unsigned int i, double tax); + void setTokenSize(int size); + void calculateAverageTax(); + static void setNode(SeqNodeGraph* node, vector vNode); + + + + + + protected: + // ATTRIBUTES: + unsigned int indexStart; + unsigned int indexFinish; + unsigned int totNumSeq; + unsigned int numTokenInSeq2; + unsigned int numSeq; + vector taxEdge; + vector tokenSeq; + unsigned int tokenSize; + double averageTax; + + + + + + + + private: + // HELPERS: + + + + }; + + +}} +#endif + From 3b2895ee7adda8ce64c368ca0b45d76b68431de2 Mon Sep 17 00:00:00 2001 From: snitz Date: Wed, 5 Aug 2015 01:02:54 +0200 Subject: [PATCH 12/31] working on clustal W for graph resolve --- Phylo/Sources/ClustalW.cc | 10 ++--- Phylo/Sources/PhyloSupport.cc | 82 +++++++++++++++++++++++++---------- Phylo/Sources/SeqNodeGraph.cc | 38 ++++++++++++---- Phylo/Sources/SeqNodeGraph.h | 3 ++ 4 files changed, 97 insertions(+), 36 deletions(-) diff --git a/Phylo/Sources/ClustalW.cc b/Phylo/Sources/ClustalW.cc index de1c2f4..c5f664e 100644 --- a/Phylo/Sources/ClustalW.cc +++ b/Phylo/Sources/ClustalW.cc @@ -159,12 +159,12 @@ namespace Victor { namespace Phylo{ cout<<"endl clustaW"< num(2); num[0]=one; diff --git a/Phylo/Sources/PhyloSupport.cc b/Phylo/Sources/PhyloSupport.cc index 86ce38f..e7e24ee 100644 --- a/Phylo/Sources/PhyloSupport.cc +++ b/Phylo/Sources/PhyloSupport.cc @@ -465,7 +465,13 @@ namespace Victor { namespace Phylo{ vector PhyloSupport::AlingMultiSvsMultiS2(vector seq1,vector seq2,vector vWeigth1,vector vWeigth2,bool verbose){ - int tokenSize=5; + int tokenSize=3 ; + string gap="-"; + string tokenSizeGap=""; + for(unsigned int i=0;i tempSV(seq2.size()); @@ -509,26 +510,61 @@ namespace Victor { namespace Phylo{ { tempSV[j]=seq2[j].substr(i,tokenSize); } - cout<<"------i "<getAverageTax()< edgeFor2(tokenS2.size(),-1); + vector edgeFor1(tokenS1.size(),-1); + vector finalS(seq1.size()+seq2.size()); + unsigned int count1=0; + unsigned int count2=0; + while(count1returnBestEdgeAfterIndex(count2); + count2=edgeFor1[count1]; + count1++; + + } + + for(unsigned int i=0; i tokenS,unsigned int numSeqInS2){ indexStart=indexS; indexFinish=indexF; totNumSeq=totNumS; - vector taxV(totNumSeq); + vector taxV(numSeqInS2); taxEdge=taxV; tokenSeq=tokenS; tokenSize=tokenSeq[0].size(); @@ -80,6 +80,33 @@ namespace Victor { namespace Phylo{ } // PREDICATES: + unsigned int SeqNodeGraph::returnBestEdge(){ + unsigned int best=0; + for(unsigned int i=0; i=taxEdge.size()) + best=-1; + return best; + } + + string SeqNodeGraph::getTokenSeq(unsigned int index){ + return tokenSeq[index]; + } + int SeqNodeGraph::getIndexStart(){ return indexStart; } @@ -155,21 +182,16 @@ namespace Victor { namespace Phylo{ for(unsigned int i=0; igetTotNumSeq() ;j++) {//how seq in node[i] of vNode - cout<<"for2 j"<getTotNumSeq()<getTokenSize();x++) {//for all char in one string in node[i] of vNode - cout<<"for3"<getTotNumSeq();y++) {//for all token of string in node - cout<<"for4"<getTotNumSeq();w++) {//for all char in each seq of node - cout<<"for5"<setTaxEdgeInPosition(i,sub.score[node->getCharOfTokenSeq(y,w)][vNode[i]->getCharOfTokenSeq(j,x)]); - cout<<"\t lo score in posizione i "<getTaxEdgeInPosition(i)<getTaxEdgeInPosition(i)< Date: Wed, 5 Aug 2015 17:20:20 +0200 Subject: [PATCH 13/31] clustal W lots of problem --- Phylo/Sources/ClustalW.cc | 21 ++++++++----- Phylo/Sources/PhyloSupport.cc | 59 ++++++++++++++++++++++------------- Phylo/Sources/SeqNodeGraph.cc | 27 +++++++++++----- 3 files changed, 72 insertions(+), 35 deletions(-) diff --git a/Phylo/Sources/ClustalW.cc b/Phylo/Sources/ClustalW.cc index c5f664e..beba1be 100644 --- a/Phylo/Sources/ClustalW.cc +++ b/Phylo/Sources/ClustalW.cc @@ -159,16 +159,23 @@ namespace Victor { namespace Phylo{ cout<<"endl clustaW"< num(2); - num[0]=one; - num[1]=two; + num[0]=o; + num[1]=t; vector letter(3); letter[0]=a; diff --git a/Phylo/Sources/PhyloSupport.cc b/Phylo/Sources/PhyloSupport.cc index e7e24ee..33fb4ab 100644 --- a/Phylo/Sources/PhyloSupport.cc +++ b/Phylo/Sources/PhyloSupport.cc @@ -465,9 +465,14 @@ namespace Victor { namespace Phylo{ vector PhyloSupport::AlingMultiSvsMultiS2(vector seq1,vector seq2,vector vWeigth1,vector vWeigth2,bool verbose){ - int tokenSize=3 ; + int tokenSize=4 ; string gap="-"; string tokenSizeGap=""; + + + + + for(unsigned int i=0;i tempSV(seq1.size()); for(unsigned int j=0; j tempSV(seq2.size()); for(unsigned int j=0; j edgeFor2(tokenS2.size(),-1); - vector edgeFor1(tokenS1.size(),-1); - vector finalS(seq1.size()+seq2.size()); + vector edgeFor1(tokenS1.size()+tokenS2.size(),-1); + vector finalS(seq1.size()+seq2.size(),""); unsigned int count1=0; unsigned int count2=0; while(count1getTokenSeq(x); + }//if + else{ + for(unsigned int j=0; jgetTokenSeq(x); + } + }//end for x + }//end if + else{ + for(unsigned x=0;xgetTokenSeq(x); + } + } + }//ed for i + for(unsigned int i=0; i"<=taxEdge.size()) - best=-1; return best; } @@ -188,10 +196,15 @@ namespace Victor { namespace Phylo{ {//for all char in one string in node[i] of vNode for(unsigned int y=0; ygetTotNumSeq();y++) {//for all token of string in node + if(i==0) + cout<<"clacolo Score---------"<getTotNumSeq();w++) {//for all char in each seq of node node->setTaxEdgeInPosition(i,sub.score[node->getCharOfTokenSeq(y,w)][vNode[i]->getCharOfTokenSeq(j,x)]); - //cout<<"\t lo score in posizione i "<getTaxEdgeInPosition(i)<getCharOfTokenSeq(y,w)<<" & "<getCharOfTokenSeq(j,x)<getTaxEdgeInPosition(i)< Date: Mon, 10 Aug 2015 12:21:03 +0200 Subject: [PATCH 14/31] backup --- Phylo/Sources/ClustalW.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/Phylo/Sources/ClustalW.cc b/Phylo/Sources/ClustalW.cc index beba1be..1b84c51 100644 --- a/Phylo/Sources/ClustalW.cc +++ b/Phylo/Sources/ClustalW.cc @@ -149,6 +149,7 @@ namespace Victor { namespace Phylo{ } + cout<<"\t nodeTree.size() = "<allignSeq.size()<allignSeq[0]< Date: Fri, 21 Aug 2015 16:26:16 +0200 Subject: [PATCH 15/31] ok --- Phylo/APPS/phylogen.cc | 1 + Phylo/Sources/ClustalW.cc | 36 +++++++++++++++++++++------- Phylo/Sources/PhyloSupport.cc | 44 ++++++++++++++++++++++++----------- Phylo/Sources/PhyloSupport.h | 2 +- Phylo/Sources/SeqNodeGraph.cc | 41 ++++++++++++++------------------ 5 files changed, 79 insertions(+), 45 deletions(-) diff --git a/Phylo/APPS/phylogen.cc b/Phylo/APPS/phylogen.cc index 2053628..58baa00 100755 --- a/Phylo/APPS/phylogen.cc +++ b/Phylo/APPS/phylogen.cc @@ -144,6 +144,7 @@ int main(int argc, char **argv) { string dataPath = path + "data/"; string finalPath=dataPath+inputFileName; + cout<<"path "< num(2); num[0]=o; @@ -192,7 +206,13 @@ namespace Victor { namespace Phylo{ w2[0]=1; w2[1]=1; w2[2]=1; - PhyloSupport::AlingMultiSvsMultiS2(num,letter,w1,w2,false); + PhyloSupport::AlingMultiSvsMultiS2(num,letter,w1,w2,false,1); + PhyloSupport::AlingMultiSvsMultiS2(num,letter,w1,w2,false,2); + PhyloSupport::AlingMultiSvsMultiS2(num,letter,w1,w2,false,3); + PhyloSupport::AlingMultiSvsMultiS2(num,letter,w1,w2,false,4); + PhyloSupport::AlingMultiSvsMultiS2(num,letter,w1,w2,false,5); + PhyloSupport::AlingMultiSvsMultiS2(num,letter,w1,w2,false,6); + PhyloSupport::AlingMultiSvsMultiS2(num,letter,w1,w2,false,num[0].size()/2); } diff --git a/Phylo/Sources/PhyloSupport.cc b/Phylo/Sources/PhyloSupport.cc index 33fb4ab..5112aa1 100644 --- a/Phylo/Sources/PhyloSupport.cc +++ b/Phylo/Sources/PhyloSupport.cc @@ -464,8 +464,8 @@ namespace Victor { namespace Phylo{ } - vector PhyloSupport::AlingMultiSvsMultiS2(vector seq1,vector seq2,vector vWeigth1,vector vWeigth2,bool verbose){ - int tokenSize=4 ; + vector PhyloSupport::AlingMultiSvsMultiS2(vector seq1,vector seq2,vector vWeigth1,vector vWeigth2,bool verbose,int tokenSize){ + string gap="-"; string tokenSizeGap=""; @@ -515,14 +515,14 @@ namespace Victor { namespace Phylo{ { tempSV[j]=seq2[j].substr(i*tokenSize,tokenSize); } -; + tokenS2[i]= new SeqNodeGraph(i,i-1+tokenSize,seq2.size(),tempSV,seq1[0].size()/tokenSize); } for(unsigned int i=0; igetTokenSeq(x); }//if else{ - for(unsigned int j=0; jgetTokenSeq(x); @@ -574,6 +580,18 @@ namespace Victor { namespace Phylo{ finalS[i+seq1.size()]+=seq2[i]; } + //insert gap at end + while(finalS[0].size()>finalS[seq1.size()].size()){ + for(unsigned int i=0; i AlingMultiSvsMultiS(vector seq1,vector seq2,vector vWeigth1,vector vWeigth2,bool verbose=false); //new - static vector AlingMultiSvsMultiS2(vector seq1,vector seq2,vector vWeigth1,vector vWeigth2,bool verbose=false); + static vector AlingMultiSvsMultiS2(vector seq1,vector seq2,vector vWeigth1,vector vWeigth2,bool verbose=false,int tokenSize=1); static string insertGapPosition(string seq, int position); static string intToString( int num ); diff --git a/Phylo/Sources/SeqNodeGraph.cc b/Phylo/Sources/SeqNodeGraph.cc index 083bbf7..30b2628 100644 --- a/Phylo/Sources/SeqNodeGraph.cc +++ b/Phylo/Sources/SeqNodeGraph.cc @@ -83,7 +83,7 @@ namespace Victor { namespace Phylo{ unsigned int SeqNodeGraph::returnBestEdge(){ unsigned int best=0; for(unsigned int i=0; i=taxEdge.size()) - best=-1; - } - else{ - best=returnBestEdge(); - cout<"<=taxEdge.size()) + best=-1; return best; } @@ -159,18 +154,18 @@ namespace Victor { namespace Phylo{ void SeqNodeGraph::calculateAverageTax(){ - cout<<"average"< vNode){ - cout<<"dentro set node-----------------"<getTotNumSeq();y++) {//for all token of string in node if(i==0) - cout<<"clacolo Score---------"<getTotNumSeq();w++) + //cout<<"clacolo Score---------"<getTokenSize();w++) {//for all char in each seq of node node->setTaxEdgeInPosition(i,sub.score[node->getCharOfTokenSeq(y,w)][vNode[i]->getCharOfTokenSeq(j,x)]); if(i==0){ - cout<<" "<getCharOfTokenSeq(y,w)<<" & "<getCharOfTokenSeq(j,x)<getTaxEdgeInPosition(i)<getCharOfTokenSeq(y,w)<<" & "<getCharOfTokenSeq(j,x)<getTaxEdgeInPosition(i)<calculateAverageTax(); - cout<<"near return"< Date: Fri, 21 Aug 2015 22:58:28 +0200 Subject: [PATCH 16/31] clustalW work now --- Phylo/Sources/ClustalW.cc | 31 ++++++++---------- Phylo/Sources/PhyloSupport.cc | 60 ++++++++++++++++++++++++++++------- Phylo/Sources/SeqNodeGraph.cc | 12 +++++-- Phylo/Sources/SeqNodeGraph.h | 1 + 4 files changed, 74 insertions(+), 30 deletions(-) diff --git a/Phylo/Sources/ClustalW.cc b/Phylo/Sources/ClustalW.cc index c5a96b2..5888210 100644 --- a/Phylo/Sources/ClustalW.cc +++ b/Phylo/Sources/ClustalW.cc @@ -160,12 +160,12 @@ namespace Victor { namespace Phylo{ cout<<"endl clustaW"< num(2); num[0]=o; @@ -206,13 +206,10 @@ namespace Victor { namespace Phylo{ w2[0]=1; w2[1]=1; w2[2]=1; - PhyloSupport::AlingMultiSvsMultiS2(num,letter,w1,w2,false,1); - PhyloSupport::AlingMultiSvsMultiS2(num,letter,w1,w2,false,2); - PhyloSupport::AlingMultiSvsMultiS2(num,letter,w1,w2,false,3); - PhyloSupport::AlingMultiSvsMultiS2(num,letter,w1,w2,false,4); - PhyloSupport::AlingMultiSvsMultiS2(num,letter,w1,w2,false,5); - PhyloSupport::AlingMultiSvsMultiS2(num,letter,w1,w2,false,6); - PhyloSupport::AlingMultiSvsMultiS2(num,letter,w1,w2,false,num[0].size()/2); + for(int i=1; iseq2[0].size()){ + for(unsigned int i=0; i tokenS1(seq1[0].size()/tokenSize); - vector tokenS2(seq2[0].size()/tokenSize); + vector tokenS2(seq2[0].size()/tokenSize2); for(unsigned int i=0; i tempSV(seq2.size()); for(unsigned int j=0; jprintTaxEdge(); + tokenS1[1]->printTaxEdge(); + + cout<<"tokenS1.size()="< edgeFor1(tokenS1.size()+tokenS2.size(),-1); vector finalS(seq1.size()+seq2.size(),""); unsigned int count1=0; @@ -545,9 +571,9 @@ namespace Victor { namespace Phylo{ } - //cout<<"print best edge"<getTokenSeq(x); }//if else{ for(unsigned int j=0; jgetTokenSeq(x); } @@ -592,6 +618,18 @@ namespace Victor { namespace Phylo{ } } + //delete useless gap + bool uselessGap=true; + while(uselessGap){ + uselessGap=true; + for(unsigned int i=1; igetTotNumSeq();y++) {//for all token of string in node - if(i==0) + //if(i==0) //cout<<"clacolo Score---------"<getTokenSize();w++) {//for all char in each seq of node diff --git a/Phylo/Sources/SeqNodeGraph.h b/Phylo/Sources/SeqNodeGraph.h index 279ed06..ce97e9a 100644 --- a/Phylo/Sources/SeqNodeGraph.h +++ b/Phylo/Sources/SeqNodeGraph.h @@ -66,6 +66,7 @@ namespace Victor { namespace Phylo { void setTotNumSeq(int i); void setTaxEdgeInPosition(unsigned int i, double tax); void setTokenSize(int size); + void printTaxEdge(); void calculateAverageTax(); static void setNode(SeqNodeGraph* node, vector vNode); From a02fd5f5938fe0909242ca9493465c42fe98170b Mon Sep 17 00:00:00 2001 From: snitz Date: Sat, 22 Aug 2015 15:26:43 +0200 Subject: [PATCH 17/31] progeressive allign work good --- Phylo/Sources/ClustalW.cc | 12 +++++------- Phylo/Sources/PhyloSupport.cc | 9 ++++++++- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/Phylo/Sources/ClustalW.cc b/Phylo/Sources/ClustalW.cc index 5888210..a711c19 100644 --- a/Phylo/Sources/ClustalW.cc +++ b/Phylo/Sources/ClustalW.cc @@ -163,9 +163,9 @@ namespace Victor { namespace Phylo{ string o="TDVIYQIVTDRFADGDRTNNPAGDAFSGDRSNLKLYFGGDWQGIID"; string t="TDVIYQIVTDRFADGDRTNNPAGDAFSGDRSNLKLYFGGDWQGIID"; - string a="NPANNPTVIYQIVTDRFYQVIYQVTDVIYQIVTDRFADGDRTNNPAGDAFSGDRSNLDDDDKLYFGGDWQGIID"; - string b="PTYPTHTSLKKYFGGDWQNNPTGDTDVIYQIVTDRFADGDRTNNPAGDAFSGDRSNLDDDDKLYFGGDWQGIID"; - string c="GAAFSSDHSNLKLYFGGDWQGITNTDVIYQIVTDRFADGDRTNNPAGDAFSGDRSNLDDDDKLYFGGDWQGIID"; + string a="NPANNPTVIYQIVTDRFYQVIYQVTDVIYQIVTDRFADGDRTNNPAGDAFSGDRSNLDDDDKLYFGGDWQGIIDNPANNPTVI"; + string b="PTYPTHTSLKKYFGGDWQNNPTGDTDVIYQIVTDRFADGDRTNNPAGDAFSGDRSNLDDDDKLYFGGDWQGIIDNPANNPTVI"; + string c="GAAFSSDHSNLKLYFGGDWQGITNTDVIYQIVTDRFADGDRTNNPAGDAFSGDRSNLDDDDKLYFGGDWQGIIDNPANNPTVI"; /*string o="TDVIYQIVTDRFADGDRTNNPA"; string t="NPVIYQIVTDRFSDGNPGNNPS"; @@ -206,10 +206,8 @@ namespace Victor { namespace Phylo{ w2[0]=1; w2[1]=1; w2[2]=1; - for(int i=1; i Date: Sat, 22 Aug 2015 16:33:11 +0200 Subject: [PATCH 18/31] clustalW complete, need now to write result in file and delete lots of comment and cout --- Phylo/Sources/ClustalW.cc | 74 +++++++++-------------------------- Phylo/Sources/PhyloSupport.cc | 16 ++++---- Phylo/Sources/SeqNodeGraph.cc | 2 +- 3 files changed, 27 insertions(+), 65 deletions(-) diff --git a/Phylo/Sources/ClustalW.cc b/Phylo/Sources/ClustalW.cc index a711c19..f895773 100644 --- a/Phylo/Sources/ClustalW.cc +++ b/Phylo/Sources/ClustalW.cc @@ -62,9 +62,10 @@ namespace Victor { namespace Phylo{ void ClustalW::progressiveAlign(){ - /*cout<<" Progressive alignament Start "< tmpV(2); vector tmpWeigth(2); + int tokenSize=60; cout<<"leaf number "< seqV(1); vector weigthV(1); - for(unsigned int j=1;jallignSeq,nodeTree[1]->allignSeq,nodeTree[0]->weigthV,nodeTree[1]->weigthV); + if(nodeTree[0]->allignSeq[0].size()allignSeq[0].size()) + tmpV=PhyloSupport::AlingMultiSvsMultiS2(nodeTree[0]->allignSeq,nodeTree[1]->allignSeq,nodeTree[0]->weigthV,nodeTree[1]->weigthV,false,tokenSize); + else + tmpV=PhyloSupport::AlingMultiSvsMultiS2(nodeTree[1]->allignSeq,nodeTree[0]->allignSeq,nodeTree[1]->weigthV,nodeTree[0]->weigthV,false,tokenSize); nodeTree[1]->ClustalW=true; nodeTree[0]->allignSeq=tmpV; } @@ -113,7 +117,10 @@ namespace Victor { namespace Phylo{ else if(j>2 && nodeTree[i]->parent->numberOfChildLeaf==j && !nodeTree[i]->ClustalW){ if(nodeTree[i]->isLeft){ - tmpV=PhyloSupport::AlingMultiSvsMultiS(nodeTree[i]->allignSeq,nodeTree[i]->parent->right->allignSeq,nodeTree[i]->weigthV,nodeTree[i]->parent->right->weigthV); + if(nodeTree[i]->allignSeq[0].size()parent->right->allignSeq[0].size()) + tmpV=PhyloSupport::AlingMultiSvsMultiS2(nodeTree[i]->allignSeq,nodeTree[i]->parent->right->allignSeq,nodeTree[i]->weigthV,nodeTree[i]->parent->right->weigthV,false,tokenSize); + else + tmpV=PhyloSupport::AlingMultiSvsMultiS2(nodeTree[i]->parent->right->allignSeq,nodeTree[i]->allignSeq,nodeTree[i]->parent->right->weigthV,nodeTree[i]->weigthV,false,tokenSize); nodeTree[i]->parent->right->ClustalW=true; vector weigthTMP(nodeTree[i]->weigthV.size()+nodeTree[i]->parent->right->weigthV.size()); for(unsigned int y=0;yweigthV.size();y++){ @@ -125,7 +132,10 @@ namespace Victor { namespace Phylo{ nodeTree[i]->parent->weigthV=weigthTMP; } else{//is right child - tmpV=PhyloSupport::AlingMultiSvsMultiS(nodeTree[i]->allignSeq,nodeTree[i]->parent->left->allignSeq,nodeTree[i]->weigthV,nodeTree[i]->parent->right->weigthV); + if(nodeTree[i]->allignSeq[0].size()parent->left->allignSeq[0].size()) + tmpV=PhyloSupport::AlingMultiSvsMultiS2(nodeTree[i]->allignSeq,nodeTree[i]->parent->left->allignSeq,nodeTree[i]->weigthV,nodeTree[i]->parent->right->weigthV,false,tokenSize); + else + tmpV=PhyloSupport::AlingMultiSvsMultiS2(nodeTree[i]->parent->left->allignSeq,nodeTree[i]->allignSeq,nodeTree[i]->parent->right->weigthV,nodeTree[i]->weigthV,false,tokenSize); nodeTree[i]->parent->left->ClustalW=true; vector weigthTMP(nodeTree[i]->weigthV.size()+nodeTree[i]->parent->left->weigthV.size()); for(unsigned int y=0;yweigthV.size();y++){ @@ -158,55 +168,7 @@ namespace Victor { namespace Phylo{ cout<allignSeq)< num(2); - num[0]=o; - num[1]=t; - - vector letter(3); - letter[0]=a; - letter[1]=b; - letter[2]=c; - - vector w1(2); - vector w2(3); - - w1[0]=1; - w1[1]=1; - - w2[0]=1; - w2[1]=1; - w2[2]=1; - PhyloSupport::AlingMultiSvsMultiS2(num,letter,w1,w2,false,1); + cout<<"endl clustaW"<printTaxEdge(); - tokenS1[1]->printTaxEdge(); + //tokenS1[0]->printTaxEdge(); + //tokenS1[1]->printTaxEdge(); - cout<<"tokenS1.size()="< edgeFor1(tokenS1.size()+tokenS2.size(),-1); vector finalS(seq1.size()+seq2.size(),""); @@ -571,11 +571,11 @@ namespace Victor { namespace Phylo{ } - cout<<"print best edge"< nodeTree(guideTree.getNumberOfLeaf()); for(unsigned int i=0;i seqV(1); vector weigthV(1); for(unsigned int j=1;jparent->numberOfChildLeaf==2){ tmpV=PhyloSupport::AlingSvsS(nodeTree[i]->seq,nodeTree[i]->seq); weigthV[0]=nodeTree[i]->weigth; - cout<<"temporaneo weigthV.size = "<weigth); else weigthV[1]=nodeTree[i+1]->weigth; - cout<<"temporaneo weigthV.size = "<parent->allignSeq=tmpV; nodeTree[i]->parent->weigthV=weigthV; nodeTree[i]=nodeTree[i]->parent; - cout<<"size di node tree dopo cambio padre "<2 && nodeTree[i]->parent->numberOfChildLeaf==j && !nodeTree[i]->ClustalW){ @@ -150,24 +144,24 @@ namespace Victor { namespace Phylo{ nodeTree[i]->allignSeq=tmpV; } else if(nodeTree[i]->ClustalW){ - cout<<"size di node tree prima di erase dentro un caso gia FATTO "<allignSeq); + cout<allignSeq.size()<allignSeq[0]<allignSeq[5]<allignSeq)< Date: Mon, 24 Aug 2015 14:11:59 +0200 Subject: [PATCH 20/31] remove old alignMulti --- Phylo/Sources/ClustalW.cc | 1 - Phylo/Sources/PhyloSupport.cc | 224 +--------------------------------- Phylo/Sources/PhyloSupport.h | 4 +- Phylo/Sources/SeqNodeGraph.cc | 54 ++++++++ Phylo/Sources/SeqNodeGraph.h | 2 +- 5 files changed, 58 insertions(+), 227 deletions(-) diff --git a/Phylo/Sources/ClustalW.cc b/Phylo/Sources/ClustalW.cc index fb0f48c..9f2bd78 100644 --- a/Phylo/Sources/ClustalW.cc +++ b/Phylo/Sources/ClustalW.cc @@ -76,7 +76,6 @@ namespace Victor { namespace Phylo{ vector seqV(1); vector weigthV(1); for(unsigned int j=1;j PhyloSupport::AlingMultiSvsMultiS(vector seq1,vector seq2,vector vWeigth1,vector vWeigth2,bool verbose){ - - - string path = getenv("VICTOR_ROOT"); - if (path.length() < 3) - cout << "Warning: environment variable VICTOR_ROOT is not set." << endl; - - string dataPath = path + "data/"; - - //Default matrix - string matrixFileName="blosum62.dat"; - matrixFileName = dataPath + matrixFileName; - ifstream matrixFile(matrixFileName.c_str()); - if (!matrixFile) - ERROR("Error opening substitution matrix file.", exception); - SubMatrix sub(matrixFile); - - - //cout<<"le sequenze sono lunghe"< > scoreM(seq1[0].size(), vector(seq2[0].size())); - - for(unsigned int i=0;iscoreM[tempI][j]) - tempI=w; - } - } - for(unsigned int w=j-1;wscoreM[i][tempJ]) - tempJ=w; - } - } - bool up=false; - bool vertical=false; - if(scoreM[tempI][j]>scoreM[i][tempJ]){ - i=tempI; - up=true; - } - else{ - vertical=true; - j=tempJ; - } - - if(up) - { - for(unsigned int h=0;h<(scoreM.size()-1)-i;h++){ - for(unsigned int w=0; w=scoreM[i-1][j] && scoreM[i-1][j-1]>=scoreM[i][j-1]) - { - i--; - j--; - //cout<<"diagonal "<scoreM[i-1][j-1] && scoreM[i-1][j]>scoreM[i][j-1]) - { - for(unsigned int w=0; w seqFinal(seq1.size()+seq2.size()); - - cout<<"seq1 "< PhyloSupport::AlingMultiSvsMultiS2(vector seq1,vector seq2,vector vWeigth1,vector vWeigth2,bool verbose,int tokenSize){ + vector PhyloSupport::AlingMultiSvsMultiS(vector seq1,vector seq2,vector vWeigth1,vector vWeigth2,bool verbose,int tokenSize){ string gap="-"; string tokenSizeGap=""; @@ -536,21 +327,10 @@ namespace Victor { namespace Phylo{ for(unsigned int i=0; i edgeFor1(tokenS1.size()+tokenS2.size(),-1); vector finalS(seq1.size()+seq2.size(),""); diff --git a/Phylo/Sources/PhyloSupport.h b/Phylo/Sources/PhyloSupport.h index db6874b..dc802c9 100644 --- a/Phylo/Sources/PhyloSupport.h +++ b/Phylo/Sources/PhyloSupport.h @@ -58,10 +58,8 @@ namespace Victor { namespace Phylo { static double calcDivR(vector vDist); static void printMatrix(vector > &distance); static vector AlingSvsS(string seq1,string seq2,bool verbose=false); - //old - static vector AlingMultiSvsMultiS(vector seq1,vector seq2,vector vWeigth1,vector vWeigth2,bool verbose=false); //new - static vector AlingMultiSvsMultiS2(vector seq1,vector seq2,vector vWeigth1,vector vWeigth2,bool verbose=false,int tokenSize=1); + static vector AlingMultiSvsMultiS(vector seq1,vector seq2,vector vWeigth1,vector vWeigth2,bool verbose=false,int tokenSize=1); static string insertGapPosition(string seq, int position); static string intToString( int num ); diff --git a/Phylo/Sources/SeqNodeGraph.cc b/Phylo/Sources/SeqNodeGraph.cc index 80f6722..9aef0a9 100644 --- a/Phylo/Sources/SeqNodeGraph.cc +++ b/Phylo/Sources/SeqNodeGraph.cc @@ -221,6 +221,60 @@ namespace Victor { namespace Phylo{ } + void SeqNodeGraph::setNodeWithWeigth(SeqNodeGraph* node, vector vNode,vector vWeigth1,vector vWeigth2){ + + //cout<<"dentro set node-----------------"<getTotNumSeq() ;j++) + {//how seq in node[i] of vNode + for(unsigned int x=0; xgetTokenSize();x++) + {//for all char in one string in node[i] of vNode + for(unsigned int y=0; ygetTotNumSeq();y++) + {//for all token of string in node + //if(i==0) + //cout<<"clacolo Score---------"<getTokenSize();w++) + {//for all char in each seq of node + node->setTaxEdgeInPosition(i,sub.score[node->getCharOfTokenSeq(y,w)][vNode[i]->getCharOfTokenSeq(j,x)]*vWeigth1[y]*vWeigth2[i]); + if(i==0){ + //cout<<" "<getCharOfTokenSeq(y,w)<<" & "<getCharOfTokenSeq(j,x)<getTaxEdgeInPosition(i)<calculateAverageTax(); + //cout<<"near return"< vNode); - + static void setNodeWithWeigth(SeqNodeGraph* node, vector vNode,vector vWeigth1,vector vWeigth2); From 3786cd48b7a4776d636e8663ae9db634a75f81fe Mon Sep 17 00:00:00 2001 From: snitz Date: Mon, 24 Aug 2015 17:09:25 +0200 Subject: [PATCH 21/31] clustalW you can decide token size --- Phylo/APPS/phylogen.cc | 21 +++++++++++--- Phylo/Sources/ClustalW.cc | 54 ++++++++++++++++++++++++----------- Phylo/Sources/ClustalW.h | 2 +- Phylo/Sources/PhyloSupport.cc | 46 +++++++++-------------------- Phylo/Sources/PhyloSupport.h | 1 + 5 files changed, 71 insertions(+), 53 deletions(-) diff --git a/Phylo/APPS/phylogen.cc b/Phylo/APPS/phylogen.cc index 58baa00..95bea54 100755 --- a/Phylo/APPS/phylogen.cc +++ b/Phylo/APPS/phylogen.cc @@ -76,6 +76,7 @@ sShowHelp() { << "\n \t --gf=0: UPGMA (default)." << "\n \t --gf=1: NJ." << "\n \t --gf=2: ClustalW use NJ." + << "\n [--t ] \t For decide dimension of token, very important for ClustalW(10%size of first seq)" << "\n [--cSeq ] \t Coefficient for sequence alignment (default = 1.0)" << "\n" << "\n [--ktuples] \t use ktuples method formula for calculate distance of pairwise sequence" @@ -96,7 +97,7 @@ int main(int argc, char **argv) { bool verbose=false; bool ktuples=false; unsigned int weightingScheme; - + int tokenSize; struct tm* newtime; @@ -129,6 +130,8 @@ int main(int argc, char **argv) { getArg("u", ups, argc, argv, 999.9); getArg("U", upa, argc, argv, 999.9); + getArg("-t", tokenSize, argc, argv, -1); + getArg("-cSeq", cSeq, argc, argv, 0.80); ktuples = getArg("-ktuples", argc, argv); @@ -163,6 +166,8 @@ int main(int argc, char **argv) { PhyloSupport::ups=downa; PhyloSupport::upa=downa; PhyloSupport::weightingScheme=weightingScheme; + PhyloSupport::tokenSize=tokenSize; + cout< tmpV(2); vector tmpWeigth(2); - int tokenSize=60; + int tokenSize=PhyloSupport::tokenSize; vector nodeTree(guideTree.getNumberOfLeaf()); @@ -73,18 +73,26 @@ namespace Victor { namespace Phylo{ nodeTree[i]=guideTree.getLeafInPosition(i).getRoot(); } + if(tokenSize<1) + { + tokenSize=nodeTree[0]->seq.size()/10; + cout<<"Use Default token Size "< seqV(1); vector weigthV(1); + cout<<"percent to the end:"<allignSeq[0].size()allignSeq[0].size()) - tmpV=PhyloSupport::AlingMultiSvsMultiS2(nodeTree[0]->allignSeq,nodeTree[1]->allignSeq,nodeTree[0]->weigthV,nodeTree[1]->weigthV,false,tokenSize); + tmpV=PhyloSupport::AlingMultiSvsMultiS(nodeTree[0]->allignSeq,nodeTree[1]->allignSeq,nodeTree[0]->weigthV,nodeTree[1]->weigthV,false,tokenSize); else - tmpV=PhyloSupport::AlingMultiSvsMultiS2(nodeTree[1]->allignSeq,nodeTree[0]->allignSeq,nodeTree[1]->weigthV,nodeTree[0]->weigthV,false,tokenSize); + tmpV=PhyloSupport::AlingMultiSvsMultiS(nodeTree[1]->allignSeq,nodeTree[0]->allignSeq,nodeTree[1]->weigthV,nodeTree[0]->weigthV,false,tokenSize); nodeTree[1]->ClustalW=true; nodeTree[0]->allignSeq=tmpV; } @@ -111,9 +119,9 @@ namespace Victor { namespace Phylo{ if(nodeTree[i]->isLeft){ if(nodeTree[i]->allignSeq[0].size()parent->right->allignSeq[0].size()) - tmpV=PhyloSupport::AlingMultiSvsMultiS2(nodeTree[i]->allignSeq,nodeTree[i]->parent->right->allignSeq,nodeTree[i]->weigthV,nodeTree[i]->parent->right->weigthV,false,tokenSize); + tmpV=PhyloSupport::AlingMultiSvsMultiS(nodeTree[i]->allignSeq,nodeTree[i]->parent->right->allignSeq,nodeTree[i]->weigthV,nodeTree[i]->parent->right->weigthV,false,tokenSize); else - tmpV=PhyloSupport::AlingMultiSvsMultiS2(nodeTree[i]->parent->right->allignSeq,nodeTree[i]->allignSeq,nodeTree[i]->parent->right->weigthV,nodeTree[i]->weigthV,false,tokenSize); + tmpV=PhyloSupport::AlingMultiSvsMultiS(nodeTree[i]->parent->right->allignSeq,nodeTree[i]->allignSeq,nodeTree[i]->parent->right->weigthV,nodeTree[i]->weigthV,false,tokenSize); nodeTree[i]->parent->right->ClustalW=true; vector weigthTMP(nodeTree[i]->weigthV.size()+nodeTree[i]->parent->right->weigthV.size()); for(unsigned int y=0;yweigthV.size();y++){ @@ -126,9 +134,9 @@ namespace Victor { namespace Phylo{ } else{//is right child if(nodeTree[i]->allignSeq[0].size()parent->left->allignSeq[0].size()) - tmpV=PhyloSupport::AlingMultiSvsMultiS2(nodeTree[i]->allignSeq,nodeTree[i]->parent->left->allignSeq,nodeTree[i]->weigthV,nodeTree[i]->parent->right->weigthV,false,tokenSize); + tmpV=PhyloSupport::AlingMultiSvsMultiS(nodeTree[i]->allignSeq,nodeTree[i]->parent->left->allignSeq,nodeTree[i]->weigthV,nodeTree[i]->parent->right->weigthV,false,tokenSize); else - tmpV=PhyloSupport::AlingMultiSvsMultiS2(nodeTree[i]->parent->left->allignSeq,nodeTree[i]->allignSeq,nodeTree[i]->parent->right->weigthV,nodeTree[i]->weigthV,false,tokenSize); + tmpV=PhyloSupport::AlingMultiSvsMultiS(nodeTree[i]->parent->left->allignSeq,nodeTree[i]->allignSeq,nodeTree[i]->parent->right->weigthV,nodeTree[i]->weigthV,false,tokenSize); nodeTree[i]->parent->left->ClustalW=true; vector weigthTMP(nodeTree[i]->weigthV.size()+nodeTree[i]->parent->left->weigthV.size()); for(unsigned int y=0;yweigthV.size();y++){ @@ -160,7 +168,11 @@ namespace Victor { namespace Phylo{ } outFile<allignSeq)<<" Token Size for Graph in MultiAling Change TokenSize for best align command --t"< allignSeq){ + double score=0; + double partialScore=0; + for(unsigned int j=0;j seq); - + double scoreClustalW(vector allignSeq); protected: diff --git a/Phylo/Sources/PhyloSupport.cc b/Phylo/Sources/PhyloSupport.cc index 75ebd2a..aeadef4 100644 --- a/Phylo/Sources/PhyloSupport.cc +++ b/Phylo/Sources/PhyloSupport.cc @@ -73,8 +73,10 @@ namespace Victor { namespace Phylo{ double PhyloSupport::downa=999.9; double PhyloSupport::ups=999.9; double PhyloSupport::upa=999.9; + int PhyloSupport::tokenSize=-1; unsigned int PhyloSupport::weightingScheme=0; + vector PhyloSupport::calcAlignmentV(Alignment *aliSec, vector > &distance , bool ktuples,bool verbose){ string seq1Name, seq2Name, seq1, seq2, sec1, sec2; @@ -112,7 +114,8 @@ namespace Victor { namespace Phylo{ //Global Align Align *a; - cout << "Start Suboptimal Needleman-Wunsch alignments:" << endl; + if(verbose) + cout << "Start Suboptimal Needleman-Wunsch alignments:" << endl; double suboptPenaltyMul=1; double suboptPenaltyAdd=1; @@ -182,7 +185,8 @@ namespace Victor { namespace Phylo{ alignV[index]=newAli; }//end for index - cout<<"NWAlign ok"<generateMultiMatch(suboptNum); if (a2.size() == 0) ERROR("No output alignments generated.", exception); - //a2[0].cutTemplate(1); - cout<<"NWAlign SvsS OK-------------------"< result(2); - cout<<"dimension of a2 "< tempSV(seq2.size()); @@ -351,11 +353,6 @@ namespace Victor { namespace Phylo{ } - /*cout<<"print best edge"< Date: Mon, 24 Aug 2015 17:46:05 +0200 Subject: [PATCH 22/31] ClustalW work. --- Phylo/APPS/phylogen.cc | 3 +-- Phylo/Sources/ClustalW.cc | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Phylo/APPS/phylogen.cc b/Phylo/APPS/phylogen.cc index 95bea54..f37adc3 100755 --- a/Phylo/APPS/phylogen.cc +++ b/Phylo/APPS/phylogen.cc @@ -167,7 +167,6 @@ int main(int argc, char **argv) { PhyloSupport::upa=downa; PhyloSupport::weightingScheme=weightingScheme; PhyloSupport::tokenSize=tokenSize; - cout<allignSeq)<<" Token Size for Graph in MultiAling Change TokenSize for best align command --t"<allignSeq)<<" Token Size for Graph in MultiAling Change TokenSize for best align command --t. Actualy "< Date: Fri, 28 Aug 2015 17:13:32 +0200 Subject: [PATCH 23/31] Start to write good oxygen Documentation --- Phylo/APPS/phylogen.cc | 13 ++++--- Phylo/Sources/ClustalW.cc | 29 +++++++++++++- Phylo/Sources/ClustalW.h | 15 ++++---- Phylo/Sources/NewickTree.cc | 76 ++++++++++++++++++++++++------------- Phylo/Sources/NewickTree.h | 15 +++----- 5 files changed, 96 insertions(+), 52 deletions(-) diff --git a/Phylo/APPS/phylogen.cc b/Phylo/APPS/phylogen.cc index f37adc3..a8f0e9a 100755 --- a/Phylo/APPS/phylogen.cc +++ b/Phylo/APPS/phylogen.cc @@ -14,12 +14,14 @@ along with Victor. If not, see . */ -// --*- C++ -*------x----------------------------------------------------------- +// --*- C++ -*------x------------------------------------------------------------------------------ // // -// Description: -// -// -----------------x----------------------------------------------------------- +// Description: This program calculates phylogenetic trees starting from FASTA file. +// Choose from 2 different methos Neighbor joining and UPGMA. +// This program calculates ClustalW allignment and use NJ for create guide trees. +// -----------------x------------------------------------------------------------------------------ + #include #include #include @@ -91,7 +93,7 @@ int main(int argc, char **argv) { string inputFileName, outputFileName, matrixFileName, matrixStrFileName; double openGapPenalty, extensionGapPenalty; - unsigned int gapFunction, function; + unsigned int function; double cSeq; double downs, downa, ups, upa; bool verbose=false; @@ -121,7 +123,6 @@ int main(int argc, char **argv) { getArg("-ws", weightingScheme, argc, argv, 0); - getArg("-gf", gapFunction, argc, argv, 0); getArg("o", openGapPenalty, argc, argv, 12.00); getArg("e", extensionGapPenalty, argc, argv, 3.00); diff --git a/Phylo/Sources/ClustalW.cc b/Phylo/Sources/ClustalW.cc index fc1b8d6..115f881 100644 --- a/Phylo/Sources/ClustalW.cc +++ b/Phylo/Sources/ClustalW.cc @@ -14,6 +14,14 @@ along with Victor. If not, see . */ +// --*- C++ -*------x----------------------------------------------------------- +// +// +// Description: ClustalW allignemt. +// http://www.ebi.ac.uk/Tools/msa/clustalw2/ +// +// -----------------x----------------------------------------------------------- + // Includes: #include @@ -44,6 +52,7 @@ using namespace std; namespace Victor { namespace Phylo{ + // CONSTRUCTORS: ClustalW::ClustalW(){ } @@ -59,7 +68,12 @@ namespace Victor { namespace Phylo{ ClustalW::~ClustalW() { } - + // OPERATORS: + /** + *@Description Create progressive Align strating from a guide tree. + * + *@return + */ void ClustalW::progressiveAlign(){ cout<<" Progressive alignament Start "< seq Vector of sequence. + *@return string txt Correct rappresetation in CLustalW format(50 char by lane) + */ string ClustalW::printClustalWFromat(vector seq){ string txt=""; string tmp=""; @@ -200,6 +219,12 @@ namespace Victor { namespace Phylo{ return txt; } + + /** + *@Description Calculate A sort of simalarity score on ClustalW align consider how much each column of char are similar + *@param vector seq Vector of sequence. + *@return double score the score of similarity. + */ double ClustalW::scoreClustalW(vector allignSeq){ double score=0; double partialScore=0; diff --git a/Phylo/Sources/ClustalW.h b/Phylo/Sources/ClustalW.h index daa8181..6e7fb14 100644 --- a/Phylo/Sources/ClustalW.h +++ b/Phylo/Sources/ClustalW.h @@ -41,27 +41,26 @@ namespace Victor { namespace Phylo { public: - // CONSTRUCTORS/DESTRUCTOR: + // CONSTRUCTORS + // void constructor. ClustalW(); + // starting from guide Tree constructor. ClustalW(NewickTree gT); - + //DESTRUCTOR: ~ClustalW(); - // PREDICATES: - - - - // OPERATORS: - // HELPERS: + //Generate and return string of sequenze in format .ClustalW static string printClustalWFromat(vector seq); + //calculate score based on similairty of all sequence in CLustalW allign (max score=1) double scoreClustalW(vector allignSeq); protected: // OPERATORS: + //Apply ClustalW using Guide Tree void progressiveAlign(); // HELPERS: diff --git a/Phylo/Sources/NewickTree.cc b/Phylo/Sources/NewickTree.cc index fe03674..41727c7 100644 --- a/Phylo/Sources/NewickTree.cc +++ b/Phylo/Sources/NewickTree.cc @@ -39,15 +39,13 @@ using namespace std; namespace Victor { namespace Phylo{ // CONSTRUCTORS: - /** - * - * @param ad - * @param gf - * @param ss - */ + std::vector distance; +/** + *@Description for create void node + */ NewickTree::NewickTree() { root=new iNode; @@ -69,8 +67,15 @@ std::vector distance; root->numberOfChildLeaf=0; } + /** + *@Description for create node of tree + *@param int position numeber of sequenze in FASTA file + *@param string name name of sequence in FASTA file + *@param string pureSeq the Sequence with no modification like in FASTA file + *@param double divR divergence usefull inNJ method + */ NewickTree::NewickTree(int position,string name,string pureSeq,double divR){ - + //void root=new iNode; root->left=NULL; root->right=NULL; @@ -81,8 +86,7 @@ std::vector distance; root->ClustalW=false; root->isRigth=false; root->isLeft=false; - - + //important root->name=name; root->divergenceR=divR; root->seq=pureSeq; @@ -91,9 +95,14 @@ std::vector distance; root->numberOfChildLeaf=0; } - + /** + *@Description for create node of tree + *@param int position numeber of sequenze in FASTA file + *@param string name name of sequence in FASTA file + *@param string pureSeq the Sequence with no modification like in FASTA file + */ NewickTree::NewickTree(int position,string name,string pureSeq){ - + //void root=new iNode; root->left=NULL; root->right=NULL; @@ -104,8 +113,7 @@ std::vector distance; root->ClustalW=false; root->isRigth=false; root->isLeft=false; - - + //important root->name=name; root->divergenceR=0; root->seq=pureSeq; @@ -118,8 +126,11 @@ std::vector distance; /** *@Description Create new Tree. Used for union two different Tree. + *@param NewickTree *rTree the right child + *@param NewickTree *lTree the left child */ NewickTree::NewickTree( NewickTree *rTree, NewickTree *lTree) { + //void root=new iNode; root->allignSeq; root->branchLength=-1; @@ -127,8 +138,7 @@ std::vector distance; root->ClustalW=false; root->isRigth=false; root->isLeft=false; - - + //important root->isLeaf=false;//internal int tmp=0; if(rTree->isLeaf()) @@ -210,13 +220,13 @@ std::vector distance; return leafList.size(); } - // MODIFIERS: - - - // OPERATORS: - /** - * */ + /** + *@Description for create phylogenic Tree use NJ method + *@param Align2::Alignment ali alignament of starting sequence + *@param bool ktuples decide to use ktuples method for calculate identity of 2 sequence + *@param bool verbose for more information on video during computing. + */ void NewickTree::neighborJoining(Align2::Alignment ali,bool ktuples, bool verbose){ int Nseq=ali.size()+1;//number of seq @@ -509,9 +519,13 @@ std::vector distance; }//end method - /** - * - * */ + // OPERATORS: + /** + *@Description for create phylogenic Tree use UPGMA method + *@param Align2::Alignment ali alignament of starting sequence + *@param bool ktuples decide to use ktuples method for calculate identity of 2 sequence + *@param bool verbose for more information on video during computing. + */ void NewickTree::upgma(Align2::Alignment ali,bool ktuples, bool verbose){ int Nseq=ali.size()+1;//number of seq @@ -658,13 +672,17 @@ std::vector distance; // OPERATORS: - /// Print in a string the tree using Newick format + /** + *@Description Print in a string the tree using Newick format + */ string NewickTree::printNewickTree() { return printNewickTreeNode(*root)+";"; } - + /** + *@Description helper method for recursion look NewickTree::printNewickTree() + */ string NewickTree::printNewickTreeNode(iNode node) { string printTree=""; @@ -685,6 +703,10 @@ std::vector distance; } + /** + *@Description calculate the wiegth of seq usign guide tree + * very important for ClustalW + */ double NewickTree::calcWeigth(iNode* seq){ double w=seq->branchLength; if(seq->parent!=NULL){ @@ -695,7 +717,7 @@ std::vector distance; /** - * Return max values of this node children branchLength. + *@Description Return max values of this node children branchLength. * Return zero if left child and right are NULL */ diff --git a/Phylo/Sources/NewickTree.h b/Phylo/Sources/NewickTree.h index 6a72395..e14b715 100644 --- a/Phylo/Sources/NewickTree.h +++ b/Phylo/Sources/NewickTree.h @@ -29,6 +29,7 @@ using std::string; using std::vector; +//rappresent the node of a phylogenic tree struct iNode { string name; @@ -71,30 +72,26 @@ namespace Victor { namespace Phylo { // PREDICATES: iNode *getRoot(); - iNode *getRightChild(); - iNode*getLeftChild(); - iNode*getParent(); - string getName(); - + string getName(); string getPureSeq(); double getValueOFChildBranchLength(); - double getRdiv(); + double getWeigth(); bool isLeaf(); int getNumOfChildren(); - double getWeigth(); + unsigned int getNumberOfLeaf(); NewickTree getLeafInPosition(unsigned int index); - unsigned int getNumberOfLeaf(); + // OPERATORS: @@ -108,7 +105,7 @@ namespace Victor { namespace Phylo { void setAlingSeq(vector vSeq); - /// Print in a string the tree using Newick format + // Print in a string the tree using Newick format string printNewickTree(); protected: From fbda82b13647a2374c8e24fab5ff3f1a643c55d6 Mon Sep 17 00:00:00 2001 From: snitz Date: Mon, 31 Aug 2015 13:24:23 +0200 Subject: [PATCH 24/31] some bug of segmentation in clustalW probably need to change for with while cycle --- Phylo/Sources/ClustalW.cc | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/Phylo/Sources/ClustalW.cc b/Phylo/Sources/ClustalW.cc index 115f881..3ba8c2f 100644 --- a/Phylo/Sources/ClustalW.cc +++ b/Phylo/Sources/ClustalW.cc @@ -78,7 +78,7 @@ namespace Victor { namespace Phylo{ cout<<" Progressive alignament Start "< tmpV(2); - vector tmpWeigth(2); + //vector tmpWeigth(2); int tokenSize=PhyloSupport::tokenSize; @@ -96,13 +96,16 @@ namespace Victor { namespace Phylo{ vector seqV(1); vector weigthV(1); cout<<"percent to the end:"< Date: Mon, 31 Aug 2015 17:29:17 +0200 Subject: [PATCH 25/31] insert name in clustalW format and title fix bug of name order --- Phylo/APPS/phylogen.cc | 45 ++++++++ Phylo/Sources/ClustalW.cc | 211 ++++++++++++++++++++++++++-------- Phylo/Sources/ClustalW.h | 17 ++- Phylo/Sources/NewickTree.cc | 4 + Phylo/Sources/NewickTree.h | 1 + Phylo/Sources/PhyloSupport.cc | 23 ++++ Phylo/Sources/PhyloSupport.h | 27 +++-- Phylo/Sources/SeqNodeGraph.cc | 76 +++++++----- Phylo/Sources/SeqNodeGraph.h | 10 +- 9 files changed, 318 insertions(+), 96 deletions(-) diff --git a/Phylo/APPS/phylogen.cc b/Phylo/APPS/phylogen.cc index a8f0e9a..104a046 100755 --- a/Phylo/APPS/phylogen.cc +++ b/Phylo/APPS/phylogen.cc @@ -178,26 +178,71 @@ int main(int argc, char **argv) { string out="Error Tree"; if(function==0){ cout<<"##########################################Starting PhyloTreeUPGMA#####################################"<tm_hour << " hour " << newtime->tm_min<<" min "<tm_sec<<" sec "<< endl; tree.upgma(aliSec,ktuples,verbose); cout<<"##########################################Tree create with PhyloTreeUPGMA#############################"<tm_hour << " hour " << newtime->tm_min<<" min "<tm_sec<<" sec "<< endl; out=tree.printNewickTree(); } else if(function==1){ cout<<"##########################################Starting PhyloTreeNJ########################################"<tm_hour << " hour " << newtime->tm_min<<" min "<tm_sec<<" sec "<< endl; tree.neighborJoining(aliSec,ktuples,verbose); cout<<"##########################################Tree create with PhyloTreeNJ################################"<tm_hour << " hour " << newtime->tm_min<<" min "<tm_sec<<" sec "<< endl; out=tree.printNewickTree(); } else if(function==2){ cout<<"##########################################Starting PhyloTreeNJ########################################"<tm_hour << " hour " << newtime->tm_min<<" min "<tm_sec<<" sec "<< endl; tree.neighborJoining(aliSec,ktuples,verbose); cout<<"##########################################Guide Tree create with PhyloTreeNJ################################"<tm_hour << " hour " << newtime->tm_min<<" min "<tm_sec<<" sec "<< endl; out=tree.printNewickTree(); cout<<"##########################################Starting ClustalW###############################"<tm_hour << " hour " << newtime->tm_min<<" min "<tm_sec<<" sec "<< endl; cout<<"number of leaf from main "<tm_hour << " hour " << newtime->tm_min<<" min "<tm_sec<<" sec "<< endl; } + else if(function==3){//hard search good Clustal W + cout<<"##########################################Starting PhyloTreeNJ########################################"<tm_hour << " hour " << newtime->tm_min<<" min "<tm_sec<<" sec "<< endl; + tree.neighborJoining(aliSec,ktuples,verbose); + cout<<"##########################################Guide Tree create with PhyloTreeNJ################################"<tm_hour << " hour " << newtime->tm_min<<" min "<tm_sec<<" sec "<< endl; + out=tree.printNewickTree(); + cout<<"##########################################Starting ClustalW###############################"<tm_hour << " hour " << newtime->tm_min<<" min "<tm_sec<<" sec "<< endl; + cout<<"number of leaf from main "<tm_hour << " hour " << newtime->tm_min<<" min "<tm_sec<<" sec "<< endl; + } else { cout<<"invalid cluster, select [--cluster <0|1>]"; diff --git a/Phylo/Sources/ClustalW.cc b/Phylo/Sources/ClustalW.cc index 3ba8c2f..fc2c4da 100644 --- a/Phylo/Sources/ClustalW.cc +++ b/Phylo/Sources/ClustalW.cc @@ -55,6 +55,8 @@ namespace Victor { namespace Phylo{ // CONSTRUCTORS: ClustalW::ClustalW(){ + score=0; + tokenSize=0; } ClustalW::ClustalW(NewickTree gT) { @@ -78,7 +80,6 @@ namespace Victor { namespace Phylo{ cout<<" Progressive alignament Start "< tmpV(2); - //vector tmpWeigth(2); int tokenSize=PhyloSupport::tokenSize; @@ -93,90 +94,117 @@ namespace Victor { namespace Phylo{ cout<<"Use Default token Size "< seqV(1); vector weigthV(1); + vector nameV(1); + vector nameFinal(guideTree.getNumberOfLeaf()); cout<<"percent to the end:"<1){ cout<<""<<((j)*100/(guideTree.getNumberOfLeaf())*100)/100<<"%"<allignSeq,nodeTree[0]->nameV); - string outString=ClustalW::printClustalWFromat(nodeTree[0]->allignSeq); cout<allignSeq)<<" Token Size for Graph in MultiAling Change TokenSize for best align command --t. Actualy "<allignSeq,nodeTree[0]->weigthV)<<" Token Size for Graph in MultiAling Change TokenSize for best align command --t. Actualy "< seq Vector of sequence. *@return string txt Correct rappresetation in CLustalW format(50 char by lane) */ string ClustalW::printClustalWFromat(vector seq){ - string txt=""; + string txt="CLUSTALW \n"; string tmp=""; unsigned int j=0; unsigned int index=0; @@ -223,26 +251,113 @@ namespace Victor { namespace Phylo{ return txt; } + /** + *@Description Returns String which represents the correct rappresentation in ClustalW format with correct name of seq + *@param vector seq Vector of sequence. + *@return string txt Correct rappresetation in CLustalW format(50 char by lane) + */ + string ClustalW::printClustalWFromat(vector seq, vector names){ + string txt=""; + string tmp=""; + unsigned int j=0; + unsigned int index=0; + unsigned int dim=50; + while(j seq Vector of sequence. *@return double score the score of similarity. */ double ClustalW::scoreClustalW(vector allignSeq){ + //matrix config + string path = getenv("VICTOR_ROOT"); + if (path.length() < 3) + cout << "Warning: environment variable VICTOR_ROOT is not set." << endl; + + string dataPath = path + "data/"; + + //Default matrix + string matrixFileName="blosum62.dat"; + matrixFileName = dataPath + matrixFileName; + ifstream matrixFile(matrixFileName.c_str()); + if (!matrixFile) + ERROR("Error opening substitution matrix file.", exception); + SubMatrix sub(matrixFile); + //end matrix config + double score=0; + for(unsigned int x=0;x seq Vector of sequence. + *@param vector w vector of weigth from guide tree + *@return double score the score of similarity. + */ + double ClustalW::scoreClustalW(vector allignSeq, vector w){ + //matrix config + string path = getenv("VICTOR_ROOT"); + if (path.length() < 3) + cout << "Warning: environment variable VICTOR_ROOT is not set." << endl; + + string dataPath = path + "data/"; + + //Default matrix + string matrixFileName="blosum62.dat"; + matrixFileName = dataPath + matrixFileName; + ifstream matrixFile(matrixFileName.c_str()); + if (!matrixFile) + ERROR("Error opening substitution matrix file.", exception); + SubMatrix sub(matrixFile); + //end matrix config double score=0; - double partialScore=0; - for(unsigned int j=0;j seq); - //calculate score based on similairty of all sequence in CLustalW allign (max score=1) + //Generate and return string of sequenze in format .ClustalW with name + static string printClustalWFromat(vector seq, vector names); + //calculate score based on similairty of all sequence in CLustalW allign no weigth consider double scoreClustalW(vector allignSeq); + //calculate score based on similairty of all sequence in CLustalW allign with weigth + double scoreClustalW(vector allignSeq, vector w); + // OPERATORS: + void setTokenSize(unsigned int t); + void setScore(double s); + + // OPERATORS: + unsigned int getTokenSize(); + double getScore(); protected: // OPERATORS: //Apply ClustalW using Guide Tree void progressiveAlign(); + // ATTRIBUTES: + unsigned int tokenSize; + double score; - // HELPERS: // ATTRIBUTES: diff --git a/Phylo/Sources/NewickTree.cc b/Phylo/Sources/NewickTree.cc index 41727c7..3e9a022 100644 --- a/Phylo/Sources/NewickTree.cc +++ b/Phylo/Sources/NewickTree.cc @@ -59,6 +59,7 @@ std::vector distance; root->divergenceR=0; root->nseq=-1; root->weigthV; + root->nameV; root->ClustalW=false; root->isRigth=false; root->isLeft=false; @@ -83,6 +84,7 @@ std::vector distance; root->allignSeq; root->branchLength=-1; root->weigthV; + root->nameV; root->ClustalW=false; root->isRigth=false; root->isLeft=false; @@ -110,6 +112,7 @@ std::vector distance; root->allignSeq; root->branchLength=0; root->weigthV; + root->nameV; root->ClustalW=false; root->isRigth=false; root->isLeft=false; @@ -135,6 +138,7 @@ std::vector distance; root->allignSeq; root->branchLength=-1; root->weigthV; + root->nameV; root->ClustalW=false; root->isRigth=false; root->isLeft=false; diff --git a/Phylo/Sources/NewickTree.h b/Phylo/Sources/NewickTree.h index e14b715..2e01377 100644 --- a/Phylo/Sources/NewickTree.h +++ b/Phylo/Sources/NewickTree.h @@ -47,6 +47,7 @@ struct iNode double divergenceR;//for NJ double weigth;//for ClustalW vector weigthV;//for ClustalW + vector nameV;//for ClustalW bool ClustalW;//for ClustalW }; diff --git a/Phylo/Sources/PhyloSupport.cc b/Phylo/Sources/PhyloSupport.cc index aeadef4..aa2f2f4 100644 --- a/Phylo/Sources/PhyloSupport.cc +++ b/Phylo/Sources/PhyloSupport.cc @@ -523,5 +523,28 @@ namespace Victor { namespace Phylo{ return ss.str(); } + vector PhyloSupport::mergeStringVector( vector v1,vector v2 ){ + vector v(v1.size()+v2.size()); + for(unsigned int i=0; i PhyloSupport::mergeDoubleVector( vector v1,vector v2 ){ + vector v(v1.size()+v2.size()); + for(unsigned int i=0; i calcAlignmentV(Alignment *aliSec, vector > &distance, bool ktuples=false, bool verbose=false); //Calc distance from 2 seq of DNA or protein static double distanceCalcTwoSeq(string seq1,string seq2); + //Calc distance from 2 seq of DNA or protein use k-tuples method static double distanceCalcTwoSeqktuples(string seq1,string seq2); + //for split string static std::vector &split(const std::string &s, char delim, std::vector &elems); static std::vector split(const std::string &s, char delim); + //for calculate divergent usefull for NJ static double calcDivR(vector vDist); + //for print matrix of double static void printMatrix(vector > &distance); + //aling 2 sequence static vector AlingSvsS(string seq1,string seq2,bool verbose=false); - //new + //aling 2 or more sequence use method of graph static vector AlingMultiSvsMultiS(vector seq1,vector seq2,vector vWeigth1,vector vWeigth2,bool verbose=false,int tokenSize=1); + //insert a gap in certain position in a string static string insertGapPosition(string seq, int position); + //cast a int to string static string intToString( int num ); + //to merge 2 vector of string, order of merge is v=v1[0],v1[1]....v2[0]....v2[n-1] + static vector mergeStringVector(vector v1,vector v2 ); + //to merge 2 vector of double, order of merge is v=v1[0],v1[1]....v2[0]....v2[n-1] + static vector mergeDoubleVector(vector v1,vector v2 ); - - + //static parameter for NJ UPGMA and ClustalW static double openGapPenalty; static double extensionGapPenalty; static double cSeq; @@ -74,14 +85,10 @@ namespace Victor { namespace Phylo { protected: - // HELPERS: - - // ATTRIBUTES: - private: - // HELPERS: + }; diff --git a/Phylo/Sources/SeqNodeGraph.cc b/Phylo/Sources/SeqNodeGraph.cc index 9aef0a9..79dc151 100644 --- a/Phylo/Sources/SeqNodeGraph.cc +++ b/Phylo/Sources/SeqNodeGraph.cc @@ -41,7 +41,10 @@ namespace Victor { namespace Phylo{ // CONSTRUCTORS: - + /** + *@Description Basic constructor, void node + * + */ SeqNodeGraph::SeqNodeGraph(){ indexStart=0; indexFinish=0; @@ -55,9 +58,21 @@ namespace Victor { namespace Phylo{ } + //totNumSeq how much of seq in this node minimun 1 //numSeqInS2 how much seq in the seq2 important for size of vector TaxEge; //Weigth depends on guide tree; + + /** + *@Description Basic constructor + *@param totNumS how much of seq in this node minimun 1 + *@param numSeqInS2 how much seq in the seq2 important for size of vector TaxEge; + *@param Weigth depends on guide tree + *@param indexS start index + *@param indexF final index + * + */ + SeqNodeGraph::SeqNodeGraph(unsigned int indexS,unsigned int indexF, unsigned int totNumS,vector tokenS,unsigned int numSeqInS2){ indexStart=indexS; indexFinish=indexF; @@ -76,15 +91,16 @@ namespace Victor { namespace Phylo{ *@Description Basic destructor */ SeqNodeGraph::~SeqNodeGraph() { - //destroy_tree();//to do } // PREDICATES: + /** + *@Description find and return the best edge of node + * + */ unsigned int SeqNodeGraph::returnBestEdge(){ unsigned int best=0; for(unsigned int i=0; i vNode array of nodes + * + */ void SeqNodeGraph::setNode(SeqNodeGraph* node, vector vNode){ - - //cout<<"dentro set node-----------------"<getTotNumSeq();y++) {//for all token of string in node - //if(i==0) - //cout<<"clacolo Score---------"<getTokenSize();w++) {//for all char in each seq of node node->setTaxEdgeInPosition(i,sub.score[node->getCharOfTokenSeq(y,w)][vNode[i]->getCharOfTokenSeq(j,x)]); if(i==0){ - //cout<<" "<getCharOfTokenSeq(y,w)<<" & "<getCharOfTokenSeq(j,x)<getTaxEdgeInPosition(i)<calculateAverageTax(); - //cout<<"near return"< vNode array of nodes + *@param vector vNode array of nodes + *@param vector vWeigth1 array of Weigth of first array of sequence + *@param vector vWeigth2 array of Weigth of second array of sequence + */ void SeqNodeGraph::setNodeWithWeigth(SeqNodeGraph* node, vector vNode,vector vWeigth1,vector vWeigth2){ //cout<<"dentro set node-----------------"<getTotNumSeq();y++) {//for all token of string in node - //if(i==0) - //cout<<"clacolo Score---------"<getTokenSize();w++) {//for all char in each seq of node node->setTaxEdgeInPosition(i,sub.score[node->getCharOfTokenSeq(y,w)][vNode[i]->getCharOfTokenSeq(j,x)]*vWeigth1[y]*vWeigth2[i]); if(i==0){ - //cout<<" "<getCharOfTokenSeq(y,w)<<" & "<getCharOfTokenSeq(j,x)<getTaxEdgeInPosition(i)<calculateAverageTax(); - //cout<<"near return"< Date: Mon, 31 Aug 2015 18:17:25 +0200 Subject: [PATCH 26/31] add time of work --- Phylo/APPS/phylogen.cc | 22 +--------------------- Phylo/Sources/ClustalW.cc | 12 ++++-------- Phylo/Sources/PhyloSupport.cc | 1 + Phylo/Sources/PhyloSupport.h | 2 ++ 4 files changed, 8 insertions(+), 29 deletions(-) diff --git a/Phylo/APPS/phylogen.cc b/Phylo/APPS/phylogen.cc index 104a046..360a318 100755 --- a/Phylo/APPS/phylogen.cc +++ b/Phylo/APPS/phylogen.cc @@ -168,6 +168,7 @@ int main(int argc, char **argv) { PhyloSupport::upa=downa; PhyloSupport::weightingScheme=weightingScheme; PhyloSupport::tokenSize=tokenSize; + PhyloSupport::verbose=verbose; // -------------------------------------------------- // 3. Load data @@ -222,27 +223,6 @@ int main(int argc, char **argv) { cout <<"localTime:"<< newtime->tm_hour << " hour " << newtime->tm_min<<" min "<tm_sec<<" sec "<< endl; } - else if(function==3){//hard search good Clustal W - cout<<"##########################################Starting PhyloTreeNJ########################################"<tm_hour << " hour " << newtime->tm_min<<" min "<tm_sec<<" sec "<< endl; - tree.neighborJoining(aliSec,ktuples,verbose); - cout<<"##########################################Guide Tree create with PhyloTreeNJ################################"<tm_hour << " hour " << newtime->tm_min<<" min "<tm_sec<<" sec "<< endl; - out=tree.printNewickTree(); - cout<<"##########################################Starting ClustalW###############################"<tm_hour << " hour " << newtime->tm_min<<" min "<tm_sec<<" sec "<< endl; - cout<<"number of leaf from main "<tm_hour << " hour " << newtime->tm_min<<" min "<tm_sec<<" sec "<< endl; - } else { cout<<"invalid cluster, select [--cluster <0|1>]"; diff --git a/Phylo/Sources/ClustalW.cc b/Phylo/Sources/ClustalW.cc index fc2c4da..83b1f75 100644 --- a/Phylo/Sources/ClustalW.cc +++ b/Phylo/Sources/ClustalW.cc @@ -103,14 +103,9 @@ namespace Victor { namespace Phylo{ cout<<"percent to the end:"<1){ - cout<<""<<((j)*100/(guideTree.getNumberOfLeaf())*100)/100<<"%"<allignSeq,nodeTree[0]->weigthV)<<" Token Size for Graph in MultiAling Change TokenSize for best align command --t. Actualy "<allignSeq,nodeTree[0]->weigthV)); + cout< PhyloSupport::calcAlignmentV(Alignment *aliSec, vector > &distance , bool ktuples,bool verbose){ diff --git a/Phylo/Sources/PhyloSupport.h b/Phylo/Sources/PhyloSupport.h index dc8d9b0..ce80d67 100644 --- a/Phylo/Sources/PhyloSupport.h +++ b/Phylo/Sources/PhyloSupport.h @@ -82,6 +82,8 @@ namespace Victor { namespace Phylo { static double downs, downa, ups, upa; static unsigned int weightingScheme; static int tokenSize; + static bool verbose; + protected: From 8f4b4f245a0fe0a280fb56781016846f4007c09c Mon Sep 17 00:00:00 2001 From: snitz Date: Tue, 1 Sep 2015 12:53:19 +0200 Subject: [PATCH 27/31] fix problem with cppunit in weigth->setnode --- Phylo/Sources/PhyloSupport.cc | 6 ++- Phylo/Sources/SeqNodeGraph.cc | 26 +++++++---- Phylo/Tests/TestPhylo.h | 67 +++++++++++++++++++++++++-- Phylo/Tests/TestPhylogen.cc | 87 ++++++++++++++++++++++++++++++++++- 4 files changed, 168 insertions(+), 18 deletions(-) diff --git a/Phylo/Sources/PhyloSupport.cc b/Phylo/Sources/PhyloSupport.cc index 44656e2..f868c14 100644 --- a/Phylo/Sources/PhyloSupport.cc +++ b/Phylo/Sources/PhyloSupport.cc @@ -279,7 +279,8 @@ namespace Victor { namespace Phylo{ } } - + cout<<"tokensize2 "<returnBestEdgeAfterIndex(count2); + cout<<"edgeFor1[count1]"<returnBestEdgeAfterIndex(count2); count2=edgeFor1[count1]; count1++; } diff --git a/Phylo/Sources/SeqNodeGraph.cc b/Phylo/Sources/SeqNodeGraph.cc index 79dc151..1ebde08 100644 --- a/Phylo/Sources/SeqNodeGraph.cc +++ b/Phylo/Sources/SeqNodeGraph.cc @@ -77,7 +77,7 @@ namespace Victor { namespace Phylo{ indexStart=indexS; indexFinish=indexF; totNumSeq=totNumS; - vector taxV(numSeqInS2); + vector taxV(numSeqInS2,0); taxEdge=taxV; tokenSeq=tokenS; tokenSize=tokenSeq[0].size(); @@ -116,9 +116,9 @@ namespace Victor { namespace Phylo{ */ int SeqNodeGraph::returnBestEdgeAfterIndex(unsigned int index){ unsigned int best=index+1; - best=index+1; for(unsigned int i=index+1; igetTokenSize();w++) {//for all char in each seq of node node->setTaxEdgeInPosition(i,sub.score[node->getCharOfTokenSeq(y,w)][vNode[i]->getCharOfTokenSeq(j,x)]); - if(i==0){ - } + }//end for }//end for @@ -250,14 +249,13 @@ namespace Victor { namespace Phylo{ *@Description for a node calculate all value of edges for all other node consider weigth by guide tree of sequence *@param SeqNodeGraph* node the node of graph That will be set *@param vector vNode array of nodes - *@param vector vNode array of nodes + *@param SeqNodeGraph* node *@param vector vWeigth1 array of Weigth of first array of sequence *@param vector vWeigth2 array of Weigth of second array of sequence */ void SeqNodeGraph::setNodeWithWeigth(SeqNodeGraph* node, vector vNode,vector vWeigth1,vector vWeigth2){ //cout<<"dentro set node-----------------"<getTokenSize();w++) {//for all char in each seq of node - node->setTaxEdgeInPosition(i,sub.score[node->getCharOfTokenSeq(y,w)][vNode[i]->getCharOfTokenSeq(j,x)]*vWeigth1[y]*vWeigth2[i]); - if(i==0){ - } + node->setTaxEdgeInPosition(i,sub.score[node->getCharOfTokenSeq(y,w)][vNode[i]->getCharOfTokenSeq(j,x)]*vWeigth1[y]*vWeigth2[j]); + //cout<getCharOfTokenSeq(y,w)][vNode[i]->getCharOfTokenSeq(j,x)]*vWeigth1[j]*vWeigth2[y]<getCharOfTokenSeq(y,w)]"<getCharOfTokenSeq(y,w)<<"[vNode[i]->getCharOfTokenSeq(j,x)]"<getCharOfTokenSeq(j,x)<addTest(new CppUnit::TestCaller("Test1 - Calculate distance of 2 seq.", &TestPhylo::testPhylo_A)); suiteOfTests->addTest(new CppUnit::TestCaller("Test2 - Insert Gap in a seq.", - &TestPhylo::testPhylo_B)); - suiteOfTests->addTest(new CppUnit::TestCaller("Test3 - .", + &TestPhylo::testPhylo_B)); + suiteOfTests->addTest(new CppUnit::TestCaller("Test3 - Calculate Multi Aling with weigth.", &TestPhylo::testPhylo_C)); + suiteOfTests->addTest(new CppUnit::TestCaller("Test4 - Calculate Multi Aling with weigth and insertion.", + &TestPhylo::testPhylo_D)); return suiteOfTests; } @@ -52,8 +54,13 @@ class TestPhylo : public CppUnit::TestFixture { protected: void testPhylo_A() { - - CPPUNIT_ASSERT(1==1); + string seq1="AAABBCCC"; + string seq2="AAABBCCC"; + double distance=Phylo::PhyloSupport::distanceCalcTwoSeq(seq1,seq2); + CPPUNIT_ASSERT( distance == 0 ); + seq2="DDDDDDDD"; + distance=Phylo::PhyloSupport::distanceCalcTwoSeq(seq1,seq2); + CPPUNIT_ASSERT( distance == 1 ); } void testPhylo_B() { @@ -65,8 +72,58 @@ class TestPhylo : public CppUnit::TestFixture { } void testPhylo_C() { + string a="AAAAAAAA"; + string b="AAAAAAAA"; + string c="AAAAAAAA"; + string o="AAAAAAAA"; + string t="AAAAAAAA"; + vector num(2); + vector letter(3); + num[0]=o; + num[1]=t; + letter[0]=a; + letter[1]=b; + letter[2]=c; + + vector w1(2); + vector w2(3); + w1[0]=1; + w1[1]=1; + w2[2]=1; + w2[0]=1; + w2[1]=1; + vector result=Phylo::PhyloSupport::AlingMultiSvsMultiS(num,letter,w1,w2,false,1); + for(unsigned int i=0;i num(2); + vector letter(3); + num[0]=o; + num[1]=t; + letter[0]=a; + letter[1]=b; + letter[2]=c; + + vector w1(3); + vector w2(2); + w1[0]=1; + w1[1]=1; + w1[2]=1; + w2[0]=1; + w2[1]=1; + vector result=Phylo::PhyloSupport::AlingMultiSvsMultiS(num,letter,w1,w2,false,2); + CPPUNIT_ASSERT(result[0][0] == '-'); - CPPUNIT_ASSERT(1 == 1); } }; diff --git a/Phylo/Tests/TestPhylogen.cc b/Phylo/Tests/TestPhylogen.cc index aee0a97..ccd1a39 100644 --- a/Phylo/Tests/TestPhylogen.cc +++ b/Phylo/Tests/TestPhylogen.cc @@ -13,8 +13,37 @@ using namespace std; +int main(){ + + string a="AACCAA"; + string b="AACCAA"; + string c="AACCAA"; + string o="CC"; + string t="CC"; + vector num(2); + vector letter(3); + num[0]=o; + num[1]=t; + letter[0]=a; + letter[1]=b; + letter[2]=c; + vector w1(2); + vector w2(3); + w1[0]=1; + w1[1]=1; + w2[0]=1; + w2[1]=1; + w2[2]=1; + vector result=Phylo::PhyloSupport::AlingMultiSvsMultiS(num,letter,w1,w2,false,1); + cout< Date: Tue, 1 Sep 2015 13:32:28 +0200 Subject: [PATCH 28/31] fix some warning --- Phylo/APPS/phylogen.cc | 4 ++-- Phylo/Sources/ClustalW.cc | 7 +++++-- Phylo/Sources/NewickTree.cc | 29 ++++++++++++++--------------- Phylo/Sources/PhyloSupport.cc | 11 ++++------- Phylo/Sources/SeqNodeGraph.cc | 16 +--------------- Phylo/Tests/TestPhylo.h | 3 ++- Phylo/Tests/TestPhylogen.cc | 30 ------------------------------ 7 files changed, 28 insertions(+), 72 deletions(-) diff --git a/Phylo/APPS/phylogen.cc b/Phylo/APPS/phylogen.cc index 360a318..163ac56 100755 --- a/Phylo/APPS/phylogen.cc +++ b/Phylo/APPS/phylogen.cc @@ -78,7 +78,7 @@ sShowHelp() { << "\n \t --gf=0: UPGMA (default)." << "\n \t --gf=1: NJ." << "\n \t --gf=2: ClustalW use NJ." - << "\n [--t ] \t For decide dimension of token, very important for ClustalW(10%size of first seq)" + << "\n [-t ] \t For decide dimension of token, very important for ClustalW(10%size of first seq)" << "\n [--cSeq ] \t Coefficient for sequence alignment (default = 1.0)" << "\n" << "\n [--ktuples] \t use ktuples method formula for calculate distance of pairwise sequence" @@ -131,7 +131,7 @@ int main(int argc, char **argv) { getArg("u", ups, argc, argv, 999.9); getArg("U", upa, argc, argv, 999.9); - getArg("-t", tokenSize, argc, argv, -1); + getArg("t", tokenSize, argc, argv, -1); getArg("-cSeq", cSeq, argc, argv, 0.80); diff --git a/Phylo/Sources/ClustalW.cc b/Phylo/Sources/ClustalW.cc index 83b1f75..a8568b3 100644 --- a/Phylo/Sources/ClustalW.cc +++ b/Phylo/Sources/ClustalW.cc @@ -66,7 +66,9 @@ namespace Victor { namespace Phylo{ progressiveAlign(); } - + /** + *@Description Basic destructor + */ ClustalW::~ClustalW() { } @@ -103,7 +105,7 @@ namespace Victor { namespace Phylo{ cout<<"percent to the end:"<1){ - cout<<""<<((j)*100/(guideTree.getNumberOfLeaf())*100)/100<<"% "; + cout<<""<<((j)*100/(guideTree.getNumberOfLeaf())*100)/100<<"% "<allignSeq[0].size()allignSeq[0].size()){ @@ -197,6 +199,7 @@ namespace Victor { namespace Phylo{ } j++; } + cout<<"...complete!"<allignSeq,nodeTree[0]->nameV); diff --git a/Phylo/Sources/NewickTree.cc b/Phylo/Sources/NewickTree.cc index 3e9a022..8b1bf45 100644 --- a/Phylo/Sources/NewickTree.cc +++ b/Phylo/Sources/NewickTree.cc @@ -47,19 +47,18 @@ std::vector distance; *@Description for create void node */ NewickTree::NewickTree() { - + //root->weigthV; + //root->nameV; + //root->allignSeq; root=new iNode; root->name="NULL"; root->left=NULL; root->right=NULL; root->parent=NULL; root->seq="noPURESeq"; - root->allignSeq; root->branchLength=-1; root->divergenceR=0; root->nseq=-1; - root->weigthV; - root->nameV; root->ClustalW=false; root->isRigth=false; root->isLeft=false; @@ -76,15 +75,16 @@ std::vector distance; *@param double divR divergence usefull inNJ method */ NewickTree::NewickTree(int position,string name,string pureSeq,double divR){ + //not need + //root->allignSeq; + //root->nameV; + //root->weigthV; //void root=new iNode; root->left=NULL; root->right=NULL; root->parent=NULL; - root->allignSeq; root->branchLength=-1; - root->weigthV; - root->nameV; root->ClustalW=false; root->isRigth=false; root->isLeft=false; @@ -105,14 +105,14 @@ std::vector distance; */ NewickTree::NewickTree(int position,string name,string pureSeq){ //void + //root->weigthV; + //root->nameV; + //root->allignSeq; root=new iNode; root->left=NULL; root->right=NULL; root->parent=NULL; - root->allignSeq; root->branchLength=0; - root->weigthV; - root->nameV; root->ClustalW=false; root->isRigth=false; root->isLeft=false; @@ -133,12 +133,13 @@ std::vector distance; *@param NewickTree *lTree the left child */ NewickTree::NewickTree( NewickTree *rTree, NewickTree *lTree) { + //not need + //root->weigthV; + //root->nameV; + //root->allignSeq; //void root=new iNode; - root->allignSeq; root->branchLength=-1; - root->weigthV; - root->nameV; root->ClustalW=false; root->isRigth=false; root->isLeft=false; @@ -165,12 +166,10 @@ std::vector distance; } - /** *@Description Basic destructor */ NewickTree::~NewickTree() { - //destroy_tree();//to do } diff --git a/Phylo/Sources/PhyloSupport.cc b/Phylo/Sources/PhyloSupport.cc index f868c14..6beb1c3 100644 --- a/Phylo/Sources/PhyloSupport.cc +++ b/Phylo/Sources/PhyloSupport.cc @@ -279,12 +279,10 @@ namespace Victor { namespace Phylo{ } } - cout<<"tokensize2 "<returnBestEdgeAfterIndex(count2); - cout<<"edgeFor1[count1]"<returnBestEdgeAfterIndex(count2); count2=edgeFor1[count1]; count1++; } @@ -362,13 +359,13 @@ namespace Victor { namespace Phylo{ if(edgeFor1[i]!=-1 ){ for(unsigned x=0;xgetTokenSeq(x); }//if else{ - for(unsigned int j=0; jgetTokenSeq(x); diff --git a/Phylo/Sources/SeqNodeGraph.cc b/Phylo/Sources/SeqNodeGraph.cc index 1ebde08..3a73cae 100644 --- a/Phylo/Sources/SeqNodeGraph.cc +++ b/Phylo/Sources/SeqNodeGraph.cc @@ -118,7 +118,6 @@ namespace Victor { namespace Phylo{ unsigned int best=index+1; for(unsigned int i=index+1; i vNode array of nodes - *@param SeqNodeGraph* node *@param vector vWeigth1 array of Weigth of first array of sequence *@param vector vWeigth2 array of Weigth of second array of sequence */ void SeqNodeGraph::setNodeWithWeigth(SeqNodeGraph* node, vector vNode,vector vWeigth1,vector vWeigth2){ - - //cout<<"dentro set node-----------------"<getTokenSize();w++) {//for all char in each seq of node node->setTaxEdgeInPosition(i,sub.score[node->getCharOfTokenSeq(y,w)][vNode[i]->getCharOfTokenSeq(j,x)]*vWeigth1[y]*vWeigth2[j]); - //cout<getCharOfTokenSeq(y,w)][vNode[i]->getCharOfTokenSeq(j,x)]*vWeigth1[j]*vWeigth2[y]<getCharOfTokenSeq(y,w)][vNode[i]->getCharOfTokenSeq(j,x)]*vWeigth1[y]*vWeigth2[j]<getCharOfTokenSeq(y,w)]"<getCharOfTokenSeq(y,w)<<"[vNode[i]->getCharOfTokenSeq(j,x)]"<getCharOfTokenSeq(j,x)<addTest(new CppUnit::TestCaller("Test3 - Calculate Multi Aling with weigth.", &TestPhylo::testPhylo_C)); - suiteOfTests->addTest(new CppUnit::TestCaller("Test4 - Calculate Multi Aling with weigth and insertion.", + suiteOfTests->addTest(new CppUnit::TestCaller("Test4 - Calculate Multi Aling with weigth and deletetion/insertion.", &TestPhylo::testPhylo_D)); return suiteOfTests; @@ -123,6 +123,7 @@ class TestPhylo : public CppUnit::TestFixture { w2[1]=1; vector result=Phylo::PhyloSupport::AlingMultiSvsMultiS(num,letter,w1,w2,false,2); CPPUNIT_ASSERT(result[0][0] == '-'); + CPPUNIT_ASSERT(result[0][2] == 'C'); } diff --git a/Phylo/Tests/TestPhylogen.cc b/Phylo/Tests/TestPhylogen.cc index ccd1a39..2e7f493 100644 --- a/Phylo/Tests/TestPhylogen.cc +++ b/Phylo/Tests/TestPhylogen.cc @@ -14,36 +14,6 @@ using namespace std; int main(){ - - string a="AACCAA"; - string b="AACCAA"; - string c="AACCAA"; - string o="CC"; - string t="CC"; - vector num(2); - vector letter(3); - num[0]=o; - num[1]=t; - letter[0]=a; - letter[1]=b; - letter[2]=c; - vector w1(2); - vector w2(3); - w1[0]=1; - w1[1]=1; - w2[0]=1; - w2[1]=1; - w2[2]=1; - vector result=Phylo::PhyloSupport::AlingMultiSvsMultiS(num,letter,w1,w2,false,1); - cout< Date: Fri, 4 Sep 2015 12:09:13 +0200 Subject: [PATCH 29/31] fix output of clustalW --- Phylo/Sources/ClustalW.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Phylo/Sources/ClustalW.cc b/Phylo/Sources/ClustalW.cc index a8568b3..2133a34 100644 --- a/Phylo/Sources/ClustalW.cc +++ b/Phylo/Sources/ClustalW.cc @@ -178,12 +178,12 @@ namespace Victor { namespace Phylo{ //insert name nodeTree[i]->parent->nameV=PhyloSupport::mergeStringVector(nodeTree[i]->nameV,nodeTree[i]->parent->left->nameV); //insert weigth - nodeTree[i]->parent->weigthV=PhyloSupport::mergeDoubleVector(nodeTree[i]->parent->left->weigthV,nodeTree[i]->weigthV); + nodeTree[i]->parent->weigthV=PhyloSupport::mergeDoubleVector(nodeTree[i]->weigthV,nodeTree[i]->parent->left->weigthV); } else { tmpV=PhyloSupport::AlingMultiSvsMultiS(nodeTree[i]->parent->left->allignSeq,nodeTree[i]->allignSeq,nodeTree[i]->parent->right->weigthV,nodeTree[i]->weigthV,false,tokenSize); //insert name - nodeTree[i]->parent->nameV=PhyloSupport::mergeStringVector(nodeTree[i]->nameV,nodeTree[i]->parent->left->nameV); + nodeTree[i]->parent->nameV=PhyloSupport::mergeStringVector(nodeTree[i]->parent->left->nameV,nodeTree[i]->nameV); //insert weigth nodeTree[i]->parent->weigthV=PhyloSupport::mergeDoubleVector(nodeTree[i]->parent->left->weigthV,nodeTree[i]->weigthV); } From bbd715f690b3c2d36f8cd50e56b339508354b2e0 Mon Sep 17 00:00:00 2001 From: snitz Date: Fri, 4 Sep 2015 12:37:32 +0200 Subject: [PATCH 30/31] Can choose matrix now -m --- Phylo/APPS/phylogen.cc | 1 + Phylo/Sources/PhyloSupport.cc | 5 +++-- Phylo/Sources/PhyloSupport.h | 14 +++++++++++--- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/Phylo/APPS/phylogen.cc b/Phylo/APPS/phylogen.cc index 163ac56..e9fbc8d 100755 --- a/Phylo/APPS/phylogen.cc +++ b/Phylo/APPS/phylogen.cc @@ -169,6 +169,7 @@ int main(int argc, char **argv) { PhyloSupport::weightingScheme=weightingScheme; PhyloSupport::tokenSize=tokenSize; PhyloSupport::verbose=verbose; + PhyloSupport::matrix=matrixFileName; // -------------------------------------------------- // 3. Load data diff --git a/Phylo/Sources/PhyloSupport.cc b/Phylo/Sources/PhyloSupport.cc index 6beb1c3..9f17d81 100644 --- a/Phylo/Sources/PhyloSupport.cc +++ b/Phylo/Sources/PhyloSupport.cc @@ -75,6 +75,7 @@ namespace Victor { namespace Phylo{ double PhyloSupport::upa=999.9; int PhyloSupport::tokenSize=-1; unsigned int PhyloSupport::weightingScheme=0; + string PhyloSupport::matrix="blosum62.dat"; bool PhyloSupport::verbose=false; @@ -94,7 +95,7 @@ namespace Victor { namespace Phylo{ AlignmentData *ad; //Default matrix - string matrixFileName="blosum62.dat"; + string matrixFileName=PhyloSupport::matrix; matrixFileName = dataPath + matrixFileName; ifstream matrixFile(matrixFileName.c_str()); if (!matrixFile) @@ -206,7 +207,7 @@ namespace Victor { namespace Phylo{ AlignmentData *ad; //Default matrix - string matrixFileName="blosum62.dat"; + string matrixFileName=PhyloSupport::matrix; matrixFileName = dataPath + matrixFileName; ifstream matrixFile(matrixFileName.c_str()); if (!matrixFile) diff --git a/Phylo/Sources/PhyloSupport.h b/Phylo/Sources/PhyloSupport.h index ce80d67..499692d 100644 --- a/Phylo/Sources/PhyloSupport.h +++ b/Phylo/Sources/PhyloSupport.h @@ -75,13 +75,21 @@ namespace Victor { namespace Phylo { //to merge 2 vector of double, order of merge is v=v1[0],v1[1]....v2[0]....v2[n-1] static vector mergeDoubleVector(vector v1,vector v2 ); - //static parameter for NJ UPGMA and ClustalW + //static parameter for NJ UPGMA and ClustalW: + + //gap for align static double openGapPenalty; static double extensionGapPenalty; - static double cSeq; - static double downs, downa, ups, upa; + //Cseq for align + static double cSeq; + //for align + static double downs, downa, ups, upa; static unsigned int weightingScheme; + //ClustalW static int tokenSize; + //Substitution matrix + static string matrix; + //verbose output static bool verbose; From eaa4a1d6b1d0f9e0ac2aa6ba808645613bec3279 Mon Sep 17 00:00:00 2001 From: snitz Date: Fri, 4 Sep 2015 13:36:22 +0200 Subject: [PATCH 31/31] fix name in clustaW --- Phylo/Sources/ClustalW.cc | 42 ++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/Phylo/Sources/ClustalW.cc b/Phylo/Sources/ClustalW.cc index 2133a34..db35a20 100644 --- a/Phylo/Sources/ClustalW.cc +++ b/Phylo/Sources/ClustalW.cc @@ -230,24 +230,10 @@ namespace Victor { namespace Phylo{ *@return string txt Correct rappresetation in CLustalW format(50 char by lane) */ string ClustalW::printClustalWFromat(vector seq){ - string txt="CLUSTALW \n"; - string tmp=""; - unsigned int j=0; - unsigned int index=0; - unsigned int dim=50; - while(j names(seq.size()); + for(unsigned int i=0; i seq, vector names){ - string txt=""; + string txt="CLUSTALW \n\n"; string tmp=""; unsigned int j=0; unsigned int index=0; unsigned int dim=50; + vector countSimb(seq.size(),0); + bool onlyGap=true; while(j