diff --git a/Align2/Sources/Profile.cc b/Align2/Sources/Profile.cc
index 670ec84..694f0ff 100755
--- a/Align2/Sources/Profile.cc
+++ b/Align2/Sources/Profile.cc
@@ -89,7 +89,7 @@ namespace Victor { namespace Align2{
time_t t;
pResetData();
seqLen = ali.getTarget().size();
- numSeq = ali.size() - 2;
+ numSeq = ali.size();
time(&t);
newtime = localtime(&t);
cout << "ready for pConstructData " << newtime->tm_hour << "/" << newtime->tm_min << endl;
diff --git a/Makefile b/Makefile
index c0f7aa4..7336999 100644
--- a/Makefile
+++ b/Makefile
@@ -7,7 +7,7 @@
# Path to project directory.
UPDIR = .
# Path to subdirectories.
-SUBDIRS = tools Energy/Sources Biopool/Sources Align2/Sources Energy/Sources/TorsionPotential Lobo/Sources Lobo/APPS Energy/APPS Biopool/APPS Align2/APPS
+SUBDIRS = tools Energy/Sources Biopool/Sources Align2/Sources Phylo/Sources Energy/Sources/TorsionPotential Lobo/Sources Lobo/APPS Energy/APPS Biopool/APPS Align2/APPS Phylo/APPS
#
# Libraries and paths (which are not defined globally).
diff --git a/Makefile.global b/Makefile.global
index 1f78a89..982f852 100644
--- a/Makefile.global
+++ b/Makefile.global
@@ -106,11 +106,11 @@ SUBDIRS += _dummy_
LIB_PATH += -L$(UPDIR)/lib
-INC_PATH += -I$(UPDIR)/tools -I$(UPDIR)/Energy/Sources -I$(UPDIR)/Biopool/Sources -I$(UPDIR)/Energy/Sources/TorsionPotential -I$(UPDIR)/Lobo/Sources -I$(UPDIR)/Align2/Sources -I$(UPDIR)/Biopool/APPS -I$(UPDIR)/Energy/APPS -I$(UPDIR)/Align2/APPS -I$(UPDIR)/Lobo/APPS
+INC_PATH += -I$(UPDIR)/tools -I$(UPDIR)/Energy/Sources -I$(UPDIR)/Biopool/Sources -I$(UPDIR)/Energy/Sources/TorsionPotential -I$(UPDIR)/Lobo/Sources -I$(UPDIR)/Align2/Sources -I$(UPDIR)/Phylo/Sources -I$(UPDIR)/Biopool/APPS -I$(UPDIR)/Energy/APPS -I$(UPDIR)/Align2/APPS -I$(UPDIR)/Phylo/APPS -I$(UPDIR)/Lobo/APPS
ifdef test
- INC_PATH += -I$(UPDIR)/Biopool/Tests -I$(UPDIR)/Energy/Tests -I$(UPDIR)/Align2/Tests -I$(UPDIR)/Lobo/Tests
- SUBDIRS = Biopool/Tests Energy/Tests Align2/Tests Lobo/Tests
+ INC_PATH += -I$(UPDIR)/Biopool/Tests -I$(UPDIR)/Energy/Tests -I$(UPDIR)/Align2/Tests -I$(UPDIR)/Phylo/Tests -I$(UPDIR)/Lobo/Tests
+ SUBDIRS = Biopool/Tests Energy/Tests Align2/Tests Phylo/Tests Lobo/Tests
endif
####### Implicit rules
diff --git a/Phylo/APPS/Makefile b/Phylo/APPS/Makefile
new file mode 100755
index 0000000..02a0733
--- /dev/null
+++ b/Phylo/APPS/Makefile
@@ -0,0 +1,55 @@
+#--*- makefile -*--------------------------------------------------------------
+#
+# Standard makefile
+#
+#------------------------------------------------------------------------------
+
+# Path to project directory
+UPDIR = ../..
+# Path to subdirectories
+SUBDIR =
+# Path to directory for binaries
+BINPATH = ../../bin
+
+#
+# Libraries and paths (which are not defined globally)
+#
+
+LIBS = -lPhylo -lAlign2 -ltools
+
+LIB_PATH = -L.
+
+INC_PATH = -I. -I../../tools/ -I../../Align2/Sources -I../../Phylo/Sources
+
+#
+# Objects and headers
+#
+
+SOURCES = phylogen.cc
+
+OBJECTS = phylogen.o
+
+TARGETS = phylogen
+
+
+EXECS = phylogen
+
+
+LIBRARY = APPSlibPhylo.a
+
+#
+# Install rule
+#
+
+compile: all
+
+all: install
+install: $(LIBRARY) $(TARGETS)
+ mv $(LIBRARY) $(UPDIR)/lib/
+ mv $(EXECS) $(BINPATH)/
+
+#
+# Call global Makefile to do the job
+#
+
+include ../../Makefile.global
diff --git a/Phylo/APPS/phylogen.cc b/Phylo/APPS/phylogen.cc
new file mode 100755
index 0000000..e9fbc8d
--- /dev/null
+++ b/Phylo/APPS/phylogen.cc
@@ -0,0 +1,256 @@
+/* This file is part of Victor.
+
+ Victor is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Victor is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Victor. If not, see .
+ */
+
+// --*- C++ -*------x------------------------------------------------------------------------------
+//
+//
+// Description: This program calculates phylogenetic trees starting from FASTA file.
+// Choose from 2 different methos Neighbor joining and UPGMA.
+// This program calculates ClustalW allignment and use NJ for create guide trees.
+// -----------------x------------------------------------------------------------------------------
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+using namespace Victor::Align2;
+using namespace Victor::Phylo;
+using namespace Victor;
+using namespace std;
+
+
+
+/// Show command line options and help text.
+
+void
+sShowHelp() {
+ cout << "\nP Phylogenetic Tree GENERATOR" //This program read FASTA file and calculate phylogenetic tree. Use NWALIGN for align.
+ << "\nThis program read FASTA file and calculate phylogenetic tree. Use NWALIGN for align."
+ << "\nOptions:"
+ << "\n"
+ << "\n * [--in ] \t Name of input FASTA file"
+ << "\n [--out ] \t Name of output file (default = to screen)"
+ << "\n [-m ] \t Name of substitution matrix file (default = blosum62.dat)"
+ << "\n"
+ << "\n [-d ] \t Min. master vs all seq. id. filter threshold (suggested = 0.00)"
+ << "\n [-D ] \t Min. all vs all seq. id. filter threshold (suggested = 0.00)"
+ << "\n [-u ] \t Max. master vs all seq. id. filter threshold (suggested = 1.00)"
+ << "\n [-U ] \t Max. all vs all seq. id. filter threshold (suggested = 1.00)"
+ << "\n"
+ << "\n [--ws <0|1|2|3>] \t Weighting scheme for profiles (default = 0, i.e. no weighting scheme)"
+ << "\n \t --ws=0: No weighting scheme (default)."
+ << "\n \t --ws=1: Calculate a frequency profile or PSSM using Henikoff weighting scheme."
+ << "\n \t --ws=2: Calculate a frequency profile or PSSM using PSIC weighting scheme."
+ << "\n \t --ws=3: Calculate a frequency profile or PSSM using SeqDivergence weighting scheme."
+ << "\n"
+ << "\n [-o ] \t Open gap penalty (default = 12.00)"
+ << "\n [-e ] \t Extension gap penalty (default = 3.00)"
+ << "\n"
+ << "\n [--function <0|1|2>] \t CLuster function (default = 0, i.e. UPGMA)"
+ << "\n \t --gf=0: UPGMA (default)."
+ << "\n \t --gf=1: NJ."
+ << "\n \t --gf=2: ClustalW use NJ."
+ << "\n [-t ] \t For decide dimension of token, very important for ClustalW(10%size of first seq)"
+ << "\n [--cSeq ] \t Coefficient for sequence alignment (default = 1.0)"
+ << "\n"
+ << "\n [--ktuples] \t use ktuples method formula for calculate distance of pairwise sequence"
+ << "\n [--verbose] \t Verbose mode"
+ << "\n" << endl;
+}
+
+
+/// Show command line options and help text.
+
+int main(int argc, char **argv) {
+
+ string inputFileName, outputFileName, matrixFileName, matrixStrFileName;
+ double openGapPenalty, extensionGapPenalty;
+ unsigned int function;
+ double cSeq;
+ double downs, downa, ups, upa;
+ bool verbose=false;
+ bool ktuples=false;
+ unsigned int weightingScheme;
+ int tokenSize;
+
+ struct tm* newtime;
+
+ time_t t;
+
+ // --------------------------------------------------
+ // 0. Treat options
+ // --------------------------------------------------
+ if (getArg("h", argc, argv)) {
+ sShowHelp();
+ return 1;
+ }
+
+
+ getArg("-in", inputFileName, argc, argv, "cyc1_input.fasta");
+ getArg("-out", outputFileName, argc, argv, "!");
+
+ getArg("m", matrixFileName, argc, argv, "blosum62.dat");
+
+ getArg("-function", function, argc, argv, 0);
+
+ getArg("-ws", weightingScheme, argc, argv, 0);
+
+ getArg("o", openGapPenalty, argc, argv, 12.00);
+ getArg("e", extensionGapPenalty, argc, argv, 3.00);
+
+ getArg("d", downs, argc, argv, 999.9);
+ getArg("D", downa, argc, argv, 999.9);
+ getArg("u", ups, argc, argv, 999.9);
+ getArg("U", upa, argc, argv, 999.9);
+
+ getArg("t", tokenSize, argc, argv, -1);
+
+ getArg("-cSeq", cSeq, argc, argv, 0.80);
+
+ ktuples = getArg("-ktuples", argc, argv);
+ verbose = getArg("-verbose", argc, argv);
+
+ // --------------------------------------------------
+ // 1. Load FASTA
+ // --------------------------------------------------
+ Alignment aliSec;
+ string path = getenv("VICTOR_ROOT");
+ if (path.length() < 3)
+ cout << "Warning: environment variable VICTOR_ROOT is not set." << endl;
+ string dataPath = path + "data/";
+ string finalPath=dataPath+inputFileName;
+
+ cout<<"path "<tm_hour << " hour " << newtime->tm_min<<" min "<tm_sec<<" sec "<< endl;
+ tree.upgma(aliSec,ktuples,verbose);
+ cout<<"##########################################Tree create with PhyloTreeUPGMA#############################"<tm_hour << " hour " << newtime->tm_min<<" min "<tm_sec<<" sec "<< endl;
+ out=tree.printNewickTree();
+ }
+ else if(function==1){
+ cout<<"##########################################Starting PhyloTreeNJ########################################"<tm_hour << " hour " << newtime->tm_min<<" min "<tm_sec<<" sec "<< endl;
+ tree.neighborJoining(aliSec,ktuples,verbose);
+ cout<<"##########################################Tree create with PhyloTreeNJ################################"<tm_hour << " hour " << newtime->tm_min<<" min "<tm_sec<<" sec "<< endl;
+ out=tree.printNewickTree();
+ }
+ else if(function==2){
+ cout<<"##########################################Starting PhyloTreeNJ########################################"<tm_hour << " hour " << newtime->tm_min<<" min "<tm_sec<<" sec "<< endl;
+ tree.neighborJoining(aliSec,ktuples,verbose);
+ cout<<"##########################################Guide Tree create with PhyloTreeNJ################################"<tm_hour << " hour " << newtime->tm_min<<" min "<tm_sec<<" sec "<< endl;
+ out=tree.printNewickTree();
+ cout<<"##########################################Starting ClustalW###############################"<tm_hour << " hour " << newtime->tm_min<<" min "<tm_sec<<" sec "<< endl;
+ cout<<"number of leaf from main "<tm_hour << " hour " << newtime->tm_min<<" min "<tm_sec<<" sec "<< endl;
+
+ }
+ else
+ {
+ cout<<"invalid cluster, select [--cluster <0|1>]";
+ }
+
+ // --------------------------------------------------
+ // 2. Output Tree
+ // --------------------------------------------------
+ if(outputFileName=="!"){
+ cout<<"Print the Tree "<.
+ */
+
+// --*- C++ -*------x-----------------------------------------------------------
+//
+//
+// Description: ClustalW allignemt.
+// http://www.ebi.ac.uk/Tools/msa/clustalw2/
+//
+// -----------------x-----------------------------------------------------------
+
+
+// Includes:
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+// Global constants, typedefs, etc. (to avoid):
+using namespace Victor::Align2;
+using namespace Victor;
+using namespace std;
+
+namespace Victor { namespace Phylo{
+
+ // CONSTRUCTORS:
+
+ ClustalW::ClustalW(){
+ score=0;
+ tokenSize=0;
+ }
+
+ ClustalW::ClustalW(NewickTree gT) {
+
+ cout<<"constructor"< tmpV(2);
+ int tokenSize=PhyloSupport::tokenSize;
+
+
+ vector nodeTree(guideTree.getNumberOfLeaf());
+ for(unsigned int i=0;iseq.size()/10;
+ cout<<"Use Default token Size "< seqV(1);
+ vector weigthV(1);
+ vector nameV(1);
+ vector nameFinal(guideTree.getNumberOfLeaf());
+ cout<<"percent to the end:"<1){
+ cout<<""<<((j)*100/(guideTree.getNumberOfLeaf())*100)/100<<"% "<allignSeq[0].size()allignSeq[0].size()){
+ tmpV=PhyloSupport::AlingMultiSvsMultiS(nodeTree[0]->allignSeq,nodeTree[1]->allignSeq,nodeTree[0]->weigthV,nodeTree[1]->weigthV,false,tokenSize);
+ //insert name
+ nodeTree[0]->nameV=PhyloSupport::mergeStringVector(nodeTree[0]->nameV,nodeTree[1]->nameV);
+ //insert weigth
+ nodeTree[0]->weigthV=PhyloSupport::mergeDoubleVector(nodeTree[0]->weigthV,nodeTree[1]->weigthV);
+ }
+ else {
+ tmpV=PhyloSupport::AlingMultiSvsMultiS(nodeTree[1]->allignSeq,nodeTree[0]->allignSeq,nodeTree[1]->weigthV,nodeTree[0]->weigthV,false,tokenSize);
+ //insert name
+ nodeTree[0]->nameV=PhyloSupport::mergeStringVector(nodeTree[1]->nameV,nodeTree[0]->nameV);
+ //insert weigth
+ nodeTree[0]->weigthV=PhyloSupport::mergeDoubleVector(nodeTree[1]->weigthV,nodeTree[0]->weigthV);
+ }
+ nodeTree[1]->ClustalW=true;
+ nodeTree[0]->allignSeq=tmpV;
+
+ }
+ else if(j==1){//populate nodeTree
+ seqV[0]=nodeTree[i]->seq;
+ weigthV[0]=nodeTree[i]->weigth;
+ nameV[0]=nodeTree[i]->name;
+ nodeTree[i]->nameV=nameV;
+ nodeTree[i]->allignSeq=seqV;
+ nodeTree[i]->weigthV=weigthV;
+ }
+ else if(j==2 && nodeTree[i]->parent->numberOfChildLeaf==2){//first lap of align only seq VS seq
+ tmpV=PhyloSupport::AlingSvsS(nodeTree[i]->seq,nodeTree[i]->seq);
+ weigthV[0]=nodeTree[i]->weigth;
+ nameV[0]=nodeTree[i]->name;
+ if(weigthV.size()==1)
+ weigthV.push_back(nodeTree[i+1]->weigth);
+ else
+ weigthV[1]=nodeTree[i+1]->weigth;
+ if(nameV.size()==1)
+ nameV.push_back(nodeTree[i+1]->name);
+ else
+ nameV[1]=nodeTree[i+1]->name;
+
+ nodeTree.erase(nodeTree.begin()+i+1);
+ nodeTree[i]->parent->allignSeq=tmpV;
+ nodeTree[i]->parent->weigthV=weigthV;
+ nodeTree[i]->parent->nameV=nameV;
+ nodeTree[i]=nodeTree[i]->parent;
+ }
+ else if(j>2 && nodeTree[i]->parent->numberOfChildLeaf==(int)j && !nodeTree[i]->ClustalW && iisLeft){
+ if(nodeTree[i]->allignSeq[0].size()parent->right->allignSeq[0].size()) {
+ tmpV=PhyloSupport::AlingMultiSvsMultiS(nodeTree[i]->allignSeq,nodeTree[i]->parent->right->allignSeq,nodeTree[i]->weigthV,nodeTree[i]->parent->right->weigthV,false,tokenSize);
+ //insert name
+ nodeTree[i]->parent->nameV=PhyloSupport::mergeStringVector(nodeTree[i]->nameV,nodeTree[i]->parent->right->nameV);
+ //insert weigth
+ nodeTree[i]->parent->weigthV=PhyloSupport::mergeDoubleVector(nodeTree[i]->weigthV,nodeTree[i]->parent->right->weigthV);
+ }
+ else {
+ tmpV=PhyloSupport::AlingMultiSvsMultiS(nodeTree[i]->parent->right->allignSeq,nodeTree[i]->allignSeq,nodeTree[i]->parent->right->weigthV,nodeTree[i]->weigthV,false,tokenSize);
+ //insert name
+ nodeTree[i]->parent->nameV=PhyloSupport::mergeStringVector(nodeTree[i]->parent->right->nameV,nodeTree[i]->nameV);
+ //insert weigth
+ nodeTree[i]->parent->weigthV=PhyloSupport::mergeDoubleVector(nodeTree[i]->parent->right->weigthV,nodeTree[i]->weigthV);
+ }
+ nodeTree[i]->parent->right->ClustalW=true;
+ }
+ else{//is right child
+ if(nodeTree[i]->allignSeq[0].size()parent->left->allignSeq[0].size()) {
+ tmpV=PhyloSupport::AlingMultiSvsMultiS(nodeTree[i]->allignSeq,nodeTree[i]->parent->left->allignSeq,nodeTree[i]->weigthV,nodeTree[i]->parent->right->weigthV,false,tokenSize);
+ //insert name
+ nodeTree[i]->parent->nameV=PhyloSupport::mergeStringVector(nodeTree[i]->nameV,nodeTree[i]->parent->left->nameV);
+ //insert weigth
+ nodeTree[i]->parent->weigthV=PhyloSupport::mergeDoubleVector(nodeTree[i]->weigthV,nodeTree[i]->parent->left->weigthV);
+ }
+ else {
+ tmpV=PhyloSupport::AlingMultiSvsMultiS(nodeTree[i]->parent->left->allignSeq,nodeTree[i]->allignSeq,nodeTree[i]->parent->right->weigthV,nodeTree[i]->weigthV,false,tokenSize);
+ //insert name
+ nodeTree[i]->parent->nameV=PhyloSupport::mergeStringVector(nodeTree[i]->parent->left->nameV,nodeTree[i]->nameV);
+ //insert weigth
+ nodeTree[i]->parent->weigthV=PhyloSupport::mergeDoubleVector(nodeTree[i]->parent->left->weigthV,nodeTree[i]->weigthV);
+ }
+ nodeTree[i]->parent->left->ClustalW=true;
+ }
+ nodeTree[i]=nodeTree[i]->parent;
+ nodeTree[i]->allignSeq=tmpV;
+ }
+ else if(nodeTree[i]->ClustalW && iallignSeq,nodeTree[0]->nameV);
+
+ cout<allignSeq,nodeTree[0]->weigthV));
+ cout< seq Vector of sequence.
+ *@return string txt Correct rappresetation in CLustalW format(50 char by lane)
+ */
+ string ClustalW::printClustalWFromat(vector seq){
+ vector names(seq.size());
+ for(unsigned int i=0; i seq Vector of sequence.
+ *@return string txt Correct rappresetation in CLustalW format(50 char by lane)
+ */
+ string ClustalW::printClustalWFromat(vector seq, vector names){
+ string txt="CLUSTALW \n\n";
+ string tmp="";
+ unsigned int j=0;
+ unsigned int index=0;
+ unsigned int dim=50;
+ vector countSimb(seq.size(),0);
+ bool onlyGap=true;
+ while(j seq Vector of sequence.
+ *@return double score the score of similarity.
+ */
+ double ClustalW::scoreClustalW(vector allignSeq){
+ //matrix config
+ string path = getenv("VICTOR_ROOT");
+ if (path.length() < 3)
+ cout << "Warning: environment variable VICTOR_ROOT is not set." << endl;
+
+ string dataPath = path + "data/";
+
+ //Default matrix
+ string matrixFileName=PhyloSupport::matrix;
+ matrixFileName = dataPath + matrixFileName;
+ ifstream matrixFile(matrixFileName.c_str());
+ if (!matrixFile)
+ ERROR("Error opening substitution matrix file.", exception);
+ SubMatrix sub(matrixFile);
+ //end matrix config
+ double score=0;
+ for(unsigned int x=0;x seq Vector of sequence.
+ *@param vector w vector of weigth from guide tree
+ *@return double score the score of similarity.
+ */
+ double ClustalW::scoreClustalW(vector allignSeq, vector w){
+ //matrix config
+ string path = getenv("VICTOR_ROOT");
+ if (path.length() < 3)
+ cout << "Warning: environment variable VICTOR_ROOT is not set." << endl;
+
+ string dataPath = path + "data/";
+
+ //Default matrix
+ string matrixFileName=PhyloSupport::matrix;
+ matrixFileName = dataPath + matrixFileName;
+ ifstream matrixFile(matrixFileName.c_str());
+ if (!matrixFile)
+ ERROR("Error opening substitution matrix file.", exception);
+ SubMatrix sub(matrixFile);
+ //end matrix config
+ double score=0;
+ for(unsigned int x=0;x.
+ */
+
+
+#ifndef _ClustalW_h_
+#define _ClustalW_h_
+
+// Includes:
+
+#include
+#include
+#include
+#include
+
+
+using std::string;
+using std::vector;
+
+
+// Global constants, typedefs, etc. (to avoid):
+namespace Victor { namespace Phylo {
+
+ /**@brief Methods to manages the global statistic data.
+ *
+ *@Description Use for ClustalW
+ * */
+ class ClustalW {
+
+ public:
+
+ // CONSTRUCTORS
+ // void constructor.
+ ClustalW();
+ // starting from guide Tree constructor.
+ ClustalW(NewickTree gT);
+
+ //DESTRUCTOR:
+ ~ClustalW();
+
+
+ // HELPERS:
+ //Generate and return string of sequenze in format .ClustalW
+ static string printClustalWFromat(vector seq);
+ //Generate and return string of sequenze in format .ClustalW with name
+ static string printClustalWFromat(vector seq, vector names);
+ //calculate score based on similairty of all sequence in CLustalW allign no weigth consider
+ double scoreClustalW(vector allignSeq);
+ //calculate score based on similairty of all sequence in CLustalW allign with weigth
+ double scoreClustalW(vector allignSeq, vector w);
+
+ // OPERATORS:
+ void setTokenSize(unsigned int t);
+ void setScore(double s);
+
+ // OPERATORS:
+ unsigned int getTokenSize();
+ double getScore();
+
+ protected:
+ // OPERATORS:
+ //Apply ClustalW using Guide Tree
+ void progressiveAlign();
+ // ATTRIBUTES:
+ unsigned int tokenSize;
+ double score;
+
+
+
+ // ATTRIBUTES:
+ NewickTree guideTree;
+
+
+
+ private:
+ // HELPERS:
+
+ // ATTRIBUTES:
+
+
+ };
+
+
+}}
+#endif
+
diff --git a/Phylo/Sources/Makefile b/Phylo/Sources/Makefile
new file mode 100755
index 0000000..645c3f3
--- /dev/null
+++ b/Phylo/Sources/Makefile
@@ -0,0 +1,51 @@
+#--*- makefile -*--------------------------------------------------------------
+#
+# Standard makefile
+#
+#------------------------------------------------------------------------------
+
+# Path to project directory
+UPDIR = ../..
+# Path to subdirectories
+SUBDIR =
+# Path to directory for binaries
+BINPATH = ../../bin
+
+#
+# Libraries and paths (which are not defined globally)
+#
+
+LIBS = -lAlign2 -ltools
+
+LIB_PATH = -L.
+
+INC_PATH = -I. -I../../tools/ -I../../Align2/Sources
+
+#
+# Objects and headers
+#
+
+SOURCES = PhyloSupport.cc NewickTree.cc ClustalW.cc SeqNodeGraph.cc
+
+OBJECTS = PhyloSupport.o NewickTree.o ClustalW.o SeqNodeGraph.o
+
+TARGETS =
+
+EXECS =
+
+LIBRARY = libPhylo.a
+
+#
+# Install rule
+#
+
+compile: all
+
+install: $(LIBRARY) $(TARGETS)
+ mv $(LIBRARY) $(UPDIR)/lib
+
+#
+# Call global Makefile to do the job
+#
+all: install
+include ../../Makefile.global
diff --git a/Phylo/Sources/NewickTree.cc b/Phylo/Sources/NewickTree.cc
new file mode 100644
index 0000000..8b1bf45
--- /dev/null
+++ b/Phylo/Sources/NewickTree.cc
@@ -0,0 +1,784 @@
+/* This file is part of Victor.
+
+ Victor is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Victor is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Victor. If not, see .
+ */
+
+
+// Includes:
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+
+// Global constants, typedefs, etc. (to avoid):
+using namespace Victor::Align2;
+using namespace Victor;
+using namespace std;
+
+namespace Victor { namespace Phylo{
+
+ // CONSTRUCTORS:
+
+
+std::vector distance;
+
+/**
+ *@Description for create void node
+ */
+ NewickTree::NewickTree() {
+ //root->weigthV;
+ //root->nameV;
+ //root->allignSeq;
+ root=new iNode;
+ root->name="NULL";
+ root->left=NULL;
+ root->right=NULL;
+ root->parent=NULL;
+ root->seq="noPURESeq";
+ root->branchLength=-1;
+ root->divergenceR=0;
+ root->nseq=-1;
+ root->ClustalW=false;
+ root->isRigth=false;
+ root->isLeft=false;
+
+ root->isLeaf=true;
+ root->numberOfChildLeaf=0;
+ }
+
+ /**
+ *@Description for create node of tree
+ *@param int position numeber of sequenze in FASTA file
+ *@param string name name of sequence in FASTA file
+ *@param string pureSeq the Sequence with no modification like in FASTA file
+ *@param double divR divergence usefull inNJ method
+ */
+ NewickTree::NewickTree(int position,string name,string pureSeq,double divR){
+ //not need
+ //root->allignSeq;
+ //root->nameV;
+ //root->weigthV;
+ //void
+ root=new iNode;
+ root->left=NULL;
+ root->right=NULL;
+ root->parent=NULL;
+ root->branchLength=-1;
+ root->ClustalW=false;
+ root->isRigth=false;
+ root->isLeft=false;
+ //important
+ root->name=name;
+ root->divergenceR=divR;
+ root->seq=pureSeq;
+ root->nseq=position;
+ root->isLeaf=true;
+ root->numberOfChildLeaf=0;
+ }
+
+ /**
+ *@Description for create node of tree
+ *@param int position numeber of sequenze in FASTA file
+ *@param string name name of sequence in FASTA file
+ *@param string pureSeq the Sequence with no modification like in FASTA file
+ */
+ NewickTree::NewickTree(int position,string name,string pureSeq){
+ //void
+ //root->weigthV;
+ //root->nameV;
+ //root->allignSeq;
+ root=new iNode;
+ root->left=NULL;
+ root->right=NULL;
+ root->parent=NULL;
+ root->branchLength=0;
+ root->ClustalW=false;
+ root->isRigth=false;
+ root->isLeft=false;
+ //important
+ root->name=name;
+ root->divergenceR=0;
+ root->seq=pureSeq;
+ root->nseq=position;
+ root->isLeaf=true;
+ root->numberOfChildLeaf=0;
+ }
+
+
+
+ /**
+ *@Description Create new Tree. Used for union two different Tree.
+ *@param NewickTree *rTree the right child
+ *@param NewickTree *lTree the left child
+ */
+ NewickTree::NewickTree( NewickTree *rTree, NewickTree *lTree) {
+ //not need
+ //root->weigthV;
+ //root->nameV;
+ //root->allignSeq;
+ //void
+ root=new iNode;
+ root->branchLength=-1;
+ root->ClustalW=false;
+ root->isRigth=false;
+ root->isLeft=false;
+ //important
+ root->isLeaf=false;//internal
+ int tmp=0;
+ if(rTree->isLeaf())
+ tmp++;
+ if(lTree->isLeaf())
+ tmp++;
+ root->numberOfChildLeaf=tmp+rTree->getNumOfChildren()+lTree->getNumOfChildren();
+ root->name="";
+ root->seq="";
+
+ rTree->getRoot()->isRigth=true;
+ root->right=rTree->getRoot();
+
+ lTree->getRoot()->isLeft=true;
+ root->left=lTree->getRoot();
+
+ root->parent=NULL;
+
+ root->divergenceR=0;
+ }
+
+
+ /**
+ *@Description Basic destructor
+ */
+ NewickTree::~NewickTree() {
+ }
+
+
+ // PREDICATES:
+ iNode *NewickTree::getRoot(){
+ return root;
+ }
+
+ iNode *NewickTree::getRightChild(){
+ return root->right;
+ }
+
+ iNode*NewickTree::getLeftChild(){
+ return root->left;
+ }
+ iNode*NewickTree::getParent(){
+ return root->parent;
+ }
+
+ string NewickTree::getName(){
+ return root->name;
+ }
+
+ string NewickTree::getPureSeq(){
+ return root->seq;
+ }
+
+
+ double NewickTree::getRdiv(){
+ return root->divergenceR;
+ }
+
+
+ bool NewickTree::isLeaf(){
+ return root->isLeaf;
+ }
+
+ int NewickTree::getNumOfChildren(){
+ return root->numberOfChildLeaf;
+ }
+
+ double NewickTree::getWeigth(){
+ return root->weigth;
+ }
+
+ NewickTree NewickTree::getLeafInPosition(unsigned int index){
+ return leafList[index];
+ }
+
+ unsigned int NewickTree::getNumberOfLeaf(){
+ return leafList.size();
+ }
+
+ // OPERATORS:
+ /**
+ *@Description for create phylogenic Tree use NJ method
+ *@param Align2::Alignment ali alignament of starting sequence
+ *@param bool ktuples decide to use ktuples method for calculate identity of 2 sequence
+ *@param bool verbose for more information on video during computing.
+ */
+ void NewickTree::neighborJoining(Align2::Alignment ali,bool ktuples, bool verbose){
+
+ int Nseq=ali.size()+1;//number of seq
+ vector > distance(Nseq, std::vector(Nseq, 0));
+
+ if(verbose)
+ cout<<"---------Creating Distance Matrix--------------/"<vet=PhyloSupport::calcAlignmentV(&ali,distance,ktuples,verbose);
+
+
+
+ if (verbose){
+ cout<<"---------Print Distance Matrix--------------/"< trees(Nseq);
+
+ //Create all node
+ for(unsigned int i=0;i seqList(trees.size());//never modify, used after create guide tree
+ int count=0;//count number of seq insert in seqList
+
+
+ //memorize the nearest seq
+ unsigned int minj=0;
+ unsigned int mini=1;
+ //vector for new distance
+ vector newMediaD ( trees.size());
+
+ //for creat new matrix;
+ unsigned int newi=0;
+ unsigned int newj=0;
+
+
+ while(trees.size()!=3){// end 3
+ if(verbose)
+ cout<<"----------------While num= "<<(Nseq-trees.size())<<"---------------"< > Mij(trees.size(), std::vector(trees.size(), 0));
+
+ for(unsigned int i=0; i Mij[i][j] && i!=j)
+ {
+ minj=j;
+ mini=i;
+ }
+ }
+ }
+
+ if(verbose)
+ cout< > tmpdistance(trees.size()-1, std::vector(trees.size()-1, 0));
+
+ //reset
+ newi=0;
+ newj=0;
+ //copy old matrix in new matrix, but not mini (A) and minj (B) and not the new node (U)
+ for(unsigned int i=0; imini && imini && i>minj){
+ newi=i-2;
+ }
+ if(jmini && jmini && j>minj){
+ newj=j-2;
+ }
+ if(verbose)
+ cout<mini && imini && i>minj){
+ newi=i-2;
+ }
+ tmpdistance[newi][tmpdistance.size()-1]=(distance[mini][i]+distance[minj][i]-distance[mini][minj])/2;
+ if(verbose)
+ cout<setBranchLength((distance[1][2]/2+distance[0][2]/2)/2-tmpT->getValueOFChildBranchLength());
+ tmpT=new NewickTree(tmpT,&trees[2]);
+
+ //parent
+ trees[2].setParent(*tmpT);
+
+ }
+ else{//min distance 0,2
+ trees[0].setBranchLength(distance[0][2]/2-trees[0].getValueOFChildBranchLength());
+ trees[2].setBranchLength(distance[0][2]/2-trees[2].getValueOFChildBranchLength());
+ tmpT=new NewickTree(&trees[0],&trees[2]);
+ //parent
+ trees[0].setParent(*tmpT);
+ trees[2].setParent(*tmpT);
+
+ trees[1].setBranchLength((distance[1][2]/2+distance[1][0]/2)/2-trees[1].getValueOFChildBranchLength());
+ tmpT->setBranchLength((distance[1][2]/2+distance[1][0]/2)/2-tmpT->getValueOFChildBranchLength());
+ tmpT=new NewickTree(tmpT,&trees[1]);
+ //parent
+ trees[1].setParent(*tmpT);
+
+ }
+
+ }
+ else{//min distance 1,2
+ trees[1].setBranchLength(distance[1][2]/2-trees[1].getValueOFChildBranchLength());
+ trees[2].setBranchLength(distance[1][2]/2-trees[2].getValueOFChildBranchLength());
+ tmpT=new NewickTree(&trees[1],&trees[2]);
+ //parent
+ trees[1].setParent(*tmpT);
+ trees[2].setParent(*tmpT);
+
+ trees[0].setBranchLength((distance[0][2]/2+distance[0][1]/2)/2-trees[0].getValueOFChildBranchLength());
+ tmpT->setBranchLength((distance[0][1]/2+distance[0][2]/2)/2-tmpT->getValueOFChildBranchLength());
+ tmpT=new NewickTree(tmpT,&trees[0]);
+ //parent
+ trees[0].setParent(*tmpT);
+
+ }
+
+ if(trees[0].isLeaf()){
+ seqList[count]=trees[0];
+ trees[0].setPosition(count);
+ count++;
+ }
+ if(trees[1].isLeaf()){
+ seqList[count]=trees[1];
+ trees[1].setPosition(count);
+ count++;
+ }
+ if(trees[2].isLeaf()){
+ seqList[count]=trees[2];
+ trees[2].setPosition(count);
+ count++;
+ }
+
+ //calculate weigth
+ //The weights are dependent upon the distance from the root of the tree
+ //but sequences which have a common branch with other sequences share the weight
+ //derived from the shared branch
+ if(verbose)
+ cout<getRoot();
+
+
+ }//end method
+
+
+ // OPERATORS:
+ /**
+ *@Description for create phylogenic Tree use UPGMA method
+ *@param Align2::Alignment ali alignament of starting sequence
+ *@param bool ktuples decide to use ktuples method for calculate identity of 2 sequence
+ *@param bool verbose for more information on video during computing.
+ */
+ void NewickTree::upgma(Align2::Alignment ali,bool ktuples, bool verbose){
+
+ int Nseq=ali.size()+1;//number of seq
+ vector > distance(Nseq, std::vector(Nseq, 0));
+
+ vectorvet=PhyloSupport::calcAlignmentV(&ali,distance,ktuples,verbose);
+ vector trees(Nseq);
+
+
+ if(verbose){
+ cout<<"---------Print Distance Matrix--------------/"< newMediaD ( trees.size());
+
+ //for creat new matrix;
+ unsigned int newi=0;
+ unsigned int newj=0;
+
+
+ while(trees.size()!=1){
+
+ mini=0;
+ minj=1;
+ //search nearest seq
+ for(unsigned int i=0; i distance[i][j] && i!=j)
+ {
+ minj=j;
+ mini=i;
+ }
+ }
+ }
+
+ if(verbose)
+ cout< > tmpdistance(trees.size()-1, std::vector(trees.size()-1, 0));
+ newi=0;
+ newj=0;
+
+ //copy old matrix in new matrix, but not mini (A) and minj (B) and not the new node (U)
+ for(unsigned int i=0; imini && imini && i>minj){
+ newi=i-2;
+ }
+ if(jmini && jmini && j>minj){
+ newj=j-2;
+ }
+ if(verbose)
+ cout<<"mini e minj "<mini && imini && i>minj){
+ newi=i-2;
+ }
+ tmpdistance[newi][tmpdistance.size()-1]=(distance[mini][i]+distance[minj][i])/2;
+ if(verbose)
+ cout<branchLength;
+ if(seq->parent!=NULL){
+ w+=( calcWeigth(seq->parent)/(seq->parent->numberOfChildLeaf) );
+ }
+ return w;
+ }
+
+
+ /**
+ *@Description Return max values of this node children branchLength.
+ * Return zero if left child and right are NULL
+ */
+
+ double NewickTree::getValueOFChildBranchLength(){
+ double value1=0;
+ double value2=0;
+ if(root->left!=NULL)
+ {
+ value1=root->left->branchLength;
+ }
+ if(root->right!=NULL)
+ {
+ value2=root->right->branchLength;
+ }
+ if(value1>value2)
+ return value1;
+ return value2;
+ }
+
+
+ /**
+ * Set the value of node if is -1 its escape during print
+ * @param val
+ *
+ */
+ void NewickTree::setBranchLength(double val){
+ if(val>0)
+ root->branchLength=val;
+ else
+ root->branchLength=0;
+ }
+
+ /**
+ * Set the value of divergence will use in NJ if is -1 its escape during print
+ * @param val
+ *
+ */
+ void NewickTree::setRdiv(double val){
+ root->divergenceR=val;
+ }
+
+
+ void NewickTree::setParent(NewickTree pTree) {
+ root->parent=pTree.getRoot();
+ }
+
+ void NewickTree::setWeigth(double w){
+ root->weigth=w;
+ }
+
+
+ void NewickTree::setPosition(unsigned int pos){
+ root->nseq=pos;
+ }
+
+ void NewickTree::setAlingSeq(vector vSeq){
+ root->allignSeq=vSeq;
+ }
+
+
+}} // namespace
diff --git a/Phylo/Sources/NewickTree.h b/Phylo/Sources/NewickTree.h
new file mode 100644
index 0000000..2e01377
--- /dev/null
+++ b/Phylo/Sources/NewickTree.h
@@ -0,0 +1,132 @@
+/* This file is part of Victor.
+
+ Victor is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Victor is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Victor. If not, see .
+ */
+
+
+#ifndef _NewickTree_h_
+#define _NewickTree_h_
+
+// Includes:
+
+#include
+#include
+#include
+#include
+
+
+using std::string;
+using std::vector;
+
+//rappresent the node of a phylogenic tree
+struct iNode
+{
+ string name;
+ int nseq;
+ string seq;//pure seq
+ vector allignSeq;
+ bool isRigth;
+ bool isLeft;
+ iNode *left;
+ iNode *right;
+ iNode *parent;
+ int numberOfChildLeaf;
+ bool isLeaf;
+ double branchLength;
+ double divergenceR;//for NJ
+ double weigth;//for ClustalW
+ vector weigthV;//for ClustalW
+ vector nameV;//for ClustalW
+ bool ClustalW;//for ClustalW
+
+};
+
+// Global constants, typedefs, etc. (to avoid):
+namespace Victor { namespace Phylo {
+
+ /**@brief Methods to manages the global statistic data.
+ *
+ *@Description Use for create Tree during phylogeny
+ * */
+ class NewickTree {
+ public:
+
+ // CONSTRUCTORS/DESTRUCTOR:
+ NewickTree();
+ NewickTree(int position,string name,string pureSeq,double divR);
+ NewickTree(NewickTree *rTree, NewickTree *lTree);
+ NewickTree(int position,string name,string pureSeq);
+
+ ~NewickTree();
+
+
+ // PREDICATES:
+ iNode *getRoot();
+ iNode *getRightChild();
+ iNode*getLeftChild();
+ iNode*getParent();
+
+ string getName();
+ string getPureSeq();
+
+ double getValueOFChildBranchLength();
+ double getRdiv();
+ double getWeigth();
+
+ bool isLeaf();
+
+ int getNumOfChildren();
+
+ unsigned int getNumberOfLeaf();
+
+ NewickTree getLeafInPosition(unsigned int index);
+
+
+
+
+ // OPERATORS:
+ void setBranchLength(double val);
+ void neighborJoining(Align2::Alignment ali,bool ktuples=false, bool verbose=false);
+ void upgma(Align2::Alignment ali, bool ktuples=false, bool verbose=false);
+ void setRdiv(double val);
+ void setParent(NewickTree pTree);
+ void setWeigth(double w);
+ void setPosition(unsigned int pos);
+ void setAlingSeq(vector vSeq);
+
+
+ // Print in a string the tree using Newick format
+ string printNewickTree();
+
+ protected:
+ // HELPERS:
+ string printNewickTreeNode(iNode node);
+ double calcWeigth(iNode* seq);
+
+ // ATTRIBUTES:
+ iNode *root;
+ vector leafList;
+
+
+ private:
+ // HELPERS:
+ void destroy_tree(iNode *leaf);
+
+
+ };
+
+
+}}
+#endif
+
diff --git a/Phylo/Sources/PhyloSupport.cc b/Phylo/Sources/PhyloSupport.cc
new file mode 100644
index 0000000..9f17d81
--- /dev/null
+++ b/Phylo/Sources/PhyloSupport.cc
@@ -0,0 +1,551 @@
+/* This file is part of Victor.
+
+ Victor is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Victor is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Victor. If not, see .
+ */
+
+
+// Includes:
+#include
+#include
+#include
+#include
+#include "PhyloSupport.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+// Global constants, typedefs, etc. (to avoid):
+using namespace Victor::Align2;
+using namespace Victor;
+using std::string;
+using std::vector;
+using std::ifstream;
+using std::cout;
+using std::endl;
+
+
+namespace Victor { namespace Phylo{
+
+
+ /** @brief
+ *
+ **/
+ // CONSTRUCTORS/DESTRUCTOR:
+
+ /**
+ *@Description Basic contructor
+ */
+ PhyloSupport::PhyloSupport(){
+
+ }
+
+
+ /**
+ *@Description Basic destructor
+ */
+ PhyloSupport::~PhyloSupport() {
+
+ }
+
+ // ATTRIBUTES:
+ double PhyloSupport::openGapPenalty=12;
+ double PhyloSupport::extensionGapPenalty=3;
+ double PhyloSupport::cSeq=0.8;
+ double PhyloSupport::downs=999.9;
+ double PhyloSupport::downa=999.9;
+ double PhyloSupport::ups=999.9;
+ double PhyloSupport::upa=999.9;
+ int PhyloSupport::tokenSize=-1;
+ unsigned int PhyloSupport::weightingScheme=0;
+ string PhyloSupport::matrix="blosum62.dat";
+ bool PhyloSupport::verbose=false;
+
+
+ vector PhyloSupport::calcAlignmentV(Alignment *aliSec, vector > &distance , bool ktuples,bool verbose){
+ string seq1Name, seq2Name, seq1, seq2, sec1, sec2;
+
+ Alignment newAli;
+ string path = getenv("VICTOR_ROOT");
+ if (path.length() < 3)
+ cout << "Warning: environment variable VICTOR_ROOT is not set." << endl;
+
+ string dataPath = path + "data/";
+
+ //n=4? no idea. Default in APP/subali.cc
+ int n=4;
+
+ AlignmentData *ad;
+
+ //Default matrix
+ string matrixFileName=PhyloSupport::matrix;
+ matrixFileName = dataPath + matrixFileName;
+ ifstream matrixFile(matrixFileName.c_str());
+ if (!matrixFile)
+ ERROR("Error opening substitution matrix file.", exception);
+ SubMatrix sub(matrixFile);
+
+ //Default gap function
+
+ GapFunction *gf = new AGPFunction(openGapPenalty, extensionGapPenalty);
+
+
+ //Default Structure
+ Structure *str;
+ str = 0;
+
+ //Default ScoringScheme
+ ScoringScheme *ss;
+
+ //Global Align
+ Align *a;
+ if(verbose)
+ cout << "Start Suboptimal Needleman-Wunsch alignments:" << endl;
+
+ double suboptPenaltyMul=1;
+ double suboptPenaltyAdd=1;
+
+ unsigned int suboptNum=1;
+
+ vector a2;
+
+ Alignment a3;
+
+ vector alignV(aliSec->size()+1);//vector of align
+
+ double score=0;
+ //size not count target. so +1
+ for(unsigned int index=0;indexsize()+1;index++){
+ if(verbose)
+ cout<<" index="<size()){
+ seq1 = Alignment::getPureSequence(aliSec->getTarget());
+ seq1Name = aliSec->getTargetName();
+ }
+ else{
+ seq1 = Alignment::getPureSequence(aliSec->getTemplate(index));
+ seq1Name = aliSec->getTemplateName(index);
+ }
+ for(unsigned int j=0;jsize()+1;j++){
+ if(verbose)
+ cout<<" j="<size()){
+ seq2 = Alignment::getPureSequence(aliSec->getTarget());
+ seq2Name = aliSec->getTargetName();
+ }else{
+ seq2 = Alignment::getPureSequence(aliSec->getTemplate(j));
+ seq2Name = aliSec->getTemplateName(j);
+ }
+
+ ad = new SequenceData(n, seq1, seq2, seq1Name, seq2Name);
+
+ ss = new ScoringS2S(&sub, ad, str, cSeq);
+
+ a = new NWAlign(ad, gf, ss);
+
+ a->setPenalties(suboptPenaltyMul, suboptPenaltyAdd);
+ a2 = a->generateMultiMatch(suboptNum);
+ if (a2.size() == 0)
+ ERROR("No output alignments generated.", exception);
+
+ a2[0].cutTemplate(1);
+ if(ktuples==true)
+ {
+ score=PhyloSupport::distanceCalcTwoSeqktuples(a2[0].getTarget(),a2[0].getTemplate(0));
+ }
+ else
+ score=PhyloSupport::distanceCalcTwoSeq(a2[0].getTarget(),a2[0].getTemplate(0));
+ distance[index][j]=score;
+ if(verbose)
+ cout<<"SCORE="< PhyloSupport::AlingSvsS(string seq1,string seq2,bool verbose){
+
+ string path = getenv("VICTOR_ROOT");
+ if (path.length() < 3)
+ cout << "Warning: environment variable VICTOR_ROOT is not set." << endl;
+
+ string dataPath = path + "data/";
+
+ //n=4? no idea. Default in APP/subali.cc
+ int n=4;
+
+ AlignmentData *ad;
+
+ //Default matrix
+ string matrixFileName=PhyloSupport::matrix;
+ matrixFileName = dataPath + matrixFileName;
+ ifstream matrixFile(matrixFileName.c_str());
+ if (!matrixFile)
+ ERROR("Error opening substitution matrix file.", exception);
+ SubMatrix sub(matrixFile);
+
+ //Default gap function
+
+ GapFunction *gf = new AGPFunction(openGapPenalty, extensionGapPenalty);
+
+
+ //Default Structure
+ Structure *str;
+ str = 0;
+
+ //Default ScoringScheme
+ ScoringScheme *ss;
+
+ //Global Align
+ Align *a;
+ if(verbose)
+ cout << "Start Suboptimal Needleman-Wunsch alignments:" << endl;
+
+ double suboptPenaltyMul=1;
+ double suboptPenaltyAdd=1;
+
+ unsigned int suboptNum=1;
+
+ vector a2;
+
+
+ ad = new SequenceData(n, seq1, seq2, "seq1", "seq2");
+
+ ss = new ScoringS2S(&sub, ad, str, cSeq);
+
+ a = new NWAlign(ad, gf, ss);
+
+ a->setPenalties(suboptPenaltyMul, suboptPenaltyAdd);
+ a2 = a->generateMultiMatch(suboptNum);
+ if (a2.size() == 0)
+ ERROR("No output alignments generated.", exception);
+ vector result(2);
+
+ result[0]=a2[0].getTarget();
+ result[1]=a2[0].getTemplate();
+ if(verbose){
+ cout<<"result "< PhyloSupport::AlingMultiSvsMultiS(vector seq1,vector seq2,vector vWeigth1,vector vWeigth2,bool verbose,int tokenSize){
+
+ string gap="-";
+ string tokenSizeGap="";
+ string tokenSizeGap2="";
+ int tokenSize2=tokenSize;
+
+ //insert gap at end for string with same size//not need
+ while(seq1[0].size()>seq2[0].size()){
+ for(unsigned int i=0; i tokenS1(seq1[0].size()/tokenSize);
+ vector tokenS2(seq2[0].size()/tokenSize2);
+
+ for(unsigned int i=0; i tempSV(seq1.size());
+ for(unsigned int j=0; j tempSV(seq2.size());
+ for(unsigned int j=0; j edgeFor1(tokenS1.size()+tokenS2.size(),-1);
+ vector finalS(seq1.size()+seq2.size(),"");
+ unsigned int count1=0;
+ unsigned int count2=0;
+ while(count1returnBestEdge();
+ count2=edgeFor1[count1];
+ count1++;
+ }
+ else {
+ edgeFor1[count1]=tokenS1[count1]->returnBestEdgeAfterIndex(count2);
+ count2=edgeFor1[count1];
+ count1++;
+ }
+
+ }
+
+
+ //insert in final vector of string
+ for(unsigned int i=0; igetTokenSeq(x);
+ }//if
+ else{
+ for(int j=0; jgetTokenSeq(x);
+ }
+ }//end for x
+ }//end if
+ else{
+ for(unsigned x=0;xgetTokenSeq(x);
+ }
+ }
+ }//ed for i
+
+ for(unsigned int i=0; ifinalS[seq1.size()].size()){
+ for(unsigned int i=0; i &PhyloSupport::split(const std::string &s, char delim, std::vector &elems) {
+ std::stringstream ss(s);
+ std::string item;
+ while (std::getline(ss, item, delim)) {
+ elems.push_back(item);
+ }
+ return elems;
+ }
+
+ std::vector PhyloSupport::split(const std::string &s, char delim) {
+ std::vector elems;
+ split(s, delim, elems);
+ return elems;
+ }
+
+
+ double PhyloSupport::calcDivR(vector vDist){
+ double tmpR=0;
+ for(unsigned int i=0;i > &distance){
+ string sMatrix="";
+ //std::setprecision(3)//for minus precision
+ for(unsigned int i=0; i PhyloSupport::mergeStringVector( vector v1,vector v2 ){
+ vector v(v1.size()+v2.size());
+ for(unsigned int i=0; i PhyloSupport::mergeDoubleVector( vector v1,vector v2 ){
+ vector v(v1.size()+v2.size());
+ for(unsigned int i=0; i.
+ */
+
+
+#ifndef _PhyloSupport_h_
+#define _PhyloSupport_h_
+
+// Includes:
+
+#include
+#include
+#include
+#include
+
+using namespace Victor::Align2;
+using std::string;
+using std::vector;
+
+
+// Global constants, typedefs, etc. (to avoid):
+namespace Victor { namespace Phylo {
+
+ /**@brief Methods to manages the global statistic data.
+ *
+ *@Description Use for create Tree during phylogeny and controll the parameter in NJ, UPGMA and CLUSTALW
+ * */
+ class PhyloSupport {
+ public:
+
+ // CONSTRUCTORS/DESTRUCTOR:
+ PhyloSupport();
+
+
+ ~PhyloSupport();
+
+
+ //STATIC:
+
+ //Calc multi Align
+ static vector calcAlignmentV(Alignment *aliSec, vector > &distance, bool ktuples=false, bool verbose=false);
+ //Calc distance from 2 seq of DNA or protein
+ static double distanceCalcTwoSeq(string seq1,string seq2);
+ //Calc distance from 2 seq of DNA or protein use k-tuples method
+ static double distanceCalcTwoSeqktuples(string seq1,string seq2);
+ //for split string
+ static std::vector &split(const std::string &s, char delim, std::vector &elems);
+ static std::vector split(const std::string &s, char delim);
+ //for calculate divergent usefull for NJ
+ static double calcDivR(vector vDist);
+ //for print matrix of double
+ static void printMatrix(vector > &distance);
+ //aling 2 sequence
+ static vector AlingSvsS(string seq1,string seq2,bool verbose=false);
+ //aling 2 or more sequence use method of graph
+ static vector AlingMultiSvsMultiS(vector seq1,vector seq2,vector vWeigth1,vector vWeigth2,bool verbose=false,int tokenSize=1);
+ //insert a gap in certain position in a string
+ static string insertGapPosition(string seq, int position);
+ //cast a int to string
+ static string intToString( int num );
+ //to merge 2 vector of string, order of merge is v=v1[0],v1[1]....v2[0]....v2[n-1]
+ static vector mergeStringVector(vector v1,vector v2 );
+ //to merge 2 vector of double, order of merge is v=v1[0],v1[1]....v2[0]....v2[n-1]
+ static vector mergeDoubleVector(vector v1,vector v2 );
+
+ //static parameter for NJ UPGMA and ClustalW:
+
+ //gap for align
+ static double openGapPenalty;
+ static double extensionGapPenalty;
+ //Cseq for align
+ static double cSeq;
+ //for align
+ static double downs, downa, ups, upa;
+ static unsigned int weightingScheme;
+ //ClustalW
+ static int tokenSize;
+ //Substitution matrix
+ static string matrix;
+ //verbose output
+ static bool verbose;
+
+
+
+ protected:
+
+
+ private:
+
+
+ };
+
+
+}}
+#endif
+
diff --git a/Phylo/Sources/SeqNodeGraph.cc b/Phylo/Sources/SeqNodeGraph.cc
new file mode 100644
index 0000000..3a73cae
--- /dev/null
+++ b/Phylo/Sources/SeqNodeGraph.cc
@@ -0,0 +1,292 @@
+/* This file is part of Victor.
+
+ Victor is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Victor is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Victor. If not, see .
+ */
+
+
+// Includes:
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+// Global constants, typedefs, etc. (to avoid):
+using namespace Victor::Align2;
+using namespace Victor;
+using namespace std;
+
+namespace Victor { namespace Phylo{
+
+ // CONSTRUCTORS:
+
+ /**
+ *@Description Basic constructor, void node
+ *
+ */
+ SeqNodeGraph::SeqNodeGraph(){
+ indexStart=0;
+ indexFinish=0;
+ totNumSeq=0;
+ vector taxV(0);
+ taxEdge=taxV;
+ tokenSize=0;
+ numSeq=0;
+ averageTax=0;
+ numTokenInSeq2=0;
+ }
+
+
+
+ //totNumSeq how much of seq in this node minimun 1
+ //numSeqInS2 how much seq in the seq2 important for size of vector TaxEge;
+ //Weigth depends on guide tree;
+
+ /**
+ *@Description Basic constructor
+ *@param totNumS how much of seq in this node minimun 1
+ *@param numSeqInS2 how much seq in the seq2 important for size of vector TaxEge;
+ *@param Weigth depends on guide tree
+ *@param indexS start index
+ *@param indexF final index
+ *
+ */
+
+ SeqNodeGraph::SeqNodeGraph(unsigned int indexS,unsigned int indexF, unsigned int totNumS,vector tokenS,unsigned int numSeqInS2){
+ indexStart=indexS;
+ indexFinish=indexF;
+ totNumSeq=totNumS;
+ vector taxV(numSeqInS2,0);
+ taxEdge=taxV;
+ tokenSeq=tokenS;
+ tokenSize=tokenSeq[0].size();
+ numSeq=tokenSeq.size();
+ averageTax=0;
+ numTokenInSeq2=numSeqInS2;
+ }
+
+
+ /**
+ *@Description Basic destructor
+ */
+ SeqNodeGraph::~SeqNodeGraph() {
+ }
+
+ // PREDICATES:
+ /**
+ *@Description find and return the best edge of node
+ *
+ */
+ unsigned int SeqNodeGraph::returnBestEdge(){
+ unsigned int best=0;
+ for(unsigned int i=0; i=taxEdge.size())
+ best=-1;
+ return best;
+ }
+
+ string SeqNodeGraph::getTokenSeq(unsigned int index){
+ return tokenSeq[index];
+ }
+
+ int SeqNodeGraph::getIndexStart(){
+ return indexStart;
+ }
+ int SeqNodeGraph::getIndexFinish(){
+ return indexFinish;
+ }
+ unsigned int SeqNodeGraph::getTotNumSeq(){
+ return totNumSeq;
+ }
+ double SeqNodeGraph::getTaxEdgeInPosition(unsigned int i){
+ return taxEdge[i];
+ }
+ char SeqNodeGraph::getCharOfTokenSeq(unsigned int seqNum, unsigned int pos){
+ return tokenSeq[seqNum][pos];
+ }
+ unsigned int SeqNodeGraph::getTokenSize(){
+ return tokenSize;
+ }
+ double SeqNodeGraph::getAverageTax(){
+ return averageTax;
+ }
+
+
+
+
+ // OPERATORS:
+ void SeqNodeGraph::setIndexStart(unsigned int i){
+ indexStart=i;
+ }
+ void SeqNodeGraph::setIndexFinish(unsigned int i){
+ indexFinish=i;
+ }
+ void SeqNodeGraph::setTotNumSeq(int i){
+ numSeq=i;
+ }
+ void SeqNodeGraph::setTaxEdgeInPosition(unsigned int i, double tax){
+ taxEdge[i]=tax+taxEdge[i];
+ }
+ void SeqNodeGraph::setTokenSize(int size){
+ tokenSize=size;
+ }
+
+ /**
+ *@Description for print all edge-taxes of one node
+ *
+ */
+ void SeqNodeGraph::printTaxEdge(){
+ cout<<"tax vector"< vNode array of nodes
+ *
+ */
+ void SeqNodeGraph::setNode(SeqNodeGraph* node, vector vNode){
+ //matrix config
+ string path = getenv("VICTOR_ROOT");
+ if (path.length() < 3)
+ cout << "Warning: environment variable VICTOR_ROOT is not set." << endl;
+
+ string dataPath = path + "data/";
+
+ //Default matrix
+ string matrixFileName="blosum62.dat";
+ matrixFileName = dataPath + matrixFileName;
+ ifstream matrixFile(matrixFileName.c_str());
+ if (!matrixFile)
+ ERROR("Error opening substitution matrix file.", exception);
+ SubMatrix sub(matrixFile);
+ //end matrix config
+
+ for(unsigned int i=0; igetTotNumSeq() ;j++)
+ {//how seq in node[i] of vNode
+ for(unsigned int x=0; xgetTokenSize();x++)
+ {//for all char in one string in node[i] of vNode
+ for(unsigned int y=0; ygetTotNumSeq();y++)
+ {//for all token of string in node
+ for(unsigned int w=0; w