-
Notifications
You must be signed in to change notification settings - Fork 1
Tree & Likelihood Computation
Laurent Guéguen edited this page Nov 10, 2021
·
1 revision
Given an alignment compute a distance tree under a model, and then compute maximum likelihood (including branch lengths) on this tree and alignment.
#include <Bpp/Seq/Alphabet/DNA.h>
#include <Bpp/Seq/Container/VectorSiteContainer.h>
#include <Bpp/Seq/Io/Fasta.h>
#include <Bpp/Phyl/Model/Nucleotide/T92.h>
#include <Bpp/Phyl/Model/FrequencySet/NucleotideFrequencySet.h>
#include <Bpp/Phyl/Model/RateDistribution/ConstantRateDistribution.h>
#include <Bpp/Phyl/Distance/DistanceEstimation.h>
#include <Bpp/Phyl/Distance/BioNJ.h>
#include <Bpp/Phyl/OptimizationTools.h>
#include <Bpp/Phyl/Tree/PhyloTreeTools.h>
#include <Bpp/Phyl/Likelihood/ParametrizablePhyloTree.h>
#include <Bpp/Phyl/Likelihood/NonHomogeneousSubstitutionProcess.h>
#include <Bpp/Phyl/Likelihood/DataFlow/LikelihoodCalculationSingleProcess.h>
#include <iostream>
using namespace std;
using namespace bpp;
int main (int argc, char* argv[]){
string nameSeq = "lysozymeLarge.fasta";
Fasta Fst;
auto alpha = std::make_shared<DNA>();
auto sites = std::shared_ptr<AlignedSequenceContainer>(Fst.readAlignment(nameSeq , alpha.get()));
auto model = std::make_shared<T92>(alpha.get(), 3., .1);
auto rdist = std::make_shared<ConstantRateDistribution>();
auto rootFreqs = std::make_shared<GCFrequencySet>(alpha.get());
// Compute ML distance between sequences
DistanceEstimation distEstimation(model, rdist, sites.get(), 1, false);
// Method of clustering
auto distMethod = std::make_shared<BioNJ>();
distMethod->outputPositiveLengths(true);
// Build tree
ParameterList parametersToIgnore;
TreeTemplate<Node>* tree = OptimizationTools::buildDistanceTree(distEstimation, *distMethod, parametersToIgnore);
// Optimize likelihood on this tree
/* Convert Tree in PhyloTree */
auto phyloT = PhyloTreeTools::buildFromTreeTemplate(*tree);
ParametrizablePhyloTree parTree(*phyloT);
/* Create Substitution Process */
auto subProc= NonHomogeneousSubstitutionProcess::createHomogeneousSubstitutionProcess(model, rdist, &parTree, rootFreqs);
/* Put objects in DataFlow */
Context context;
auto lik = std::make_shared<LikelihoodCalculationSingleProcess>(context, *sites, *subProc);
SingleProcessPhyloLikelihood ntl(context, lik);
cout << endl << endl;
cout << "Initial value " << ntl.getValue() << endl;
OutputStream* profiler = new StlOutputStream(new ofstream("profile.txt", ios::out));
OutputStream* messenger = new StlOutputStream(new ofstream("messages.txt", ios::out));
OptimizationTools::optimizeNumericalParameters2( ntl, ntl.getParameters(), 0,
0.0001, 10000, messenger, profiler, false, false, 1,
OptimizationTools::OPTIMIZATION_NEWTON);
cout << "Final value " << ntl.getValue() << endl;
ntl.getParameters().printParameters(cout);
/* Update Substitution Process parameters */
subProc->matchParametersValues(ntl.getParameters());
subProc->getParameters().printParameters(cout);
}