diff --git a/.gitignore b/.gitignore index 83ae2d0..80c9ef9 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,6 @@ .RData .Ruserdata dimmod.mod +.DS_Store docs + diff --git a/NAMESPACE b/NAMESPACE index 32a74a6..79195ef 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,5 +4,6 @@ export(DAMOCLES_ML) export(DAMOCLES_bootstrap) export(DAMOCLES_loglik) export(DAMOCLES_sim) +export(DAMOCLES_sim_fast) export(HERACLES_ImportanceSampling) useDynLib(DAMOCLES) diff --git a/R/DAMOCLES_sim_fast.R b/R/DAMOCLES_sim_fast.R new file mode 100644 index 0000000..ad687ad --- /dev/null +++ b/R/DAMOCLES_sim_fast.R @@ -0,0 +1,78 @@ +#' Simulating DAMOCLES +#' +#' Simulates DAMOCLES +#' +#' +#' @param phy phylogeny in phylo format +#' @param gamma_0 initial per lineage rate of immigration (gamma) +#' @param mu per lineage rate of local extinction +#' @param root.state geographic state of ancestor i.e. present (1) or absent(0) +#' @param keepExtinct whether to retain data for extinct lineages +#' @param stateOnly whether to return only the presence or absence states of the extant species +#' @return Either a vector with presence absence states or a table containing the following +#' columns is realised: The first two columns contain the edge list of the phylogenetic tree, and the following two +#' contain when each edge starts and ends, and the fifth column if lineage is extant. +#' The sixth column contains the presence (1) or absence (0) of the species, which is identical +#' to the output if stateOnly == TRUE, when keepExtinct == FALSE and stateOnly == FALSE. Column +#' seven and eight contain information on respectively whetever the edge is a tip and its tip number. +#' @author Bouwe R. Reijenga +#' @seealso \code{\link{DAMOCLES_sim}} +#' @references Pigot, A.L. & R.S. Etienne (2015). A new dynamic null model for +#' phylogenetic community structure. Ecology Letters 18: 153-163. +#' @keywords models +#' @examples +#' +#' #create random phylogeny +#' library(ape) +#' phy = ape::rcoal(10) +#' +#' #run DAMOCLES +#' patable = DAMOCLES_sim_fast( +#' phy, +#' gamma_0 = 1.5, +#' mu = 0, +#' root.state = 1, +#' keepExtinct = FALSE, +#' stateOnly = FALSE +#' ) +#' +#' #show presence/absence on the tree +#' patable$col = rep("black",dim(patable)[1]) +#' patable$col[which(patable$state == 1)] = "red" +#' plot(phy,tip.col = patable$col) +#' +#' @export DAMOCLES_sim_fast +DAMOCLES_sim_fast = function(phy, gamma_0, mu, root.state, keepExtinct = FALSE, stateOnly = FALSE){ + # first construct the presence absence table from the phylogeny + dn = ape::dist.nodes(phy) + ntips = length(phy$tip.label) + nbranch = 2 * ntips - 2 + patable = data.frame(p = phy$edge[, 1], d = phy$edge[, 2], + age.start = rep(NA, nbranch), age.end = rep(NA, nbranch), + extant = rep(0, nbranch), state = rep(NA, nbranch)) + patable$age.start = dn[patable$p, ntips + 1] + patable$age.end = dn[patable$d, ntips + 1] + patable$age.end[which(patable$d < length(phy$tip.label) + + 1)] = max(patable$age.end) + ce = which(patable$p == min(patable$p)) + patable$extant[ce] = 1 + patable$state = rep(0, dim(patable)[1]) + patable$tips[which(patable$d <= ntips)] = 1 + patable$y[which(patable$tips == 1)] = patable$d[which(patable$tips == 1)] + # sample which species has which root.state + patable$state[ce] = sample(c(0, root.state)) + # simulte + patable$state <- DAMOCLES_sim_cpp(pa = patable, gamma = gamma_0, mu = mu) + # return the full table or only the extant states + if(stateOnly == TRUE){ + patable <- patable$state[which(patable$tips == 1)] + }else{ + if(keepExtinct == TRUE){ + patable$extant[which(patable$tips == 1)] + }else{ + patable <- patable[which(patable$tips == 1),] + patable$extant <- 1 + } + } + return(patable) +} \ No newline at end of file diff --git a/R/RcppExports.R b/R/RcppExports.R index 4401ff8..78fc54a 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -8,3 +8,7 @@ DAMOCLES_integrate_odeint <- function(ry, times, M, atol, rtol, stepper) { .Call('_DAMOCLES_DAMOCLES_integrate_odeint', PACKAGE = 'DAMOCLES', ry, times, M, atol, rtol, stepper) } +DAMOCLES_sim_cpp <- function(pa, gamma, mu) { + .Call('_DAMOCLES_DAMOCLES_sim_cpp', PACKAGE = 'DAMOCLES', pa, gamma, mu) +} + diff --git a/man/DAMOCLES_sim_fast.Rd b/man/DAMOCLES_sim_fast.Rd new file mode 100644 index 0000000..b26ee64 --- /dev/null +++ b/man/DAMOCLES_sim_fast.Rd @@ -0,0 +1,72 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/DAMOCLES_sim_fast.R +\name{DAMOCLES_sim_fast} +\alias{DAMOCLES_sim_fast} +\title{Simulating DAMOCLES} +\usage{ +DAMOCLES_sim_fast( + phy, + gamma_0, + mu, + root.state, + keepExtinct = FALSE, + stateOnly = FALSE +) +} +\arguments{ +\item{phy}{phylogeny in phylo format} + +\item{gamma_0}{initial per lineage rate of immigration (gamma)} + +\item{mu}{per lineage rate of local extinction} + +\item{root.state}{geographic state of ancestor i.e. present (1) or absent(0)} + +\item{keepExtinct}{whether to retain data for extinct lineages} + +\item{stateOnly}{whether to return only the presence or absence states of the extant species} +} +\value{ +Either a vector with presence absence states or a table containing the following +columns is realised: The first two columns contain the edge list of the phylogenetic tree, and the following two +contain when each edge starts and ends, and the fifth column if lineage is extant. +The sixth column contains the presence (1) or absence (0) of the species, which is identical +to the output if stateOnly == TRUE, when keepExtinct == FALSE and stateOnly == FALSE. Column +seven and eight contain information on respectively whetever the edge is a tip and its tip number. +} +\description{ +Simulates DAMOCLES +} +\examples{ + +#create random phylogeny +library(ape) +phy = ape::rcoal(10) + +#run DAMOCLES +patable = DAMOCLES_sim_fast( + phy, + gamma_0 = 1.5, + mu = 0, + root.state = 1, + keepExtinct = FALSE, + stateOnly = FALSE + ) + +#show presence/absence on the tree +patable$col = rep("black",dim(patable)[1]) +patable$col[which(patable$state == 1)] = "red" +plot(phy,tip.col = patable$col) + +} +\references{ +Pigot, A.L. & R.S. Etienne (2015). A new dynamic null model for +phylogenetic community structure. Ecology Letters 18: 153-163. +} +\seealso{ +\code{\link{DAMOCLES_sim}} +} +\author{ +Bouwe R. Reijenga +} +\keyword{models} diff --git a/src/DAMOCLES.so b/src/DAMOCLES.so new file mode 100755 index 0000000..29e469e Binary files /dev/null and b/src/DAMOCLES.so differ diff --git a/src/DAMOCLES_integrate_odeint.o b/src/DAMOCLES_integrate_odeint.o index b079a24..05e437d 100644 Binary files a/src/DAMOCLES_integrate_odeint.o and b/src/DAMOCLES_integrate_odeint.o differ diff --git a/src/DAMOCLES_sim_fast.cpp b/src/DAMOCLES_sim_fast.cpp new file mode 100644 index 0000000..aa3647f --- /dev/null +++ b/src/DAMOCLES_sim_fast.cpp @@ -0,0 +1,183 @@ +#include +#include +#include +using namespace Rcpp; + + +// [[Rcpp::export]] +IntegerVector DAMOCLES_sim_cpp(Rcpp::DataFrame pa, double gamma, double mu) { + + double tstep = 0, wt, event; + IntegerVector extant = pa["extant"]; + IntegerVector state = pa["state"]; + std::vector start = pa["age.start"]; + std::vector end = pa["age.end"]; + std::vector parent = pa["p"]; + std::vector daugther = pa["d"]; + + double tlastevent = *std::max_element(end.begin(), end.end()); + + int nExt = sum(extant); + int nP = sum(state); + int nA = nExt - nP; + std::vector focP, focA; + + double tnextevent = tlastevent; + for(auto j = 0; j < extant.size(); j++){ + if(extant[j] == 1){ + if(state[j] == 1){ + focP.push_back(j); + }else{ + focA.push_back(j); + } + if(end[j] < tnextevent){ + tnextevent = end[j]; + } + } + } + // rate totals across extant species + double gamma_tot = gamma * (nExt - nP); + double mu_tot = mu * nP; + // maximum rates based on the number of extant species + double mu_max = mu * nExt; + double gamma_max = gamma * nExt; + double tot_max = gamma_max + mu_max; + // relative rates based on the ratio between the actual rates and the maximum rates + double mu_rel = mu_tot / tot_max; + double gamma_rel = gamma_tot / tot_max; + + // create a vector with the probabilities of each event, equal to their total rate + NumericVector prob = NumericVector::create(1 - (gamma_rel + mu_rel), gamma_rel, mu_rel); + + if(tot_max <= 0){ + // if the total rate is <= 0 no event will be possible so kill the simulation + Rcout << "tot rate error" << std::endl; + return -1; + } + + while(tstep < tlastevent){ + wt = Rcpp::rexp(1, tot_max)[0]; + tstep += wt; + + if(tstep < tnextevent){ + // sample an event + event = Rcpp::sample(3, 1, false, prob)[0] - 1; + + // colonisation + if(event == 1){ + int foc = Rcpp::sample(nA, 1, false)[0] - 1; + focP.push_back(focA[foc]); + std::vector::iterator nth = focA.begin() + foc; + focA.erase(nth); + nP++; + nA--; + mu_tot = mu * nP; + gamma_tot = gamma * nA; + mu_rel = mu_tot / tot_max; + gamma_rel = gamma_tot / tot_max; + prob[0] = 1 - (gamma_rel + mu_rel); + prob[1] = gamma_rel; + prob[2] = mu_rel; + } + // local extinction + if(event == 2){ + int foc = Rcpp::sample(nP, 1, false)[0] - 1; + focA.push_back(focP[foc]); + std::vector::iterator nth = focP.begin() + foc; + focP.erase(nth); + nP--; + nA++; + mu_tot = mu * nP; + gamma_tot = gamma * nA; + mu_rel = mu_tot / tot_max; + gamma_rel = gamma_tot / tot_max; + prob[0] = 1 - (gamma_rel + mu_rel); + prob[1] = gamma_rel; + prob[2] = mu_rel; + } + // if it's time for the next event, update the states, presences/absences, and focal species + }else if (tnextevent != tlastevent){ + // find which species are speciating (might me more than one so a while loop is needed) + std::vector::iterator it = end.begin(); + while ((it = std::find_if(it, end.end(), [&tnextevent](double x){return x == tnextevent; })) != end.end()){ + int foc = std::distance(end.begin(), it); + // set the species to extinct + extant[foc] = 0; + std::vector::iterator iter1 = std::find(focA.begin(), focA.end(), foc); + std::vector::iterator iter2 = std::find(focP.begin(), focP.end(), foc); + // if absent, change state to absent as well + if (iter1 != focA.end()){ // == if iter1 != focA.end() this means that it is absent + state[foc] = 0; + focA.erase(iter1); + }else{ + state[foc] = 1; + focP.erase(iter2); + } + // find if the species has any daughters + if(std::find(parent.begin(), parent.end(), daugther[foc]) != parent.end()){ + std::vector B; + int da = daugther[foc]; // name of the ancestral species + // we're loopin through the parents of the daughters to see if any has the ancestral species as its parent + std::vector::iterator pa = parent.begin(); + while ((pa = std::find_if(pa, parent.end(), [&da](int x){return x == da; })) != parent.end()){ + int dau = std::distance(parent.begin(), pa); + extant[dau] = 1; + B.push_back(dau); + pa++; + } + // set the states for the daugther lineages, which depend on the state of the parent (either 1 or 0 if in the comm or both 0 if p is absent) + if(state[foc] == 1){ + IntegerVector A = {0,1}; + IntegerVector sts = Rcpp::sample(A, 2, false); + + int z = 0; + for(std::vector::const_iterator k = B.begin(); k != B.end(); ++k) { + if(sts[z] == 1){ + focP.push_back(*k); + }else{ + focA.push_back(*k); + } + z++; + } + // if the parent is absent, both daughters will be as well + }else{ + for(std::vector::const_iterator k = B.begin(); k != B.end(); ++k) { + focA.push_back(*k); + } + } + } + it++; + } + // update the counters + nExt = sum(extant); + nP = focP.size(); + nA = focA.size(); + mu_tot = mu * nP; + gamma_tot = gamma * nA; + + mu_max = mu * nExt; + gamma_max = gamma * nExt; + tot_max = gamma_max + mu_max; + + mu_rel = mu_tot / tot_max; + gamma_rel = gamma_tot / tot_max; + prob = {1 - (gamma_rel + mu_rel), gamma_rel, mu_rel}; + // change the tstep to the time of speciation + tstep = tnextevent; + // determine when the next event is going to happen + tnextevent = tlastevent; + for(int j = 0; j < extant.size(); j++){ + if(extant[j] == 1){ + if(end[j] < tnextevent){ + tnextevent = end[j]; + } + } + } + } + } + for(std::vector::const_iterator l = focP.begin(); l != focP.end(); ++l) { + state[*l] = 1; + } + return state; +} + diff --git a/src/DAMOCLES_sim_fast.o b/src/DAMOCLES_sim_fast.o new file mode 100644 index 0000000..1b25318 Binary files /dev/null and b/src/DAMOCLES_sim_fast.o differ diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 05d6b9b..3a3cd6d 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -27,9 +27,23 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// DAMOCLES_sim_cpp +IntegerVector DAMOCLES_sim_cpp(Rcpp::DataFrame pa, double gamma, double mu); +RcppExport SEXP _DAMOCLES_DAMOCLES_sim_cpp(SEXP paSEXP, SEXP gammaSEXP, SEXP muSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::DataFrame >::type pa(paSEXP); + Rcpp::traits::input_parameter< double >::type gamma(gammaSEXP); + Rcpp::traits::input_parameter< double >::type mu(muSEXP); + rcpp_result_gen = Rcpp::wrap(DAMOCLES_sim_cpp(pa, gamma, mu)); + return rcpp_result_gen; +END_RCPP +} static const R_CallMethodDef CallEntries[] = { {"_DAMOCLES_DAMOCLES_integrate_odeint", (DL_FUNC) &_DAMOCLES_DAMOCLES_integrate_odeint, 6}, + {"_DAMOCLES_DAMOCLES_sim_cpp", (DL_FUNC) &_DAMOCLES_DAMOCLES_sim_cpp, 3}, {NULL, NULL, 0} }; diff --git a/src/RcppExports.o b/src/RcppExports.o index 8215526..4a80c17 100644 Binary files a/src/RcppExports.o and b/src/RcppExports.o differ