Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/development.md
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,13 @@ To quickly install the package and run it, simply run the following
pixi run -e r-phylo2vec install-r
```

Some R test dependencies are not available on conda-forge and must be installed
once from CRAN:

```console
pixi run -e r-phylo2vec Rscript -e "install.packages('treestats', repos='https://cloud.r-project.org')"
```

Once the package is installed you can open up the R terminal:

```console
Expand Down
3 changes: 3 additions & 0 deletions r-phylo2vec/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

export(add_leaf)
export(apply_label_mapping)
export(b2)
export(check_m)
export(check_v)
export(cophenetic_distances)
Expand All @@ -16,6 +17,7 @@ export(get_node_depth)
export(get_node_depths)
export(has_branch_lengths)
export(incidence)
export(leaf_depth_variance)
export(load_newick)
export(load_p2v)
export(pre_precision)
Expand All @@ -25,6 +27,7 @@ export(remove_branch_lengths)
export(remove_leaf)
export(remove_parent_labels)
export(robinson_foulds)
export(sackin)
export(sample_matrix)
export(sample_tree)
export(sample_vector)
Expand Down
37 changes: 34 additions & 3 deletions r-phylo2vec/R/extendr-wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,18 @@ NULL
#' @export
add_leaf <- function(vector, leaf, branch) .Call(wrap__add_leaf, vector, leaf, branch)

#' B2 balance index of a tree (Shao and Sokal, 1990).
#'
#' B2 quantifies tree balance through the probabilities of random walks going from the root to each leaf.
#' In a binary tree it equals the sum, over all leaves, of the leaf depth times 2 to the power of minus that depth.
#' Balanced trees score higher; imbalanced trees score lower.
#' Reference: https://doi.org/10.1007/s00285-021-01662-7.
#'
#' @param vector phylo2vec vector representation of a tree topology
#' @return B2 index (numeric)
#' @export
b2 <- function(vector) {
  .Call(wrap__b2, vector)
}

#' Apply an integer-taxon label mapping (label_mapping)
#' to an integer-based newick (where leaves are integers)
#' and produce a mapped Newick (where leaves are strings (taxa))
Expand Down Expand Up @@ -61,7 +73,7 @@ check_v <- function(vector) invisible(.Call(wrap__check_v, vector))
#' @param unrooted If true, the distance is calculated as the distance between each leaf and their most recent common ancestor, multiplied by 2. If false, the distance is calculated as the distance from each leaf to their most recent common ancestor.
#' @return Cophenetic distance matrix (shape: (n_leaves, n_leaves))
#' @export
cophenetic_distances <- function(tree, unrooted) .Call(wrap__cophenetic_distances, tree, unrooted)
cophenetic_distances <- function(tree, unrooted = FALSE) .Call(wrap__cophenetic_distances, tree, unrooted)

#' Create an integer-taxon label mapping (label_mapping)
#' from a string-based newick (where leaves are strings)
Expand Down Expand Up @@ -112,7 +124,7 @@ from_pairs <- function(pairs) .Call(wrap__from_pairs, pairs)
#' Similar to ape's `getMRCA` function in R (for leaf nodes)
#' and ETE's `get_common_ancestor` in Python (for all nodes), but for phylo2vec vectors.
#'
#' @param vector phylo2vec vector representation of a tree topology
#' @param tree A phylo2vec tree
#' @param node1 The first node (0-indexed)
#' @param node2 The second node (0-indexed)
#' @return The common ancestor node (0-indexed)
Expand Down Expand Up @@ -152,6 +164,15 @@ incidence_csr <- function(input_vector) .Call(wrap__incidence_csr, input_vector)

# Internal wrapper: forwards `input_vector` unchanged to the native routine
# `wrap__incidence_dense` and returns whatever that routine produces.
incidence_dense <- function(input_vector) {
  .Call(wrap__incidence_dense, input_vector)
}

#' Variance of the leaf depths of a tree.
#'
#' Imbalanced trees yield higher values; balanced trees yield lower values.
#'
#' @param vector phylo2vec vector representation of a tree topology
#' @return Variance of leaf depths (numeric)
#' @export
leaf_depth_variance <- function(vector) {
  .Call(wrap__leaf_depth_variance, vector)
}

#' Produce an ordered version (i.e., birth-death process version)
#' of a phylo2vec vector using the Queue Shuffle algorithm.
#'
Expand Down Expand Up @@ -212,7 +233,17 @@ remove_leaf <- function(vector, leaf) .Call(wrap__remove_leaf, vector, leaf)
#' @param normalize If TRUE, return normalized distance in range `[0.0, 1.0]`
#' @return RF distance (numeric)
#' @export
robinson_foulds <- function(v1, v2, normalize) .Call(wrap__robinson_foulds, v1, v2, normalize)
robinson_foulds <- function(v1, v2, normalize = FALSE) .Call(wrap__robinson_foulds, v1, v2, normalize)

#' Sackin index of a tree.
#'
#' The Sackin index measures tree imbalance as the sum of the depths of all leaves in the tree.
#' Imbalanced trees yield higher values; balanced trees yield lower values.
#'
#' @param vector phylo2vec vector representation of a tree topology
#' @return Sackin index (numeric)
#' @export
sackin <- function(vector) {
  .Call(wrap__sackin, vector)
}

#' Sample a random tree with branch lengths via phylo2vec
#'
Expand Down
15 changes: 0 additions & 15 deletions r-phylo2vec/R/stats.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,21 +27,6 @@ precision <- function(vector_or_matrix) {
a - b %*% solve(c, d)
}

#' Cophenetic distance matrix of a phylo2vec tree.
#'
#' For a pair of leaves, the cophenetic distance is the distance from each
#' leaf to their most recent common ancestor.
#' Vectors give the topological distance; matrices use branch lengths.
#'
#' @param tree phylo2vec vector (1D) or matrix (2D)
#' @param unrooted If TRUE, compute unrooted distances. Default is FALSE.
#' @return Cophenetic distance matrix (shape: (n_leaves, n_leaves))
#' @export
cophenetic_distances <- function(tree, unrooted = FALSE) .Call(wrap__cophenetic_distances, tree, unrooted)

#' Compute the Robinson-Foulds distance between two trees.
#'
#' RF distance counts the number of bipartitions (splits) that differ
Expand Down
20 changes: 20 additions & 0 deletions r-phylo2vec/man/b2.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 3 additions & 12 deletions r-phylo2vec/man/cophenetic_distances.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions r-phylo2vec/man/get_common_ancestor.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions r-phylo2vec/man/leaf_depth_variance.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 18 additions & 0 deletions r-phylo2vec/man/sackin.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

49 changes: 48 additions & 1 deletion r-phylo2vec/src/rust/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use phylo2vec::matrix::convert as mconvert;
use phylo2vec::matrix::graph as mgraph;
use phylo2vec::matrix::ops as mops;
use phylo2vec::newick;
use phylo2vec::vector::balance as vbalance;
use phylo2vec::vector::base as vbase;
use phylo2vec::vector::convert as vconvert;
use phylo2vec::vector::distance as vdist;
Expand Down Expand Up @@ -628,18 +629,62 @@ fn incidence_csr(input_vector: Vec<i32>) -> List {
/// @return RF distance (numeric)
/// @export
#[extendr]
fn robinson_foulds(v1: Vec<i32>, v2: Vec<i32>, normalize: bool) -> f64 {
fn robinson_foulds(v1: Vec<i32>, v2: Vec<i32>, #[default = "FALSE"] normalize: bool) -> f64 {
let v1_usize = as_usize(v1);
let v2_usize = as_usize(v2);
vdist::robinson_foulds(&v1_usize, &v2_usize, normalize)
}

/// Compute the Sackin index of a tree.
///
/// The Sackin index is a measure of tree imbalance, defined as the sum of the depths of all leaves in the tree.
/// Higher values indicate more imbalanced trees, while lower values indicate more balanced trees.
///
/// @param vector phylo2vec vector representation of a tree topology
/// @return Sackin index (numeric)
/// @export
#[extendr]
fn sackin(vector: Vec<i32>) -> i32 {
let v_usize = as_usize(vector);
vbalance::sackin(&v_usize) as i32
}

/// Variance of the leaf depths of a tree.
///
/// Imbalanced trees yield higher values; balanced trees yield lower values.
///
/// @param vector phylo2vec vector representation of a tree topology
/// @return Variance of leaf depths (numeric)
/// @export
#[extendr]
fn leaf_depth_variance(vector: Vec<i32>) -> f64 {
    // Convert the incoming i32 entries once, then delegate to the library.
    vbalance::leaf_depth_variance(&as_usize(vector))
}

/// B2 balance index of a tree (Shao and Sokal, 1990).
///
/// B2 quantifies tree balance through the probabilities of random walks going from the root to each leaf.
/// In a binary tree it equals the sum, over all leaves, of the leaf depth times 2 to the power of minus that depth.
/// Balanced trees score higher; imbalanced trees score lower.
/// Reference: https://doi.org/10.1007/s00285-021-01662-7.
///
/// @param vector phylo2vec vector representation of a tree topology
/// @return B2 index (numeric)
/// @export
#[extendr]
fn b2(vector: Vec<i32>) -> f64 {
    let topology = as_usize(vector);
    vbalance::b2(&topology)
}

// Macro to generate exports.
// This ensures exported functions are registered with R.
// See corresponding C code in `entrypoint.c`.
extendr_module! {
mod phylo2vec;
fn add_leaf;
fn b2;
fn apply_label_mapping;
fn check_m;
fn check_v;
Expand All @@ -657,12 +702,14 @@ extendr_module! {
fn incidence_csc;
fn incidence_csr;
fn incidence_dense;
fn leaf_depth_variance;
fn queue_shuffle;
fn remove_branch_lengths;
fn remove_parent_labels;
fn pre_precision;
fn remove_leaf;
fn robinson_foulds;
fn sackin;
fn sample_matrix;
fn sample_vector;
fn to_ancestry;
Expand Down
Loading