Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,6 @@ License: GPL-3
LazyData: true
URL: https://github.com/paultpearson/TDAmapper/
BugReports: https://github.com/paultpearson/TDAmapper/issues
LinkingTo: Rcpp
Imports: Rcpp
RoxygenNote: 6.0.1
5 changes: 4 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
# Generated by roxygen2 (4.1.1): do not edit by hand
# Generated by roxygen2: do not edit by hand

export(mapper)
export(mapper1D)
export(mapper2D)
export(mapperEdges)
export(mapperVertices)
export(mapper_new)
importFrom(Rcpp,sourceCpp)
useDynLib(TDAmapper)
15 changes: 15 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

# Thin R wrapper over the C++ routine '_TDAmapper_adjacencyCpp'.
# Computes the adjacency structure over candidate level-set pairs
# (see MapperRef$computeEdges for the calling convention).
adjacencyCpp <- function(ls_pairs, nodes, ls_node_map) {
.Call('_TDAmapper_adjacencyCpp', PACKAGE = 'TDAmapper', ls_pairs, nodes, ls_node_map)
}

# Thin R wrapper over the C++ routine '_TDAmapper_constructLevelSets'.
# Builds the level sets of the cover from the filter values and the
# interval geometry (see Cover$constructCover for the calling convention).
constructLevelSets <- function(filter_values, index_set, interval_length, step_size, filter_min) {
.Call('_TDAmapper_constructLevelSets', PACKAGE = 'TDAmapper', filter_values, index_set, interval_length, step_size, filter_min)
}

# Thin R wrapper over the C++ routine '_TDAmapper_dist_subset'.
# Extracts the sub-'dist' restricted to the observations in 'idx'
# without materializing the full distance matrix.
dist_subset <- function(dist, idx) {
.Call('_TDAmapper_dist_subset', PACKAGE = 'TDAmapper', dist, idx)
}

151 changes: 151 additions & 0 deletions R/cover.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
#' Reference Class (R5) implementation of Mapper for the TDAmapper package in R
#' Cover reference class: represents an open cover of the filter space.
#' Fields:
#'   filter_values   := matrix of filter values (one row per observation, one column per filter dimension)
#'   num_intervals   := vector of number of bins to cover the filter space with (per dimension)
#'   percent_overlap := vector of overlap (gain) percentages, one per dimension
#'   index_set       := data.frame of per-dimension interval indices (cartesian product), one row per level set
#'   level_sets      := list of level sets (including min/max bounds), built by constructCover()
#'   metric          := distance measure name (e.g. "euclidean")
#'   type            := cover type (currently only "rectangular")
cover_ref <- setRefClass("Cover", fields = c("filter_values", "num_intervals", "percent_overlap", "index_set", "level_sets", "metric", "type"))

## Cover initialization: records the filter values (coercing a bare vector
## to a 1-column matrix), the metric, the cover type, the resolution 'k'
## and the gain 'g', then immediately builds the cover.
cover_ref$methods(initialize = function(filter_values, k, g, measure = "euclidean", type = "rectangular"){
  ## A dimensionless numeric vector becomes an (n x 1) matrix
  if (is.null(dim(filter_values))){
    filter_values <<- array(filter_values, dim = c(length(filter_values), 1))
  } else {
    filter_values <<- filter_values
  }
  metric <<- measure
  type <<- type

  ## TODO: move k and g to other instantiations
  num_intervals <<- k
  percent_overlap <<- g
  constructCover()
})

## One-line description of the cover parameters, with the cover type
## capitalized (first letter upper, rest lower).
cover_ref$methods(summary = function(){
  head_chr <- toupper(substr(type, start = 1, stop = 1))
  tail_chr <- tolower(substr(type, start = 2, stop = nchar(type)))
  sprintf("%s cover: number intervals = %d, overlap = %.2f", paste0(head_chr, tail_chr), num_intervals, percent_overlap)
})

## Print method: reports the number of covered objects, the filter
## dimensionality, and the cover parameters.
cover_ref$methods(show = function(){
  out <- sprintf("Open cover for %d objects (d = %d)", nrow(filter_values), ncol(filter_values))
  out <- c(out, sprintf("Parameters: number intervals = %d, overlap = %.2f", num_intervals, percent_overlap))
  writeLines(out)
})

## Set the overlap/gain threshold 'g'. Accepts either one scalar (recycled
## across dimensions) or one value per filter dimension; values above 0.50
## are rejected.
cover_ref$methods(setOverlap = function(g){
  dim_Z <- ncol(filter_values)
  wrong_length <- (length(g) != dim_Z) && (length(g) != 1)
  if (wrong_length || any(g > 0.50)){
    stop("The gain 'g' must be a single scalar or a vector of scalars with length equal to the dimensionality of the filter space.")
  }
  ## Recycle a scalar gain into a per-dimension vector
  if (length(g) == 1 && dim_Z > 1){
    g <- rep(g, dim_Z)
  }
  percent_overlap <<- g
})

## Set the resolution 'k' (number of intervals). Accepts either one scalar
## (recycled across dimensions) or one value per filter dimension.
cover_ref$methods(setResolution = function(k){
  dim_Z <- ncol(filter_values)
  wrong_length <- (length(k) != dim_Z) && (length(k) != 1)
  if (wrong_length){
    stop("The resolution 'k' must be a single scalar or a vector of scalars with length equal to the dimensionality of the filter space.")
  }
  ## Recycle a scalar resolution into a per-dimension vector
  if (length(k) == 1 && dim_Z > 1){
    k <- rep(k, dim_Z)
  }
  num_intervals <<- k
})

## Constructs a base cover where the Lebesgue covering dimension = 0 (i.e. a valid cover, but where each set is disjoint)
## TODO: Add support for other types of covering methods
cover_ref$methods(constructBaseCover = function(cover_type = c("rectangular")){
  if ("uninitializedField" %in% class(percent_overlap)){ stop("'percent_overlap' must be set prior to constructing a valid cover.") }
  if ("uninitializedField" %in% class(num_intervals)){ stop("'num_intervals' must be set prior to constructing a valid cover.") }

  ## Setup
  filter_dim <- ncol(filter_values) ## filter dimensionality
  ## BUG FIX: the original referenced an undefined variable 'k' here; the
  ## per-dimension interval counts live in the 'num_intervals' field.
  indices <- lapply(num_intervals, function(k_l) 1:k_l) ## per-dimension possible indexes
  index_set <<- structure(eval(as.call(append(quote(expand.grid), indices))), names = paste0("d", 1:filter_dim)) ## full indexing set

  ## Find which level set each point lies within under the case the given filter space is
  ## zero-dimensional w.r.t. its Lebesgue covering dimension: with disjoint bins,
  ## each point falls in exactly one interval per dimension.
  points_in_level_set <- sapply(1:filter_dim, function(i) {
    x <- filter_values[, i]
    findInterval(x = x, seq(min(x), max(x), length.out = num_intervals[[i]] + 1), all.inside = TRUE)
  })

  ## Create the level sets.
  ## NOTE(review): 'names = index_set' supplies a data.frame as the names
  ## attribute — presumably the intent was the level-set index tuples; confirm
  ## against how downstream code consumes 'level_sets' from the base cover.
  level_sets <<- structure(points_in_level_set, names = index_set)
})

## Builds the (overlapping) rectangular cover from the current resolution
## and gain: computes the full index set, the interval geometry, and then
## delegates level-set construction to the C++ routine.
cover_ref$methods(constructCover = function(){
  if ("uninitializedField" %in% class(percent_overlap)){ stop("'percent_overlap' must be set prior to constructing a valid cover.") }
  if ("uninitializedField" %in% class(num_intervals)){ stop("'num_intervals' must be set prior to constructing a valid cover.") }

  ## The full indexing set is the cartesian product of each per-dimension index set
  filter_dim <- ncol(filter_values)
  per_dim_indices <- lapply(num_intervals, function(k_l) 1:k_l)
  index_set <<- structure(eval(as.call(append(quote(expand.grid), per_dim_indices))), names = paste0("d", 1:filter_dim))

  ## Per-dimension min and max of the filter values
  filter_rng <- apply(filter_values, 2, range)
  filter_min <- filter_rng[1, ]
  filter_max <- filter_rng[2, ]

  ## Interval length and step size implied by the current resolution + gain;
  ## these determine the min/max bounds of every level set.
  interval_length <- (filter_max - filter_min) / (num_intervals - percent_overlap * (num_intervals - 1))
  step_size <- interval_length * (1 - percent_overlap)
  index_set_str <- apply(index_set, 1, function(alpha) paste0(as.character(alpha), collapse = ",")) ## level set index tuple names

  ## Let C++ construct the level sets (bounds + point membership)
  level_sets <<- constructLevelSets(filter_values, as.matrix(index_set), as.numeric(interval_length), as.numeric(step_size), as.numeric(filter_min))
})

## Which level set (flat indices) are valid to compare against? When the overlap is <= 50%, it's required that the
## level sets are checked only against immediate neighbors of that set (when the index is at maximum one
## index away). If the overlap is > 50%, then check every combination.
## Returns a 2-column matrix of level-set flat index pairs.
cover_ref$methods(valid_pairs = function(){
  all_ls_pairs <- t(combn(1:length(level_sets), 2))
  idx_set_pairs <- as.matrix(index_set[all_ls_pairs[, 1],] - index_set[all_ls_pairs[, 2],])
  if (all(percent_overlap <= 0.50)){
    ## BUG FIX: adjacency requires the Chebyshev distance max(abs(diff)) <= 1.
    ## The original computed abs(max(diff)), which e.g. treats the index
    ## difference (-2, 0) (max = 0) as adjacent when it is two sets apart.
    valid_ls_pairs <- as.vector(apply(abs(idx_set_pairs), 1, max)) <= 1
    return(all_ls_pairs[valid_ls_pairs,])
  } else {
    ## TODO: Extend mapper to compute more than the 1-skeleton efficiently
    return(all_ls_pairs)
  }
})

## Function to plot the filter space, including the level set bounds.
## Supports 1- and 2-dimensional filter spaces only; additional graphical
## parameters in '...' are forwarded to the 'lines' calls drawing the bounds.
cover_ref$methods(plotFilterSpace = function(show_ls_bounds = TRUE, ...){
  filter_dim <- ncol(filter_values)
  ## BUG FIX: message disagreed with the condition (said "3", tested "> 2")
  if (filter_dim > 2){ stop("Cannot plot a filter space with more than 2 dimensions") }
  if (filter_dim == 1){
    ## BUG FIX: the original referenced 'm$filter_values' and 'cover$LS',
    ## neither of which exists inside the Cover class; use the fields.
    filter_sz <- nrow(filter_values)
    ## Fade points as the sample gets larger so dense regions stay readable
    alpha_scale <- ifelse(filter_sz > 1000, 0.01, 1 - (log(filter_sz)-1)/(log(1000)-1))
    plot(cbind(filter_values, rep(0L, filter_sz)), ylim = c(-1, 1), pch = "|", cex = 5,
         col = adjustcolor("black", alpha.f = alpha_scale),
         ylab = "", xlab = "Filter Values")
    for (ls in level_sets){
      ## NOTE(review): assumes each level set carries a 2 x d 'bounds' matrix
      ## (rows = min/max), matching the commented-out R construction in
      ## constructCover — confirm against the C++ constructLevelSets output.
      bnds <- ls$bounds
      lines(x = c(bnds[1, 1], bnds[2, 1]), y = c(-0.5, -0.5), ...)
      lines(x = c(bnds[2, 1], bnds[2, 1]), y = c(-0.5, 0.5), ...)
      lines(x = c(bnds[2, 1], bnds[1, 1]), y = c(0.5, 0.5), ...)
      lines(x = c(bnds[1, 1], bnds[1, 1]), y = c(0.5, -0.5), ...)
    }
  } else if (filter_dim == 2){
    plot(filter_values)
    for (ls in level_sets){
      bnds <- ls$bounds
      lines(x = c(bnds[1, 1], bnds[2, 1]), y = c(bnds[1, 2], bnds[1, 2]), ...)
      lines(x = c(bnds[2, 1], bnds[2, 1]), y = c(bnds[1, 2], bnds[2, 2]), ...)
      lines(x = c(bnds[2, 1], bnds[1, 1]), y = c(bnds[2, 2], bnds[2, 2]), ...)
      lines(x = c(bnds[1, 1], bnds[1, 1]), y = c(bnds[2, 2], bnds[1, 2]), ...)
    }
  }
})

9 changes: 6 additions & 3 deletions R/mapper.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,12 @@
#' g1 <- graph.adjacency(m1$adjacency, mode="undirected")
#' plot(g1, layout = layout.auto(g1) )
#' }
#'
#' @useDynLib TDAmapper
#' @importFrom Rcpp sourceCpp
#'
#' @export
#'
#'


mapper <- function(dist_object, filter_values, num_intervals, percent_overlap, num_bins_when_clustering) {
Expand Down Expand Up @@ -80,7 +84,6 @@ mapper <- function(dist_object, filter_values, num_intervals, percent_overlap, n
filter_max <- as.vector(sapply(filter_values,max))
interval_width <- (filter_max - filter_min) / num_intervals


# initialize variables
vertex_index <- 0
level_of_vertex <- c()
Expand Down Expand Up @@ -172,7 +175,7 @@ mapper <- function(dist_object, filter_values, num_intervals, percent_overlap, n
# cut the cluster tree
# internal indices refers to 1:num_points_in_this_level
# external indices refers to the row number of the original data point
level_cutoff <- cluster_cutoff_at_first_empty_bin(level_heights, level_max_dist, num_bins_when_clustering)
level_cutoff <- cluster_cutoff_at_first_empty_bin(level_heights, level_max_dist, num_bins_when_clustering)
level_external_indices <- points_in_this_level[level_hclust$order]
level_internal_indices <- as.vector(cutree(list(
merge = level_hclust$merge,
Expand Down
4 changes: 2 additions & 2 deletions R/mapper2D.R
Original file line number Diff line number Diff line change
Expand Up @@ -101,13 +101,13 @@ mapper2D <- function(
points_in_level[[level]] <- which(points_in_level_logical==TRUE)

if (num_points_in_level == 0) {
print('Level set is empty')
# print('Level set is empty')
vertices_in_level[[level]] <- -1 # Bertrand Michel's suggestion for empty levels: use flag -1
next
}

if (num_points_in_level == 1) {
print('Level set has only one point')
# print('Level set has only one point')
num_vertices_in_level <- 1
cluster_indices_within_level <- c(1)
vertices_in_level[[level]] <- vertex_index + (1:num_vertices_in_level)
Expand Down
161 changes: 161 additions & 0 deletions R/mapperRef.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
#' Reference Class (R5) implementation of Mapper for the TDAmapper package in R
#' Composes a set of classes to perform Mapper efficiently.
#' Author: Matt Piekenbrock

## Create the reference class, along with required field types.
## Fields:
##   X                    := the data, either a data matrix or a 'dist' object
##   cover                := a Cover reference object (see cover_ref)
##   clustering_algorithm := function(dist, ...) -> flat cluster assignment vector
##   G                    := the Mapper graph, a list with $nodes and $adjacency
##   config               := misc configuration (e.g. the originating call)
mapper_ref <- setRefClass("MapperRef", fields = list(X="ANY", cover="ANY", clustering_algorithm="ANY", G="ANY", config="list"))

## Builds and stores the Cover object (the cover owns the filter values).
## 'fv' may be a bare numeric vector (promoted to a 1-column matrix) or a
## numeric matrix; 'k' and 'g' are the resolution and gain passed through.
mapper_ref$methods(setCover = function(fv, k, g, measure = "euclidean"){
  if (is.null(dim(fv)) && is.numeric(fv)){
    fv <- matrix(fv, nrow = length(fv), ncol = 1)
  }
  is_valid_matrix <- !is.null(dim(fv)) && ("matrix" %in% class(fv))
  if (!is_valid_matrix){ stop("Filter values must be numeric and in matrix form") }
  cover <<- cover_ref$new(filter_values = fv, k, g, measure = measure)
})

## Changes the clustering algorithm used by mapper.
## 'cl' is either an hclust linkage criterion (character) or a user-supplied
## clustering function. Either way the stored function must accept a 'dist'
## object and return a static or 'flat' clustering result (integer vector).
mapper_ref$methods(setClusteringAlgorithm = function(cl = c("single", "ward.D", "ward.D2", "complete", "average", "mcquitty", "median", "centroid")){

  ## Allow the user to use their own clustering algorithm if they desire.
  ## BUG FIX: this branch is checked first (the original compared a possible
  ## function against "single", which errors) and the original tested and
  ## assigned an undefined variable 'f' instead of 'cl'.
  if (!missing(cl) && is.function(cl)){
    clustering_algorithm <<- cl
  }
  ## Default to single linkage clustering with heuristic histogram-like cut rule
  else if (missing(cl) || identical(cl, "single")){
    clustering_algorithm <<- function(dist_x, num_bins_when_clustering = 10){
      hcl <- hclust(dist_x, method = "single")
      cut_height <- cluster_cutoff_at_first_empty_bin(heights = hcl$height, diam = max(dist_x), num_bins_when_clustering = num_bins_when_clustering)
      as.vector(cutree(hcl, h = cut_height))
    }
  }
  ## If other linkage criterion is provided, use that
  else if (is.character(cl)){
    hclust_opts <- c("single", "ward.D", "ward.D2", "complete", "average", "mcquitty", "median", "centroid")
    ## BUG FIX: 'sprint' was a typo for 'sprintf'
    if (!cl %in% hclust_opts){ stop(sprintf("Unknown linkage method passed. Please use one of (%s). See ?hclust for details.", paste0(hclust_opts, collapse = ", "))) }
    clustering_algorithm <<- function(dist_x, num_bins_when_clustering = 10){
      hcl <- hclust(dist_x, method = cl)
      cut_height <- cluster_cutoff_at_first_empty_bin(heights = hcl$height, diam = max(dist_x), num_bins_when_clustering = num_bins_when_clustering)
      as.vector(cutree(hcl, h = cut_height))
    }
  } else {
    stop("'cl' must be either a linkage criterion (character) or a clustering algorithm (function)")
  }
})

## Computes the Mapper nodes: clusters each level set of the cover and stores
## one node per cluster in G$nodes. Each node is the vector of original point
## indices in that cluster, tagged with its level set flat index (attribute
## "level_set"). Extra arguments in '...' are forwarded to the clustering
## algorithm (e.g. num_bins_when_clustering).
mapper_ref$methods(computeNodes = function(...){
  if ("uninitializedField" %in% class(cover)){ stop("Cover must be constructed prior to computing nodes.") }

  ## Compute the interpoint distances for each level-set subset of X
  LS_dist <- lapply(cover$level_sets, function(ls){
    if ("dist" %in% class(X)){
      dist_subset(dist = X, idx = ls$points_in_level_set)
    } else {
      ## BUG FIX: drop = FALSE keeps a 1-row matrix when a level set contains
      ## a single point; a bare vector would make dist() treat each coordinate
      ## as a separate 1-d observation.
      dist(X[ls$points_in_level_set, , drop = FALSE], method = "euclidean")
    }
  })

  ## Initialize the graph as an empty list
  G <<- list()

  ## Iterate through the 'dist' objects stored for each level set, perform the clustering
  ## (empty level sets yield NULL and contribute no nodes)
  cl_res <- lapply(LS_dist, function(ls_dist) {
    if (length(ls_dist) > 0) clustering_algorithm(ls_dist, ...)
  })

  ## Precompute useful variables to know
  ## (removed an unused 'node_idx' computation from the original)
  n_nodes <- sum(sapply(cl_res, function(cl) length(unique(cl))))
  n_lvlsets <- length(cover$level_sets)

  ## Agglomerate the nodes into a list. This matches up the original indexes of the filter values with the
  ## the clustering results, such that each node stores the original filter index values as well as the
  ## creating a correspondence between the node and it's corresponding level set flat index (lsfi)
  ## TODO: Cleanup and either vectorize or send down to C++
  G$nodes <<- vector(mode = "list", length = n_nodes)
  n_i <- 1L
  for (lsfi in seq_len(n_lvlsets)){
    cl_i <- cl_res[[lsfi]]
    ## Extract the node point indices for each cluster
    node_pt_idx <- lapply(unique(cl_i), function(cl_idx) cover$level_sets[[lsfi]]$points_in_level_set[which(cl_i == cl_idx)])
    for (node in node_pt_idx){
      attr(node, "level_set") <- lsfi
      G$nodes[[n_i]] <<- node
      n_i <- n_i + 1L
    }
  }
})

## Computes the Mapper edges: for every candidate pair of level sets, checks
## node intersections in C++ and stores the resulting adjacency in G$adjacency.
mapper_ref$methods(computeEdges = function(){
  if ("uninitializedField" %in% class(G)){ stop("Graph with nodes must be constructed before computing edges.") }

  ## Level set flat index (LSFI) of each node, positionally aligned with G$nodes
  node_lsfi <- sapply(G$nodes, function(node) attr(node, "level_set"))

  ## Map each level set (by position) to the node indices it contains;
  ## empty level sets map to NULL
  ls_node_map <- lapply(seq(length(cover$level_sets)), function(lvl_set_idx) {
    hits <- which(node_lsfi == lvl_set_idx)
    if (length(hits) == 0){ NULL } else { hits }
  })

  ## Candidate level-set pairs whose nodes may intersect. In the worst case,
  ## with no cover-specific optimization or 1-skeleton assumption, this may be
  ## all pairwise LSFI combinations for the full simplicial complex.
  ls_to_compare <- cover$valid_pairs()

  ## Let Rcpp handle the O(n^2) non-empty intersection checks
  G$adjacency <<- adjacencyCpp(ls_pairs = ls_to_compare, nodes = G$nodes, ls_node_map = ls_node_map);
})


# Plots the Mapper graph with the 'network' package: vertex color encodes a
# binned aggregate of each node's filter values (rainbow palette), vertex
# size encodes log of the node's point count.
# NOTE(review): requires the 'network' package; also, 'sapply(agg_pt_fv, mean)'
# assumes the first sapply returns a list/vector per node — for
# multi-dimensional filters the shape of 'agg_pt_fv' should be confirmed.
mapper_ref$methods(plotNetwork = function(){
# Per-node aggregate of the member points' filter values
agg_pt_fv <- sapply(G$nodes, function(n_idx){ apply(matrix(cover$filter_values[n_idx,], nrow = length(n_idx)), 1, mean)})
agg_node_fv <- sapply(agg_pt_fv, mean)
# Log-scaled vertex size by node cardinality
exp_factor <- log(sapply(G$nodes, function(n_idx){ length(n_idx) }))
# 100-color rainbow, reversed so low values are blue and high values red
rbw_pal <- rev(rainbow(100, start = 0, end = 4/6))
binned_idx <- cut(agg_node_fv, breaks = 100, labels = F)
base_net <- network::network.initialize(nrow(G$adjacency), directed = F)
plot(network::network.adjacency(x = G$adjacency, g = base_net),
vertex.col = rbw_pal[binned_idx], vertex.cex = exp_factor,
label = 1:length(G$nodes), displaylabels = TRUE)
})

# Stub: intended to compute the gain. Currently performs only a guard check.
# NOTE(review): 'filter_values' is not a field of MapperRef (it lives on the
# cover), so this guard references an undefined name — likely should test
# cover$filter_values; confirm intended behavior before implementing.
mapper_ref$methods(computeGain = function(){
if ("uninitializedField" %in% class(filter_values)){ stop("Filter values must be set prior to constructing a valid cover.") }

})

## S3-like print override: reports the originating call (if recorded), the
## number of objects, the cover summary, and whether a clustering algorithm
## has been set.
mapper_ref$methods(show = function(){
  ## X may be a 'dist' object (size stored as an attribute) or a data matrix
  if ("dist" %in% class(X)){ n <- attr(X, "Size") } else { n <- nrow(X) }
  if (!is.null(config$call))
    cat("\nCall:\n", deparse(config$call), "\n\n", sep = "")
  message <- sprintf("Mapper construction for %d objects", n)
  if (!"uninitializedField" %in% class(cover)){ message <- append(message, cover$summary()) }
  ## BUG FIX: this branch re-appended cover$summary() (copy/paste); report
  ## the clustering algorithm instead.
  if (!"uninitializedField" %in% class(clustering_algorithm)){ message <- append(message, "Clustering algorithm: set") }
  writeLines(message)
})



## Exports the internal mapper core structures to a TDAmapper output object
## (class "TDAmapper") with the fields the legacy interface expects. Empty
## levels use the flag -1 in 'vertices_in_level'.
mapper_ref$methods(exportTDAmapper = function(){
  lvl_of_vtx <- sapply(G$nodes, function(ni) attr(ni, "level_set"))
  vtx_in_lvl <- lapply(1:length(cover$level_sets), function(ls_idx) {
    hits <- which(lvl_of_vtx == ls_idx)
    if (length(hits) == 0){ return(-1) }
    return(hits)
  })
  out <- list(adjacency = G$adjacency,
              num_vertices = length(G$nodes),
              level_of_vertex = lvl_of_vtx,
              points_in_vertex = lapply(G$nodes, as.vector),
              points_in_level = unname(lapply(cover$level_sets, function(ls) ls$points_in_level_set)),
              vertices_in_level = vtx_in_lvl)
  structure(out, class = "TDAmapper")
})

# TODO: not yet implemented — presumably intended to return a single level
# set (or its nodes) by index; confirm the intended contract.
mapper_ref$methods(getLevel = function(){

})
# TODO: not yet implemented — presumably a convenience wrapper calling
# computeNodes() then computeEdges(); confirm before implementing.
mapper_ref$methods(computeGraph = function(){

})
Loading