Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ Authors@R: c(
person("Genevera", "Allen", role = c("ths", "aut"), email = "gallen@rice.edu"),
person("Lewis", "Brian W.", role = "cph", comment = "R/util_hcs.R"),
person("Daniel", "Englund", role = "aut", email = "dse1@rice.edu"),
person("Yue", "Zhuo", role = "aut", email = "yz154@rice.edu"))
person("Yue", "Zhuo", role = "aut", email = "yz154@rice.edu"),
person("R Core", role = "cph", comment = "R/scale_complex.R"))
Description: Fast computation and interactive for the convex clustering and
bi-clustering problems. The CARP and CBASS algorithms use an
algorithmic regularization scheme to obtain high-quality global
Expand Down
2 changes: 1 addition & 1 deletion R/carp.R
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ CARP <- function(X,
#' and the variant of \code{CARP} used.
#'
#' @details The \code{as.dendrogram} and \code{as.hclust} methods convert the
#' \code{CBASS} output to an object of class \code{dendrogram} or \code{hclust}
#' \code{CARP} output to an object of class \code{dendrogram} or \code{hclust}
#' respectively.
#'
#' @param x an object of class \code{CARP} as returned by \code{\link{CARP}}
Expand Down
46 changes: 46 additions & 0 deletions R/scale_complex.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#' @export
#' @author R Core (\code{base} package)
#
# This function is basically just scale.default, but with is.numeric replaced by
# (is.numeric | is.complex) so that it scales complex matrices correctly and returns
# the appropriate scale factors as attributes.
scale.complex <- function(x, center = TRUE, scale = TRUE){
is.nc <- function(x) is.numeric(x) | is.complex(x)

x <- as.matrix(x)
nc <- ncol(x)
if (is.logical(center)) {
if (center) {
center <- colMeans(x, na.rm=TRUE)
x <- sweep(x, 2L, center, check.margin=FALSE)
}
}
else {
if(!is.nc(center)) center <- as.complex(center)
if (length(center) == nc)
x <- sweep(x, 2L, center, check.margin=FALSE)
else
stop("length of 'center' must equal the number of columns of 'x'")
}
if (is.logical(scale)) {
if (scale) {
f <- function(v) {
v <- v[!is.na(v)]
# Use squared modulus in SD calc here rather than just squaring
sqrt(sum(abs(v)^2) / max(1, length(v) - 1L))
}
scale <- apply(x, 2L, f)
x <- sweep(x, 2L, scale, "/", check.margin=FALSE)
}
}
else {
if(!is.nc(scale)) scale <- as.complex(scale)
if (length(scale) == nc)
x <- sweep(x, 2L, scale, "/", check.margin=FALSE)
else
stop("length of 'scale' must equal the number of columns of 'x'")
}
if(is.nc(center)) attr(x, "scaled:center") <- center
if(is.nc(scale )) attr(x, "scaled:scale" ) <- scale
x
}
283 changes: 283 additions & 0 deletions R/solvers.R
Original file line number Diff line number Diff line change
Expand Up @@ -649,3 +649,286 @@ print.ConvexBiClustering <- function(x, ...) {

invisible(x)
}

#' Compute Time Series (Phase Aligned Complex) Convex Clustering Solution Path on a User-Specified Grid
#'
#' \code{ts_convex_clustering} calculates the phase-aligned complex convex clustering solution path
#' at a user-specified grid of lambda values (or just a single value). It is,
#' in general, difficult to know a useful set of lambda values \emph{a priori},
#' so this function is more useful for timing comparisons and methodological
#' research than applied work.
#'
#' Compared to the \code{\link{TROUT}} function, the returned object
#' is much more "bare-bones," containing only the estimated \eqn{U} matrices,
#' and no information used for dendrogram or path visualizations.
#'
#' @param X The data matrix (\eqn{X \in C^{n \times p}}{X}): rows correspond to
#' the observations (to be clustered) and columns to the variables (which
#' will not be clustered).
#' @param lambda_grid A user-supplied set of \eqn{\lambda}{lambda} values at which
#' to solve the convex clustering problem. These must be strictly
#' positive values and will be automatically sorted internally.
#' @param X.center A logical: Should \code{X} be centered columnwise?
#' @param X.scale A logical: Should \code{X} be scaled columnwise?
#' @param norm Which norm to use in the fusion penalty? Currently only \code{1}
#' and \code{2} (default) are supported.
#' @param ... Unused arguements. An error will be thrown if any unrecognized
#' arguments as given. All arguments other than \code{X} must be given
#' by name.
#' @param weights One of the following: \itemize{
#' \item A function which, when called with argument \code{X},
#' returns an b-by-n matrix of fusion weights.
#' \item A matrix of size n-by-n containing fusion weights
#' }
#' @param status Should a status message be printed to the console?
#' @return An object of class \code{convex_clustering} containing the following elements (among others):
#' \itemize{
#' \item \code{X}: the original data matrix
#' \item \code{n}: the number of observations (rows of \code{X})
#' \item \code{p}: the number of variables (columns of \code{X})
#' \item \code{X.center}: a logical indicating whether \code{X} was centered
#' column-wise before clustering
#' \item \code{X.scale}: a logical indicating whether \code{X} was scaled
#' column-wise before centering
#' \item \code{weight_type}: a record of the scheme used to create
#' fusion weights
#' \item \code{U}: a tensor (3-array) of clustering solutions
#' }
#' @importFrom utils data
#' @importFrom dplyr %>% mutate group_by ungroup as_tibble n_distinct
#' @importFrom rlang %||%
#' @importFrom stats var
#' @export
ts_convex_clustering <- function(X,
...,
lambda_grid,
weights = sparse_rbf_kernel_weights(k = "auto",
phi = "auto",
dist.method = "trout",
p = 2),
X.center = TRUE,
X.scale = FALSE,
norm = 2,
status = (interactive() && (clustRviz_logger_level() %in% c("MESSAGE", "WARNING", "ERROR")))) {

tic <- Sys.time()

####################
##
## Input validation
##
####################

dots <- list(...)

if (length(dots) != 0L) {
if (!is.null(names(dots))) {
crv_error("Unknown argument ", sQuote(names(dots)[1L]), " passed to ", sQuote("ts_convex_clustering."))
} else {
crv_error("Unknown ", sQuote("..."), " arguments passed to ", sQuote("ts_convex_clustering."))
}
}

if (!is.matrix(X)) {
crv_warning(sQuote("X"), " should be a matrix, not a " , class(X)[1],
". Converting with as.matrix().")
X <- as.matrix(X)
}

if (!is.complex(X)) {
crv_error(sQuote("X"), " must be complex.")
}

# Missing data mask: M_{ij} = 1 means we see X_{ij};
# Currently we don't support missing values in X so this better be all ones
M <- 1 - is.na(X)

## Check that imputation was successful.
if (anyNA(X)) {
crv_error("ts_convex_clustering() does not support missing values in ", sQuote("X."))
}

if (!all(is.finite(X))) {
crv_error("All elements of ", sQuote("X"), " must be finite.")
}

if (!is_logical_scalar(X.center)) {
crv_error(sQuote("X.center"), "must be either ", sQuote("TRUE"), " or ", sQuote("FALSE."))
}

if (!is_logical_scalar(X.scale)) {
crv_error(sQuote("X.scale"), "must be either ", sQuote("TRUE"), " or ", sQuote("FALSE."))
}

if (norm %not.in% c(1, 2)){
crv_error(sQuote("norm"), " must be either 1 or 2.")
}

if(missing(lambda_grid)){
crv_error(sQuote("lambda_grid"), " must be supplied.")
}

if(!all(is.finite(lambda_grid))){
crv_error(sQuote("lambda_grid"), " containts non-finite values.")
}

if(any(lambda_grid < 0)){
crv_error(sQuote("lambda_grid"), " must be strictly positive.")
}

if(any(lambda_grid == 0)){
crv_error(sQuote("lambda_grid"), " must be strictly positive - 0 will be automatically added.")
}

if(is.unsorted(lambda_grid)){
crv_warning(sQuote("lambda_grid"), " is unsorted: sorting for maximum performance.")
lambda_grid <- sort(lambda_grid)
}

l1 <- (norm == 1)

n <- NROW(X)
p <- NCOL(X)

X.orig <- X
# Center and scale X
if (X.center | X.scale) {
X <- scale(X, center = X.center, scale = X.scale)
}

scale_vector <- attr(X, "scaled:scale", exact=TRUE) %||% rep(1, p)
center_vector <- attr(X, "scaled:center", exact=TRUE) %||% rep(0, p)

crv_message("Pre-computing weights and edge sets")

# Calculate clustering weights
if (is.function(weights)) { # Usual case, `weights` is a function which calculates the weight matrix
weight_result <- weights(X)

if (is.matrix(weight_result)) {
weight_matrix <- weight_result
weight_type <- UserFunction()
} else {
weight_matrix <- weight_result$weight_mat
weight_type <- weight_result$type
}
} else if (is.matrix(weights)) {

if (!is_square(weights)) {
crv_error(sQuote("weights"), " must be a square matrix.")
}

if (NROW(weights) != NROW(X)) {
crv_error(sQuote("NROW(weights)"), " must be equal to ", sQuote("NROW(X)."))
}

weight_matrix <- weights
weight_type <- UserMatrix()
} else {
crv_error(sQuote("ts_convex_clustering"), " does not know how to handle ", sQuote("weights"),
" of class ", class(weights)[1], ".")
}

if (any(weight_matrix < 0) || anyNA(weight_matrix)) {
crv_error("All fusion weights must be positive or zero.")
}

weight_matrix_ut <- weight_matrix * upper.tri(weight_matrix);

edge_list <- which(weight_matrix_ut != 0, arr.ind = TRUE)
edge_list <- edge_list[order(edge_list[, 1], edge_list[, 2]), ]
cardE <- NROW(edge_list)
D <- matrix(0, ncol = n, nrow = cardE)
D[cbind(seq_len(cardE), edge_list[,1])] <- 1
D[cbind(seq_len(cardE), edge_list[,2])] <- -1

weight_vec <- weight_mat_to_vec(weight_matrix)

crv_message("Computing Time-Series Convex Clustering Solutions")
tic_inner <- Sys.time()

clustering_sol <- TroutClusteringCPP(X = X,
M = M,
D = D,
lambda_grid = lambda_grid,
weights = weight_vec[weight_vec != 0],
rho = .clustRvizOptionsEnv[["rho"]],
thresh = .clustRvizOptionsEnv[["stopping_threshold"]],
max_iter = .clustRvizOptionsEnv[["max_iter"]],
max_inner_iter = .clustRvizOptionsEnv[["max_inner_iter"]],
l1 = l1,
show_progress = status)

toc_inner <- Sys.time()

crv_message("Post-processing")

lambda_grid <- clustering_sol$gamma_path
U_raw <- array(clustering_sol$u_path,
dim = c(n, p, length(lambda_grid)),
dimnames = list(rownames(X.orig),
colnames(X.orig),
paste0("Lambda_", seq_along(lambda_grid) - 1)))

center_tensor <- aperm(array(center_vector, dim = c(p, n, length(lambda_grid))), c(2, 1, 3))
scale_tensor <- aperm(array(scale_vector, dim = c(p, n, length(lambda_grid))), c(2, 1, 3))

U <- U_raw * scale_tensor + center_tensor

trout_clustering_fit <- list(
X = X.orig,
M = M,
D = D,
U = U,
n = n,
p = p,
norm = norm,
lambda_grid = lambda_grid,
weights = weight_matrix,
weight_type = weight_type,
X.center = X.center,
center_vector = center_vector,
X.scale = X.scale,
scale_vector = scale_vector,
time = Sys.time() - tic,
fit_time = toc_inner - tic_inner
)

class(trout_clustering_fit) <- "TimeSeriesConvexClustering"

return(trout_clustering_fit)
}

#' Print \code{\link{ts_convex_clustering}} Results
#'
#' Prints a brief descripton of a fitted \code{ts_convex_clustering} object.
#'
#' Reports number of observations and variables of dataset, any preprocessing
#' done by the \code{\link{ts_convex_clustering}} function, and regularization
#' details.
#'
#' @param x an object of class \code{ts_convex_clustering} as returned by
#' \code{\link{ts_convex_clustering}}
#' @param ... Additional unused arguments
#' @export
print.TimeSeriesConvexClustering <- function(x, ...) {
alg_string <- paste("ADMM", if(x$norm == 1) "[L1]" else "[L2]")

cat("Time Series [Phase-Aligned Convex] Convex Clustering Fit Summary\n")
cat("================================================================\n\n")
cat("Algorithm:", alg_string, "\n")
cat("Grid:", length(x$lambda_grid), "values of lambda. \n")
cat("Fit Time:", sprintf("%2.3f %s", x$fit_time, attr(x$fit_time, "units")), "\n")
cat("Total Time:", sprintf("%2.3f %s", x$time, attr(x$time, "units")), "\n\n")
cat("Number of Observations:", x$n, "\n")
cat("Number of Variables: ", x$p, "\n\n")

cat("Pre-processing options:\n")
cat(" - Columnwise centering:", x$X.center, "\n")
cat(" - Columnwise scaling: ", x$X.scale, "\n\n")

cat("Weights:\n")
print(x$weight_type)

invisible(x)
}
Loading