DataSlingers · michaelweylandt · Oct 6, 2020 · Oct 6, 2020 · Oct 6, 2020 · Oct 6, 2020
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -8,7 +8,8 @@ Authors@R: c(
   person("Genevera", "Allen", role = c("ths", "aut"), email = "gallen@rice.edu"),
   person("Lewis", "Brian W.", role = "cph", comment = "R/util_hcs.R"),
   person("Daniel", "Englund", role = "aut", email = "dse1@rice.edu"),
-  person("Yue", "Zhuo", role = "aut", email = "yz154@rice.edu"))
+  person("Yue", "Zhuo", role = "aut", email = "yz154@rice.edu"), 
+  person("R Core", role = "cph", comment = "R/scale_complex.R"))
 Description: Fast computation and interactive for the convex clustering and 
              bi-clustering problems. The CARP and CBASS algorithms use an 
              algorithmic regularization scheme to obtain high-quality global 

diff --git a/R/carp.R b/R/carp.R
@@ -330,7 +330,7 @@ CARP <- function(X,
 #' and the variant of \code{CARP} used.
 #'
 #' @details The \code{as.dendrogram} and \code{as.hclust} methods convert the
-#'          \code{CBASS} output to an object of class \code{dendrogram} or \code{hclust}
+#'          \code{CARP} output to an object of class \code{dendrogram} or \code{hclust}
 #'          respectively.
 #'
 #' @param x an object of class \code{CARP} as returned by \code{\link{CARP}}

diff --git a/R/scale_complex.R b/R/scale_complex.R
@@ -0,0 +1,46 @@
+#' @export
+#' @author R Core (\code{base} package)
+#
+# This function is basically just scale.default, but with is.numeric replaced by
+# (is.numeric | is.complex) so that it scales complex matrices correctly and returns
+# the appropriate scale factors as attributes.
+scale.complex <- function(x, center = TRUE, scale = TRUE){
+  is.nc <- function(x) is.numeric(x) | is.complex(x)
+
+  x <- as.matrix(x)
+  nc <- ncol(x)
+  if (is.logical(center)) {
+    if (center) {
+      center <- colMeans(x, na.rm=TRUE)
+      x <- sweep(x, 2L, center, check.margin=FALSE)
+    }
+  }
+  else {
+    if(!is.nc(center)) center <- as.complex(center)
+    if (length(center) == nc)
+      x <- sweep(x, 2L, center, check.margin=FALSE)
+    else
+      stop("length of 'center' must equal the number of columns of 'x'")
+  }
+  if (is.logical(scale)) {
+    if (scale) {
+      f <- function(v) {
+        v <- v[!is.na(v)]
+        # Use squared modulus in SD calc here rather than just squaring
+        sqrt(sum(abs(v)^2) / max(1, length(v) - 1L))
+      }
+      scale <- apply(x, 2L, f)
+      x <- sweep(x, 2L, scale, "/", check.margin=FALSE)
+    }
+  }
+  else {
+    if(!is.nc(scale)) scale <- as.complex(scale)
+    if (length(scale) == nc)
+      x <- sweep(x, 2L, scale, "/", check.margin=FALSE)
+    else
+      stop("length of 'scale' must equal the number of columns of 'x'")
+  }
+  if(is.nc(center)) attr(x, "scaled:center") <- center
+  if(is.nc(scale )) attr(x, "scaled:scale" ) <- scale
+  x
+}
diff --git a/R/solvers.R b/R/solvers.R
@@ -649,3 +649,286 @@ print.ConvexBiClustering <- function(x, ...) {
 
   invisible(x)
 }
+
+#' Compute Time Series (Phase Aligned Complex) Convex Clustering Solution Path on a User-Specified Grid
+#'
+#' \code{ts_convex_clustering} calculates the phase-aligned complex convex clustering solution path
+#' at a user-specified grid of lambda values (or just a single value). It is,
+#' in general, difficult to know a useful set of lambda values \emph{a priori},
+#' so this function is more useful for timing comparisons and methodological
+#' research than applied work.
+#'
+#' Compared to the \code{\link{TROUT}} function, the returned object
+#' is much more "bare-bones," containing only the estimated \eqn{U} matrices,
+#' and no information used for dendrogram or path visualizations.
+#'
+#' @param X The data matrix (\eqn{X \in C^{n \times p}}{X}): rows correspond to
+#'          the observations (to be clustered) and columns to the variables (which
+#'          will not be clustered).
+#' @param lambda_grid A user-supplied set of \eqn{\lambda}{lambda} values at which
+#'                    to solve the convex clustering problem. These must be strictly
+#'                    positive values and will be automatically sorted internally.
+#' @param X.center A logical: Should \code{X} be centered columnwise?
+#' @param X.scale A logical: Should \code{X} be scaled columnwise?
+#' @param norm Which norm to use in the fusion penalty? Currently only \code{1}
+#'             and \code{2} (default) are supported.
+#' @param ... Unused arguements. An error will be thrown if any unrecognized
+#'            arguments as given. All arguments other than \code{X} must be given
+#'            by name.
+#' @param weights One of the following: \itemize{
+#'                \item A function which, when called with argument \code{X},
+#'                      returns an b-by-n matrix of fusion weights.
+#'                \item A matrix of size n-by-n containing fusion weights
+#'                }
+#' @param status Should a status message be printed to the console?
+#' @return An object of class \code{convex_clustering} containing the following elements (among others):
+#'         \itemize{
+#'         \item \code{X}: the original data matrix
+#'         \item \code{n}: the number of observations (rows of \code{X})
+#'         \item \code{p}: the number of variables (columns of \code{X})
+#'         \item \code{X.center}: a logical indicating whether \code{X} was centered
+#'                                column-wise before clustering
+#'         \item \code{X.scale}: a logical indicating whether \code{X} was scaled
+#'                               column-wise before centering
+#'         \item \code{weight_type}: a record of the scheme used to create
+#'                                   fusion weights
+#'         \item \code{U}: a tensor (3-array) of clustering solutions
+#'         }
+#' @importFrom utils data
+#' @importFrom dplyr %>% mutate group_by ungroup as_tibble n_distinct
+#' @importFrom rlang %||%
+#' @importFrom stats var
+#' @export
+ts_convex_clustering <- function(X,
+                                 ...,
+                                 lambda_grid,
+                                 weights = sparse_rbf_kernel_weights(k = "auto",
+                                                                     phi = "auto",
+                                                                     dist.method = "trout",
+                                                                     p = 2),
+                                 X.center = TRUE,
+                                 X.scale = FALSE,
+                                 norm = 2,
+                                 status = (interactive() && (clustRviz_logger_level() %in% c("MESSAGE", "WARNING", "ERROR")))) {
+
+  tic <- Sys.time()
+
+  ####################
+  ##
+  ## Input validation
+  ##
+  ####################
+
+  dots <- list(...)
+
+  if (length(dots) != 0L) {
+    if (!is.null(names(dots))) {
+      crv_error("Unknown argument ", sQuote(names(dots)[1L]), " passed to ", sQuote("ts_convex_clustering."))
+    } else {
+      crv_error("Unknown ", sQuote("..."), " arguments passed to ", sQuote("ts_convex_clustering."))
+    }
+  }
+
+  if (!is.matrix(X)) {
+    crv_warning(sQuote("X"), " should be a matrix, not a " , class(X)[1],
+                ". Converting with as.matrix().")
+    X <- as.matrix(X)
+  }
+
+  if (!is.complex(X)) {
+    crv_error(sQuote("X"), " must be complex.")
+  }
+
+  # Missing data mask: M_{ij} = 1 means we see X_{ij};
+  # Currently we don't support missing values in X so this better be all ones
+  M <- 1 - is.na(X)
+
+  ## Check that imputation was successful.
+  if (anyNA(X)) {
+    crv_error("ts_convex_clustering() does not support missing values in ", sQuote("X."))
+  }
+
+  if (!all(is.finite(X))) {
+    crv_error("All elements of ", sQuote("X"), " must be finite.")
+  }
+
+  if (!is_logical_scalar(X.center)) {
+    crv_error(sQuote("X.center"), "must be either ", sQuote("TRUE"), " or ", sQuote("FALSE."))
+  }
+
+  if (!is_logical_scalar(X.scale)) {
+    crv_error(sQuote("X.scale"), "must be either ", sQuote("TRUE"), " or ", sQuote("FALSE."))
+  }
+
+  if (norm %not.in% c(1, 2)){
+    crv_error(sQuote("norm"), " must be either 1 or 2.")
+  }
+
+  if(missing(lambda_grid)){
+    crv_error(sQuote("lambda_grid"), " must be supplied.")
+  }
+
+  if(!all(is.finite(lambda_grid))){
+    crv_error(sQuote("lambda_grid"), " containts non-finite values.")
+  }
+
+  if(any(lambda_grid < 0)){
+    crv_error(sQuote("lambda_grid"), " must be strictly positive.")
+  }
+
+  if(any(lambda_grid == 0)){
+    crv_error(sQuote("lambda_grid"), " must be strictly positive - 0 will be automatically added.")
+  }
+
+  if(is.unsorted(lambda_grid)){
+    crv_warning(sQuote("lambda_grid"), " is unsorted: sorting for maximum performance.")
+    lambda_grid <- sort(lambda_grid)
+  }
+
+  l1 <- (norm == 1)
+
+  n <- NROW(X)
+  p <- NCOL(X)
+
+  X.orig <- X
+  # Center and scale X
+  if (X.center | X.scale) {
+    X <- scale(X, center = X.center, scale = X.scale)
+  }
+
+  scale_vector  <- attr(X, "scaled:scale", exact=TRUE)  %||% rep(1, p)
+  center_vector <- attr(X, "scaled:center", exact=TRUE) %||% rep(0, p)
+
+  crv_message("Pre-computing weights and edge sets")
+
+  # Calculate clustering weights
+  if (is.function(weights)) { # Usual case, `weights` is a function which calculates the weight matrix
+    weight_result <- weights(X)
+
+    if (is.matrix(weight_result)) {
+      weight_matrix <- weight_result
+      weight_type   <- UserFunction()
+    } else {
+      weight_matrix <- weight_result$weight_mat
+      weight_type   <- weight_result$type
+    }
+  } else if (is.matrix(weights)) {
+
+    if (!is_square(weights)) {
+      crv_error(sQuote("weights"), " must be a square matrix.")
+    }
+
+    if (NROW(weights) != NROW(X)) {
+      crv_error(sQuote("NROW(weights)"), " must be equal to ", sQuote("NROW(X)."))
+    }
+
+    weight_matrix <- weights
+    weight_type   <- UserMatrix()
+  } else {
+    crv_error(sQuote("ts_convex_clustering"), " does not know how to handle ", sQuote("weights"),
+              " of class ", class(weights)[1], ".")
+  }
+
+  if (any(weight_matrix < 0) || anyNA(weight_matrix)) {
+    crv_error("All fusion weights must be positive or zero.")
+  }
+
+  weight_matrix_ut <- weight_matrix * upper.tri(weight_matrix);
+
+  edge_list <- which(weight_matrix_ut != 0, arr.ind = TRUE)
+  edge_list <- edge_list[order(edge_list[, 1], edge_list[, 2]), ]
+  cardE <- NROW(edge_list)
+  D <- matrix(0, ncol = n, nrow = cardE)
+  D[cbind(seq_len(cardE), edge_list[,1])] <-  1
+  D[cbind(seq_len(cardE), edge_list[,2])] <- -1
+
+  weight_vec <- weight_mat_to_vec(weight_matrix)
+
+  crv_message("Computing Time-Series Convex Clustering Solutions")
+  tic_inner <- Sys.time()
+
+  clustering_sol <- TroutClusteringCPP(X = X,
+                                       M = M,
+                                       D = D,
+                                       lambda_grid = lambda_grid,
+                                       weights = weight_vec[weight_vec != 0],
+                                       rho = .clustRvizOptionsEnv[["rho"]],
+                                       thresh = .clustRvizOptionsEnv[["stopping_threshold"]],
+                                       max_iter = .clustRvizOptionsEnv[["max_iter"]],
+                                       max_inner_iter = .clustRvizOptionsEnv[["max_inner_iter"]],
+                                       l1 = l1,
+                                       show_progress = status)
+
+  toc_inner <- Sys.time()
+
+  crv_message("Post-processing")
+
+  lambda_grid <- clustering_sol$gamma_path
+  U_raw <- array(clustering_sol$u_path,
+                 dim = c(n, p, length(lambda_grid)),
+                 dimnames = list(rownames(X.orig),
+                                 colnames(X.orig),
+                                 paste0("Lambda_", seq_along(lambda_grid) - 1)))
+
+  center_tensor <- aperm(array(center_vector, dim = c(p, n, length(lambda_grid))), c(2, 1, 3))
+  scale_tensor  <- aperm(array(scale_vector, dim = c(p, n, length(lambda_grid))), c(2, 1, 3))
+
+  U <- U_raw * scale_tensor + center_tensor
+
+  trout_clustering_fit <- list(
+    X = X.orig,
+    M = M,
+    D = D,
+    U = U,
+    n = n,
+    p = p,
+    norm = norm,
+    lambda_grid = lambda_grid,
+    weights = weight_matrix,
+    weight_type = weight_type,
+    X.center = X.center,
+    center_vector = center_vector,
+    X.scale = X.scale,
+    scale_vector = scale_vector,
+    time = Sys.time() - tic,
+    fit_time = toc_inner - tic_inner
+  )
+
+  class(trout_clustering_fit) <- "TimeSeriesConvexClustering"
+
+  return(trout_clustering_fit)
+}
+
+#' Print \code{\link{ts_convex_clustering}} Results
+#'
+#' Prints a brief descripton of a fitted \code{ts_convex_clustering} object.
+#'
+#' Reports number of observations and variables of dataset, any preprocessing
+#' done by the \code{\link{ts_convex_clustering}} function, and regularization
+#' details.
+#'
+#' @param x an object of class \code{ts_convex_clustering} as returned by
+#'          \code{\link{ts_convex_clustering}}
+#' @param ... Additional unused arguments
+#' @export
+print.TimeSeriesConvexClustering <- function(x, ...) {
+  alg_string <- paste("ADMM", if(x$norm == 1) "[L1]" else "[L2]")
+
+  cat("Time Series [Phase-Aligned Convex] Convex Clustering Fit Summary\n")
+  cat("================================================================\n\n")
+  cat("Algorithm:", alg_string, "\n")
+  cat("Grid:", length(x$lambda_grid), "values of lambda. \n")
+  cat("Fit Time:", sprintf("%2.3f %s", x$fit_time, attr(x$fit_time, "units")), "\n")
+  cat("Total Time:", sprintf("%2.3f %s", x$time, attr(x$time, "units")), "\n\n")
+  cat("Number of Observations:", x$n, "\n")
+  cat("Number of Variables:   ", x$p, "\n\n")
+
+  cat("Pre-processing options:\n")
+  cat(" - Columnwise centering:", x$X.center, "\n")
+  cat(" - Columnwise scaling:  ", x$X.scale, "\n\n")
+
+  cat("Weights:\n")
+  print(x$weight_type)
+
+  invisible(x)
+}