#' @name StandardiseCov
#'
#' @title Scale numeric covariates
#'
#' @description The module scales numeric covariates. By default it conducts a
#'   regular standardisation on all numeric covariates: each value has the
#'   layer mean subtracted and is then divided by the layer standard deviation.
#'   If \code{Gelman = TRUE} the centred values are divided by two standard
#'   deviations instead, as suggested by Gelman. Excluded covariates and
#'   categorical (factor) covariates are not standardised.
#'
#' @details The mean and standard deviation of each covariate are computed
#'   over the non-missing cells of the raster layer. The rescaled raster is
#'   then re-sampled at the \code{longitude}/\code{latitude} of every record so
#'   that the covariate columns of the data frame match the returned raster.
#'   A covariate whose standard deviation is zero or cannot be computed is left
#'   unchanged and a warning is issued. Layer names are preserved.
#'
#' @param .data \strong{Internal parameter, do not use in the workflow function}. \code{.data} is a list of a data frame and a raster object returned from occurrence modules and covariate modules respectively. \code{.data} is passed automatically in workflow from the occurrence and covariate modules to the process module(s) and should not be passed by the user.
#'
#' @param Gelman If \code{TRUE}, divides by two standard deviations rather
#'   than one. Must be a single \code{TRUE} or \code{FALSE}.
#'
#' @param exclude Variable names of numeric covariates to exclude from
#'   standardisation.
#'
#' @family process
#'
#' @author Alison Johnston & Carsten F. Dormann, \email{alison.johnston@@bto.org}
#'
#' @section Data type: presence-only, presence/absence, abundance, proportion
#'
#' @section Version: 0
#'
#' @section Date submitted: 2016-06-16
StandardiseCov <- function (.data, Gelman = FALSE, exclude = NULL)
{
  df <- .data$df
  ras <- .data$ras

  #### step 1. error checking
  # Gelman must be exactly one non-missing logical; NA or a longer vector
  # would otherwise propagate into the scaling factor.
  if (!is.logical(Gelman) || length(Gelman) != 1 || is.na(Gelman)) {
    stop("StandardiseCov: Gelman parameter can only be TRUE or FALSE")
  }
  if (!all(exclude %in% names(ras))) {
    warning("StandardiseCov: Excluded terms are not all covariate names")
  }

  #### step 2. identify numeric covariates and exclude those specified
  # Remember the layer names: raster arithmetic can reset them.
  layerNames <- names(ras)

  # Test each *layer* for being categorical. (Single-bracket indexing of a
  # Raster object, ras[i], addresses cell i rather than layer i, so it cannot
  # be used to inspect a layer.)
  isFactorLayer <- vapply(
    seq_len(raster::nlayers(ras)),
    function(i) isTRUE(any(raster::is.factor(ras[[i]]))),
    logical(1)
  )
  numericLayer <- !isFactorLayer & !(layerNames %in% exclude)
  if (sum(numericLayer) == 0) {
    stop("StandardiseCov not used, as no numeric covariates")
  }
  numericNames <- layerNames[numericLayer]

  #### step 3. standardise all numeric covariates

  # specify whether to divide by 1 or 2 standard deviations
  # Gelman's suggestion to scale by 2 SDs
  # (http://andrewgelman.com/2006/06/21/standardizing_r/)
  sd_mult <- if (Gelman) 2 else 1

  for (cov in numericNames) {
    m <- raster::cellStats(ras[[cov]], "mean", na.rm = TRUE)
    s <- raster::cellStats(ras[[cov]], "sd", na.rm = TRUE) * sd_mult

    # A constant or all-missing layer has no usable spread; dividing by it
    # would fill the layer with NaN/Inf, so leave the layer untouched.
    if (!is.finite(s) || s == 0) {
      warning("StandardiseCov: covariate '", cov,
              "' has zero or undefined standard deviation",
              " and was not standardised")
      next
    }

    if (raster::nlayers(ras) == 1) {
      ras <- (ras - m) / s
    } else {
      ras[[cov]] <- (ras[[cov]] - m) / s
    }
  }

  # Scaling changes neither the number nor the order of layers, so the
  # original names can be put back safely.
  names(ras) <- layerNames

  #### step 4. put new values into df as well
  # extract() gives a bare vector for a single layer and a matrix otherwise;
  # coerce to a data frame and name the columns after the layers either way.
  layer <- raster::extract(ras, df[, c("longitude", "latitude")])
  layer <- as.data.frame(layer)
  names(layer) <- layerNames

  # NOTE(review): as in the original, this assumes the first five columns of
  # df are the non-covariate columns and everything after them is covariate
  # data to be replaced - confirm against the occurrence module output.
  df <- cbind(df[, 1:5], layer)
  return(list(df = df, ras = ras))
}