From 3506fb242a314b59a724e25de0b829ba677b8446 Mon Sep 17 00:00:00 2001 From: Max Gordon Date: Fri, 9 May 2014 12:46:09 +0200 Subject: [PATCH 01/13] Handling of the offset parameter for the ols and lrm functions - also changed the name to offset from offs as to the variable is now a parameter --- R/lrm.s | 18 +++++++++--------- R/ols.s | 5 +++-- man/lrm.Rd | 5 ++++- man/ols.Rd | 6 +++++- 4 files changed, 21 insertions(+), 13 deletions(-) diff --git a/R/lrm.s b/R/lrm.s index 2c2b558..de2bb13 100644 --- a/R/lrm.s +++ b/R/lrm.s @@ -2,8 +2,8 @@ lrm <- function(formula, data,subset, na.action=na.delete, method="lrm.fit", model=FALSE, x=FALSE, y=FALSE, linear.predictors=TRUE, se.fit=FALSE, penalty=0, penalty.matrix, tol=1e-7, strata.penalty=0, - var.penalty=c('simple','sandwich'), - weights, normwt=FALSE, ...) + var.penalty=c('simple','sandwich'), + weights, normwt=FALSE, offset, ...) { call <- match.call() var.penalty <- match.arg(var.penalty) @@ -39,8 +39,8 @@ lrm <- function(formula, data,subset, na.action=na.delete, atr <- atrx$Design Y <- model.extract(X, 'response') - offs <- model.offset(X) - if(!length(offs)) offs <- 0 + if (missing(offset)) offset <- model.offset(X) + if(!length(offset)) offset <- 0 weights <- wt <- model.extract(X, 'weights') if(length(weights)) warning('currently weights are ignored in model validation and bootstrapping lrm fits') @@ -88,8 +88,8 @@ lrm <- function(formula, data,subset, na.action=na.delete, else { X <- eval.parent(m) - offs <- model.offset(X) - if(!length(offs)) offs <- 0 + if (missing(offset)) offset <- model.offset(X) + if(!length(offset)) offset <- 0 Y <- model.extract(X, 'response') Y <- Y[!is.na(Y)] Terms <- X <- NULL @@ -101,7 +101,7 @@ lrm <- function(formula, data,subset, na.action=na.delete, if(method=="model.matrix") return(X) if(nstrata > 1) - f <- lrm.fit.strat(X,Y,Strata,offset=offs, + f <- lrm.fit.strat(X,Y,Strata,offset=offset, penalty.matrix=penalty.matrix, strata.penalty=strata.penalty, tol=tol, @@ -109,7 +109,7 @@ lrm <- function(formula, data,subset, na.action=na.delete, else { if(existsFunction(method)) { fitter <- getFunction(method) - f <- fitter(X, Y, offset=offs, + f <- fitter(X, Y, offset=offset, penalty.matrix=penalty.matrix, tol=tol, weights=weights, normwt=normwt, ...) } @@ -133,7 +133,7 @@ lrm <- function(formula, data,subset, na.action=na.delete, v <- f$var if(var.penalty=='sandwich') f$var.from.info.matrix <- v f.nopenalty <- - fitter(X, Y, offset=offs, initial=f$coef, maxit=1, tol=tol) + fitter(X, Y, offset=offset, initial=f$coef, maxit=1, tol=tol) ## info.matrix.unpenalized <- solvet(f.nopenalty$var, tol=tol) info.matrix.unpenalized <- f.nopenalty$info.matrix dag <- diag(info.matrix.unpenalized %*% v) diff --git a/R/ols.s b/R/ols.s index 1a5029b..0f758f7 100644 --- a/R/ols.s +++ b/R/ols.s @@ -2,7 +2,8 @@ ols <- function(formula, data, weights, subset, na.action=na.delete, method = "qr", model = FALSE, x = FALSE, y = FALSE, se.fit=FALSE, linear.predictors=TRUE, penalty=0, penalty.matrix, tol=1e-7, sigma=NULL, - var.penalty=c('simple','sandwich'), ...) + var.penalty=c('simple','sandwich'), offset, + ...) { call <- match.call() var.penalty <- match.arg(var.penalty) @@ -24,7 +25,7 @@ ols <- function(formula, data, weights, subset, na.action=na.delete, } X <- eval.parent(m) - offset <- model.offset(X) + if (missing(offset)) offset <- model.offset(X) X <- Design(X) options(drop.unused.levels=dul) atrx <- attributes(X) diff --git a/man/lrm.Rd b/man/lrm.Rd index 6b9ea46..b1a6520 100644 --- a/man/lrm.Rd +++ b/man/lrm.Rd @@ -14,7 +14,7 @@ lrm(formula, data, subset, na.action=na.delete, method="lrm.fit", model=FALSE, x=FALSE, y=FALSE, linear.predictors=TRUE, se.fit=FALSE, penalty=0, penalty.matrix, tol=1e-7, strata.penalty=0, var.penalty=c('simple','sandwich'), - weights, normwt, \dots) + weights, normwt, offset, \dots) \method{print}{lrm}(x, digits=4, strata.coefs=FALSE, coefs=TRUE, latex=FALSE, title='Logistic Regression Model', \dots) @@ -130,6 +130,9 @@ variances estimates that are too low. \code{y}; useful for sample surveys as opposed to the default of frequency weighting } +\item{offset}{this can be used to specify an a \emph{priori} known component + to be included in the linear predictor during fitting. One or more \code{\link[stats ]{offset}} terms can be included in the formula instead or as well, and if more than one is specified their sum is used. +} \item{\dots}{arguments that are passed to \code{lrm.fit}, or from \code{print}, to \code{\link{prModFit}}} \item{digits}{number of significant digits to use} diff --git a/man/ols.Rd b/man/ols.Rd index 032cc7b..989e38f 100644 --- a/man/ols.Rd +++ b/man/ols.Rd @@ -16,7 +16,7 @@ ols(formula, data, weights, subset, na.action=na.delete, method="qr", model=FALSE, x=FALSE, y=FALSE, se.fit=FALSE, linear.predictors=TRUE, penalty=0, penalty.matrix, tol=1e-7, sigma, - var.penalty=c('simple','sandwich'), \dots) + var.penalty=c('simple','sandwich'), offset, \dots) } \arguments{ \item{formula}{ @@ -94,6 +94,10 @@ inverse of the penalized information matrix. Specify under \code{var}), which limited simulation studies have shown yields variances estimates that are too low. } +\item{offset}{this can be used to specify an a \emph{priori} known component + to be included in the linear predictor during fitting, see \code{\link[stats]{lm}} + for more details. +} \item{\dots}{arguments to pass to \code{\link{lm.wfit}} or \code{\link{lm.fit}}} } From fc5b864acef8c30bb0bfd37dc3007e0ffd66d517 Mon Sep 17 00:00:00 2001 From: Max Gordon Date: Fri, 16 May 2014 14:09:29 +0200 Subject: [PATCH 02/13] Offset fixes --- R/Glm.s | 21 ++++++++++++--------- R/lrm.s | 3 ++- R/ols.s | 3 ++- R/rms.s | 18 ++++++++++++++---- 4 files changed, 30 insertions(+), 15 deletions(-) diff --git a/R/Glm.s b/R/Glm.s index 4cad59d..f00474a 100644 --- a/R/Glm.s +++ b/R/Glm.s @@ -11,23 +11,26 @@ Glm <- print(family) stop("`family' not recognized") } + mt <- terms(formula, data = data) if (missing(data)) data <- environment(formula) - mf <- match.call(expand.dots = FALSE) - mf$family <- mf$start <- mf$control <- mf$maxit <- NULL - mf$model <- mf$method <- mf$x <- mf$y <- mf$contrasts <- NULL - mf$... <- NULL - mf$drop.unused.levels <- TRUE - mf$na.action <- na.action - mf[[1]] <- as.name("model.frame") - + dul <- .Options$drop.unused.levels if(!length(dul) || dul) { on.exit(options(drop.unused.levels=dul)) options(drop.unused.levels=FALSE) } - mf <- Design(eval(mf, parent.frame())) + # Generate the data.frame + mf <- match.call(expand.dots = FALSE) + m <- match(c("formula", "data", "subset", "weights", "na.action", + "etastart", "mustart", "offset"), names(mf), 0L) + mf <- mf[c(1L, m)] + mf$drop.unused.levels <- TRUE + mf[[1L]] <- quote(stats::model.frame) + mf <- eval(mf, parent.frame()) + + mf <- Design(mf) desatr <- attr(mf,'Design') attr(mf, 'Design') <- NULL nact <- attr(mf, 'na.action') diff --git a/R/lrm.s b/R/lrm.s index de2bb13..47e3346 100644 --- a/R/lrm.s +++ b/R/lrm.s @@ -8,7 +8,7 @@ lrm <- function(formula, data,subset, na.action=na.delete, call <- match.call() var.penalty <- match.arg(var.penalty) m <- match.call(expand.dots=FALSE) - mc <- match(c("formula", "data", "subset", "weights", "na.action"), + mc <- match(c("formula", "data", "subset", "weights", "na.action", "offset"), names(m), 0) m <- m[c(1, mc)] m$na.action <- na.action @@ -40,6 +40,7 @@ lrm <- function(formula, data,subset, na.action=na.delete, Y <- model.extract(X, 'response') if (missing(offset)) offset <- model.offset(X) + else offset <- X[,"(offset)"] if(!length(offset)) offset <- 0 weights <- wt <- model.extract(X, 'weights') if(length(weights)) diff --git a/R/ols.s b/R/ols.s index 0f758f7..80a9030 100644 --- a/R/ols.s +++ b/R/ols.s @@ -8,7 +8,7 @@ ols <- function(formula, data, weights, subset, na.action=na.delete, call <- match.call() var.penalty <- match.arg(var.penalty) m <- match.call(expand.dots = FALSE) - mc <- match(c("formula", "data", "subset", "weights", "na.action"), + mc <- match(c("formula", "data", "subset", "weights", "na.action", "offset"), names(m), 0) m <- m[c(1, mc)] m$na.action <- na.action @@ -26,6 +26,7 @@ ols <- function(formula, data, weights, subset, na.action=na.delete, X <- eval.parent(m) if (missing(offset)) offset <- model.offset(X) + else offset <- X[,"(offset)"] X <- Design(X) options(drop.unused.levels=dul) atrx <- attributes(X) diff --git a/R/rms.s b/R/rms.s index 66b313c..8824829 100644 --- a/R/rms.s +++ b/R/rms.s @@ -67,10 +67,20 @@ Design <- function(mf, allow.offset=TRUE, intercept=1) { response.pres <- attr(Terms, 'response') > 0 offs <- attr(Terms, "offset") - if(!length(offs)) offs <- 0 + if(is.null(offs)){ + # stats::model.frame generates a (offset) variable + # if the model was specified with a offset parameter + # and no offset in the formula + if ("(offset)" %in% names(mf) && + !"(offset)" %in% attr(Terms, "term.labels")){ + offs <- which("(offset)" == names(mf)) + }else{ + offs <- 0 + } + } if(offs>0 & !allow.offset) - stop("offset variable not allowed in formula") - + stop("offset variable not allowed in formula") + factors <- attr(Terms, "factors") if(length(factors) && response.pres) factors <- factors[-1,,drop=FALSE] @@ -191,7 +201,7 @@ Design <- function(mf, allow.offset=TRUE, intercept=1) { ## S-Plus, if only offset in model, has factors as 2 rows 0 cols if(nrf || length(fname.incl.dup)) - if((nrf-(offs > 0)) != length(fname.incl.dup)) + if((nrf-length(attr(Terms, "offset"))) != length(fname.incl.dup)) stop("program logic error 1") if(length(factors)) for(i in 1:ncol(factors)) { f <- factors[,i] From ab15242ec12e2419c0fbc422324ba0c67c446188 Mon Sep 17 00:00:00 2001 From: Max Gordon Date: Sat, 17 May 2014 00:58:01 +0200 Subject: [PATCH 03/13] Added extensive test-cases for Glm and ols with offset terms --- tests/testthat/test-offset-glm.R | 137 +++++++++++++++++++++++++++++++ tests/testthat/test-offset-ols.R | 110 +++++++++++++++++++++++++ 2 files changed, 247 insertions(+) create mode 100644 tests/testthat/test-offset-glm.R create mode 100644 tests/testthat/test-offset-ols.R diff --git a/tests/testthat/test-offset-glm.R b/tests/testthat/test-offset-glm.R new file mode 100644 index 0000000..57a3129 --- /dev/null +++ b/tests/testthat/test-offset-glm.R @@ -0,0 +1,137 @@ +library(testthat) +library(rms) +# sessionInfo("rms") # v. 4.2-1 from github +test_df <- data.frame(Y = c(15, 7, 36, 4, 16, 12, 41, 15), + N = c(4949, 3534, 12210, 344, 6178, + 4883, 11256, 7125), + x1 = c(-0.1, 0, 0.2, 0, 1, 1.1, 1.1, 1), + x2 = c(2.2, 1.5, 4.5, 7.2, 4.5, 3.2, 9.1, 5.2)) +test_df$fact_var <- factor(rep(c("A", "B", "C"), times=c(4,2, nrow(test_df)-6))) +ddist <- datadist(test_df) +options(datadist="ddist") + +test_that("Compare basic Glm with glm",{ + # Base check + fit.Glm <- Glm(Y ~ x1 + x2 + fact_var, data=test_df) + fit.glm <- glm(Y ~ x1 + x2 + fact_var, data=test_df) + expect_equivalent(coef(fit.Glm), coef(fit.glm)) + nd <- data.frame(x1=1, x2=4, fact_var="A") + expect_equivalent(predict(fit.Glm, newdata=nd), + predict(fit.glm, newdata=nd)) + nd <- data.frame(x1=1, x2=4, fact_var="B") + expect_equivalent(predict(fit.Glm, newdata=nd), + predict(fit.glm, newdata=nd)) + + expect_equivalent(vcov(fit.Glm), + vcov(fit.glm), + info="The covariance matrices should be identical") +}) + +test_that("Compare offset Glm with glm with data parameter",{ + # Check offset term + fit.Glm <- list(plain=list(formula = Glm(Y ~ x1 + x2 + fact_var + offset(N), + data=test_df), + parameter = Glm(Y ~ x1 + x2 + fact_var , offset=N, + data=test_df)), + poisson=list(formula = Glm(Y ~ x1 + x2 + fact_var + offset(log(N)), + data=test_df, family=poisson), + parameter = Glm(Y ~ x1 + x2 + fact_var , offset=log(N), + data=test_df, family=poisson))) + fit.glm <- list(plain=glm(Y ~ x1 + x2 + fact_var , offset=N, + data=test_df), + poisson=glm(Y ~ x1 + x2 + fact_var , offset=log(N), + data=test_df, family=poisson)) + # Check offset term + for (glm_family in names(fit.Glm)){ + for (fn in names(fit.Glm[[glm_family]])){ + expect_equivalent(coef(fit.Glm[[glm_family]][[fn]]), + coef(fit.glm[[glm_family]]), + info=paste("Coefficients are not right", + "for", fn, + "for family =", glm_family)) + + expect_equivalent(contrast(fit.Glm[[glm_family]][[fn]], + a=list(x1=1), + b=list(x1=0))$Contrast, + coef(fit.Glm[[glm_family]][[fn]])["x1"], + info=paste("Contrast fails to provide the correct coefficient", + "for", fn, + "for family =", glm_family)) + + expect_equivalent(vcov(fit.Glm[[glm_family]][[fn]]), + vcov(fit.glm[[glm_family]]), + info=paste("The covariance matrices should be identical", + "when using", fn, + "for family =", glm_family)) + + + nd <- data.frame(x1=1, x2=4, N=0, fact_var="A") + expect_equivalent(predict(fit.Glm[[glm_family]][[fn]], newdata=nd), + predict(fit.glm[[glm_family]], newdata=nd), + info=paste("Offset term is not equal to 0", + "for the", fn, "version", + "for family =", glm_family)) + nd$N <- 1000 + expect_equivalent(predict(fit.Glm[[glm_family]][[fn]], newdata=nd), + predict(fit.glm[[glm_family]], newdata=nd), + info=paste("Offset term is not equal to 1000", + "for the", fn, "version", + "for family =", glm_family)) + } + } +}) + +test_that("Compare offset Glm with lm with(dataset, Glm(...))",{ + fit.Glm <- list(plain=list(param = with(test_df, + Glm(Y ~ x1 + x2 + fact_var, offset=N)), + formula = with(test_df, + Glm(Y ~ x1 + x2 + offset(N) + fact_var))), + poisson=list(param = with(test_df, + Glm(Y ~ x1 + x2 + fact_var, offset=log(N), family=poisson)), + formula = with(test_df, + Glm(Y ~ x1 + x2 + offset(log(N)) + fact_var,, family=poisson)))) + fit.glm <- list(plain=with(test_df, + glm(Y ~ x1 + x2 + fact_var, offset=N)), + poisson=with(test_df, + glm(Y ~ x1 + x2 + fact_var, offset=log(N), family=poisson))) + + + # Check offset term + for (glm_family in names(fit.Glm)){ + for (fn in names(fit.Glm[[glm_family]])){ + expect_equivalent(coef(fit.Glm[[glm_family]][[fn]]), + coef(fit.glm[[glm_family]]), + info=paste("Coefficients are not right", + "for", fn, + "for family =", glm_family)) + + expect_equivalent(contrast(fit.Glm[[glm_family]][[fn]], + a=list(x1=1), + b=list(x1=0))$Contrast, + coef(fit.Glm[[glm_family]][[fn]])["x1"], + info=paste("Contrast fails to provide the correct coefficient", + "for", fn, + "for family =", glm_family)) + + expect_equivalent(vcov(fit.Glm[[glm_family]][[fn]]), + vcov(fit.glm[[glm_family]]), + info=paste("The covariance matrices should be identical", + "when using", fn, + "for family =", glm_family)) + + + nd <- data.frame(x1=1, x2=4, N=0, fact_var="A") + expect_equivalent(predict(fit.Glm[[glm_family]][[fn]], newdata=nd), + predict(fit.glm[[glm_family]], newdata=nd), + info=paste("Offset term is not equal to 0", + "for the", fn, "version", + "for family =", glm_family)) + nd$N <- 1000 + expect_equivalent(predict(fit.Glm[[glm_family]][[fn]], newdata=nd), + predict(fit.glm[[glm_family]], newdata=nd), + info=paste("Offset term is not equal to 1000", + "for the", fn, "version", + "for family =", glm_family)) + } + } +}) \ No newline at end of file diff --git a/tests/testthat/test-offset-ols.R b/tests/testthat/test-offset-ols.R new file mode 100644 index 0000000..69541a7 --- /dev/null +++ b/tests/testthat/test-offset-ols.R @@ -0,0 +1,110 @@ +library(testthat) +library(rms) +# sessionInfo("rms") # v. 4.2-1 from github +test_df <- data.frame(Y = c(15, 7, 36, 4, 16, 12, 41, 15), + N = c(4949, 3534, 12210, 344, 6178, + 4883, 11256, 7125), + x1 = c(-0.1, 0, 0.2, 0, 1, 1.1, 1.1, 1), + x2 = c(2.2, 1.5, 4.5, 7.2, 4.5, 3.2, 9.1, 5.2)) +test_df$fact_var <- factor(rep(c("A", "B", "C"), times=c(4,2, nrow(test_df)-6))) +ddist <- datadist(test_df) +options(datadist="ddist") + +test_that("Compare basic ols with lm",{ + # Base check + fit.ols <- ols(Y ~ x1 + x2 + fact_var, data=test_df) + fit.lm <- lm(Y ~ x1 + x2 + fact_var, data=test_df) + expect_equivalent(coef(fit.ols), coef(fit.lm)) + nd <- data.frame(x1=1, x2=4, fact_var="A") + expect_equivalent(predict(fit.ols, newdata=nd), + predict(fit.lm, newdata=nd)) + nd <- data.frame(x1=1, x2=4, fact_var="B") + expect_equivalent(predict(fit.ols, newdata=nd), + predict(fit.lm, newdata=nd)) + + expect_equivalent(vcov(fit.ols), + vcov(fit.lm), + info="The covariance matrices should be identical") +}) + +test_that("Compare offset ols with lm with data parameter",{ + # Check offset term + fit.ols <- list(formula = ols(Y ~ x1 + x2 + fact_var + offset(N), data=test_df), + parameter = ols(Y ~ x1 + x2 + fact_var , offset=N, data=test_df)) + fit.lm <- lm(Y ~ x1 + x2 + fact_var , offset=N, data=test_df) + for (fn in names(fit.ols)){ + expect_equivalent(coef(fit.ols[[fn]]), + coef(fit.lm), + info=paste("The coefficients are not the same", + "when the ols fit is specified using", + "an offset", fn)) + + expect_equivalent(contrast(fit.ols[[fn]], + a=list(x1=1), + b=list(x1=0))$Contrast, + coef(fit.ols[[fn]])["x1"], + info=paste("Contrast fails to provide the correct coefficient", + "for", fn)) + + expect_equivalent(vcov(fit.ols[[fn]]), + vcov(fit.lm), + info=paste("The covariance matrices should be identical", + "when using", fn)) + + + nd <- data.frame(x1=1, x2=4, N=0, fact_var="A") + expect_equivalent(predict(fit.ols[[fn]], newdata=nd), + predict(fit.lm, newdata=nd), + info=paste("The predictions are not the same", + "for offset parameter=0 and", + "when the ols fit is specified using", + "an offset", fn)) + nd$N <- 1000 + expect_equivalent(predict(fit.ols[[fn]], newdata=nd), + predict(fit.lm, newdata=nd), + info=paste("The predictions are not the same", + "for offset parameter=1000 and", + "when the ols fit is specified using", + "an offset", fn)) + + } +}) + +test_that("Compare offset ols with lm with(dataset, ols(...))",{ + fit.ols <- list(param = with(test_df, + ols(Y ~ x1 + x2 + fact_var, offset=N)), + formula = with(test_df, + ols(Y ~ x1 + x2 + offset(N) + fact_var))) + fit.lm <- with(test_df, + lm(Y ~ x1 + x2 + fact_var, offset=N)) + + # Check offset term + for (fn in names(fit.ols)){ + expect_equivalent(coef(fit.ols[[fn]]), coef(fit.lm)) + + expect_equivalent(contrast(fit.ols[[fn]], + a=list(x1=1), + b=list(x1=0))$Contrast, + coef(fit.ols[[fn]])["x1"], + info=paste("Contrast fails to provide the correct coefficient", + "for", fn)) + + expect_equivalent(vcov(fit.ols[[fn]]), + vcov(fit.lm), + info=paste("The covariance matrices should be identical", + "when using", fn)) + + + nd <- data.frame(x1=1, x2=4, N=0, fact_var="A") + expect_equivalent(predict(fit.ols[[fn]], newdata=nd), + predict(fit.lm, newdata=nd), + info=paste("Offset term is not equal to 0", + "for the", fn, "version")) + nd$N <- 1000 + expect_equivalent(predict(fit.ols[[fn]], newdata=nd), + predict(fit.lm, newdata=nd), + info=paste("Offset term is not equal to 1000", + "for the", fn, "version")) + } +}) + From 8866bbbce67b858fe53e14f816a9bf3840cd0109 Mon Sep 17 00:00:00 2001 From: Max Gordon Date: Sun, 24 Aug 2014 13:13:47 +0200 Subject: [PATCH 04/13] Basic ignore settings for RStudio projects --- .gitignore | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9fbc3cc --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +.Rproj.user +.Rhistory +.RData +.RBuildignore +*.Rproj +src/*.o +src/*.so +src/*.dll From 2e0182c0f2ea5575c1e93625ea6692b93a01f941 Mon Sep 17 00:00:00 2001 From: Max Gordon Date: Sun, 24 Aug 2014 13:14:40 +0200 Subject: [PATCH 05/13] model.frame needs fit-object and not the terms --- R/predictrms.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/predictrms.s b/R/predictrms.s index 162ddd4..a1bd160 100644 --- a/R/predictrms.s +++ b/R/predictrms.s @@ -65,7 +65,7 @@ predictrms <- off <- attr(Terms, 'offset') offset <- if(! length(off)) 0 else - model.offset(model.frame(Terms, newdata, na.action=na.action, ...)) + model.offset(model.frame(fit, newdata, na.action=na.action, ...)) ## Terms.nooff <- if(length(off)) drop.terms(Terms, off) else Terms ## Only works correctly if offset is at the end of the formula From ebd7fba6cc5cf0c3109045c8b8f121d1e9e00240 Mon Sep 17 00:00:00 2001 From: Max Gordon Date: Sun, 24 Aug 2014 13:15:05 +0200 Subject: [PATCH 06/13] For testthat to work with the datadist the <<- has to be used --- tests/testthat/test-offset-ols.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-offset-ols.R b/tests/testthat/test-offset-ols.R index 69541a7..290e7d3 100644 --- a/tests/testthat/test-offset-ols.R +++ b/tests/testthat/test-offset-ols.R @@ -7,7 +7,7 @@ test_df <- data.frame(Y = c(15, 7, 36, 4, 16, 12, 41, 15), x1 = c(-0.1, 0, 0.2, 0, 1, 1.1, 1.1, 1), x2 = c(2.2, 1.5, 4.5, 7.2, 4.5, 3.2, 9.1, 5.2)) test_df$fact_var <- factor(rep(c("A", "B", "C"), times=c(4,2, nrow(test_df)-6))) -ddist <- datadist(test_df) +ddist <<- datadist(test_df) options(datadist="ddist") test_that("Compare basic ols with lm",{ From 863dd31aad5b938489cb7f0820ced48fbebd97f5 Mon Sep 17 00:00:00 2001 From: Max Gordon Date: Sun, 24 Aug 2014 14:37:57 +0200 Subject: [PATCH 07/13] Test cases for time dependent variables --- tests/testthat/test-time_interaction.R | 164 +++++++++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 tests/testthat/test-time_interaction.R diff --git a/tests/testthat/test-time_interaction.R b/tests/testthat/test-time_interaction.R new file mode 100644 index 0000000..0cd5722 --- /dev/null +++ b/tests/testthat/test-time_interaction.R @@ -0,0 +1,164 @@ +# Same simulated data set as used in the cph-example +n <- 1000 +set.seed(731) +age <- 50 + 12*rnorm(n) +sex <- factor(sample(c('Male','Female'), n, + rep=TRUE, prob=c(.6, .4))) +cens <- 15*runif(n) +h <- .02*exp(.04*(age-50)+.8*(sex=='Female')) +dt <- -log(runif(n))/h +e <- ifelse(dt <= cens,1,0) +dt <- pmin(dt, cens) + +test_df <- data.frame(age = age, + sex = sex, + dt = dt, + e = e) +label(test_dt$age) <- "Age" +label(test_dt$dt) <- 'Follow-up Time' +units(test_dt$dt) <- "Year" + +dd <<- datadist(test_dt) +options(datadist='dd') + +f <- cph(Surv(dt,e) ~ rcs(age,4) + sex, x=TRUE, y=TRUE) +cox.zph(f, "rank") # tests of PH + + +# Now to the actual time-interaction +library(Epi) +test_df$Start <- 0 +lxs_obj <- Lexis(entry = list(Timeband = Start), + exit = list(Timeband = dt, Age = age + dt), + exit.status = e, + data = test_df) +subset(lxs_obj, lex.id %in% 1:3) + +spl_obj <- + splitLexis(lxs_obj, + time.scale = "Timeband", + breaks = seq(from = 0, + to = ceiling(max(lxs_obj$lex.dur)), + by = .5)) + +subset(spl_obj, lex.id %in% 1:3) + +spl_obj$Stop <- spl_obj$Timeband + spl_obj$lex.dur + +dd <<- datadist(spl_obj) +options(datadist='dd') + +####################### +# Regular interaction # +####################### +coxph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ Age + sex*Timeband, + data = spl_obj) +# Gives: +# Call: +# coxph(formula = Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ +# Age + sex * Timeband, data = spl_obj) +# +# +# coef exp(coef) se(coef) z p +# Age 0.0420 1.043 0.00558 7.53 5.0e-14 +# sexMale -0.9457 0.388 0.26014 -3.64 2.8e-04 +# Timeband NA NA 0.00000 NA NA +# sexMale:Timeband 0.0868 1.091 0.05360 1.62 1.1e-01 +# +# Likelihood ratio test=72.7 on 3 df, p=1.11e-15 n= 13421, number of events= 183 +# Warning message: +# In coxph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ : +# X matrix deemed to be singular; variable 3 + +cph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ Age + sex*Timeband, + data = spl_obj) +# Gives: +# X matrix deemed to be singular; variable Timeband +# +# Model Did Not Converge. No summary provided. + +############################### +# Forced singular interaction # +############################### +coxph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ + Age + sex + sex:Timeband, + data = spl_obj) +# Gives: +# Call: +# coxph(formula = Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ +# Age + sex + sex:Timeband, data = spl_obj) +# +# +# coef exp(coef) se(coef) z p +# Age 0.0420 1.043 0.00558 7.53 5.0e-14 +# sexMale -0.9457 0.388 0.26014 -3.64 2.8e-04 +# sexFemale:Timeband -0.0868 0.917 0.05360 -1.62 1.1e-01 +# sexMale:Timeband NA NA 0.00000 NA NA +# +# Likelihood ratio test=72.7 on 3 df, p=1.11e-15 n= 13421, number of events= 183 +# Warning message: +# In coxph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ : +# X matrix deemed to be singular; variable 4 + +coxph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ Age + sex + I((sex == "Male")*Timeband), + data = spl_obj) +# Gives: +# Call: +# coxph(formula = Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ +# Age + sex + I((sex == "Male") * Timeband), data = spl_obj) +# +# +# coef exp(coef) se(coef) z p +# Age 0.0420 1.043 0.00558 7.53 5.0e-14 +# sexMale -0.9457 0.388 0.26014 -3.64 2.8e-04 +# I((sex == "Male") * Timeband) 0.0868 1.091 0.05360 1.62 1.1e-01 + +cph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ Age + sex + sex:Timeband, + data = spl_obj) +# Gives: +# X matrix deemed to be singular; variable sex=Male * NA +# +# Model Did Not Converge. No summary provided. + +cph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ Age + sex + asis((sex == "Male")*Timeband), + data = spl_obj) +# Gives: +# Error in limits[[zname]] <- if (any(Limnames == zname)) { : +# more elements supplied than there are to replace + +############# +# After fix # +############# +fit_coxph <- coxph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ Age + sex + I((sex == "Male")*Timeband), + data = spl_obj) + +fit_cph <- cph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ Age + sex + asis((sex == "Male")*Timeband), + data = spl_obj) + +# Basically the same +cbind(coxph=coef(fit_coxph), + cph=coef(fit_cph)) +# Although numerically not equivalent +expect_true(sum(abs(coef(fit_cph) - coef(fit_coxph))) < + .Machine$double.eps) + +############# +# Needs fix # +############# + +Predict(fit_cph) +# Error in asis((sex == "Male") * Timeband) : object 'Timeband' not found + +# Not really working as expected +contrast(fit_cph, + a=list(sex = "Male"), + b=list(sex = "Female")) +# Error in Getlimi(name[i], Limval, need.all = TRUE) : +# no limits defined by datadist for variable sex_Timeband +contrast(fit_cph, + a=list(sex = "Male", + Timeband = 0), + b=list(sex = "Female", + Timeband = seq(0, 10, by=.1))) +# Error in gendata(list(coefficients = c(0.0420352254526414, -0.945650117874665, : +# factor(s) not in design: Timeband From 87b9a367c2aca98f79a432541df75b367de520d8 Mon Sep 17 00:00:00 2001 From: Max Gordon Date: Sun, 24 Aug 2014 14:39:42 +0200 Subject: [PATCH 08/13] asis-variables with an interaction effect appear as a vector and not as a single variable --- R/rms.s | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/rms.s b/R/rms.s index 8824829..718b7f1 100644 --- a/R/rms.s +++ b/R/rms.s @@ -140,7 +140,7 @@ Design <- function(mf, allow.offset=TRUE, intercept=1) { } za <- z$assume.code - zname <- z$name + zname <- paste(z$name, collapse="_") fname.incl.dup <- c(fname.incl.dup, zname) if(!length(fname) || !any(fname==zname)) { # unique factor @@ -149,7 +149,8 @@ Design <- function(mf, allow.offset=TRUE, intercept=1) { flabel <- c(flabel,z$label) asm <- c(asm,za) colnam[[i1]] <- z$colnames - if(za != 8 && length(colnam)) name <- c(name, colnam[[i1]]) + if(za != 8 && length(colnam)) name <- c(name, + paste(colnam[[i1]], collapse="_")) if(za != 9) { funits <- c(funits, if(length(z$units))z$units else '') if(length(z$parms)) parm[[zname]] <- z$parms From 64b4f0e8d4eb64ff6b5956136159d54f7f7dd138 Mon Sep 17 00:00:00 2001 From: Max Gordon Date: Sun, 24 Aug 2014 15:00:09 +0200 Subject: [PATCH 09/13] Spelling error --- tests/testthat/test-time_interaction.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-time_interaction.R b/tests/testthat/test-time_interaction.R index 0cd5722..e5c1d68 100644 --- a/tests/testthat/test-time_interaction.R +++ b/tests/testthat/test-time_interaction.R @@ -14,11 +14,11 @@ test_df <- data.frame(age = age, sex = sex, dt = dt, e = e) -label(test_dt$age) <- "Age" -label(test_dt$dt) <- 'Follow-up Time' -units(test_dt$dt) <- "Year" +label(test_df$age) <- "Age" +label(test_df$dt) <- 'Follow-up Time' +units(test_df$dt) <- "Year" -dd <<- datadist(test_dt) +dd <<- datadist(test_df) options(datadist='dd') f <- cph(Surv(dt,e) ~ rcs(age,4) + sex, x=TRUE, y=TRUE) From b3d02f77cf22fad105b97695207b9ccde64aa136 Mon Sep 17 00:00:00 2001 From: Max Gordon Date: Sun, 24 Aug 2014 15:41:51 +0200 Subject: [PATCH 10/13] Code didn't run after spell-check due to that the Epi-package can't handle units() --- tests/testthat/test-time_interaction.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-time_interaction.R b/tests/testthat/test-time_interaction.R index e5c1d68..81faea1 100644 --- a/tests/testthat/test-time_interaction.R +++ b/tests/testthat/test-time_interaction.R @@ -12,11 +12,11 @@ dt <- pmin(dt, cens) test_df <- data.frame(age = age, sex = sex, + Start = 0, dt = dt, e = e) label(test_df$age) <- "Age" label(test_df$dt) <- 'Follow-up Time' -units(test_df$dt) <- "Year" dd <<- datadist(test_df) options(datadist='dd') @@ -27,7 +27,6 @@ cox.zph(f, "rank") # tests of PH # Now to the actual time-interaction library(Epi) -test_df$Start <- 0 lxs_obj <- Lexis(entry = list(Timeband = Start), exit = list(Timeband = dt, Age = age + dt), exit.status = e, @@ -120,7 +119,8 @@ cph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ Age + sex + sex:Timeb # # Model Did Not Converge. No summary provided. -cph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ Age + sex + asis((sex == "Male")*Timeband), +cph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ + rcs(Age, 4) + sex + asis((sex == "Male")*Timeband), data = spl_obj) # Gives: # Error in limits[[zname]] <- if (any(Limnames == zname)) { : From 9e1b0673dd5c7d38730bd7ee735f889497d06607 Mon Sep 17 00:00:00 2001 From: Max Gordon Date: Sun, 24 Aug 2014 15:42:36 +0200 Subject: [PATCH 11/13] The needed pase-fix should only be applied to objects of assume.code == 1 --- R/rms.s | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/rms.s b/R/rms.s index 718b7f1..5cd2eb8 100644 --- a/R/rms.s +++ b/R/rms.s @@ -149,8 +149,10 @@ Design <- function(mf, allow.offset=TRUE, intercept=1) { flabel <- c(flabel,z$label) asm <- c(asm,za) colnam[[i1]] <- z$colnames - if(za != 8 && length(colnam)) name <- c(name, + if(za == 1 && length(colnam)) name <- c(name, paste(colnam[[i1]], collapse="_")) + else if(za != 8 && length(colnam)) name <- c(name, + colnam[[i1]]) if(za != 9) { funits <- c(funits, if(length(z$units))z$units else '') if(length(z$parms)) parm[[zname]] <- z$parms From 550d5a4b22ea04d82d8ec92711aa8450f10992fb Mon Sep 17 00:00:00 2001 From: Max Gordon Date: Sun, 24 Aug 2014 16:05:41 +0200 Subject: [PATCH 12/13] label() was causing issues with the Lexis() call. Also cleaned up the names. --- tests/testthat/test-time_interaction.R | 46 ++++++++++++-------------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/tests/testthat/test-time_interaction.R b/tests/testthat/test-time_interaction.R index 81faea1..80413c2 100644 --- a/tests/testthat/test-time_interaction.R +++ b/tests/testthat/test-time_interaction.R @@ -10,15 +10,13 @@ dt <- -log(runif(n))/h e <- ifelse(dt <= cens,1,0) dt <- pmin(dt, cens) -test_df <- data.frame(age = age, +test <- data.frame(age = age, sex = sex, Start = 0, dt = dt, e = e) -label(test_df$age) <- "Age" -label(test_df$dt) <- 'Follow-up Time' -dd <<- datadist(test_df) +dd <<- datadist(test) options(datadist='dd') f <- cph(Surv(dt,e) ~ rcs(age,4) + sex, x=TRUE, y=TRUE) @@ -27,35 +25,35 @@ cox.zph(f, "rank") # tests of PH # Now to the actual time-interaction library(Epi) -lxs_obj <- Lexis(entry = list(Timeband = Start), +lxs <- Lexis(entry = list(Timeband = Start), exit = list(Timeband = dt, Age = age + dt), exit.status = e, - data = test_df) -subset(lxs_obj, lex.id %in% 1:3) + data = test) +subset(lxs, lex.id %in% 1:3) -spl_obj <- - splitLexis(lxs_obj, +spl <- + splitLexis(lxs, time.scale = "Timeband", breaks = seq(from = 0, - to = ceiling(max(lxs_obj$lex.dur)), + to = ceiling(max(lxs$lex.dur)), by = .5)) -subset(spl_obj, lex.id %in% 1:3) +subset(spl, lex.id %in% 1:3) -spl_obj$Stop <- spl_obj$Timeband + spl_obj$lex.dur +spl$Stop <- spl$Timeband + spl$lex.dur -dd <<- datadist(spl_obj) +dd <<- datadist(spl) options(datadist='dd') ####################### # Regular interaction # ####################### coxph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ Age + sex*Timeband, - data = spl_obj) + data = spl) # Gives: # Call: # coxph(formula = Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ -# Age + sex * Timeband, data = spl_obj) +# Age + sex * Timeband, data = spl) # # # coef exp(coef) se(coef) z p @@ -70,7 +68,7 @@ coxph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ Age + sex*Timeband, # X matrix deemed to be singular; variable 3 cph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ Age + sex*Timeband, - data = spl_obj) + data = spl) # Gives: # X matrix deemed to be singular; variable Timeband # @@ -81,11 +79,11 @@ cph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ Age + sex*Timeband, ############################### coxph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ Age + sex + sex:Timeband, - data = spl_obj) + data = spl) # Gives: # Call: # coxph(formula = Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ -# Age + sex + sex:Timeband, data = spl_obj) +# Age + sex + sex:Timeband, data = spl) # # # coef exp(coef) se(coef) z p @@ -100,11 +98,11 @@ coxph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ # X matrix deemed to be singular; variable 4 coxph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ Age + sex + I((sex == "Male")*Timeband), - data = spl_obj) + data = spl) # Gives: # Call: # coxph(formula = Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ -# Age + sex + I((sex == "Male") * Timeband), data = spl_obj) +# Age + sex + I((sex == "Male") * Timeband), data = spl) # # # coef exp(coef) se(coef) z p @@ -113,7 +111,7 @@ coxph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ Age + sex + I((sex # I((sex == "Male") * Timeband) 0.0868 1.091 0.05360 1.62 1.1e-01 cph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ Age + sex + sex:Timeband, - data = spl_obj) + data = spl) # Gives: # X matrix deemed to be singular; variable sex=Male * NA # @@ -121,7 +119,7 @@ cph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ Age + sex + sex:Timeb cph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ rcs(Age, 4) + sex + asis((sex == "Male")*Timeband), - data = spl_obj) + data = spl) # Gives: # Error in limits[[zname]] <- if (any(Limnames == zname)) { : # more elements supplied than there are to replace @@ -130,10 +128,10 @@ cph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ # After fix # ############# fit_coxph <- coxph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ Age + sex + I((sex == "Male")*Timeband), - data = spl_obj) + data = spl) fit_cph <- cph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ Age + sex + asis((sex == "Male")*Timeband), - data = spl_obj) + data = spl) # Basically the same cbind(coxph=coef(fit_coxph), From 0740eb1af70eaaf1baaa6b1c59856d2264adcc2f Mon Sep 17 00:00:00 2001 From: Max Gordon Date: Wed, 27 Aug 2014 17:27:30 +0200 Subject: [PATCH 13/13] Alternative solution to the time-interaction with a manual interaction variable --- tests/testthat/test-time_interaction.R | 42 +++++++++++++++++++++----- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/tests/testthat/test-time_interaction.R b/tests/testthat/test-time_interaction.R index 80413c2..a88fbd2 100644 --- a/tests/testthat/test-time_interaction.R +++ b/tests/testthat/test-time_interaction.R @@ -10,11 +10,12 @@ dt <- -log(runif(n))/h e <- ifelse(dt <= cens,1,0) dt <- pmin(dt, cens) -test <- data.frame(age = age, - sex = sex, - Start = 0, - dt = dt, - e = e) +test <- + data.frame(age = age, + sex = sex, + Start = 0, + dt = dt, + e = e) dd <<- datadist(test) options(datadist='dd') @@ -26,9 +27,9 @@ cox.zph(f, "rank") # tests of PH # Now to the actual time-interaction library(Epi) lxs <- Lexis(entry = list(Timeband = Start), - exit = list(Timeband = dt, Age = age + dt), - exit.status = e, - data = test) + exit = list(Timeband = dt, Age = age + dt), + exit.status = e, + data = test) subset(lxs, lex.id %in% 1:3) spl <- @@ -42,6 +43,7 @@ subset(spl, lex.id %in% 1:3) spl$Stop <- spl$Timeband + spl$lex.dur + dd <<- datadist(spl) options(datadist='dd') @@ -160,3 +162,27 @@ contrast(fit_cph, Timeband = seq(0, 10, by=.1))) # Error in gendata(list(coefficients = c(0.0420352254526414, -0.945650117874665, : # factor(s) not in design: Timeband + + +################# +# Alt. solution # +################# + +spl_alt <- + within(spl, { + Male_time_int = (sex == "Male")*Timeband + }) + +spl_alt$lex.Cst <- NULL +spl_alt$Start <- NULL +dd <- datadist(spl_alt) +options(datadist = "dd") + +model <- + cph(Surv(time = Timeband, time2 = Stop, event = lex.Xst) ~ + Age + sex + Male_time_int, + data = spl_alt) + +contrast(model, + a = list(sex = "Male", Male_time_int = 0:5), + b = list(sex = "Female", Male_time_int = 0))