harrelfe · gforge · May 9, 2014 · May 16, 2014 · May 16, 2014 · Aug 13, 2014
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,8 @@
+.Rproj.user
+.Rhistory
+.RData
+.RBuildignore
+*.Rproj
+src/*.o
+src/*.so
+src/*.dll
diff --git a/R/Glm.s b/R/Glm.s
@@ -11,23 +11,26 @@ Glm <-
     print(family)
     stop("`family' not recognized")
   }
+
   mt <- terms(formula, data = data)
   if (missing(data)) data <- environment(formula)
-  mf <- match.call(expand.dots = FALSE)
-  mf$family <- mf$start <- mf$control <- mf$maxit <- NULL
-  mf$model <- mf$method <- mf$x <- mf$y <- mf$contrasts <- NULL
-  mf$... <- NULL
-  mf$drop.unused.levels <- TRUE
-  mf$na.action <- na.action
-  mf[[1]] <- as.name("model.frame")
-
+
   dul <- .Options$drop.unused.levels
   if(!length(dul) || dul) {
     on.exit(options(drop.unused.levels=dul))
     options(drop.unused.levels=FALSE)
   }
 
-  mf <- Design(eval(mf, parent.frame()))
+  # Generate the data.frame
+  mf <- match.call(expand.dots = FALSE)
+  m <- match(c("formula", "data", "subset", "weights", "na.action", 
+               "etastart", "mustart", "offset"), names(mf), 0L)
+  mf <- mf[c(1L, m)]
+  mf$drop.unused.levels <- TRUE
+  mf[[1L]] <- quote(stats::model.frame)
+  mf <- eval(mf, parent.frame())
+
+  mf <- Design(mf)
   desatr <- attr(mf,'Design')
   attr(mf, 'Design') <- NULL
   nact <- attr(mf, 'na.action')

diff --git a/R/lrm.s b/R/lrm.s
@@ -3,12 +3,12 @@ lrm <- function(formula, data,subset, na.action=na.delete,
                 linear.predictors=TRUE, se.fit=FALSE, 
                 penalty=0, penalty.matrix, tol=1e-7, strata.penalty=0,
                 var.penalty=c('simple','sandwich'),
-                weights, normwt=FALSE, scale=FALSE, ...)
+                weights, normwt=FALSE, scale=FALSE, offset, ...)
 {
   call <- match.call()
   var.penalty <- match.arg(var.penalty)
   m <- match.call(expand.dots=FALSE)
-  mc <- match(c("formula", "data", "subset", "weights", "na.action"), 
+  mc <- match(c("formula", "data", "subset", "weights", "na.action", "offset"), 
              names(m), 0)
   m <- m[c(1, mc)]
   m$na.action <- na.action
@@ -39,8 +39,9 @@ lrm <- function(formula, data,subset, na.action=na.delete,
     atr <- atrx$Design
 
     Y <- model.extract(X, 'response')
-    offs <- model.offset(X)
-    if(!length(offs)) offs <- 0
+    if (missing(offset)) offset <- model.offset(X)
+    else offset <- X[,"(offset)"]
+    if(!length(offset)) offset <- 0
     weights <- wt <- model.extract(X, 'weights')
     if(length(weights))
       warning('currently weights are ignored in model validation and bootstrapping lrm fits')
@@ -88,8 +89,8 @@ lrm <- function(formula, data,subset, na.action=na.delete,
   else
     {
       X <- eval.parent(m)
-      offs <- model.offset(X)
-      if(!length(offs)) offs <- 0
+      if (missing(offset)) offset <- model.offset(X)
+      if(!length(offset)) offset <- 0
       Y <- model.extract(X, 'response')
       Y <- Y[!is.na(Y)]
       Terms <- X <- NULL
@@ -102,16 +103,16 @@ lrm <- function(formula, data,subset, na.action=na.delete,
 
   if(nstrata > 1) {
     if(scale) stop('scale=TRUE not implemented for stratified model')
-    f <- lrm.fit.strat(X,Y,Strata,offset=offs,
-                       penalty.matrix=penalty.matrix,
-                       strata.penalty=strata.penalty,
-                       tol=tol,
-                       weights=weights,normwt=normwt, ...)
+    f <- lrm.fit.strat(X,Y,Strata,offset=offset,
+                         penalty.matrix=penalty.matrix,
+                         strata.penalty=strata.penalty,
+                         tol=tol,
+                         weights=weights,normwt=normwt, ...)
   }
   else {
     if(existsFunction(method)) {
       fitter <- getFunction(method)
-      f <- fitter(X, Y, offset=offs,
+      f <- fitter(X, Y, offset=offset,
                   penalty.matrix=penalty.matrix, tol=tol,
                   weights=weights, normwt=normwt, scale=scale, ...)
     }
@@ -135,7 +136,7 @@ lrm <- function(formula, data,subset, na.action=na.delete,
       v <- f$var
       if(var.penalty=='sandwich') f$var.from.info.matrix <- v
       f.nopenalty <- 
-        fitter(X, Y, offset=offs, initial=f$coef, maxit=1, tol=tol,
+        fitter(X, Y, offset=offset, initial=f$coef, maxit=1, tol=tol,
                scale=scale)
       ##  info.matrix.unpenalized <- solvet(f.nopenalty$var, tol=tol)
       info.matrix.unpenalized <- f.nopenalty$info.matrix

diff --git a/R/ols.s b/R/ols.s
@@ -2,12 +2,13 @@ ols <- function(formula, data, weights, subset, na.action=na.delete,
                 method = "qr", model = FALSE, x = FALSE, y = FALSE,
                 se.fit=FALSE, linear.predictors=TRUE,
                 penalty=0, penalty.matrix, tol=1e-7, sigma=NULL,
-                var.penalty=c('simple','sandwich'), ...)
+                var.penalty=c('simple','sandwich'), offset, 
+                ...)
 {
   call <- match.call()
   var.penalty <- match.arg(var.penalty)
   m <- match.call(expand.dots = FALSE)
-  mc <- match(c("formula", "data", "subset", "weights", "na.action"), 
+  mc <- match(c("formula", "data", "subset", "weights", "na.action", "offset"), 
               names(m), 0)
   m <- m[c(1, mc)]
   m$na.action <- na.action
@@ -24,7 +25,8 @@ ols <- function(formula, data, weights, subset, na.action=na.delete,
     }
 
     X      <- eval.parent(m)
-    offset <- model.offset(X)
+    if (missing(offset)) offset <- model.offset(X)
+    else offset <- X[,"(offset)"]
     X      <- Design(X)
     options(drop.unused.levels=dul)
     atrx  <- attributes(X)

diff --git a/R/predictrms.s b/R/predictrms.s
@@ -65,7 +65,7 @@ predictrms <-
   off <- attr(Terms, 'offset')
   offset <- if(! length(off)) 0
   else
-    model.offset(model.frame(Terms, newdata, na.action=na.action, ...))
+    model.offset(model.frame(fit, newdata, na.action=na.action, ...))
 
   ## Terms.nooff <- if(length(off)) drop.terms(Terms, off) else Terms
   ## Only works correctly if offset is at the end of the formula

diff --git a/R/rms.s b/R/rms.s
@@ -67,10 +67,20 @@ Design <- function(mf, allow.offset=TRUE, intercept=1) {
   response.pres <- attr(Terms, 'response') > 0
 
   offs <- attr(Terms, "offset")
-  if(!length(offs)) offs <- 0
+  if(is.null(offs)){
+    # stats::model.frame generates a (offset) variable
+    # if the model was specified with a offset parameter
+    # and no offset in the formula
+    if ("(offset)" %in% names(mf) &&
+          !"(offset)" %in% attr(Terms, "term.labels")){
+      offs <- which("(offset)" == names(mf))
+    }else{
+      offs <- 0
+    }
+  } 
   if(offs>0 & !allow.offset)
-	stop("offset variable not allowed in formula")
-
+    stop("offset variable not allowed in formula")
+  
 
   factors <- attr(Terms, "factors")
   if(length(factors) && response.pres) factors <- factors[-1,,drop=FALSE]
@@ -130,7 +140,7 @@ Design <- function(mf, allow.offset=TRUE, intercept=1) {
       }
 
       za <- z$assume.code
-      zname <- z$name
+      zname <- paste(z$name, collapse="_")
 
       fname.incl.dup <- c(fname.incl.dup, zname)
       if(!length(fname) || !any(fname==zname)) { # unique factor
@@ -139,7 +149,10 @@ Design <- function(mf, allow.offset=TRUE, intercept=1) {
         flabel <- c(flabel,z$label)
         asm <- c(asm,za)
         colnam[[i1]] <- z$colnames
-        if(za != 8 && length(colnam)) name <- c(name, colnam[[i1]])
+        if(za == 1 && length(colnam)) name <- c(name, 
+                                                paste(colnam[[i1]], collapse="_"))
+        else if(za != 8 && length(colnam)) name <- c(name, 
+                                                     colnam[[i1]])
         if(za != 9) {
           funits <- c(funits, if(length(z$units))z$units else '')
           if(length(z$parms)) parm[[zname]] <- z$parms
@@ -191,7 +204,7 @@ Design <- function(mf, allow.offset=TRUE, intercept=1) {
 
     ## S-Plus, if only offset in model, has factors as 2 rows 0 cols
     if(nrf || length(fname.incl.dup))
-      if((nrf-(offs > 0)) != length(fname.incl.dup))
+      if((nrf-length(attr(Terms, "offset"))) != length(fname.incl.dup))
         stop("program logic error 1")
     if(length(factors)) for(i in 1:ncol(factors)) {
       f <- factors[,i]

diff --git a/man/lrm.Rd b/man/lrm.Rd
@@ -14,7 +14,7 @@ lrm(formula, data, subset, na.action=na.delete, method="lrm.fit",
     model=FALSE, x=FALSE, y=FALSE, linear.predictors=TRUE, se.fit=FALSE, 
     penalty=0, penalty.matrix, tol=1e-7, 
     strata.penalty=0, var.penalty=c('simple','sandwich'),
-    weights, normwt, scale=FALSE, \dots)
+    weights, normwt, scale=FALSE, offset, \dots)
 
 \method{print}{lrm}(x, digits=4, strata.coefs=FALSE,
     coefs=TRUE, latex=FALSE, title='Logistic Regression Model', \dots)
@@ -138,6 +138,9 @@ variances estimates that are too low.
     sample sizes where for example spline or polynomial component
     variables create scaling problems leading to loss of precision when
     accumulating sums of squares and crossproducts.}
+\item{offset}{this can be used to specify an a \emph{priori} known component
+ to be included in the linear predictor during fitting.  One or more \code{\link[stats  ]{offset}} terms can be included in the formula instead or as well, and if more than one is specified their sum is used. 
+}
 \item{\dots}{arguments that are passed to \code{lrm.fit}, or from
     \code{print}, to \code{\link{prModFit}}}
 \item{digits}{number of significant digits to use}

diff --git a/man/ols.Rd b/man/ols.Rd
@@ -16,7 +16,7 @@ ols(formula, data, weights, subset, na.action=na.delete,
     method="qr", model=FALSE,
     x=FALSE, y=FALSE, se.fit=FALSE, linear.predictors=TRUE,
     penalty=0, penalty.matrix, tol=1e-7, sigma,
-    var.penalty=c('simple','sandwich'), \dots)
+    var.penalty=c('simple','sandwich'), offset, \dots)
 }
 \arguments{
 \item{formula}{
@@ -94,6 +94,10 @@ inverse of the penalized information matrix.  Specify
 under \code{var}), which limited simulation studies have shown yields
 variances estimates that are too low.
 }
+\item{offset}{this can be used to specify an a \emph{priori} known component
+ to be included in the linear predictor during fitting, see \code{\link[stats]{lm}}
+ for more details.
+}
 \item{\dots}{arguments to pass to \code{\link{lm.wfit}} or
   \code{\link{lm.fit}}}
 }

diff --git a/tests/testthat/test-offset-glm.R b/tests/testthat/test-offset-glm.R
@@ -0,0 +1,137 @@
+library(testthat)
+library(rms)
+# sessionInfo("rms") # v. 4.2-1 from github
+test_df <- data.frame(Y  = c(15,  7, 36,  4, 16, 12, 41, 15),
+                      N  = c(4949, 3534, 12210, 344, 6178, 
+                             4883, 11256, 7125),
+                      x1 = c(-0.1, 0, 0.2, 0, 1, 1.1, 1.1, 1),
+                      x2 = c(2.2, 1.5, 4.5, 7.2, 4.5, 3.2, 9.1, 5.2))
+test_df$fact_var <- factor(rep(c("A", "B", "C"), times=c(4,2, nrow(test_df)-6)))
+ddist <- datadist(test_df)
+options(datadist="ddist")
+
+test_that("Compare basic Glm with glm",{
+  # Base check
+  fit.Glm <- Glm(Y ~ x1 + x2 + fact_var, data=test_df)
+  fit.glm <- glm(Y ~ x1 + x2 + fact_var, data=test_df)
+  expect_equivalent(coef(fit.Glm), coef(fit.glm))
+  nd <- data.frame(x1=1, x2=4, fact_var="A")
+  expect_equivalent(predict(fit.Glm, newdata=nd),
+                    predict(fit.glm, newdata=nd))
+  nd <- data.frame(x1=1, x2=4, fact_var="B")
+  expect_equivalent(predict(fit.Glm, newdata=nd),
+                    predict(fit.glm, newdata=nd))
+
+  expect_equivalent(vcov(fit.Glm),
+                    vcov(fit.glm),
+                    info="The covariance matrices should be identical")
+})
+
+test_that("Compare offset Glm with glm with data parameter",{
+  # Check offset term
+  fit.Glm <- list(plain=list(formula = Glm(Y ~ x1 + x2 + fact_var + offset(N), 
+                                           data=test_df),
+                             parameter = Glm(Y ~ x1 + x2 + fact_var , offset=N, 
+                                             data=test_df)),
+                  poisson=list(formula = Glm(Y ~ x1 + x2 + fact_var + offset(log(N)), 
+                                             data=test_df, family=poisson),
+                               parameter = Glm(Y ~ x1 + x2 + fact_var , offset=log(N), 
+                                               data=test_df, family=poisson)))
+  fit.glm <- list(plain=glm(Y ~ x1 + x2 + fact_var , offset=N, 
+                            data=test_df),
+                  poisson=glm(Y ~ x1 + x2 + fact_var , offset=log(N), 
+                              data=test_df, family=poisson))
+  # Check offset term
+  for (glm_family in names(fit.Glm)){
+    for (fn in names(fit.Glm[[glm_family]])){
+      expect_equivalent(coef(fit.Glm[[glm_family]][[fn]]), 
+                        coef(fit.glm[[glm_family]]),
+                        info=paste("Coefficients are not right",
+                                   "for", fn,
+                                   "for family =", glm_family))
+
+      expect_equivalent(contrast(fit.Glm[[glm_family]][[fn]], 
+                                 a=list(x1=1), 
+                                 b=list(x1=0))$Contrast,
+                        coef(fit.Glm[[glm_family]][[fn]])["x1"],
+                        info=paste("Contrast fails to provide the correct coefficient",
+                                   "for", fn,
+                                   "for family =", glm_family))
+
+      expect_equivalent(vcov(fit.Glm[[glm_family]][[fn]]),
+                        vcov(fit.glm[[glm_family]]),
+                        info=paste("The covariance matrices should be identical", 
+                                   "when using", fn,
+                                   "for family =", glm_family))
+
+
+      nd <- data.frame(x1=1, x2=4, N=0, fact_var="A")
+      expect_equivalent(predict(fit.Glm[[glm_family]][[fn]], newdata=nd),
+                        predict(fit.glm[[glm_family]], newdata=nd),
+                        info=paste("Offset term is not equal to 0",
+                                   "for the", fn, "version",
+                                   "for family =", glm_family))
+      nd$N <- 1000
+      expect_equivalent(predict(fit.Glm[[glm_family]][[fn]], newdata=nd),
+                        predict(fit.glm[[glm_family]], newdata=nd),
+                        info=paste("Offset term is not equal to 1000",
+                                   "for the", fn, "version",
+                                   "for family =", glm_family))
+    }
+  }
+})
+
+test_that("Compare offset Glm with lm with(dataset, Glm(...))",{
+  fit.Glm <- list(plain=list(param = with(test_df, 
+                                          Glm(Y ~ x1 + x2 + fact_var, offset=N)),
+                             formula = with(test_df, 
+                                            Glm(Y ~ x1 + x2 + offset(N) + fact_var))),
+                  poisson=list(param = with(test_df, 
+                                             Glm(Y ~ x1 + x2 + fact_var, offset=log(N), family=poisson)),
+                                formula = with(test_df, 
+                                               Glm(Y ~ x1 + x2 + offset(log(N)) + fact_var,, family=poisson))))
+  fit.glm <- list(plain=with(test_df, 
+                             glm(Y ~ x1 + x2 + fact_var, offset=N)),
+                  poisson=with(test_df, 
+                               glm(Y ~ x1 + x2 + fact_var, offset=log(N), family=poisson)))
+
+
+  # Check offset term
+  for (glm_family in names(fit.Glm)){
+    for (fn in names(fit.Glm[[glm_family]])){
+      expect_equivalent(coef(fit.Glm[[glm_family]][[fn]]), 
+                        coef(fit.glm[[glm_family]]),
+                        info=paste("Coefficients are not right",
+                                   "for", fn,
+                                   "for family =", glm_family))
+
+      expect_equivalent(contrast(fit.Glm[[glm_family]][[fn]], 
+                                 a=list(x1=1), 
+                                 b=list(x1=0))$Contrast,
+                        coef(fit.Glm[[glm_family]][[fn]])["x1"],
+                        info=paste("Contrast fails to provide the correct coefficient",
+                                   "for", fn,
+                                   "for family =", glm_family))
+
+      expect_equivalent(vcov(fit.Glm[[glm_family]][[fn]]),
+                        vcov(fit.glm[[glm_family]]),
+                        info=paste("The covariance matrices should be identical", 
+                                   "when using", fn,
+                                   "for family =", glm_family))
+
+
+      nd <- data.frame(x1=1, x2=4, N=0, fact_var="A")
+      expect_equivalent(predict(fit.Glm[[glm_family]][[fn]], newdata=nd),
+                        predict(fit.glm[[glm_family]], newdata=nd),
+                        info=paste("Offset term is not equal to 0",
+                                   "for the", fn, "version",
+                                   "for family =", glm_family))
+      nd$N <- 1000
+      expect_equivalent(predict(fit.Glm[[glm_family]][[fn]], newdata=nd),
+                        predict(fit.glm[[glm_family]], newdata=nd),
+                        info=paste("Offset term is not equal to 1000",
+                                   "for the", fn, "version",
+                                   "for family =", glm_family))
+    }
+  }
+})