diff --git a/R/logregbin.b.R b/R/logregbin.b.R index 573e9b57..a2d7d357 100644 --- a/R/logregbin.b.R +++ b/R/logregbin.b.R @@ -76,6 +76,12 @@ logRegBinClass <- if (requireNamespace('jmvcore')) R6::R6Class( return(private$.deviance) }, + logLik = function() { + if (is.null(private$.logLik)) + private$.logLik <- private$.computeLogLik() + + return(private$.logLik) + }, AIC = function() { if (is.null(private$.AIC)) private$.AIC <- private$.computeAIC() @@ -167,6 +173,7 @@ logRegBinClass <- if (requireNamespace('jmvcore')) R6::R6Class( .lrtModelComparison = NULL, .lrtModelTerms = NULL, .deviance = NULL, + .logLik = NULL, .AIC = NULL, .BIC = NULL, .pseudoR2 = NULL, @@ -227,14 +234,41 @@ logRegBinClass <- if (requireNamespace('jmvcore')) R6::R6Class( data <- self$dataProcessed formulas <- self$formulas - if (is.numeric(modelNo)) + if (is.numeric(modelNo)) { formulas <- formulas[modelNo] + } + raw_weights <- self$weights + weight_scale_ratio <- 1 + if (!is.null(raw_weights)) { + sum_w <- sum(raw_weights) + n_rows <- length(raw_weights) + if (sum_w != 0) { + weight_scale_ratio <- n_rows / sum_w + norm_weights <- raw_weights * weight_scale_ratio + } else { + norm_weights <- raw_weights + } + + data$.NORM_WEIGHTS <- norm_weights + } + models <- list() for (i in seq_along(formulas)) { - models[[i]] <- stats::glm( - formulas[[i]], data=data, family="binomial", weights=self$weights - ) + if (! is.null(raw_weights)) { + models[[i]] <- stats::glm( + formulas[[i]], + data=data, + family="binomial", + weights=.NORM_WEIGHTS + ) + + models[[i]]$prior.weights <- raw_weights + models[[i]]$weight_scale_ratio <- weight_scale_ratio + } else { + models[[i]] <- stats::glm(formulas[[i]], data=data, family="binomial") + models[[i]]$weight_scale_ratio <- 1 + } } return(models) @@ -287,12 +321,54 @@ logRegBinClass <- if (requireNamespace('jmvcore')) R6::R6Class( .computeLrtModelTerms = function() { lrtModelTerms <- list() for (i in seq_along(self$models)) { - lrtModelTerms[[i]] <- car::Anova( - self$models[[i]], + # If weights are used, the model object has 'raw' weights in prior.weights (from .computeModels swap) + # But car::Anova uses the call/formula which has 'normalized' weights. + # This mismatch causes car::Anova to calculate incorrect stats. + # We interpret a local copy of the model with restored normalized weights. + + model <- self$models[[i]] + if (! is.null(model$weight_scale_ratio)) { + model$prior.weights <- model$data$.NORM_WEIGHTS + } + + lrt <- car::Anova( + model, test="LR", type=3, singular.ok=TRUE ) + + # Use a fresh model to avoid prior.weights swap issues and environment scope problems + modelOrig <- self$models[[i]] + formula <- self$formulas[[i]] + data <- modelOrig$data + + # Check if weights are used (if .NORM_WEIGHTS column exists) + weights <- NULL + if (".NORM_WEIGHTS" %in% names(data)) { + weights <- as.name(".NORM_WEIGHTS") + } + + # Use do.call to avoid scope issues with 'weights' argument (avoiding stats::weights conflict) + args <- list(formula=formula, data=data, family=stats::binomial) + if ( ! is.null(weights)) + args$weights <- weights + + model <- do.call(stats::glm, args) + + lrt <- car::Anova( + model, + test="LR", + type=3, + singular.ok=TRUE + ) + + if (! is.null(modelOrig$weight_scale_ratio)) { + lrt[['LR Chisq']] <- lrt[['LR Chisq']] / modelOrig$weight_scale_ratio + lrt[['Pr(>Chisq)']] <- stats::pchisq(lrt[['LR Chisq']], lrt[['Df']], lower.tail=FALSE) + } + + lrtModelTerms[[i]] <- lrt } return(lrtModelTerms) @@ -300,39 +376,78 @@ logRegBinClass <- if (requireNamespace('jmvcore')) R6::R6Class( .computeDeviance = function() { dev <- list() for (i in seq_along(self$models)) - dev[[i]] <- self$models[[i]]$deviance + dev[[i]] <- -2 * self$logLik[[i]] return(dev) }, + #' Calculate log-likelihood manually + #' + #' @description + #' Manually calculates the log-likelihood to correctly handle non-integer weights, + #' bypassing stats::glm behavior which rounds weights/counts for binomial family. + .computeLogLik = function() { + logLik <- list() + for (i in seq_along(self$models)) { + logLik[[i]] <- private$.calcLogLik(self$models[[i]]) + } + return(logLik) + }, + #' Calculate AIC manually + #' + #' @description + #' Uses the manually calculated log-likelihood to compute AIC, ensuring consistency + #' with weighted data where stats::AIC would fail due to rounding. .computeAIC = function() { AIC <- list() - for (i in seq_along(self$models)) - AIC[[i]] <- stats::AIC(self$models[[i]]) + for (i in seq_along(self$models)) { + k <- self$models[[i]]$rank + AIC[[i]] <- 2 * k - 2 * self$logLik[[i]] + } return(AIC) }, + #' Calculate BIC manually + #' + #' @description + #' Uses the manually calculated log-likelihood and sum(weights) as sample size + #' to compute BIC, ensuring consistency with weighted data. .computeBIC = function() { BIC <- list() - for (i in seq_along(self$models)) - BIC[[i]] <- stats::BIC(self$models[[i]]) + for (i in seq_along(self$models)) { + model <- self$models[[i]] + k <- model$rank + n <- sum(model$prior.weights) + BIC[[i]] <- k * log(n) - 2 * self$logLik[[i]] + } return(BIC) }, .computePseudoR2 = function() { pR2 <- list() for (i in seq_along(self$models)) { - dev <- self$deviance[[i]] - nullDev <- self$models[[i]]$null.deviance - n <- length(self$models[[i]]$fitted.values) + model <- self$models[[i]] + y <- model$y + mu <- model$fitted.values + w <- model$prior.weights + n <- sum(w) + + ll <- self$logLik[[i]] + + # Null model logLik + y_bar <- sum(w * y) / sum(w) + logLikNull <- sum(w * (y * log(y_bar) + (1 - y) * log(1 - y_bar))) - r2mf <- 1 - dev/nullDev + dev <- -2 * ll + nullDev <- -2 * logLikNull + + r2mf <- 1 - dev / nullDev r2cs <- 1 - exp(-(nullDev - dev) / n) r2n <- r2cs / (1 - exp(-nullDev / n)) - meanFittedProbs <- tapply( - self$models[[i]]$fitted.values, self$models[[i]]$y, mean, na.rm=TRUE - ) - r2t <- unname(diff(meanFittedProbs)) + # For Tjur's R2, we need weighted means of fitted probs for y=0 and y=1 + mu_y1 <- sum(mu[y == 1] * w[y == 1]) / sum(w[y == 1]) + mu_y0 <- sum(mu[y == 0] * w[y == 0]) / sum(w[y == 0]) + r2t <- mu_y1 - mu_y0 pR2[[i]] <- list(r2mf=r2mf, r2cs=r2cs, r2n=r2n, r2t=r2t) } @@ -341,8 +456,20 @@ logRegBinClass <- if (requireNamespace('jmvcore')) R6::R6Class( .computeModelTest = function() { modelTest <- list() for (i in seq_along(self$models)) { - chi <- self$models[[i]]$null.deviance - self$deviance[[i]] - df <- self$models[[i]]$df.null - self$models[[i]]$df.residual + model <- self$models[[i]] + + # Calculate null probability + w <- model$prior.weights + y <- model$y + mu_null <- sum(w * y) / sum(w) + + # Calculate Null Deviance MANUALLY (On the correct Raw Scale) + ll_null <- private$.calcLogLik(model, mu = mu_null) + null_deviance <- -2 * ll_null + + # Calculate Chi-Square + chi <- null_deviance - self$deviance[[i]] + df <- model$df.null - model$df.residual p <- 1 - pchisq(chi, df) modelTest[[i]] <- list(chi=chi, df=df, p=p) @@ -370,15 +497,35 @@ logRegBinClass <- if (requireNamespace('jmvcore')) R6::R6Class( }, .computeCICoefEst = function(type="LOR") { if (type == "OR") - level <- self$options$ciWidthOR/100 + width <- self$options$ciWidthOR else - level <- self$options$ciWidth/100 + width <- self$options$ciWidth + + alpha <- (1 - (width / 100)) / 2 + z_crit <- stats::qnorm(1 - alpha) ci <- list() for (i in seq_along(self$models)) { - ci[[i]] <- confint.default(self$models[[i]], level=level) - if (type == "OR") - ci[[i]] <- exp(ci[[i]]) + model <- self$models[[i]] + + summ <- summary(model) + est <- summ$coefficients[, "Estimate"] + se_orig <- summ$coefficients[, "Std. Error"] + + ratio <- if (!is.null(model$weight_scale_ratio)) model$weight_scale_ratio else 1 + se_correct <- se_orig * sqrt(ratio) + + moe <- z_crit * se_correct + + lower <- est - moe + upper <- est + moe + + if (type == "OR") { + lower <- exp(lower) + upper <- exp(upper) + } + + ci[[i]] <- cbind(lower, upper) } return(ci) }, @@ -386,9 +533,13 @@ logRegBinClass <- if (requireNamespace('jmvcore')) R6::R6Class( class <- list() for (i in seq_along(self$models)) { pred <- ifelse(self$fitted[[i]] > self$options$cutOff, 1, 0) - df <- data.frame(response=factor(self$models[[i]]$y, levels=c(0,1)), - predicted=factor(pred, levels=c(0,1))) - class[[i]] <- stats::xtabs(~ response + predicted, data=df) + w <- self$models[[i]]$prior.weights + df <- data.frame( + response=factor(self$models[[i]]$y, levels=c(0,1)), + predicted=factor(pred, levels=c(0,1)), + weights=w + ) + class[[i]] <- stats::xtabs(weights ~ response + predicted, data=df) } return(class) }, @@ -396,10 +547,38 @@ logRegBinClass <- if (requireNamespace('jmvcore')) R6::R6Class( AUC <- list() for (i in seq_along(self$models)) { prob <- self$fitted[[i]] - pred <- ROCR::prediction(prob, self$models[[i]]$y) - perf <- ROCR::performance(pred, measure = "auc") - - AUC[[i]] <- unlist(perf@y.values) + y <- self$models[[i]]$y + w <- self$models[[i]]$prior.weights + + # Order by predicted probability descending + o <- order(prob, decreasing = TRUE) + prob_o <- prob[o] + y_o <- y[o] + w_o <- w[o] + + # Cumulative sums of weights for positives (y=1) and negatives (y=0) + TP <- cumsum(w_o * y_o) + FP <- cumsum(w_o * (1 - y_o)) + + # Total weighted positives and negatives + P_tot <- TP[length(TP)] + N_tot <- FP[length(FP)] + + # TPR and FPR + TPR <- TP / P_tot + FPR <- FP / N_tot + + # Area under curve using trapezoidal rule + # We need to prepend 0 to TPR and FPR to start from (0,0) + TPR <- c(0, TPR) + FPR <- c(0, FPR) + + # diff(FPR) gives the width of each trapezoid/rectangle + # (TPR[-1] + TPR[-length(TPR)]) / 2 gives average height + + auc <- sum(diff(FPR) * (TPR[-1] + TPR[-length(TPR)]) / 2) + + AUC[[i]] <- auc } return(AUC) }, @@ -446,7 +625,7 @@ logRegBinClass <- if (requireNamespace('jmvcore')) R6::R6Class( model, formula, cov.reduce=FUN, - type='response', + type='link', options=list(level=self$options$ciWidthEmm / 100), weights=weights, data=self$dataProcessed, @@ -455,7 +634,24 @@ logRegBinClass <- if (requireNamespace('jmvcore')) R6::R6Class( silent = TRUE ) - emmTable[[j]] <- try(as.data.frame(summary(mm)), silent = TRUE) + df <- try(as.data.frame(summary(mm)), silent = TRUE) + + if ( ! isError(df) && ! is.null(model$weight_scale_ratio)) { + df$SE <- df$SE * sqrt(model$weight_scale_ratio) + z_crit <- stats::qnorm(1 - (1 - (self$options$ciWidthEmm / 100)) / 2) + df$asymp.LCL <- df$emmean - z_crit * df$SE + df$asymp.UCL <- df$emmean + z_crit * df$SE + } + + # Transform to response scale + if ( ! isError(df)) { + df$prob <- stats::plogis(df$emmean) + df$asymp.LCL <- stats::plogis(df$asymp.LCL) + df$asymp.UCL <- stats::plogis(df$asymp.UCL) + emmTable[[j]] <- df + } else { + emmTable[[j]] <- df + } }) } } @@ -881,10 +1077,15 @@ logRegBinClass <- if (requireNamespace('jmvcore')) R6::R6Class( for (i in seq_along(termsAll)) { table <- groups$get(key=i)$coef - model <- summary(self$models[[i]]) + model <- self$models[[i]] + modelSummary <- summary(model) + + se_scale <- sqrt(model$weight_scale_ratio) + CI <- self$CICoefEst[[i]] CIOR <- self$CICoefEstOR[[i]] - coef<- model$coefficients + + coef<- modelSummary$coefficients terms <- termsAll[[i]] rowTerms <- jmvcore::decomposeTerms(rownames(coef)) @@ -892,8 +1093,10 @@ logRegBinClass <- if (requireNamespace('jmvcore')) R6::R6Class( term <- terms[[j]] # check which rows have the same length + same terms - index <- which(length(term) == sapply(rowTerms, length) & - sapply(rowTerms, function(x) all(term %in% x))) + index <- which( + length(term) == sapply(rowTerms, length) & + sapply(rowTerms, function(x) all(term %in% x)) + ) if (length(index) != 1) { table$setNote( @@ -902,13 +1105,18 @@ logRegBinClass <- if (requireNamespace('jmvcore')) R6::R6Class( ) next } - + + est <- coef[index, 'Estimate'] + se <- coef[index, 'Std. Error'] * se_scale + z <- est / se + p <- 2 * (1 - stats::pnorm(abs(z))) + row <- list() - row[["est"]] <- coef[index, 'Estimate'] - row[["se"]] <- coef[index, 'Std. Error'] - row[["odds"]] <- exp(coef[index, 'Estimate']) - row[["z"]] <- coef[index, 'z value'] - row[["p"]] <- coef[index, 'Pr(>|z|)'] + row[["est"]] <- est + row[["se"]] <- se + row[["odds"]] <- exp(est) + row[["z"]] <- z + row[["p"]] <- p row[["lower"]] <- CI[index, 1] row[["upper"]] <- CI[index, 2] row[["oddsLower"]] <- CIOR[index, 1] @@ -1515,5 +1723,26 @@ logRegBinClass <- if (requireNamespace('jmvcore')) R6::R6Class( height <- xAxis + height return(c(width, height)) - }) + }, + .calcLogLik = function(model, mu=NULL) { + # Extract basic data from the model + y <- model$y + w <- model$prior.weights + + # Determine Probabilities (mu) + # If mu is NULL, use the model's own fitted values (Full Model) + # If mu is provided (e.g., Null Model mean), use that. + if (is.null(mu)) { + mu <- model$fitted.values + } + + # Safety Clamp (Avoid log(0)) + mu <- ifelse(mu <= 1e-16, 1e-16, mu) + mu <- ifelse(mu >= 1 - 1e-16, 1 - 1e-16, mu) + + # Calculate Weighted Log-Likelihood + logLik <- sum(w * (y * log(mu) + (1 - y) * log(1 - mu))) + return(logLik) + } + ) ) diff --git a/R/logregbin.h.R b/R/logregbin.h.R index 29c92360..f4ab4c12 100644 --- a/R/logregbin.h.R +++ b/R/logregbin.h.R @@ -863,7 +863,7 @@ logRegBinBase <- if (requireNamespace("jmvcore", quietly=TRUE)) R6::R6Class( pause = NULL, completeWhenFilled = FALSE, requiresMissings = FALSE, - weightsSupport = 'integerOnly') + weightsSupport = 'full') })) #' Binomial Logistic Regression diff --git a/jamovi/logregbin.a.yaml b/jamovi/logregbin.a.yaml index a0e9eb7f..afefc6de 100644 --- a/jamovi/logregbin.a.yaml +++ b/jamovi/logregbin.a.yaml @@ -7,7 +7,7 @@ menuTitle: 2 Outcomes menuSubtitle: Binomial version: '1.0.0' jas: '1.2' -weightsSupport: 'integerOnly' +weightsSupport: 'full' description: main: Binomial Logistic Regression diff --git a/tests/testthat/data/logRegBin_weightsData.csv b/tests/testthat/data/logRegBin_weightsData.csv new file mode 100644 index 00000000..d895aacb --- /dev/null +++ b/tests/testthat/data/logRegBin_weightsData.csv @@ -0,0 +1,21 @@ +"y","x","w_freq","w_large" +1,1.37095844714667,5,58.9980516349897 +0,-0.564698171396089,2,58.5612064925954 +1,0.363128411337339,2,80.5218369467184 +0,0.63286260496104,2,116.742651467212 +1,0.404268323140999,4,50.0238896580413 +1,-0.106124516091484,3,70.8569956943393 +0,1.51152199743894,5,143.303412734531 +1,-0.0946590384130976,2,142.564474861138 +0,2.01842371387704,2,123.409430100583 +1,-0.062714099052421,2,83.3071983419359 +1,1.30486965422349,5,101.506332983263 +1,2.28664539270111,1,124.397464632057 +0,-1.38886070111234,1,111.915924004279 +1,-0.278788766817371,4,112.624534452334 +0,-0.133321336393658,5,71.7157698236406 +0,0.635950398070074,2,71.6567310970277 +1,-0.284252921416072,1,88.8945028651506 +0,-2.65645542090478,5,144.245569198392 +0,-2.44046692857552,4,146.260801376775 +1,1.32011334573019,4,123.985527921468 diff --git a/tests/testthat/testlogregbin.R b/tests/testthat/testlogregbin.R index 137a414c..6af9d7af 100644 --- a/tests/testthat/testlogregbin.R +++ b/tests/testthat/testlogregbin.R @@ -344,6 +344,9 @@ testthat::test_that("Analysis works with global weights", { factors="factor", blocks=list(list("cov", "factor")), refLevels=refLevels, + ci=TRUE, + OR=TRUE, + ciOR=TRUE ) # Test model fit table @@ -359,6 +362,11 @@ testthat::test_that("Analysis works with global weights", { testthat::expect_equal(c(0.14, 0.085, NA, 0.201, 0.21), coefTable[['se']], tolerance = 1e-3) testthat::expect_equal(c(-1.418, -2.949, NA, 2.276, 0.128), coefTable[['z']], tolerance = 1e-3) testthat::expect_equal(c(0.156, 0.003, NA, 0.023, 0.899), coefTable[['p']], tolerance = 1e-3) + testthat::expect_equal(c(-0.474, -0.417, NA, 0.064, -0.385), coefTable[['lower']], tolerance = 1e-3) + testthat::expect_equal(c(0.076, -0.084, NA, 0.851, 0.438), coefTable[['upper']], tolerance = 1e-3) + testthat::expect_equal(c(0.820, 0.778, NA, 1.58, 1.027), coefTable[['odds']], tolerance = 1e-3) + testthat::expect_equal(c(0.622, 0.659, NA, 1.066, 0.681), coefTable[['oddsLower']], tolerance = 1e-3) + testthat::expect_equal(c(1.079, 0.919, NA, 2.343, 1.550), coefTable[['oddsUpper']], tolerance = 1e-3) }) testthat::test_that("Analysis adds note when design matrix is singular", { @@ -438,3 +446,217 @@ testthat::test_that('Reference level defaults to first level for faulty referenc } }) +testthat::test_that("Analysis works with non-integer weights", { + suppressWarnings(RNGversion("3.5.0")) + set.seed(1337) + + N <- 100 + cov1 <- rnorm(N) + cov2 <- rnorm(N) + z <- 1 + 2 * cov1 + 3 * cov2 + pr <- 1 / ( 1 + exp(-z)) + dep <- factor(rbinom(N, 1, pr)) + df <- data.frame(dep=rep(dep, 2), cov1=rep(cov1, 2), cov2=rep(cov2, 2)) + + weights <- rep(0.5, N * 2) + attr(df, "jmv-weights") <- weights + + r <- jmv::logRegBin( + data = df, + bic = TRUE, + dep = "dep", + covs = c("cov1", "cov2"), + blocks = list(list("cov1", "cov2")), + modelTest = TRUE, + pseudoR2 = c("r2mf", "r2cs", "r2n", "r2t") + ) + + # Test model fit table + modelFitTable <- r$modelFit$asDF + testthat::expect_equal(1, modelFitTable[['model']]) + testthat::expect_equal(39.039, modelFitTable[['dev']], tolerance = 1e-3) + testthat::expect_equal(45.039, modelFitTable[['aic']], tolerance = 1e-3) + testthat::expect_equal(52.854, modelFitTable[['bic']], tolerance = 1e-3) + testthat::expect_equal(0.701, modelFitTable[['r2mf']], tolerance = 1e-3) + testthat::expect_equal(0.600, modelFitTable[['r2cs']], tolerance = 1e-3) + testthat::expect_equal(0.823, modelFitTable[['r2n']], tolerance = 1e-3) + testthat::expect_equal(0.733, modelFitTable[['r2t']], tolerance = 1e-3) + testthat::expect_equal(91.645, modelFitTable[['chi']], tolerance = 1e-3) + testthat::expect_equal(2, modelFitTable[['df']]) + testthat::expect_equal(0, modelFitTable[['p']], tolerance = 1e-3) + + # Test model coefficients table + coefTable <- r$models[[1]]$coef$asDF + testthat::expect_equal(c('Intercept', 'cov1', 'cov2'), coefTable[['term']]) + testthat::expect_equal(c(0.926, 3.042, 4.929), coefTable[['est']], tolerance = 1e-3) + testthat::expect_equal(c(0.461, 0.815, 1.166), coefTable[['se']], tolerance = 1e-3) + testthat::expect_equal(c(2.008, 3.734, 4.226), coefTable[['z']], tolerance = 1e-3) + testthat::expect_equal(c(0.045, 0, 0), coefTable[['p']], tolerance = 1e-3) +}) + +testthat::test_that("Analysis produces correct Class Table and AUC with weights", { + suppressWarnings(RNGversion("3.5.0")) + set.seed(1337) + + # Weighted dataset: high weight on a "hard" error case to ensure visible difference if ignored + df_weighted <- data.frame( + y = c(1, 0, 0), + x = c(0.8, 0.9, 0.2), + w = c(1, 10, 1) # Total N = 12 + ) + attr(df_weighted, "jmv-weights") <- df_weighted$w + + # Expanded dataset (unweighted equivalent) + df_expanded <- df_weighted[rep(1:3, times=df_weighted$w), ] + attr(df_expanded, "jmv-weights") <- NULL + + # Run Weighted Model + r_weighted <- jmv::logRegBin( + data = df_weighted, + dep = "y", + covs = "x", + blocks = list(list("x")), + class = TRUE, + auc = TRUE, + acc = TRUE + ) + + # Run Expanded Model + r_expanded <- jmv::logRegBin( + data = df_expanded, + dep = "y", + covs = "x", + blocks = list(list("x")), + class = TRUE, + auc = TRUE, + acc = TRUE + ) + + # Check AUC + auc_w <- r_weighted$models[[1]]$pred$measures$asDF[['auc']] + auc_e <- r_expanded$models[[1]]$pred$measures$asDF[['auc']] + testthat::expect_equal(auc_w, auc_e, tolerance = 1e-5) + + # Check Accuracy (derived from Class Table) + acc_w <- r_weighted$models[[1]]$pred$measures$asDF[['accuracy']] + acc_e <- r_expanded$models[[1]]$pred$measures$asDF[['accuracy']] + testthat::expect_equal(acc_w, acc_e, tolerance = 1e-5) + + # Check Class Table Counts directly + ct_w <- r_weighted$models[[1]]$pred$class$asDF + ct_e <- r_expanded$models[[1]]$pred$class$asDF + + # 0 = Neg, 1 = Pos. + # Columns: neg[0] (Obs 0, Pred 0), pos[0] (Obs 0, Pred 1), neg[1] (Obs 1, Pred 0), pos[1] (Obs 1, Pred 1) + # We compare the counts. + testthat::expect_equal(ct_w[['neg[0]']], ct_e[['neg[0]']]) + testthat::expect_equal(ct_w[['pos[0]']], ct_e[['pos[0]']]) + testthat::expect_equal(ct_w[['neg[1]']], ct_e[['neg[1]']]) + testthat::expect_equal(ct_w[['pos[1]']], ct_e[['pos[1]']]) +}) + +testthat::test_that("SPSS Parity: Frequency weights produce correct BIC and Deviance", { + # GIVEN a dataset with integer frequency weights + csv_path <- testthat::test_path("data", "logRegBin_weightsData.csv") + df <- read.csv(csv_path) + attr(df, "jmv-weights") <- df$w_freq + + # WHEN the analysis is run with BIC and Model Test enabled + r <- jmv::logRegBin( + data = df, + dep = "y", + covs = "x", + blocks = list(list("x")), + modelTest = TRUE, + bic = TRUE + ) + + # THEN the Model Fit statistics should match SPSS exactly (N treated as sum of weights = 61) + modelFit <- r$modelFit$asDF + # Deviance: 74.076 (Matches SPSS -2 Log Likelihood) + testthat::expect_equal(74.076, modelFit[['dev']], tolerance = 1e-3) + # AIC: 78.076 (74.076 + 2*2) + testthat::expect_equal(78.076, modelFit[['aic']], tolerance = 1e-3) + # BIC: 82.298 (74.076 + 2 * ln(61)) + # This confirms n=61 (sum of weights) was used for the penalty + testthat::expect_equal(82.298, modelFit[['bic']], tolerance = 1e-3) + # Chi-square: 10.078 + testthat::expect_equal(10.078, modelFit[['chi']], tolerance = 1e-3) + # AND the coefficients should match the SPSS weighted estimates + coef <- r$models[[1]]$coef$asDF + testthat::expect_equal(0.052, coef[['est']][1], tolerance = 1e-3) # Intercept + testthat::expect_equal(0.669, coef[['est']][2], tolerance = 1e-3) # x +}) + +testthat::test_that("SPSS Parity: Large weights handle numerical stability and scale correctly", { + # GIVEN a dataset with large non-integer weights + csv_path <- testthat::test_path("data", "logRegBin_weightsData.csv") + df <- read.csv(csv_path) + attr(df, "jmv-weights") <- df$w_large + + # WHEN the analysis is run + r <- jmv::logRegBin( + data = df, + dep = "y", + covs = "x", + blocks = list(list("x")), + modelTest = TRUE, + bic = TRUE + ) + + # THEN the model statistics match SPSS's + modelFit <- r$modelFit$asDF + testthat::expect_equal(2568.675, modelFit[['dev']], tolerance = 1e-3) + testthat::expect_equal(2572.675, modelFit[['aic']], tolerance = 1e-3) + testthat::expect_equal(238.026, modelFit[['chi']], tolerance = 1e-3) + # AND the coefficients should be stable and match SPSS + coef <- r$models[[1]]$coef$asDF + testthat::expect_equal(-0.030, coef[['est']][1], tolerance = 1e-3) # Intercept + testthat::expect_equal(0.514, coef[['est']][2], tolerance = 1e-3) # x +}) + +testthat::test_that("Analysis handles perfect separation gracefully (weighted)", { + # GIVEN a dataset with Perfect Separation + # All x < 0 are y=0, All x > 0 are y=1 + suppressWarnings(RNGversion("3.5.0")) + set.seed(1337) + + x <- c(rnorm(20, mean = -5), rnorm(20, mean = 5)) + y <- ifelse(x > 0, 1, 0) + + # Add random weights to ensure the weighted code path is active + w <- runif(40, 1, 10) + + df <- data.frame(y=y, x=x) + attr(df, "jmv-weights") <- w + + # WHEN the analysis is run + # We wrap it in expect_error(..., NA) to assert that it DOES NOT throw an error (crash) + testthat::expect_error( + r <- jmv::logRegBin( + data = df, + dep = "y", + covs = "x", + blocks = list(list("x")), + refLevels = list(list(var="y", ref="0")), + ci = TRUE + ), + NA # NA implies "We expect NO error" + ) + + # THEN we expect the analysis to have completed and produced a table + coef <- r$models[[1]]$coef$asDF + + # 1. Estimates should exist (they will be huge, e.g., > 10, but finite) + testthat::expect_true(!is.null(coef[['est']])) + testthat::expect_true(all(is.finite(coef[['est']]))) + + # 2. Standard Errors should be massive (classic sign of separation) + # In perfect separation, SEs often explode to > 1000 + testthat::expect_true(all(coef[['se']] > 100)) + + # 3. Model Fit statistics should still be calculated + modelFit <- r$modelFit$asDF + testthat::expect_true(is.numeric(modelFit[['dev']])) + testthat::expect_true(is.numeric(modelFit[['aic']])) +}) \ No newline at end of file