From 42f8df25308cff35e746c6c406d7d36b366d8bd1 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Mon, 30 Oct 2023 12:42:27 -0400 Subject: [PATCH 01/10] pkgdown and GHA updates --- .Rbuildignore | 1 + .github/workflows/R-CMD-check.yaml | 3 +++ .github/workflows/pkgdown.yaml | 2 ++ .github/workflows/test-coverage.yaml | 2 +- .gitignore | 1 + DESCRIPTION | 3 +-- _pkgdown.yml | 16 +++++----------- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 14a6d3c..e60b238 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -12,3 +12,4 @@ ^CODE_OF_CONDUCT\.md$ ^\.github$ ^codecov\.yml$ +^README\.Rmd$ diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 4e06c17..ee65ccb 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -33,6 +33,9 @@ jobs: - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - {os: ubuntu-latest, r: 'release'} - {os: ubuntu-latest, r: 'oldrel-1'} + - {os: ubuntu-latest, r: 'oldrel-2'} + - {os: ubuntu-latest, r: 'oldrel-3'} + - {os: ubuntu-latest, r: 'oldrel-4'} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index 087f0b0..ed7650c 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -19,6 +19,8 @@ jobs: group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 2c5bb50..27d4528 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -31,7 +31,7 @@ jobs: covr::codecov( quiet = FALSE, clean = FALSE, - install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package") + install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") ) shell: Rscript {0} diff --git a/.gitignore b/.gitignore index 66b86ae..a385dea 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ .Rproj.user .DS_Store inst/doc +docs diff --git a/DESCRIPTION b/DESCRIPTION index 358abcd..d861766 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -12,8 +12,7 @@ Description: A modeling package compiling applicability domain methods in et al (2005) for an overview of applicability domains. License: MIT + file LICENSE -URL: https://github.com/tidymodels/applicable, - https://applicable.tidymodels.org +URL: https://github.com/tidymodels/applicable, https://applicable.tidymodels.org BugReports: https://github.com/tidymodels/applicable/issues Depends: ggplot2, diff --git a/_pkgdown.yml b/_pkgdown.yml index 28b49fe..b7a660d 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,21 +1,15 @@ -url: https://applicable.tidymodels.org - +url: https://applicable.tidymodels.org/ template: package: tidytemplate bootstrap: 5 bslib: - primary: "#CA225E" - + primary: '#CA225E' includes: - in_header: | - - -# https://github.com/tidyverse/tidytemplate for css - + in_header: | + development: mode: auto - - figures: fig.width: 8 fig.height: 5.75 + From 01f03f7c5e9495d3e24fa70c8e0f89b2d919ae5f Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Mon, 30 Oct 2023 12:43:15 -0400 Subject: [PATCH 02/10] usethis::use_tidy_style() --- R/print.R | 2 +- README.Rmd | 1 - tests/testthat/test-hat_values-fit.R | 12 ++++++++---- tests/testthat/test-hat_values-score.R | 12 ++++++++---- tests/testthat/test-misc.R | 3 ++- tests/testthat/test-pca-fit.R | 18 ++++++++++++------ tests/testthat/test-pca-score.R | 18 ++++++++++++------ tests/testthat/test-similarity.R | 22 ++++++++++++++-------- vignettes/continuous-data.Rmd | 26 ++++++++++++-------------- 9 files changed, 69 insertions(+), 45 deletions(-) diff --git a/R/print.R b/R/print.R index 259eaf4..2cd14d8 100644 --- a/R/print.R +++ b/R/print.R @@ -74,7 +74,7 @@ print.apd_hat_values <- function(x, ...) { #' tr_x <- matrix( #' sample(0:1, size = 20 * 50, prob = rep(.5, 2), replace = TRUE), #' ncol = 20 -#' ) +#' ) #' model <- apd_similarity(tr_x) #' print(model) #' @export diff --git a/README.Rmd b/README.Rmd index 709b57a..2766fe6 100644 --- a/README.Rmd +++ b/README.Rmd @@ -15,7 +15,6 @@ knitr::opts_chunk$set( ) options(rlang__backtrace_on_error = "reminder") - ``` # applicable diff --git a/tests/testthat/test-hat_values-fit.R b/tests/testthat/test-hat_values-fit.R index 5837809..bd8cfc0 100644 --- a/tests/testthat/test-hat_values-fit.R +++ b/tests/testthat/test-hat_values-fit.R @@ -12,13 +12,15 @@ test_that("`new_apd_hat_values` arguments are assigned correctly", { }) test_that("XtX_inv is provided", { - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, new_apd_hat_values(blueprint = hardhat::default_xy_blueprint()) ) }) test_that("`new_apd_hat_values` fails when blueprint is numeric", { - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, new_apd_hat_values(XtX_inv = 1, blueprint = 1) ) }) @@ -108,7 +110,8 @@ test_that("`apd_hat_values` fails when matrix has more predictors than samples", bad_data <- mtcars %>% slice(1:5) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, apd_hat_values(bad_data) ) }) @@ -120,7 +123,8 @@ test_that("`apd_hat_values` fails when the matrix X^tX is singular", { ) colnames(bad_data) <- c("A", "B") - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, apd_hat_values(bad_data) ) }) diff --git a/tests/testthat/test-hat_values-score.R b/tests/testthat/test-hat_values-score.R index 84c0ed2..dacfd89 100644 --- a/tests/testthat/test-hat_values-score.R +++ b/tests/testthat/test-hat_values-score.R @@ -1,12 +1,14 @@ test_that("`score_apd_hat_values_numeric` fails when model has no pcs argument", { - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, score_apd_hat_values_numeric(mtcars, mtcars) ) }) test_that("`score` fails when predictors only contain factors", { model <- apd_hat_values(~., iris) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, score(model, iris$Species) ) }) @@ -14,7 +16,8 @@ test_that("`score` fails when predictors only contain factors", { test_that("`score` fails when predictors are vectors", { object <- iris - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, score(object) ) }) @@ -30,7 +33,8 @@ test_that("`score` calculated hat_values are correct", { actual_output <- actual_output$hat_values # Data frame method - expect_equal(ignore_attr = TRUE, + expect_equal( + ignore_attr = TRUE, actual_output, expected ) diff --git a/tests/testthat/test-misc.R b/tests/testthat/test-misc.R index 4aef57f..d27b823 100644 --- a/tests/testthat/test-misc.R +++ b/tests/testthat/test-misc.R @@ -1,6 +1,7 @@ test_that("`names0` fails if `num` is less than 1", { num <- 0 - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, names0(num) ) }) diff --git a/tests/testthat/test-pca-fit.R b/tests/testthat/test-pca-fit.R index ace00bf..8901c7a 100644 --- a/tests/testthat/test-pca-fit.R +++ b/tests/testthat/test-pca-fit.R @@ -18,13 +18,15 @@ test_that("`new_apd_pca` arguments are assigned correctly", { }) test_that("pcs is provided", { - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, new_apd_pca(blueprint = hardhat::default_xy_blueprint()) ) }) test_that("`new_apd_pca` fails when blueprint is numeric", { - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, new_apd_pca(pcs = 1, blueprint = 1) ) }) @@ -58,7 +60,8 @@ test_that("pcs matches `prcomp` output for the data frame method", { expected$x <- NULL # Data frame method - expect_equal(ignore_attr = TRUE, + expect_equal( + ignore_attr = TRUE, apd_pca(mtcars)$pcs, expected ) @@ -69,7 +72,8 @@ test_that("pcs matches `prcomp` output for the formula method", { expected$x <- NULL # Formula method - expect_equal(ignore_attr = TRUE, + expect_equal( + ignore_attr = TRUE, apd_pca(~., mtcars)$pcs, expected ) @@ -81,7 +85,8 @@ test_that("pcs matches `prcomp` output for the recipe method", { # Recipe method rec <- recipes::recipe(~., mtcars) - expect_equal(ignore_attr = TRUE, + expect_equal( + ignore_attr = TRUE, apd_pca(rec, data = mtcars)$pcs, expected ) @@ -92,7 +97,8 @@ test_that("pcs matches `prcomp` output for the matrix method", { expected$x <- NULL # Matrix method - expect_equal(ignore_attr = TRUE, + expect_equal( + ignore_attr = TRUE, apd_pca(as.matrix(mtcars))$pcs, expected ) diff --git a/tests/testthat/test-pca-score.R b/tests/testthat/test-pca-score.R index 3a20844..8a12a00 100644 --- a/tests/testthat/test-pca-score.R +++ b/tests/testthat/test-pca-score.R @@ -1,12 +1,14 @@ test_that("`score_apd_pca_numeric` fails when model has no pcs argument", { - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, score_apd_pca_numeric(mtcars, mtcars) ) }) test_that("`score` fails when predictors only contain factors", { model <- apd_pca(~., iris) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, score(model, iris$Species) ) }) @@ -14,7 +16,8 @@ test_that("`score` fails when predictors only contain factors", { test_that("`score` fails when predictors are vectors", { object <- iris - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, score(object) ) }) @@ -31,7 +34,8 @@ test_that("`score_apd_pca_numeric` pcs output matches `stats::predict` output", dplyr::select(dplyr::matches("^PC\\d+$")) # Data frame method - expect_equal(ignore_attr = TRUE, + expect_equal( + ignore_attr = TRUE, actual_output, expected ) @@ -49,7 +53,8 @@ test_that("`score` pcs output matches `stats::predict` output", { dplyr::select(dplyr::matches("^PC\\d+$")) # Data frame method - expect_equal(ignore_attr = TRUE, + expect_equal( + ignore_attr = TRUE, actual_output, expected ) @@ -67,7 +72,8 @@ test_that("`score_apd_pca_bridge` output is correct", { dplyr::select(dplyr::matches("^PC\\d+$")) # Data frame method - expect_equal(ignore_attr = TRUE, + expect_equal( + ignore_attr = TRUE, actual_output, expected ) diff --git a/tests/testthat/test-similarity.R b/tests/testthat/test-similarity.R index 08b0788..d6eb8e0 100644 --- a/tests/testthat/test-similarity.R +++ b/tests/testthat/test-similarity.R @@ -114,10 +114,12 @@ test_that("matrix method - quantile similarity", { # ------------------------------------------------------------------------------ test_that("bad args", { - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, apd_similarity(tr_x, quantile = 2) ) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, apd_similarity(tr_x_sp) ) }) @@ -143,16 +145,18 @@ test_that("plot output", { # ------------------------------------------------------------------------------ test_that("apd_similarity fails when quantile is neither NA nor a number in [0, 1]", { - - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, apd_similarity(tr_x, quantile = -1) ) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, apd_similarity(tr_x, quantile = 3) ) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, apd_similarity(tr_x, quantile = "la") ) }) @@ -183,7 +187,8 @@ test_that("apd_similarity fails when all the variables have zero variance", { ) bad_data <- as.data.frame(bad_data) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, apd_similarity(bad_data) ) }) @@ -199,7 +204,8 @@ test_that("apd_similarity fails data is not binary", { ) bad_data <- as.data.frame(bad_data) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, apd_similarity(bad_data) ) }) diff --git a/vignettes/continuous-data.Rmd b/vignettes/continuous-data.Rmd index 4fc5f6e..89de9bd 100644 --- a/vignettes/continuous-data.Rmd +++ b/vignettes/continuous-data.Rmd @@ -71,10 +71,10 @@ library(dplyr) ames_cols <- intersect(names(ames), names(ames_new)) -training_data <- - ames %>% +training_data <- + ames %>% # For consistency, only analyze the data on new properties - dplyr::select(one_of(ames_cols)) %>% + dplyr::select(one_of(ames_cols)) %>% mutate( # There is a new neighborhood in ames_new Neighborhood = as.character(Neighborhood), @@ -83,10 +83,10 @@ training_data <- training_recipe <- - recipe( ~ ., data = training_data) %>% - step_dummy(all_nominal()) %>% + recipe(~., data = training_data) %>% + step_dummy(all_nominal()) %>% # Remove variables that have the same value for every data point. - step_zv(all_predictors()) %>% + step_zv(all_predictors()) %>% # Transform variables to be distributed as Gaussian-like as possible. step_YeoJohnson(all_numeric()) %>% # Normalize numeric data to have a mean of zero and @@ -163,8 +163,8 @@ set in the first component: ```{r, echo = FALSE} #| fig-alt: "Histogram chart. PC001 along the x-axis, count along the y-axis. A vertical red line is placed inside the distribution." training_scores <- score(ames_pca, training_data) -ggplot(training_scores, aes(x = PC001)) + - geom_histogram(col = "white", binwidth = .5) + +ggplot(training_scores, aes(x = PC001)) + + geom_histogram(col = "white", binwidth = .5) + geom_vline(xintercept = pca_score$PC001, col = "red") ``` @@ -183,8 +183,8 @@ they were new homes. The also tend to have fairly large garages: ```{r, echo = FALSE} #| fig-alt: "Histogram chart. Garage_Area along the x-axis, count along the y-axis. Two red vertical lines are places. One on the edge of the distribution, another outside the distribution." -ggplot(training_data, aes(x = Garage_Area)) + - geom_histogram(col = "white", binwidth = 50) + +ggplot(training_data, aes(x = Garage_Area)) + + geom_histogram(col = "white", binwidth = 50) + geom_vline(xintercept = ames_new$Garage_Area, col = "red") ``` @@ -221,8 +221,8 @@ Two caveats for using the hat values: Let us apply `apd_hat_values` modeling function to our data (while ensuring that there are no linear dependencies): ```{r} -non_singular_recipe <- - training_recipe %>% +non_singular_recipe <- + training_recipe %>% step_lincomb(all_predictors()) # Recipe interface @@ -231,7 +231,5 @@ ames_hat <- apd_hat_values(non_singular_recipe, training_data) ```{r reset_options} - options(prev_options) - ``` From c978eef45b8d76d3c246d354f6b65b011fb767cc Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Mon, 30 Oct 2023 12:43:31 -0400 Subject: [PATCH 03/10] usethis::use_tidy_description() --- DESCRIPTION | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d861766..15d1034 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -4,7 +4,7 @@ Version: 0.0.1.1 Authors@R: c( person("Marly", "Gotti", , "marlygotti@gmail.com", role = c("aut", "cre")), person("Max", "Kuhn", , "max@posit.co", role = "aut"), - person(given = "Posit Software, PBC", role = c("cph", "fnd")) + person("Posit Software, PBC", role = c("cph", "fnd")) ) Description: A modeling package compiling applicability domain methods in R. It combines different methods to measure the amount of @@ -12,7 +12,8 @@ Description: A modeling package compiling applicability domain methods in et al (2005) for an overview of applicability domains. License: MIT + file LICENSE -URL: https://github.com/tidymodels/applicable, https://applicable.tidymodels.org +URL: https://github.com/tidymodels/applicable, + https://applicable.tidymodels.org BugReports: https://github.com/tidymodels/applicable/issues Depends: ggplot2, @@ -41,10 +42,10 @@ Suggests: xml2 VignetteBuilder: knitr +Config/Needs/website: tidyverse/tidytemplate +Config/testthat/edition: 3 Encoding: UTF-8 Language: en-US LazyData: true Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.3 -Config/Needs/website: tidyverse/tidytemplate -Config/testthat/edition: 3 From 037af281bbf337c1b831c4b8acb6cd2ffeb6207c Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Mon, 30 Oct 2023 12:49:25 -0400 Subject: [PATCH 04/10] regenerate snapshots with newer rlang --- DESCRIPTION | 2 +- man/print.apd_similarity.Rd | 2 +- tests/testthat/_snaps/hat_values-fit.md | 2 +- tests/testthat/_snaps/pca-fit.md | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 15d1034..e527251 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -25,7 +25,7 @@ Imports: Matrix, proxyC, purrr, - rlang, + rlang (>= 1.1.1), stats, tibble, tidyr, diff --git a/man/print.apd_similarity.Rd b/man/print.apd_similarity.Rd index 5b95bd4..31f782b 100644 --- a/man/print.apd_similarity.Rd +++ b/man/print.apd_similarity.Rd @@ -23,7 +23,7 @@ set.seed(535) tr_x <- matrix( sample(0:1, size = 20 * 50, prob = rep(.5, 2), replace = TRUE), ncol = 20 - ) +) model <- apd_similarity(tr_x) print(model) } diff --git a/tests/testthat/_snaps/hat_values-fit.md b/tests/testthat/_snaps/hat_values-fit.md index 779501b..6e323bc 100644 --- a/tests/testthat/_snaps/hat_values-fit.md +++ b/tests/testthat/_snaps/hat_values-fit.md @@ -10,7 +10,7 @@ Code new_apd_hat_values(XtX_inv = 1, blueprint = 1) Error - blueprint should be a blueprint, not a numeric. + `blueprint` must be a , not the number 1. # `apd_hat_values` fails when matrix has more predictors than samples diff --git a/tests/testthat/_snaps/pca-fit.md b/tests/testthat/_snaps/pca-fit.md index d66228e..bd725be 100644 --- a/tests/testthat/_snaps/pca-fit.md +++ b/tests/testthat/_snaps/pca-fit.md @@ -10,5 +10,5 @@ Code new_apd_pca(pcs = 1, blueprint = 1) Error - blueprint should be a blueprint, not a numeric. + `blueprint` must be a , not the number 1. From bfcdb6060069d26fe408df19a5f441d20a359ff7 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Mon, 30 Oct 2023 12:53:21 -0400 Subject: [PATCH 05/10] rstudio -> posit, fix urls --- R/data.R | 4 ++-- README.Rmd | 2 +- README.md | 2 +- inst/WORDLIST | 23 +++++++++++++---------- man/ames_new.Rd | 2 +- man/okc_binary.Rd | 2 +- 6 files changed, 19 insertions(+), 16 deletions(-) diff --git a/R/data.R b/R/data.R index 0c5bcee..a530a3c 100644 --- a/R/data.R +++ b/R/data.R @@ -30,7 +30,7 @@ NULL #' @return \item{okc_binary_train,okc_binary_test}{data frame frames with 61 columns} #' #' @source -#' Kim (2015), "OkCupid Data for Introductory Statistics and Data Science Courses", _Journal of Statistics Education_, Volume 23, Number 2. \url{https://www.tandfonline.com/doi/abs/10.1080/10691898.2015.11889737} +#' Kim (2015), "OkCupid Data for Introductory Statistics and Data Science Courses", _Journal of Statistics Education_, Volume 23, Number 2. \doi{10.1080/10691898.2015.11889737} #' #' Kuhn and Johnson (2020), _Feature Engineering and Selection_, Chapman and Hall/CRC . \url{https://bookdown.org/max/FES/} and \url{https://github.com/topepo/FES} #' @@ -60,7 +60,7 @@ NULL #' Data as an End of Semester Regression Project," \emph{Journal of Statistics #' Education}, Volume 19, Number 3. #' -#' \url{https://www.cityofames.org/government/departments-divisions-a-h/city-assessor} +#' `https://www.cityofames.org/government/departments-divisions-a-h/city-assessor` #' #' \url{http://jse.amstat.org/v19n3/decock/DataDocumentation.txt} #' diff --git a/README.Rmd b/README.Rmd index 2766fe6..5650fb7 100644 --- a/README.Rmd +++ b/README.Rmd @@ -57,7 +57,7 @@ To learn about how to use applicable, check out the vignettes: This project is released with a [Contributor Code of Conduct](https://contributor-covenant.org/version/2/0/CODE_OF_CONDUCT.html). By contributing to this project, you agree to abide by its terms. -- For questions and discussions about tidymodels packages, modeling, and machine learning, please [post on RStudio Community](https://community.rstudio.com/new-topic?category_id=15&tags=tidymodels,question). +- For questions and discussions about tidymodels packages, modeling, and machine learning, please [post on Posit Community](https://community.rstudio.com/new-topic?category_id=15&tags=tidymodels,question). - If you think you have encountered a bug, please [submit an issue](https://github.com/tidymodels/applicable/issues). diff --git a/README.md b/README.md index 5fbc494..7769fe1 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ Conduct](https://contributor-covenant.org/version/2/0/CODE_OF_CONDUCT.html). By contributing to this project, you agree to abide by its terms. - For questions and discussions about tidymodels packages, modeling, and - machine learning, please [post on RStudio + machine learning, please [post on Posit Community](https://community.rstudio.com/new-topic?category_id=15&tags=tidymodels,question). - If you think you have encountered a bug, please [submit an diff --git a/inst/WORDLIST b/inst/WORDLIST index fa3b318..a0cc3cd 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -1,24 +1,27 @@ -al Ames -Chemoinformatics +CMD CRC +Chemoinformatics +Codecov De -doi -et -extensibility Gillet Jaccard Lifecycle Netzeva OkCupid -pca -pcas -pre +PBC QSAR Springer -tibble X'X +al +doi +et +extensibility +funder intercal -CMD +pca +pcas +pre reprex +tibble tidymodels diff --git a/man/ames_new.Rd b/man/ames_new.Rd index bf239a1..0124f25 100644 --- a/man/ames_new.Rd +++ b/man/ames_new.Rd @@ -9,7 +9,7 @@ De Cock, D. (2011). "Ames, Iowa: Alternative to the Boston Housing Data as an End of Semester Regression Project," \emph{Journal of Statistics Education}, Volume 19, Number 3. -\url{https://www.cityofames.org/government/departments-divisions-a-h/city-assessor} +\verb{https://www.cityofames.org/government/departments-divisions-a-h/city-assessor} \url{http://jse.amstat.org/v19n3/decock/DataDocumentation.txt} diff --git a/man/okc_binary.Rd b/man/okc_binary.Rd index dce9dce..35ad4c7 100644 --- a/man/okc_binary.Rd +++ b/man/okc_binary.Rd @@ -7,7 +7,7 @@ \alias{okc_binary_test} \title{OkCupid Binary Predictors} \source{ -Kim (2015), "OkCupid Data for Introductory Statistics and Data Science Courses", \emph{Journal of Statistics Education}, Volume 23, Number 2. \url{https://www.tandfonline.com/doi/abs/10.1080/10691898.2015.11889737} +Kim (2015), "OkCupid Data for Introductory Statistics and Data Science Courses", \emph{Journal of Statistics Education}, Volume 23, Number 2. \doi{10.1080/10691898.2015.11889737} Kuhn and Johnson (2020), \emph{Feature Engineering and Selection}, Chapman and Hall/CRC . \url{https://bookdown.org/max/FES/} and \url{https://github.com/topepo/FES} } From 3f139b8339b1e44b8469eda4224dbf179a89c1ca Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Mon, 30 Oct 2023 13:08:43 -0400 Subject: [PATCH 06/10] re-regenerate snapshots --- tests/testthat/_snaps/hat_values-fit.md | 20 +++++++----- tests/testthat/_snaps/hat_values-score.md | 15 +++++---- tests/testthat/_snaps/misc.md | 5 +-- tests/testthat/_snaps/pca-fit.md | 10 +++--- tests/testthat/_snaps/pca-score.md | 15 +++++---- tests/testthat/_snaps/similarity.md | 38 ++++++++++++++--------- 6 files changed, 62 insertions(+), 41 deletions(-) diff --git a/tests/testthat/_snaps/hat_values-fit.md b/tests/testthat/_snaps/hat_values-fit.md index 6e323bc..5cd8d25 100644 --- a/tests/testthat/_snaps/hat_values-fit.md +++ b/tests/testthat/_snaps/hat_values-fit.md @@ -2,29 +2,33 @@ Code new_apd_hat_values(blueprint = hardhat::default_xy_blueprint()) - Error - argument "XtX_inv" is missing, with no default + Condition + Error in `new_apd_hat_values()`: + ! argument "XtX_inv" is missing, with no default # `new_apd_hat_values` fails when blueprint is numeric Code new_apd_hat_values(XtX_inv = 1, blueprint = 1) - Error - `blueprint` must be a , not the number 1. + Condition + Error in `hardhat::new_model()`: + ! `blueprint` must be a , not the number 1. # `apd_hat_values` fails when matrix has more predictors than samples Code apd_hat_values(bad_data) - Error - The number of columns must be less than the number of rows. + Condition + Error in `apd_hat_values_bridge()`: + ! The number of columns must be less than the number of rows. # `apd_hat_values` fails when the matrix X^tX is singular Code apd_hat_values(bad_data) - Error - Unable to compute the hat values of the matrix X of + Condition + Error in `get_inv()`: + ! Unable to compute the hat values of the matrix X of predictors because the matrix resulting from multiplying the transpose of X by X is singular. diff --git a/tests/testthat/_snaps/hat_values-score.md b/tests/testthat/_snaps/hat_values-score.md index b3cbc4d..1c8e939 100644 --- a/tests/testthat/_snaps/hat_values-score.md +++ b/tests/testthat/_snaps/hat_values-score.md @@ -2,22 +2,25 @@ Code score_apd_hat_values_numeric(mtcars, mtcars) - Error - The model must contain an XtX_inv argument. + Condition + Error in `score_apd_hat_values_numeric()`: + ! The model must contain an XtX_inv argument. # `score` fails when predictors only contain factors Code score(model, iris$Species) - Error - The class of `new_data`, 'factor', is not recognized. + Condition + Error in `hardhat::forge()`: + ! The class of `new_data`, 'factor', is not recognized. # `score` fails when predictors are vectors Code score(object) - Error - `object` is not of a recognized type. + Condition + Error in `score()`: + ! `object` is not of a recognized type. Only data.frame, matrix, recipe, and formula objects are allowed. A data.frame was specified. diff --git a/tests/testthat/_snaps/misc.md b/tests/testthat/_snaps/misc.md index 68860d0..247bf44 100644 --- a/tests/testthat/_snaps/misc.md +++ b/tests/testthat/_snaps/misc.md @@ -2,6 +2,7 @@ Code names0(num) - Error - `num` should be > 0 + Condition + Error in `names0()`: + ! `num` should be > 0 diff --git a/tests/testthat/_snaps/pca-fit.md b/tests/testthat/_snaps/pca-fit.md index bd725be..2ef90b2 100644 --- a/tests/testthat/_snaps/pca-fit.md +++ b/tests/testthat/_snaps/pca-fit.md @@ -2,13 +2,15 @@ Code new_apd_pca(blueprint = hardhat::default_xy_blueprint()) - Error - argument "pcs" is missing, with no default + Condition + Error in `new_apd_pca()`: + ! argument "pcs" is missing, with no default # `new_apd_pca` fails when blueprint is numeric Code new_apd_pca(pcs = 1, blueprint = 1) - Error - `blueprint` must be a , not the number 1. + Condition + Error in `hardhat::new_model()`: + ! `blueprint` must be a , not the number 1. diff --git a/tests/testthat/_snaps/pca-score.md b/tests/testthat/_snaps/pca-score.md index 30ac29f..75b58b7 100644 --- a/tests/testthat/_snaps/pca-score.md +++ b/tests/testthat/_snaps/pca-score.md @@ -2,22 +2,25 @@ Code score_apd_pca_numeric(mtcars, mtcars) - Error - The model must contain a pcs argument. + Condition + Error in `score_apd_pca_numeric()`: + ! The model must contain a pcs argument. # `score` fails when predictors only contain factors Code score(model, iris$Species) - Error - The class of `new_data`, 'factor', is not recognized. + Condition + Error in `hardhat::forge()`: + ! The class of `new_data`, 'factor', is not recognized. # `score` fails when predictors are vectors Code score(object) - Error - `object` is not of a recognized type. + Condition + Error in `score()`: + ! `object` is not of a recognized type. Only data.frame, matrix, recipe, and formula objects are allowed. A data.frame was specified. diff --git a/tests/testthat/_snaps/similarity.md b/tests/testthat/_snaps/similarity.md index abb0705..afde47b 100644 --- a/tests/testthat/_snaps/similarity.md +++ b/tests/testthat/_snaps/similarity.md @@ -2,15 +2,17 @@ Code apd_similarity(tr_x, quantile = 2) - Error - The `quantile` argument should be NA or a single numeric value in [0, 1]. + Condition + Error in `apd_similarity_bridge()`: + ! The `quantile` argument should be NA or a single numeric value in [0, 1]. --- Code apd_similarity(tr_x_sp) - Error - `x` is not of a recognized type. + Condition + Error in `apd_similarity()`: + ! `x` is not of a recognized type. Only data.frame, matrix, recipe, and formula objects are allowed. A dgCMatrix was specified. @@ -54,28 +56,32 @@ Code apd_similarity(tr_x, quantile = -1) - Error - The `quantile` argument should be NA or a single numeric value in [0, 1]. + Condition + Error in `apd_similarity_bridge()`: + ! The `quantile` argument should be NA or a single numeric value in [0, 1]. --- Code apd_similarity(tr_x, quantile = 3) - Error - The `quantile` argument should be NA or a single numeric value in [0, 1]. + Condition + Error in `apd_similarity_bridge()`: + ! The `quantile` argument should be NA or a single numeric value in [0, 1]. --- Code apd_similarity(tr_x, quantile = "la") - Error - The `quantile` argument should be NA or a single numeric value in [0, 1]. + Condition + Error in `apd_similarity_bridge()`: + ! The `quantile` argument should be NA or a single numeric value in [0, 1]. # apd_similarity outputs warning with zero variance variables Code apd_similarity(bad_data) - Warning + Condition + Warning: The following variables had zero variance and were removed: a, b, and d Output Applicability domain via similarity @@ -86,13 +92,15 @@ Code apd_similarity(bad_data) - Error - All variables have a single unique value. + Condition + Error in `apd_similarity_bridge()`: + ! All variables have a single unique value. # apd_similarity fails data is not binary Code apd_similarity(bad_data) - Error - The following variables are not binary: b, and d + Condition + Error in `apd_similarity_bridge()`: + ! The following variables are not binary: b, and d From 0c587a9604665fc63dfaf5c3e5159aa467855cf7 Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Mon, 30 Oct 2023 13:38:45 -0400 Subject: [PATCH 07/10] move imports to package docs --- R/0.R | 47 ---------------------------------------- R/applicable-package.R | 49 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 49 deletions(-) delete mode 100644 R/0.R diff --git a/R/0.R b/R/0.R deleted file mode 100644 index 2c8adba..0000000 --- a/R/0.R +++ /dev/null @@ -1,47 +0,0 @@ -#' @importFrom dplyr %>% -#' @importFrom dplyr select -#' @importFrom dplyr slice -#' @importFrom dplyr matches -#' @importFrom dplyr starts_with -#' @importFrom dplyr rename_all -#' @importFrom dplyr mutate -#' @importFrom dplyr mutate_all -#' @importFrom dplyr group_by -#' @importFrom dplyr ungroup -#' @importFrom dplyr count -#' @importFrom dplyr sample_n -#' @importFrom glue glue -#' @importFrom tibble as_tibble -#' @importFrom tibble tibble -#' @importFrom purrr map_dfc -#' @importFrom purrr map2_dfc -#' @importFrom rlang abort -#' @importFrom rlang enquos -#' @importFrom rlang arg_match -#' @importFrom stats predict -#' @importFrom stats prcomp -#' @importFrom stats approx -#' @importFrom stats quantile -#' @importFrom stats ecdf -#' @importFrom stats setNames -#' @importFrom hardhat validate_prediction_size -#' @importFrom hardhat forge -#' @importFrom hardhat mold -#' @importFrom hardhat new_model -#' @importFrom ggplot2 ggplot geom_step xlab ylab aes autoplot -#' @importFrom Matrix Matrix colSums -#' @importFrom tidyselect vars_select -#' @importFrom tidyr gather -#' @importFrom proxyC simil - -# ------------------------------------------------------------------------------ -# nocov - -# Reduce false positives when R CMD check runs its "no visible binding for -# global variable" check -#' @importFrom utils globalVariables -utils::globalVariables( - c("cumulative", "n", "sim", "percentile", "component", "value") -) - -# nocov end diff --git a/R/applicable-package.R b/R/applicable-package.R index b30bbda..f824053 100644 --- a/R/applicable-package.R +++ b/R/applicable-package.R @@ -1,8 +1,53 @@ #' @keywords internal "_PACKAGE" -# The following block is used by usethis to automatically manage -# roxygen namespace tags. Modify with care! ## usethis namespace: start +#' @importFrom dplyr %>% +#' @importFrom dplyr select +#' @importFrom dplyr slice +#' @importFrom dplyr matches +#' @importFrom dplyr starts_with +#' @importFrom dplyr rename_all +#' @importFrom dplyr mutate +#' @importFrom dplyr mutate_all +#' @importFrom dplyr group_by +#' @importFrom dplyr ungroup +#' @importFrom dplyr count +#' @importFrom dplyr sample_n +#' @importFrom glue glue +#' @importFrom tibble as_tibble +#' @importFrom tibble tibble +#' @importFrom purrr map_dfc +#' @importFrom purrr map2_dfc +#' @importFrom rlang abort +#' @importFrom rlang enquos +#' @importFrom rlang arg_match +#' @importFrom stats predict +#' @importFrom stats prcomp +#' @importFrom stats approx +#' @importFrom stats quantile +#' @importFrom stats ecdf +#' @importFrom stats setNames +#' @importFrom hardhat validate_prediction_size +#' @importFrom hardhat forge +#' @importFrom hardhat mold +#' @importFrom hardhat new_model +#' @importFrom ggplot2 ggplot geom_step xlab ylab aes autoplot +#' @importFrom Matrix Matrix colSums +#' @importFrom tidyselect vars_select +#' @importFrom tidyr gather +#' @importFrom proxyC simil + +# ------------------------------------------------------------------------------ +# nocov + +# Reduce false positives when R CMD check runs its "no visible binding for +# global variable" check +#' @importFrom utils globalVariables +utils::globalVariables( + c("cumulative", "n", "sim", "percentile", "component", "value") +) + +# nocov end ## usethis namespace: end NULL From a499e966f415c817ed80b19018e05c5cfd6ce56d Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Mon, 30 Oct 2023 13:54:48 -0400 Subject: [PATCH 08/10] udpate testthat and other pkg version --- DESCRIPTION | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e527251..a5540c2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,7 +21,7 @@ Depends: Imports: dplyr, glue, - hardhat (>= 0.1.2), + hardhat (>= 1.3.0), Matrix, proxyC, purrr, @@ -35,10 +35,10 @@ Suggests: covr, knitr, modeldata, - recipes (>= 0.1.7), + recipes (>= 1.0.8), rmarkdown, spelling, - testthat (>= 3.0.0), + testthat (>= 3.2.0), xml2 VignetteBuilder: knitr From a673e8586e88fa492ed0b8e6c3495050851871af Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Mon, 30 Oct 2023 19:57:41 -0400 Subject: [PATCH 09/10] update imports --- DESCRIPTION | 2 +- NAMESPACE | 4 +-- R/applicable-package.R | 55 ++++++++++++++++++++---------------------- 3 files changed, 28 insertions(+), 33 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a5540c2..fb35c12 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -17,7 +17,7 @@ URL: https://github.com/tidymodels/applicable, BugReports: https://github.com/tidymodels/applicable/issues Depends: ggplot2, - R (>= 3.4) + R (>= 3.6) Imports: dplyr, glue, diff --git a/NAMESPACE b/NAMESPACE index 391f9ad..6cc910e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -31,6 +31,7 @@ export(autoplot.apd_pca) export(autoplot.apd_similarity) export(score) export(score.default) +import(rlang) importFrom(Matrix,Matrix) importFrom(Matrix,colSums) importFrom(dplyr,"%>%") @@ -59,9 +60,6 @@ importFrom(hardhat,validate_prediction_size) importFrom(proxyC,simil) importFrom(purrr,map2_dfc) importFrom(purrr,map_dfc) -importFrom(rlang,abort) -importFrom(rlang,arg_match) -importFrom(rlang,enquos) importFrom(stats,approx) importFrom(stats,ecdf) importFrom(stats,prcomp) diff --git a/R/applicable-package.R b/R/applicable-package.R index f824053..ff213e8 100644 --- a/R/applicable-package.R +++ b/R/applicable-package.R @@ -2,52 +2,49 @@ "_PACKAGE" ## usethis namespace: start + +#' @import rlang #' @importFrom dplyr %>% -#' @importFrom dplyr select -#' @importFrom dplyr slice +#' @importFrom dplyr count +#' @importFrom dplyr group_by #' @importFrom dplyr matches -#' @importFrom dplyr starts_with -#' @importFrom dplyr rename_all #' @importFrom dplyr mutate #' @importFrom dplyr mutate_all -#' @importFrom dplyr group_by -#' @importFrom dplyr ungroup -#' @importFrom dplyr count +#' @importFrom dplyr rename_all #' @importFrom dplyr sample_n +#' @importFrom dplyr select +#' @importFrom dplyr slice +#' @importFrom dplyr starts_with +#' @importFrom dplyr ungroup +#' @importFrom ggplot2 ggplot geom_step xlab ylab aes autoplot #' @importFrom glue glue -#' @importFrom tibble as_tibble -#' @importFrom tibble tibble +#' @importFrom hardhat forge +#' @importFrom hardhat mold +#' @importFrom hardhat new_model +#' @importFrom hardhat validate_prediction_size +#' @importFrom Matrix Matrix colSums +#' @importFrom proxyC simil #' @importFrom purrr map_dfc #' @importFrom purrr map2_dfc -#' @importFrom rlang abort -#' @importFrom rlang enquos -#' @importFrom rlang arg_match -#' @importFrom stats predict -#' @importFrom stats prcomp #' @importFrom stats approx -#' @importFrom stats quantile #' @importFrom stats ecdf +#' @importFrom stats prcomp +#' @importFrom stats predict +#' @importFrom stats quantile #' @importFrom stats setNames -#' @importFrom hardhat validate_prediction_size -#' @importFrom hardhat forge -#' @importFrom hardhat mold -#' @importFrom hardhat new_model -#' @importFrom ggplot2 ggplot geom_step xlab ylab aes autoplot -#' @importFrom Matrix Matrix colSums -#' @importFrom tidyselect vars_select +#' @importFrom tibble as_tibble +#' @importFrom tibble tibble #' @importFrom tidyr gather -#' @importFrom proxyC simil +#' @importFrom tidyselect vars_select +#' @importFrom utils globalVariables +## usethis namespace: end # ------------------------------------------------------------------------------ +# global variable" check # nocov - +# nocov end # Reduce false positives when R CMD check runs its "no visible binding for -# global variable" check -#' @importFrom utils globalVariables utils::globalVariables( c("cumulative", "n", "sim", "percentile", "component", "value") ) - -# nocov end -## usethis namespace: end NULL From f0b5cb084d6e9ef5f2d29b6e91915be2a475dc86 Mon Sep 17 00:00:00 2001 From: Marly Gotti Date: Tue, 23 Apr 2024 12:41:08 +0200 Subject: [PATCH 10/10] retrigger checks