diff --git a/NAMESPACE b/NAMESPACE index e964796..21319ec 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand export(crosswalk_data) +export(get_available_crosswalks) export(get_crosswalk) export(list_nhgis_crosswalks) diff --git a/R/crosswalk_data.R b/R/crosswalk_data.R index d820b48..beeb6cf 100644 --- a/R/crosswalk_data.R +++ b/R/crosswalk_data.R @@ -48,7 +48,11 @@ #' about join quality, including the number of data rows not matching the crosswalk #' and vice versa. For state-nested geographies (tract, county, block group, etc.), #' also reports state-level concentration of unmatched rows. Set to FALSE to -#' suppress these messages. +#' suppress these messages. Automatically suppressed when `silent = TRUE`. +#' @param silent Logical. If `TRUE`, suppresses all informational messages and +#' warnings, including join quality diagnostics regardless of `show_join_quality`. +#' Defaults to `getOption("crosswalk.silent", FALSE)`. Set +#' `options(crosswalk.silent = TRUE)` to silence all calls by default. #' #' @return If `return_intermediate = FALSE` (default), a tibble with data summarized #' to the final target geography. @@ -162,7 +166,14 @@ crosswalk_data <- function( count_columns = NULL, non_count_columns = NULL, return_intermediate = FALSE, - show_join_quality = TRUE) { + show_join_quality = TRUE, + silent = getOption("crosswalk.silent", FALSE)) { + + old_opts <- options(crosswalk.silent = silent) + on.exit(options(old_opts), add = TRUE) + + # When silent, suppress join quality regardless of show_join_quality + if (silent) show_join_quality <- FALSE # Determine if we need to fetch the crosswalk crosswalk_provided <- !is.null(crosswalk) @@ -175,14 +186,15 @@ crosswalk_data <- function( } if (crosswalk_provided && geography_provided) { - warning( + cw_warning( "Both 'crosswalk' and geography parameters provided. 
", - "Using the provided 'crosswalk' and ignoring geography parameters.") + "Using the provided 'crosswalk' and ignoring geography parameters.", + call. = FALSE) } # Fetch crosswalk if not provided if (!crosswalk_provided) { - message("Fetching crosswalk from ", source_geography, " to ", target_geography, "...") + cw_message("Fetching crosswalk from ", source_geography, " to ", target_geography, "...") crosswalk <- get_crosswalk( source_geography = source_geography, target_geography = target_geography, @@ -240,7 +252,7 @@ crosswalk_data <- function( step_name <- names(crosswalk_list)[i] step_crosswalk <- crosswalk_list[[i]] - message(stringr::str_c("Applying crosswalk step ", i, " of ", n_steps, "...")) + cw_message(stringr::str_c("Applying crosswalk step ", i, " of ", n_steps, "...")) # Apply single crosswalk step current_data <- apply_single_crosswalk( @@ -460,7 +472,7 @@ format_join_quality_message <- function(join_quality, step_number, total_steps) format(join_quality$n_data_unmatched, big.mark = ","), " of ", format(join_quality$n_data_total, big.mark = ","), - " data rows (", + " unique data GEOIDs (", sprintf("%.1f%%", join_quality$pct_data_unmatched), ") did not match the crosswalk." ) @@ -614,7 +626,7 @@ report_join_quality <- function(data, crosswalk, geoid_column, step_number = 1, # Print messages if there are issues messages <- format_join_quality_message(join_quality, step_number, total_steps) if (length(messages) > 0) { - purrr::walk(messages, message) + purrr::walk(messages, cw_message) } return(join_quality) @@ -648,9 +660,10 @@ apply_single_crosswalk <- function( # Check if crosswalk is empty if (nrow(crosswalk) == 0) { - warning( + cw_warning( "Crosswalk is empty. If source geography is nested within target geography, ", - "consider aggregating your data directly instead.") + "consider aggregating your data directly instead.", + call. 
= FALSE) return(tibble::tibble()) } diff --git a/R/get_crosswalk.R b/R/get_crosswalk.R index c3c8b35..144d1af 100644 --- a/R/get_crosswalk.R +++ b/R/get_crosswalk.R @@ -59,6 +59,9 @@ #' @param cache Directory path. Where to download the crosswalk to. If NULL (default), #' crosswalk is returned but not saved to disk. Individual component crosswalks #' are cached separately when provided. +#' @param silent Logical. If `TRUE`, suppresses all informational messages and +#' warnings. Defaults to `getOption("crosswalk.silent", FALSE)`. Set +#' `options(crosswalk.silent = TRUE)` to silence all calls by default. #' #' @return A list with a consistent structure: #' \describe{ @@ -137,7 +140,11 @@ get_crosswalk <- function( source_year = NULL, target_year = NULL, cache = NULL, - weight = "population") { + weight = "population", + silent = getOption("crosswalk.silent", FALSE)) { + + old_opts <- options(crosswalk.silent = silent) + on.exit(options(old_opts), add = TRUE) # Check for nested geographies (no crosswalk needed) # Determine if years match (both NULL, or both non-NULL and equal) @@ -150,10 +157,10 @@ get_crosswalk <- function( (source_geography == "county" && target_geography == "core_based_statistical_area") if (is_nested && years_match) { - warning( + cw_warning( "The source geography is nested within the target geography and an empty result will be returned. No crosswalk is needed to translate data between nested geographies; -simply aggregate your data to the desired geography.") +simply aggregate your data to the desired geography.", call. = FALSE) # Return empty list structure for consistency return(list( @@ -418,6 +425,64 @@ and counties. The provided geography '", geography, "' is not supported.")} return(result) } +#' List All Available Crosswalk Combinations +#' +#' Returns a tibble of all source/target geography and year combinations +#' supported by `get_crosswalk()`. 
+#' +#' @return A tibble with columns: `source_geography`, `target_geography`, +#' `source_year`, `target_year`. +#' @export +get_available_crosswalks <- function() { + + # 1. NHGIS: reuse list_nhgis_crosswalks(), select and coerce years to integer + nhgis <- list_nhgis_crosswalks() |> + dplyr::select(source_geography, target_geography, source_year, target_year) |> + dplyr::mutate( + source_year = as.integer(source_year), + target_year = as.integer(target_year)) + + # 2. Geocorr 2022: all pairwise combinations of 9 canonical geographies + geocorr_2022_geographies <- c( + "block", "block_group", "tract", "county", "place", + "zcta", "puma22", "cd118", "cd119") + + geocorr_2022 <- tidyr::crossing( + source_geography = geocorr_2022_geographies, + target_geography = geocorr_2022_geographies) |> + dplyr::filter(source_geography != target_geography) |> + dplyr::mutate( + source_year = 2022L, + target_year = 2022L) + + # 3. Geocorr 2018: all pairwise combinations of 9 canonical geographies + geocorr_2018_geographies <- c( + "block", "block_group", "tract", "county", "place", + "zcta", "puma12", "cd115", "cd116") + + geocorr_2018 <- tidyr::crossing( + source_geography = geocorr_2018_geographies, + target_geography = geocorr_2018_geographies) |> + dplyr::filter(source_geography != target_geography) |> + dplyr::mutate( + source_year = 2018L, + target_year = 2018L) + + # 4. CTData: 7 manually specified combinations (2020<->2022) + ctdata <- tibble::tibble( + source_geography = c("block", "block_group", "tract", "county", + "block", "block_group", "tract"), + target_geography = c("block", "block_group", "tract", "county", + "block", "block_group", "tract"), + source_year = c(rep(2020L, 4), rep(2022L, 3)), + target_year = c(rep(2022L, 4), rep(2020L, 3))) + + # 5. 
Combine, deduplicate, and sort + dplyr::bind_rows(nhgis, geocorr_2022, geocorr_2018, ctdata) |> + dplyr::distinct() |> + dplyr::arrange(source_geography, target_geography, source_year, target_year) +} + utils::globalVariables(c( - "allocation_factor_source_to_target", "geoid", "label", + "allocation_factor_source_to_target", "geoid", "label", "n_unmatched", "pct_of_unmatched", "state_abbr")) \ No newline at end of file diff --git a/R/get_crosswalk_chain.R b/R/get_crosswalk_chain.R index 44bcdc4..a59fca0 100644 --- a/R/get_crosswalk_chain.R +++ b/R/get_crosswalk_chain.R @@ -48,11 +48,11 @@ get_crosswalk_chain <- function( message = format_chain_plan_message(plan)) # Print the plan message - message(result$message) + cw_message(result$message) # Handle case where no crosswalk is needed if (nrow(plan$steps) > 0 && plan$steps$crosswalk_source[1] == "none") { - message("Returning empty crosswalk list since no transformation is needed.") + cw_message("Returning empty crosswalk list since no transformation is needed.") return(result) } @@ -61,7 +61,7 @@ get_crosswalk_chain <- function( step <- plan$steps[i, ] step_name <- stringr::str_c("step_", i) - message(stringr::str_c("\nFetching ", step_name, ": ", step$description)) + cw_message(stringr::str_c("\nFetching ", step_name, ": ", step$description)) crosswalk_i <- get_crosswalk_single( source_geography = step$source_geography, diff --git a/R/get_ctdata_crosswalk.R b/R/get_ctdata_crosswalk.R index ebbc705..b4ef83c 100644 --- a/R/get_ctdata_crosswalk.R +++ b/R/get_ctdata_crosswalk.R @@ -113,7 +113,7 @@ Only block, block_group, and tract geographies support the 2022 -> 2020 directio # Check cache for full national crosswalk if (file.exists(csv_path) & !is.null(cache)) { - message(stringr::str_c("Reading national ", source_year, "-", target_year, " crosswalk from cache.")) + cw_message(stringr::str_c("Reading national ", source_year, "-", target_year, " crosswalk from cache.")) result <- readr::read_csv( csv_path, col_types 
= readr::cols(.default = readr::col_character(), @@ -150,7 +150,7 @@ Only block, block_group, and tract geographies support the 2022 -> 2020 directio return(result) } - message("Constructing nationally comprehensive 2020-2022 crosswalk...") + cw_message("Constructing nationally comprehensive 2020-2022 crosswalk...") # =========================================================================== # STEP 1: Get all 2020 GEOIDs from NHGIS crosswalk (non-CT) or tidycensus (county) @@ -167,7 +167,7 @@ Only block, block_group, and tract geographies support the 2022 -> 2020 directio if (geography_standardized == "county") { # For county, use tidycensus since NHGIS doesn't have county -> county crosswalks - message("Fetching all 2020 county GEOIDs via tidycensus...") + cw_message("Fetching all 2020 county GEOIDs via tidycensus...") all_2020_geoids <- suppressMessages({ tidycensus::get_acs( @@ -182,7 +182,7 @@ Only block, block_group, and tract geographies support the 2022 -> 2020 directio } else { # For block, block_group, tract: use NHGIS 2010 -> 2020 crosswalk - message(stringr::str_c( + cw_message(stringr::str_c( "Fetching NHGIS ", nhgis_source_geog, " 2010 -> 2020 crosswalk to obtain all 2020 GEOIDs...")) nhgis_crosswalk <- get_nhgis_crosswalk( @@ -200,7 +200,7 @@ Only block, block_group, and tract geographies support the 2022 -> 2020 directio dplyr::pull(target_geoid) } - message(stringr::str_c( + cw_message(stringr::str_c( "Found ", format(length(all_2020_geoids), big.mark = ","), " non-CT 2020 ", geography_standardized, " GEOIDs.")) @@ -223,7 +223,7 @@ Only block, block_group, and tract geographies support the 2022 -> 2020 directio # STEP 3: Get CT-specific crosswalk from CT Data Collaborative # =========================================================================== - message("Fetching Connecticut crosswalk from CT Data Collaborative...") + cw_message("Fetching Connecticut crosswalk from CT Data Collaborative...") if (geography_standardized == "block") { raw_df <- 
readr::read_csv(ctdata_urls$block, show_col_types = FALSE) @@ -326,7 +326,7 @@ Only block, block_group, and tract geographies support the 2022 -> 2020 directio result <- dplyr::bind_rows(ct_crosswalk, non_ct_crosswalk) |> dplyr::arrange(source_geoid) - message(stringr::str_c( + cw_message(stringr::str_c( "National 2020-2022 crosswalk constructed: ", format(nrow(ct_crosswalk), big.mark = ","), " CT records + ", format(nrow(non_ct_crosswalk), big.mark = ","), " non-CT records = ", @@ -337,7 +337,7 @@ Only block, block_group, and tract geographies support the 2022 -> 2020 directio # =========================================================================== if (is_reverse) { - message("Reversing crosswalk direction to 2022 -> 2020...") + cw_message("Reversing crosswalk direction to 2022 -> 2020...") # For identity crosswalks (block, block_group, tract), simply swap columns # Note: County is not supported for reverse direction (checked earlier) @@ -363,10 +363,10 @@ Only block, block_group, and tract geographies support the 2022 -> 2020 directio dir.create(cache_path, recursive = TRUE) } readr::write_csv(result, csv_path) - message(stringr::str_c("Cached to: ", csv_path)) + cw_message(stringr::str_c("Cached to: ", csv_path)) } - message(stringr::str_c( + cw_message(stringr::str_c( "National ", source_year, "-", target_year, " crosswalk constructed: - Connecticut: CT Data Collaborative (https://github.com/CT-Data-Collaborative) - Other states: Identity mapping derived from NHGIS 2010-2020 crosswalk")) diff --git a/R/get_geocorr_crosswalk.R b/R/get_geocorr_crosswalk.R index 15cf4fd..4db677c 100644 --- a/R/get_geocorr_crosswalk.R +++ b/R/get_geocorr_crosswalk.R @@ -164,7 +164,7 @@ get_geocorr_crosswalk <- function( if (file.exists(outpath) & !is.null(cache)) { result = readr::read_csv(outpath, show_col_types = FALSE) - message("Reading file from cache.") + cw_message("Reading file from cache.") # Attach metadata to cached result attr(result, "crosswalk_metadata") <- list( 
@@ -188,7 +188,7 @@ get_geocorr_crosswalk <- function( base_url <- "https://mcdc.missouri.edu/cgi-bin/broker" if (is.null(weight)) { - message("Setting the default crosswalk weighting variable to: population.") + cw_message("Setting the default crosswalk weighting variable to: population.") weight = "population" } diff --git a/R/get_nhgis_crosswalk.R b/R/get_nhgis_crosswalk.R index 40547bd..59680f7 100644 --- a/R/get_nhgis_crosswalk.R +++ b/R/get_nhgis_crosswalk.R @@ -551,10 +551,10 @@ get_nhgis_crosswalk <- function( if (file.exists(csv_path) & !is.null(cache)) { result = readr::read_csv(csv_path, show_col_types = FALSE) - message( + cw_message( "Use of NHGIS crosswalks is subject to the same conditions as for all NHGIS data. See https://www.nhgis.org/citation-and-use-nhgis-data.") - message("Reading file from cache.") + cw_message("Reading file from cache.") # Attach metadata to cached result attr(result, "crosswalk_metadata") <- list( @@ -721,19 +721,19 @@ variable. Get your key at https://account.ipums.org/api_keys") } zip_contents = safe_unzip_list(zip_path) if (is.null(zip_contents) || nrow(zip_contents) == 0) { - warning( + cw_warning( "The downloaded zip file for crosswalk ", crosswalk_sub_path, " is empty or cannot be opened. This crosswalk may not be available from NHGIS. ", - "Returning an empty tibble.") + "Returning an empty tibble.", call. = FALSE) return(tibble::tibble()) } # Extract the outer zip to temp directory extract_success = safe_unzip_extract(zip_path, temp_dir) if (!extract_success) { - warning( + cw_warning( "Failed to extract the downloaded zip file for crosswalk ", crosswalk_sub_path, - ". The file may be corrupted. Returning an empty tibble.") + ". The file may be corrupted. Returning an empty tibble.", call. = FALSE) return(tibble::tibble()) } @@ -749,9 +749,9 @@ variable. 
Get your key at https://account.ipums.org/api_keys") } nested_zips = all_files[stringr::str_detect(all_files, "\\.zip$")] if (length(nested_zips) == 0) { - warning( + cw_warning( "No CSV or nested zip file found in the downloaded archive for ", - crosswalk_sub_path, ". Returning an empty tibble.") + crosswalk_sub_path, ". Returning an empty tibble.", call. = FALSE) return(tibble::tibble()) } @@ -760,19 +760,19 @@ variable. Get your key at https://account.ipums.org/api_keys") } nested_contents = safe_unzip_list(nested_zip) if (is.null(nested_contents) || nrow(nested_contents) == 0) { - warning( + cw_warning( "The nested zip file for crosswalk ", crosswalk_sub_path, " is empty or cannot be opened. This crosswalk may not be available from NHGIS. ", - "Returning an empty tibble.") + "Returning an empty tibble.", call. = FALSE) return(tibble::tibble()) } # Extract the nested zip nested_extract_success = safe_unzip_extract(nested_zip, temp_dir) if (!nested_extract_success) { - warning( + cw_warning( "Failed to extract the nested zip file for crosswalk ", crosswalk_sub_path, - ". The file may be corrupted. Returning an empty tibble.") + ". The file may be corrupted. Returning an empty tibble.", call. = FALSE) return(tibble::tibble()) } @@ -782,9 +782,9 @@ variable. Get your key at https://account.ipums.org/api_keys") } } if (length(csv_files) == 0) { - warning( + cw_warning( "No CSV file found after extracting zip archive(s) for ", crosswalk_sub_path, - ". Returning an empty tibble.") + ". Returning an empty tibble.", call. = FALSE) return(tibble::tibble()) } @@ -797,8 +797,8 @@ variable. Get your key at https://account.ipums.org/api_keys") } crosswalk_df }, error = function(e) { - warning("Failed to retrieve crosswalk ", crosswalk_sub_path, ": ", e$message, - ". Returning an empty tibble.") + cw_warning("Failed to retrieve crosswalk ", crosswalk_sub_path, ": ", e$message, + ". Returning an empty tibble.", call. 
= FALSE) return(tibble::tibble()) }) @@ -856,7 +856,7 @@ variable. Get your key at https://account.ipums.org/api_keys") } readr::write_csv(crosswalk_df, csv_path) } -message( +cw_message( "Use of NHGIS crosswalks is subject to the same conditions as for all NHGIS data. See https://www.nhgis.org/citation-and-use-nhgis-data.") diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 0000000..8c7e710 --- /dev/null +++ b/R/utils.R @@ -0,0 +1,25 @@ +#' Conditionally Emit a Message +#' +#' Wrapper around `message()` that respects the `crosswalk.silent` option. +#' When `getOption("crosswalk.silent")` is `TRUE`, the message is suppressed. +#' +#' @param ... Arguments passed to `message()`. +#' @keywords internal +#' @noRd +cw_message <- function(...) { + if (!getOption("crosswalk.silent", FALSE)) message(...) +} + +#' Conditionally Emit a Warning +#' +#' Wrapper around `warning()` that respects the `crosswalk.silent` option. +#' When `getOption("crosswalk.silent")` is `TRUE`, the warning is suppressed. +#' +#' @param ... Arguments passed to `warning()`. +#' @param call. Logical. Whether to include the call in the warning message. +#' @param immediate. Logical. Whether to print the warning immediately. +#' @keywords internal +#' @noRd +cw_warning <- function(..., call. = TRUE, immediate. = FALSE) { + if (!getOption("crosswalk.silent", FALSE)) warning(..., call. = call., immediate. = immediate.) +} diff --git a/README.Rmd b/README.Rmd index 43a5324..b663855 100644 --- a/README.Rmd +++ b/README.Rmd @@ -18,7 +18,7 @@ devtools::load_all() # crosswalk - An R package providing a simple interface to access and apply crosswalks. + An R package for translating data across space and time. ## Overview @@ -30,17 +30,16 @@ and crosswalks. The package sources crosswalks from: -- **Geocorr** (Missouri Census Data Center) - for same-year crosswalks between geographies. Uses GeoCorr 2022 (2020 Census geography) for 2020s data and GeoCorr 2018 (2010 Census geography) for 2010s data. 
The correct version is selected automatically based on year context. -- **IPUMS NHGIS** - for inter-temporal crosswalks (across different census years) +- **Geocorr** (Missouri Census Data Center) - for inter-geography crosswalks (same-decade) +- **IPUMS NHGIS** - for inter-temporal crosswalks (across decades) - **CT Data Collaborative** - for Connecticut 2020→2022 crosswalks (planning region changes) ## Why Use `crosswalk`? -- **Programmatic access**: No more manual downloads from web interfaces +- **Programmatic access**: No more manual downloads from web interfaces; data is cached for speed - **Standardized output**: Consistent column names across all crosswalk sources - **Metadata tracking**: Full provenance of crosswalks stored as attributes - **Crosswalk chaining**: Automatic chaining when multiple crosswalks are required -- **Local caching**: Reproducible workflows with locally-cached crosswalks for speed ## Installation @@ -58,13 +57,16 @@ library(dplyr) library(ggplot2) library(stringr) library(sf) +library(tidycensus) +library(tigris) +library(scales) -source_data = tidycensus::get_acs( +source_data = get_acs( year = 2023, geography = "zcta", output = "wide", variables = c(below_poverty_level = "B17001_002")) %>% - dplyr::select( + select( source_geoid = GEOID, count_below_poverty_level = below_poverty_levelE) @@ -89,7 +91,8 @@ crosswalked_data = crosswalk_data( What does the crosswalk(s) reflect and how was it sourced? ```{r} -names(attr(crosswalked_data, "crosswalk_metadata")) +## and there's more (not shown) +names(attr(crosswalked_data, "crosswalk_metadata")) %>% head() ``` How well did the crosswalk join to our source data? @@ -105,46 +108,47 @@ join_quality$pct_data_unmatched ## zctas aren't nested within states, otherwise join_quality$state_analysis_data ## would help us to ID whether non-joining source data were clustered within one ## or a few states. 
instead we can join to spatial data to diagnose further: -zctas_sf = tigris::zctas(year = 2023, progress_bar = FALSE) -states_sf = tigris::states(year = 2023, cb = TRUE, progress_bar = FALSE) +zctas_sf = zctas(year = 2023, progress_bar = FALSE) +states_sf = states(year = 2023, cb = TRUE, progress_bar = FALSE) ## apart from DC, which has a disproportionate number of non-joining ZCTAs-- ## seemingly corresponding to federal areas and buildings--the distribution of ## non-joining ZCTAs appears proportionate to state-level populations and is ## distributed across many states: -zctas_sf %>% - dplyr::filter(GEOID20 %in% join_quality$data_geoids_unmatched) %>% - sf::st_intersection(states_sf %>% select(NAME)) %>% - sf::st_drop_geometry() %>% - dplyr::count(NAME, sort = TRUE) +zctas_sf %>% + filter(GEOID20 %in% join_quality$data_geoids_unmatched) %>% + st_intersection(states_sf %>% select(NAME)) %>% + st_drop_geometry() %>% + count(NAME, sort = TRUE) %>% + head() ``` And how accurate was the crosswalking process? 
```{r} -comparison_data = tidycensus::get_acs( +comparison_data = get_acs( year = 2023, geography = "puma", output = "wide", variables = c( below_poverty_level = "B17001_002")) %>% - dplyr::select( + select( source_geoid = GEOID, count_below_poverty_level_acs = below_poverty_levelE) -combined_data = dplyr::left_join( +combined_data = left_join( comparison_data, crosswalked_data, - by = c("source_geoid" = "geoid")) - + by = c("source_geoid" = "geoid")) + combined_data %>% - dplyr::select(source_geoid, dplyr::matches("count")) %>% - dplyr::mutate(difference_percent = (count_below_poverty_level_acs - count_below_poverty_level) / count_below_poverty_level_acs) %>% - ggplot2::ggplot() + - ggplot2::geom_histogram(ggplot2::aes(x = difference_percent)) + - ggplot2::theme_minimal() + - ggplot2::theme(panel.grid = ggplot2::element_blank()) + - ggplot2::scale_x_continuous(labels = scales::percent) + - ggplot2::labs( + select(source_geoid, matches("count")) %>% + mutate(difference_percent = (count_below_poverty_level_acs - count_below_poverty_level) / count_below_poverty_level_acs) %>% + ggplot() + + geom_histogram(aes(x = difference_percent)) + + theme_minimal() + + theme(panel.grid = element_blank()) + + scale_x_continuous(labels = percent) + + labs( title = "Crosswalked data approximates observed values", subtitle = "Block group-level source data would produce more accurate crosswalked values", y = "", @@ -161,70 +165,36 @@ directly from `crosswalk_data()` and omit the intermediate `get_crosswalk()` cal | `get_crosswalk()` | Fetch crosswalk(s) | | `crosswalk_data()` | Apply crosswalk(s) to interpolate data to the target geography-year | -## Understanding `get_crosswalk()` Output +## Output Structure `get_crosswalk()` **always returns a list** structured as follows: -```{r} -result <- get_crosswalk( - source_geography = "tract", - target_geography = "zcta", - source_year = 2010, - target_year = 2020, - weight = "population") - -names(result) -``` - The list contains three 
elements: | Element | Description | |------------------------------|------------------------------------------| -| `crosswalks` | A named list of crosswalks (`step_1`, `step_2`, etc.) of length one or greater | +| `crosswalks` | A named list of crosswalks (`step_1`, `step_2`, etc.) | | `plan` | Details about what crosswalks are being fetched | -| `message` | A human-readable description of the crosswalk chain | - -### Single-Step vs. Multi-Step Crosswalks - -**Single-step crosswalks** (same year, different geography OR same geography, different year): - -```{r} -# Same year, different geography (Geocorr) -result <- get_crosswalk( - source_geography = "tract", - target_geography = "zcta", - weight = "population") -# result$crosswalks$step_1 contains one crosswalk - -# Same geography, different year (NHGIS) -result <- get_crosswalk( - source_geography = "tract", - target_geography = "tract", - source_year = 2010, - target_year = 2020) -# result$crosswalks$step_1 contains one crosswalk -``` +| `message` | A description of the crosswalk chain | -**Multi-step crosswalks** (when a single, direct crosswalk is not available): +### Multi-Step Crosswalks -For some source year/geography -> target year/geography specifications do not have a crosswalk. -In such cases, two or more crosswalks may be needed. The package automatically plans and fetches the -required crosswalks: +For some source year/geography -> target year/geography combinations, there is not a single direct crosswalk. +In such cases, we need two crosswalks. The package automatically plans and fetches the required crosswalks: 1. **Step 1 (NHGIS)**: Change year, keep geography constant 2. 
**Step 2 (Geocorr)**: Change geography at target year -```{r} +```{r, eval = FALSE} result <- get_crosswalk( source_geography = "tract", target_geography = "zcta", source_year = 2010, target_year = 2020, - weight = "population") + weight = "population", + silent = TRUE) # Two crosswalks are returned -names(result$crosswalks) - # Step 1: 2010 tracts -> 2020 tracts (NHGIS) # Step 2: 2020 tracts -> 2020 ZCTAs (Geocorr) ``` @@ -248,16 +218,17 @@ and `land_area_sqmi` depending on the source of the crosswalk. Each crosswalk tibble has a `crosswalk_metadata` attribute that documents what the crosswalk represents and how it was created: -```{r} +```{r, eval = FALSE} metadata <- attr(result$crosswalks$step_1, "crosswalk_metadata") names(metadata) ``` -## Using `crosswalk_data()` to Interpolate Data +## Interpolation -`crosswalk_data()` applies crosswalk weights to transform your data. It automatically handles multi-step crosswalks. -If you're in a hurry, you can omit a call to `get_crosswalk()` and specify the needed crosswalk parameters -to `crosswalk_data()`, which will pass these to `get_crosswalk()` behind the scenes. +`crosswalk_data()` applies crosswalk weights to transform your data. If you're in a hurry, +you can omit a call to `get_crosswalk()` and specify the needed crosswalk parameters +to `crosswalk_data()`, which will pass these to `get_crosswalk()` behind the scenes. Or you +can call `get_crosswalk()` explicitly and then pass the result to `crosswalk_data()`. ### Column Naming Convention @@ -273,49 +244,12 @@ All non-count variables are interpolated using weighted means, weighting by the ## Supported Geography and Year Combinations -### Inter-Geography Crosswalks (Geocorr) - -The package supports inter-geography crosswalks for both 2020s and 2010s Census geographies, automatically selecting the correct GeoCorr version based on the year context you provide. 
- -**2020s geography (GeoCorr 2022, 2020 Census):** Used when years are 2020+, or when no year is specified. - -- block, block group, tract, county -- place, zcta, puma22 -- cd118, cd119, urban_area, core_based_statistical_area - -**2010s geography (GeoCorr 2018, 2010 Census):** Used when years are in the 2010s (2010-2019). +`get_available_crosswalks()` returns a listing of all supported year-geography combinations. -- block, block group, tract, county -- place, zcta, puma12 -- cd115, cd116 - -### Inter-Temporal Crosswalks (NHGIS) - -NHGIS provides cross-decade crosswalks with the following structure: - -**Source geographies:** block, block_group, tract - -**Target geographies:** -- From blocks (decennial years only): block, block_group, tract, county, place, zcta, puma, urban_area, cbsa -- From block_group or tract: block_group, tract, county - -| Source Years | Target Years | -|------------------------------|------------------------------| -| 1990, 2000 | 2010, 2014, 2015, 2020, 2022 | -| 2010, 2011, 2012, 2014, 2015 | 1990, 2000, 2020, 2022 | -| 2020, 2022 | 1990, 2000, 2010, 2014, 2015 | - -**Notes:** -- Within-decade crosswalks (e.g., 2010→2014) are not available from NHGIS -- Block→ZCTA, Block→PUMA, etc. are only available for decennial years (1990, 2000, 2010, 2020) -- The package automatically uses direct NHGIS crosswalks when available (e.g., -`get_crosswalk(source_geography = "block", target_geography = "zcta", source_year = 2010, target_year = 2020)` -returns a single-step NHGIS crosswalk) - -### 2020→2022 Crosswalks (CTData) - -For 2020 to 2022 transformations, the package uses CT Data Collaborative crosswalks for Connecticut -(where planning regions replaced counties) and identity mappings for other states (where no changes occurred). +```{r} +get_available_crosswalks() %>% + head() +``` ## API Keys @@ -341,15 +275,13 @@ result <- get_crosswalk( ## Citations -The intellectual credit for the underlying crosswalks belongs to the original developers. 
- -**For NHGIS**, see citation requirements at: https://www.nhgis.org/citation-and-use-nhgis-data +Cite the organizations that produce the crosswalks returned by this package: -**For Geocorr**, suggested citations: +**For NHGIS**, see requirements at: https://www.nhgis.org/citation-and-use-nhgis-data -> Missouri Census Data Center, University of Missouri. (2022). Geocorr 2022: Geographic Correspondence Engine. Retrieved from: https://mcdc.missouri.edu/applications/geocorr2022.html +**For Geocorr**, a suggested citation (shown for Geocorr 2022; for Geocorr 2018, replace the year and URL with 2018 and geocorr2018.html): -> Missouri Census Data Center, University of Missouri. (2018). Geocorr 2018: Geographic Correspondence Engine. Retrieved from: https://mcdc.missouri.edu/applications/geocorr2018.html +> Missouri Census Data Center, University of Missouri. (2022). Geocorr 2022: Geographic Correspondence Engine. Retrieved from: https://mcdc.missouri.edu/applications/geocorr2022.html **For CTData**, a suggested citation (adjust for alternate source geography): diff --git a/README.md b/README.md index 8ff1fa1..203b8e3 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,7 @@ # crosswalk -An R package providing a simple interface to access and apply -crosswalks. +An R package for translating data across space and time. ## Overview @@ -16,26 +15,22 @@ and crosswalks. The package sources crosswalks from: -- **Geocorr** (Missouri Census Data Center) - for same-year crosswalks - between geographies. Uses GeoCorr 2022 (2020 Census geography) for - 2020s data and GeoCorr 2018 (2010 Census geography) for 2010s data. - The correct version is selected automatically based on year context. -- **IPUMS NHGIS** - for inter-temporal crosswalks (across different - census years) +- **Geocorr** (Missouri Census Data Center) - for inter-geography + crosswalks (same-decade) +- **IPUMS NHGIS** - for inter-temporal crosswalks (across decades) - **CT Data Collaborative** - for Connecticut 2020→2022 crosswalks (planning region changes) ## Why Use `crosswalk`? 
-- **Programmatic access**: No more manual downloads from web interfaces +- **Programmatic access**: No more manual downloads from web interfaces; + data is cached for speed - **Standardized output**: Consistent column names across all crosswalk sources - **Metadata tracking**: Full provenance of crosswalks stored as attributes - **Crosswalk chaining**: Automatic chaining when multiple crosswalks are required -- **Local caching**: Reproducible workflows with locally-cached - crosswalks for speed ## Installation @@ -54,13 +49,16 @@ library(dplyr) library(ggplot2) library(stringr) library(sf) +library(tidycensus) +library(tigris) +library(scales) -source_data = tidycensus::get_acs( +source_data = get_acs( year = 2023, geography = "zcta", output = "wide", variables = c(below_poverty_level = "B17001_002")) %>% - dplyr::select( + select( source_geoid = GEOID, count_below_poverty_level = below_poverty_levelE) @@ -86,19 +84,10 @@ crosswalked_data = crosswalk_data( What does the crosswalk(s) reflect and how was it sourced? ``` r -names(attr(crosswalked_data, "crosswalk_metadata")) -#> [1] "call_parameters" "data_source" -#> [3] "data_source_full_name" "download_url" -#> [5] "api_endpoint" "documentation_url" -#> [7] "citation_url" "github_repository" -#> [9] "source_geography" "source_geography_standardized" -#> [11] "target_geography" "target_geography_standardized" -#> [13] "source_year" "target_year" -#> [15] "reference_year" "weighting_variable" -#> [17] "state_coverage" "notes" -#> [19] "retrieved_at" "cached" -#> [21] "cache_path" "read_from_cache" -#> [23] "is_multi_step" "crosswalk_package_version" +## and there's more (not shown) +names(attr(crosswalked_data, "crosswalk_metadata")) %>% head() +#> [1] "call_parameters" "data_source" "data_source_full_name" +#> [4] "download_url" "api_endpoint" "documentation_url" ``` How well did the crosswalk join to our source data? 
@@ -116,90 +105,55 @@ join_quality$pct_data_unmatched ## zctas aren't nested within states, otherwise join_quality$state_analysis_data ## would help us to ID whether non-joining source data were clustered within one ## or a few states. instead we can join to spatial data to diagnose further: -zctas_sf = tigris::zctas(year = 2023, progress_bar = FALSE) -states_sf = tigris::states(year = 2023, cb = TRUE, progress_bar = FALSE) +zctas_sf = zctas(year = 2023, progress_bar = FALSE) +states_sf = states(year = 2023, cb = TRUE, progress_bar = FALSE) ## apart from DC, which has a disproportionate number of non-joining ZCTAs-- ## seemingly corresponding to federal areas and buildings--the distribution of ## non-joining ZCTAs appears proportionate to state-level populations and is ## distributed across many states: -zctas_sf %>% - dplyr::filter(GEOID20 %in% join_quality$data_geoids_unmatched) %>% - sf::st_intersection(states_sf %>% select(NAME)) %>% - sf::st_drop_geometry() %>% - dplyr::count(NAME, sort = TRUE) -#> NAME n -#> 1 District of Columbia 19 -#> 2 New York 15 -#> 3 Texas 9 -#> 4 California 8 -#> 5 Colorado 6 -#> 6 Utah 6 -#> 7 Florida 5 -#> 8 Pennsylvania 5 -#> 9 Tennessee 5 -#> 10 Virginia 5 -#> 11 Alabama 4 -#> 12 Arizona 4 -#> 13 Kentucky 4 -#> 14 Maryland 4 -#> 15 Ohio 4 -#> 16 Washington 4 -#> 17 Georgia 3 -#> 18 Louisiana 3 -#> 19 Michigan 3 -#> 20 North Carolina 3 -#> 21 Alaska 2 -#> 22 Massachusetts 2 -#> 23 Mississippi 2 -#> 24 North Dakota 2 -#> 25 Arkansas 1 -#> 26 Hawaii 1 -#> 27 Idaho 1 -#> 28 Illinois 1 -#> 29 Indiana 1 -#> 30 Iowa 1 -#> 31 Kansas 1 -#> 32 Maine 1 -#> 33 Minnesota 1 -#> 34 Missouri 1 -#> 35 Montana 1 -#> 36 Nebraska 1 -#> 37 Nevada 1 -#> 38 New Mexico 1 -#> 39 Oregon 1 -#> 40 South Carolina 1 -#> 41 Vermont 1 -#> 42 Wisconsin 1 +zctas_sf %>% + filter(GEOID20 %in% join_quality$data_geoids_unmatched) %>% + st_intersection(states_sf %>% select(NAME)) %>% + st_drop_geometry() %>% + count(NAME, sort = TRUE) %>% + head() +#> NAME n +#> 1 
District of Columbia 19 +#> 2 New York 15 +#> 3 Texas 9 +#> 4 California 8 +#> 5 Colorado 6 +#> 6 Utah 6 ``` And how accurate was the crosswalking process? ``` r -comparison_data = tidycensus::get_acs( +comparison_data = get_acs( year = 2023, geography = "puma", output = "wide", variables = c( below_poverty_level = "B17001_002")) %>% - dplyr::select( + select( source_geoid = GEOID, count_below_poverty_level_acs = below_poverty_levelE) -combined_data = dplyr::left_join( +combined_data = left_join( comparison_data, crosswalked_data, - by = c("source_geoid" = "geoid")) - + by = c("source_geoid" = "geoid")) + combined_data %>% - dplyr::select(source_geoid, dplyr::matches("count")) %>% - dplyr::mutate(difference_percent = (count_below_poverty_level_acs - count_below_poverty_level) / count_below_poverty_level_acs) %>% - ggplot2::ggplot() + - ggplot2::geom_histogram(ggplot2::aes(x = difference_percent)) + - ggplot2::theme_minimal() + - ggplot2::theme(panel.grid = ggplot2::element_blank()) + - ggplot2::scale_x_continuous(labels = scales::percent) + - ggplot2::labs( + select(source_geoid, matches("count")) %>% + mutate(difference_percent = (count_below_poverty_level_acs - count_below_poverty_level) / count_below_poverty_level_acs) %>% + ggplot() + + geom_histogram(aes(x = difference_percent)) + + theme_minimal() + + theme(panel.grid = element_blank()) + + scale_x_continuous(labels = percent) + + labs( title = "Crosswalked data approximates observed values", subtitle = "Block group-level source data would produce more accurate crosswalked values", y = "", @@ -219,58 +173,23 @@ intermediate `get_crosswalk()` call. 
| `get_crosswalk()` | Fetch crosswalk(s) | | `crosswalk_data()` | Apply crosswalk(s) to interpolate data to the target geography-year | -## Understanding `get_crosswalk()` Output +## Output Structure `get_crosswalk()` **always returns a list** structured as follows: -``` r -result <- get_crosswalk( - source_geography = "tract", - target_geography = "zcta", - source_year = 2010, - target_year = 2020, - weight = "population") - -names(result) -#> [1] "crosswalks" "plan" "message" -``` - The list contains three elements: -| Element | Description | -|----|----| -| `crosswalks` | A named list of crosswalks (`step_1`, `step_2`, etc.) of length one or greater | -| `plan` | Details about what crosswalks are being fetched | -| `message` | A human-readable description of the crosswalk chain | - -### Single-Step vs. Multi-Step Crosswalks +| Element | Description | +|--------------|-------------------------------------------------------| +| `crosswalks` | A named list of crosswalks (`step_1`, `step_2`, etc.) | +| `plan` | Details about what crosswalks are being fetched | +| `message` | A description of the crosswalk chain | -**Single-step crosswalks** (same year, different geography OR same -geography, different year): +### Multi-Step Crosswalks -``` r -# Same year, different geography (Geocorr) -result <- get_crosswalk( - source_geography = "tract", - target_geography = "zcta", - weight = "population") -# result$crosswalks$step_1 contains one crosswalk - -# Same geography, different year (NHGIS) -result <- get_crosswalk( - source_geography = "tract", - target_geography = "tract", - source_year = 2010, - target_year = 2020) -# result$crosswalks$step_1 contains one crosswalk -``` - -**Multi-step crosswalks** (when a single, direct crosswalk is not -available): - -For some source year/geography -\> target year/geography specifications -do not have a crosswalk. In such cases, two or more crosswalks may be -needed. 
The package automatically plans and fetches the required +For some source year/geography -\> target year/geography combinations, +there is not a single direct crosswalk. In such cases, we need two +crosswalks. The package automatically plans and fetches the required crosswalks: 1. **Step 1 (NHGIS)**: Change year, keep geography constant @@ -282,12 +201,10 @@ result <- get_crosswalk( target_geography = "zcta", source_year = 2010, target_year = 2020, - weight = "population") + weight = "population", + silent = TRUE) # Two crosswalks are returned -names(result$crosswalks) -#> [1] "step_1" "step_2" - # Step 1: 2010 tracts -> 2020 tracts (NHGIS) # Step 2: 2020 tracts -> 2020 ZCTAs (Geocorr) ``` @@ -315,27 +232,16 @@ documents what the crosswalk represents and how it was created: ``` r metadata <- attr(result$crosswalks$step_1, "crosswalk_metadata") names(metadata) -#> [1] "call_parameters" "data_source" -#> [3] "data_source_full_name" "download_url" -#> [5] "api_endpoint" "documentation_url" -#> [7] "citation_url" "github_repository" -#> [9] "source_geography" "source_geography_standardized" -#> [11] "target_geography" "target_geography_standardized" -#> [13] "source_year" "target_year" -#> [15] "reference_year" "weighting_variable" -#> [17] "state_coverage" "notes" -#> [19] "retrieved_at" "cached" -#> [21] "cache_path" "read_from_cache" -#> [23] "is_multi_step" "crosswalk_package_version" ``` -## Using `crosswalk_data()` to Interpolate Data +## Interpolation -`crosswalk_data()` applies crosswalk weights to transform your data. It -automatically handles multi-step crosswalks. If you’re in a hurry, you -can omit a call to `get_crosswalk()` and specify the needed crosswalk -parameters to `crosswalk_data()`, which will pass these to -`get_crosswalk()` behind the scenes. +`crosswalk_data()` applies crosswalk weights to transform your data. 
If +you’re in a hurry, you can omit a call to `get_crosswalk()` and specify +the needed crosswalk parameters to `crosswalk_data()`, which will pass +these to `get_crosswalk()` behind the scenes. Or you can call +`get_crosswalk()` explicitly and then pass the result to +`crosswalk_data()`. ### Column Naming Convention @@ -352,54 +258,22 @@ weighted means, weighting by the allocation factor from the crosswalk. ## Supported Geography and Year Combinations -### Inter-Geography Crosswalks (Geocorr) - -The package supports inter-geography crosswalks for both 2020s and 2010s -Census geographies, automatically selecting the correct GeoCorr version -based on the year context you provide. - -**2020s geography (GeoCorr 2022, 2020 Census):** Used when years are -2020+, or when no year is specified. - -- block, block group, tract, county -- place, zcta, puma22 -- cd118, cd119, urban_area, core_based_statistical_area - -**2010s geography (GeoCorr 2018, 2010 Census):** Used when years are in -the 2010s (2010-2019). - -- block, block group, tract, county -- place, zcta, puma12 -- cd115, cd116 +`get_available_crosswalks()` returns a listing of all supported +year-geography combinations. -### Inter-Temporal Crosswalks (NHGIS) - -NHGIS provides cross-decade crosswalks with the following structure: - -**Source geographies:** block, block_group, tract - -**Target geographies:** - From blocks (decennial years only): block, -block_group, tract, county, place, zcta, puma, urban_area, cbsa - From -block_group or tract: block_group, tract, county - -| Source Years | Target Years | -|------------------------------|------------------------------| -| 1990, 2000 | 2010, 2014, 2015, 2020, 2022 | -| 2010, 2011, 2012, 2014, 2015 | 1990, 2000, 2020, 2022 | -| 2020, 2022 | 1990, 2000, 2010, 2014, 2015 | - -**Notes:** - Within-decade crosswalks (e.g., 2010→2014) are not -available from NHGIS - Block→ZCTA, Block→PUMA, etc. 
are only available -for decennial years (1990, 2000, 2010, 2020) - The package automatically -uses direct NHGIS crosswalks when available (e.g., -`get_crosswalk(source_geography = "block", target_geography = "zcta", source_year = 2010, target_year = 2020)` -returns a single-step NHGIS crosswalk) - -### 2020→2022 Crosswalks (CTData) - -For 2020 to 2022 transformations, the package uses CT Data Collaborative -crosswalks for Connecticut (where planning regions replaced counties) -and identity mappings for other states (where no changes occurred). +``` r +get_available_crosswalks() %>% + head() +#> # A tibble: 6 × 4 +#> source_geography target_geography source_year target_year +#> +#> 1 block block 1990 2010 +#> 2 block block 2000 2010 +#> 3 block block 2010 2020 +#> 4 block block 2020 2010 +#> 5 block block 2020 2022 +#> 6 block block 2022 2020 +``` ## API Keys @@ -425,21 +299,17 @@ result <- get_crosswalk( ## Citations -The intellectual credit for the underlying crosswalks belongs to the -original developers. +Cite the organizations that produce the crosswalks returned by this +package: -**For NHGIS**, see citation requirements at: +**For NHGIS**, see requirements at: -**For Geocorr**, suggested citations: - -> Missouri Census Data Center, University of Missouri. (2022). Geocorr -> 2022: Geographic Correspondence Engine. Retrieved from: -> +**For Geocorr**, a suggested citation (update the year): -> Missouri Census Data Center, University of Missouri. (2018). Geocorr -> 2018: Geographic Correspondence Engine. Retrieved from: -> +> Missouri Census Data Center, University of Missouri. (2022/2018). +> Geocorr 2022/2018: Geographic Correspondence Engine. 
Retrieved from: +> **For CTData**, a suggested citation (adjust for alternate source geography): diff --git a/_pkgdown.yml b/_pkgdown.yml index c7ff89e..b49f5d3 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,4 +1,6 @@ url: https://ui-research.github.io/crosswalk/ template: bootstrap: 5 - +home: + sidebar: + structure: [toc, links, license, community, citation, authors, dev] diff --git a/man/crosswalk_data.Rd b/man/crosswalk_data.Rd index cd65ea0..97d9fcc 100644 --- a/man/crosswalk_data.Rd +++ b/man/crosswalk_data.Rd @@ -17,7 +17,8 @@ crosswalk_data( count_columns = NULL, non_count_columns = NULL, return_intermediate = FALSE, - show_join_quality = TRUE + show_join_quality = TRUE, + silent = getOption("crosswalk.silent", FALSE) ) } \arguments{ @@ -73,7 +74,12 @@ from each step. Default is FALSE, which returns only the final result.} about join quality, including the number of data rows not matching the crosswalk and vice versa. For state-nested geographies (tract, county, block group, etc.), also reports state-level concentration of unmatched rows. Set to FALSE to -suppress these messages.} +suppress these messages. Automatically suppressed when \code{silent = TRUE}.} + +\item{silent}{Logical. If \code{TRUE}, suppresses all informational messages and +warnings, including join quality diagnostics regardless of \code{show_join_quality}. +Defaults to \code{getOption("crosswalk.silent", FALSE)}. 
Set +\code{options(crosswalk.silent = TRUE)} to silence all calls by default.} } \value{ If \code{return_intermediate = FALSE} (default), a tibble with data summarized diff --git a/man/get_available_crosswalks.Rd b/man/get_available_crosswalks.Rd new file mode 100644 index 0000000..ab05fa7 --- /dev/null +++ b/man/get_available_crosswalks.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_crosswalk.R +\name{get_available_crosswalks} +\alias{get_available_crosswalks} +\title{List All Available Crosswalk Combinations} +\usage{ +get_available_crosswalks() +} +\value{ +A tibble with columns: \code{source_geography}, \code{target_geography}, +\code{source_year}, \code{target_year}. +} +\description{ +Returns a tibble of all source/target geography and year combinations +supported by \code{get_crosswalk()}. +} diff --git a/man/get_crosswalk.Rd b/man/get_crosswalk.Rd index 74c8f0a..84d0728 100644 --- a/man/get_crosswalk.Rd +++ b/man/get_crosswalk.Rd @@ -10,7 +10,8 @@ get_crosswalk( source_year = NULL, target_year = NULL, cache = NULL, - weight = "population" + weight = "population", + silent = getOption("crosswalk.silent", FALSE) ) } \arguments{ @@ -38,6 +39,10 @@ are cached separately when provided.} \item{weight}{Character. Weighting variable for Geocorr crosswalks. One of c("population", "housing", "land").} + +\item{silent}{Logical. If \code{TRUE}, suppresses all informational messages and +warnings. Defaults to \code{getOption("crosswalk.silent", FALSE)}. 
Set +\code{options(crosswalk.silent = TRUE)} to silence all calls by default.} } \value{ A list with a consistent structure: diff --git a/renv.lock b/renv.lock index 95a2fee..d694186 100644 --- a/renv.lock +++ b/renv.lock @@ -35,63 +35,6 @@ "Maintainer": "Winston Chang ", "Repository": "CRAN" }, - "RColorBrewer": { - "Package": "RColorBrewer", - "Version": "1.1-3", - "Source": "Repository", - "Date": "2022-04-03", - "Title": "ColorBrewer Palettes", - "Authors@R": "c(person(given = \"Erich\", family = \"Neuwirth\", role = c(\"aut\", \"cre\"), email = \"erich.neuwirth@univie.ac.at\"))", - "Author": "Erich Neuwirth [aut, cre]", - "Maintainer": "Erich Neuwirth ", - "Depends": [ - "R (>= 2.0.0)" - ], - "Description": "Provides color schemes for maps (and other graphics) designed by Cynthia Brewer as described at http://colorbrewer2.org.", - "License": "Apache License 2.0", - "NeedsCompilation": "no", - "Repository": "CRAN", - "Encoding": "UTF-8" - }, - "S7": { - "Package": "S7", - "Version": "0.2.1", - "Source": "Repository", - "Title": "An Object Oriented System Meant to Become a Successor to S3 and S4", - "Authors@R": "c( person(\"Object-Oriented Programming Working Group\", role = \"cph\"), person(\"Davis\", \"Vaughan\", role = \"aut\"), person(\"Jim\", \"Hester\", role = \"aut\", comment = c(ORCID = \"0000-0002-2739-7082\")), person(\"Tomasz\", \"Kalinowski\", role = \"aut\"), person(\"Will\", \"Landau\", role = \"aut\"), person(\"Michael\", \"Lawrence\", role = \"aut\"), person(\"Martin\", \"Maechler\", role = \"aut\", comment = c(ORCID = \"0000-0002-8685-9910\")), person(\"Luke\", \"Tierney\", role = \"aut\"), person(\"Hadley\", \"Wickham\", , \"hadley@posit.co\", role = c(\"aut\", \"cre\"), comment = c(ORCID = \"0000-0003-4757-117X\")) )", - "Description": "A new object oriented programming system designed to be a successor to S3 and S4. It includes formal class, generic, and method specification, and a limited form of multiple dispatch. 
It has been designed and implemented collaboratively by the R Consortium Object-Oriented Programming Working Group, which includes representatives from R-Core, 'Bioconductor', 'Posit'/'tidyverse', and the wider R community.", - "License": "MIT + file LICENSE", - "URL": "https://rconsortium.github.io/S7/, https://github.com/RConsortium/S7", - "BugReports": "https://github.com/RConsortium/S7/issues", - "Depends": [ - "R (>= 3.5.0)" - ], - "Imports": [ - "utils" - ], - "Suggests": [ - "bench", - "callr", - "covr", - "knitr", - "methods", - "rmarkdown", - "testthat (>= 3.2.0)", - "tibble" - ], - "VignetteBuilder": "knitr", - "Config/build/compilation-database": "true", - "Config/Needs/website": "sloop", - "Config/testthat/edition": "3", - "Config/testthat/parallel": "TRUE", - "Config/testthat/start-first": "external-generic", - "Encoding": "UTF-8", - "RoxygenNote": "7.3.3", - "NeedsCompilation": "yes", - "Author": "Object-Oriented Programming Working Group [cph], Davis Vaughan [aut], Jim Hester [aut] (ORCID: ), Tomasz Kalinowski [aut], Will Landau [aut], Michael Lawrence [aut], Martin Maechler [aut] (ORCID: ), Luke Tierney [aut], Hadley Wickham [aut, cre] (ORCID: )", - "Maintainer": "Hadley Wickham ", - "Repository": "CRAN" - }, "askpass": { "Package": "askpass", "Version": "1.2.1", @@ -470,29 +413,6 @@ "Maintainer": "Hadley Wickham ", "Repository": "CRAN" }, - "farver": { - "Package": "farver", - "Version": "2.1.2", - "Source": "Repository", - "Type": "Package", - "Title": "High Performance Colour Space Manipulation", - "Authors@R": "c( person(\"Thomas Lin\", \"Pedersen\", , \"thomas.pedersen@posit.co\", role = c(\"cre\", \"aut\"), comment = c(ORCID = \"0000-0002-5147-4711\")), person(\"Berendea\", \"Nicolae\", role = \"aut\", comment = \"Author of the ColorSpace C++ library\"), person(\"Romain\", \"François\", , \"romain@purrple.cat\", role = \"aut\", comment = c(ORCID = \"0000-0002-2444-4226\")), person(\"Posit, PBC\", role = c(\"cph\", \"fnd\")) )", - 
"Description": "The encoding of colour can be handled in many different ways, using different colour spaces. As different colour spaces have different uses, efficient conversion between these representations are important. The 'farver' package provides a set of functions that gives access to very fast colour space conversion and comparisons implemented in C++, and offers speed improvements over the 'convertColor' function in the 'grDevices' package.", - "License": "MIT + file LICENSE", - "URL": "https://farver.data-imaginist.com, https://github.com/thomasp85/farver", - "BugReports": "https://github.com/thomasp85/farver/issues", - "Suggests": [ - "covr", - "testthat (>= 3.0.0)" - ], - "Config/testthat/edition": "3", - "Encoding": "UTF-8", - "RoxygenNote": "7.3.1", - "NeedsCompilation": "yes", - "Author": "Thomas Lin Pedersen [cre, aut] (), Berendea Nicolae [aut] (Author of the ColorSpace C++ library), Romain François [aut] (), Posit, PBC [cph, fnd]", - "Maintainer": "Thomas Lin Pedersen ", - "Repository": "CRAN" - }, "generics": { "Package": "generics", "Version": "0.1.4", @@ -525,79 +445,6 @@ "Maintainer": "Hadley Wickham ", "Repository": "CRAN" }, - "ggplot2": { - "Package": "ggplot2", - "Version": "4.0.2", - "Source": "Repository", - "Title": "Create Elegant Data Visualisations Using the Grammar of Graphics", - "Authors@R": "c( person(\"Hadley\", \"Wickham\", , \"hadley@posit.co\", role = \"aut\", comment = c(ORCID = \"0000-0003-4757-117X\")), person(\"Winston\", \"Chang\", role = \"aut\", comment = c(ORCID = \"0000-0002-1576-2126\")), person(\"Lionel\", \"Henry\", role = \"aut\"), person(\"Thomas Lin\", \"Pedersen\", , \"thomas.pedersen@posit.co\", role = c(\"aut\", \"cre\"), comment = c(ORCID = \"0000-0002-5147-4711\")), person(\"Kohske\", \"Takahashi\", role = \"aut\"), person(\"Claus\", \"Wilke\", role = \"aut\", comment = c(ORCID = \"0000-0002-7470-9261\")), person(\"Kara\", \"Woo\", role = \"aut\", comment = c(ORCID = \"0000-0002-5125-4188\")), 
person(\"Hiroaki\", \"Yutani\", role = \"aut\", comment = c(ORCID = \"0000-0002-3385-7233\")), person(\"Dewey\", \"Dunnington\", role = \"aut\", comment = c(ORCID = \"0000-0002-9415-4582\")), person(\"Teun\", \"van den Brand\", role = \"aut\", comment = c(ORCID = \"0000-0002-9335-7468\")), person(\"Posit, PBC\", role = c(\"cph\", \"fnd\"), comment = c(ROR = \"03wc8by49\")) )", - "Description": "A system for 'declaratively' creating graphics, based on \"The Grammar of Graphics\". You provide the data, tell 'ggplot2' how to map variables to aesthetics, what graphical primitives to use, and it takes care of the details.", - "License": "MIT + file LICENSE", - "URL": "https://ggplot2.tidyverse.org, https://github.com/tidyverse/ggplot2", - "BugReports": "https://github.com/tidyverse/ggplot2/issues", - "Depends": [ - "R (>= 4.1)" - ], - "Imports": [ - "cli", - "grDevices", - "grid", - "gtable (>= 0.3.6)", - "isoband", - "lifecycle (> 1.0.1)", - "rlang (>= 1.1.0)", - "S7", - "scales (>= 1.4.0)", - "stats", - "vctrs (>= 0.6.0)", - "withr (>= 2.5.0)" - ], - "Suggests": [ - "broom", - "covr", - "dplyr", - "ggplot2movies", - "hexbin", - "Hmisc", - "hms", - "knitr", - "mapproj", - "maps", - "MASS", - "mgcv", - "multcomp", - "munsell", - "nlme", - "profvis", - "quantreg", - "quarto", - "ragg (>= 1.2.6)", - "RColorBrewer", - "roxygen2", - "rpart", - "sf (>= 0.7-3)", - "svglite (>= 2.1.2)", - "testthat (>= 3.1.5)", - "tibble", - "vdiffr (>= 1.0.6)", - "xml2" - ], - "Enhances": [ - "sp" - ], - "VignetteBuilder": "quarto", - "Config/Needs/website": "ggtext, tidyr, forcats, tidyverse/tidytemplate", - "Config/testthat/edition": "3", - "Config/usethis/last-upkeep": "2025-04-23", - "Encoding": "UTF-8", - "LazyData": "true", - "RoxygenNote": "7.3.3", - "Collate": "'ggproto.R' 'ggplot-global.R' 'aaa-.R' 'aes-colour-fill-alpha.R' 'aes-evaluation.R' 'aes-group-order.R' 'aes-linetype-size-shape.R' 'aes-position.R' 'all-classes.R' 'compat-plyr.R' 'utilities.R' 'aes.R' 'annotation-borders.R' 
'utilities-checks.R' 'legend-draw.R' 'geom-.R' 'annotation-custom.R' 'annotation-logticks.R' 'scale-type.R' 'layer.R' 'make-constructor.R' 'geom-polygon.R' 'geom-map.R' 'annotation-map.R' 'geom-raster.R' 'annotation-raster.R' 'annotation.R' 'autolayer.R' 'autoplot.R' 'axis-secondary.R' 'backports.R' 'bench.R' 'bin.R' 'coord-.R' 'coord-cartesian-.R' 'coord-fixed.R' 'coord-flip.R' 'coord-map.R' 'coord-munch.R' 'coord-polar.R' 'coord-quickmap.R' 'coord-radial.R' 'coord-sf.R' 'coord-transform.R' 'data.R' 'docs_layer.R' 'facet-.R' 'facet-grid-.R' 'facet-null.R' 'facet-wrap.R' 'fortify-map.R' 'fortify-models.R' 'fortify-spatial.R' 'fortify.R' 'stat-.R' 'geom-abline.R' 'geom-rect.R' 'geom-bar.R' 'geom-tile.R' 'geom-bin2d.R' 'geom-blank.R' 'geom-boxplot.R' 'geom-col.R' 'geom-path.R' 'geom-contour.R' 'geom-point.R' 'geom-count.R' 'geom-crossbar.R' 'geom-segment.R' 'geom-curve.R' 'geom-defaults.R' 'geom-ribbon.R' 'geom-density.R' 'geom-density2d.R' 'geom-dotplot.R' 'geom-errorbar.R' 'geom-freqpoly.R' 'geom-function.R' 'geom-hex.R' 'geom-histogram.R' 'geom-hline.R' 'geom-jitter.R' 'geom-label.R' 'geom-linerange.R' 'geom-pointrange.R' 'geom-quantile.R' 'geom-rug.R' 'geom-sf.R' 'geom-smooth.R' 'geom-spoke.R' 'geom-text.R' 'geom-violin.R' 'geom-vline.R' 'ggplot2-package.R' 'grob-absolute.R' 'grob-dotstack.R' 'grob-null.R' 'grouping.R' 'properties.R' 'margins.R' 'theme-elements.R' 'guide-.R' 'guide-axis.R' 'guide-axis-logticks.R' 'guide-axis-stack.R' 'guide-axis-theta.R' 'guide-legend.R' 'guide-bins.R' 'guide-colorbar.R' 'guide-colorsteps.R' 'guide-custom.R' 'guide-none.R' 'guide-old.R' 'guides-.R' 'guides-grid.R' 'hexbin.R' 'import-standalone-obj-type.R' 'import-standalone-types-check.R' 'labeller.R' 'labels.R' 'layer-sf.R' 'layout.R' 'limits.R' 'performance.R' 'plot-build.R' 'plot-construction.R' 'plot-last.R' 'plot.R' 'position-.R' 'position-collide.R' 'position-dodge.R' 'position-dodge2.R' 'position-identity.R' 'position-jitter.R' 'position-jitterdodge.R' 'position-nudge.R' 
'position-stack.R' 'quick-plot.R' 'reshape-add-margins.R' 'save.R' 'scale-.R' 'scale-alpha.R' 'scale-binned.R' 'scale-brewer.R' 'scale-colour.R' 'scale-continuous.R' 'scale-date.R' 'scale-discrete-.R' 'scale-expansion.R' 'scale-gradient.R' 'scale-grey.R' 'scale-hue.R' 'scale-identity.R' 'scale-linetype.R' 'scale-linewidth.R' 'scale-manual.R' 'scale-shape.R' 'scale-size.R' 'scale-steps.R' 'scale-view.R' 'scale-viridis.R' 'scales-.R' 'stat-align.R' 'stat-bin.R' 'stat-summary-2d.R' 'stat-bin2d.R' 'stat-bindot.R' 'stat-binhex.R' 'stat-boxplot.R' 'stat-connect.R' 'stat-contour.R' 'stat-count.R' 'stat-density-2d.R' 'stat-density.R' 'stat-ecdf.R' 'stat-ellipse.R' 'stat-function.R' 'stat-identity.R' 'stat-manual.R' 'stat-qq-line.R' 'stat-qq.R' 'stat-quantilemethods.R' 'stat-sf-coordinates.R' 'stat-sf.R' 'stat-smooth-methods.R' 'stat-smooth.R' 'stat-sum.R' 'stat-summary-bin.R' 'stat-summary-hex.R' 'stat-summary.R' 'stat-unique.R' 'stat-ydensity.R' 'summarise-plot.R' 'summary.R' 'theme.R' 'theme-defaults.R' 'theme-current.R' 'theme-sub.R' 'utilities-break.R' 'utilities-grid.R' 'utilities-help.R' 'utilities-patterns.R' 'utilities-resolution.R' 'utilities-tidy-eval.R' 'zxx.R' 'zzz.R'", - "NeedsCompilation": "no", - "Author": "Hadley Wickham [aut] (ORCID: ), Winston Chang [aut] (ORCID: ), Lionel Henry [aut], Thomas Lin Pedersen [aut, cre] (ORCID: ), Kohske Takahashi [aut], Claus Wilke [aut] (ORCID: ), Kara Woo [aut] (ORCID: ), Hiroaki Yutani [aut] (ORCID: ), Dewey Dunnington [aut] (ORCID: ), Teun van den Brand [aut] (ORCID: ), Posit, PBC [cph, fnd] (ROR: )", - "Maintainer": "Thomas Lin Pedersen ", - "Repository": "CRAN" - }, "glue": { "Package": "glue", "Version": "1.8.0", @@ -639,46 +486,6 @@ "Maintainer": "Jennifer Bryan ", "Repository": "CRAN" }, - "gtable": { - "Package": "gtable", - "Version": "0.3.6", - "Source": "Repository", - "Title": "Arrange 'Grobs' in Tables", - "Authors@R": "c( person(\"Hadley\", \"Wickham\", , \"hadley@posit.co\", role = \"aut\"), person(\"Thomas 
Lin\", \"Pedersen\", , \"thomas.pedersen@posit.co\", role = c(\"aut\", \"cre\")), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\")) )", - "Description": "Tools to make it easier to work with \"tables\" of 'grobs'. The 'gtable' package defines a 'gtable' grob class that specifies a grid along with a list of grobs and their placement in the grid. Further the package makes it easy to manipulate and combine 'gtable' objects so that complex compositions can be built up sequentially.", - "License": "MIT + file LICENSE", - "URL": "https://gtable.r-lib.org, https://github.com/r-lib/gtable", - "BugReports": "https://github.com/r-lib/gtable/issues", - "Depends": [ - "R (>= 4.0)" - ], - "Imports": [ - "cli", - "glue", - "grid", - "lifecycle", - "rlang (>= 1.1.0)", - "stats" - ], - "Suggests": [ - "covr", - "ggplot2", - "knitr", - "profvis", - "rmarkdown", - "testthat (>= 3.0.0)" - ], - "VignetteBuilder": "knitr", - "Config/Needs/website": "tidyverse/tidytemplate", - "Config/testthat/edition": "3", - "Config/usethis/last-upkeep": "2024-10-25", - "Encoding": "UTF-8", - "RoxygenNote": "7.3.2", - "NeedsCompilation": "no", - "Author": "Hadley Wickham [aut], Thomas Lin Pedersen [aut, cre], Posit Software, PBC [cph, fnd]", - "Maintainer": "Thomas Lin Pedersen ", - "Repository": "CRAN" - }, "hms": { "Package": "hms", "Version": "1.1.4", @@ -813,48 +620,6 @@ "Maintainer": "Hadley Wickham ", "Repository": "CRAN" }, - "isoband": { - "Package": "isoband", - "Version": "0.3.0", - "Source": "Repository", - "Title": "Generate Isolines and Isobands from Regularly Spaced Elevation Grids", - "Authors@R": "c( person(\"Hadley\", \"Wickham\", , \"hadley@posit.co\", role = \"aut\", comment = c(ORCID = \"0000-0003-4757-117X\")), person(\"Claus O.\", \"Wilke\", , \"wilke@austin.utexas.edu\", role = \"aut\", comment = c(\"Original author\", ORCID = \"0000-0002-7470-9261\")), person(\"Thomas Lin\", \"Pedersen\", , \"thomas.pedersen@posit.co\", role = c(\"aut\", \"cre\"), comment = c(ORCID = 
\"0000-0002-5147-4711\")), person(\"Posit, PBC\", role = c(\"cph\", \"fnd\"), comment = c(ROR = \"03wc8by49\")) )", - "Description": "A fast C++ implementation to generate contour lines (isolines) and contour polygons (isobands) from regularly spaced grids containing elevation data.", - "License": "MIT + file LICENSE", - "URL": "https://isoband.r-lib.org, https://github.com/r-lib/isoband", - "BugReports": "https://github.com/r-lib/isoband/issues", - "Imports": [ - "cli", - "grid", - "rlang", - "utils" - ], - "Suggests": [ - "covr", - "ggplot2", - "knitr", - "magick", - "bench", - "rmarkdown", - "sf", - "testthat (>= 3.0.0)", - "xml2" - ], - "VignetteBuilder": "knitr", - "Config/Needs/website": "tidyverse/tidytemplate", - "Config/testthat/edition": "3", - "Config/usethis/last-upkeep": "2025-12-05", - "Encoding": "UTF-8", - "RoxygenNote": "7.3.3", - "Config/build/compilation-database": "true", - "LinkingTo": [ - "cpp11" - ], - "NeedsCompilation": "yes", - "Author": "Hadley Wickham [aut] (ORCID: ), Claus O. 
Wilke [aut] (Original author, ORCID: ), Thomas Lin Pedersen [aut, cre] (ORCID: ), Posit, PBC [cph, fnd] (ROR: )", - "Maintainer": "Thomas Lin Pedersen ", - "Repository": "CRAN" - }, "janitor": { "Package": "janitor", "Version": "2.2.1", @@ -931,26 +696,6 @@ "Author": "Jeroen Ooms [aut, cre] (), Duncan Temple Lang [ctb], Lloyd Hilaiel [cph] (author of bundled libyajl)", "Repository": "CRAN" }, - "labeling": { - "Package": "labeling", - "Version": "0.4.3", - "Source": "Repository", - "Type": "Package", - "Title": "Axis Labeling", - "Date": "2023-08-29", - "Author": "Justin Talbot,", - "Maintainer": "Nuno Sempere ", - "Description": "Functions which provide a range of axis labeling algorithms.", - "License": "MIT + file LICENSE | Unlimited", - "Collate": "'labeling.R'", - "NeedsCompilation": "no", - "Imports": [ - "stats", - "graphics" - ], - "Repository": "CRAN", - "Encoding": "UTF-8" - }, "lifecycle": { "Package": "lifecycle", "Version": "1.0.5", @@ -1554,50 +1299,6 @@ "Maintainer": "Hadley Wickham ", "Repository": "CRAN" }, - "scales": { - "Package": "scales", - "Version": "1.4.0", - "Source": "Repository", - "Title": "Scale Functions for Visualization", - "Authors@R": "c( person(\"Hadley\", \"Wickham\", , \"hadley@posit.co\", role = \"aut\"), person(\"Thomas Lin\", \"Pedersen\", , \"thomas.pedersen@posit.co\", role = c(\"cre\", \"aut\"), comment = c(ORCID = \"0000-0002-5147-4711\")), person(\"Dana\", \"Seidel\", role = \"aut\"), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\"), comment = c(ROR = \"03wc8by49\")) )", - "Description": "Graphical scales map data to aesthetics, and provide methods for automatically determining breaks and labels for axes and legends.", - "License": "MIT + file LICENSE", - "URL": "https://scales.r-lib.org, https://github.com/r-lib/scales", - "BugReports": "https://github.com/r-lib/scales/issues", - "Depends": [ - "R (>= 4.1)" - ], - "Imports": [ - "cli", - "farver (>= 2.0.3)", - "glue", - "labeling", - "lifecycle", - "R6", - 
"RColorBrewer", - "rlang (>= 1.1.0)", - "viridisLite" - ], - "Suggests": [ - "bit64", - "covr", - "dichromat", - "ggplot2", - "hms (>= 0.5.0)", - "stringi", - "testthat (>= 3.0.0)" - ], - "Config/Needs/website": "tidyverse/tidytemplate", - "Config/testthat/edition": "3", - "Config/usethis/last-upkeep": "2025-04-23", - "Encoding": "UTF-8", - "LazyLoad": "yes", - "RoxygenNote": "7.3.2", - "NeedsCompilation": "no", - "Author": "Hadley Wickham [aut], Thomas Lin Pedersen [cre, aut] (), Dana Seidel [aut], Posit Software, PBC [cph, fnd] (03wc8by49)", - "Maintainer": "Thomas Lin Pedersen ", - "Repository": "CRAN" - }, "selectr": { "Package": "selectr", "Version": "0.5-1", @@ -2094,34 +1795,6 @@ "Maintainer": "Davis Vaughan ", "Repository": "CRAN" }, - "viridisLite": { - "Package": "viridisLite", - "Version": "0.4.3", - "Source": "Repository", - "Type": "Package", - "Title": "Colorblind-Friendly Color Maps (Lite Version)", - "Date": "2026-02-03", - "Authors@R": "c( person(\"Simon\", \"Garnier\", email = \"garnier@njit.edu\", role = c(\"aut\", \"cre\")), person(\"Noam\", \"Ross\", email = \"noam.ross@gmail.com\", role = c(\"ctb\", \"cph\")), person(\"Bob\", \"Rudis\", email = \"bob@rud.is\", role = c(\"ctb\", \"cph\")), person(\"Marco\", \"Sciaini\", email = \"sciaini.marco@gmail.com\", role = c(\"ctb\", \"cph\")), person(\"Antônio Pedro\", \"Camargo\", role = c(\"ctb\", \"cph\")), person(\"Cédric\", \"Scherer\", email = \"scherer@izw-berlin.de\", role = c(\"ctb\", \"cph\")) )", - "Maintainer": "Simon Garnier ", - "Description": "Color maps designed to improve graph readability for readers with common forms of color blindness and/or color vision deficiency. The color maps are also perceptually-uniform, both in regular form and also when converted to black-and-white for printing. 
This is the 'lite' version of the 'viridis' package that also contains 'ggplot2' bindings for discrete and continuous color and fill scales and can be found at .", - "License": "MIT + file LICENSE", - "Encoding": "UTF-8", - "Depends": [ - "R (>= 2.10)" - ], - "Suggests": [ - "hexbin (>= 1.27.0)", - "ggplot2 (>= 1.0.1)", - "testthat", - "covr" - ], - "URL": "https://sjmgarnier.github.io/viridisLite/, https://github.com/sjmgarnier/viridisLite/", - "BugReports": "https://github.com/sjmgarnier/viridisLite/issues/", - "RoxygenNote": "7.3.3", - "NeedsCompilation": "no", - "Author": "Simon Garnier [aut, cre], Noam Ross [ctb, cph], Bob Rudis [ctb, cph], Marco Sciaini [ctb, cph], Antônio Pedro Camargo [ctb, cph], Cédric Scherer [ctb, cph]", - "Repository": "CRAN" - }, "vroom": { "Package": "vroom", "Version": "1.7.0", diff --git a/tests/testthat/test-crosswalk_data.R b/tests/testthat/test-crosswalk_data.R index a2ac389..9b45351 100644 --- a/tests/testthat/test-crosswalk_data.R +++ b/tests/testthat/test-crosswalk_data.R @@ -687,6 +687,7 @@ test_that("crosswalk_data reports unmatched data rows", { join_quality <- attr(result, "join_quality") expect_equal(join_quality$n_data_unmatched, 1) expect_equal(join_quality$n_data_total, 3) + expect_equal(join_quality$pct_data_unmatched, 1 / 3 * 100) expect_true("C" %in% join_quality$data_geoids_unmatched) }) @@ -712,6 +713,7 @@ test_that("crosswalk_data reports unmatched crosswalk rows", { join_quality <- attr(result, "join_quality") expect_equal(join_quality$n_crosswalk_unmatched, 2) expect_equal(join_quality$n_crosswalk_total, 3) + expect_equal(join_quality$pct_crosswalk_unmatched, 2 / 3 * 100) }) test_that("crosswalk_data reports state concentration for unmatched data rows", { diff --git a/tests/testthat/test-get_available_crosswalks.R b/tests/testthat/test-get_available_crosswalks.R new file mode 100644 index 0000000..b409c54 --- /dev/null +++ b/tests/testthat/test-get_available_crosswalks.R @@ -0,0 +1,111 @@ +# Tests for 
get_available_crosswalks() + +# ============================================================================== +# Structure tests +# ============================================================================== + +test_that("get_available_crosswalks returns a tibble with expected columns", { + result <- get_available_crosswalks() + + expect_s3_class(result, "tbl_df") + expect_equal(ncol(result), 4) + expect_equal( + colnames(result), + c("source_geography", "target_geography", "source_year", "target_year")) +}) + +test_that("get_available_crosswalks has correct column types", { + result <- get_available_crosswalks() + + expect_type(result$source_geography, "character") + expect_type(result$target_geography, "character") + expect_type(result$source_year, "integer") + expect_type(result$target_year, "integer") +}) + +test_that("get_available_crosswalks has no duplicate rows", { + result <- get_available_crosswalks() + + expect_equal(nrow(result), nrow(dplyr::distinct(result))) +}) + +test_that("get_available_crosswalks has no same-year self-pairs for Geocorr rows", { + result <- get_available_crosswalks() + + geocorr_self_pairs <- result |> + dplyr::filter( + source_geography == target_geography, + source_year == target_year, + source_year %in% c(2018L, 2022L)) + + expect_equal(nrow(geocorr_self_pairs), 0) +}) + +# ============================================================================== +# Content tests - NHGIS rows +# ============================================================================== + +test_that("get_available_crosswalks contains NHGIS rows", { + result <- get_available_crosswalks() + + nhgis_match <- result |> + dplyr::filter( + source_geography == "tract", + target_geography == "tract", + source_year == 2010L, + target_year == 2020L) + + expect_equal(nrow(nhgis_match), 1) +}) + +# ============================================================================== +# Content tests - Geocorr 2022 rows +# 
============================================================================== + +test_that("get_available_crosswalks contains Geocorr 2022 rows", { + result <- get_available_crosswalks() + + geocorr_2022_match <- result |> + dplyr::filter( + source_geography == "tract", + target_geography == "zcta", + source_year == 2022L, + target_year == 2022L) + + expect_equal(nrow(geocorr_2022_match), 1) +}) + +# ============================================================================== +# Content tests - Geocorr 2018 rows +# ============================================================================== + +test_that("get_available_crosswalks contains Geocorr 2018 rows", { + result <- get_available_crosswalks() + + geocorr_2018_match <- result |> + dplyr::filter( + source_geography == "tract", + target_geography == "puma12", + source_year == 2018L, + target_year == 2018L) + + expect_equal(nrow(geocorr_2018_match), 1) +}) + +# ============================================================================== +# Content tests - CTData rows +# ============================================================================== + +test_that("get_available_crosswalks contains CTData rows", { + result <- get_available_crosswalks() + + # county 2020 -> 2022 + ctdata_match <- result |> + dplyr::filter( + source_geography == "county", + target_geography == "county", + source_year == 2020L, + target_year == 2022L) + + expect_equal(nrow(ctdata_match), 1) +}) diff --git a/tests/testthat/test-silent-mode.R b/tests/testthat/test-silent-mode.R new file mode 100644 index 0000000..d16b632 --- /dev/null +++ b/tests/testthat/test-silent-mode.R @@ -0,0 +1,190 @@ +# Tests for silent mode functionality + +# ============================================================================== +# Helper: mock crosswalk for testing crosswalk_data() without network calls +# ============================================================================== + +make_mock_crosswalk <- function() { + xwalk <- tibble::tibble( 
+ source_geoid = c("A", "A", "B"), + target_geoid = c("X", "Y", "Y"), + allocation_factor_source_to_target = c(0.6, 0.4, 1.0), + source_geography_name = "mock", + target_geography_name = "mock") + + attr(xwalk, "crosswalk_metadata") <- list( + source_geography = "tract", + target_geography = "zcta") + + list( + crosswalks = list(step_1 = xwalk), + plan = NULL, + message = "Mock crosswalk") +} + +make_mock_data <- function() { + tibble::tibble( + source_geoid = c("A", "B"), + count_population = c(100, 200)) +} + +# ============================================================================== +# get_crosswalk() silent parameter +# ============================================================================== + +test_that("get_crosswalk with silent = TRUE suppresses warning for nested geographies", { + expect_silent( + get_crosswalk( + source_geography = "tract", + target_geography = "county", + silent = TRUE)) +}) + +test_that("get_crosswalk with silent = FALSE emits warning for nested geographies", { + expect_warning( + get_crosswalk( + source_geography = "tract", + target_geography = "county", + silent = FALSE), + "nested within the target geography") +}) + +# ============================================================================== +# crosswalk_data() silent parameter +# ============================================================================== + +test_that("crosswalk_data with silent = TRUE suppresses messages", { + mock_cw <- make_mock_crosswalk() + mock_data <- make_mock_data() + + expect_silent( + crosswalk_data( + data = mock_data, + crosswalk = mock_cw, + count_columns = "count_population", + silent = TRUE)) +}) + +test_that("crosswalk_data with silent = FALSE emits messages", { + mock_cw <- make_mock_crosswalk() + mock_data <- make_mock_data() + + expect_message( + crosswalk_data( + data = mock_data, + crosswalk = mock_cw, + count_columns = "count_population", + silent = FALSE), + "Applying crosswalk step") +}) + +test_that("crosswalk_data silent 
= TRUE suppresses join quality even when show_join_quality = TRUE", { + mock_cw <- make_mock_crosswalk() + # Data with an unmatched GEOID to trigger join quality message + mock_data <- tibble::tibble( + source_geoid = c("A", "B", "C"), + count_population = c(100, 200, 300)) + + expect_silent( + crosswalk_data( + data = mock_data, + crosswalk = mock_cw, + count_columns = "count_population", + show_join_quality = TRUE, + silent = TRUE)) +}) + +test_that("crosswalk_data silent = TRUE suppresses warning for both crosswalk and geography params", { + mock_cw <- make_mock_crosswalk() + mock_data <- make_mock_data() + + expect_silent( + crosswalk_data( + data = mock_data, + crosswalk = mock_cw, + source_geography = "tract", + target_geography = "zcta", + count_columns = "count_population", + silent = TRUE)) +}) + +# ============================================================================== +# Global option: crosswalk.silent +# ============================================================================== + +test_that("global option crosswalk.silent = TRUE works as default", { + old_opts <- options(crosswalk.silent = TRUE) + on.exit(options(old_opts), add = TRUE) + + expect_silent( + get_crosswalk( + source_geography = "tract", + target_geography = "county")) +}) + +test_that("explicit silent = FALSE overrides global crosswalk.silent = TRUE", { + old_opts <- options(crosswalk.silent = TRUE) + on.exit(options(old_opts), add = TRUE) + + expect_warning( + get_crosswalk( + source_geography = "tract", + target_geography = "county", + silent = FALSE), + "nested within the target geography") +}) + +# ============================================================================== +# Errors still surface in silent mode +# ============================================================================== + +test_that("stop() errors still surface when silent = TRUE", { + expect_error( + get_crosswalk( + source_geography = "tract", + target_geography = "zcta", + source_year = 2005, + 
target_year = 2020, + silent = TRUE)) +}) + +test_that("crosswalk_data stop() errors still surface when silent = TRUE", { + expect_error( + crosswalk_data( + data = tibble::tibble(x = 1), + count_columns = "count_foo", + silent = TRUE), + "Either provide a crosswalk") +}) + +# ============================================================================== +# Internal helpers respect the option +# ============================================================================== + +test_that("cw_message is silent when option is TRUE", { + old_opts <- options(crosswalk.silent = TRUE) + on.exit(options(old_opts), add = TRUE) + + expect_silent(crosswalk:::cw_message("test message")) +}) + +test_that("cw_message emits message when option is FALSE", { + old_opts <- options(crosswalk.silent = FALSE) + on.exit(options(old_opts), add = TRUE) + + expect_message(crosswalk:::cw_message("test message"), "test message") +}) + +test_that("cw_warning is silent when option is TRUE", { + old_opts <- options(crosswalk.silent = TRUE) + on.exit(options(old_opts), add = TRUE) + + expect_silent(crosswalk:::cw_warning("test warning")) +}) + +test_that("cw_warning emits warning when option is FALSE", { + old_opts <- options(crosswalk.silent = FALSE) + on.exit(options(old_opts), add = TRUE) + + expect_warning(crosswalk:::cw_warning("test warning"), "test warning") +}) diff --git a/vignettes/standardizing-longitudinal-data.Rmd b/vignettes/standardizing-longitudinal-data.Rmd index 38b3466..ad7a601 100644 --- a/vignettes/standardizing-longitudinal-data.Rmd +++ b/vignettes/standardizing-longitudinal-data.Rmd @@ -52,7 +52,7 @@ all six years (2018-2023): ```{r download-data} ## metadata object describing data year/vintage/url -metadata = tibble::tribble( +metadata = tribble( ~ year, ~ vintage, ~ url, 2018, 2010, "https://urban-data-catalog.s3.amazonaws.com/drupal-root-live/2023/12/20/hmda_tract_2018.csv", 2019, 2010, 
"https://urban-data-catalog.s3.amazonaws.com/drupal-root-live/2023/12/20/hmda_tract_2019.csv",