diff --git a/.github/workflows/R-CMD-check-all.yaml b/.github/workflows/R-CMD-check-all.yaml new file mode 100644 index 0000000..c06f2fd --- /dev/null +++ b/.github/workflows/R-CMD-check-all.yaml @@ -0,0 +1,53 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: R-CMD-check-all.yaml + +permissions: read-all + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: macos-latest, r: 'release'} + - {os: windows-latest, r: 'release'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'oldrel-1'} + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + COMPILE_VIG: ${{ secrets.COMPILE_VIG }} + + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true + build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 5762b51..55858f7 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -1,10 +1,6 @@ # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: - push: - branches: [main, master] - pull_request: - branches: [main, master] schedule: - cron: "15 15 * * *" @@ -23,10 +19,6 @@ jobs: matrix: config: - {os: macos-latest, r: 'release'} - - {os: windows-latest, r: 'release'} - - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - - {os: ubuntu-latest, r: 'release'} - - {os: ubuntu-latest, r: 'oldrel-1'} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} diff --git a/R/cansim.R b/R/cansim.R index fbef320..50e8dea 100644 --- a/R/cansim.R +++ b/R/cansim.R @@ -138,11 +138,16 @@ normalize_cansim_values <- function(data, replacement_value="val_norm", normaliz } if (strip_classification_code){ - for (field in fields) { - if (sum(!is.na(data[[paste0(classification_prefix,field)]]))>0) { - data <- data %>% - mutate(!!field:=gsub(" \\[.+\\]$","",!!as.name(field))) - } + # Identify fields that have classification codes to strip (non-NA values in classification column) + fields_to_strip <- fields[vapply(fields, function(field) { + cc_col <- paste0(classification_prefix, field) + cc_col %in% names(data) && sum(!is.na(data[[cc_col]])) > 0 + }, logical(1))] + + if (length(fields_to_strip) > 0) { + # Use across() to strip all classification codes in a single pass + data <- data %>% + mutate(across(all_of(fields_to_strip), ~gsub(" \\[.+\\]$", "", .x))) } } @@ -333,7 +338,7 @@ fold_in_metadata_for_columns <- function(data,data_path,column_names){ select(setdiff(c(member_id_column,"GeoUID",hierarchy_name),names(data))) hierarchy_data <- hierarchy_data %>% - mutate(!!member_id_column:=lapply(.data$...pos,function(d)d[column_index]) %>% unlist) %>% + mutate(!!member_id_column:=purrr::map_chr(.data$...pos, ~.x[column_index])) %>% dplyr::left_join(join_column,by=member_id_column) %>% dplyr::select(-!!as.name(member_id_column)) } else if (column[[dimension_name_column]] %in% names(data)){ @@ -345,7 +350,7 @@ fold_in_metadata_for_columns <- function(data,data_path,column_names){ select(setdiff(c(member_id_column,classification_name,hierarchy_name),names(data))) hierarchy_data <- hierarchy_data %>% - mutate(!!member_id_column:=lapply(.data$...pos,function(d)d[column_index]) %>% unlist) %>% + mutate(!!member_id_column:=purrr::map_chr(.data$...pos, ~.x[column_index])) %>% dplyr::left_join(join_column,by=member_id_column) %>% dplyr::select(-!!as.name(member_id_column)) } else { @@ -853,7 +858,7 @@ categories_for_level <- function(data,column_name, level=NA, strict=FALSE, remov hierarchy_name=paste0("Hierarchy for ",column_name) h <- data %>% dplyr::select(column_name,hierarchy_name) %>% unique %>% - dplyr::mutate(hierarchy_level=(strsplit(!!as.name(hierarchy_name),"\\.") %>% lapply(length) %>% unlist)-1) + dplyr::mutate(hierarchy_level=lengths(strsplit(!!as.name(hierarchy_name),"\\."))-1) max_level=max(h$hierarchy_level,na.rm = TRUE) if (is.na(level) | level>max_level) level=max_level h <- h %>% diff --git a/R/cansim_metadata.R b/R/cansim_metadata.R index 2745e9b..71be238 100644 --- a/R/cansim_metadata.R +++ b/R/cansim_metadata.R @@ -68,8 +68,8 @@ parse_metadata <- function(meta,data_path){ quote="\"",na.strings="", colClasses="character",check.names=FALSE) %>% names() - notes <- tibble(!!h[1]:=meta_part[-1] %>% lapply(\(x)gsub(",.+","",x)) %>% unlist(), - !!h[2]:=meta_part[-1] %>% lapply(\(x)gsub("^\\d+,","",x) %>% gsub("^\"|\"$","",.)) %>% unlist()) + notes <- tibble(!!h[1]:=gsub(",.+", "", meta_part[-1]), + !!h[2]:=gsub("^\"|\"$", "", gsub("^\\d+,", "", meta_part[-1]))) } @@ -95,11 +95,16 @@ parse_metadata <- function(meta,data_path){ column_ids <- dplyr::pull(meta2,dimension_id_column) column_names <- dplyr::pull(meta2,dimension_name_column) + + # P2: Pre-split meta3 by dimension_id for O(1) lookup instead of O(n) filter per column + meta3_split <- split(meta3, meta3[[dimension_id_column]]) + meta2_split <- split(meta2, meta2[[dimension_id_column]]) + for (column_index in column_ids) { # iterate through columns for which we have meta data - column <- meta2 %>% dplyr::filter(.data[[dimension_id_column]]==column_index) + column_key <- as.character(column_index) + column <- meta2_split[[column_key]] is_geo_column <- grepl(geography_column,column[[dimension_name_column]]) & !(column[[dimension_name_column]] %in% column_names) - meta_x <- meta3 %>% - dplyr::filter(.data[[dimension_id_column]]==column_index) %>% + meta_x <- meta3_split[[column_key]] %>% add_hierarchy(parent_member_id_column=parent_member_id_column, member_id_column=member_id_column, hierarchy_column=hierarchy_column, @@ -208,7 +213,7 @@ get_cansim_cube_metadata <- function(cansimTableNumber, type="overview",refresh= if (!file.exists(meta1_path)||refresh) { m1 <- d %>% tibble::enframe() %>% - mutate(l=lapply(.data$value,class) %>% unlist()) %>% + mutate(l=vapply(.data$value, function(x) class(x)[1], character(1))) %>% filter(.data$l!="list" | .data$name %in% c("surveyCode","subjectCode")) %>% select(-"l") %>% tidyr::pivot_wider() %>%