Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions .github/workflows/R-CMD-check-all.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: [main, master]
pull_request:
branches: [main, master]

name: R-CMD-check-all.yaml

permissions: read-all

jobs:
R-CMD-check:
runs-on: ${{ matrix.config.os }}

name: ${{ matrix.config.os }} (${{ matrix.config.r }})

strategy:
fail-fast: false
matrix:
config:
- {os: macos-latest, r: 'release'}
- {os: windows-latest, r: 'release'}
- {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
- {os: ubuntu-latest, r: 'release'}
- {os: ubuntu-latest, r: 'oldrel-1'}

env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
R_KEEP_PKG_SOURCE: yes
COMPILE_VIG: ${{ secrets.COMPILE_VIG }}

steps:
- uses: actions/checkout@v4

- uses: r-lib/actions/setup-pandoc@v2

- uses: r-lib/actions/setup-r@v2
with:
r-version: ${{ matrix.config.r }}
http-user-agent: ${{ matrix.config.http-user-agent }}
use-public-rspm: true

- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::rcmdcheck
needs: check

- uses: r-lib/actions/check-r-package@v2
with:
upload-snapshots: true
build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")'
8 changes: 0 additions & 8 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: [main, master]
pull_request:
branches: [main, master]
schedule:
- cron: "15 15 * * *"

Expand All @@ -23,10 +19,6 @@ jobs:
matrix:
config:
- {os: macos-latest, r: 'release'}
- {os: windows-latest, r: 'release'}
- {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
- {os: ubuntu-latest, r: 'release'}
- {os: ubuntu-latest, r: 'oldrel-1'}

env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
Expand Down
21 changes: 13 additions & 8 deletions R/cansim.R
Original file line number Diff line number Diff line change
Expand Up @@ -138,11 +138,16 @@ normalize_cansim_values <- function(data, replacement_value="val_norm", normaliz
}

if (strip_classification_code){
for (field in fields) {
if (sum(!is.na(data[[paste0(classification_prefix,field)]]))>0) {
data <- data %>%
mutate(!!field:=gsub(" \\[.+\\]$","",!!as.name(field)))
}
# Identify fields that have classification codes to strip (non-NA values in classification column)
fields_to_strip <- fields[vapply(fields, function(field) {
cc_col <- paste0(classification_prefix, field)
cc_col %in% names(data) && sum(!is.na(data[[cc_col]])) > 0
}, logical(1))]

if (length(fields_to_strip) > 0) {
# Use across() to strip all classification codes in a single pass
data <- data %>%
mutate(across(all_of(fields_to_strip), ~gsub(" \\[.+\\]$", "", .x)))
}
}

Expand Down Expand Up @@ -333,7 +338,7 @@ fold_in_metadata_for_columns <- function(data,data_path,column_names){
select(setdiff(c(member_id_column,"GeoUID",hierarchy_name),names(data)))

hierarchy_data <- hierarchy_data %>%
mutate(!!member_id_column:=lapply(.data$...pos,function(d)d[column_index]) %>% unlist) %>%
mutate(!!member_id_column:=purrr::map_chr(.data$...pos, ~.x[column_index])) %>%
dplyr::left_join(join_column,by=member_id_column) %>%
dplyr::select(-!!as.name(member_id_column))
} else if (column[[dimension_name_column]] %in% names(data)){
Expand All @@ -345,7 +350,7 @@ fold_in_metadata_for_columns <- function(data,data_path,column_names){
select(setdiff(c(member_id_column,classification_name,hierarchy_name),names(data)))

hierarchy_data <- hierarchy_data %>%
mutate(!!member_id_column:=lapply(.data$...pos,function(d)d[column_index]) %>% unlist) %>%
mutate(!!member_id_column:=purrr::map_chr(.data$...pos, ~.x[column_index])) %>%
dplyr::left_join(join_column,by=member_id_column) %>%
dplyr::select(-!!as.name(member_id_column))
} else {
Expand Down Expand Up @@ -853,7 +858,7 @@ categories_for_level <- function(data,column_name, level=NA, strict=FALSE, remov
hierarchy_name=paste0("Hierarchy for ",column_name)
h <- data %>% dplyr::select(column_name,hierarchy_name) %>%
unique %>%
dplyr::mutate(hierarchy_level=(strsplit(!!as.name(hierarchy_name),"\\.") %>% lapply(length) %>% unlist)-1)
dplyr::mutate(hierarchy_level=lengths(strsplit(!!as.name(hierarchy_name),"\\."))-1)
max_level=max(h$hierarchy_level,na.rm = TRUE)
if (is.na(level) | level>max_level) level=max_level
h <- h %>%
Expand Down
17 changes: 11 additions & 6 deletions R/cansim_metadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ parse_metadata <- function(meta,data_path){
quote="\"",na.strings="",
colClasses="character",check.names=FALSE) %>%
names()
notes <- tibble(!!h[1]:=meta_part[-1] %>% lapply(\(x)gsub(",.+","",x)) %>% unlist(),
!!h[2]:=meta_part[-1] %>% lapply(\(x)gsub("^\\d+,","",x) %>% gsub("^\"|\"$","",.)) %>% unlist())
notes <- tibble(!!h[1]:=gsub(",.+", "", meta_part[-1]),
!!h[2]:=gsub("^\"|\"$", "", gsub("^\\d+,", "", meta_part[-1])))

}

Expand All @@ -95,11 +95,16 @@ parse_metadata <- function(meta,data_path){

column_ids <- dplyr::pull(meta2,dimension_id_column)
column_names <- dplyr::pull(meta2,dimension_name_column)

# P2: Pre-split meta3 by dimension_id for O(1) lookup instead of O(n) filter per column
meta3_split <- split(meta3, meta3[[dimension_id_column]])
meta2_split <- split(meta2, meta2[[dimension_id_column]])

for (column_index in column_ids) { # iterate through columns for which we have meta data
column <- meta2 %>% dplyr::filter(.data[[dimension_id_column]]==column_index)
column_key <- as.character(column_index)
column <- meta2_split[[column_key]]
is_geo_column <- grepl(geography_column,column[[dimension_name_column]]) & !(column[[dimension_name_column]] %in% column_names)
meta_x <- meta3 %>%
dplyr::filter(.data[[dimension_id_column]]==column_index) %>%
meta_x <- meta3_split[[column_key]] %>%
add_hierarchy(parent_member_id_column=parent_member_id_column,
member_id_column=member_id_column,
hierarchy_column=hierarchy_column,
Expand Down Expand Up @@ -208,7 +213,7 @@ get_cansim_cube_metadata <- function(cansimTableNumber, type="overview",refresh=

if (!file.exists(meta1_path)||refresh) {
m1 <- d %>% tibble::enframe() %>%
mutate(l=lapply(.data$value,class) %>% unlist()) %>%
mutate(l=vapply(.data$value, function(x) class(x)[1], character(1))) %>%
filter(.data$l!="list" | .data$name %in% c("surveyCode","subjectCode")) %>%
select(-"l") %>%
tidyr::pivot_wider() %>%
Expand Down
Loading