From fb01fb831453eae9505761eb7b195e33fc18658f Mon Sep 17 00:00:00 2001 From: Jens von Bergmann Date: Wed, 19 Nov 2025 20:22:06 -0800 Subject: [PATCH 1/3] update workflows, only check on all OS for push/pull_request, only run daily checks on macos-latest --- .github/workflows/R-CMD-check-all.yaml | 53 ++++++++++++++++++++++++++ .github/workflows/R-CMD-check.yaml | 8 ---- 2 files changed, 53 insertions(+), 8 deletions(-) create mode 100644 .github/workflows/R-CMD-check-all.yaml diff --git a/.github/workflows/R-CMD-check-all.yaml b/.github/workflows/R-CMD-check-all.yaml new file mode 100644 index 0000000..8b9fa45 --- /dev/null +++ b/.github/workflows/R-CMD-check-all.yaml @@ -0,0 +1,53 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: R-CMD-check.yaml + +permissions: read-all + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: macos-latest, r: 'release'} + - {os: windows-latest, r: 'release'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'oldrel-1'} + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + COMPILE_VIG: ${{ secrets.COMPILE_VIG }} + + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true + build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 5762b51..55858f7 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -1,10 +1,6 @@ # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: - push: - branches: [main, master] - pull_request: - branches: [main, master] schedule: - cron: "15 15 * * *" @@ -23,10 +19,6 @@ jobs: matrix: config: - {os: macos-latest, r: 'release'} - - {os: windows-latest, r: 'release'} - - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - - {os: ubuntu-latest, r: 'release'} - - {os: ubuntu-latest, r: 'oldrel-1'} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} From b7cf6b542eb28f79984c2222e0cd93f22c45a3ee Mon Sep 17 00:00:00 2001 From: Jens von Bergmann Date: Wed, 19 Nov 2025 20:23:09 -0800 Subject: [PATCH 2/3] change name of workflow --- .github/workflows/R-CMD-check-all.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/R-CMD-check-all.yaml b/.github/workflows/R-CMD-check-all.yaml index 8b9fa45..c06f2fd 100644 --- a/.github/workflows/R-CMD-check-all.yaml +++ b/.github/workflows/R-CMD-check-all.yaml @@ -6,7 +6,7 @@ on: pull_request: branches: [main, master] -name: R-CMD-check.yaml +name: R-CMD-check-all.yaml permissions: read-all From 320147e194bf4046058f4ed29d584d1417626d0c Mon Sep 17 00:00:00 2001 From: dshkol Date: Wed, 21 Jan 2026 17:49:43 -0800 Subject: [PATCH 3/3] fix: Edge case and error handling improvements (H8, H9, H10, M3, M4, M8, M9) - H9: Fix NULL/empty check order in view_cansim_webpage - H8: Add data context to pull(date_field) when sample_date is NA - H10: Pass correct cache_path and language to list_cansim_cached_tables - M3: Pass warn_only through recursive retry calls in get/post helpers - M4: Route API calls through retry helpers (get_cansim_table_url, get_cansim_changed_tables, get_cansim_cube_metadata) - M8: Handle NULL/empty vector in cache freshness check - M9: Add tryCatch error handling for cache write operations Co-Authored-By: Claude Opus 4.5 --- R/cansim.R | 18 +++++------------- R/cansim_helpers.R | 4 ++-- R/cansim_metadata.R | 11 ++--------- R/cansim_parquet.R | 29 +++++++++++++++++++++-------- 4 files changed, 30 insertions(+), 32 deletions(-) diff --git a/R/cansim.R b/R/cansim.R index fbef320..856be23 100644 --- a/R/cansim.R +++ b/R/cansim.R @@ -87,8 +87,7 @@ normalize_cansim_values <- function(data, replacement_value="val_norm", normaliz sample_date <- data[1:10,date_field] %>% pull(date_field) %>% na.omit() %>% first() if (is.na(sample_date)) { - sample_date <- pull(date_field) %>% na.omit() %>% first() - + sample_date <- data %>% pull(date_field) %>% na.omit() %>% first() } # sample_date <- data[[date_field]] %>% # na.omit %>% @@ -893,11 +892,10 @@ categories_for_level <- function(data,column_name, level=NA, strict=FALSE, remov #' @export view_cansim_webpage <- function(cansimTableNumber = NULL){ browser <- getOption("browser") - cansimTableNumber <- tolower(cansimTableNumber) - if (is.null(cansimTableNumber)) { + if (is.null(cansimTableNumber) || length(cansimTableNumber) == 0) { url <- 'https://www150.statcan.gc.ca/t1/tbl1/en/sbv.action#tables' - } else if (grepl("^v\\d+$",cansimTableNumber)) { + } else if (grepl("^v\\d+$", tolower(cansimTableNumber))) { url <- paste0("https://www150.statcan.gc.ca/t1/tbl1/en/sbv.action?vectorNumbers=",cansimTableNumber) } else { cansimTableNumber <- paste0(gsub("-","",cleaned_ndm_table_number(cansimTableNumber)),"01") @@ -928,10 +926,7 @@ get_cansim_table_url <- function(cansimTableNumber, language = "en"){ cansimTableNumber <- cleaned_ndm_table_number(cansimTableNumber) l <- cleaned_ndm_language(language) %>% substr(1,2) url=paste0("https://www150.statcan.gc.ca/t1/wds/rest/getFullTableDownloadCSV/",naked_ndm_table_number(cansimTableNumber),"/",l) - response <- httr::GET(url) - if (response$status_code!=200) { - stop("Problem downloading data, status code ",response$status_code,"\n",httr::content(response),call.=FALSE) - } + response <- get_with_timeout_retry(url) httr::content(response)$object } @@ -975,10 +970,7 @@ get_cansim_changed_tables <- function(start_date,end_date=NULL){ seq(as.Date(start_date),as.Date(end_date),"days") %>% lapply(function(date){ url=paste0("https://www150.statcan.gc.ca/t1/wds/rest/getChangedCubeList/",strftime(date,"%Y-%m-%d")) - response <- httr::GET(url) - if (response$status_code!=200) { - stop("Problem downloading data, status code ",response$status_code,"\n",httr::content(response),call.=FALSE) - } + response <- get_with_timeout_retry(url) httr::content(response)$object %>% map(function(o)tibble(productId=o$productId,releaseTime=o$releaseTime)) %>% bind_rows diff --git a/R/cansim_helpers.R b/R/cansim_helpers.R index 2a433aa..abc6e75 100644 --- a/R/cansim_helpers.R +++ b/R/cansim_helpers.R @@ -95,7 +95,7 @@ get_with_timeout_retry <- function(url,timeout=200,retry=3,path=NA,warn_only=FAL } if (retry>0) { message("Got timeout from StatCan, trying again") - response <- get_with_timeout_retry(url,timeout=timeout,retry=retry-1,path=path) + response <- get_with_timeout_retry(url,timeout=timeout,retry=retry-1,path=path,warn_only=warn_only) } else { message("Got timeout from StatCan, giving up") } @@ -145,7 +145,7 @@ post_with_timeout_retry <- function(url,body,timeout=200,retry=3,warn_only=FALSE } if (retry>0) { message("Got timeout from StatCan, trying again") - response <- post_with_timeout_retry(url,body=body,timeout=timeout,retry=retry-1) + response <- post_with_timeout_retry(url,body=body,timeout=timeout,retry=retry-1,warn_only=warn_only) } else { message("Got timeout from StatCan, giving up") response=response$result diff --git a/R/cansim_metadata.R b/R/cansim_metadata.R index 2745e9b..e4d96b2 100644 --- a/R/cansim_metadata.R +++ b/R/cansim_metadata.R @@ -173,15 +173,8 @@ get_cansim_cube_metadata <- function(cansimTableNumber, type="overview",refresh= if (!file.exists(tmp) || refresh) { table_id <- naked_ndm_table_number(cansimTableNumber) url <- "https://www150.statcan.gc.ca/t1/wds/rest/getCubeMetadata" - response <- httr::POST(url, - #body=jsonlite::toJSON(list("productId"=table_id),auto_unbox =TRUE), - body=paste0("[",paste(paste0('{"productId":',table_id,'}'),collapse = ", "),"]"), - encode="json", - httr::add_headers("Content-Type"="application/json") - ) - if (response$status_code!=200) { - stop("Problem downloading data, status code ",response$status_code,"\n",httr::content(response),call.=FALSE) - } + body <- paste0("[",paste(paste0('{"productId":',table_id,'}'),collapse = ", "),"]") + response <- post_with_timeout_retry(url, body=body) data <- httr::content(response) data1 <- Filter(function(x)x$status=="SUCCESS",data) data2 <- Filter(function(x)x$status!="SUCCESS",data) diff --git a/R/cansim_parquet.R b/R/cansim_parquet.R index fa70ce0..8349b04 100644 --- a/R/cansim_parquet.R +++ b/R/cansim_parquet.R @@ -52,6 +52,7 @@ get_cansim_connection <- function(cansimTableNumber, cansimTableNumber <- cleaned_ndm_table_number(cansimTableNumber) have_custom_path <- !is.null(cache_path) if (!have_custom_path) cache_path <- tempdir() + base_cache_path <- cache_path # Save base cache path before it's overwritten cleaned_number <- cansimTableNumber cleaned_language <- cleaned_ndm_language(language) base_table <- naked_ndm_table_number(cansimTableNumber) @@ -68,15 +69,18 @@ get_cansim_connection <- function(cansimTableNumber, if (is.na(last_updated)) { warning("Could not determine if existing table is out of date.") } else { - last_downloaded <- list_cansim_cached_tables() %>% - filter(.data$cansimTableNumber==cleaned_number, .data$dataFormat==format) %>% + last_downloaded <- list_cansim_cached_tables(cache_path=base_cache_path) %>% + filter(.data$cansimTableNumber==cleaned_number, .data$dataFormat==format, .data$language==cleaned_language) %>% pull(.data$timeCached) - if (file.exists(db_path) && auto_refresh && !is.na(last_downloaded) && !is.null(last_updated) && - as.numeric(last_downloaded) 0 && !is.na(last_downloaded[1]) + + if (file.exists(db_path) && auto_refresh && has_valid_last_downloaded && !is.null(last_updated) && + as.numeric(last_downloaded[1])% dplyr::slice_head(n=1), format=format, schema_path) - saveRDS(partitioning,partitioning_path) + tryCatch( + saveRDS(partitioning,partitioning_path), + error = function(e) warning("Failed to save partitioning metadata: ", e$message) + ) } @@ -485,7 +495,10 @@ cansim_repartition_cached_table <- function(cansimTableNumber, unlink(old_path,recursive=TRUE) - saveRDS(new_partitioning,partitioning_path) + tryCatch( + saveRDS(new_partitioning,partitioning_path), + error = function(e) warning("Failed to save partitioning metadata: ", e$message) + ) invisible() }