53 changes: 53 additions & 0 deletions .github/workflows/R-CMD-check-all.yaml
@@ -0,0 +1,53 @@
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: [main, master]
pull_request:
branches: [main, master]

name: R-CMD-check-all.yaml

permissions: read-all

jobs:
R-CMD-check:
runs-on: ${{ matrix.config.os }}

name: ${{ matrix.config.os }} (${{ matrix.config.r }})

strategy:
fail-fast: false
matrix:
config:
- {os: macos-latest, r: 'release'}
- {os: windows-latest, r: 'release'}
- {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
- {os: ubuntu-latest, r: 'release'}
- {os: ubuntu-latest, r: 'oldrel-1'}

env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
R_KEEP_PKG_SOURCE: yes
COMPILE_VIG: ${{ secrets.COMPILE_VIG }}

steps:
- uses: actions/checkout@v4

- uses: r-lib/actions/setup-pandoc@v2

- uses: r-lib/actions/setup-r@v2
with:
r-version: ${{ matrix.config.r }}
http-user-agent: ${{ matrix.config.http-user-agent }}
use-public-rspm: true

- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::rcmdcheck
needs: check

- uses: r-lib/actions/check-r-package@v2
with:
upload-snapshots: true
build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")'
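
The matrix workflow forwards a COMPILE_VIG repository secret into the check environment. How the package consumes it is not shown in this diff; a common pattern is to gate expensive vignette chunks on that variable, roughly like the following hypothetical setup chunk (knitr assumed, names illustrative):

# Hypothetical vignette setup chunk -- not taken from this repository.
# Chunks only evaluate when COMPILE_VIG is set (e.g. by the workflow above).
knitr::opts_chunk$set(eval = nzchar(Sys.getenv("COMPILE_VIG")))
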
8 changes: 0 additions & 8 deletions .github/workflows/R-CMD-check.yaml
@@ -1,10 +1,6 @@
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: [main, master]
pull_request:
branches: [main, master]
schedule:
- cron: "15 15 * * *"

@@ -23,10 +19,6 @@ jobs:
matrix:
config:
- {os: macos-latest, r: 'release'}
- {os: windows-latest, r: 'release'}
- {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
- {os: ubuntu-latest, r: 'release'}
- {os: ubuntu-latest, r: 'oldrel-1'}

env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
18 changes: 5 additions & 13 deletions R/cansim.R
@@ -87,8 +87,7 @@ normalize_cansim_values <- function(data, replacement_value="val_norm", normaliz

sample_date <- data[1:10,date_field] %>% pull(date_field) %>% na.omit() %>% first()
if (is.na(sample_date)) {
sample_date <- pull(date_field) %>% na.omit() %>% first()

sample_date <- data %>% pull(date_field) %>% na.omit() %>% first()
}
# sample_date <- data[[date_field]] %>%
# na.omit %>%
@@ -893,11 +892,10 @@ categories_for_level <- function(data,column_name, level=NA, strict=FALSE, remov
#' @export
view_cansim_webpage <- function(cansimTableNumber = NULL){
browser <- getOption("browser")
cansimTableNumber <- tolower(cansimTableNumber)

if (is.null(cansimTableNumber)) {
if (is.null(cansimTableNumber) || length(cansimTableNumber) == 0) {
url <- 'https://www150.statcan.gc.ca/t1/tbl1/en/sbv.action#tables'
} else if (grepl("^v\\d+$",cansimTableNumber)) {
} else if (grepl("^v\\d+$", tolower(cansimTableNumber))) {
url <- paste0("https://www150.statcan.gc.ca/t1/tbl1/en/sbv.action?vectorNumbers=",cansimTableNumber)
} else {
cansimTableNumber <- paste0(gsub("-","",cleaned_ndm_table_number(cansimTableNumber)),"01")
@@ -928,10 +926,7 @@ get_cansim_table_url <- function(cansimTableNumber, language = "en"){
cansimTableNumber <- cleaned_ndm_table_number(cansimTableNumber)
l <- cleaned_ndm_language(language) %>% substr(1,2)
url=paste0("https://www150.statcan.gc.ca/t1/wds/rest/getFullTableDownloadCSV/",naked_ndm_table_number(cansimTableNumber),"/",l)
response <- httr::GET(url)
if (response$status_code!=200) {
stop("Problem downloading data, status code ",response$status_code,"\n",httr::content(response),call.=FALSE)
}
response <- get_with_timeout_retry(url)
httr::content(response)$object
}

@@ -975,10 +970,7 @@ get_cansim_changed_tables <- function(start_date,end_date=NULL){
seq(as.Date(start_date),as.Date(end_date),"days") %>%
lapply(function(date){
url=paste0("https://www150.statcan.gc.ca/t1/wds/rest/getChangedCubeList/",strftime(date,"%Y-%m-%d"))
response <- httr::GET(url)
if (response$status_code!=200) {
stop("Problem downloading data, status code ",response$status_code,"\n",httr::content(response),call.=FALSE)
}
response <- get_with_timeout_retry(url)
httr::content(response)$object %>%
map(function(o)tibble(productId=o$productId,releaseTime=o$releaseTime)) %>%
bind_rows
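
Both call sites above now delegate their HTTP error handling to the shared get_with_timeout_retry() helper instead of repeating the GET-plus-status-check boilerplate. For reference, a minimal call to the changed-tables function touched above (dates are placeholders):

# Usage sketch; returns a tibble with one row per changed cube,
# carrying productId and releaseTime columns.
library(cansim)
changed <- get_cansim_changed_tables("2025-01-01", "2025-01-03")
head(changed)
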
4 changes: 2 additions & 2 deletions R/cansim_helpers.R
@@ -95,7 +95,7 @@ get_with_timeout_retry <- function(url,timeout=200,retry=3,path=NA,warn_only=FAL
}
if (retry>0) {
message("Got timeout from StatCan, trying again")
response <- get_with_timeout_retry(url,timeout=timeout,retry=retry-1,path=path)
response <- get_with_timeout_retry(url,timeout=timeout,retry=retry-1,path=path,warn_only=warn_only)
} else {
message("Got timeout from StatCan, giving up")
}
@@ -145,7 +145,7 @@ post_with_timeout_retry <- function(url,body,timeout=200,retry=3,warn_only=FALSE
}
if (retry>0) {
message("Got timeout from StatCan, trying again")
response <- post_with_timeout_retry(url,body=body,timeout=timeout,retry=retry-1)
response <- post_with_timeout_retry(url,body=body,timeout=timeout,retry=retry-1,warn_only=warn_only)
} else {
message("Got timeout from StatCan, giving up")
response=response$result
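
The fix in both helpers is the same: the warn_only flag is now forwarded on recursive retries, so the final attempt honours the caller's choice between warning and erroring. A simplified sketch of the retry pattern (not the package's full implementation, which also handles path downloads and status-code checks):

get_with_retry_sketch <- function(url, timeout = 200, retry = 3, warn_only = FALSE) {
  # attempt the request, turning timeouts/connection errors into NULL
  response <- tryCatch(httr::GET(url, httr::timeout(timeout)), error = function(e) NULL)
  if (is.null(response)) {
    if (retry > 0) {
      message("Got timeout from StatCan, trying again")
      # forward every argument, including warn_only, on the retry
      return(get_with_retry_sketch(url, timeout = timeout, retry = retry - 1, warn_only = warn_only))
    }
    msg <- "Got timeout from StatCan, giving up"
    if (warn_only) warning(msg) else stop(msg, call. = FALSE)
  }
  response
}
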
11 changes: 2 additions & 9 deletions R/cansim_metadata.R
@@ -173,15 +173,8 @@ get_cansim_cube_metadata <- function(cansimTableNumber, type="overview",refresh=
if (!file.exists(tmp) || refresh) {
table_id <- naked_ndm_table_number(cansimTableNumber)
url <- "https://www150.statcan.gc.ca/t1/wds/rest/getCubeMetadata"
response <- httr::POST(url,
#body=jsonlite::toJSON(list("productId"=table_id),auto_unbox =TRUE),
body=paste0("[",paste(paste0('{"productId":',table_id,'}'),collapse = ", "),"]"),
encode="json",
httr::add_headers("Content-Type"="application/json")
)
if (response$status_code!=200) {
stop("Problem downloading data, status code ",response$status_code,"\n",httr::content(response),call.=FALSE)
}
body <- paste0("[",paste(paste0('{"productId":',table_id,'}'),collapse = ", "),"]")
response <- post_with_timeout_retry(url, body=body)
data <- httr::content(response)
data1 <- Filter(function(x)x$status=="SUCCESS",data)
data2 <- Filter(function(x)x$status!="SUCCESS",data)
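
The metadata request now goes through post_with_timeout_retry() as well, with the body built once up front. That body is a small JSON array of product ids; for a single table it looks like this (illustrative id, not from this diff):

table_id <- "34100006"   # example product id
paste0("[", paste(paste0('{"productId":', table_id, '}'), collapse = ", "), "]")
#> [1] "[{\"productId\":34100006}]"
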
29 changes: 21 additions & 8 deletions R/cansim_parquet.R
@@ -52,6 +52,7 @@ get_cansim_connection <- function(cansimTableNumber,
cansimTableNumber <- cleaned_ndm_table_number(cansimTableNumber)
have_custom_path <- !is.null(cache_path)
if (!have_custom_path) cache_path <- tempdir()
base_cache_path <- cache_path # Save base cache path before it's overwritten
cleaned_number <- cansimTableNumber
cleaned_language <- cleaned_ndm_language(language)
base_table <- naked_ndm_table_number(cansimTableNumber)
@@ -68,15 +69,18 @@
if (is.na(last_updated)) {
warning("Could not determine if existing table is out of date.")
} else {
last_downloaded <- list_cansim_cached_tables() %>%
filter(.data$cansimTableNumber==cleaned_number, .data$dataFormat==format) %>%
last_downloaded <- list_cansim_cached_tables(cache_path=base_cache_path) %>%
filter(.data$cansimTableNumber==cleaned_number, .data$dataFormat==format, .data$language==cleaned_language) %>%
pull(.data$timeCached)

if (file.exists(db_path) && auto_refresh && !is.na(last_downloaded) && !is.null(last_updated) &&
as.numeric(last_downloaded)<as.numeric(last_updated)) {
# Handle empty vector (no matching cache entry) or NA
has_valid_last_downloaded <- !is.null(last_downloaded) && length(last_downloaded) > 0 && !is.na(last_downloaded[1])

if (file.exists(db_path) && auto_refresh && has_valid_last_downloaded && !is.null(last_updated) &&
as.numeric(last_downloaded[1])<as.numeric(last_updated)) {
message(paste0("A newer version of ",cansimTableNumber," is available, auto-refreshing the table..."))
refresh=TRUE
} else if (file.exists(db_path) && auto_refresh && (is.na(last_updated)||is.na(last_downloaded))){
} else if (file.exists(db_path) && auto_refresh && (is.na(last_updated)||!has_valid_last_downloaded)){
message(paste0("Could not determine if ",cansimTableNumber," is up to date..."))
}
}
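
For context on the guard above: when list_cansim_cached_tables() has no row matching the table, format and language, pull() returns a zero-length vector, and pushing that through is.na() inside the condition fails instead of evaluating to FALSE. The new has_valid_last_downloaded check short-circuits safely:

last_downloaded <- numeric(0)   # no matching cache entry
has_valid_last_downloaded <- !is.null(last_downloaded) &&
  length(last_downloaded) > 0 &&
  !is.na(last_downloaded[1])
has_valid_last_downloaded       # FALSE, so the if() above behaves as expected
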
@@ -256,7 +260,10 @@
}

# saving timestamp
saveRDS(strftime(time_check,format=TIME_FORMAT),paste0(meta_base_path,"_time"))
tryCatch(
saveRDS(strftime(time_check,format=TIME_FORMAT),paste0(meta_base_path,"_time")),
error = function(e) warning("Failed to save cache timestamp: ", e$message)
)


} else {
@@ -413,7 +420,10 @@ csv2arrow <- function(csv_file, arrow_file, format="parquet",
schema_path <- file.path(dirname(arrow_file),paste0(basename(arrow_file),".schema"))
partitioning_path <- file.path(dirname(arrow_file),paste0(basename(arrow_file),".partitioning"))
arrow::write_dataset(input %>% dplyr::slice_head(n=1), format=format, schema_path)
saveRDS(partitioning,partitioning_path)
tryCatch(
saveRDS(partitioning,partitioning_path),
error = function(e) warning("Failed to save partitioning metadata: ", e$message)
)
}


@@ -485,7 +495,10 @@ cansim_repartition_cached_table <- function(cansimTableNumber,

unlink(old_path,recursive=TRUE)

saveRDS(new_partitioning,partitioning_path)
tryCatch(
saveRDS(new_partitioning,partitioning_path),
error = function(e) warning("Failed to save partitioning metadata: ", e$message)
)
invisible()
}

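
The three saveRDS() call sites above now share the same shape: a cache-metadata write that fails (for example on a read-only or full cache directory) degrades to a warning instead of aborting the download. If the pattern grows further it could be factored into a small helper along these lines (hypothetical, not part of this PR):

save_rds_safely <- function(object, path, what = "cache metadata") {
  # write, but never let a failed metadata write abort the main operation
  tryCatch(
    saveRDS(object, path),
    error = function(e) warning("Failed to save ", what, ": ", e$message)
  )
}
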