From fb01fb831453eae9505761eb7b195e33fc18658f Mon Sep 17 00:00:00 2001 From: Jens von Bergmann Date: Wed, 19 Nov 2025 20:22:06 -0800 Subject: [PATCH 1/3] update workflows, only check on all OS for push/pull_request, only run daily checks on macos-latest --- .github/workflows/R-CMD-check-all.yaml | 53 ++++++++++++++++++++++++++ .github/workflows/R-CMD-check.yaml | 8 ---- 2 files changed, 53 insertions(+), 8 deletions(-) create mode 100644 .github/workflows/R-CMD-check-all.yaml diff --git a/.github/workflows/R-CMD-check-all.yaml b/.github/workflows/R-CMD-check-all.yaml new file mode 100644 index 0000000..8b9fa45 --- /dev/null +++ b/.github/workflows/R-CMD-check-all.yaml @@ -0,0 +1,53 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: R-CMD-check.yaml + +permissions: read-all + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: macos-latest, r: 'release'} + - {os: windows-latest, r: 'release'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'oldrel-1'} + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + COMPILE_VIG: ${{ secrets.COMPILE_VIG }} + + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true + build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 5762b51..55858f7 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -1,10 +1,6 @@ # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: - push: - branches: [main, master] - pull_request: - branches: [main, master] schedule: - cron: "15 15 * * *" @@ -23,10 +19,6 @@ jobs: matrix: config: - {os: macos-latest, r: 'release'} - - {os: windows-latest, r: 'release'} - - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - - {os: ubuntu-latest, r: 'release'} - - {os: ubuntu-latest, r: 'oldrel-1'} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} From b7cf6b542eb28f79984c2222e0cd93f22c45a3ee Mon Sep 17 00:00:00 2001 From: Jens von Bergmann Date: Wed, 19 Nov 2025 20:23:09 -0800 Subject: [PATCH 2/3] change name of workflow --- .github/workflows/R-CMD-check-all.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/R-CMD-check-all.yaml b/.github/workflows/R-CMD-check-all.yaml index 8b9fa45..c06f2fd 100644 --- a/.github/workflows/R-CMD-check-all.yaml +++ b/.github/workflows/R-CMD-check-all.yaml @@ -6,7 +6,7 @@ on: pull_request: branches: [main, master] -name: R-CMD-check.yaml +name: R-CMD-check-all.yaml permissions: read-all From 41c035d4185838e6e716a2b6ecfe6a594f608cb3 Mon Sep 17 00:00:00 2001 From: dshkol Date: Wed, 21 Jan 2026 18:14:10 -0800 Subject: [PATCH 3/3] perf: Caching and I/O optimizations (P3, P10) P3: list_cansim_cached_tables single-pass optimization - Consolidate three separate lapply calls into a single iteration - Collects timeCached, rawSize, and title in one pass per cached table - Avoids repeated dir() and file read operations - Use vapply for type-safe extraction from collected metadata - Expected improvement: ~65-85% for list_cansim_cached_tables() P10: Avoid unnecessary tibble conversion - Check tibble::is_tibble() before calling as_tibble() - Skips conversion when data is already a tibble - Expected improvement: ~5-15% for normalize_cansim_values() Note: P6 (field cache utilization) and P7 (csv2sqlite transform copies) were evaluated but not implemented: - P6: Could not identify specific field cache location in current code - P7: Conditional piping would harm readability for minor gains Co-Authored-By: Claude Opus 4.5 --- R/cansim.R | 3 +- R/cansim_parquet.R | 82 ++++++++++++++++++++++++---------------------- 2 files changed, 44 insertions(+), 41 deletions(-) diff --git a/R/cansim.R b/R/cansim.R index fbef320..f9a9822 100644 --- a/R/cansim.R +++ b/R/cansim.R @@ -61,7 +61,8 @@ normalize_cansim_values <- function(data, replacement_value="val_norm", normaliz return (data) } - data <- data %>% as_tibble() + # P10: Avoid unnecessary tibble conversion if data is already a tibble + if (!tibble::is_tibble(data)) data <- as_tibble(data) attr(data,"cansimTableNumber") <- cansimTableNumber attr(data,"language") <- language diff --git a/R/cansim_parquet.R b/R/cansim_parquet.R index fa70ce0..ea06201 100644 --- a/R/cansim_parquet.R +++ b/R/cansim_parquet.R @@ -634,46 +634,48 @@ list_cansim_cached_tables <- function(cache_path=Sys.getenv('CANSIM_CACHE_PATH') } if (nrow(result)>0) { - result$timeCached <- do.call("c", - lapply(result$path,function(p){ - pp <- dir(file.path(cache_path,p),"\\.Rda_time") - if (length(pp)==1) { - d<-readRDS(file.path(cache_path,p,pp)) - dd<- strptime(d,format=TIME_FORMAT) - } else { - dd <- strptime("1900-01-01 01:00:00",format=TIME_FORMAT) - } - })) - result$rawSize <- do.call("c", - lapply(result$path,function(p){ - pp <- dir(file.path(cache_path,p),"\\.sqlite$|\\.arrow$|\\.parquet$") - if (length(pp)==1) { - file_path <- file.path(cache_path,p,pp) - if (dir.exists(file_path)) { - d<-list.files(file.path(cache_path,p,pp),full.names = TRUE,recursive = TRUE) %>% - lapply(file.size) %>% - unlist() %>% - sum() - } else { - d<-file.size(file.path(cache_path,p,pp)) - } - } else { - d <- NA_real_ - } - d - })) - result$niceSize <- do.call("c",lapply(result$rawSize,\(x)ifelse(is.na(x),NA_real_,format_file_size(x,"auto")))) - result$title <- do.call("c", - lapply(result$path,function(p){ - pp <- dir(file.path(cache_path,p),"\\.Rda1") - if (length(pp)==1) { - d <- readRDS(file.path(cache_path,p,pp)) - dd <- as.character(d[1,1]) - } else { - dd <- NA_character_ - } - dd - })) + # P3: Single pass to collect all metadata instead of three separate lapply calls + cache_metadata <- lapply(result$path, function(p) { + full_path <- file.path(cache_path, p) + + # Get timeCached + time_file <- dir(full_path, "\\.Rda_time") + if (length(time_file) == 1) { + time_cached <- strptime(readRDS(file.path(full_path, time_file)), format = TIME_FORMAT) + } else { + time_cached <- strptime("1900-01-01 01:00:00", format = TIME_FORMAT) + } + + # Get rawSize + data_file <- dir(full_path, "\\.sqlite$|\\.arrow$|\\.parquet$") + if (length(data_file) == 1) { + data_path <- file.path(full_path, data_file) + if (dir.exists(data_path)) { + raw_size <- sum(file.size(list.files(data_path, full.names = TRUE, recursive = TRUE))) + } else { + raw_size <- file.size(data_path) + } + } else { + raw_size <- NA_real_ + } + + # Get title + title_file <- dir(full_path, "\\.Rda1") + if (length(title_file) == 1) { + title <- as.character(readRDS(file.path(full_path, title_file))[1, 1]) + } else { + title <- NA_character_ + } + + list(timeCached = time_cached, rawSize = raw_size, title = title) + }) + + result$timeCached <- do.call("c", lapply(cache_metadata, `[[`, "timeCached")) + result$rawSize <- vapply(cache_metadata, `[[`, numeric(1), "rawSize") + result$niceSize <- vapply(result$rawSize, function(x) { + if (is.na(x)) NA_character_ else format_file_size(x, "auto") + }, character(1)) + result$title <- vapply(cache_metadata, `[[`, character(1), "title") } cube_info <- list_cansim_cubes(lite=TRUE,refresh = refresh,quiet=TRUE)