From bc28005424e839d9b9d5aa68b24ae1b1c3c61110 Mon Sep 17 00:00:00 2001 From: Alon Alexander Date: Tue, 10 Feb 2026 11:07:49 +0100 Subject: [PATCH 1/3] Update id system to support subfolders --- R/analysis.R | 27 ++++++------ R/data.R | 7 +++- R/data_utils.R | 1 - R/project.R | 4 ++ man/PMAnalysis.Rd | 11 +++++ tests/testthat/test_analysis.R | 76 ++++++++++++++++++++++++++++++++++ 6 files changed, 109 insertions(+), 17 deletions(-) diff --git a/R/analysis.R b/R/analysis.R index 623ca69..bb71e44 100644 --- a/R/analysis.R +++ b/R/analysis.R @@ -241,31 +241,23 @@ PMAnalysis <- R6Class("PMAnalysis", } # Get all files in the directory - files <- list.files(outputs_dir, full.names = TRUE) + files <- list.files(outputs_dir, full.names = FALSE, recursive = TRUE) if (length(files) == 0) { return(list()) } - # Filter to only files (not directories) using vectorized approach - file_info <- file.info(files) - is_file <- !is.na(file_info$isdir) & !file_info$isdir - files_only <- files[is_file] - - if (length(files_only) == 0) { - return(list()) - } - # Create PMData objects using lapply - file_ids <- tools::file_path_sans_ext(basename(files_only)) - file_paths <- normalizePath(files_only, mustWork = FALSE) + file_ids <- gsub("\\", "/", tools::file_path_sans_ext(files), fixed = TRUE) + file_paths <- normalizePath(file.path(outputs_dir, files), mustWork = FALSE) - lapply(seq_along(files_only), function(i) { + lapply(seq_along(files), function(i) { PMData$new(id = file_ids[i], path = file_paths[i]) }) }, #' @description #' Get output path for a file, returning a PMData object. + #' Also supports subfolders using both unix-style and windows-style delimiters ("/" and "\\"). #' #' @param name Character. Name of the output file (with or without extension). #' @param type Character. Optional type of output (table, object, image, figure, parquet, csv).
@@ -291,6 +283,11 @@ PMAnalysis <- R6Class("PMAnalysis", #' intermediate <- analysis$get_output_path("temp_data", type = "table", intermediate = TRUE) #' intermediate$id # "temp_data" #' intermediate$path # full path to temp_data.parquet in intermediate/ + #' + #' # Get output path with nested folders + #' output2 <- analysis$get_output_path("unique\\complex\\structure.rds") + #' output2$id # "unique/complex/structure.rds" + #' output2$path get_output_path = function(name, type = NULL, intermediate = FALSE) { # Store original name for ID (without extension) original_name <- name @@ -332,7 +329,9 @@ PMAnalysis <- R6Class("PMAnalysis", folder <- if (intermediate) constants$ANALYSIS_INTERMEDIATE_DIR else constants$ANALYSIS_OUTPUT_DIR - full_path <- normalizePath(file.path(self$path, folder, name), mustWork = FALSE) + name <- strsplit(name, "\\\\|/") + full_path_raw <- do.call(file.path, as.list(c(self$path, folder, unlist(name)))) + full_path <- normalizePath(full_path_raw, mustWork = FALSE) PMData$new(id = id, path = full_path) }, diff --git a/R/data.R b/R/data.R index 461fa76..7a8b54d 100644 --- a/R/data.R +++ b/R/data.R @@ -31,7 +31,7 @@ PMData <- R6Class("PMData", chk::chk_scalar(path) chk::chk_character(path) self$id <- id - self$path <- path + self$path <- normalizePath(path, mustWork = FALSE) }, #' @description @@ -101,6 +101,9 @@ PMData <- R6Class("PMData", #' data_rdata$write(obj1, obj2, obj3 = 42) #' }) write = function(x, ...) 
{ + # Create the folder in case it doesn't exist + dir.create(dirname(self$path), showWarnings = FALSE, recursive = TRUE) + # For RData files, we need to preserve object names from the original call ext <- tolower(tools::file_ext(self$path)) if (ext %in% c("rdata", "rda")) { @@ -129,7 +132,7 @@ PMData <- R6Class("PMData", obj_names <- c(obj_names, obj_name) } } - + # Call pm_write_file with explicit object names pm_write_file(self$path, x, ..., object_names = obj_names) } else { diff --git a/R/data_utils.R b/R/data_utils.R index b04cc39..4904ced 100644 --- a/R/data_utils.R +++ b/R/data_utils.R @@ -744,4 +744,3 @@ pm_write_file <- function(file, x, ..., object_names = NULL) { invisible(file) } - diff --git a/R/project.R b/R/project.R index 32804fc..e6dfce7 100644 --- a/R/project.R +++ b/R/project.R @@ -275,6 +275,10 @@ PMProject <- R6Class("PMProject", chk::chk_scalar(id) chk::chk_character(id) + # Normalize (subfolder) id + prev_id = id + id <- gsub("\\", "/", id, fixed = TRUE) + # Determine which analyses to search if (!is.null(analysis_name)) { chk::chk_scalar(analysis_name) diff --git a/man/PMAnalysis.Rd b/man/PMAnalysis.Rd index b34a956..19114c6 100644 --- a/man/PMAnalysis.Rd +++ b/man/PMAnalysis.Rd @@ -100,6 +100,11 @@ intermediate <- analysis$get_output_path("temp_data", type = "table", intermedia intermediate$id # "temp_data" intermediate$path # full path to temp_data.parquet in intermediate/ +# Get output path with nested folders +output2 <- analysis$get_output_path("unique\\\\complex\\\\structure.rds") +output2$id # "unique/complex/structure.rds" +output2$path + ## ------------------------------------------------ ## Method `PMAnalysis$run_in_slurm` ## ------------------------------------------------ @@ -344,6 +349,7 @@ intermediates <- analysis$list_outputs(intermediate = TRUE) \if{latex}{\out{\hypertarget{method-PMAnalysis-get_output_path}{}}} \subsection{Method \code{get_output_path()}}{ Get output path for a file, returning a PMData object. 
+Also supports subfolders using both unix-style and windows-style delimiters ("/" and "\\"). \subsection{Usage}{ \if{html}{\out{
}}\preformatted{PMAnalysis$get_output_path(name, type = NULL, intermediate = FALSE)}\if{html}{\out{
}} } @@ -383,6 +389,11 @@ output$path # full path to results.csv in outputs/ intermediate <- analysis$get_output_path("temp_data", type = "table", intermediate = TRUE) intermediate$id # "temp_data" intermediate$path # full path to temp_data.parquet in intermediate/ + +# Get output path with nested folders +output2 <- analysis$get_output_path("unique\\\\complex\\\\structure.rds") +output2$id # "unique/complex/structure.rds" +output2$path } \if{html}{\out{}} diff --git a/tests/testthat/test_analysis.R b/tests/testthat/test_analysis.R index ba687ee..fbbdb82 100644 --- a/tests/testthat/test_analysis.R +++ b/tests/testthat/test_analysis.R @@ -1386,6 +1386,82 @@ describe("PMAnalysis$get_artifact() works correctly", { }) }) +describe("Writing/reading outputs with subfolders works", { + dir <- .get_good_project_path() + pm <- pm::PMProject$new(dir) + analysis <- pm$create_analysis("data_prep") + + it("Writes and readsproperly to unix-style subfolder", { + # Write + expected <- 123 + analysis$get_output_path("t1/file.rds")$write(expected) + expect_true(dir.exists(file.path(analysis$path, "outputs", "t1"))) + expect_true(file.exists(file.path(analysis$path, "outputs", "t1", "file.rds"))) + + # Read using unix style + result <- analysis$get_output_path("t1/file.rds")$read() + expect_equal(result, expected) + + # Read using windows style + result2 <- analysis$get_output_path("t1\\file.rds")$read() + expect_equal(result2, expected) + + # Find as artifact unix style + result3 <- analysis$get_artifact("t1/file")$read() + expect_equal(result3, expected) + + # Find as artifact windows style + result4 <- analysis$get_artifact("t1\\file")$read() + expect_equal(result4, expected) + }) + + it("Writes and reads properly to windows-style subfolder", { + expected <- 456 + analysis$get_output_path("t2\\file.rds")$write(expected) + expect_true(dir.exists(file.path(analysis$path, "outputs", "t2"))) + expect_true(file.exists(file.path(analysis$path, "outputs", "t2", "file.rds"))) + + # Read 
using unix style + result <- analysis$get_output_path("t2/file.rds")$read() + expect_equal(result, expected) + + # Read using windows style + result2 <- analysis$get_output_path("t2\\file.rds")$read() + expect_equal(result2, expected) + + # Find as artifact unix style + result3 <- analysis$get_artifact("t2/file")$read() + expect_equal(result3, expected) + + # Find as artifact windows style + result4 <- analysis$get_artifact("t2\\file")$read() + expect_equal(result4, expected) + }) + + it("Works with deep subfolders", { + # Write + expected <- 100 + analysis$get_output_path("t3/another/level/file.rds")$write(expected) + expect_true(dir.exists(file.path(analysis$path, "outputs", "t3"))) + expect_true(dir.exists(file.path(analysis$path, "outputs", "t3", "another"))) + expect_true(dir.exists(file.path(analysis$path, "outputs", "t3", "another", "level"))) + expect_true(file.exists(file.path(analysis$path, "outputs", "t3", "another", "level", "file.rds"))) + + expect_equal(analysis$get_output_path("t3/another/level/file.rds")$read(), expected) + expect_equal(analysis$get_output_path("t3\\another\\level\\file.rds")$read(), expected) + + # Works with combinations of separators + expect_equal(analysis$get_output_path("t3\\another\\level/file.rds")$read(), expected) + expect_equal(analysis$get_output_path("t3/another/level\\file.rds")$read(), expected) + + # Find as artifact + expect_equal(analysis$get_artifact("t3/another/level/file")$read(), expected) + expect_equal(analysis$get_artifact("t3\\another\\level\\file")$read(), expected) + expect_equal(analysis$get_artifact("t3\\another/level/file")$read(), expected) + expect_equal(analysis$get_artifact("t3\\another/level\\file")$read(), expected) + }) +}) + describe("pm_infer_analysis works correctly", { dir <- .get_good_project_path() pm <- pm::PMProject$new(dir) From 40bb8e20025a4bd3d42782d15d3ec0d6aedf9f1e Mon Sep 17 00:00:00 2001 From: Alon Alexander Date: Tue, 10 Feb 2026 11:13:02 +0100 Subject: [PATCH 2/3] Update docs 
for new feature --- DESCRIPTION | 2 +- NEWS.md | 8 ++++++ R/analysis.R | 2 +- docs/404.html | 2 +- docs/LICENSE.html | 2 +- docs/articles/file-formats.html | 6 ++--- docs/articles/file-formats.md | 2 +- docs/articles/getting-started.html | 26 +++++++++--------- docs/articles/getting-started.md | 22 +++++++-------- docs/articles/index.html | 2 +- docs/articles/input-definitions.html | 4 +-- docs/articles/slurm-integration.html | 4 +-- docs/authors.html | 6 ++--- docs/authors.md | 4 +-- docs/index.html | 2 +- docs/news/index.html | 13 ++++++++- docs/news/index.md | 9 +++++++ docs/pkgdown.yml | 2 +- docs/reference/PMAnalysis.html | 27 ++++++++++++++----- docs/reference/PMAnalysis.md | 24 +++++++++++++---- docs/reference/PMData.html | 2 +- docs/reference/PMProject.html | 12 ++++----- docs/reference/PMProject.md | 10 +++---- docs/reference/PMSlurmRun.html | 2 +- docs/reference/dot-cancel_slurm_job.html | 2 +- docs/reference/dot-check_missing_entries.html | 2 +- docs/reference/dot-check_missing_files.html | 2 +- docs/reference/dot-check_slurm_job_done.html | 2 +- .../dot-check_slurm_job_success.html | 2 +- docs/reference/dot-extract_input_ids.html | 2 +- docs/reference/dot-find_code_folder_name.html | 2 +- .../dot-format_validation_error.html | 2 +- docs/reference/dot-get_slurm_job_error.html | 2 +- .../dot-submit_slurm_job_with_env.html | 2 +- docs/reference/dot-validate_input_fields.html | 2 +- docs/reference/dot-validate_input_files.html | 2 +- .../reference/dot-validate_inputs_schema.html | 2 +- .../dot-validate_local_inputs_schema.html | 2 +- docs/reference/index.html | 2 +- docs/reference/is_slurm_available.html | 2 +- docs/reference/pm_create_project.html | 4 +-- docs/reference/pm_create_project.md | 2 +- docs/reference/pm_infer_analysis.html | 14 +++++----- docs/reference/pm_infer_analysis.md | 12 ++++----- docs/reference/pm_project.html | 4 +-- docs/reference/pm_project.md | 2 +- docs/reference/pm_read_file.html | 2 +- docs/reference/pm_write_file.html | 2 +- 
docs/search.json | 2 +- man/PMAnalysis.Rd | 4 +-- 50 files changed, 165 insertions(+), 110 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5c18ec8..49501ed 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: pm Title: Project and Analysis Manager -Version: 0.1.10 +Version: 0.1.11 Authors@R: person("Alon", "Alexander", , "alon008@gmail.com", role = c("aut", "cre")) Description: Enforces and supports a standardized folder structure for research diff --git a/NEWS.md b/NEWS.md index bb13d6f..9c798e7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,11 @@ +# pm 0.1.11 + +## New Features + +- Support subfolders in `PMAnalysis$get_output_path` +- Support subfolders in `PMAnalysis$list_outputs` +- Support subfolders in `get_artifact` (both `PMAnalysis` and `PMProject`) + # pm 0.1.10 ## New Features diff --git a/R/analysis.R b/R/analysis.R index bb71e44..71c7a24 100644 --- a/R/analysis.R +++ b/R/analysis.R @@ -286,7 +286,7 @@ PMAnalysis <- R6Class("PMAnalysis", #' #' # Get output path with nested folders #' output2 <- analysis$get_output_path("unique\\complex\\structure.rds") - #' output2$id # "unique/complex/structure.rds" + #' output2$id # "unique/complex/structure" #' output2$path get_output_path = function(name, type = NULL, intermediate = FALSE) { # Store original name for ID (without extension) diff --git a/docs/404.html b/docs/404.html index 0f7810a..2d2cbc1 100644 --- a/docs/404.html +++ b/docs/404.html @@ -20,7 +20,7 @@ pm - 0.1.10 + 0.1.11