diff --git a/DESCRIPTION b/DESCRIPTION
index 916c69e..57ac985 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: compasstools
 Title: COMPASS Utilities and Tools
-Version: 0.2
+Version: 2.1
 Authors@R: c(
     person("Ben", "Bond-Lamberty", email = "bondlamberty@pnnl.gov", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0001-9525-4633")),
@@ -19,7 +19,8 @@ Imports:
     fpeek,
     lubridate,
     tidyr (>= 1.0),
-    dplyr (>= 1.0)
+    dplyr (>= 1.0),
+    arrow (>= 20.0)
 Suggests:
     rdrop2 (>= 0.8),
     covr,
diff --git a/NAMESPACE b/NAMESPACE
index b92c55f..6f3f00b 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -5,6 +5,8 @@ export(nearest_neighbor_TMP)
 export(process_aquatroll_dir)
 export(process_sapflow_dir)
 export(process_teros_dir)
+export(read_L1_variable)
+export(read_L2_variable)
 export(read_aquatroll200_file)
 export(read_aquatroll600_file)
 export(read_datalogger_file)
@@ -14,6 +16,7 @@ export(read_teros_file)
 export(scan_folders)
 import(dplyr)
 import(fpeek)
+importFrom(arrow,read_parquet)
 importFrom(dplyr,bind_rows)
 importFrom(lubridate,ymd_hms)
 importFrom(readr,col_character)
diff --git a/R/read_Lx_variable.R b/R/read_Lx_variable.R
new file mode 100644
index 0000000..41c789b
--- /dev/null
+++ b/R/read_Lx_variable.R
@@ -0,0 +1,124 @@
+# read_Lx_variable.R
+
+# These two functions share 95% of their code, but they're short
+
+
+#' Read L1 (Level 1) sensor data files
+#'
+#' This function reads the COMPASS-FME L1 data files (CSV format)
+#' for a single variable, from one or more sites, and returns
+#' the compiled data.
+#'
+#' @param variable Variable name ('research name') to be read, character
+#' @param path Path of the L1 dataset (searched recursively), character
+#' @param site Optional name of the site(s) of data to read, character; if \code{NULL}, all sites are read
+#' @param quiet Suppress diagnostic messages? Logical
+#' @importFrom dplyr bind_rows
+#' @importFrom readr read_csv
+#' @returns A \code{\link[tibble]{tibble}} of L1 data.
+#' @export
+#' @author BBL
+#' @note This function only works for L1 v2-0 (July 2025) and higher.
+#' @examples
+#' \dontrun{
+#' read_L1_variable("gw-tds", path = "/path/to/L1/data", site = "TMP")
+#' read_L1_variable("gw-tds", "/path/to/L1/data", c("TMP", "OWC")) # multiple sites
+#' read_L1_variable(variable = "gw-tds", path = "/path/to/L1/data") # all sites
+#' read_L1_variable("gw-tds", "/path/to/L1/data", quiet = TRUE) # no messages
+#' }
+read_L1_variable <- function(variable, path, site = NULL, quiet = FALSE) {
+
+  if(length(variable) > 1) {
+    stop("Only one variable can be read at a time")
+  }
+  # An empty or NA variable would be pasted into the regex below and
+  # silently match nothing, so reject it up front
+  if(length(variable) == 0 || is.na(variable)) {
+    stop("'variable' must be a single non-missing character string")
+  }
+  # No site given: accept any run of capital letters as the site prefix
+  if(is.null(site)) {
+    sites <- "[A-Z]*"
+  } else {
+    sites <- paste0("(", paste(site, collapse = "|"), ")")
+  }
+  # Construct regular expression to identify files; note that 'site' and
+  # 'variable' are inserted as-is, and the extension dot is matched literally
+  regex <- paste0("^", sites, "_[A-Z0-9]+_.*_", variable, "_L1_.*\\.csv$")
+  if(!quiet) message(regex)
+  # File names are returned relative to 'path'
+  files <- list.files(path, pattern = regex, recursive = TRUE)
+  if(!quiet) message("Reading ", length(files), " files")
+
+  # The function works fine reading zero files, but this is
+  # probably not what the user wants
+  if(length(files) == 0) warning("No files found")
+
+  x <- lapply(files, function(f) {
+    if(!quiet) message("\t", f)
+    # One type code per L1 column (13 in all): TIMESTAMP is read as a
+    # datetime, Value as a double, and the three F_* flags as logicals
+    read_csv(file.path(path, f), col_types = "ccTccccdcclll")
+  })
+  bind_rows(x)
+}
+
+
+
+#' Read L2 (Level 2) sensor data files (Parquet format)
+#'
+#' This function reads the COMPASS-FME L2 data files (Parquet format)
+#' for a single variable, from one or more sites, and returns
+#' the compiled data.
+#'
+#' @param variable Variable name ('research name') to be read, character
+#' @param path Path of the L2 dataset (searched recursively), character
+#' @param site Optional name of the site(s) of data to read, character; if \code{NULL}, all sites are read
+#' @param quiet Suppress diagnostic messages? Logical
+#' @importFrom dplyr bind_rows
+#' @importFrom arrow read_parquet
+#' @returns A \code{\link[tibble]{tibble}} of L2 data.
+#' @export
+#' @author BBL
+#' @note This function only works for L2 v2-0 (July 2025) and higher.
+#' @examples
+#' \dontrun{
+#' read_L2_variable("gw-tds", path = "/path/to/L2/data", site = "TMP")
+#' read_L2_variable("gw-tds", "/path/to/L2/data", c("TMP", "OWC")) # multiple sites
+#' read_L2_variable(variable = "gw-tds", path = "/path/to/L2/data") # all sites
+#' read_L2_variable("gw-tds", "/path/to/L2/data", quiet = TRUE) # no messages
+#' }
+read_L2_variable <- function(variable, path, site = NULL, quiet = FALSE) {
+
+  if(length(variable) > 1) {
+    stop("Only one variable can be read at a time")
+  }
+  # An empty or NA variable would be pasted into the regex below and
+  # silently match nothing, so reject it up front
+  if(length(variable) == 0 || is.na(variable)) {
+    stop("'variable' must be a single non-missing character string")
+  }
+  # No site given: accept any run of capital letters as the site prefix
+  if(is.null(site)) {
+    sites <- "[A-Z]*"
+  } else {
+    sites <- paste0("(", paste(site, collapse = "|"), ")")
+  }
+  # Construct regular expression to identify files; note that 'site' and
+  # 'variable' are inserted as-is, and the extension dot is matched literally
+  regex <- paste0("^", sites, "_[A-Z0-9]+_.*_", variable, "_L2_.*\\.parquet$")
+  if(!quiet) message(regex)
+  # File names are returned relative to 'path'
+  files <- list.files(path, pattern = regex, recursive = TRUE)
+  if(!quiet) message("Reading ", length(files), " files")
+
+  # The function works fine reading zero files, but this is
+  # probably not what the user wants
+  if(length(files) == 0) warning("No files found")
+
+  x <- lapply(files, function(f) {
+    if(!quiet) message("\t", f)
+    read_parquet(file.path(path, f))
+  })
+  bind_rows(x)
+}
diff --git a/man/read_L1_variable.Rd b/man/read_L1_variable.Rd
new file mode 100644
index 0000000..4f21a11
--- /dev/null
+++ b/man/read_L1_variable.Rd
@@ -0,0 +1,39 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/read_Lx_variable.R
+\name{read_L1_variable}
+\alias{read_L1_variable}
+\title{Read L1 (Level 1) sensor data files}
+\usage{
+read_L1_variable(variable, path, site = NULL, quiet = FALSE)
+}
+\arguments{
+\item{variable}{Variable name ('research name') to be read, character}
+
+\item{path}{Path of the L1 dataset (searched recursively), character}
+
+\item{site}{Optional name of the site(s) of data to read, character; if \code{NULL}, all sites are read}
+
+\item{quiet}{Suppress diagnostic messages? Logical}
+}
+\value{
+A \code{\link[tibble]{tibble}} of L1 data.
+}
+\description{
+This function reads the COMPASS-FME L1 data files (CSV format)
+for a single variable, from one or more sites, and returns
+the compiled data.
+}
+\note{
+This function only works for L1 v2-0 (July 2025) and higher.
+}
+\examples{
+\dontrun{
+read_L1_variable("gw-tds", path = "/path/to/L1/data", site = "TMP")
+read_L1_variable("gw-tds", "/path/to/L1/data", c("TMP", "OWC")) # multiple sites
+read_L1_variable(variable = "gw-tds", path = "/path/to/L1/data") # all sites
+read_L1_variable("gw-tds", "/path/to/L1/data", quiet = TRUE) # no messages
+}
+}
+\author{
+BBL
+}
diff --git a/man/read_L2_variable.Rd b/man/read_L2_variable.Rd
new file mode 100644
index 0000000..db29523
--- /dev/null
+++ b/man/read_L2_variable.Rd
@@ -0,0 +1,39 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/read_Lx_variable.R
+\name{read_L2_variable}
+\alias{read_L2_variable}
+\title{Read L2 (Level 2) sensor data files (Parquet format)}
+\usage{
+read_L2_variable(variable, path, site = NULL, quiet = FALSE)
+}
+\arguments{
+\item{variable}{Variable name ('research name') to be read, character}
+
+\item{path}{Path of the L2 dataset (searched recursively), character}
+
+\item{site}{Optional name of the site(s) of data to read, character; if \code{NULL}, all sites are read}
+
+\item{quiet}{Suppress diagnostic messages? Logical}
+}
+\value{
+A \code{\link[tibble]{tibble}} of L2 data.
+}
+\description{
+This function reads the COMPASS-FME L2 data files (Parquet format)
+for a single variable, from one or more sites, and returns
+the compiled data.
+}
+\note{
+This function only works for L2 v2-0 (July 2025) and higher.
+}
+\examples{
+\dontrun{
+read_L2_variable("gw-tds", path = "/path/to/L2/data", site = "TMP")
+read_L2_variable("gw-tds", "/path/to/L2/data", c("TMP", "OWC")) # multiple sites
+read_L2_variable(variable = "gw-tds", path = "/path/to/L2/data") # all sites
+read_L2_variable("gw-tds", "/path/to/L2/data", quiet = TRUE) # no messages
+}
+}
+\author{
+BBL
+}
diff --git a/tests/testthat/test-read_Lx.R b/tests/testthat/test-read_Lx.R
new file mode 100644
index 0000000..ad38d37
--- /dev/null
+++ b/tests/testthat/test-read_Lx.R
@@ -0,0 +1,60 @@
+# read_Lx_variable functions
+
+test_that("read_L1_variable works", {
+  # Handles bad input
+  expect_error(read_L1_variable(letters[1:2]), regexp = "Only one variable")
+  expect_error(read_L1_variable(character(0), path = "./test_data"), regexp = "single non-missing")
+  expect_error(read_L1_variable(NA_character_, path = "./test_data"), regexp = "single non-missing")
+
+  # Single site
+  x <- read_L1_variable(variable = "gw-tds", path = "./test_data", site = "TMP", quiet = TRUE)
+  expect_s3_class(x, "data.frame")
+  expect_gt(nrow(x), 0) # otherwise all() below passes vacuously
+  expect_true(all(x$Site == "TMP"))
+
+  # Explicitly named sites
+  x <- read_L1_variable(variable = "gw-tds", path = "./test_data", site = c("TMP", "OWC"), quiet = TRUE)
+  expect_setequal(x$Site, c("TMP", "OWC"))
+
+  # Multiple sites
+  x <- read_L1_variable(variable = "gw-tds", path = "./test_data", quiet = TRUE)
+  expect_s3_class(x, "data.frame")
+  expect_true(length(unique(x$Site)) > 1)
+
+  # Respects quiet flag
+  expect_no_message(read_L1_variable(variable = "gw-tds", path = "./test_data", quiet = TRUE))
+
+  # Warns if no files found
+  expect_warning(read_L1_variable("A", path = "./test_data", quiet = TRUE),
+                 regexp = "No files found")
+})
+
+
+test_that("read_L2_variable works", {
+  # Handles bad input
+  expect_error(read_L2_variable(letters[1:2]), regexp = "Only one variable")
+  expect_error(read_L2_variable(character(0), path = "./test_data"), regexp = "single non-missing")
+  expect_error(read_L2_variable(NA_character_, path = "./test_data"), regexp = "single non-missing")
+
+  # Single site
+  x <- read_L2_variable(variable = "sonde-fdom-rfu", path = "./test_data", site = "OWC", quiet = TRUE)
+  expect_s3_class(x, "data.frame")
+  expect_gt(nrow(x), 0) # otherwise all() below passes vacuously
+  expect_true(all(x$Site == "OWC"))
+
+  # Explicitly named sites
+  x <- read_L2_variable(variable = "sonde-fdom-rfu", path = "./test_data", site = c("CRC", "OWC"), quiet = TRUE)
+  expect_true(length(unique(x$Site)) > 1)
+
+  # Multiple sites
+  x <- read_L2_variable(variable = "sonde-fdom-rfu", path = "./test_data", quiet = TRUE)
+  expect_s3_class(x, "data.frame")
+  expect_true(length(unique(x$Site)) > 1)
+
+  # Respects quiet flag
+  expect_no_message(read_L2_variable(variable = "sonde-fdom-rfu", path = "./test_data", quiet = TRUE))
+
+  # Warns if no files found
+  expect_warning(read_L2_variable("A", path = "./test_data", quiet = TRUE),
+                 regexp = "No files found")
+})
diff --git a/tests/testthat/test_data/CRC_OW_2024_sonde-fdom-rfu_L2_v2-1.parquet b/tests/testthat/test_data/CRC_OW_2024_sonde-fdom-rfu_L2_v2-1.parquet
new file mode 100644
index 0000000..f7f6987
Binary files /dev/null and b/tests/testthat/test_data/CRC_OW_2024_sonde-fdom-rfu_L2_v2-1.parquet differ
diff --git a/tests/testthat/test_data/OWC_OW_2024_sonde-fdom-rfu_L2_v2-1.parquet b/tests/testthat/test_data/OWC_OW_2024_sonde-fdom-rfu_L2_v2-1.parquet
new file mode 100644
index 0000000..b00b9ec
Binary files /dev/null and b/tests/testthat/test_data/OWC_OW_2024_sonde-fdom-rfu_L2_v2-1.parquet differ
diff --git a/tests/testthat/test_data/OWC_UP_20191120-20191231_gw-tds_L1_v2-1.csv b/tests/testthat/test_data/OWC_UP_20191120-20191231_gw-tds_L1_v2-1.csv
new file mode 100644
index 0000000..848246f
--- /dev/null
+++ b/tests/testthat/test_data/OWC_UP_20191120-20191231_gw-tds_L1_v2-1.csv
@@ -0,0 +1,8 @@
+Site,Plot,TIMESTAMP,Instrument,Instrument_ID,Sensor_ID,Location,Value,research_name,Source_file,F_OOB,F_OOS,F_MAD
+OWC,UP,2019-11-20 09:45:00,AquaTROLL200,683345,,,0,gw-tds,00a8b042,,0,
+OWC,UP,2019-11-20 09:45:00,AquaTROLL200,683345,,,0,gw-tds,14554582,,0,
+OWC,UP,2019-11-20 09:45:00,AquaTROLL600,685474,,,0,gw-tds,aa6819aa,,0,
+OWC,UP,2019-11-20 09:45:00,AquaTROLL600,685474,,,0,gw-tds,ed26e520,,0,
+OWC,UP,2019-11-20 10:00:00,AquaTROLL200,683416,,,0,gw-tds,ef1bae45,,0,
+OWC,UP,2019-11-20 10:00:00,AquaTROLL200,683345,,,0,gw-tds,00a8b042,,0,
+OWC,UP,2019-11-20 10:00:00,AquaTROLL200,683345,,,0,gw-tds,14554582,,0,
diff --git a/tests/testthat/test_data/TMP_F_20191120-20191231_gw-tds_L1_v2-1.csv b/tests/testthat/test_data/TMP_F_20191120-20191231_gw-tds_L1_v2-1.csv
new file mode 100644
index 0000000..3bde961
--- /dev/null
+++ b/tests/testthat/test_data/TMP_F_20191120-20191231_gw-tds_L1_v2-1.csv
@@ -0,0 +1,8 @@
+"Site","Plot","TIMESTAMP","Instrument","Instrument_ID","Sensor_ID","Location","Value","research_name","Source_file","F_OOB","F_OOS","F_MAD"
+"TMP","F","2019-11-20 09:45:00","AquaTROLL200","683345",,,0,"gw-tds","00a8b042",,0,
+"TMP","F","2019-11-20 09:45:00","AquaTROLL200","683345",,,0,"gw-tds","14554582",,0,
+"TMP","F","2019-11-20 09:45:00","AquaTROLL600","685474",,,0,"gw-tds","aa6819aa",,0,
+"TMP","F","2019-11-20 09:45:00","AquaTROLL600","685474",,,0,"gw-tds","ed26e520",,0,
+"TMP","F","2019-11-20 10:00:00","AquaTROLL200","683416",,,0,"gw-tds","ef1bae45",,0,
+"TMP","F","2019-11-20 10:00:00","AquaTROLL200","683345",,,0,"gw-tds","00a8b042",,0,
+"TMP","F","2019-11-20 10:00:00","AquaTROLL200","683345",,,0,"gw-tds","14554582",,0,