diff --git a/.github/workflows/ultimate_write_to_influxdbcloud.yaml b/.github/workflows/ultimate_write_to_influxdbcloud.yaml
index f283b24..dd39176 100644
--- a/.github/workflows/ultimate_write_to_influxdbcloud.yaml
+++ b/.github/workflows/ultimate_write_to_influxdbcloud.yaml
@@ -1,5 +1,10 @@
 name: Import Pilot Data From Nextcloud and Upload to InfluxDB Cloud
 on:
+  push:
+    branches:
+      - main
+      - master
+      - dev
   schedule:
     # * is a special character in YAML so you have to quote this string
     # Run this job every day at 00:00 am UTC ('0 0 * * *')
diff --git a/DESCRIPTION b/DESCRIPTION
index 100901f..b3bc239 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -54,7 +54,8 @@ Imports:
     tidyselect (>= 1.1.1),
     withr (>= 2.3.0),
     xml2 (>= 1.3.2),
-    xts (>= 0.12.1)
+    xts (>= 0.12.1),
+    yaml
 Suggests:
     covr (>= 3.5.1),
     knitr (>= 1.30),
@@ -69,4 +70,4 @@ Remotes:
 Config/testthat/edition: 3
 Encoding: UTF-8
 LazyData: true
-RoxygenNote: 7.1.2
+RoxygenNote: 7.2.3
diff --git a/NAMESPACE b/NAMESPACE
index dcafad2..8ac0da3 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -4,6 +4,7 @@ export("%>%")
 export(add_label)
 export(add_parameter_metadata)
 export(add_site_metadata)
+export(aggregate_export_fst)
 export(aggregate_export_fst_berlin_f)
 export(aggregate_export_fst_berlin_s)
 export(aggregate_export_fst_berlin_t)
@@ -114,6 +115,7 @@ importFrom(kwb.nextcloud,download_files)
 importFrom(kwb.nextcloud,list_files)
 importFrom(kwb.nextcloud,move_file_or_folder)
 importFrom(kwb.utils,catAndRun)
+importFrom(kwb.utils,noFactorDataFrame)
 importFrom(kwb.utils,stringList)
 importFrom(lubridate,ceiling_date)
 importFrom(lubridate,days)
@@ -158,3 +160,4 @@ importFrom(utils,write.csv)
 importFrom(withr,with_dir)
 importFrom(xml2,read_html)
 importFrom(xts,align.time)
+importFrom(yaml,read_yaml)
diff --git a/R/aggregate_export_fst.R b/R/aggregate_export_fst.R
index 99eb7fd..309800f 100644
--- a/R/aggregate_export_fst.R
+++ b/R/aggregate_export_fst.R
@@ -1,8 +1,3 @@
-#
-# TODO: Create a general function and let these two functions be wrappers
-# around the general function!
-#
-
 #' Berlin-Tiefwerder: aggregate and export to fst
 #'
 #' @param year_month_start start year month (default: '2017-06')
@@ -12,97 +7,21 @@
 #' @importFrom data.table rbindlist
 #' @importFrom fst write.fst
 #' @export
-aggregate_export_fst_berlin_t <- function(year_month_start = "2017-06",
-                                          year_month_end = format(Sys.Date(), "%Y-%m"),
-                                          compression = 100) {
-  monthly_periods <- get_monthly_periods(
+aggregate_export_fst_berlin_t <- function(
+  year_month_start = "2017-06",
+  year_month_end = format(Sys.Date(), "%Y-%m"),
+  compression = 100
+)
+{
+  aggregate_export_fst(
     year_month_start = year_month_start,
-    year_month_end = year_month_end
+    year_month_end = year_month_end,
+    compression = compression,
+    FUN_get_monthly_data = get_rawfilespaths_for_month,
+    FUN_import = import_data_berlin_t,
+    FUN_calculate_ops = calculate_operational_parameters_berlin_t,
+    prefix = "berlin_t"
   )
-
-  as_posix_cet <- function(fmt, x) as.POSIXct(sprintf(fmt, x), tz = "CET")
-
-  for (year_month in monthly_periods$year_month) {
-    monthly_period <- monthly_periods[monthly_periods$year_month == year_month, ]
-
-    print(sprintf("Importing data for month '%s':", year_month))
-
-    raw_data_file_paths <- get_rawfilespaths_for_month(monthly_period)
-
-    system.time(
-      siteData_raw_list <- import_data_berlin_t(
-        raw_data_files = raw_data_file_paths
-      )
-    )
-
-    datetime_start <- as_posix_cet("%s 00:00:00", monthly_period$start)
-    datetime_end <- as_posix_cet("%s 23:59:59", monthly_period$end)
-
-    times <- kwb.utils::selectColumns(siteData_raw_list, "DateTime")
-
-    condition <- times >= datetime_start & times <= datetime_end
-
-    siteData_raw_list <- siteData_raw_list[condition, ]
-
-    print(sprintf(
-      "Reduced imported data points to time period: %s - %s",
-      as.character(min(siteData_raw_list$DateTime)),
-      as.character(max(siteData_raw_list$DateTime))
-    ))
-
-    calc_dat <- calculate_operational_parameters_berlin_t(df = siteData_raw_list)
-
-    siteData_raw_list <- data.table::rbindlist(
-      l = list(siteData_raw_list, calc_dat), use.names = TRUE, fill = TRUE
-    ) %>%
-      as.data.frame()
-
-    export_dir_path <- sprintf(
-      "%s/data/fst/%s",
-      package_file("shiny/berlin_t"),
-      monthly_period$year_month
-    )
-
-    check_or_create_export_dir(export_dir_path)
-
-    system.time(fst::write.fst(
-      siteData_raw_list,
-      path = sprintf("%s/siteData_raw_list.fst", export_dir_path),
-      compress = compression
-    ))
-
-    print("### Step 4: Performing temporal aggregation ##########################")
-
-    system.time(
-      siteData_10min_list <- group_datetime(siteData_raw_list, by = 10 * 60)
-    )
-
-    fst::write.fst(
-      siteData_10min_list,
-      path = sprintf("%s/siteData_10min_list.fst", export_dir_path),
-      compress = compression
-    )
-
-    system.time(
-      siteData_hour_list <- group_datetime(siteData_10min_list, by = 60 * 60)
-    )
-
-    fst::write.fst(
-      siteData_hour_list,
-      path = sprintf("%s/siteData_hour_list.fst", export_dir_path),
-      compress = compression
-    )
-
-    system.time(
-      siteData_day_list <- group_datetime(siteData_hour_list, by = "day")
-    )
-
-    fst::write.fst(
-      siteData_day_list,
-      path = sprintf("%s/siteData_day_list.fst", export_dir_path),
-      compress = compression
-    )
-  }
 }
 
 #' Berlin-Schoenerlinde: aggregate and export to fst
@@ -114,98 +33,23 @@ aggregate_export_fst_berlin_t <- function(year_month_start = "2017-06",
 #' @importFrom data.table rbindlist
 #' @importFrom fst write.fst
 #' @export
-aggregate_export_fst_berlin_s <- function(year_month_start = "2017-04",
-                                          year_month_end = format(Sys.Date(), "%Y-%m"),
-                                          compression = 100) {
-  monthly_periods <- get_monthly_periods(
+aggregate_export_fst_berlin_s <- function(
+  year_month_start = "2017-04",
+  year_month_end = format(Sys.Date(), "%Y-%m"),
+  compression = 100
+)
+{
+  aggregate_export_fst(
     year_month_start = year_month_start,
-    year_month_end = year_month_end
+    year_month_end = year_month_end,
+    compression = compression,
+    FUN_get_monthly_data = function(x) {
+      get_monthly_data_from_calendarweeks(year_month = x$year_month)
+    },
+    FUN_import = import_data_berlin_s,
+    FUN_calculate_ops = calculate_operational_parameters_berlin_s,
+    prefix = "berlin_s"
   )
-
-  as_posix_cet <- function(fmt, x) as.POSIXct(sprintf(fmt, x), tz = "CET")
-
-
-  for (year_month in monthly_periods$year_month) {
-    monthly_period <- monthly_periods[monthly_periods$year_month == year_month, ]
-
-    print(sprintf("Importing data for month '%s':", year_month))
-
-    raw_data_file_paths <- get_monthly_data_from_calendarweeks(
-      year_month = monthly_period$year_month
-    )
-
-    system.time(
-      siteData_raw_list <- import_data_berlin_s(raw_data_files = raw_data_file_paths)
-    )
-
-    datetime_start <- as_posix_cet("%s 00:00:00", monthly_period$start)
-    datetime_end <- as_posix_cet("%s 23:59:59", monthly_period$end)
-
-    times <- kwb.utils::selectColumns(siteData_raw_list, "DateTime")
-
-    condition <- times >= datetime_start & times <= datetime_end
-
-    siteData_raw_list <- siteData_raw_list[condition, ]
-
-    print(sprintf(
-      "Reduced imported data points to time period: %s - %s",
-      as.character(min(siteData_raw_list$DateTime)),
-      as.character(max(siteData_raw_list$DateTime))
-    ))
-
-    calc_dat <- calculate_operational_parameters_berlin_s(df = siteData_raw_list)
-
-    siteData_raw_list <- data.table::rbindlist(
-      l = list(siteData_raw_list, calc_dat), use.names = TRUE, fill = TRUE
-    ) %>%
-      as.data.frame()
-
-    export_dir_path <- sprintf(
-      "%s/data/fst/%s",
-      package_file("shiny/berlin_s"),
-      monthly_period$year_month
-    )
-
-    check_or_create_export_dir(export_dir_path)
-
-    system.time(fst::write.fst(
-      siteData_raw_list,
-      path = sprintf("%s/siteData_raw_list.fst", export_dir_path),
-      compress = compression
-    ))
-
-    print("### Step 4: Performing temporal aggregation ##########################")
-
-    system.time(
-      siteData_10min_list <- group_datetime(siteData_raw_list, by = 10 * 60)
-    )
-
-    fst::write.fst(
-      siteData_10min_list,
-      path = sprintf("%s/siteData_10min_list.fst", export_dir_path),
-      compress = compression
-    )
-
-    system.time(
-      siteData_hour_list <- group_datetime(siteData_10min_list, by = 60 * 60)
-    )
-
-    fst::write.fst(
-      siteData_hour_list,
-      path = sprintf("%s/siteData_hour_list.fst", export_dir_path),
-      compress = compression
-    )
-
-    system.time(
-      siteData_day_list <- group_datetime(siteData_hour_list, by = "day")
-    )
-
-    fst::write.fst(
-      siteData_day_list,
-      path = sprintf("%s/siteData_day_list.fst", export_dir_path),
-      compress = compression
-    )
-  }
 }
 
 #' Berlin-Friedrichshagen: aggregate and export to fst
@@ -219,108 +63,36 @@ aggregate_export_fst_berlin_s <- function(year_month_start = "2017-04",
 #' @importFrom stringr str_remove
 #' @importFrom fs dir_ls
 #' @export
-aggregate_export_fst_berlin_f <- function(year_month_start = "2019-11",
-                                          year_month_end = format(Sys.Date(), "%Y-%m"),
-                                          compression = 100) {
-  monthly_periods <- get_monthly_periods(
-    year_month_start = year_month_start,
-    year_month_end = year_month_end
-  )
-
-  as_posix_cet <- function(fmt, x) as.POSIXct(sprintf(fmt, x), tz = "CET")
-
-
-  for (year_month in monthly_periods$year_month) {
-    monthly_period <- monthly_periods[monthly_periods$year_month == year_month, ]
-
-    print(sprintf("Importing data for month '%s':", year_month))
-
-    raw_data_file_paths <- fs::dir_ls(package_file("shiny/berlin_f/data/raw/online_data"),
-      recurse = TRUE, regexp = sprintf(
+aggregate_export_fst_berlin_f <- function(
+  year_month_start = "2019-11",
+  year_month_end = format(Sys.Date(), "%Y-%m"),
+  compression = 100
+)
+{
+  # Define function to list raw data files related to one month
+  FUN_get_monthly_data <- function(x) {
+    year_month <- unique(x$year_month)
+    stopifnot(length(year_month) == 1L)
+    fs::dir_ls(
+      shiny_file("berlin_f/data/raw/online_data"),
+      recurse = TRUE,
+      regexp = sprintf(
         "^[^~].*%s[0-3][0-9].*\\.xlsx$",
         stringr::str_remove(year_month, "-")
       )
     )
-
-
-
-    system.time(
-      siteData_raw_list <- import_data_berlin_f(raw_data_files = raw_data_file_paths)
-    )
-
-    datetime_start <- as_posix_cet("%s 00:00:00", monthly_period$start)
-    datetime_end <- as_posix_cet("%s 23:59:59", monthly_period$end)
-
-    times <- kwb.utils::selectColumns(siteData_raw_list, "DateTime")
-
-    condition <- times >= datetime_start & times <= datetime_end
-
-    siteData_raw_list <- siteData_raw_list[condition, ]
-
-    print(sprintf(
-      "Reduced imported data points to time period: %s - %s",
-      as.character(min(siteData_raw_list$DateTime)),
-      as.character(max(siteData_raw_list$DateTime))
-    ))
-
-
-    export_dir_path <- sprintf(
-      "%s/data/fst/%s",
-      package_file("shiny/berlin_f"),
-      monthly_period$year_month
-    )
-
-    check_or_create_export_dir(export_dir_path)
-
-    system.time(fst::write.fst(
-      siteData_raw_list,
-      path = sprintf("%s/siteData_raw_list.fst", export_dir_path),
-      compress = compression
-    ))
-
-    print("### Step 4: Performing temporal aggregation (10 min) #########################")
-
-    system.time(
-      siteData_10min_list <- group_datetime(siteData_raw_list, by = 10 * 60)
-    )
-
-    print("### Step 5: Calcualtating Performing temporal aggregation ##########################")
-    calc_dat <- calculate_operational_parameters_berlin_f(df = siteData_10min_list)
-
-    siteData_10min_list <- data.table::rbindlist(
-      l = list(siteData_10min_list, calc_dat), use.names = TRUE, fill = TRUE
-    ) %>%
-      as.data.frame()
-
-
-    fst::write.fst(
-      siteData_10min_list,
-      path = sprintf("%s/siteData_10min_list.fst", export_dir_path),
-      compress = compression
-    )
-
-    print("### Step 6: Performing temporal aggregation (1 h, 1 day) #########################")
-
-    system.time(
-      siteData_hour_list <- group_datetime(siteData_10min_list, by = 60 * 60)
-    )
-
-    fst::write.fst(
-      siteData_hour_list,
-      path = sprintf("%s/siteData_hour_list.fst", export_dir_path),
-      compress = compression
-    )
-
-    system.time(
-      siteData_day_list <- group_datetime(siteData_hour_list, by = "day")
-    )
-
-    fst::write.fst(
-      siteData_day_list,
-      path = sprintf("%s/siteData_day_list.fst", export_dir_path),
-      compress = compression
-    )
   }
+
+  aggregate_export_fst(
+    year_month_start = year_month_start,
+    year_month_end = year_month_end,
+    compression = compression,
+    FUN_get_monthly_data = FUN_get_monthly_data,
+    FUN_import = import_data_berlin_f,
+    FUN_calculate_ops = calculate_operational_parameters_berlin_f,
+    prefix = "berlin_f",
+    calculate_ops_on_level = "aggregated"
+  )
 }
 
 #' MBR4.0: aggregate and export to fst
@@ -333,66 +105,18 @@ aggregate_export_fst_berlin_f <- function(year_month_start = "2019-11",
 #' @importFrom stringr str_remove
 #' @importFrom fs dir_ls
 #' @export
-aggregate_export_fst_mbr4 <- function(siteData_raw_list = tidy_mbr4_data(read_mbr4()),
-                                      compression = 100) {
-
-
-
-  export_dir_path <- sprintf(
-    "%s/data/fst",
-    shiny_file("mbr4.0")
-  )
-
-  check_or_create_export_dir(export_dir_path)
-
-  system.time(fst::write.fst(
-    x = siteData_raw_list,
-    path = sprintf("%s/siteData_raw_list.fst", export_dir_path),
-    compress = compression
-  ))
-
-  print("### Step 4: Performing temporal aggregation (10 min) #########################")
-
-  system.time(
-    siteData_10min_list <- group_datetime(siteData_raw_list, by = 10 * 60)
-  )
-
-  #print("### Step 5: Calculating Performing temporal aggregation ##########################")
-  #calc_dat <- calculate_operational_parameters_mbr4(df = siteData_10min_list)
-
-  #siteData_10min_list <- data.table::rbindlist(
-  #  l = list(siteData_10min_list, calc_dat), use.names = TRUE, fill = TRUE
-  #) %>%
-  #  as.data.frame()
-
-
-  fst::write.fst(
-    siteData_10min_list,
-    path = sprintf("%s/siteData_10min_list.fst", export_dir_path),
-    compress = compression
-  )
-
-  print("### Step 6: Performing temporal aggregation (1 h, 1 day) #########################")
-
-  system.time(
-    siteData_hour_list <- group_datetime(siteData_10min_list, by = 60 * 60)
-  )
-
-  fst::write.fst(
-    siteData_hour_list,
-    path = sprintf("%s/siteData_hour_list.fst", export_dir_path),
-    compress = compression
-  )
-
-  system.time(
-    siteData_day_list <- group_datetime(siteData_hour_list, by = "day")
-  )
-
-  fst::write.fst(
-    siteData_day_list,
-    path = sprintf("%s/siteData_day_list.fst", export_dir_path),
-    compress = compression
-  )
+aggregate_export_fst_mbr4 <- function(
+  siteData_raw_list = tidy_mbr4_data(read_mbr4()),
+  compression = 100
+)
+{
+  aggregate_export_fst(
+    compression = compression,
+    # FUN_calculate_ops = calculate_operational_parameters_mbr4,
+    calculate_ops_on_level = "aggregated",
+    siteData_raw_list = siteData_raw_list,
+    export_dir_path = sprintf("%s/data/fst", shiny_file("mbr4.0"))
+  )
 }
 
 # check_or_create_export_dir ---------------------------------------------------
diff --git a/R/aggregate_export_fst_general.R b/R/aggregate_export_fst_general.R
new file mode 100644
index 0000000..ae39b56
--- /dev/null
+++ b/R/aggregate_export_fst_general.R
@@ -0,0 +1,197 @@
+# aggregate_export_fst ---------------------------------------------------------
+
+#' Aggregate and Export to FST Format
+#'
+#' @param year_month_start start year month in yyyy-mm format, e.g. "2017-06"
+#' @param year_month_end end year month (default: current month)
+#' @param compression (default: 100)
+#' @param FUN_get_monthly_data function to be called to determine
+#' "raw_data_file_paths"
+#' @param FUN_import function to be called to read data into "siteData_raw_list"
+#' @param FUN_calculate_ops function to be called to calculate operational
+#' parameters. If \code{NULL} (the default), no operational parameters are
+#' calculated
+#' @param prefix site-specific prefix to be used as a sub folder name in the
+#' export directory
+#' @param calculate_ops_on_level one of "raw", "aggregated". Determines whether
+#' to calculate operational parameters based on the raw or on the aggregated
+#' values. Not used if \code{FUN_calculate_ops} is \code{NULL}
+#' @param siteData_raw_list existing raw data. If given (not \code{NULL}), no
+#' monthly periods are calculated and no data are imported. The given data are
+#' treated as one and only (monthly?) period.
+#' @param export_dir_path path to the export directory. Only required if raw
+#' data are given in \code{siteData_raw_list}.
+#' @return exports data for each month into subfolder: /data/fst/year-month
+#' @importFrom data.table rbindlist
+#' @importFrom fst write.fst
+#' @export
+aggregate_export_fst <- function(
+  year_month_start = NULL,
+  year_month_end = format(Sys.Date(), "%Y-%m"),
+  compression = 100,
+  FUN_get_monthly_data = NULL,
+  FUN_import = NULL,
+  FUN_calculate_ops = NULL,
+  prefix = NULL,
+  calculate_ops_on_level = c("raw", "aggregated")[1L],
+  siteData_raw_list = NULL,
+  export_dir_path = NULL
+)
+{
+  stopifnot(calculate_ops_on_level %in% c("raw", "aggregated"))
+
+  # If no raw data are given in "siteData_raw_list", determine the monthly
+  # periods for which to read raw data
+  if (is.null(siteData_raw_list)) {
+
+    monthly_periods <- get_monthly_periods(
+      year_month_start = year_month_start,
+      year_month_end = year_month_end
+    )
+
+    year_months <- monthly_periods$year_month
+
+  } else {
+
+    # Otherwise enter the following loop only once with one placeholder value
+    # that is actually not used
+    year_months <- "whole-period"
+  }
+
+  # Loop over monthly periods (may be only one period "whole-period")
+  for (year_month in year_months) {
+
+    # Import data for that month (only if not given in "siteData_raw_list"!)
+    if (is.null(siteData_raw_list)) {
+
+      siteData_raw_list <- import_data_for_month(
+        monthly_periods = monthly_periods,
+        year_month = year_month,
+        FUN_get_monthly_data = FUN_get_monthly_data,
+        FUN_import = FUN_import
+      )
+    }
+
+    # Determine the path to the export directory (if not given)
+    export_dir_path <- kwb.utils::defaultIfNULL(
+      export_dir_path,
+      sprintf("%s/data/fst/%s", shiny_file(prefix), year_month)
+    )
+
+    # Make sure that the export directory exists
+    check_or_create_export_dir(export_dir_path)
+
+    # If applicable, calculate operational parameters on raw values
+    if (calculate_ops_on_level == "raw") {
+      siteData_raw_list <- add_operational(siteData_raw_list, FUN_calculate_ops)
+    }
+
+    # Write raw values
+    system.time(fst::write.fst(
+      siteData_raw_list,
+      path = sprintf("%s/siteData_raw_list.fst", export_dir_path),
+      compress = compression
+    ))
+
+    print("### Performing temporal aggregation (10 min) #########################")
+
+    # Calculate 10 minute values
+    system.time(
+      siteData_10min_list <- group_datetime(siteData_raw_list, by = 10 * 60)
+    )
+
+    # If applicable, calculate operational parameters on aggregated values
+    if (calculate_ops_on_level == "aggregated") {
+      siteData_10min_list <- add_operational(siteData_10min_list, FUN_calculate_ops)
+    }
+
+    # Write 10 minute values
+    fst::write.fst(
+      siteData_10min_list,
+      path = sprintf("%s/siteData_10min_list.fst", export_dir_path),
+      compress = compression
+    )
+
+    print("### Performing temporal aggregation (1 h, 1 day) #####################")
+
+    # Calculate hourly values
+    system.time(
+      siteData_hour_list <- group_datetime(siteData_10min_list, by = 60 * 60)
+    )
+
+    # Write hourly values
+    fst::write.fst(
+      siteData_hour_list,
+      path = sprintf("%s/siteData_hour_list.fst", export_dir_path),
+      compress = compression
+    )
+
+    # Calculate daily values
+    system.time(
+      siteData_day_list <- group_datetime(siteData_hour_list, by = "day")
+    )
+
+    # Write daily values
+    fst::write.fst(
+      siteData_day_list,
+      path = sprintf("%s/siteData_day_list.fst", export_dir_path),
+      compress = compression
+    )
+
+  } # End of loop over monthly periods
+}
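The four site wrappers above now reduce to single calls of this general function. A minimal usage sketch, mirroring the aggregate_export_fst_berlin_t() wrapper from this diff (it assumes the package's shiny/berlin_t data folders are populated):

# Aggregate Berlin-Tiefwerder raw data month by month and export to fst
aggregate_export_fst(
  year_month_start = "2017-06",
  year_month_end = format(Sys.Date(), "%Y-%m"),
  compression = 100,
  FUN_get_monthly_data = get_rawfilespaths_for_month,
  FUN_import = import_data_berlin_t,
  FUN_calculate_ops = calculate_operational_parameters_berlin_t,
  prefix = "berlin_t"
)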
+
+# import_data_for_month ----------------------------------------------------------
+import_data_for_month <- function(
+  monthly_periods,
+  year_month,
+  FUN_get_monthly_data,
+  FUN_import
+)
+{
+  print(sprintf("Importing data for month '%s':", year_month))
+
+  monthly_period <- monthly_periods[monthly_periods$year_month == year_month, ]
+
+  datetime_start <- as_posix_cet("%s 00:00:00", monthly_period$start)
+  datetime_end <- as_posix_cet("%s 23:59:59", monthly_period$end)
+
+  raw_data_file_paths <- FUN_get_monthly_data(monthly_period)
+
+  system.time(
+    siteData_raw_list <- FUN_import(raw_data_files = raw_data_file_paths)
+  )
+
+  times <- kwb.utils::selectColumns(siteData_raw_list, "DateTime")
+
+  condition <- times >= datetime_start & times <= datetime_end
+
+  siteData_raw_list <- siteData_raw_list[condition, ]
+
+  time_range_text <- as.character(range(siteData_raw_list$DateTime))
+
+  print(sprintf(
+    "Reduced imported data points to time period: %s - %s",
+    time_range_text[1L], time_range_text[2L]
+  ))
+
+  siteData_raw_list
+}
+
+# add_operational ----------------------------------------------------------------
+add_operational <- function(df, FUN_calculate_ops = NULL)
+{
+  print("### Calculating operational parameters ######################")
+
+  if (is.null(FUN_calculate_ops)) {
+
+    print("### -> Skipped (no calculation function given).")
+    return(df)
+  }
+
+  calc_dat <- FUN_calculate_ops(df = df)
+
+  as.data.frame(
+    data.table::rbindlist(l = list(df, calc_dat), use.names = TRUE, fill = TRUE)
+  )
+}
diff --git a/R/calculate_operational_parameters_berlin_f.R b/R/calculate_operational_parameters_berlin_f.R
index 8ded289..f8ed44f 100644
--- a/R/calculate_operational_parameters_berlin_f.R
+++ b/R/calculate_operational_parameters_berlin_f.R
@@ -16,16 +16,19 @@
 #' @export
 #' @importFrom tibble tibble
 #'
-normalised_permeate_flow <- function(tempFeed,
-                                     conLoop,
-                                     vfrPerm,
-                                     vfrLoop,
-                                     vfrFeed,
-                                     prePerm,
-                                     preProc,
-                                     preConc,
-                                     nwp0 = 1.429162,
-                                     vfrPerm0 = 800) {
+normalised_permeate_flow <- function(
+  tempFeed,
+  conLoop,
+  vfrPerm,
+  vfrLoop,
+  vfrFeed,
+  prePerm,
+  preProc,
+  preConc,
+  nwp0 = 1.429162,
+  vfrPerm0 = 800
+)
+{
   res <- tibble::tibble(
     tcf = exp(3020 * (1 / 298 - (1 / (273 + tempFeed)))),
     cfc = conLoop * 0.65 * ((log(1 / (1 - vfrPerm / (vfrLoop + vfrFeed)))) / (vfrPerm / (vfrLoop + vfrFeed))),
@@ -35,21 +38,21 @@ normalised_permeate_flow <- function(tempFeed,
     nwpt = (vfrPerm * (nwp0 / .data$nwp)) * (vfrPerm / vfrPerm0),
     nwpr = -(1 - .data$nwpt / vfrPerm) * 100
   )
-
+
   res$nwpt
 }
-
-
+
 #' Calculate operational parameters for Berlin-Friedrichshagen
 #' @param df a data frame as retrieved by import_data_berlin_f()
 #' @param calc_list list with calculation operations to be carried out
-#' @param calc_list_name full names of parameters to be used for plotting for each
-#' calculation specified wit 'calc_list'.
+#' @param calc_list_name full names of parameters to be used for plotting for
+#' each calculation specified with 'calc_list'.
 #' @param calc_list_unit units of parameters to be used for plotting for each
-#' calculation specified wit 'calc_list'.
-#' @param calc_paras a vector with parameter codes used for performing calculations
-#' defined in 'calc_list'
+#' calculation specified with 'calc_list'.
+#' @param calc_paras a vector with parameter codes used for performing
+#' calculations defined in 'calc_list'
+#' @param config configuration object (list) from which the \code{calc_*}
+#' arguments are filled. Default: \code{get_calc_config("berlin_f")}
 #' @return dataframe with calculated operational parameters
 #' @export
 #' @examples
@@ -58,67 +61,15 @@ normalised_permeate_flow <- function(tempFeed,
 #' myDat <- calculate_operational_parameters_berlin_f(df = raw_list)
 #' }
 #'
-calculate_operational_parameters_berlin_f <- function(df,
-  calc_list = list(
-    vfrPerm = "`Durchfluss_Rohwasser` - `Durchfluss_Konzentrat`",
-    yield = "100*(`Durchfluss_Rohwasser` - `Durchfluss_Konzentrat`) / `Durchfluss_Rohwasser`",
-    conLoop = "(`Durchfluss_Rohwasser`*`LF_Rohwasser` + `Durchfluss_Rezirkulation`*`LF_Konzentrat`)/(`Durchfluss_Rohwasser` + `Durchfluss_Rezirkulation`)",
-    recovery = "100*(1 - `LF_Permeat` / conLoop)",
-    deltaPreProcConc = "`Druck_Arbeitsdruck` - `Druck_Konzentrat`",
-    # Membranflaeche NF 4x in Reihe: #4 x NF 270-4040 mit 7,6 m2 aktiver Flaeche
-    # surf = 4 * 7.6
-    flux = "vfrPerm / (4 * 7.6)",
-    cfv = "(`Durchfluss_Rohwasser`+ `Durchfluss_Rezirkulation`) / ((pi * 0.0095^2) * 1000 * 3600)",
-    tmp = "((`Druck_Arbeitsdruck` + `Druck_Konzentrat`) / 2) - `Druck_Permeat`",
-    nwpt = "normalised_permeate_flow(tempFeed = `Temperatur_Rohwasser`,
-      conLoop = `conLoop`,
-      vfrPerm = `vfrPerm`,
-      vfrLoop = `Durchfluss_Rezirkulation`,
-      vfrFeed = `Durchfluss_Rohwasser`,
-      prePerm = `Druck_Permeat`,
-      preProc = `Druck_Arbeitsdruck`,
-      preConc = `Druck_Konzentrat`,
-      nwp0 = 1.429162,
-      vfrPerm0 = 800)",
-    nwpr = "- ((1 - (nwpt / vfrPerm))) * 100"
-  ),
-  calc_list_name = c(
-    "Durchfluss Permeat",
-    "Ausbeute",
-    "Leitf\u00E4higkeit Rezirkulation",
-    "R\u00FCckhalt",
-    "Druckverlust (Feed - Konzentrat)",
-    "Flux",
-    "\u00DCberstr\u00F6mungsgeschwindigkeit",
-    "Transmembrandruck",
-    "Normalisierter Permeatstrom",
-    "Relativer Permeatstrom"
-  ),
-  calc_list_unit = c(
-    "l/h",
-    "%",
-    "\xB5S/cm",
-    "%",
-    "bar",
-    "l/h/m2",
-    "m/s",
-    "bar",
-    "l/h",
-    "%"
-  ),
-  calc_paras = c(
-    "Durchfluss_Rohwasser",
-    "Durchfluss_Konzentrat",
-    "Durchfluss_Rezirkulation",
-    "Druck_Arbeitsdruck",
-    "Druck_Rohwasser",
-    "Druck_Konzentrat",
-    "Druck_Permeat",
-    "LF_Permeat",
-    "LF_Rohwasser",
-    "LF_Konzentrat",
-    "Temperatur_Rohwasser"
-  )) {
+calculate_operational_parameters_berlin_f <- function(
+  df,
+  calc_list = get_calc_info_from_config(config, "expr"),
+  calc_list_name = get_calc_info_from_config(config, "name"),
+  calc_list_unit = get_calc_info_from_config(config, "unit"),
+  calc_paras = get_calc_info_from_config(config, "paras"),
+  config = get_calc_config("berlin_f")
+)
+{
   res <- calculate_operational_parameters(
     df,
     calc_list,
@@ -126,11 +77,11 @@ calculate_operational_parameters_berlin_f <- function(df,
     calc_list_unit,
     calc_paras
   )
-
+
   res$SiteName <- "General"
   res$SiteName_ParaName_Unit <- paste("General (calculated):", res$ParameterLabel)
   res$DataType <- "calculated"
   res$Source <- "online"
-
+
   res
 }
diff --git a/R/calculate_operational_parameters_berlin_s.R b/R/calculate_operational_parameters_berlin_s.R
index c661b1a..f4b5e2b 100644
--- a/R/calculate_operational_parameters_berlin_s.R
+++ b/R/calculate_operational_parameters_berlin_s.R
@@ -1,16 +1,19 @@
 #' Calculate operational parameters for Berlin-Schoenerlinde
 #'
 #' @param df a data frame as retrieved by read_wedeco_data()
-#' @param calc_list list with calculation operations to be carried out
-#' (default: list(deltaSAK = "(1-SCAN_SAK_Ablauf/SCAN_SAK_Zulauf)*100",
-#' Ozoneintrag = "(C_O3_Zugas - C_O3_Abgas)*Q_Gas/Q_Ozonanlage"))
-#' @param calc_list_name full names of parameters to be used for plotting for each
-#' calculation specified wit 'calc_list'. default: c('delta SAK', 'Ozoneintrag')
+#' @param calc_list list with calculation operations to be carried out (default:
+#' list(deltaSAK = "(1-SCAN_SAK_Ablauf/SCAN_SAK_Zulauf)*100", Ozoneintrag =
+#' "(C_O3_Zugas - C_O3_Abgas)*Q_Gas/Q_Ozonanlage"))
+#' @param calc_list_name full names of parameters to be used for plotting for
+#' each calculation specified with 'calc_list'. default: c('delta SAK',
+#' 'Ozoneintrag')
 #' @param calc_list_unit units of parameters to be used for plotting for each
-#' calculation specified wit 'calc_list'. default: c("percent", "mg-O3/L")
-#' @param calc_paras a vector with parameter codes used for performing calculations
-#' defined in 'calc_list' (default: c("SCAN_SAK_Ablauf", "SCAN_SAK_Zulauf",
-#' "C_O3_Zugas", "C_O3_Abgas", "Q_Gas", "Q_Ozonanlage"))
+#' calculation specified with 'calc_list'. default: c("percent", "mg-O3/L")
+#' @param calc_paras a vector with parameter codes used for performing
+#' calculations defined in 'calc_list' (default: c("SCAN_SAK_Ablauf",
+#' "SCAN_SAK_Zulauf", "C_O3_Zugas", "C_O3_Abgas", "Q_Gas", "Q_Ozonanlage"))
+#' @param config configuration object (list) from which the \code{calc_*}
+#' arguments are filled. Default: \code{get_calc_config("berlin_s")}
 #' @return dataframe with calculated operational parameters
 #' @export
 #' @examples
@@ -19,29 +22,23 @@
 #' myDat <- calculate_operational_parameters_berlin_s(df = raw_list)
 #' }
 #'
-calculate_operational_parameters_berlin_s <- function(df,
-  calc_list = list(
-    deltaSAK = "(1-SCAN_SAK_Ablauf/SCAN_SAK_Zulauf)*100",
-    Ozoneintrag = "(C_O3_Zugas - C_O3_Abgas)*Q_Gas/Q_Ozonanlage"
-  ),
-  calc_list_name = c("delta SAK", "Ozoneintrag"),
-  calc_list_unit = c("%", "mg-O3/L"),
-  calc_paras = c(
-    "SCAN_SAK_Ablauf",
-    "SCAN_SAK_Zulauf",
-    "C_O3_Zugas",
-    "C_O3_Abgas",
-    "Q_Gas",
-    "Q_Ozonanlage"
-  )) {
+calculate_operational_parameters_berlin_s <- function(
+  df,
+  calc_list = get_calc_info_from_config(config, "expr"),
+  calc_list_name = get_calc_info_from_config(config, "name"),
+  calc_list_unit = get_calc_info_from_config(config, "unit"),
+  calc_paras = get_calc_info_from_config(config, "paras"),
+  config = get_calc_config("berlin_s")
+)
+{
   res <- calculate_operational_parameters(
     df, calc_list, calc_list_name, calc_list_unit, calc_paras
   )
-
+
   res$SiteName <- "General"
   res$SiteName_ParaName_Unit <- paste("General (calculated):", res$ParameterLabel)
   res$DataType <- "calculated"
   res$Source <- "online"
-
+
   res
 }
diff --git a/R/calculate_operational_parameters_berlin_t.R b/R/calculate_operational_parameters_berlin_t.R
index c25fadf..676b346 100644
--- a/R/calculate_operational_parameters_berlin_t.R
+++ b/R/calculate_operational_parameters_berlin_t.R
@@ -8,6 +8,8 @@
 #' calculation specified wit 'calc_list'. default: c("percent")
 #' @param calc_paras a vector with parameter codes used for performing calculations
 #' defined in 'calc_list' (default: c("FY-20-01", "FT-10-01")
+#' @param config configuration object (list) from which the \code{calc_*}
+#' arguments are filled. Default: \code{get_calc_config("berlin_t")}
 #' @return dataframe with calculated operational parameters
 #' @export
 #' @examples
@@ -16,11 +18,15 @@
 #' myDat <- calculate_operational_parameters_berlin_t(df = raw_list)
 #' }
 #'
-calculate_operational_parameters_berlin_t <- function(df,
-  calc_list = list(recovery = "100*`FY-20-01`/`FT-10-01`"),
-  calc_list_name = c("recovery"),
-  calc_list_unit = c("%"),
-  calc_paras = c("FY-20-01", "FT-10-01")) {
+calculate_operational_parameters_berlin_t <- function(
+  df,
+  calc_list = get_calc_info_from_config(config, "expr"),
+  calc_list_name = get_calc_info_from_config(config, "name"),
+  calc_list_unit = get_calc_info_from_config(config, "unit"),
+  calc_paras = get_calc_info_from_config(config, "paras"),
+  config = get_calc_config("berlin_t")
+)
+{
   res <- calculate_operational_parameters(
     df,
     calc_list,
@@ -28,11 +34,11 @@ calculate_operational_parameters_berlin_t <- function(df,
     calc_list_unit,
     calc_paras
   )
-
+
   res$SiteName <- "General"
   res$SiteName_ParaName_Unit <- paste("General (calculated):", res$ParameterLabel)
   res$DataType <- "calculated"
   res$Source <- "online"
-
+
   res
 }
diff --git a/R/calculate_operational_parameters_haridwar.R b/R/calculate_operational_parameters_haridwar.R
index 3a4ae5f..e1048ed 100644
--- a/R/calculate_operational_parameters_haridwar.R
+++ b/R/calculate_operational_parameters_haridwar.R
@@ -16,6 +16,8 @@
 #' @param calc_paras a vector with parameter codes used for performing calculations
 #' defined in 'calc_list' (default: c('Redox_Out1', 'Redox_Out2', 'Redox_In',
 #' 'Flux', 'Up', 'Ip', 'Uz', 'Iz'))
+#' @param config configuration object (list) from which the \code{calc_*}
+#' arguments are filled. Default: \code{get_calc_config("haridwar")}
 #' @return dataframe with calculated operational parameters
 #' @importFrom kwb.utils stringList
 #' @export
@@ -25,47 +27,21 @@
 #' myDat <- calculate_operational_parameters(df = haridwar_raw_list)
 #' }
 #'
-calculate_operational_parameters <- function(df,
-  calc_list = list(
-    Redox_Out = "(Redox_Out1+Redox_Out2)/2",
-    Redox_Diff = "Redox_Out - Redox_In",
-    Power_pump = "Up*Ip",
-    Power_cell = "Uz*Iz",
-    Pump_WhPerCbm = "Power_pump/(Flux/1000)",
-    Cell_WhPerCbm = "Power_cell/(Flux/1000)"
-  ),
-  calc_list_name = c(
-    "Mean redox potential in tank",
-    "Difference (outflow - inflow) of redox potential",
-    "Power demand of pump",
-    "Power demand of cell",
-    "Specific energy demand of pump",
-    "Specific energy demand of cell"
-  ),
-  calc_list_unit = c(
-    "mV",
-    "mV",
-    "W",
-    "W",
-    "Wh/m3",
-    "Wh/m3"
-  ),
-  calc_paras = c(
-    "Redox_Out1",
-    "Redox_Out2",
-    "Redox_In",
-    "Flux",
-    "Up",
-    "Ip",
-    "Uz",
-    "Iz"
-  )) {
+calculate_operational_parameters <- function(
+  df,
+  calc_list = get_calc_info_from_config(config, "expr"),
+  calc_list_name = get_calc_info_from_config(config, "name"),
+  calc_list_unit = get_calc_info_from_config(config, "unit"),
+  calc_paras = get_calc_info_from_config(config, "paras"),
+  config = get_calc_config("haridwar")
+)
+{
   print(sprintf(
     "Calculating %d operational parameter(s): %s",
     length(calc_list_name), kwb.utils::stringList(calc_list_name)
   ))
-
+
   meta_data <- data.frame(
     ParameterCode = names(calc_list),
     ParameterName = calc_list_name,
@@ -73,26 +49,23 @@ calculate_operational_parameters <- function(df,
     ParameterLabel = sprintf("%s (%s)", calc_list_name, calc_list_unit),
     stringsAsFactors = FALSE
   )
-
-
-
+
   operation <- df[df$ParameterCode %in% calc_paras, ] %>%
     filter_("!is.na(ParameterValue)") %>%
     select_("DateTime", "ParameterCode", "ParameterValue")
-
+
   operation_matrix <- operation %>%
     tidyr::spread_(
       key_col = "ParameterCode",
       value_col = "ParameterValue"
     )
-
-
+
   ### Calculate additional parameters:
   operation_calc <- operation_matrix %>%
     dplyr::mutate_(.dots = calc_list) %>%
     dplyr::select_(.dots = c("DateTime", names(calc_list)))
-
-  operation_calc_tidy <- tidyr::gather_(
+
+  tidyr::gather_(
     operation_calc,
     key_col = "ParameterCode",
    value_col = "ParameterValue",
@@ -101,12 +74,8 @@ calculate_operational_parameters <- function(df,
     dplyr::filter_("!is.na(ParameterValue)") %>%
     dplyr::left_join(y = meta_data)
   # dplyr::mutate_(DataType = "'calculated'")
-
-
-
-  return(operation_calc_tidy)
 }
-
 #' Plot calculate operational time series
 #' @param df a data frame as retrieved by calculate_operational_parameters()
 #' @return plots time series for calculated operational parameters
@@ -120,13 +89,13 @@
 plot_calculated_operational_timeseries <- function(df) {
   calculated_paras <- unique(df$ParameterLabel)
-
-
+
+
   for (i in seq_along(calculated_paras)) {
     sel_par1 <- df$ParameterLabel[order(calculated_paras)][i]
-
+
     n_measurements <- nrow(df[df[, "ParameterLabel"] == sel_par1, ])
-
+
     if (n_measurements > 0) {
       g1 <- ggplot2::ggplot(df, ggplot2::aes_string(
         x = "DateTime",
@@ -147,7 +116,7 @@ plot_calculated_operational_timeseries <- function(df) {
         legend.title = element_blank()
       ) +
       ggplot2::labs(x = "", y = "")
-
+
       print(g1)
     }
   }
@@ -156,12 +125,12 @@ plot_calculated_operational_timeseries <- function(df) {
 
 if (FALSE) {
   myDat <- calculate_operational_parameters(df = haridwar_raw_list)
-
-
+
+
   plot_calculated_operational_timeseries(df = myDat)
-
+
   ### Plot it
-
+
   #
   # backwash <- operation[operation$Anlauf == 90,"DateTime"]
   #
diff --git a/R/check_thresholds.R b/R/check_thresholds.R
index 20d3d7b..c1815b1 100644
--- a/R/check_thresholds.R
+++ b/R/check_thresholds.R
@@ -16,16 +16,17 @@ check_thresholds <- function(df, # haridwar_day_list,
     thresholds$ParameterThresholdComparison
   )
 
-  thresholds$number_total <- 0
-  thresholds$number_of_satisfying <- 0
-  thresholds$numberOfExceedance <- 0
+  thresholds$number_total <- 0L
+  thresholds$number_of_satisfying <- 0L
+  thresholds$numberOfExceedance <- 0L
   thresholds$exceedanceLabel <- "No data within reporting period!"
 
   for (i in seq_len(nrow(thresholds))) {
+
     cond1 <- df$ParameterCode == thresholds$ParameterCode[i]
     cond1 <- cond1 & df$SiteCode == thresholds$SiteCode[i]
-    cond1 <- cond1 & !is.na(df$ParameterValue)
+    cond1 <- cond1 & ! is.na(df$ParameterValue)
 
     cond2 <- eval(parse(text = sprintf(
       "df$ParameterValue %s %s",
@@ -35,15 +36,15 @@ check_thresholds <- function(df, # haridwar_day_list,
 
     condition <- cond1 & cond2
 
-    n_total <- nrow(df[cond1, ])
-
-    n_satisfy <- nrow(df[condition, ])
+    n_total <- sum(cond1)
+    n_satisfy <- sum(condition)
 
     n_exceed <- n_total - n_satisfy
 
     thresholds$number_total[i] <- n_total
     thresholds$number_of_satisfying[i] <- n_satisfy
 
-    if (n_total > 0) {
+    if (n_total > 0L) {
+
       thresholds$numberOfExceedance[i] <- n_exceed
       thresholds$exceedanceLabel[i] <- sprintf(
         "%d (%2.1f %%)", n_exceed, kwb.utils::percentage(n_exceed, n_total)
diff --git a/R/create_monthly_selection.R b/R/create_monthly_selection.R
index 25f1577..b23f787 100644
--- a/R/create_monthly_selection.R
+++ b/R/create_monthly_selection.R
@@ -6,9 +6,14 @@
 #' 'endDate' month including a column 'label' (used in shiny app for month
 #' selection)
 #' @importFrom lubridate days_in_month
+#' @importFrom kwb.utils noFactorDataFrame
 #' @export
 
-create_monthly_selection <- function(startDate = "2016-09-01", endDate = Sys.Date()) {
+create_monthly_selection <- function(
+  startDate = "2016-09-01",
+  endDate = Sys.Date()
+)
+{
   ym <- function(x) format(x, "%Y-%m")
 
   endMonth <- as.Date(sprintf("%s-01", ym(endDate)))
@@ -17,10 +22,9 @@ create_monthly_selection <- function(startDate = "2016-09-01", endDate = Sys.Dat
 
   end <- as.Date(sprintf("%s-%s", ym(start), lubridate::days_in_month(start)))
 
-  data.frame(
+  kwb.utils::noFactorDataFrame(
     start = start,
     end = end,
-    label = format(start, "%B %Y"),
-    stringsAsFactors = FALSE
+    label = format(start, "%B %Y")
   )
 }
diff --git a/R/dygraph_add_limits.R b/R/dygraph_add_limits.R
index 12b492c..81086c1 100644
--- a/R/dygraph_add_limits.R
+++ b/R/dygraph_add_limits.R
@@ -14,17 +14,23 @@
 #' @return add limits to existing dygraph object
 #' @import dygraphs
 #' @export
-dygraph_add_limits <- function(dygraph,
-                               limits_df, # thresholds[thresholds$ParameterName %in% "Battery voltage",],
-                               label_loc = "left",
-                               col_limits = "ParameterThreshold",
-                               col_label = "label",
-                               ...) {
-  if (nrow(limits_df) == 0) {
+dygraph_add_limits <- function(
+  dygraph,
+  limits_df, # thresholds[thresholds$ParameterName %in% "Battery voltage",],
+  label_loc = "left",
+  col_limits = "ParameterThreshold",
+  col_label = "label",
+  ...
+)
+{
+  n_rows <- nrow(limits_df)
+
+  if (n_rows == 0L) {
     return(dygraph)
   }
 
-  for (i in seq_len(nrow(limits_df))) {
+  for (i in seq_len(n_rows)) {
+
     dygraph <- dygraphs::dyLimit(
       dygraph = dygraph,
       limit = limits_df[i, col_limits],
diff --git a/R/export_and_plot.R b/R/export_and_plot.R
index 20a9a80..2b10023 100644
--- a/R/export_and_plot.R
+++ b/R/export_and_plot.R
@@ -4,13 +4,16 @@
 #' @importFrom tidyr spread
 #' @export
 #'
-long_to_wide <- function(df) {
+long_to_wide <- function(df)
+{
   df %>%
-    dplyr::mutate(ParameterName_SiteName = sprintf(
-      "%s_%s",
-      .data$ParameterName,
-      .data$SiteName
-    )) %>%
+    dplyr::mutate(
+      ParameterName_SiteName = sprintf(
+        "%s_%s",
+        .data$ParameterName,
+        .data$SiteName
+      )
+    ) %>%
     dplyr::select(
       .data$DateTime,
       .data$ParameterName_SiteName,
@@ -22,9 +25,6 @@ long_to_wide <- function(df) {
   )
 }
 
-
-
-
 #' CSV data export in "wide" format
 #'
 #' @param df_long data frame in long format (as retrieved by \code{kwb.pilot::group_datetime})
@@ -35,26 +35,21 @@ long_to_wide <- function(df) {
 #' @export
 #' @importFrom fs dir_create
 #' @importFrom kwb.utils catAndRun
-export_data <- function(df_long,
-                        export_dir,
-                        dbg = TRUE) {
+export_data <- function(df_long, export_dir, dbg = TRUE)
+{
   fs::dir_create(sprintf("%s/data", export_dir))
-
-  df_name <- deparse(substitute(df_long))
-
-  df_file <- sprintf(
-    "%s/data/%s.csv",
-    export_dir,
-    df_name
-  )
-
-
-  kwb.utils::catAndRun(sprintf("Export data to %s", df_file),
+
+  name <- deparse(substitute(df_long))
+
+  file <- sprintf("%s/data/%s.csv", export_dir, name)
+
+  kwb.utils::catAndRun(
+    sprintf("Export data to %s", file),
+    dbg = dbg,
     expr = {
       df_wide <- long_to_wide(df_long)
-      readr::write_csv2(df_wide, path = df_file)
-    },
-    dbg = dbg
+      readr::write_csv2(df_wide, path = file)
+    }
   )
 }
 
@@ -69,24 +64,17 @@
 #' @importFrom htmlwidgets saveWidget
 #' @importFrom plotly ggplotly
 #'
-plot_data <- function(df_long,
-                      export_dir,
-                      dbg = TRUE) {
+plot_data <- function(df_long, export_dir, dbg = TRUE)
+{
   fs::dir_create(sprintf("%s/plots", export_dir))
-
-
-  df_name <- deparse(substitute(df_long))
-
-
-  plot_file <- sprintf(
-    "%s/plots/%s.html",
-    export_dir,
-    df_name
-  )
-
-
-
-  kwb.utils::catAndRun(sprintf("Export plot: %s", plot_file),
+
+  name <- deparse(substitute(df_long))
+
+  file <- sprintf("%s/plots/%s.html", export_dir, name)
+
+  kwb.utils::catAndRun(
+    sprintf("Export plot: %s", file),
+    dbg = dbg,
     expr = {
       g1 <- df_long %>%
         ggplot2::ggplot(mapping = ggplot2::aes_string(
@@ -97,18 +85,15 @@ plot_data <- function(df_long,
         ggplot2::facet_wrap(~ParameterName, scales = "free_y", ncol = 1) +
         ggplot2::geom_point() +
         ggplot2::theme_bw()
-
-
-      withr::with_dir(sprintf("%s/plots", export_dir),
-        code = {
-          plotly::ggplotly(g1) %>%
-            htmlwidgets::saveWidget(basename(plot_file),
-              selfcontained = FALSE,
-              title = df_name
-            )
-        }
-      )
-    },
-    dbg = dbg
+
+      withr::with_dir(sprintf("%s/plots", export_dir), code = {
+        plotly::ggplotly(g1) %>%
+          htmlwidgets::saveWidget(
+            basename(file),
+            selfcontained = FALSE,
+            title = name
+          )
+      })
+    }
   )
 }
diff --git a/R/get_calc_info_from_config.R b/R/get_calc_info_from_config.R
new file mode 100644
index 0000000..4acc1e2
--- /dev/null
+++ b/R/get_calc_info_from_config.R
@@ -0,0 +1,24 @@
+# get_calc_info_from_config ----------------------------------------------------
+get_calc_info_from_config <- function(config, what = "expr")
+{
+  calculated <- kwb.utils::selectElements(config, "calculated")
+
+  if (what == "paras") {
+    return(kwb.utils::selectElements(config, "parameters"))
+  }
+
+  result <- lapply(calculated, kwb.utils::selectElements, what)
+
+  if (what == "expr") {
+    return(result)
+  }
+
+  unname(unlist(result))
+}
+
+# get_calc_config --------------------------------------------------------------
+#' @importFrom yaml read_yaml
+get_calc_config <- function(site)
+{
+  yaml::read_yaml(shiny_file(site, "config/config.yml"))
+}
diff --git a/R/get_monthly_data_from_calenderweeks.R b/R/get_monthly_data_from_calenderweeks.R
index 78557f8..a5a7f58 100644
--- a/R/get_monthly_data_from_calenderweeks.R
+++ b/R/get_monthly_data_from_calenderweeks.R
@@ -3,24 +3,25 @@
 #' @param end end of period (default: .Date())
 #' @return data.frame with daily date sequence for and corresponding calendar week
 #' @importFrom lubridate ymd
+#' @importFrom kwb.utils noFactorDataFrame
 #' @export
-calenderweek_from_dates <- function(start = "2017-04-24", end = Sys.Date()) {
+calenderweek_from_dates <- function(start = "2017-04-24", end = Sys.Date())
+{
   dates <- seq(
     from = lubridate::ymd(start),
     to = lubridate::ymd(end),
     by = "days"
   )
 
-  data.frame(
+  kwb.utils::noFactorDataFrame(
     date = dates,
     yearmonth = format(dates, format = "%Y-%m"),
     jahrmonattag = format(dates, format = "%d.%m.%y"),
     year = as.numeric(format(dates, format = "%Y")),
     month = as.numeric(format(dates, format = "%m")),
     day = as.numeric(format(dates, format = "%d")),
-    cw = sprintf("%02d", lubridate::isoweek(dates)),
-    stringsAsFactors = FALSE
+    cw = sprintf("%02d", lubridate::isoweek(dates))
   )
 }
@@ -29,20 +30,22 @@
 #' @return character vector with operational filenames with all calendar weeks
 #' that need to be imported for Berlin Schoenerlinde
 #' @importFrom dplyr filter_ pull
+#' @importFrom kwb.utils noFactorDataFrame
 #' @importFrom tidyr separate
 #' @export
-get_monthly_data_from_calendarweeks <- function(year_month) {
+get_monthly_data_from_calendarweeks <- function(year_month)
+{
   cw_for_month <- calenderweek_from_dates() %>%
     dplyr::filter_(~ yearmonth == year_month) %>%
     dplyr::pull("cw") %>%
     unique()
 
-  dir_operation <- package_file("shiny/berlin_s/data/operation")
+  dir_operation <- shiny_file("berlin_s/data/operation")
 
   files <- list.files(dir_operation, pattern = ".csv")
 
   files_to_import <- tidyr::separate_(
-    data = data.frame(files = files, stringsAsFactors = FALSE),
+    data = kwb.utils::noFactorDataFrame(files = files),
     col = "files",
     into = c("a", "year", "c", "cw", "e"),
     remove = FALSE
@@ -54,17 +57,21 @@
   sprintf("%s/%s", dir_operation, files_to_import)
 }
 
-if (FALSE) {
+if (FALSE)
+{
   ##############################################################################
   #### Make one CSV file of Ozone_2017_BisKW_29.csv for each calendar week 17-29
   ##############################################################################
-  old_data <- readLines(kwb.pilot:::package_file(
-    "shiny/berlin_s/data/operation/Ozone_2017_BisKW_29.csv"
+  shiny_file <- getFromNamespace("shiny_file", ns = "kwb.pilot")
+
+  old_data <- readLines(shiny_file(
+    "berlin_s/data/operation/Ozone_2017_BisKW_29.csv"
   ))
 
   date_strings <- stringr::str_sub(old_data, 1, 8)
 
   for (week in 17:29) {
+
     pattern <- calenderweek_from_dates() %>%
       dplyr::filter(cw == week) %>%
       dplyr::pull(jahrmonattag) %>%
diff --git a/R/get_monthly_periods.R b/R/get_monthly_periods.R
index e60c923..e6655cb 100644
--- a/R/get_monthly_periods.R
+++ b/R/get_monthly_periods.R
@@ -15,11 +15,15 @@ last_day <- function(date) {
 #' @param year_month_end end year month (default: current month)
 #' @param tz (default: 'CET')
 #' @return dataframe with monthly periods
+#' @importFrom kwb.utils noFactorDataFrame
 #' @importFrom lubridate days
 #' @export
-get_monthly_periods <- function(year_month_start = "2017-06",
-                                year_month_end = format(Sys.Date(), "%Y-%m"),
-                                tz = "CET") {
+get_monthly_periods <- function(
+  year_month_start = "2017-06",
+  year_month_end = format(Sys.Date(), "%Y-%m"),
+  tz = "CET"
+)
+{
   first_day <- function(x) as.Date(paste0(x, "-01"), tz = tz)
 
   month_start <- seq(
@@ -28,11 +32,10 @@ get_monthly_periods <- function(year_month_start = "2017-06",
     by = "month"
   )
 
-  data.frame(
+  kwb.utils::noFactorDataFrame(
     year_month = format(month_start, format = "%Y-%m"),
     start = month_start,
-    end = last_day(month_start),
-    stringsAsFactors = FALSE
+    end = last_day(month_start)
   )
 }
@@ -41,21 +44,25 @@
 #' @param monthly_period one row of data.frame as retrieved by function
 #' first row of get_monthly_periods(), i.e. year month is (default: '2017-06')
 #' @param raw_data_dir directory with operational raw data files for Berlin Tiefwerder
-#' (default: \code{kwb.pilot:::package_file("shiny/berlin_t/data/operation")})
+#' (default: \code{kwb.pilot:::shiny_file("berlin_t/data/operation")})
 #' @param max_offset_days number of days in previous/next month to look for beginning/
 #' ending of month (default: 7)
 #' @return dataframe with monthly periods
 #' @importFrom lubridate days
 #' @importFrom kwb.utils stringList
 #' @export
-get_rawfilespaths_for_month <- function(monthly_period = get_monthly_periods()[1, ],
-                                        raw_data_dir = package_file("shiny/berlin_t/data/operation"),
-                                        max_offset_days = 7) {
-  rawfiles <- stringr::str_sub(list.files(raw_data_dir), start = 1, end = 10)
+get_rawfilespaths_for_month <- function(
+  monthly_period = get_monthly_periods()[1, ],
+  raw_data_dir = shiny_file("berlin_t/data/operation"),
+  max_offset_days = 7
+)
+{
+  rawfiles <- stringr::str_sub(list.files(raw_data_dir), start = 1L, end = 10L)
 
   offset_days <- lubridate::days(seq_len(max_offset_days))
 
   get_offset <- function(type) {
+
     type <- match.arg(type, c("min", "max"))
 
     offset <- as.character(if (type == "min") {
@@ -65,7 +72,7 @@ get_rawfilespaths_for_month <- function(monthly_period = get_monthly_periods()[1
     })
 
     if (any(available <- offset %in% rawfiles)) {
-      return(ifelse(type == "min", rev, identity)(offset[available]))[1]
+      return((if (type == "min") rev(offset[available]) else offset[available])[1L])
     }
 
     warning(
@@ -76,7 +83,7 @@ get_rawfilespaths_for_month <- function(monthly_period = get_monthly_periods()[1
      kwb.utils::stringList(offset)
    )
 
-    return(NA)
+    NA
   }
 
   dates_to_grab <- c(
diff --git a/R/get_thresholds.R b/R/get_thresholds.R
index 1e27997..c3ec900 100644
--- a/R/get_thresholds.R
+++ b/R/get_thresholds.R
@@ -1,15 +1,16 @@
 #' Get thresholds for analytics/operational parameters
 #' @param file path to csv file with thresholds for Haridwar site (default:
-#' \code{kwb.pilot:::package_file("shiny/haridwar/data/thresholds.csv")})
+#' \code{kwb.pilot:::shiny_file("haridwar/data/thresholds.csv")})
 #' @return returns data frame thresholds for operational/analytical parameters
 #' @importFrom dplyr mutate_
 #' @importFrom utils read.csv
 #' @export
-get_thresholds <- function(file = package_file("shiny/haridwar/data/thresholds.csv")) {
+get_thresholds <- function(file = shiny_file("haridwar/data/thresholds.csv"))
+{
   read.csv(file, stringsAsFactors = FALSE) %>%
     dplyr::mutate_(
-      "label" = "sprintf('%s %s %3.1f (%s)',
+      label = "sprintf('%s %s %3.1f (%s)',
       ParameterName,
       ParameterThresholdComparison,
       ParameterThreshold,
diff --git a/R/group_datetime.R b/R/group_datetime.R
index ba479a0..0ce6602 100644
--- a/R/group_datetime.R
+++ b/R/group_datetime.R
@@ -5,7 +5,7 @@
 #' day aggregation or "day", "month" or "year" for longer time spans
 #' @param fun function to be used for grouping measurement data of column ParameterValue
 #' (default: stats::median)
-#' (default: kwb.pilot:::package_file("shiny/haridwar/.my.cnf"))
+#' (default: kwb.pilot:::shiny_file("haridwar/.my.cnf"))
 #' @param col_datetime column name of datetime column (default: DateTime)
 #' @param col_datatype column name of data type column (default: DataType)
 #' @param dbg print debug information
@@ -17,23 +17,28 @@
 #' @importFrom kwb.utils stringList
 #' @export
 
-group_datetime <- function(df,
-                           by = 600,
-                           fun = "stats::median",
-                           col_datetime = "DateTime",
-                           col_datatype = "DataType",
-                           dbg = TRUE) {
+group_datetime <- function(
+  df,
+  by = 600,
+  fun = "stats::median",
+  col_datetime = "DateTime",
+  col_datatype = "DataType",
+  dbg = TRUE
+)
+{
   if (is.character(by)) {
     by <- tolower(by)
   }
-
+
+  add_midnight <- function(x) paste(x, "00:00:00")
+
   grp_list <- list(
-    year = "%Y-01-01 00:00:00",
-    month = "%Y-%m-01 00:00:00",
-    day = "%Y-%m-%d 00:00:00"
+    year = add_midnight("%Y-01-01"),
+    month = add_midnight("%Y-%m-01"),
+    day = add_midnight("%Y-%m-%d")
   )
-
-  if (!is.numeric(by) && !by %in% names(grp_list)) {
+
+  if (! is.numeric(by) && ! by %in% names(grp_list)) {
     clean_stop(
       sprintf("'%s' is no valid aggregation time step!\n", by),
       "Please select one of: ", kwb.utils::stringList(names(grp_list)),
@@ -41,47 +46,63 @@ group_datetime <- function(df,
       "seconds (e.g. 600 in case of 10 minute aggregation)."
     )
   }
-
+
   # Extract the time column
   timestamps <- kwb.utils::selectColumns(df, col_datetime)
-
-  if (!is.numeric(by)) {
+
+  if (! is.numeric(by)) {
+
     timely <- if (by == "day") "daily" else paste0(by, "ly")
-
+
     text <- paste("Performing", timely, "temporal aggregation!")
-
+
     times <- fasttime::fastPOSIXct(
       format(timestamps, format = grp_list[[by]]),
-      tz = base::check_tzones(timestamps),
+      tz = check_tzones(timestamps),
       required.components = 3L
     )
-
+
     descriptions <- sprintf("%s %s", timely, fun)
+
   } else {
+
     text <- paste(
       "Performing temporal aggregation for", by, "seconds time periods!"
    )
-
+
     times <- xts::align.time(timestamps, n = by) - by / 2
-
+
     descriptions <- sprintf("%d seconds %s", by, fun)
   }
-
+
   # Extract the type column
   types <- kwb.utils::selectColumns(df, col_datatype)
-
+
   # Overwrite original columns with new time objects and type labels
-  df[, col_datetime] <- times
-  df[, col_datatype] <- sprintf("%s (%s)", types, descriptions)
-
+  df[[col_datetime]] <- times
+  df[[col_datatype]] <- sprintf("%s (%s)", types, descriptions)
+
   kwb.utils::catAndRun(text, dbg = dbg, expr = {
     df %>%
-      dplyr::group_by(dplyr::across(.cols = dplyr::setdiff(names(df),
-        "ParameterValue"))
-      ) %>%
-      dplyr::summarise(ParameterValue = eval(parse(text = paste0(fun,
-        "(.data$ParameterValue)"))),
-      .groups = "keep") %>%
+      dplyr::group_by(
+        dplyr::across(.cols = dplyr::setdiff(names(df), "ParameterValue"))
+      ) %>%
+      dplyr::summarise(
+        ParameterValue = eval(parse(
+          text = paste0(fun, "(.data$ParameterValue)")
+        )),
+        .groups = "keep"
+      ) %>%
       as.data.frame()
   })
 }
+
+# check_tzones -----------------------------------------------------------------
+check_tzones <- getFromNamespace(
+  x = grep(
+    pattern = "check_tzones",
+    x = ls(getNamespace("base"), all.names = TRUE),
+    value = TRUE
+  ),
+  ns = "base"
+)
diff --git a/R/import_analytics_haridwar.R b/R/import_analytics_haridwar.R
index 20be135..d9bb690 100644
--- a/R/import_analytics_haridwar.R
+++ b/R/import_analytics_haridwar.R
@@ -10,37 +10,44 @@
 #' "measurementID" in case of samples
 #' @importFrom plyr rbind.fill
 #' @keywords internal
-fill_datetime <- function(df,
-                          col_rawData_pattern = "raw",
-                          col_datetime = "DateTime",
-                          dbg = FALSE) {
-  columns_raw_data <- grep(
-    names(df),
-    pattern = col_rawData_pattern
-  )
-
+fill_datetime <- function(
+  df,
+  col_rawData_pattern = "raw",
+  col_datetime = "DateTime",
+  dbg = FALSE
+)
+{
+  columns_raw_data <- grep(names(df), pattern = col_rawData_pattern)
+
   df$totSamples <- rowSums(!is.na(df[, columns_raw_data]))
-
+
   df$measurementID <- NA
-
-  dates_indices <- which(!is.na(df[, col_datetime]))
-
-
+
+  dates_indices <- which(! is.na(df[[col_datetime]]))
+
   for (start_index in dates_indices) {
-    measurementID <- 0
-    end_index <- start_index + 2
-
+
+    measurementID <- 0L
+    end_index <- start_index + 2L
+
     for (ind in start_index:end_index) {
-      if (df$totSamples[ind] > 0) {
+
+      if (df$totSamples[ind] > 0L) {
+
         measurementID <- measurementID + 1
         df$DateTime[ind] <- df$DateTime[start_index]
         df$measurementID[ind] <- measurementID
+
       } else {
-        if (dbg) print("Do nothing")
+
+        if (dbg) {
+          print("Do nothing")
+        }
       }
     }
   }
-  return(df)
+
+  df
 }
 
 #' Imports an analytics sheet from an EXCEL spreadsheet
@@ -60,15 +67,17 @@
 #' @import readxl tidyr dplyr
 #' @importFrom kwb.utils stringList
 #' @keywords internal
-import_sheet <- function(xlsPath,
-                         sheet,
-                         col_names = TRUE,
-                         col_rawData_pattern = "raw",
-                         col_ignore_pattern = "mean|empty|X_|RX|not_used",
-                         skip = 69,
-                         tz_org = "UTC",
-                         tz_export = "UTC") {
-
+import_sheet <- function(
+  xlsPath,
+  sheet,
+  col_names = TRUE,
+  col_rawData_pattern = "raw",
+  col_ignore_pattern = "mean|empty|X_|RX|not_used",
+  skip = 69,
+  tz_org = "UTC",
+  tz_export = "UTC"
+)
+{
   ### Read original EXCEL sheet
   tmp_par1 <- readxl::read_excel(
     path = xlsPath,
@@ -76,18 +85,18 @@
     col_names = col_names,
     skip = skip
   )
-
-  names(tmp_par1)[1] <- "DateTime"
-
-
+
+  names(tmp_par1)[1L] <- "DateTime"
+
   ### Check if all data points in first column are of type DATE/TIME
   if (is.character(tmp_par1$DateTime)) {
-    date_time_entries <- tmp_par1$DateTime[!is.na(tmp_par1$DateTime)]
-
+
+    date_time_entries <- tmp_par1$DateTime[! is.na(tmp_par1$DateTime)]
+
     non_datetime_indices <- is.na(suppressWarnings(as.numeric(date_time_entries)))
-
+
     non_datetime_values <- date_time_entries[non_datetime_indices]
-
+
     msg <- sprintf(
       "All data values in first column need to be of type 'DATE/TIME'\n
       The following value(s) do not satisfy this condition: %s\n
@@ -96,50 +105,41 @@
       sheet,
       xlsPath
     )
-
+
     clean_stop(msg)
   }
-
+
   ### Fill missing date/time entries in case samples were taken
   ### (for details: see function: fill_datetime)
-
+
   ### Ignore columns without headers:
   cols_with_headers <- which(names(tmp_par1) != "")
-
+
   tmp_par2 <- fill_datetime(
     tmp_par1[, cols_with_headers],
     col_rawData_pattern = col_rawData_pattern
   ) %>%
-    dplyr::filter_(~ totSamples > 0)
-
-
-
+    dplyr::filter_(~ totSamples > 0L)
+
   ### Define time zone of samples
-  tmp_par3 <- set_timezone(
-    tmp_par2,
-    tz = tz_org
-  )
-
+  tmp_par3 <- set_timezone(tmp_par2, tz = tz_org)
+
   ### Define time zone be be used for export
-  tmp_par4 <- change_timezone(
-    tmp_par3,
-    tz = tz_export
-  )
-
-
-  col_import <- !grepl(pattern = col_ignore_pattern, x = names(tmp_par4))
-
-  tmp_par5 <- tmp_par4[, col_import]
-
-  col_values <- names(tmp_par5)[grepl("@", names(tmp_par5))]
-
+  tmp_par4 <- change_timezone(tmp_par3, tz = tz_export)
+
+  col_import <- ! grepl(pattern = col_ignore_pattern, x = names(tmp_par4))
+
+  tmp_par5 <- tmp_par4[, col_import]
+
+  col_values <- grep("@", names(tmp_par5), value = TRUE)
+
   tmp_par5_list <- tidyr::gather_(
     data = tmp_par5,
     key_col = "Keys",
     value_col = "ParameterValue",
     gather_cols = col_values
   )
-
+
   tmp_par6_list <- tidyr::separate_(
     tmp_par5_list,
     col = "Keys",
@@ -147,21 +147,21 @@
     sep = "@",
     remove = TRUE
   )
-
-
+
   ### Remove rows with NA as ParameterValue
   tmp_par7_list <- tmp_par6_list %>%
     dplyr::filter_("!is.na(ParameterValue)")
-
-
+
   ### Cast to numeric just in case EXCEL data is imported as CHARACTER
-  tmp_par7_list$ParameterValue <- suppressWarnings(as.numeric(tmp_par7_list$ParameterValue))
-
-
+  tmp_par7_list$ParameterValue <- suppressWarnings(
+    as.numeric(tmp_par7_list$ParameterValue)
+  )
+
   non_numeric_paravals <- tmp_par7_list$ParameterValue[is.na(tmp_par7_list$ParameterValue)]
-
+
   ### Check if all parameter values are of type NUMERIC
   if (any(non_numeric_paravals)) {
+
     msg <- sprintf(
       "All parameter values need to be numeric!\n
       The following value(s) do not satisfy this condition: %s\n
@@ -170,11 +170,11 @@
       sheet,
       xlsPath
     )
-
+
     clean_stop(msg)
   }
-
-  return(tmp_par7_list)
+
+  tmp_par7_list
 }
 
 #' Imports multiple analytics sheets from an EXCEL spreadsheet
@@ -198,51 +198,46 @@
 #' @return returns data frame with normalised analytics data in list form
 #' @import readxl tidyr dplyr
 #' @export
-import_sheets <- function(xlsPath,
-                          sheets_analytics,
-                          sheet_parameters = "Parameters",
-                          sheet_sites = "Sites",
-                          sheet_location = "Location",
-                          col_rawData_pattern = "raw",
-                          col_ignore_pattern = "mean|empty|X_|RX|not_used",
-                          ### skip: rows to skip for each sheet
-                          skip = 69,
-                          ### tz_org:
-                          tz_org = NULL,
-                          ### tz_export:
-                          tz_export = "UTC",
-                          dbg = TRUE) {
-  sites <- readxl::read_excel(
-    path = xlsPath,
-    sheet = sheet_sites
-  )
-
-  location <- readxl::read_excel(
-    path = xlsPath,
-    sheet = sheet_location
-  )
-
-  parameters <- readxl::read_excel(
-    xlsPath,
-    sheet = sheet_parameters
-  )
-
+import_sheets <- function(
+  xlsPath,
+  sheets_analytics,
+  sheet_parameters = "Parameters",
+  sheet_sites = "Sites",
+  sheet_location = "Location",
+  col_rawData_pattern = "raw",
+  col_ignore_pattern = "mean|empty|X_|RX|not_used",
+  ### skip: rows to skip for each sheet
+  skip = 69,
+  ### tz_org:
+  tz_org = NULL,
+  ### tz_export:
+  tz_export = "UTC",
+  dbg = TRUE
+)
+{
+  sites <- readxl::read_excel(path = xlsPath, sheet = sheet_sites)
+  location <- readxl::read_excel(path = xlsPath, sheet = sheet_location)
+  parameters <- readxl::read_excel(xlsPath, sheet = sheet_parameters)
+
   ### If no explicit time zone for analytics is defined, use value of column "ParameterUnit"
   ### in sheet_parameters with "ParameterCode TZ"
   if (is.null(tz_org)) {
     tz_org <- parameters$ParameterUnit[parameters$ParameterCode == "TZ"]
   }
-
-  for (sheet_index in seq_along(sheets_analytics)) {
+
+  data_frames <- lapply(seq_along(sheets_analytics), function(sheet_index) {
+
     mySheet <- sheets_analytics[sheet_index]
+
     if (dbg) {
+
       print(sprintf(
         "Importing & normalising analytics sheet: '%s' from '%s'",
         mySheet, basename(xlsPath)
       ))
     }
-
+
     tmp <- import_sheet(
       xlsPath = xlsPath,
       sheet = mySheet,
@@ -252,7 +247,7 @@
       tz_org = tz_org,
       tz_export = tz_export
     )
-
+
     tmp <- dplyr::left_join(tmp, sites, by = "SiteCode") %>%
       dplyr::left_join(parameters, by = "ParameterCode") %>%
       dplyr::left_join(location, by = "LocationID") %>%
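The next hunk replaces the accumulating for loop (repeated plyr::rbind.fill() calls) with the lapply()/do.call() idiom. A minimal sketch of the pattern with made-up data frames:

parts <- lapply(1:3, function(i) data.frame(sheet = i, value = rnorm(2)))
combined <- do.call(plyr::rbind.fill, parts)  # binds once; fills missing columns with NA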
@@ -274,14 +269,9 @@ import_sheets <- function(xlsPath, ~Comments, ~Who ) + }) - if (sheet_index == 1) { - res <- tmp - } else { - res <- plyr::rbind.fill(res, tmp) - } - } - return(res) + do.call(plyr::rbind.fill, data_frames) } #' Plot analytics data (in PDF) @@ -292,39 +282,39 @@ import_sheets <- function(xlsPath, #' @importFrom ggforce facet_wrap_paginate #' @importFrom grDevices dev.off pdf #' @export -plot_analytics <- function(df) { +plot_analytics <- function(df) +{ locIDs <- unique(df$LocationID) - + for (loc_index in seq_along(locIDs)) { + tmp <- df[df$LocationID == locIDs[loc_index], ] ### Create column "SiteLabel" for plotting (based on SiteCode & SiteName) in ### order to introduce an ordered plotting for second plot (starting with SP1: ### well water left -> ending with SP4: tank water) - tmp$SiteLabel <- sprintf( - "%s (%s)", - tmp$SiteCode, - tmp$SiteName - ) - + tmp$SiteLabel <- sprintf("%s (%s)", tmp$SiteCode, tmp$SiteName) + # Calculate the number of pages (based on unique ParameterNames) - n_pages <- length(unique(tmp[, "ParameterName"])) - + n_pages <- length(unique(tmp[["ParameterName"]])) + title_label <- sprintf( "Location: %s (ID: %s)", unique(tmp$LocationName), unique(tmp$LocationID) ) - + ### 1) Time series plot for each substance pdfDir <- "report" dir.create(pdfDir) - + grDevices::pdf( file = file.path(pdfDir, sprintf("%d_analytics_timeSeries.pdf", locIDs[loc_index])), width = 10, height = 7 ) + for (i in seq_len(n_pages)) { + g1 <- ggplot2::ggplot(tmp, aes_string( x = "DateTime", y = "ParameterValue", @@ -341,16 +331,21 @@ plot_analytics <- function(df) { ggplot2::theme_bw() + ggplot2::theme(legend.position = "top") + ggplot2::labs(title = title_label) + print(g1) } + grDevices::dev.off() + ### 2) All values per monitoring location for each substance grDevices::pdf( file = file.path(pdfDir, sprintf("%d_analytics_allSites_onePlot.pdf", locIDs[loc_index])), width = 10, height = 7 ) + for (i in seq_len(n_pages)) { + g2 <- ggplot2::ggplot(tmp, aes_string( x = "SiteLabel", y = "ParameterValue", @@ -363,16 +358,14 @@ plot_analytics <- function(df) { scales = "free_y", page = i ) + - ggplot2::geom_jitter( - width = 0.05, - height = 0, - alpha = 0.4 - ) + + ggplot2::geom_jitter(width = 0.05, height = 0, alpha = 0.4) + ggplot2::theme_bw() + ggplot2::theme(legend.position = "top") + ggplot2::labs(title = title_label) + print(g2) } + grDevices::dev.off() } } diff --git a/R/import_data_basel.R b/R/import_data_basel.R index e1cfde4..6eb66b0 100644 --- a/R/import_data_basel.R +++ b/R/import_data_basel.R @@ -1,50 +1,43 @@ #' Imports operational data for Basel (without metadata and only for one site #' at once, e.g. 
"rhein" or "wiese") #' @param xlsx_dir Define directory with raw data in EXCEL spreadsheet (.xlsx) to -#' be imported (default: sema.pilot:::package_file("shiny/basel/data/operation/wiese")) +#' be imported (default: sema.pilot:::shiny_file("basel/data/operation/wiese")) #' @return returns data frame with imported raw operational data #' @importFrom readxl read_excel #' @importFrom tidyr gather_ #' @export -import_operation_basel <- function(xlsx_dir = package_file("shiny/basel/data/operation/wiese")) { - xlsx_files <- list_full_xls_files() - - for (xlsx_file in xlsx_files) { - print(sprintf("Importing: %s", xlsx_file)) - tmp <- readxl::read_excel(path = xlsx_file) - - if (xlsx_file == xlsx_files[1]) { - raw_data <- tmp - } else { - raw_data <- rbind(raw_data, tmp) - } - } - +import_operation_basel <- function( + xlsx_dir = shiny_file("basel/data/operation/wiese") +) +{ + files <- list_full_xls_files(xlsx_dir) + + raw_data <- do.call(rbind, lapply(files, function(file) { + print(sprintf("Importing: %s", file)) + readxl::read_excel(path = file) + })) + names(raw_data)[1] <- "DateTime" - + print(sprintf("Setting time zone to 'CET'")) raw_data <- set_timezone(raw_data, tz = "CET") - - raw_data_tidy <- tidyr::gather_( + + tidyr::gather_( data = raw_data, key_col = "Parameter_Site_Unit", value_col = "ParameterValue", gather_cols = setdiff(names(raw_data), "DateTime") - ) - - - raw_data_tidy$Source <- "online" - raw_data_tidy$DataType <- "raw" - - - return(raw_data_tidy) + ) %>% + dplyr::mutate( + Source = "online", + DataType = "raw" + ) } - #' Imports analytical data for Basel (without metadata) #' @param csv_dir Define directory with raw analytical data in CSV (.csv) format to -#' be imported (default: sema.pilot:::package_file("shiny/basel/data/analytics")) +#' be imported (default: sema.pilot:::shiny_file("basel/data/analytics")) #' @return returns data frame with imported raw analytics data #' @importFrom janitor clean_names #' @importFrom readxl read_excel @@ -52,40 +45,34 @@ import_operation_basel <- function(xlsx_dir = package_file("shiny/basel/data/ope #' @importFrom dplyr group_by summarise select_ filter_ rename_ left_join mutate #' @export - -import_analytics_basel <- function(csv_dir = package_file("shiny/basel/data/analytics")) { - csv_files <- list_full_csv_files(csv_dir) - - for (csv_file in csv_files) { - print(sprintf("Importing: %s", csv_file)) - tmp <- read.csv2( - file = csv_file, - na.strings = "", - stringsAsFactors = FALSE - ) %>% +import_analytics_basel <- function(csv_dir = shiny_file("basel/data/analytics")) +{ + files <- list_full_csv_files(csv_dir) + + raw_data <- do.call(rbind, lapply(files, function(file) { + + print(sprintf("Importing: %s", file)) + + tmp <- read.csv2(file = file, na.strings = "", stringsAsFactors = FALSE) %>% janitor::clean_names() - - + ### Correct manually "prufpunkt_bezeichnung" for all "prufpunkt >= 94000" in ### case these are different from the "prufpunkt_bezeichnung" compared to ### "prufpunkt < 94000" - - + rep_strings <- list( "Metolachlor OA" = "Metolachlor-OXA", "Metolachlor ESA" = "Metolachlor-ESA", "N-Acetyl-4-aminoantipyri" = "N-Acetyl-4-Aminoantipyri", "\\" = "Cyprosulfamide" ) - + tmp$prufpunkt_bezeichnung_cor <- kwb.utils::multiSubstitute( strings = tmp$prufpunkt_bezeichnung, replacements = rep_strings ) - - - + correction_df <- tmp %>% dplyr::group_by_( "prufpunkt_bezeichnung", @@ -101,16 +88,14 @@ import_analytics_basel <- function(csv_dir = package_file("shiny/basel/data/anal "prufpunkt_bezeichnung_cor" = 
"prufpunkt_bezeichnung", "prufpunkt_cor" = "prufpunkt" ) - - tmp <- tmp %>% + + tmp %>% dplyr::left_join(correction_df) %>% - dplyr::mutate(DateTime = as.POSIXct(strptime( - x = paste( - tmp$datum, - tmp$uhrzeit - ), - format = "%d.%m.%Y %H:%M" - ))) %>% + dplyr::mutate( + DateTime = as.POSIXct( + strptime(x = paste(tmp$datum, tmp$uhrzeit), format = "%d.%m.%Y %H:%M") + ) + ) %>% dplyr::rename( SiteCode = "probestelle", ParameterCode_Org = "prufpunkt", @@ -136,45 +121,40 @@ import_analytics_basel <- function(csv_dir = package_file("shiny/basel/data/anal "Method_Org", "MethodName_Org" ) - - - - if (csv_file == csv_files[1]) { - raw_data <- tmp - } else { - raw_data <- rbind(raw_data, tmp) - } - } - + })) + print(sprintf("Setting time zone to 'CET'")) raw_data <- set_timezone(raw_data, tz = "CET") - - raw_data$ParameterValue <- as.numeric(raw_data$ParameterValue) - raw_data$Source <- "offline" - raw_data$DataType <- "raw" - - return(raw_data) + + raw_data %>% + dplyr::mutate( + ParameterValue = as.numeric(raw_data$ParameterValue), + Source = "offline", + DataType = "raw" + ) } - #' Helper function: add site metadata #' @param df data frame containing at least a column "SiteCode" #' @param df_col_sitecode column in df containing site code (default: "SiteCode") #' @param meta_site_path Define path of "meta_site.csv" to be imported -#' (default: sema.pilot:::package_file("shiny/basel/data/metadata/meta_site.csv")) +#' (default: sema.pilot:::shiny_file("basel/data/metadata/meta_site.csv")) #' @return returns input data frame with joined metadata #' @importFrom tidyr separate_ #' @export -add_site_metadata <- function(df, - df_col_sitecode = "SiteCode", - meta_site_path = package_file("shiny/basel/data/metadata/meta_site.csv")) { +add_site_metadata <- function( + df, + df_col_sitecode = "SiteCode", + meta_site_path = shiny_file("basel/data/metadata/meta_site.csv") +) +{ meta_site <- read.csv( - file = meta_site_path, - stringsAsFactors = FALSE, + file = meta_site_path, + stringsAsFactors = FALSE, na.strings = "" ) - + res <- df %>% tidyr::separate_( col = df_col_sitecode, @@ -182,61 +162,58 @@ add_site_metadata <- function(df, into = paste0("SiteName", 1:3), remove = FALSE ) - - + for (siteID in 1:3) { - print(sprintf( - "Replacing SiteCode%d with SiteName%d", - siteID, - siteID - )) + + print(sprintf("Replacing SiteCode%d with SiteName%d", siteID, siteID)) + col_sitename <- paste0("SiteName", siteID) sites <- meta_site[meta_site$SiteID == siteID, ] - - if (nrow(sites) > 0) { + + if (nrow(sites) > 0L) { + for (site_idx in 1:nrow(sites)) { + sel_site <- sites[site_idx, ] - strings_to_replace <- !is.na(res[, col_sitename]) & res[, col_sitename] == sel_site$SiteLocation - if (sum(strings_to_replace) > 0) { + + strings_to_replace <- ! 
is.na(res[, col_sitename]) &
+        res[, col_sitename] == sel_site$SiteLocation
+
+      if (sum(strings_to_replace) > 0L) {
         res[strings_to_replace, col_sitename] <- sel_site$SiteLocationName
       }
     }
   }
-    res[is.na(res[, col_sitename]), col_sitename] <- ""
+
+    res[[col_sitename]][is.na(res[[col_sitename]])] <- ""
   }
-
-  res$SiteName <- sprintf(
-    "%s (%s %s)",
-    res$SiteName1,
-    res$SiteName2,
-    res$SiteName3
-  )
-
-  return(res)
+
+  res$SiteName <- sprintf("%s (%s %s)", res$SiteName1, res$SiteName2, res$SiteName3)
+
+  res
 }
-
 #' Helper function: add parameter metadata
 #' @param df data frame containing at least a column "ParameterCode"
 #' @param meta_parameter_path Define path of "meta_parameter.csv" to be imported
-#' (default: sema.pilot:::package_file("shiny/basel/data/metadata/meta_parameter.csv"))
+#' (default: sema.pilot:::shiny_file("basel/data/metadata/meta_parameter.csv"))
 #' @return returns input data frame with joined metadata (parameter codes/ methods
 #' not included in meta_parameter file will not be imported!!!!)
 #' @importFrom dplyr left_join
 #' @export
-add_parameter_metadata <- function(df,
-                                   meta_parameter_path = package_file("shiny/basel/data/metadata/meta_parameter.csv")) {
+add_parameter_metadata <- function(
+  df,
+  meta_parameter_path = shiny_file("basel/data/metadata/meta_parameter.csv")
+)
+{
   meta_parameter <- read.csv(
     file = meta_parameter_path,
     stringsAsFactors = FALSE,
     na.strings = ""
   )
-
-
-  res <- df %>%
+
+  df %>%
     dplyr::inner_join(meta_parameter)
-
-  return(res)
 }

 #' Helper function: add label ("SiteName_ParaName_Unit_Method")
@@ -250,24 +227,30 @@ add_parameter_metadata <- function(df,
 #' @return returns input data frame with added column "SiteName_ParaName_Unit_Method"
 #' @export
-add_label <- function(df,
-                      col_sitename = "SiteName",
-                      col_parametername = "ParameterName",
-                      col_parameterunit = "ParameterUnit",
-                      col_method = "Method_Org") {
+add_label <- function(
+  df,
+  col_sitename = "SiteName",
+  col_parametername = "ParameterName",
+  col_parameterunit = "ParameterUnit",
+  col_method = "Method_Org"
+)
+{
   col_method_exists <- col_method %in% names(df)

-  if (col_method_exists) {
-    boolean_no_method <- is.na(df[, col_method]) | df[, col_method] == ""
+  methods <- df[[col_method]]
+
+  has_no_method <- if (col_method_exists) {
+    is.na(methods) | methods == ""
   } else {
-    boolean_no_method <- rep(TRUE, times = nrow(df))
+    rep(TRUE, times = nrow(df)) # one flag per row, also when col_method is absent
   }
-
+
   df$SiteName_ParaName_Unit_Method <- ""
-
-
-  if (any(boolean_no_method)) {
-    ind <- which(boolean_no_method)
+
+  if (any(has_no_method)) {
+
+    ind <- which(has_no_method)
+
     df$SiteName_ParaName_Unit_Method[ind] <- sprintf(
       "%s: %s (%s)",
       df[ind, col_sitename],
@@ -275,10 +258,12 @@ add_label <- function(df,
       df[ind, col_parameterunit]
     )
   }
-
-  if (any(!boolean_no_method)) {
-    ind <- which(!boolean_no_method)
-    df$SiteName_ParaName_Unit_Method[!boolean_no_method] <- sprintf(
+
+  if (any(! has_no_method)) {
+
+    ind <- which(! has_no_method)
+
+    df$SiteName_ParaName_Unit_Method[! has_no_method] <- sprintf(
       "%s: %s (%s, Method: %s)",
       df[ind, col_sitename],
       df[ind, col_parametername],
@@ -286,126 +271,126 @@ add_label <- function(df,
       df[ind, col_method]
     )
   }
-  return(df)
+
+  df
 }
-
 #' Imports operational data for Basel (with metadata for
 #' both sites at once, i.e.
"rhein" and "wiese") #' @param raw_dir_rhein Define directory for site "rhein" with raw data in #' EXCEL spreadsheet format (.xlsx) to be imported (default: -#' sema.pilot:::package_file("shiny/basel/data/operation/rhein")) +#' sema.pilot:::shiny_file("basel/data/operation/rhein")) #' @param raw_dir_wiese Define directory for site "rhein" with raw data in #' EXCEL spreadsheet format (.xlsx) to be imported (default: -#' sema.pilot:::package_file("shiny/basel/data/operation/wiese")) +#' sema.pilot:::shiny_file("basel/data/operation/wiese")) #' @param meta_online_path path to file containing metadata for online data -#' (default: sema.pilot:::package_file("shiny/basel/data/metadata/meta_online.csv")) +#' (default: sema.pilot:::shiny_file("basel/data/metadata/meta_online.csv")) #' @param meta_site_path Define path of "meta_site.csv" to be imported -#' (default: sema.pilot:::package_file("shiny/basel/data/metadata/meta_site.csv")) +#' (default: sema.pilot:::shiny_file("basel/data/metadata/meta_site.csv")) #' @param meta_parameter_path Define path of "meta_parameter.csv" to be imported -#' (default: sema.pilot:::package_file("shiny/basel/data/metadata/meta_parameter.csv")) +#' (default: sema.pilot:::shiny_file("basel/data/metadata/meta_parameter.csv")) #' @return returns data frame with imported raw operational data with metadata #' for both sites (i.e."rhein" and "wiese") #' @importFrom dplyr left_join #' @return data.frame with operational data for Basel sites including metadata #' @export -import_operation_meta_basel <- function(raw_dir_rhein = package_file("shiny/basel/data/operation/rhein"), - raw_dir_wiese = package_file("shiny/basel/data/operation/wiese"), - meta_online_path = package_file("shiny/basel/data/metadata/meta_online.csv"), - meta_site_path = package_file("shiny/basel/data/metadata/meta_site.csv"), - meta_parameter_path = package_file("shiny/basel/data/metadata/meta_parameter.csv")) { +import_operation_meta_basel <- function( + raw_dir_rhein = shiny_file("basel/data/operation/rhein"), + raw_dir_wiese = shiny_file("basel/data/operation/wiese"), + meta_online_path = shiny_file("basel/data/metadata/meta_online.csv"), + meta_site_path = shiny_file("basel/data/metadata/meta_site.csv"), + meta_parameter_path = shiny_file("basel/data/metadata/meta_parameter.csv") +) +{ meta_online <- read.csv( file = meta_online_path, stringsAsFactors = FALSE, na.strings = "" ) - - + online_meta <- add_site_metadata( df = meta_online, meta_site_path = meta_site_path ) %>% add_parameter_metadata(meta_parameter_path = meta_parameter_path) %>% add_label() - + ### 1.1) Wiese: Import XLSX data and join with metadata print("###################################################################") print("######## Importing operational data with metadata for site 'Wiese'") print("###################################################################") - + wiese <- import_operation_basel(xlsx_dir = raw_dir_wiese) %>% - dplyr::left_join(online_meta[grep( - pattern = "WF", - online_meta$SiteCode - ), ]) - - + dplyr::left_join(online_meta[grep(pattern = "WF", online_meta$SiteCode), ]) + ### 1.2) Rhein: Import XLSX data and join with metadata print("###################################################################") print("######## Importing operational data with metadata for site 'Rhein'") print("###################################################################") - + rhein <- import_operation_basel(xlsx_dir = raw_dir_rhein) %>% dplyr::left_join(online_meta[grep( pattern = "RF", online_meta$SiteCode ), ]) - - basel 
<- rbind(wiese, rhein)
-
-  return(basel)
+
+  rbind(wiese, rhein)
 }

 #' Imports analytical data for Basel (with metadata for both sites at once, i.e.
 #' "rhein" and "wiese")
 #' @param analytics_dir Define directory with raw analytical data in CSV (.csv) format to
-#' be imported (default: sema.pilot:::package_file("shiny/basel/data/analytics"))
+#' be imported (default: sema.pilot:::shiny_file("basel/data/analytics"))
 #' @param meta_site_path Define path of "meta_site.csv" to be imported
-#' (default: sema.pilot:::package_file("shiny/basel/data/metadata/meta_site.csv"))
+#' (default: sema.pilot:::shiny_file("basel/data/metadata/meta_site.csv"))
 #' @param meta_parameter_path Define path of "meta_parameter.csv" to be imported
-#' (default: sema.pilot:::package_file("shiny/basel/data/metadata/meta_parameter.csv"))
+#' (default: sema.pilot:::shiny_file("basel/data/metadata/meta_parameter.csv"))
 #' @return data.frame with analytics data for Basel sites including metadata
 #' @export
-import_analytics_meta_basel <- function(analytics_dir = package_file("shiny/basel/data/analytics"),
-                                        meta_site_path = package_file("shiny/basel/data/metadata/meta_site.csv"),
-                                        meta_parameter_path = package_file("shiny/basel/data/metadata/meta_parameter.csv")) {
+import_analytics_meta_basel <- function(
+  analytics_dir = shiny_file("basel/data/analytics"),
+  meta_site_path = shiny_file("basel/data/metadata/meta_site.csv"),
+  meta_parameter_path = shiny_file("basel/data/metadata/meta_parameter.csv")
+)
+{
   print("###################################################################")
   print("###### Importing analytics data with metadata for sites 'Wiese' and 'Rhein'")
   print("###################################################################")
-
-  analytics_meta_data <- import_analytics_basel(csv_dir = analytics_dir) %>%
+
+  import_analytics_basel(csv_dir = analytics_dir) %>%
     add_site_metadata(meta_site_path = meta_site_path) %>%
     add_parameter_metadata(meta_parameter_path = meta_parameter_path) %>%
    add_label()
-
-  return(analytics_meta_data)
 }

 #' Imports operational & analytical data for Basel (with metadata for both sites
 #' at once, i.e.
"rhein" and "wiese") #' @param analytics_dir Define directory with raw analytical data in CSV (.csv) format to -#' be imported (default: sema.pilot:::package_file("shiny/basel/data/analytics")) +#' be imported (default: sema.pilot:::shiny_file("basel/data/analytics")) #' @param raw_dir_rhein Define directory for site "rhein" with raw data in #' EXCEL spreadsheet format (.xlsx) to be imported (default: -#' sema.pilot:::package_file("shiny/basel/data/operation/rhein")) +#' sema.pilot:::shiny_file("basel/data/operation/rhein")) #' @param raw_dir_wiese Define directory for site "rhein" with raw data in #' EXCEL spreadsheet format (.xlsx) to be imported (default: -#' sema.pilot:::package_file("shiny/basel/data/operation/wiese")) +#' sema.pilot:::shiny_file("basel/data/operation/wiese")) #' @param meta_online_path path to file containing metadata for online data -#' (default: sema.pilot:::package_file("shiny/basel/data/metadata/meta_online.csv")) +#' (default: sema.pilot:::shiny_file("basel/data/metadata/meta_online.csv")) #' @param meta_parameter_path Define path of "meta_parameter.csv" to be imported -#' (default: sema.pilot:::package_file("shiny/basel/data/metadata/meta_parameter.csv")) +#' (default: sema.pilot:::shiny_file("basel/data/metadata/meta_parameter.csv")) #' @param meta_site_path Define path of "meta_site.csv" to be imported -#' (default: sema.pilot:::package_file("shiny/basel/data/metadata/meta_site.csv")) +#' (default: sema.pilot:::shiny_file("basel/data/metadata/meta_site.csv")) #' @return data.frame with analytical & operational data for Basel #' @importFrom plyr rbind.fill #' @export -import_data_basel <- function(analytics_dir = package_file("shiny/basel/data/analytics"), - raw_dir_rhein = package_file("shiny/basel/data/operation/rhein"), - raw_dir_wiese = package_file("shiny/basel/data/operation/wiese"), - meta_online_path = package_file("shiny/basel/data/metadata/meta_online.csv"), - meta_parameter_path = package_file("shiny/basel/data/metadata/meta_parameter.csv"), - meta_site_path = package_file("shiny/basel/data/metadata/meta_site.csv")) { +import_data_basel <- function( + analytics_dir = shiny_file("basel/data/analytics"), + raw_dir_rhein = shiny_file("basel/data/operation/rhein"), + raw_dir_wiese = shiny_file("basel/data/operation/wiese"), + meta_online_path = shiny_file("basel/data/metadata/meta_online.csv"), + meta_parameter_path = shiny_file("basel/data/metadata/meta_parameter.csv"), + meta_site_path = shiny_file("basel/data/metadata/meta_site.csv") +) +{ operation_meta <- import_operation_meta_basel( raw_dir_rhein = raw_dir_rhein, raw_dir_wiese = raw_dir_wiese, @@ -413,17 +398,16 @@ import_data_basel <- function(analytics_dir = package_file("shiny/basel/data/ana meta_site_path = meta_site_path, meta_parameter_path = meta_parameter_path ) + analytics_meta <- import_analytics_meta_basel( analytics_dir = analytics_dir, meta_site_path = meta_site_path, meta_parameter_path = meta_parameter_path ) - - + print("###################################################################") print("######## Add analytical to the operational data (including metadata)") print("###################################################################") - data_basel <- plyr::rbind.fill(operation_meta, analytics_meta) - - return(data_basel) + + plyr::rbind.fill(operation_meta, analytics_meta) } diff --git a/R/import_data_berlin_f.R b/R/import_data_berlin_f.R index 0205c75..86375b7 100644 --- a/R/import_data_berlin_f.R +++ b/R/import_data_berlin_f.R @@ -2,25 +2,26 @@ #' @param raw_data_files 
vector with full path to operational raw data files that
 #' allows to limit import to specific files (default:
-#' fs::dir_ls(package_file("shiny/berlin_f/data/raw/online_data""), recurse = TRUE,
+#' fs::dir_ls(kwb.pilot:::shiny_file("berlin_f/data/raw/online_data"), recurse = TRUE,
 #' regexp = "^[^~].*\\.xlsx$")).
 #' @param meta_file_path path to metadata file (default:
-#' kwb.pilot:::package_file(file.path("shiny/berlin_f/data/raw/online_data",
-#' "parameter_site_metadata.csv"))
+#' kwb.pilot:::shiny_file("berlin_f/data/raw/online_data/parameter_unit_metadata.csv"))
 #' @return data.frame with imported operational data (analytics data to be added as
 #' soon as available)
 #' @export
 #' @importFrom fs dir_ls
-import_data_berlin_f <- function(raw_data_files = fs::dir_ls(package_file("shiny/berlin_f/data/raw/online_data"), recurse = TRUE, regexp = "^[^~].*\\.xlsx$"),
-                                 meta_file_path = package_file("shiny/berlin_f/data/raw/online_data/parameter_unit_metadata.csv")) {
-
+import_data_berlin_f <- function(
+  raw_data_files = fs::dir_ls(shiny_file("berlin_f/data/raw/online_data"), recurse = TRUE, regexp = "^[^~].*\\.xlsx$"),
+  meta_file_path = shiny_file("berlin_f/data/raw/online_data/parameter_unit_metadata.csv")
+)
+{
   # kwb.utils::assignPackageObjects("kwb.pilot")
   # =======
-  # raw_data_dir = package_file("shiny/berlin_f/data/operation"),
+  # raw_data_dir = shiny_file("berlin_f/data/operation"),
   # raw_data_files = fs::dir_ls(raw_data_dir, recurse = TRUE, regexp = "^[^~].*\\.xlsx$"),
-  # analytics_path = package_file("shiny/berlin_f/data/analytics.xlsx"),
-  # meta_file_path = package_file("shiny/berlin_f/data/parameter_site_metadata.csv")
+  # analytics_path = shiny_file("berlin_f/data/analytics.xlsx"),
+  # meta_file_path = shiny_file("berlin_f/data/parameter_site_metadata.csv")
  # )
  # {
@@ -31,14 +32,11 @@ import_data_berlin_f <- function(raw_data_files = fs::dir_ls(package_file("shiny
   # data_berlin_f_offline <- import_lab_data_berlin_f(raw_data_dir = raw_data_dir,
   #   meta_file_path = meta_file_path)
-
-
   meta_data <- readr::read_csv(meta_file_path,
     col_types = "cc",
     locale = readr::locale(encoding = "UTF-8")
   )
-
   data_berlin_f <- read_weintek_batch(raw_data_files) %>%
     dplyr::left_join(meta_data) %>%
     dplyr::mutate(
@@ -58,7 +56,6 @@ import_data_berlin_f <- function(raw_data_files = fs::dir_ls(package_file("shiny
     ### Remove duplicates if any exist
     remove_duplicates(col_names = c("DateTime", "ParameterName", "SiteName"))
-
   Encoding(data_berlin_f$ParameterUnit) <- "UTF-8"

   # data_berlin_f <- remove_duplicates(
@@ -66,6 +63,5 @@ import_data_berlin_f <- function(raw_data_files = fs::dir_ls(package_file("shiny
   #   col_names = c("DateTime", "ParameterName", "SiteName")
   # )
-
-  return(data_berlin_f)
+  data_berlin_f
 }
diff --git a/R/import_data_haridwar.R b/R/import_data_haridwar.R
index 629e7b6..d125dab 100644
--- a/R/import_data_haridwar.R
+++ b/R/import_data_haridwar.R
@@ -1,11 +1,11 @@
 #' Imports Haridwar data
 #'
 #' @param analytics_path Define path of analytics EXCEL spreadsheet to be
-#' imported (default: kwb.pilot:::package_file("shiny/haridwar/data/analytics.xlsx"))
+#' imported (default: kwb.pilot:::shiny_file("haridwar/data/analytics.xlsx"))
 #' @param operation_mySQL_conf path to MySQL configuration file ".my.cnf"
-#' (default: kwb.pilot:::package_file("shiny/haridwar/.my.cnf"))
+#' (default: kwb.pilot:::shiny_file("haridwar/.my.cnf"))
 #' @param operation_meta_path path to table with meta data for operational
 #' parameters (default:
kwb.pilot:::package_file("shiny/haridwar/data/operation_parameters.csv")) +#' parameters (default: kwb.pilot:::shiny_file("haridwar/data/operation_parameters.csv")) #' @param excludedSheets all sheets, which are not listed here will be imported #' as lab data sheets (default: c("Parameters", "Location", "Sites", "#Summary", #' "Site_and_Parameter", "Observations", "dP", "ORP", "Flow", "Current_Voltage", @@ -21,33 +21,36 @@ #' @importFrom utils read.csv #' @export -import_data_haridwar <- function(analytics_path = package_file("shiny/haridwar/data/analytics.xlsx"), - operation_mySQL_conf = package_file("shiny/haridwar/.my.cnf"), - operation_meta_path = package_file("shiny/haridwar/data/operation_parameters.csv"), - excludedSheets = c( - "Parameters", - "Location", - "Sites", - "#Summary", - "Site_and_Parameter", - "Observations", - "dP", - "ORP", - "Flow", - "Current_Voltage", - # "SAK_254", - # "SAK_463", - "As_total_Arsenator" - ), - skip = 69, - debug = TRUE) { - if (!file.exists(analytics_path)) { +import_data_haridwar <- function( + analytics_path = shiny_file("haridwar/data/analytics.xlsx"), + operation_mySQL_conf = shiny_file("haridwar/.my.cnf"), + operation_meta_path = shiny_file("haridwar/data/operation_parameters.csv"), + excludedSheets = c( + "Parameters", + "Location", + "Sites", + "#Summary", + "Site_and_Parameter", + "Observations", + "dP", + "ORP", + "Flow", + "Current_Voltage", + # "SAK_254", + # "SAK_463", + "As_total_Arsenator" + ), + skip = 69, + debug = TRUE +) +{ + if (! file.exists(analytics_path)) { clean_stop(sprintf( "No analytics file %s is located under: %s", basename(analytics_path), dirname(analytics_path) )) } - + if (!file.exists(operation_mySQL_conf)) { clean_stop( "No '.my.cnf' file located under: ", dirname(operation_mySQL_conf), ".\n", @@ -71,7 +74,7 @@ import_data_haridwar <- function(analytics_path = package_file("shiny/haridwar/d all_sheets <- readxl::excel_sheets(path = analytics_path) - analytics_to_import <- all_sheets[!all_sheets %in% excludedSheets] + analytics_to_import <- all_sheets[! 
all_sheets %in% excludedSheets] analytics_4014 <- import_sheets( xlsPath = analytics_path, diff --git a/R/import_operation_berlin_s.R b/R/import_operation_berlin_s.R index 603f4b0..3e8e50f 100644 --- a/R/import_operation_berlin_s.R +++ b/R/import_operation_berlin_s.R @@ -4,29 +4,30 @@ #' for meta file creation #' @return data.frame with meta data file structure #' @importFrom data.table fread +#' @importFrom kwb.utils noFactorDataFrame #' @importFrom tidyr separate #' @export -create_wedeco_metafile <- function(raw_data_file) { +create_wedeco_metafile <- function(raw_data_file) +{ ozone <- kwb.utils::catAndRun( paste("Importing raw data file:", raw_data_file), data.table::fread(input = raw_data_file, header = TRUE, sep = ";", skip = 2) ) - - indices_names <- seq(from = 1, by = 2, to = ncol(ozone) - 1) + + indices_names <- seq(from = 1L, by = 2L, to = ncol(ozone) - 1L) indices_units <- indices_names + 1L - + na_vector <- rep(NA, length(indices_names)) units <- names(ozone)[indices_units] - - meta_file <- data.frame( + + meta_file <- kwb.utils::noFactorDataFrame( ParameterCode = na_vector, ParameterName = na_vector, SiteCode = na_vector, SiteName = na_vector, ParameterName_SiteName = names(ozone)[indices_names], ParameterUnitOrg = units, - ParameterUnit = units, - stringsAsFactors = FALSE + ParameterUnit = units ) %>% tidyr::separate( col = "ParameterName_SiteName", @@ -34,9 +35,9 @@ create_wedeco_metafile <- function(raw_data_file) { sep = " - ", remove = FALSE ) - + meta_file$ProzessID <- as.numeric(meta_file$ProzessID) - + meta_file$ParameterUnit <- kwb.utils::multiSubstitute( strings = meta_file$ParameterUnit, replacements = list( @@ -44,7 +45,7 @@ create_wedeco_metafile <- function(raw_data_file) { "V.*" = "" ) ) - + meta_file } @@ -59,9 +60,12 @@ create_wedeco_metafile <- function(raw_data_file) { #' @importFrom lubridate parse_date_time2 #' @importFrom data.table rbindlist #' @export -read_wedeco_data <- function(raw_data_dir = package_file("shiny/berlin_s/data/operation"), - raw_data_files = NULL, - meta_file_path = package_file("shiny/berlin_s/data/parameter_site_metadata.csv")) { +read_wedeco_data <- function( + raw_data_dir = shiny_file("berlin_s/data/operation"), + raw_data_files = NULL, + meta_file_path = shiny_file("berlin_s/data/parameter_site_metadata.csv") +) +{ meta_data <- read.csv(meta_file_path, stringsAsFactors = FALSE) %>% dplyr::select_( "ProzessID", @@ -72,50 +76,48 @@ read_wedeco_data <- function(raw_data_dir = package_file("shiny/berlin_s/data/op "SiteName", "ZeroOne" ) - + meta_data$ParameterLabel <- sprintf( "%s (%s)", meta_data$ParameterName, meta_data$ParameterUnit ) - + files_to_import <- if (is.null(raw_data_files)) { list_full_csv_files(raw_data_dir) } else { raw_data_files } - - raw_list <- lapply( - files_to_import, - FUN = function(pathfile) { + + raw_list <- lapply(files_to_import, function(pathfile) { + print(paste("Importing raw data file:", pathfile)) - + ozone <- data.table::fread( - pathfile, - header = TRUE, sep = ";", dec = ",", fill = TRUE, skip = 2 + pathfile, header = TRUE, sep = ";", dec = ",", fill = TRUE, skip = 2 ) - - indices_names <- seq(from = 1, by = 2, to = ncol(ozone) - 1) + + indices_names <- seq(from = 1L, by = 2L, to = ncol(ozone) - 1L) indices_units <- indices_names + 1L - - process_ids <- stringr::str_sub(names(ozone)[indices_names], 6, 11) - + + process_ids <- stringr::str_sub(names(ozone)[indices_names], 6L, 11L) + names(ozone)[indices_units] <- process_ids - - ozone <- ozone[, c(1, indices_units), with = FALSE] - - 
names(ozone)[1] <- "DateTime" - + + ozone <- ozone[, c(1L, indices_units), with = FALSE] + + names(ozone)[1L] <- "DateTime" + ozone$DateTime <- lubridate::parse_date_time2( ozone$DateTime, orders = "d!.m!*.y!* H!:M!:S!", tz = "CET" ) - + columns <- c("DateTime", meta_data$ProzessID[meta_data$ZeroOne == 1]) - + relevant_paras <- names(ozone)[names(ozone) %in% columns] - + ozone[, relevant_paras] %>% tidyr::gather_( key_col = "ProzessID", @@ -125,47 +127,50 @@ read_wedeco_data <- function(raw_data_dir = package_file("shiny/berlin_s/data/op dplyr::mutate_(ProzessID = "as.numeric(ProzessID)") } ) - + meta_data <- meta_data %>% select_(.dots = "-ZeroOne") - + df_tidy <- data.table::rbindlist(l = raw_list, use.names = TRUE) %>% dplyr::left_join(y = meta_data) %>% as.data.frame() - + df_tidy$Source <- "online" - + df_tidy$SiteName[kwb.utils::isNaOrEmpty(df_tidy$SiteName)] <- "General" - + df_tidy } #' Import data for Berlin Schoenerlinde #' @param raw_data_dir path of directory containing WEDECO CSV files -#' (default: kwb.pilot:::package_file("shiny/berlin_s/data/operation")) +#' (default: kwb.pilot:::shiny_file("berlin_s/data/operation")) #' @param raw_data_files vector with full path to operational raw data files that #' allows to limit import to specific files (default: NULL). If specified parameter #' "raw_data_dir" will not be used #' @param meta_file_path path to metadata file (default: -#' kwb.pilot:::package_file("shiny/berlin_s/data/parameter_site_metadata.csv")) +#' kwb.pilot:::shiny_file("berlin_s/data/parameter_site_metadata.csv")) #' @return list with "df": data.frame with imported operational data (analytics #' data to be added as soon as available) and "added_data_points": number of #' added data points in case of existing fst file was updated with new operational #' data #' @export -import_data_berlin_s <- function(raw_data_dir = package_file("shiny/berlin_s/data/operation"), - raw_data_files = NULL, - meta_file_path = package_file("shiny/berlin_s/data/parameter_site_metadata.csv")) { +import_data_berlin_s <- function( + raw_data_dir = shiny_file("berlin_s/data/operation"), + raw_data_files = NULL, + meta_file_path = shiny_file("berlin_s/data/parameter_site_metadata.csv") +) +{ df <- read_wedeco_data(raw_data_dir, raw_data_files, meta_file_path) - + df$DataType <- "raw" - + df$SiteName_ParaName_Unit <- sprintf( "%s: %s (%s)", df$SiteName, df$ParameterName, df$ParameterUnit ) - + ### Remove duplicates if any exist - remove_duplicates( - df = df, col_names = c("DateTime", "ParameterCode", "SiteCode") - ) + remove_duplicates(df = df, col_names = c( + "DateTime", "ParameterCode", "SiteCode" + )) } diff --git a/R/import_operation_berlin_t.R b/R/import_operation_berlin_t.R index 689849b..17288cc 100644 --- a/R/import_operation_berlin_t.R +++ b/R/import_operation_berlin_t.R @@ -1,39 +1,44 @@ +# import_lab_data_berlin_t ----------------------------------------------------- + #' BerlinTiefwerder: import lab data +#' #' @param xlsx_path full path to lab data EXCEL file in xlsx format -#' (default: kwb.pilot:::package_file("shiny/berlin_t/data/analytics.xlsx")) +#' (default: kwb.pilot:::shiny_file("berlin_t/data/analytics.xlsx")) #' @return a list of imported lab data for Berlin-Tiefwerder #' @import tidyr #' @importFrom dplyr left_join mutate #' @importFrom readxl read_xlsx #' @importFrom magrittr "%>%" #' @export -import_lab_data_berlin_t <- function(xlsx_path = package_file("shiny/berlin_t/data/analytics.xlsx")) { +import_lab_data_berlin_t <- function( + xlsx_path = 
shiny_file("berlin_t/data/analytics.xlsx") +) +{ lab_results <- xlsx_path %>% - readxl::read_xlsx(sheet = "Tabelle1", skip = 12) %>% - dplyr::mutate_( - ParameterName = gsub(pattern = "\\s*\\(.*", "", "ParameterCode") - ) - - gather_cols <- setdiff(names(lab_results), c( - "ParameterCode", "ParameterUnit", "ParameterName" - )) - + readxl::read_xlsx(sheet = "Tabelle1", skip = 12L) %>% + dplyr::mutate_(ParameterName = gsub("\\s*\\(.*", "", "ParameterCode")) + + parameters <- setdiff( + names(lab_results), + c("ParameterCode", "ParameterUnit", "ParameterName") + ) + sep_into <- c( "ProbenNr", "Date", "Termin", "Komplexkuerzel", "Ort_Typ", "Art", "Gegenstand", "Bezeichnung", "SiteName", "InterneKN", "Bemerkung", "DateTime" ) - + df <- lab_results %>% - tidyr::gather_("Combi", "ParameterValueRaw", gather_cols) %>% + tidyr::gather_("Combi", "ParameterValueRaw", parameters) %>% tidyr::separate_("Combi", sep_into, sep = "@", remove = TRUE) - + par_value_raw <- kwb.utils::selectColumns(df, "ParameterValueRaw") par_value_txt <- comma_to_dot(par_value_raw) par_value_num <- as.numeric(gsub("<", "", par_value_txt)) - + is_below <- grepl("<", par_value_txt) - + df <- kwb.utils::setColumns( df, Date = column_to_date(df, "Date"), @@ -43,15 +48,15 @@ import_lab_data_berlin_t <- function(xlsx_path = package_file("shiny/berlin_t/da DetectionLimit_numeric = ifelse(is_below, par_value_num, NA), ParameterValue = ifelse(is_below, par_value_num / 2, par_value_num) ) - + site_names <- unique(kwb.utils::selectColumns(df, "SiteName")) - + site_meta <- data.frame( SiteCode = seq_along(site_names), SiteName = site_names, stringsAsFactors = FALSE ) - + list( matrix = lab_results, list = df %>% @@ -60,15 +65,17 @@ import_lab_data_berlin_t <- function(xlsx_path = package_file("shiny/berlin_t/da ) } +# read_pentair_data ------------------------------------------------------------ + #' Read PENTAIR operational data #' #' @param raw_data_dir path of directory containing PENTAIR xls files -#' (default: kwb.pilot:::package_file("shiny/berlin_t/data/operation")) +#' (default: kwb.pilot:::shiny_file("berlin_t/data/operation")) #' @param raw_data_files vector with full path to operational raw data files that #' allows to limit import to specific files (default: NULL). 
If specified parameter #' "raw_data_dir" will not be used #' @param meta_file_path path to metadata file (default: -#' kwb.pilot:::package_file("shiny/berlin_t/data/parameter_site_metadata.csv")) +#' kwb.pilot:::shiny_file("berlin_t/data/parameter_site_metadata.csv")) #' @param locale locale (default: \code{\link[readr]{locale}}(tz = "CET")) #' @param col_types col_types (default: \code{\link[readr]{cols}}) #' @return data.frame with imported PENTAIR operational data @@ -79,131 +86,167 @@ import_lab_data_berlin_t <- function(xlsx_path = package_file("shiny/berlin_t/da #' @importFrom kwb.utils catAndRun #' @importFrom utils write.csv #' @export -read_pentair_data <- function(raw_data_dir = package_file("shiny/berlin_t/data/operation"), - raw_data_files = NULL, - meta_file_path = package_file("shiny/berlin_t/data/parameter_site_metadata.csv"), - locale = readr::locale(tz = "CET"), - col_types = readr::cols()) { - +read_pentair_data <- function( + raw_data_dir = shiny_file("berlin_t/data/operation"), + raw_data_files = NULL, + meta_file_path = shiny_file("berlin_t/data/parameter_site_metadata.csv"), + locale = readr::locale(tz = "CET"), + col_types = readr::cols() +) +{ xls_files <- if (is.null(raw_data_files)) { list_full_xls_files(raw_data_dir) } else { raw_data_files } - if(file.exists(meta_file_path)) { - - meta_data <- read.csv( - file = meta_file_path, header = TRUE, sep = ",", dec = ".", - stringsAsFactors = FALSE - ) - - columns <- c("TimeStamp", meta_data$ParameterCode[meta_data$ZeroOne == 1]) + meta_data <- if (file.exists(meta_file_path)) { + read_pentair_meta_data(meta_file_path) + } # else NULL - raw_list <- lapply(xls_files, FUN = function(xls_file) { - print(paste("Importing raw data file:", xls_file)) - tmp <- readr::read_tsv(file = xls_file, locale = locale, - col_types = col_types) - relevant_paras <- names(tmp)[names(tmp) %in% columns] - tmp[, relevant_paras] - - df_tidy <- data.table::rbindlist(l = raw_list, use.names = TRUE, fill = TRUE) - - - gather_cols <- setdiff(names(df_tidy), "TimeStamp") - - }) + df_tidy <- xls_files %>% + lapply(function(xls_file) { + kwb.utils::catAndRun( + paste("Importing raw data file:", xls_file), + expr = { + data <- readr::read_tsv( + file = xls_file, + locale = locale, + col_types = col_types + ) + if (is.null(meta_data)) { + data + } else { + is_active <- meta_data$ZeroOne == 1 + parameters <- meta_data$ParameterCode[is_active] + data[, intersect(names(data), c("TimeStamp", parameters))] + } + } + ) + }) %>% + data.table::rbindlist(use.names = TRUE, fill = TRUE) - } else { + parameters <- setdiff(names(df_tidy), "TimeStamp") - raw_list <- lapply(xls_files, FUN = function(xls_file) { - print(paste("Importing raw data file:", xls_file)) - tmp <- readr::read_tsv(file = xls_file, - locale = locale, - col_types = col_types) - }) - - df_tidy <- data.table::rbindlist(l = raw_list, use.names = TRUE, fill = TRUE) - - gather_cols <- setdiff(names(df_tidy), "TimeStamp") - - meta_data <- tibble::tibble(ParameterCode = gather_cols, - ParameterName = gather_cols, - ParameterUnit = "", - SiteCode = "", - SiteName = "", - ZeroOne = 1 + if (is.null(meta_data)) { + meta_data <- write_default_pentair_meta_data( + parameters = parameters, + target_dir = raw_data_dir ) - - meta_path <- file.path(raw_data_dir, "parameter_site_metadata_dummy.csv") - - msg_text <- sprintf("No metadata file provided. 
Generating and exporting dummy metadata file to '%s'.", - meta_path) - - kwb.utils::catAndRun(messageText = msg_text, expr = { - write.csv(meta_data, file = meta_path, row.names = FALSE) - }) - - } - - meta_data$ParameterLabel <- sprintf_columns("%s (%s)", meta_data, columns = c( - "ParameterName", "ParameterUnit")) - df_tidy <- data.table::rbindlist(l = raw_list, use.names = TRUE, fill = TRUE) + meta_data$ParameterLabel <- sprintf_columns( + "%s (%s)", + meta_data, + columns = c("ParameterName", "ParameterUnit") + ) + df_tidy <- df_tidy %>% + tidyr::pivot_longer( + cols = tidyselect::all_of(parameters), + names_to = "ParameterCode", + values_to = "ParameterValue" + ) %>% + dplyr::rename( + DateTime = "TimeStamp" + ) %>% + dplyr::left_join( + y = kwb.utils::removeColumns(meta_data, "ZeroOne") + ) %>% + as.data.frame() - gather_cols <- setdiff(names(df_tidy), "TimeStamp") + df_tidy$Source <- "online" + df_tidy$SiteName[is.na(df_tidy$SiteName)] <- "General" - df_tidy <- df_tidy %>% - tidyr::pivot_longer(cols = tidyselect::all_of(gather_cols), - names_to = "ParameterCode", - values_to = "ParameterValue") %>% - dplyr::rename(DateTime = "TimeStamp") %>% - dplyr::left_join(y = meta_data %>% dplyr::select(-tidyselect::matches("ZeroOne"))) %>% - as.data.frame() - - df_tidy$Source <- "online" + df_tidy +} - df_tidy$SiteName[is.na(df_tidy$SiteName)] <- "General" +# read_pentair_meta_data ------------------------------------------------------- +read_pentair_meta_data <- function(file) +{ + read.csv( + file = file, + header = TRUE, + sep = ",", + dec = ".", + stringsAsFactors = FALSE + ) +} - df_tidy +# write_default_pentair_meta_data ---------------------------------------------- +write_default_pentair_meta_data <- function(parameters, target_dir) +{ + meta_data <- tibble::tibble( + ParameterCode = parameters, + ParameterName = parameters, + ParameterUnit = "", + SiteCode = "", + SiteName = "", + ZeroOne = 1L + ) + + file <- file.path(target_dir, "parameter_site_metadata_dummy.csv") + + kwb.utils::catAndRun( + paste("No metadata file provided.", sprintf( + "Generating and exporting dummy metadata file to '%s'.", file + )), + expr = write.csv(meta_data, file = file, row.names = FALSE) + ) + + # Return the meta data + meta_data } +# import_data_berlin_t --------------------------------------------------------- + #' Import data for Berlin Tiefwerder #' #' @param raw_data_dir path of directory containing PENTAIR xls files -#' (default: kwb.pilot:::package_file("shiny/berlin_t/data/operation")) +#' (default: kwb.pilot:::shiny_file("berlin_t/data/operation")) #' @param raw_data_files vector with full path to operational raw data files that #' allows to limit import to specific files (default: NULL). 
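
The read_pentair_data() refactor above converges on a pattern used throughout this package: read the wide logger table, pivot it to long format, then attach parameter metadata by code. A minimal standalone sketch of that pattern (invented toy data, not the actual PENTAIR format):

library(dplyr)
library(tidyr)

# Toy wide logger output: one column per parameter code
wide <- tibble::tibble(
  TimeStamp = as.POSIXct("2021-01-01 00:00", tz = "CET"),
  FIT01 = 12.3,
  PIT02 = 1.04
)

# Toy parameter metadata keyed by ParameterCode
meta <- tibble::tibble(
  ParameterCode = c("FIT01", "PIT02"),
  ParameterName = c("Flow", "Pressure"),
  ParameterUnit = c("m3/h", "bar")
)

wide %>%
  pivot_longer(
    cols = -TimeStamp,
    names_to = "ParameterCode",
    values_to = "ParameterValue"
  ) %>%
  rename(DateTime = TimeStamp) %>%
  left_join(meta, by = "ParameterCode")
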
If specified parameter #' "raw_data_dir" will not be used #' @param analytics_path full path to lab data EXCEL file in xlsx format -#' (default: kwb.pilot:::package_file("shiny/berlin_t/data/analytics.xlsx")) +#' (default: kwb.pilot:::shiny_file("berlin_t/data/analytics.xlsx")) #' @param meta_file_path path to metadata file (default: -#' kwb.pilot:::package_file("shiny/berlin_t/data/parameter_site_metadata.csv")) +#' kwb.pilot:::shiny_file("berlin_t/data/parameter_site_metadata.csv")) #' @return data.frame with imported operational data (analytics´data to be added as #' soon as available) #' @export -import_data_berlin_t <- function(raw_data_dir = package_file("shiny/berlin_t/data/operation"), - raw_data_files = NULL, - analytics_path = package_file("shiny/berlin_t/data/analytics.xlsx"), - meta_file_path = package_file("shiny/berlin_t/data/parameter_site_metadata.csv")) { +import_data_berlin_t <- function( + raw_data_dir = shiny_file("berlin_t/data/operation"), + raw_data_files = NULL, + analytics_path = shiny_file("berlin_t/data/analytics.xlsx"), + meta_file_path = shiny_file("berlin_t/data/parameter_site_metadata.csv") +) +{ df <- read_pentair_data(raw_data_dir, raw_data_files, meta_file_path) - - #### To do: joind with ANALYTICS data as soon as available - # data_berlin_t_offline <- read_pentair_data(raw_data_dir = raw_data_dir, - # meta_file_path = meta_file_path) - - # data_berlin_t_offline <- import_lab_data_berlin_t(raw_data_dir = raw_data_dir, - # meta_file_path = meta_file_path) - + + # TODO: join with ANALYTICS data as soon as available + + # data_berlin_t_offline <- read_pentair_data( + # raw_data_dir = raw_data_dir, + # meta_file_path = meta_file_path + # ) + + # data_berlin_t_offline <- import_lab_data_berlin_t( + # raw_data_dir = raw_data_dir, + # meta_file_path = meta_file_path + # ) + df$DataType <- "raw" - - df$SiteName_ParaName_Unit <- sprintf_columns("%s: %s (%s)", df, columns = c( - "SiteName", "ParameterName", "ParameterUnit" - )) - - ### Remove duplicates if any exist - remove_duplicates(df, col_names = c("DateTime", "ParameterCode", "SiteCode")) + + df$SiteName_ParaName_Unit <- sprintf_columns( + "%s: %s (%s)", + df, + columns = c("SiteName", "ParameterName", "ParameterUnit") + ) + + # Remove duplicates if any exist + remove_duplicates( + df, + col_names = c("DateTime", "ParameterCode", "SiteCode") + ) } diff --git a/R/import_operation_haridwar.R b/R/import_operation_haridwar.R index 42d0c12..b6c9e1f 100644 --- a/R/import_operation_haridwar.R +++ b/R/import_operation_haridwar.R @@ -12,8 +12,16 @@ #' @importFrom dplyr src_mysql #' @importFrom dbplyr src_dbi #' @keywords internal -src_mysql_from_cnf <- function(dbname, group = NULL, dir = file.path(getwd(), ".my.cnf"), host = NULL, - user = NULL, password = NULL, ...) { +src_mysql_from_cnf <- function( + dbname, + group = NULL, + dir = file.path(getwd(), ".my.cnf"), + host = NULL, + user = NULL, + password = NULL, + ... +) +{ dir <- normalizePath(dir) kwb.utils::safePath(dir) @@ -36,7 +44,8 @@ src_mysql_from_cnf <- function(dbname, group = NULL, dir = file.path(getwd(), ". 
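
For context on the hunk below: src_mysql_from_cnf() pulls host, user and password from a group in a MySQL option file instead of taking them as arguments. The option-file parsing itself is not part of this diff; the following is only a rough sketch of how such a group could be read in base R, with the hypothetical helper name read_cnf_group() and the file layout assumed:

# Minimal sketch, illustrative only -- not the package's actual parser.
# Assumed file layout:
#   [autarcon]
#   host=localhost
#   user=me
#   password=secret
read_cnf_group <- function(path, group) {
  lines <- trimws(readLines(path))
  headers <- grep("^\\[.*\\]$", lines)                       # section headers
  start <- headers[lines[headers] == sprintf("[%s]", group)][1]
  stopifnot(!is.na(start))                                   # group must exist
  end <- c(headers[headers > start] - 1L, length(lines))[1]  # last line of group
  body <- if (end > start) lines[(start + 1L):end] else character(0)
  entries <- grep("=", body, value = TRUE)
  stats::setNames(
    as.list(trimws(sub("^[^=]*=", "", entries))),  # values
    trimws(sub("=.*$", "", entries))               # keys
  )
}

# Usage (assumed group name taken from the call in this file):
# creds <- read_cnf_group("~/.my.cnf", "autarcon"); creds$user
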
#' @return returns data frame operational data from MySQL db #' @importFrom dplyr filter tbl tbl_df rename_ filter_ select_ mutate_ #' @export -import_operation <- function(mysql_conf = file.path(getwd(), ".my.cnf")) { +import_operation <- function(mysql_conf = file.path(getwd(), ".my.cnf")) +{ sumewa <- src_mysql_from_cnf("sumewa", "autarcon", dir = mysql_conf) tbl_live <- dplyr::tbl(sumewa, "live") %>% @@ -83,7 +92,7 @@ import_operation <- function(mysql_conf = file.path(getwd(), ".my.cnf")) { duplicated_datetimes <- names(which(table(tbl_tmp$time) != 1)) - tbl_tmp[!tbl_tmp$time %in% duplicated_datetimes, ] %>% + tbl_tmp[! tbl_tmp$time %in% duplicated_datetimes, ] %>% left_join(data.frame( AnlagenID = c(4013, 4014), LocationName = rep("Haridwar", 2) @@ -93,7 +102,8 @@ import_operation <- function(mysql_conf = file.path(getwd(), ".my.cnf")) { } # MAIN ------------------------------------------------------------------------- -if (FALSE) { +if (FALSE) +{ operation <- import_operation() ### Calculate additional parameters: diff --git a/R/influxdb_ultimate.R b/R/influxdb_ultimate.R index 2af95ca..6cf9908 100644 --- a/R/influxdb_ultimate.R +++ b/R/influxdb_ultimate.R @@ -43,7 +43,7 @@ get_pivot_data <- function(agg_interval = "1d", tables <- client$query(text = flux_qry) data.table::rbindlist(tables) %>% - dplyr::select(order(colnames(.))) %>% + dplyr::select(order(colnames(.data))) %>% dplyr::relocate(.data$time, .after = "_time") %>% dplyr::select(-.data$`_time`) } diff --git a/R/merge_and_export_fst.R b/R/merge_and_export_fst.R index 7675fe3..235efd6 100644 --- a/R/merge_and_export_fst.R +++ b/R/merge_and_export_fst.R @@ -4,19 +4,23 @@ #' @param time_pattern optional pattern to filter months to be imported (default: NULL), #' for using it do e.g. "2017-06|2017-07" or c("2017-06", "2017-07") #' @param fst_dir directory with fst files or subdirs to be imported (default: -#' kwb.pilot:::package_file("shiny/berlin_t/data/fst")) +#' kwb.pilot:::shiny_file("berlin_t/data/fst")) #' @importFrom stringr str_detect #' @importFrom data.table rbindlist #' @return merged data.frame #' @keywords internal -group_fst_by_pattern <- function(time_pattern = NULL, - fst_pattern = "raw", - fst_dir = package_file("shiny/berlin_t/data/fst")) { +group_fst_by_pattern <- function( + time_pattern = NULL, + fst_pattern = "raw", + fst_dir = shiny_file("berlin_t/data/fst") +) +{ files <- list.files(fst_dir, fst_pattern, recursive = TRUE, full.names = TRUE) - if (!is.null(time_pattern)) { - if (length(time_pattern) > 1) { + if (! is.null(time_pattern)) { + + if (length(time_pattern) > 1L) { time_pattern <- to_pattern_or(time_pattern) } @@ -40,16 +44,19 @@ group_fst_by_pattern <- function(time_pattern = NULL, #' for using it do e.g. 
"2017-06|2017-07" or c("2017-06", "2017-07") #' @param compression compression for fst export (default: 100) #' @param import_dir directory with fst files or subdirs to be imported (default: -#' kwb.pilot:::package_file("shiny/berlin_t/data/fst")) +#' kwb.pilot:::shiny_file("berlin_t/data/fst")) #' @param export_dir directory with fst directory for export (default: -#' kwb.pilot:::package_file("shiny/berlin_t/data")) +#' kwb.pilot:::shiny_file("berlin_t/data")) #' @return imports multiple fst files and exports them to be used for app #' @export -merge_and_export_fst <- function(time_pattern = NULL, - compression = 100, - import_dir = package_file("shiny/berlin_t/data/fst"), - export_dir = package_file("shiny/berlin_t/data")) { - if (!dir.exists(export_dir)) { +merge_and_export_fst <- function( + time_pattern = NULL, + compression = 100, + import_dir = shiny_file("berlin_t/data/fst"), + export_dir = shiny_file("berlin_t/data") +) +{ + if (! dir.exists(export_dir)) { kwb.utils::catAndRun( sprintf("Creating export path: %s", export_dir), dir.create(export_dir, recursive = TRUE) @@ -57,6 +64,7 @@ merge_and_export_fst <- function(time_pattern = NULL, } for (fst_pattern in c("raw", "10min", "hour", "day")) { + site_data_list <- kwb.utils::catAndRun( paste("Grouping by", fst_pattern), group_fst_by_pattern( diff --git a/R/read_fst.R b/R/read_fst.R index 6554832..91191b1 100644 --- a/R/read_fst.R +++ b/R/read_fst.R @@ -8,30 +8,33 @@ #' @return data.frame with formatting of DateTime column POSIXct #' @importFrom fst read.fst #' @export -read_fst <- function(path, tz = "CET", col_datetime = "DateTime", ...) { +read_fst <- function(path, tz = "CET", col_datetime = "DateTime", ...) +{ df <- fst::read.fst(path, ...) df[, col_datetime] <- as.POSIXct(df[, col_datetime], origin = "1970-01-01", tz = "CET") df } - #' Load fst data for shiny app #' #' @param fst_dir directory of fst files to be loaded #' @export -load_fst_data <- function(fst_dir) { +load_fst_data <- function(fst_dir) +{ print("### Step 4: Loading data ##########################") - + step_no <- 0L - + step_assign <- function(title, varname, filename) { step_no <<- step_no + 1L print(sprintf("### %d): %s", step_no, title)) - assign(x = varname, - value = read_fst(file.path(fst_dir, filename)), - envir = .GlobalEnv) + assign( + x = varname, + value = read_fst(file.path(fst_dir, filename)), + envir = .GlobalEnv + ) } - + step_assign("Raw data", "siteData_raw_list", "siteData_raw_list.fst") step_assign("10 minutes data", "siteData_10min_list", "siteData_10min_list.fst") step_assign("hourly data", "siteData_hour_list", "siteData_hour_list.fst") diff --git a/R/read_mbr4.R b/R/read_mbr4.R index 9e7969a..b23c62f 100644 --- a/R/read_mbr4.R +++ b/R/read_mbr4.R @@ -13,28 +13,32 @@ #' @return Reads MBR4.0 tsv data #' @importFrom readr read_tsv locale cols col_double col_character col_datetime -read_mbr4_tsv <- function(path, - locale = readr::locale(tz = "CET", - decimal_mark = ",", - grouping_mark = "."), - col_types = readr::cols( - .default = readr::col_double(), - zustand = readr::col_character(), - meldungen = readr::col_character(), - Zeitstempel = readr::col_datetime(format = "%d.%m.%Y %H:%M") - ), - dbg = FALSE, - ...) { - +read_mbr4_tsv <- function( + path, + locale = readr::locale( + tz = "CET", + decimal_mark = ",", + grouping_mark = "." 
+ ), + col_types = readr::cols( + .default = readr::col_double(), + zustand = readr::col_character(), + meldungen = readr::col_character(), + Zeitstempel = readr::col_datetime(format = "%d.%m.%Y %H:%M") + ), + dbg = FALSE, + ... +) +{ import_raw <- function() { - readr::read_tsv( - file = path, - locale = locale, - col_types = col_types, - trim_ws = TRUE, - ... - ) %>% - dplyr::select(!tidyselect::matches("X[0-9]+")) + readr::read_tsv( + file = path, + locale = locale, + col_types = col_types, + trim_ws = TRUE, + ... + ) %>% + dplyr::select(!tidyselect::matches("X[0-9]+")) } if (dbg) { @@ -45,7 +49,6 @@ read_mbr4_tsv <- function(path, } } - #' Read MBR4.0 data combining latest and archived data #' @description Download latest data as 'tsv' from Martin Systems Webportal and #' combine with archived ('tsv') on Nextcloud' @@ -71,38 +74,38 @@ read_mbr4_tsv <- function(path, #' mbr4_data <- read_mbr4() #' str(mbr4_data) #' } -read_mbr4 <- function(latest_url = Sys.getenv("MBR40_URL"), - archived_file = "MBR_export_", - archived_dir = "projects/MBR4.0/Exchange/Rohdaten/Online_export", - archived_url = Sys.getenv("NEXTCLOUD_URL"), - archived_user = Sys.getenv("NEXTCLOUD_USER"), - archived_pw = Sys.getenv("NEXTCLOUD_USER"), - target_dir = tempdir(), - dbg = FALSE, - ...) { - - - - mbr4_data_latest <- read_mbr4_latest(url = latest_url, - target_dir = target_dir, - dbg = dbg, - ...) - +read_mbr4 <- function( + latest_url = Sys.getenv("MBR40_URL"), + archived_file = "MBR_export_", + archived_dir = "projects/MBR4.0/Exchange/Rohdaten/Online_export", + archived_url = Sys.getenv("NEXTCLOUD_URL"), + archived_user = Sys.getenv("NEXTCLOUD_USER"), + archived_pw = Sys.getenv("NEXTCLOUD_USER"), + target_dir = tempdir(), + dbg = FALSE, + ... +) +{ + mbr4_data_latest <- read_mbr4_latest( + url = latest_url, + target_dir = target_dir, + dbg = dbg, + ... + ) - mbr4_data_archived <- read_mbr4_archived(file = archived_file, - dir = archived_dir, - target_dir = target_dir, - url = archived_url, - user = archived_user, - pw = archived_pw, - dbg = dbg, - ...) + mbr4_data_archived <- read_mbr4_archived( + file = archived_file, + dir = archived_dir, + target_dir = target_dir, + url = archived_url, + user = archived_user, + pw = archived_pw, + dbg = dbg, + ... + ) - dplyr::bind_rows(mbr4_data_latest, - mbr4_data_archived) %>% + dplyr::bind_rows(mbr4_data_latest, mbr4_data_archived) %>% remove_duplicates() - - } #' Read MBR4.0 data from Martin Systems Webportal (As "tsv") @@ -129,37 +132,35 @@ read_mbr4 <- function(latest_url = Sys.getenv("MBR40_URL"), #' #' @examples #' url_mbr40 <- Sys.getenv("MBR40_URL") -#' if(url_mbr40 != "") { -#' mbr4_data_latest <- read_mbr4_latest(url = url_mbr40) -#' str(mbr4_data_latest) +#' if (url_mbr40 != "") { +#' mbr4_data_latest <- read_mbr4_latest(url = url_mbr40) +#' str(mbr4_data_latest) #' } -read_mbr4_latest <- function(url = Sys.getenv("MBR40_URL"), - target_dir = tempdir(), - locale = readr::locale(tz = "CET", - decimal_mark = ".", - grouping_mark = ","), - col_types = readr::cols( - .default = readr::col_double(), - zustand = readr::col_character(), - meldungen = readr::col_character(), - Zeitstempel = readr::col_datetime(format = "%Y-%m-%d %H:%M:%S") - ), - dbg = FALSE, - ...) 
{
-
+read_mbr4_latest <- function(
+  url = Sys.getenv("MBR40_URL"),
+  target_dir = tempdir(),
+  locale = readr::locale(tz = "CET", decimal_mark = ".", grouping_mark = ","),
+  col_types = readr::cols(
+    .default = readr::col_double(),
+    zustand = readr::col_character(),
+    meldungen = readr::col_character(),
+    Zeitstempel = readr::col_datetime(format = "%Y-%m-%d %H:%M:%S")
+  ),
+  dbg = FALSE,
+  ...
+)
+{
   target_path <- file.path(target_dir, "mbr4.tsv")

-  utils::download.file(
-    url = url,
-    destfile = target_path, quiet = !dbg
-  )
+  utils::download.file(url = url, destfile = target_path, quiet = ! dbg)

-  read_mbr4_tsv(path = target_path,
-                locale = locale,
-                col_types = col_types,
-                dbg = dbg,
-                ...)
-
+  read_mbr4_tsv(
+    path = target_path,
+    locale = locale,
+    col_types = col_types,
+    dbg = dbg,
+    ...
+  )
 }

 #' Read MBR4.0 archived data from Nextcloud
@@ -186,17 +187,17 @@ read_mbr4_latest <- function(url = Sys.getenv("MBR40_URL"),
 #' @export
 #'
 #' @examples
+#' \dontrun{
 #' if(check_env_nextcloud()) {
-#' mbr4_data_archived <- read_mbr4_archived()
-#' str(mbr4_data_archived)
+#'   mbr4_data_archived <- read_mbr4_archived()
+#'   str(mbr4_data_archived)
+#' }
 #' }
 read_mbr4_archived <- function(
   file = "MBR_export_",
   dir = "projects/MBR4.0/Exchange/Rohdaten/Online_export",
   target_dir = tempdir(),
-  locale = readr::locale(tz = "CET",
-                         decimal_mark = ",",
-                         grouping_mark = "."),
+  locale = readr::locale(tz = "CET", decimal_mark = ",", grouping_mark = "."),
   col_types = readr::cols(
     .default = readr::col_double(),
     zustand = readr::col_character(),
@@ -207,37 +208,43 @@ read_mbr4_archived <- function(
   user = Sys.getenv("NEXTCLOUD_USER"),
   pw = Sys.getenv("NEXTCLOUD_USER"),
   dbg = FALSE,
-  ...) {
-
-  stopifnot(all(c(url, user, pw) != ""))
-
+  ...
+)
+{
+  stopifnot(all(c(url, user, pw) != ""))

   archived_file <- kwb.nextcloud::list_files(
     path = dir,
     full_info = TRUE) %>%
-    dplyr::filter(stringr::str_detect(.data$file,
-                                      pattern = sprintf("^%s",
-                                                        file))) %>%
-    dplyr::filter(stringr::str_detect(.data$file,
-                                      pattern = "\\.tsv$")) %>%
+    dplyr::filter(stringr::str_detect(.data$file, pattern = sprintf("^%s", file))) %>%
+    dplyr::filter(stringr::str_detect(.data$file, pattern = "\\.tsv$")) %>%
     dplyr::arrange(dplyr::desc(.data$lastmodified))

-  if (nrow(archived_file) > 1) {
-    message(sprintf(paste("Multiple '.tsv' files (%s) found on Nextcloud.",
-                          "Using newest one '%s' (last modified: %s"),
-                    paste(archived_file$file, collapse = ", "),
-                    archived_file$file[1],
-                    archived_file$lastmodified))
-    archived_file <- archived_file[1,]
+  if (nrow(archived_file) > 1L) {
+
+    message(sprintf(
+      paste(
+        "Multiple '.tsv' files (%s) found on Nextcloud.",
+        "Using newest one '%s' (last modified: %s)"
+      ),
+      paste(archived_file$file, collapse = ", "),
+      archived_file$file[1],
+      archived_file$lastmodified
+    ))
+
+    archived_file <- archived_file[1L, ]
   }

-  archived_path <- kwb.nextcloud::download_files(hrefs = archived_file$href,
-                                                 target_dir = target_dir)
+  archived_path <- kwb.nextcloud::download_files(
+    hrefs = archived_file$href,
+    target_dir = target_dir
+  )

-  read_mbr4_tsv(path = archived_path,
-                locale = locale,
-                col_types = col_types,
-                dbg = dbg,
-                ...)
+  read_mbr4_tsv(
+    path = archived_path,
+    locale = locale,
+    col_types = col_types,
+    dbg = dbg,
+    ...
+ ) } - diff --git a/R/read_weintek.R b/R/read_weintek.R index 2a4a12b..b5e71c6 100644 --- a/R/read_weintek.R +++ b/R/read_weintek.R @@ -8,7 +8,8 @@ #' @return data frame with Weintek raw data #' @export #' -read_weintek <- function(path, tz = "CET", dbg = TRUE) { +read_weintek <- function(path, tz = "CET", dbg = TRUE) +{ if (dbg) { message("Importing file: ", path) } @@ -29,7 +30,6 @@ read_weintek <- function(path, tz = "CET", dbg = TRUE) { set_timezone(df, tz = tz, col_datetime = "DateTime") } - #' Read Weintek data from multiple files #' #' @param files path to Weintek files @@ -40,7 +40,8 @@ read_weintek <- function(path, tz = "CET", dbg = TRUE) { #' @return data frame with Weintek raw data #' @export #' -read_weintek_batch <- function(files, tz = "CET", dbg = TRUE) { +read_weintek_batch <- function(files, tz = "CET", dbg = TRUE) +{ paraname_site <- basename(dirname(files)) data_list <- stats::setNames( diff --git a/R/remove_duplicates.R b/R/remove_duplicates.R index b199b08..a7e8f20 100644 --- a/R/remove_duplicates.R +++ b/R/remove_duplicates.R @@ -5,10 +5,11 @@ #' @return data.frame without duplicates #' @importFrom kwb.utils stringList #' @export -remove_duplicates <- function(df, col_names = names(df)) { +remove_duplicates <- function(df, col_names = names(df)) +{ available <- col_names %in% names(df) - if (!all(available)) { + if (! all(available)) { clean_stop( "The following 'col_names' specified by the user are not defined ", "in the 'df':\n", kwb.utils::stringList(col_names[!available]) @@ -28,7 +29,7 @@ remove_duplicates <- function(df, col_names = names(df)) { print_to_text(df[is_duplicated, ]) ) - df <- df[!is_duplicated, ] + df <- df[! is_duplicated, ] } df diff --git a/R/report_batch.R b/R/report_batch.R index f692067..845d745 100644 --- a/R/report_batch.R +++ b/R/report_batch.R @@ -3,7 +3,8 @@ #' @param rmd_name name of Rmarkdown document to render (default: "report.Rmd") #' @param output_dir outputDirectory (default: getwd()) #' @keywords internal -write_report <- function(rmd_name = "report.Rmd", output_dir = getwd()) { +write_report <- function(rmd_name = "report.Rmd", output_dir = getwd()) +{ code <- 'if (!require("rmarkdown") || packageVersion("rmarkdown") < "1.3") { install.packages("rmarkdown", repos = "https://cloud.r-project.org") } @@ -28,17 +29,20 @@ write_report <- function(rmd_name = "report.Rmd", output_dir = getwd()) { #' @param open_in_explorer open batchDir in Windows explorer (default: TRUE). #' Only working on a Windows system! #' @export -create_report_batch <- function(batchDir = file.path(tempdir(), "batch_report"), - batchName = "create_report.bat", - report_path = NULL, - report_config_path = NULL, - open_in_explorer = TRUE) { +create_report_batch <- function( + batchDir = file.path(tempdir(), "batch_report"), + batchName = "create_report.bat", + report_path = NULL, + report_config_path = NULL, + open_in_explorer = TRUE +) +{ # Helper function force_copy <- function(from, to) file.copy(from, to, overwrite = TRUE) batchDir <- gsub(pattern = "\\\\", replacement = .Platform$file.sep, batchDir) - if (!dir.exists(batchDir)) { + if (! 
dir.exists(batchDir)) { dir.create(batchDir, showWarnings = FALSE) } @@ -46,7 +50,7 @@ create_report_batch <- function(batchDir = file.path(tempdir(), "batch_report"), on.exit(owdir) report_path <- kwb.utils::defaultIfNULL( - report_path, package_file("shiny/haridwar/report/report.Rmd") + report_path, shiny_file("haridwar/report/report.Rmd") ) report_name <- basename(report_path) @@ -60,15 +64,13 @@ create_report_batch <- function(batchDir = file.path(tempdir(), "batch_report"), write_report(rmd_name = report_name, output_dir = batchDir) - if (!is.null(report_config_path)) { - if (file.exists(report_config_path)) { - force_copy( - from = report_config_path, - to = file.path(batchDir, "input", "report_config.txt") - ) - } + if (! is.null(report_config_path) && file.exists(report_config_path)) { + force_copy( + from = report_config_path, + to = file.path(batchDir, "input", "report_config.txt") + ) } - + batch_r_script <- paste0( kwb.utils::removeExtension(basename(batchName)), ".R" ) @@ -94,7 +96,7 @@ create_report_batch <- function(batchDir = file.path(tempdir(), "batch_report"), cat("Batch file & data structure created at:", normalizePath(batch_path)) - if (open_in_explorer & .Platform$OS.type == "windows") { + if (open_in_explorer && .Platform$OS.type == "windows") { shell(paste("explorer", normalizePath(batchDir))) } diff --git a/R/report_configuration.R b/R/report_configuration.R index 5319a10..f7b782c 100644 --- a/R/report_configuration.R +++ b/R/report_configuration.R @@ -10,9 +10,12 @@ #' @return default list for report configuration template #' @export -report_config_template <- function(df = NULL, - temporal_aggregation = "raw", - output_timezone = "UTC") { +report_config_template <- function( + df = NULL, + temporal_aggregation = "raw", + output_timezone = "UTC" +) +{ if (is.null(df)) { sitenames <- c( "General", @@ -21,22 +24,24 @@ report_config_template <- function(df = NULL, "After Filter", "After AO Cell" ) - + parameters_online <- "Redox potential" parameters_offline <- "Temperature" - + start_day <- sprintf("%s-01", format(Sys.Date(), format = "%Y-%m")) end_day <- as.character(Sys.Date()) daterange <- c(start_day, end_day) + } else { + unique_pars <- function(src) unique(df$ParameterName[df$Source == src]) - + sitenames <- unique(df$SiteName) parameters_online <- unique_pars("online") parameters_offline <- unique_pars("offline") daterange <- as.character(as.Date(range(df$DateTime))) } - + list( report_sitenames = sitenames, report_aggregation = temporal_aggregation, @@ -70,23 +75,24 @@ report_config_template <- function(df = NULL, #' output_file = "report_config.txt" #' ) #' } -report_config_to_txt <- function(config_list, output_file = "report_config.txt") { +report_config_to_txt <- function(config_list, output_file = "report_config.txt") +{ ### Write config list to text file ### see http://stackoverflow.com/questions/8261590/write-list-to-a-text-file-preserving-names-r - + if (file.exists(output_file)) { file.remove(output_file) } - + output_dir <- dirname(output_file) - - if (!dir.exists(output_dir)) { + + if (! 
dir.exists(output_dir)) {
     dir.create(path = output_dir, showWarnings = FALSE)
   }
-  
+
   # z <- deparse(substitute(config_list))
   # cat(z, "\n", file=output_file)
-  
+
   for (key in names(config_list)) {
     cat(file = output_file, append = TRUE, sprintf(
       "%s=%s\n", key, kwb.utils::stringList(config_list[[key]], collapse = " ")
@@ -111,29 +117,30 @@ report_config_to_txt <- function(config_list, output_file = "report_config.txt")
 #' ### Check whether both are identical
 #' identical(x = config, y = config_imported)
 #' }
-report_txt_to_config <- function(config_txt = "report_config.txt") {
+report_txt_to_config <- function(config_txt = "report_config.txt")
+{
   x <- scan(config_txt, what = "", sep = "\n")
-  
+
   # Split each line into a key and a value at "="
   y <- strsplit(x, "=")
-  
+
   # Extract the first vector element and set it as the list element name
   names(y) <- sapply(y, `[[`, 1)
-  
+
   # names(y) <- sapply(y, function(x) x[[1]]) # same as above
   # Remove the first vector element from each list element
   y <- lapply(y, `[`, -1)
-  
+
   ### Remove "'" from character strings
-  y <- lapply(y, FUN = function(x) {
+  y <- lapply(y, function(x) {
     gsub(pattern = "'", replacement = "", unlist(strsplit(x, split = "'\\s")))
   })
-  
+
   num_aggregation <- as.numeric(y$report_aggregation)
-  
-  if (!is.na(num_aggregation)) {
+
+  if (! is.na(num_aggregation)) {
     y$report_aggregation <- num_aggregation
   }
-  
+
   y
 }
diff --git a/R/run_shiny.R b/R/run_shiny.R
index d50c9d5..93f882f 100644
--- a/R/run_shiny.R
+++ b/R/run_shiny.R
@@ -14,17 +14,23 @@
 #' @importFrom shinythemes shinytheme
 #' @importFrom kwb.utils stringList
 #' @export
-run_app <- function(siteName = "haridwar", use_live_data = FALSE, mySQL_conf = NULL,
-                    launch.browser = TRUE, ...) {
+run_app <- function(
+  siteName = "haridwar",
+  use_live_data = FALSE,
+  mySQL_conf = NULL,
+  launch.browser = TRUE,
+  ...
+)
+{
   use_live_data <- toupper(use_live_data)

-  shinyDir <- package_file("shiny")
+  shinyDir <- shiny_file()

   appDir <- file.path(shinyDir, siteName)

   site_names <- dir(shinyDir)

-  if (!siteName %in% site_names) {
+  if (! siteName %in% site_names) {
     clean_stop(
       "Could not find shiny app directory for ", siteName, ".\n",
       "Please select for parameter 'siteName' one of:\n",
@@ -33,14 +39,16 @@ run_app <- function(siteName = "haridwar", use_live_data = FALSE, mySQL_conf = N
   }

   if (siteName == "haridwar") {
+
     mySQL_conf_path <- file.path(appDir, ".my.cnf")

     if (use_live_data) {
-      if (!is.null(mySQL_conf)) {
+
+      if (! is.null(mySQL_conf)) {
         file.copy(from = mySQL_conf, to = mySQL_conf_path)
       }

-      if (!file.exists(mySQL_conf_path)) {
+      if (! file.exists(mySQL_conf_path)) {
         clean_stop(
           "No '.my.cnf' file located under: ", appDir, ".\n",
           "Please specify the path to a valid MySQL config file once with ",
@@ -52,7 +60,7 @@ run_app <- function(siteName = "haridwar", use_live_data = FALSE, mySQL_conf = N

   global_path <- file.path(appDir, "global.R")

-  if (!file.exists(global_path)) {
+  if (! file.exists(global_path)) {
     clean_stop("Could not find a 'global.R' in: ", appDir)
   }

@@ -65,6 +73,8 @@ run_app <- function(siteName = "haridwar", use_live_data = FALSE, mySQL_conf = N
   shiny::runApp(
     appDir,
-    display.mode = "normal", launch.browser = launch.browser, ...
+    display.mode = "normal",
+    launch.browser = launch.browser,
+    ...
   )
 }
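`run_app()` keeps its behaviour after the reformatting: it resolves the app directory from `siteName` and, for the Haridwar site, copies the MySQL configuration file into the app directory on first use. A minimal usage sketch (the `.my.cnf` path below is a hypothetical placeholder):

```r
# Run the packaged Shiny app for the Haridwar site on the demo data
kwb.pilot::run_app(siteName = "haridwar", use_live_data = FALSE)

# With live data, the MySQL config file has to be supplied only once;
# it is copied next to the app and found again on later runs
# kwb.pilot::run_app(
#   siteName = "haridwar",
#   use_live_data = TRUE,
#   mySQL_conf = "path/to/.my.cnf"  # hypothetical path
# )
```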
diff --git a/R/tidy_mbr4_data.R b/R/tidy_mbr4_data.R
index 1802090..5ea4df4 100644
--- a/R/tidy_mbr4_data.R
+++ b/R/tidy_mbr4_data.R
@@ -15,14 +15,19 @@
 #' mbr4_data <- read_mbr4()
 #' mbr4_data_tidy <- kwb.pilot::tidy_mbr4_data(mbr4_data)
 #' }
-tidy_mbr4_data <- function(mbr4_data,
-                           path_metadata = kwb.pilot:::shiny_file("mbr4.0/data/metadata.csv")) {
+tidy_mbr4_data <- function(
+  mbr4_data,
+  path_metadata = shiny_file("mbr4.0/data/metadata.csv")
+)
+{
   remove_cols <- c("zustand", "meldungen", "id")
   keep_cols <- c("Zeitstempel")

-  metadata <- readr::read_csv(file = path_metadata,
-                              col_types = readr::cols(.default = "c"))
+  metadata <- readr::read_csv(
+    file = path_metadata,
+    col_types = readr::cols(.default = "c")
+  )

   mbr4_data %>%
     dplyr::select(!tidyselect::all_of(remove_cols)) %>%
diff --git a/R/timezone.R b/R/timezone.R
index 8735e94..a3a357d 100644
--- a/R/timezone.R
+++ b/R/timezone.R
@@ -3,7 +3,8 @@
 #' @param x a vector that should be tested whether it is of type POSIXct
 #' @return returns TRUE if x is of type POSIXct
 #' @keywords internal
-is_POSIXct <- function(x) {
+is_POSIXct <- function(x)
+{
   inherits(x, "POSIXct")
 }

@@ -17,7 +18,8 @@ is_POSIXct <- function(x) {
 #' \url{https://en.wikipedia.org/wiki/List_of_tz_database_time_zones} for more
 #' details.
 #' @export
-set_timezone <- function(df, tz = "UTC", col_datetime = "DateTime") {
+set_timezone <- function(df, tz = "UTC", col_datetime = "DateTime")
+{
   # Convert tibble into R data.frame
   df <- as.data.frame(df)

@@ -29,12 +31,13 @@ set_timezone <- function(df, tz = "UTC", col_datetime = "DateTime") {
 }

 # get_posix_column_or_stop -----------------------------------------------------
-get_posix_column_or_stop <- function(df, column) {
+get_posix_column_or_stop <- function(df, column)
+{
   # Select the time column
   times <- kwb.utils::selectColumns(df, column)

   # Assumption: first column of dataframe always needs to be date/time (i.e. POSIXct)
-  if (!is_POSIXct(times)) {
+  if (! is_POSIXct(times)) {
     clean_stop(
       "Column ", column, " needs to be of type DATE/TIME (POSIXct). ",
       "Please check sheet 'xyz' of imported xls file 'xyz'!"
@@ -56,12 +59,19 @@ get_posix_column_or_stop <- function(df, column) {
 #' details.
 #' @importFrom lubridate with_tz
 #' @export
-change_timezone <- function(df, tz = "UTC", col_datetime = "DateTime", debug = TRUE) {
+change_timezone <- function(
+  df,
+  tz = "UTC",
+  col_datetime = "DateTime",
+  debug = TRUE
+)
+{
   times <- get_posix_column_or_stop(df, col_datetime)

   tz_org <- paste(unique(base::attr(times, "tzone")), collapse = " , ")

   if (tz_org == tz) {
+
     kwb.utils::catIf(debug, sprintf(
       "Original time zone(s) %s and new time zone %s are identical", tz_org, tz
     ))
@@ -87,7 +97,8 @@ change_timezone <- function(df, tz = "UTC", col_datetime = "DateTime", debug = T
 #' @importFrom rvest html_nodes html_table
 #' @importFrom xml2 read_html
 #' @export
-get_valid_timezones <- function() {
+get_valid_timezones <- function()
+{
   url_tz <- "https://en.wikipedia.org/wiki/List_of_tz_database_time_zones"

   url_tz %>%
diff --git a/R/utils.R b/R/utils.R
index 03d0fe6..4a51316 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -1,10 +1,18 @@
+# as_posix_cet -----------------------------------------------------------------
+as_posix_cet <- function(fmt, x)
+{
+  as.POSIXct(sprintf(fmt, x), tz = "CET")
+}
+
 # clean_stop -------------------------------------------------------------------
-clean_stop <- function(...) {
+clean_stop <- function(...)
+{
   stop(..., call.
= FALSE) } # column_to_date --------------------------------------------------------------- -column_to_date <- function(df, column) { +column_to_date <- function(df, column) +{ dates_raw <- kwb.utils::selectColumns(df, column) janitor::excel_numeric_to_date( date_num = as.numeric(dates_raw), @@ -13,32 +21,31 @@ column_to_date <- function(df, column) { } # comma_to_dot ----------------------------------------------------------------- -comma_to_dot <- function(x) { +comma_to_dot <- function(x) +{ gsub(",", ".", x) } # list_full_csv_files ---------------------------------------------------------- -list_full_csv_files <- function(path) { +list_full_csv_files <- function(path) +{ list.files(path, pattern = "\\.csv", full.names = TRUE) } # list_full_xls_files ---------------------------------------------------------- -list_full_xls_files <- function(path) { +list_full_xls_files <- function(path) +{ list.files(path, pattern = "\\.xls", full.names = TRUE) } # num_column_to_posix_cet ------------------------------------------------------ -num_column_to_posix_cet <- function(df, column) { +num_column_to_posix_cet <- function(df, column) +{ times_raw <- kwb.utils::selectColumns(df, column) times_num <- as.numeric(comma_to_dot(times_raw)) as.POSIXct(times_num * 24 * 3600, origin = "1899-12-30", tz = "CET") } -# package_file ----------------------------------------------------------------- -package_file <- function(...) { - system.file(..., package = "kwb.pilot") -} - #' Path to Shiny File in Package #' #' @param ... relative path to file in "shiny" folder of R package "kwb.pilot" @@ -47,32 +54,38 @@ package_file <- function(...) { #' @export #' @examples #' shiny_file() -shiny_file <- function(...) { +shiny_file <- function(...) +{ system.file("shiny", ... , package = "kwb.pilot") } # print_to_text ---------------------------------------------------------------- #' @importFrom utils capture.output -print_to_text <- function(x) { +print_to_text <- function(x) +{ paste(utils::capture.output(print(x)), collapse = "\n") } -# sprintf_columns ---------------------------------------------------------------- -sprintf_columns <- function(fmt, df, columns) { +# sprintf_columns -------------------------------------------------------------- +sprintf_columns <- function(fmt, df, columns) +{ do.call(sprintf, c(list(fmt), kwb.utils::selectColumns(df, columns))) } # to_list_items ---------------------------------------------------------------- -to_list_items <- function(items) { +to_list_items <- function(items) +{ paste("* ", items, collapse = " \n") } # to_month_pattern ------------------------------------------------------------- -to_month_pattern <- function(from, to) { +to_month_pattern <- function(from, to) +{ to_pattern_or(c(from, to)) } # to_pattern_or ---------------------------------------------------------------- -to_pattern_or <- function(x) { +to_pattern_or <- function(x) +{ paste0(x, collapse = "|") } diff --git a/_pkgdown.yml b/_pkgdown.yml index 74cba87..e021ef0 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -76,6 +76,7 @@ reference: General functions which can be used for all pilot plant (with default parameterisation "AquaNES: Haridwar" site) contents: + - aggregate_export_fst - calenderweek_from_dates - calculate_operational_parameters - change_timezone @@ -98,7 +99,6 @@ reference: - set_timezone - shiny_file - - title: "AquaNES: Berlin-Tiefwerder (site 1)" desc: > Functions for importing data of Berlin-Tiefwerder site diff --git a/inst/shiny/basel/global.R b/inst/shiny/basel/global.R index 
22d892a..e27ad97 100644 --- a/inst/shiny/basel/global.R +++ b/inst/shiny/basel/global.R @@ -38,7 +38,7 @@ saveRDS(siteData_day_list, file = "data/siteData_day_list.Rds") print("### Step 5: Importing threshold information ##########################") -threshold_file <- kwb.pilot:::package_file("shiny/basel/data/thresholds.csv") +threshold_file <- kwb.pilot:::shiny_file("basel/data/thresholds.csv") thresholds <- kwb.pilot::get_thresholds(threshold_file) diff --git a/inst/shiny/basel/report/report.Rmd b/inst/shiny/basel/report/report.Rmd index 9c5629d..1555830 100644 --- a/inst/shiny/basel/report/report.Rmd +++ b/inst/shiny/basel/report/report.Rmd @@ -32,7 +32,7 @@ params: ```{r, echo=FALSE, results='hide', message=FALSE, warning=FALSE} -thresholds_file <- kwb.pilot:::package_file("shiny/berlin_s/data/thresholds.csv") +thresholds_file <- kwb.pilot:::shiny_file("berlin_s/data/thresholds.csv") thresholds <- kwb.pilot::get_thresholds(thresholds_file) diff --git a/inst/shiny/berlin_f/config/config.yml b/inst/shiny/berlin_f/config/config.yml new file mode 100644 index 0000000..2f5fbbb --- /dev/null +++ b/inst/shiny/berlin_f/config/config.yml @@ -0,0 +1,65 @@ +parameters: +- Durchfluss_Rohwasser +- Durchfluss_Konzentrat +- Durchfluss_Rezirkulation +- Druck_Arbeitsdruck +- Druck_Rohwasser +- Druck_Konzentrat +- Druck_Permeat +- LF_Permeat +- LF_Rohwasser +- LF_Konzentrat +- Temperatur_Rohwasser +calculated: + vfrPerm: + name: Durchfluss Permeat + unit: l/h + expr: '`Durchfluss_Rohwasser` - `Durchfluss_Konzentrat`' + yield: + name: Ausbeute + unit: '%' + expr: 100*(`Durchfluss_Rohwasser` - `Durchfluss_Konzentrat`) / `Durchfluss_Rohwasser` + conLoop: + name: Leitfähigkeit Rezirkulation + unit: μS/cm + expr: (`Durchfluss_Rohwasser`*`LF_Rohwasser` + `Durchfluss_Rezirkulation`*`LF_Konzentrat`)/(`Durchfluss_Rohwasser` + + `Durchfluss_Rezirkulation`) + recovery: + name: Rückhalt + unit: '%' + expr: 100*(1 - `LF_Permeat` / conLoop) + deltaPreProcConc: + name: Druckverlust (Feed - Konzentrat) + unit: bar + expr: '`Druck_Arbeitsdruck` - `Druck_Konzentrat`' + flux: + name: Flux + unit: l/h/m2 + expr: vfrPerm / (4 * 7.6) + cfv: + name: Überströmungsgeschwindigkeit + unit: m/s + expr: (`Durchfluss_Rohwasser`+ `Durchfluss_Rezirkulation`) / ((pi * 0.0095^2) + * 1000 * 3600) + tmp: + name: Transmembrandruck + unit: bar + expr: ((`Druck_Arbeitsdruck` + `Druck_Konzentrat`) / 2) - `Druck_Permeat` + nwpt: + name: Normalisierter Permeatstrom + unit: l/h + expr: |- + normalised_permeate_flow(tempFeed = `Temperatur_Rohwasser`, + conLoop = `conLoop`, + vfrPerm = `vfrPerm`, + vfrLoop = `Durchfluss_Rezirkulation`, + vfrFeed = `Durchfluss_Rohwasser`, + prePerm = `Druck_Permeat`, + preProc = `Druck_Arbeitsdruck`, + preConc = `Druck_Konzentrat`, + nwp0 = 1.429162, + vfrPerm0 = 800) + nwpr: + name: Relativer Permeatstrom + unit: '%' + expr: '- ((1 - (nwpt / vfrPerm))) * 100' diff --git a/inst/shiny/berlin_f/global.R b/inst/shiny/berlin_f/global.R index a0a7ce0..014038d 100644 --- a/inst/shiny/berlin_f/global.R +++ b/inst/shiny/berlin_f/global.R @@ -20,17 +20,17 @@ kwb.pilot::aggregate_export_fst_berlin_f(year_month_start = year_month_start, month_pattern <- paste0(c(year_month_start,year_month_end), collapse = "|") kwb.pilot::merge_and_export_fst(time_pattern = month_pattern, - import_dir = kwb.pilot:::package_file("shiny/berlin_f/data/fst"), - export_dir = kwb.pilot:::package_file("shiny/berlin_f/data/fst")) + import_dir = kwb.pilot:::shiny_file("berlin_f/data/fst"), + export_dir = kwb.pilot:::shiny_file("berlin_f/data/fst")) } 
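The new `inst/shiny/berlin_f/config/config.yml` above moves the formerly hard-coded calculation definitions into site-specific YAML files. As a rough illustration of how one `calculated` entry relates to the raw parameters (the package itself wires this up via `get_calc_config()` and `get_calc_info_from_config()`; this sketch only mimics that mechanism with toy data):

```r
library(yaml)

# Read the site configuration shipped with the package
config <- yaml::read_yaml(kwb.pilot::shiny_file("berlin_f/config/config.yml"))

# Toy raw data; column names must match the parameter codes in the YAML
raw <- data.frame(
  Durchfluss_Rohwasser = c(1000, 1200),
  Durchfluss_Konzentrat = c(600, 660)
)

# Evaluate the 'yield' expression in the context of the data frame
calc <- config$calculated$yield
eval(parse(text = calc$expr), envir = raw)
#> [1] 40 45   (calc$unit: "%")
```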
-kwb.pilot::load_fst_data(fst_dir = kwb.pilot:::package_file("shiny/berlin_f/data/fst")) +kwb.pilot::load_fst_data(fst_dir = kwb.pilot:::shiny_file("berlin_f/data/fst")) print("### Step 5: Importing threshold information ##########################") print("### NOT IMPLEMENTED YET") -threshold_file <- kwb.pilot:::package_file("shiny/berlin_f/data/thresholds.csv") +threshold_file <- kwb.pilot:::shiny_file("berlin_f/data/thresholds.csv") thresholds <- kwb.pilot::get_thresholds(threshold_file) diff --git a/inst/shiny/berlin_s/config/config.yml b/inst/shiny/berlin_s/config/config.yml new file mode 100644 index 0000000..ed4533d --- /dev/null +++ b/inst/shiny/berlin_s/config/config.yml @@ -0,0 +1,16 @@ +parameters: +- SCAN_SAK_Ablauf +- SCAN_SAK_Zulauf +- C_O3_Zugas +- C_O3_Abgas +- Q_Gas +- Q_Ozonanlage +calculated: + deltaSAK: + name: delta SAK + unit: '%' + expr: (1-SCAN_SAK_Ablauf/SCAN_SAK_Zulauf)*100 + Ozoneintrag: + name: Ozoneintrag + unit: mg-O3/L + expr: (C_O3_Zugas - C_O3_Abgas)*Q_Gas/Q_Ozonanlage diff --git a/inst/shiny/berlin_s/global.R b/inst/shiny/berlin_s/global.R index 96147d2..f816ea2 100644 --- a/inst/shiny/berlin_s/global.R +++ b/inst/shiny/berlin_s/global.R @@ -22,17 +22,17 @@ month_pattern <- kwb.pilot:::to_month_pattern(year_month_start, year_month_end) kwb.pilot::merge_and_export_fst( time_pattern = month_pattern, - import_dir = kwb.pilot:::package_file("shiny/berlin_s/data/fst"), - export_dir = kwb.pilot:::package_file("shiny/berlin_s/data") + import_dir = kwb.pilot:::shiny_file("berlin_s/data/fst"), + export_dir = kwb.pilot:::shiny_file("berlin_s/data") ) } -kwb.pilot::load_fst_data(fst_dir = kwb.pilot:::package_file("shiny/berlin_s/data")) +kwb.pilot::load_fst_data(fst_dir = kwb.pilot:::shiny_file("berlin_s/data")) print("### Step 5: Importing threshold information ##########################") -threshold_file <- kwb.pilot:::package_file("shiny/berlin_s/data/thresholds.csv") +threshold_file <- kwb.pilot:::shiny_file("berlin_s/data/thresholds.csv") thresholds <- kwb.pilot::get_thresholds(threshold_file) diff --git a/inst/shiny/berlin_s/report/report.Rmd b/inst/shiny/berlin_s/report/report.Rmd index 7b0239d..c8204e7 100644 --- a/inst/shiny/berlin_s/report/report.Rmd +++ b/inst/shiny/berlin_s/report/report.Rmd @@ -32,7 +32,7 @@ params: ```{r, echo=FALSE, results='hide', message=FALSE, warning=FALSE} -thresholds_file <- kwb.pilot:::package_file("shiny/berlin_s/data/thresholds.csv") +thresholds_file <- kwb.pilot:::shiny_file("berlin_s/data/thresholds.csv") thresholds <- kwb.pilot::get_thresholds(thresholds_file) diff --git a/inst/shiny/berlin_t/config/config.yml b/inst/shiny/berlin_t/config/config.yml new file mode 100644 index 0000000..e573a43 --- /dev/null +++ b/inst/shiny/berlin_t/config/config.yml @@ -0,0 +1,8 @@ +parameters: +- FY-20-01 +- FT-10-01 +calculated: + recovery: + name: recovery + unit: '%' + expr: 100*`FY-20-01`/`FT-10-01` diff --git a/inst/shiny/berlin_t/global.R b/inst/shiny/berlin_t/global.R index 4c24360..868098a 100644 --- a/inst/shiny/berlin_t/global.R +++ b/inst/shiny/berlin_t/global.R @@ -23,11 +23,11 @@ kwb.pilot::merge_and_export_fst(time_pattern = month_pattern) } -kwb.pilot::load_fst_data(fst_dir = kwb.pilot:::package_file("shiny/berlin_t/data")) +kwb.pilot::load_fst_data(fst_dir = kwb.pilot:::shiny_file("berlin_t/data")) print("### Step 5: Importing threshold information ##########################") -threshold_file <- kwb.pilot:::package_file("shiny/berlin_t/data/thresholds.csv") +threshold_file <- 
kwb.pilot:::shiny_file("berlin_t/data/thresholds.csv") thresholds <- kwb.pilot::get_thresholds(threshold_file) diff --git a/inst/shiny/berlin_t/report/report.Rmd b/inst/shiny/berlin_t/report/report.Rmd index 0280a63..329fe0f 100644 --- a/inst/shiny/berlin_t/report/report.Rmd +++ b/inst/shiny/berlin_t/report/report.Rmd @@ -33,7 +33,7 @@ params: ```{r, echo=FALSE, results='hide', message=FALSE, warning=FALSE} -thresholds_file <- kwb.pilot:::package_file("shiny/berlin_t/data/thresholds.csv") +thresholds_file <- kwb.pilot:::shiny_file("berlin_t/data/thresholds.csv") thresholds <- kwb.pilot::get_thresholds(thresholds_file) diff --git a/inst/shiny/haridwar/config/config.yml b/inst/shiny/haridwar/config/config.yml new file mode 100644 index 0000000..75a6756 --- /dev/null +++ b/inst/shiny/haridwar/config/config.yml @@ -0,0 +1,34 @@ +parameters: +- Redox_Out1 +- Redox_Out2 +- Redox_In +- Flux +- Up +- Ip +- Uz +- Iz +calculated: + Redox_Out: + name: Mean redox potential in tank + unit: mV + expr: (Redox_Out1+Redox_Out2)/2 + Redox_Diff: + name: Difference (outflow - inflow) of redox potential + unit: mV + expr: Redox_Out - Redox_In + Power_pump: + name: Power demand of pump + unit: W + expr: Up*Ip + Power_cell: + name: Power demand of cell + unit: W + expr: Uz*Iz + Pump_WhPerCbm: + name: Specific energy demand of pump + unit: Wh/m3 + expr: Power_pump/(Flux/1000) + Cell_WhPerCbm: + name: Specific energy demand of cell + unit: Wh/m3 + expr: Power_cell/(Flux/1000) diff --git a/inst/shiny/mbr4.0/global.R b/inst/shiny/mbr4.0/global.R index 9ff0468..90b9b4d 100644 --- a/inst/shiny/mbr4.0/global.R +++ b/inst/shiny/mbr4.0/global.R @@ -35,7 +35,7 @@ kwb.pilot::load_fst_data(fst_dir = kwb.pilot::shiny_file("mbr4.0/data/fst")) print("### Step 5: Importing threshold information ##########################") print("### NOT IMPLEMENTED YET") -#threshold_file <- kwb.pilot:::package_file("shiny/mbr4.0/data/thresholds.csv") +#threshold_file <- kwb.pilot:::shiny_file("mbr4.0/data/thresholds.csv") #thresholds <- kwb.pilot::get_thresholds(threshold_file) diff --git a/man/add_parameter_metadata.Rd b/man/add_parameter_metadata.Rd index 0ed5807..5eec164 100644 --- a/man/add_parameter_metadata.Rd +++ b/man/add_parameter_metadata.Rd @@ -6,14 +6,14 @@ \usage{ add_parameter_metadata( df, - meta_parameter_path = package_file("shiny/basel/data/metadata/meta_parameter.csv") + meta_parameter_path = shiny_file("basel/data/metadata/meta_parameter.csv") ) } \arguments{ \item{df}{data frame containing at least a column "ParameterCode"} \item{meta_parameter_path}{Define path of "meta_parameter.csv" to be imported -(default: sema.pilot:::package_file("shiny/basel/data/metadata/meta_parameter.csv"))} +(default: sema.pilot:::shiny_file("basel/data/metadata/meta_parameter.csv"))} } \value{ returns input data frame with joined metadata (parameter codes/ methods diff --git a/man/add_site_metadata.Rd b/man/add_site_metadata.Rd index eeae4a3..b8ec40b 100644 --- a/man/add_site_metadata.Rd +++ b/man/add_site_metadata.Rd @@ -7,7 +7,7 @@ add_site_metadata( df, df_col_sitecode = "SiteCode", - meta_site_path = package_file("shiny/basel/data/metadata/meta_site.csv") + meta_site_path = shiny_file("basel/data/metadata/meta_site.csv") ) } \arguments{ @@ -16,7 +16,7 @@ add_site_metadata( \item{df_col_sitecode}{column in df containing site code (default: "SiteCode")} \item{meta_site_path}{Define path of "meta_site.csv" to be imported -(default: sema.pilot:::package_file("shiny/basel/data/metadata/meta_site.csv"))} +(default: 
sema.pilot:::shiny_file("basel/data/metadata/meta_site.csv"))} } \value{ returns input data frame with joined metadata diff --git a/man/aggregate_export_fst.Rd b/man/aggregate_export_fst.Rd new file mode 100644 index 0000000..6d3f81d --- /dev/null +++ b/man/aggregate_export_fst.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/aggregate_export_fst_general.R +\name{aggregate_export_fst} +\alias{aggregate_export_fst} +\title{Aggregate and Export to FST Format} +\usage{ +aggregate_export_fst( + year_month_start = NULL, + year_month_end = format(Sys.Date(), "\%Y-\%m"), + compression = 100, + FUN_get_monthly_data = NULL, + FUN_import = NULL, + FUN_calculate_ops = NULL, + prefix = NULL, + calculate_ops_on_level = c("raw", "aggregated")[1L], + siteData_raw_list = NULL, + export_dir_path = NULL +) +} +\arguments{ +\item{year_month_start}{start year month in yyyy-mm format, e.g. "2017-06"} + +\item{year_month_end}{end year month (default: current month)} + +\item{compression}{(default: 100)} + +\item{FUN_get_monthly_data}{function to be called to determine +"raw_data_file_paths"} + +\item{FUN_import}{function to be called to read data into "siteData_raw_list"} + +\item{FUN_calculate_ops}{function to be called to calculate operational +parameters. If \code{NULL} (the default), no operational parameters are +calculated} + +\item{prefix}{site-specific prefix to be used as a sub folder name in the +export directory} + +\item{calculate_ops_on_level}{one of "raw", "aggregated". Determines whether +to calculate operational parameters based on the raw or on the aggregated +values. Not used if \code{FUN_calculate_ops} is \code{NULL}} + +\item{siteData_raw_list}{existing raw data. If given (not \code{NULL}), no +monthly periods are calculated and no data are imported. The given data are +treated as one and only (monthly?) period.} + +\item{export_dir_path}{path to the export directory. Only required if raw +data are given in \code{siteData_raw}.} +} +\value{ +exports data for each month into subfolder: /data/fst/year-month +} +\description{ +Aggregate and Export to FST Format +} diff --git a/man/calculate_operational_parameters.Rd b/man/calculate_operational_parameters.Rd index 69d79ec..14cdacb 100644 --- a/man/calculate_operational_parameters.Rd +++ b/man/calculate_operational_parameters.Rd @@ -6,16 +6,11 @@ \usage{ calculate_operational_parameters( df, - calc_list = list(Redox_Out = "(Redox_Out1+Redox_Out2)/2", Redox_Diff = - "Redox_Out - Redox_In", Power_pump = "Up*Ip", Power_cell = "Uz*Iz", Pump_WhPerCbm = - "Power_pump/(Flux/1000)", Cell_WhPerCbm = "Power_cell/(Flux/1000)"), - calc_list_name = c("Mean redox potential in tank", - "Difference (outflow - inflow) of redox potential", "Power demand of pump", - "Power demand of cell", "Specific energy demand of pump", - "Specific energy demand of cell"), - calc_list_unit = c("mV", "mV", "W", "W", "Wh/m3", "Wh/m3"), - calc_paras = c("Redox_Out1", "Redox_Out2", "Redox_In", "Flux", "Up", "Ip", "Uz", - "Iz") + calc_list = get_calc_info_from_config(config, "expr"), + calc_list_name = get_calc_info_from_config(config, "name"), + calc_list_unit = get_calc_info_from_config(config, "unit"), + calc_paras = get_calc_info_from_config(config, "paras"), + config = get_calc_config("haridwar") ) } \arguments{ @@ -40,6 +35,9 @@ calculation specified wit 'calc_list'. 
default: c('mV', 'mV', 'Wh', 'Wh', \item{calc_paras}{a vector with parameter codes used for performing calculations defined in 'calc_list' (default: c('Redox_Out1', 'Redox_Out2', 'Redox_In', 'Flux', 'Up', 'Ip', 'Uz', 'Iz'))} + +\item{config}{configuration object (list) from which the \code{calc_*} +arguments are filled. Default: \code{get_calc_config("haridwar")}} } \value{ dataframe with calculated operational parameters diff --git a/man/calculate_operational_parameters_berlin_f.Rd b/man/calculate_operational_parameters_berlin_f.Rd index bb7457b..b008bf7 100644 --- a/man/calculate_operational_parameters_berlin_f.Rd +++ b/man/calculate_operational_parameters_berlin_f.Rd @@ -6,27 +6,11 @@ \usage{ calculate_operational_parameters_berlin_f( df, - calc_list = list(vfrPerm = "`Durchfluss_Rohwasser` - `Durchfluss_Konzentrat`", yield - = "100*(`Durchfluss_Rohwasser` - `Durchfluss_Konzentrat`) / `Durchfluss_Rohwasser`", - conLoop = - "(`Durchfluss_Rohwasser`*`LF_Rohwasser` + `Durchfluss_Rezirkulation`*`LF_Konzentrat`)/(`Durchfluss_Rohwasser` + `Durchfluss_Rezirkulation`)", - recovery = "100*(1 - `LF_Permeat` / conLoop)", deltaPreProcConc = - "`Druck_Arbeitsdruck` - `Druck_Konzentrat`", flux = "vfrPerm / (4 * 7.6)", cfv = - "(`Durchfluss_Rohwasser`+ `Durchfluss_Rezirkulation`) / ((pi * 0.0095^2) * 1000 * 3600)", - tmp = "((`Druck_Arbeitsdruck` + `Druck_Konzentrat`) / 2) - `Druck_Permeat`", - nwpt = - "normalised_permeate_flow(tempFeed = `Temperatur_Rohwasser`,\\n conLoop = `conLoop`,\\n vfrPerm = `vfrPerm`,\\n vfrLoop = `Durchfluss_Rezirkulation`,\\n vfrFeed = `Durchfluss_Rohwasser`,\\n prePerm = `Druck_Permeat`,\\n preProc = `Druck_Arbeitsdruck`,\\n preConc = `Druck_Konzentrat`,\\n nwp0 = 1.429162,\\n vfrPerm0 = 800)", - nwpr = "- ((1 - (nwpt / vfrPerm))) * 100"), - calc_list_name = c("Durchfluss Permeat", "Ausbeute", "Leitfähigkeit Rezirkulation", - "Rückhalt", "Druckverlust (Feed - Konzentrat)", "Flux", - "Überströmungsgeschwindigkeit", "Transmembrandruck", "Normalisierter Permeatstrom", - "Relativer Permeatstrom"), - calc_list_unit = c("l/h", "\%", "\\xb5S/cm", "\%", "bar", "l/h/m2", "m/s", "bar", - "l/h", "\%"), - calc_paras = c("Durchfluss_Rohwasser", "Durchfluss_Konzentrat", - "Durchfluss_Rezirkulation", "Druck_Arbeitsdruck", "Druck_Rohwasser", - "Druck_Konzentrat", "Druck_Permeat", "LF_Permeat", "LF_Rohwasser", "LF_Konzentrat", - "Temperatur_Rohwasser") + calc_list = get_calc_info_from_config(config, "expr"), + calc_list_name = get_calc_info_from_config(config, "name"), + calc_list_unit = get_calc_info_from_config(config, "unit"), + calc_paras = get_calc_info_from_config(config, "paras"), + config = get_calc_config("berlin_f") ) } \arguments{ @@ -34,14 +18,17 @@ calculate_operational_parameters_berlin_f( \item{calc_list}{list with calculation operations to be carried out} -\item{calc_list_name}{full names of parameters to be used for plotting for each -calculation specified wit 'calc_list'.} +\item{calc_list_name}{full names of parameters to be used for plotting for +each calculation specified wit 'calc_list'.} \item{calc_list_unit}{units of parameters to be used for plotting for each calculation specified wit 'calc_list'.} -\item{calc_paras}{a vector with parameter codes used for performing calculations -defined in 'calc_list'} +\item{calc_paras}{a vector with parameter codes used for performing +calculations defined in 'calc_list'} + +\item{config}{configuration object (list) from which the \code{calc_*} +arguments are filled. 
Default: \code{get_calc_config("berlin_f")}} } \value{ dataframe with calculated operational parameters diff --git a/man/calculate_operational_parameters_berlin_s.Rd b/man/calculate_operational_parameters_berlin_s.Rd index ed5a99c..b2bacfd 100644 --- a/man/calculate_operational_parameters_berlin_s.Rd +++ b/man/calculate_operational_parameters_berlin_s.Rd @@ -6,30 +6,33 @@ \usage{ calculate_operational_parameters_berlin_s( df, - calc_list = list(deltaSAK = "(1-SCAN_SAK_Ablauf/SCAN_SAK_Zulauf)*100", Ozoneintrag = - "(C_O3_Zugas - C_O3_Abgas)*Q_Gas/Q_Ozonanlage"), - calc_list_name = c("delta SAK", "Ozoneintrag"), - calc_list_unit = c("\%", "mg-O3/L"), - calc_paras = c("SCAN_SAK_Ablauf", "SCAN_SAK_Zulauf", "C_O3_Zugas", "C_O3_Abgas", - "Q_Gas", "Q_Ozonanlage") + calc_list = get_calc_info_from_config(config, "expr"), + calc_list_name = get_calc_info_from_config(config, "name"), + calc_list_unit = get_calc_info_from_config(config, "unit"), + calc_paras = get_calc_info_from_config(config, "paras"), + config = get_calc_config("berlin_s") ) } \arguments{ \item{df}{a data frame as retrieved by read_wedeco_data()} -\item{calc_list}{list with calculation operations to be carried out -(default: list(deltaSAK = "(1-SCAN_SAK_Ablauf/SCAN_SAK_Zulauf)*100", -Ozoneintrag = "(C_O3_Zugas - C_O3_Abgas)*Q_Gas/Q_Ozonanlage"))} +\item{calc_list}{list with calculation operations to be carried out (default: +list(deltaSAK = "(1-SCAN_SAK_Ablauf/SCAN_SAK_Zulauf)*100", Ozoneintrag = +"(C_O3_Zugas - C_O3_Abgas)*Q_Gas/Q_Ozonanlage"))} -\item{calc_list_name}{full names of parameters to be used for plotting for each -calculation specified wit 'calc_list'. default: c('delta SAK', 'Ozoneintrag')} +\item{calc_list_name}{full names of parameters to be used for plotting for +each calculation specified wit 'calc_list'. default: c('delta SAK', +'Ozoneintrag')} \item{calc_list_unit}{units of parameters to be used for plotting for each calculation specified wit 'calc_list'. default: c("percent", "mg-O3/L")} -\item{calc_paras}{a vector with parameter codes used for performing calculations -defined in 'calc_list' (default: c("SCAN_SAK_Ablauf", "SCAN_SAK_Zulauf", -"C_O3_Zugas", "C_O3_Abgas", "Q_Gas", "Q_Ozonanlage"))} +\item{calc_paras}{a vector with parameter codes used for performing +calculations defined in 'calc_list' (default: c("SCAN_SAK_Ablauf", +"SCAN_SAK_Zulauf", "C_O3_Zugas", "C_O3_Abgas", "Q_Gas", "Q_Ozonanlage"))} + +\item{config}{configuration object (list) from which the \code{calc_*} +arguments are filled. Default: \code{get_calc_config("berlin_s")}} } \value{ dataframe with calculated operational parameters diff --git a/man/calculate_operational_parameters_berlin_t.Rd b/man/calculate_operational_parameters_berlin_t.Rd index eec7f85..c64b84c 100644 --- a/man/calculate_operational_parameters_berlin_t.Rd +++ b/man/calculate_operational_parameters_berlin_t.Rd @@ -6,10 +6,11 @@ \usage{ calculate_operational_parameters_berlin_t( df, - calc_list = list(recovery = "100*`FY-20-01`/`FT-10-01`"), - calc_list_name = c("recovery"), - calc_list_unit = c("\%"), - calc_paras = c("FY-20-01", "FT-10-01") + calc_list = get_calc_info_from_config(config, "expr"), + calc_list_name = get_calc_info_from_config(config, "name"), + calc_list_unit = get_calc_info_from_config(config, "unit"), + calc_paras = get_calc_info_from_config(config, "paras"), + config = get_calc_config("berlin_t") ) } \arguments{ @@ -26,6 +27,9 @@ calculation specified wit 'calc_list'. 
default: c("percent")} \item{calc_paras}{a vector with parameter codes used for performing calculations defined in 'calc_list' (default: c("FY-20-01", "FT-10-01")} + +\item{config}{configuration object (list) from which the \code{calc_*} +arguments are filled. Default: \code{get_calc_config("berlin_t")}} } \value{ dataframe with calculated operational parameters diff --git a/man/get_rawfilespaths_for_month.Rd b/man/get_rawfilespaths_for_month.Rd index 1aed14f..b31d11d 100644 --- a/man/get_rawfilespaths_for_month.Rd +++ b/man/get_rawfilespaths_for_month.Rd @@ -6,7 +6,7 @@ \usage{ get_rawfilespaths_for_month( monthly_period = get_monthly_periods()[1, ], - raw_data_dir = package_file("shiny/berlin_t/data/operation"), + raw_data_dir = shiny_file("berlin_t/data/operation"), max_offset_days = 7 ) } @@ -15,7 +15,7 @@ get_rawfilespaths_for_month( first row of get_monthly_periods(), i.e. year month is (default: '2017-06')} \item{raw_data_dir}{directory with operational raw data files for Berlin Tiefwerder -(default: \code{kwb.pilot:::package_file("shiny/berlin_t/data/operation")})} +(default: \code{kwb.pilot:::shiny_file("berlin_t/data/operation")})} \item{max_offset_days}{number of days in previous/next month to look for beginning/ ending of month (default: 7)} diff --git a/man/get_thresholds.Rd b/man/get_thresholds.Rd index 174b4cb..a5b14ab 100644 --- a/man/get_thresholds.Rd +++ b/man/get_thresholds.Rd @@ -4,11 +4,11 @@ \alias{get_thresholds} \title{Get thresholds for analytics/operational parameters} \usage{ -get_thresholds(file = package_file("shiny/haridwar/data/thresholds.csv")) +get_thresholds(file = shiny_file("haridwar/data/thresholds.csv")) } \arguments{ \item{file}{path to csv file with thresholds for Haridwar site (default: -\code{kwb.pilot:::package_file("shiny/haridwar/data/thresholds.csv")})} +\code{kwb.pilot:::shiny_file("haridwar/data/thresholds.csv")})} } \value{ returns data frame thresholds for operational/analytical parameters diff --git a/man/group_datetime.Rd b/man/group_datetime.Rd index 471893b..b2c3a18 100644 --- a/man/group_datetime.Rd +++ b/man/group_datetime.Rd @@ -21,7 +21,7 @@ day aggregation or "day", "month" or "year" for longer time spans} \item{fun}{function to be used for grouping measurement data of column ParameterValue (default: stats::median) -(default: kwb.pilot:::package_file("shiny/haridwar/.my.cnf"))} +(default: kwb.pilot:::shiny_file("haridwar/.my.cnf"))} \item{col_datetime}{column name of datetime column (default: DateTime)} diff --git a/man/group_fst_by_pattern.Rd b/man/group_fst_by_pattern.Rd index 2f4dedd..d1d2660 100644 --- a/man/group_fst_by_pattern.Rd +++ b/man/group_fst_by_pattern.Rd @@ -7,7 +7,7 @@ group_fst_by_pattern( time_pattern = NULL, fst_pattern = "raw", - fst_dir = package_file("shiny/berlin_t/data/fst") + fst_dir = shiny_file("berlin_t/data/fst") ) } \arguments{ @@ -17,7 +17,7 @@ for using it do e.g. 
"2017-06|2017-07" or c("2017-06", "2017-07")} \item{fst_pattern}{pattern to search for in fst filename (default: "raw")} \item{fst_dir}{directory with fst files or subdirs to be imported (default: -kwb.pilot:::package_file("shiny/berlin_t/data/fst"))} +kwb.pilot:::shiny_file("berlin_t/data/fst"))} } \value{ merged data.frame diff --git a/man/import_analytics_basel.Rd b/man/import_analytics_basel.Rd index 6dd3c3e..fe00c2e 100644 --- a/man/import_analytics_basel.Rd +++ b/man/import_analytics_basel.Rd @@ -4,11 +4,11 @@ \alias{import_analytics_basel} \title{Imports analytical data for Basel (without metadata)} \usage{ -import_analytics_basel(csv_dir = package_file("shiny/basel/data/analytics")) +import_analytics_basel(csv_dir = shiny_file("basel/data/analytics")) } \arguments{ \item{csv_dir}{Define directory with raw analytical data in CSV (.csv) format to -be imported (default: sema.pilot:::package_file("shiny/basel/data/analytics"))} +be imported (default: sema.pilot:::shiny_file("basel/data/analytics"))} } \value{ returns data frame with imported raw analytics data diff --git a/man/import_analytics_meta_basel.Rd b/man/import_analytics_meta_basel.Rd index 479bd3e..a0050a7 100644 --- a/man/import_analytics_meta_basel.Rd +++ b/man/import_analytics_meta_basel.Rd @@ -6,20 +6,20 @@ "rhein" and "wiese")} \usage{ import_analytics_meta_basel( - analytics_dir = package_file("shiny/basel/data/analytics"), - meta_site_path = package_file("shiny/basel/data/metadata/meta_site.csv"), - meta_parameter_path = package_file("shiny/basel/data/metadata/meta_parameter.csv") + analytics_dir = shiny_file("basel/data/analytics"), + meta_site_path = shiny_file("basel/data/metadata/meta_site.csv"), + meta_parameter_path = shiny_file("basel/data/metadata/meta_parameter.csv") ) } \arguments{ \item{analytics_dir}{Define directory with raw analytical data in CSV (.csv) format to -be imported (default: sema.pilot:::package_file("shiny/basel/data/analytics"))} +be imported (default: sema.pilot:::shiny_file("basel/data/analytics"))} \item{meta_site_path}{Define path of "meta_site.csv" to be imported -(default: sema.pilot:::package_file("shiny/basel/data/metadata/meta_site.csv"))} +(default: sema.pilot:::shiny_file("basel/data/metadata/meta_site.csv"))} \item{meta_parameter_path}{Define path of "meta_parameter.csv" to be imported -(default: sema.pilot:::package_file("shiny/basel/data/metadata/meta_parameter.csv"))} +(default: sema.pilot:::shiny_file("basel/data/metadata/meta_parameter.csv"))} } \value{ data.frame with analytics data for Basel sites including metadata diff --git a/man/import_data_basel.Rd b/man/import_data_basel.Rd index 3349420..cac48e0 100644 --- a/man/import_data_basel.Rd +++ b/man/import_data_basel.Rd @@ -6,34 +6,34 @@ at once, i.e. 
"rhein" and "wiese")} \usage{ import_data_basel( - analytics_dir = package_file("shiny/basel/data/analytics"), - raw_dir_rhein = package_file("shiny/basel/data/operation/rhein"), - raw_dir_wiese = package_file("shiny/basel/data/operation/wiese"), - meta_online_path = package_file("shiny/basel/data/metadata/meta_online.csv"), - meta_parameter_path = package_file("shiny/basel/data/metadata/meta_parameter.csv"), - meta_site_path = package_file("shiny/basel/data/metadata/meta_site.csv") + analytics_dir = shiny_file("basel/data/analytics"), + raw_dir_rhein = shiny_file("basel/data/operation/rhein"), + raw_dir_wiese = shiny_file("basel/data/operation/wiese"), + meta_online_path = shiny_file("basel/data/metadata/meta_online.csv"), + meta_parameter_path = shiny_file("basel/data/metadata/meta_parameter.csv"), + meta_site_path = shiny_file("basel/data/metadata/meta_site.csv") ) } \arguments{ \item{analytics_dir}{Define directory with raw analytical data in CSV (.csv) format to -be imported (default: sema.pilot:::package_file("shiny/basel/data/analytics"))} +be imported (default: sema.pilot:::shiny_file("basel/data/analytics"))} \item{raw_dir_rhein}{Define directory for site "rhein" with raw data in EXCEL spreadsheet format (.xlsx) to be imported (default: -sema.pilot:::package_file("shiny/basel/data/operation/rhein"))} +sema.pilot:::shiny_file("basel/data/operation/rhein"))} \item{raw_dir_wiese}{Define directory for site "rhein" with raw data in EXCEL spreadsheet format (.xlsx) to be imported (default: -sema.pilot:::package_file("shiny/basel/data/operation/wiese"))} +sema.pilot:::shiny_file("basel/data/operation/wiese"))} \item{meta_online_path}{path to file containing metadata for online data -(default: sema.pilot:::package_file("shiny/basel/data/metadata/meta_online.csv"))} +(default: sema.pilot:::shiny_file("basel/data/metadata/meta_online.csv"))} \item{meta_parameter_path}{Define path of "meta_parameter.csv" to be imported -(default: sema.pilot:::package_file("shiny/basel/data/metadata/meta_parameter.csv"))} +(default: sema.pilot:::shiny_file("basel/data/metadata/meta_parameter.csv"))} \item{meta_site_path}{Define path of "meta_site.csv" to be imported -(default: sema.pilot:::package_file("shiny/basel/data/metadata/meta_site.csv"))} +(default: sema.pilot:::shiny_file("basel/data/metadata/meta_site.csv"))} } \value{ data.frame with analytical & operational data for Basel diff --git a/man/import_data_berlin_f.Rd b/man/import_data_berlin_f.Rd index 4da9436..cfb932e 100644 --- a/man/import_data_berlin_f.Rd +++ b/man/import_data_berlin_f.Rd @@ -5,21 +5,20 @@ \title{Import data for Berlin Friedrichshagen} \usage{ import_data_berlin_f( - raw_data_files = fs::dir_ls(package_file("shiny/berlin_f/data/raw/online_data"), - recurse = TRUE, regexp = "^[^~].*\\\\.xlsx$"), - - meta_file_path = package_file("shiny/berlin_f/data/raw/online_data/parameter_unit_metadata.csv") + raw_data_files = fs::dir_ls(shiny_file("berlin_f/data/raw/online_data"), recurse = + TRUE, regexp = "^[^~].*\\\\.xlsx$"), + meta_file_path = + shiny_file("berlin_f/data/raw/online_data/parameter_unit_metadata.csv") ) } \arguments{ \item{raw_data_files}{vector with full path to operational raw data files that allows to limit import to specific files (default: -fs::dir_ls(package_file("shiny/berlin_f/data/raw/online_data""), recurse = TRUE, +fs::dir_ls(kwb.pilot:::shiny_file("berlin_f/data/raw/online_data""), recurse = TRUE, regexp = "^[^~].*\\.xlsx$")).} \item{meta_file_path}{path to metadata file (default: 
-kwb.pilot:::package_file(file.path("shiny/berlin_f/data/raw/online_data", -"parameter_site_metadata.csv"))} +kwb.pilot:::shiny_file("berlin_f/data/raw/online_data/parameter_site_metadata.csv"))} } \value{ data.frame with imported operational data (analytics´data to be added as diff --git a/man/import_data_berlin_s.Rd b/man/import_data_berlin_s.Rd index a981d78..b2bb518 100644 --- a/man/import_data_berlin_s.Rd +++ b/man/import_data_berlin_s.Rd @@ -5,21 +5,21 @@ \title{Import data for Berlin Schoenerlinde} \usage{ import_data_berlin_s( - raw_data_dir = package_file("shiny/berlin_s/data/operation"), + raw_data_dir = shiny_file("berlin_s/data/operation"), raw_data_files = NULL, - meta_file_path = package_file("shiny/berlin_s/data/parameter_site_metadata.csv") + meta_file_path = shiny_file("berlin_s/data/parameter_site_metadata.csv") ) } \arguments{ \item{raw_data_dir}{path of directory containing WEDECO CSV files -(default: kwb.pilot:::package_file("shiny/berlin_s/data/operation"))} +(default: kwb.pilot:::shiny_file("berlin_s/data/operation"))} \item{raw_data_files}{vector with full path to operational raw data files that allows to limit import to specific files (default: NULL). If specified parameter "raw_data_dir" will not be used} \item{meta_file_path}{path to metadata file (default: -kwb.pilot:::package_file("shiny/berlin_s/data/parameter_site_metadata.csv"))} +kwb.pilot:::shiny_file("berlin_s/data/parameter_site_metadata.csv"))} } \value{ list with "df": data.frame with imported operational data (analytics diff --git a/man/import_data_berlin_t.Rd b/man/import_data_berlin_t.Rd index 1169730..dc3d5ef 100644 --- a/man/import_data_berlin_t.Rd +++ b/man/import_data_berlin_t.Rd @@ -5,25 +5,25 @@ \title{Import data for Berlin Tiefwerder} \usage{ import_data_berlin_t( - raw_data_dir = package_file("shiny/berlin_t/data/operation"), + raw_data_dir = shiny_file("berlin_t/data/operation"), raw_data_files = NULL, - analytics_path = package_file("shiny/berlin_t/data/analytics.xlsx"), - meta_file_path = package_file("shiny/berlin_t/data/parameter_site_metadata.csv") + analytics_path = shiny_file("berlin_t/data/analytics.xlsx"), + meta_file_path = shiny_file("berlin_t/data/parameter_site_metadata.csv") ) } \arguments{ \item{raw_data_dir}{path of directory containing PENTAIR xls files -(default: kwb.pilot:::package_file("shiny/berlin_t/data/operation"))} +(default: kwb.pilot:::shiny_file("berlin_t/data/operation"))} \item{raw_data_files}{vector with full path to operational raw data files that allows to limit import to specific files (default: NULL). 
If specified parameter "raw_data_dir" will not be used} \item{analytics_path}{full path to lab data EXCEL file in xlsx format -(default: kwb.pilot:::package_file("shiny/berlin_t/data/analytics.xlsx"))} +(default: kwb.pilot:::shiny_file("berlin_t/data/analytics.xlsx"))} \item{meta_file_path}{path to metadata file (default: -kwb.pilot:::package_file("shiny/berlin_t/data/parameter_site_metadata.csv"))} +kwb.pilot:::shiny_file("berlin_t/data/parameter_site_metadata.csv"))} } \value{ data.frame with imported operational data (analytics´data to be added as diff --git a/man/import_data_haridwar.Rd b/man/import_data_haridwar.Rd index 6d400ac..88c450d 100644 --- a/man/import_data_haridwar.Rd +++ b/man/import_data_haridwar.Rd @@ -5,25 +5,24 @@ \title{Imports Haridwar data} \usage{ import_data_haridwar( - analytics_path = package_file("shiny/haridwar/data/analytics.xlsx"), - operation_mySQL_conf = package_file("shiny/haridwar/.my.cnf"), - operation_meta_path = package_file("shiny/haridwar/data/operation_parameters.csv"), - excludedSheets = c("Parameters", "Location", "Sites", "#Summary", - "Site_and_Parameter", "Observations", "dP", "ORP", "Flow", "Current_Voltage", - "As_total_Arsenator"), + analytics_path = shiny_file("haridwar/data/analytics.xlsx"), + operation_mySQL_conf = shiny_file("haridwar/.my.cnf"), + operation_meta_path = shiny_file("haridwar/data/operation_parameters.csv"), + excludedSheets = c("Parameters", "Location", "Sites", "#Summary", "Site_and_Parameter", + "Observations", "dP", "ORP", "Flow", "Current_Voltage", "As_total_Arsenator"), skip = 69, debug = TRUE ) } \arguments{ \item{analytics_path}{Define path of analytics EXCEL spreadsheet to be -imported (default: kwb.pilot:::package_file("shiny/haridwar/data/analytics.xlsx"))} +imported (default: kwb.pilot:::shiny_file("haridwar/data/analytics.xlsx"))} \item{operation_mySQL_conf}{column name pattern for identifying raw data -(default: kwb.pilot:::package_file("shiny/haridwar/.my.cnf"))} +(default: kwb.pilot:::shiny_file("haridwar/.my.cnf"))} \item{operation_meta_path}{path to table with meta data for operational -parameters (default: kwb.pilot:::package_file("shiny/haridwar/data/operation_parameters.csv"))} +parameters (default: kwb.pilot:::shiny_file("haridwar/data/operation_parameters.csv"))} \item{excludedSheets}{all sheets, which are not listed here will be imported as lab data sheets (default: c("Parameters", "Location", "Sites", "#Summary", diff --git a/man/import_lab_data_berlin_t.Rd b/man/import_lab_data_berlin_t.Rd index 4117d69..bd8f4c8 100644 --- a/man/import_lab_data_berlin_t.Rd +++ b/man/import_lab_data_berlin_t.Rd @@ -5,12 +5,12 @@ \title{BerlinTiefwerder: import lab data} \usage{ import_lab_data_berlin_t( - xlsx_path = package_file("shiny/berlin_t/data/analytics.xlsx") + xlsx_path = shiny_file("berlin_t/data/analytics.xlsx") ) } \arguments{ \item{xlsx_path}{full path to lab data EXCEL file in xlsx format -(default: kwb.pilot:::package_file("shiny/berlin_t/data/analytics.xlsx"))} +(default: kwb.pilot:::shiny_file("berlin_t/data/analytics.xlsx"))} } \value{ a list of imported lab data for Berlin-Tiefwerder diff --git a/man/import_operation_basel.Rd b/man/import_operation_basel.Rd index 1829cac..3090562 100644 --- a/man/import_operation_basel.Rd +++ b/man/import_operation_basel.Rd @@ -5,13 +5,11 @@ \title{Imports operational data for Basel (without metadata and only for one site at once, e.g. 
"rhein" or "wiese")} \usage{ -import_operation_basel( - xlsx_dir = package_file("shiny/basel/data/operation/wiese") -) +import_operation_basel(xlsx_dir = shiny_file("basel/data/operation/wiese")) } \arguments{ \item{xlsx_dir}{Define directory with raw data in EXCEL spreadsheet (.xlsx) to -be imported (default: sema.pilot:::package_file("shiny/basel/data/operation/wiese"))} +be imported (default: sema.pilot:::shiny_file("basel/data/operation/wiese"))} } \value{ returns data frame with imported raw operational data diff --git a/man/import_operation_meta_basel.Rd b/man/import_operation_meta_basel.Rd index 8b06622..657821e 100644 --- a/man/import_operation_meta_basel.Rd +++ b/man/import_operation_meta_basel.Rd @@ -6,30 +6,30 @@ both sites at once, i.e. "rhein" and "wiese")} \usage{ import_operation_meta_basel( - raw_dir_rhein = package_file("shiny/basel/data/operation/rhein"), - raw_dir_wiese = package_file("shiny/basel/data/operation/wiese"), - meta_online_path = package_file("shiny/basel/data/metadata/meta_online.csv"), - meta_site_path = package_file("shiny/basel/data/metadata/meta_site.csv"), - meta_parameter_path = package_file("shiny/basel/data/metadata/meta_parameter.csv") + raw_dir_rhein = shiny_file("basel/data/operation/rhein"), + raw_dir_wiese = shiny_file("basel/data/operation/wiese"), + meta_online_path = shiny_file("basel/data/metadata/meta_online.csv"), + meta_site_path = shiny_file("basel/data/metadata/meta_site.csv"), + meta_parameter_path = shiny_file("basel/data/metadata/meta_parameter.csv") ) } \arguments{ \item{raw_dir_rhein}{Define directory for site "rhein" with raw data in EXCEL spreadsheet format (.xlsx) to be imported (default: -sema.pilot:::package_file("shiny/basel/data/operation/rhein"))} +sema.pilot:::shiny_file("basel/data/operation/rhein"))} \item{raw_dir_wiese}{Define directory for site "rhein" with raw data in EXCEL spreadsheet format (.xlsx) to be imported (default: -sema.pilot:::package_file("shiny/basel/data/operation/wiese"))} +sema.pilot:::shiny_file("basel/data/operation/wiese"))} \item{meta_online_path}{path to file containing metadata for online data -(default: sema.pilot:::package_file("shiny/basel/data/metadata/meta_online.csv"))} +(default: sema.pilot:::shiny_file("basel/data/metadata/meta_online.csv"))} \item{meta_site_path}{Define path of "meta_site.csv" to be imported -(default: sema.pilot:::package_file("shiny/basel/data/metadata/meta_site.csv"))} +(default: sema.pilot:::shiny_file("basel/data/metadata/meta_site.csv"))} \item{meta_parameter_path}{Define path of "meta_parameter.csv" to be imported -(default: sema.pilot:::package_file("shiny/basel/data/metadata/meta_parameter.csv"))} +(default: sema.pilot:::shiny_file("basel/data/metadata/meta_parameter.csv"))} } \value{ returns data frame with imported raw operational data with metadata diff --git a/man/merge_and_export_fst.Rd b/man/merge_and_export_fst.Rd index af620fb..04c6b37 100644 --- a/man/merge_and_export_fst.Rd +++ b/man/merge_and_export_fst.Rd @@ -7,8 +7,8 @@ merge_and_export_fst( time_pattern = NULL, compression = 100, - import_dir = package_file("shiny/berlin_t/data/fst"), - export_dir = package_file("shiny/berlin_t/data") + import_dir = shiny_file("berlin_t/data/fst"), + export_dir = shiny_file("berlin_t/data") ) } \arguments{ @@ -18,10 +18,10 @@ for using it do e.g. 
"2017-06|2017-07" or c("2017-06", "2017-07")} \item{compression}{compression for fst export (default: 100)} \item{import_dir}{directory with fst files or subdirs to be imported (default: -kwb.pilot:::package_file("shiny/berlin_t/data/fst"))} +kwb.pilot:::shiny_file("berlin_t/data/fst"))} \item{export_dir}{directory with fst directory for export (default: -kwb.pilot:::package_file("shiny/berlin_t/data"))} +kwb.pilot:::shiny_file("berlin_t/data"))} } \value{ imports multiple fst files and exports them to be used for app diff --git a/man/read_mbr4_archived.Rd b/man/read_mbr4_archived.Rd index ab2c247..a85cb7c 100644 --- a/man/read_mbr4_archived.Rd +++ b/man/read_mbr4_archived.Rd @@ -54,8 +54,10 @@ tibble with imported archived MBR4.0 xlsx data from Nextcloud Read MBR4.0 archived data from Nextcloud } \examples{ +\dontrun{ if(check_env_nextcloud()) { -mbr4_data_archived <- read_mbr4_archived() -str(mbr4_data_archived) + mbr4_data_archived <- read_mbr4_archived() + str(mbr4_data_archived) +} } } diff --git a/man/read_mbr4_latest.Rd b/man/read_mbr4_latest.Rd index b4618cc..e0ea753 100644 --- a/man/read_mbr4_latest.Rd +++ b/man/read_mbr4_latest.Rd @@ -45,8 +45,8 @@ Read MBR4.0 data from Martin Systems Webportal (As "tsv") } \examples{ url_mbr40 <- Sys.getenv("MBR40_URL") -if(url_mbr40 != "") { -mbr4_data_latest <- read_mbr4_latest(url = url_mbr40) -str(mbr4_data_latest) +if (url_mbr40 != "") { + mbr4_data_latest <- read_mbr4_latest(url = url_mbr40) + str(mbr4_data_latest) } } diff --git a/man/read_pentair_data.Rd b/man/read_pentair_data.Rd index 6fe3c93..4095c31 100644 --- a/man/read_pentair_data.Rd +++ b/man/read_pentair_data.Rd @@ -5,23 +5,23 @@ \title{Read PENTAIR operational data} \usage{ read_pentair_data( - raw_data_dir = package_file("shiny/berlin_t/data/operation"), + raw_data_dir = shiny_file("berlin_t/data/operation"), raw_data_files = NULL, - meta_file_path = package_file("shiny/berlin_t/data/parameter_site_metadata.csv"), + meta_file_path = shiny_file("berlin_t/data/parameter_site_metadata.csv"), locale = readr::locale(tz = "CET"), col_types = readr::cols() ) } \arguments{ \item{raw_data_dir}{path of directory containing PENTAIR xls files -(default: kwb.pilot:::package_file("shiny/berlin_t/data/operation"))} +(default: kwb.pilot:::shiny_file("berlin_t/data/operation"))} \item{raw_data_files}{vector with full path to operational raw data files that allows to limit import to specific files (default: NULL). 
If specified, parameter "raw_data_dir" will not be used}

\item{meta_file_path}{path to metadata file (default:
-kwb.pilot:::package_file("shiny/berlin_t/data/parameter_site_metadata.csv"))}
+kwb.pilot:::shiny_file("berlin_t/data/parameter_site_metadata.csv"))}

\item{locale}{locale (default: \code{\link[readr]{locale}}(tz = "CET"))}
diff --git a/man/read_wedeco_data.Rd b/man/read_wedeco_data.Rd
index dd8c9db..cb481ff 100644
--- a/man/read_wedeco_data.Rd
+++ b/man/read_wedeco_data.Rd
@@ -5,9 +5,9 @@
\title{Import WEDECO raw data}
\usage{
read_wedeco_data(
-  raw_data_dir = package_file("shiny/berlin_s/data/operation"),
+  raw_data_dir = shiny_file("berlin_s/data/operation"),
  raw_data_files = NULL,
-  meta_file_path = package_file("shiny/berlin_s/data/parameter_site_metadata.csv")
+  meta_file_path = shiny_file("berlin_s/data/parameter_site_metadata.csv")
)
}
\arguments{
diff --git a/man/tidy_mbr4_data.Rd b/man/tidy_mbr4_data.Rd
index 23eb0fc..6029502 100644
--- a/man/tidy_mbr4_data.Rd
+++ b/man/tidy_mbr4_data.Rd
@@ -6,7 +6,7 @@
\usage{
tidy_mbr4_data(
  mbr4_data,
-  path_metadata = kwb.pilot:::shiny_file("mbr4.0/data/metadata.csv")
+  path_metadata = shiny_file("mbr4.0/data/metadata.csv")
)
}
\arguments{
diff --git a/vignettes/mbr40.Rmd b/vignettes/mbr40.Rmd
index e94213d..3cc6d63 100644
--- a/vignettes/mbr40.Rmd
+++ b/vignettes/mbr40.Rmd
@@ -12,7 +12,8 @@ knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
-url_defined <- Sys.getenv("MBR40_URL") != ""
+
+url_defined <- FALSE # Sys.getenv("MBR40_URL") != ""
```

## Install R package
diff --git a/vignettes/ultimate.Rmd b/vignettes/ultimate.Rmd
index 0f9bafc..6b46506 100644
--- a/vignettes/ultimate.Rmd
+++ b/vignettes/ultimate.Rmd
@@ -2,9 +2,11 @@
title: "Pilot: Ultimate"
output: rmarkdown::html_vignette
vignette: >
+  %\VignetteEncoding{UTF-8}
  %\VignetteIndexEntry{Pilot: Ultimate}
  %\VignetteEngine{knitr::rmarkdown}
-  %\VignetteEncoding{UTF-8}
+editor_options:
+  chunk_output_type: console
---

```{r, include = FALSE}
@@ -12,12 +14,14 @@ knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
-credentials_defined <- kwb.pilot::check_env_influxdb_ultimate() && kwb.pilot::check_env_nextcloud()
+
+credentials_defined <- kwb.pilot::check_env_influxdb_ultimate() &&
+  kwb.pilot::check_env_nextcloud()
+
 is_github <- identical(Sys.getenv("CI"), "true")
- 
+
 #eval_chunks <- credentials_defined && is_github
 eval_chunks = FALSE
-
```

## Define Login Credentials
@@ -55,7 +59,8 @@ Finally you need to restart Rstudio and proceed with the code below:
# Enable repository from kwb-r
options(repos = c(
  kwbr = 'https://kwb-r.r-universe.dev',
-  CRAN = 'https://cloud.r-project.org'))
+  CRAN = 'https://cloud.r-project.org'
+))

# Download and install kwb.pilot in R
install.packages('kwb.pilot')
@@ -90,13 +95,14 @@ paths_list <- list(
  b_local_imported = "/"
)

-paths <- kwb.utils::resolve(paths_list,
-                            root = ifelse(identical(Sys.getenv("CI"),
-                                                    "true"),
-                                          paths_list$root_ci,
-                                          paths_list$root_windows)
-                            )
-
+paths <- kwb.utils::resolve(
+  paths_list,
+  root = ifelse(
+    identical(Sys.getenv("CI"), "true"),
+    paths_list$root_ci,
+    paths_list$root_windows
+  )
+)
```

### Write to InfluxDBCloud
@@ -113,39 +119,47 @@
## Step: kwb.pilot::download_nextcloud_files() is optional and can be skipped
## if the data is provided in the `paths$raw_data_dir`
-files_pilot_a <- kwb.pilot::download_nextcloud_files(dir_cloud = paths$a_cloud_raw,
-                                                     dir_local = paths$a_local_raw)
-
-
-if(length(files_pilot_a) > 0) {
-
-tsv_paths <- list.files(
-  path = paths$a_local_raw,
-  full.names = TRUE,
-  pattern = "xls$"
-)
-
-kwb.pilot::write_to_influxdb_loop(
-  tsv_paths = tsv_paths,
-  paths = list(site_code = paths$a_site_code,
-               raw_data_dir = paths$a_local_raw),
-  changed_only = FALSE,
-  max_tsv_files = 10,
-  batch_size = 5000
+files_pilot_a <- kwb.pilot::download_nextcloud_files(
+  dir_cloud = paths$a_cloud_raw,
+  dir_local = paths$a_local_raw
 )
-
-### Move all files for Pilot A`s cloud "raw" data directory to the "imported"
-### directory (in case that a file is already existing there: overwrite it)!
-paths_pilot_a <- data.frame(raw = file.path(paths$a_cloud_raw, files_pilot_a),
-                            imported = file.path(paths$a_cloud_imported, files_pilot_a)
-                            )
-
-kwb.pilot::move_nextcloud_files(paths_pilot_a, overwrite = TRUE)
+if (length(files_pilot_a) > 0L) {
+
+  tsv_paths <- list.files(
+    path = paths$a_local_raw,
+    full.names = TRUE,
+    pattern = "xls$"
+  )
+
+  kwb.pilot::write_to_influxdb_loop(
+    tsv_paths = tsv_paths,
+    paths = list(
+      site_code = paths$a_site_code,
+      raw_data_dir = paths$a_local_raw
+    ),
+    changed_only = FALSE,
+    max_tsv_files = 10,
+    batch_size = 5000
+  )
+
+  ### Move all files from Pilot A's cloud "raw" data directory to the
+  ### "imported" directory (if a file already exists there, overwrite it)!
+  paths_pilot_a <- data.frame(
+    raw = file.path(paths$a_cloud_raw, files_pilot_a),
+    imported = file.path(paths$a_cloud_imported, files_pilot_a)
+  )
+
+  kwb.pilot::move_nextcloud_files(paths_pilot_a, overwrite = TRUE)
+
 } else {
-  message(sprintf("Nothing to do for '%s'! No new files in '%s'",
-                  paths$a_site_code,
-                  paths$a_cloud_raw))
+
+  message(sprintf(
+    "Nothing to do for '%s'! No new files in '%s'",
+    paths$a_site_code,
+    paths$a_cloud_raw
+  ))
}
```
@@ -158,36 +172,45 @@
## Step: kwb.pilot::download_nextcloud_files() is optional and can be skipped
## if the data is provided in the `paths$raw_data_dir`
-files_pilot_b <- kwb.pilot::download_nextcloud_files(dir_cloud = paths$b_cloud_raw,
-                                                     dir_local = paths$b_local_raw)
-
-if(length(files_pilot_b) > 0) {
-tsv_paths <- list.files(
-  path = paths$b_local_raw,
-  full.names = TRUE,
-  pattern = "xls$"
+files_pilot_b <- kwb.pilot::download_nextcloud_files(
+  dir_cloud = paths$b_cloud_raw,
+  dir_local = paths$b_local_raw
 )
-kwb.pilot::write_to_influxdb_loop(
-  tsv_paths = tsv_paths,
-  paths = list(site_code = paths$b_site_code,
-               raw_data_dir = paths$b_local_raw),
-  changed_only = FALSE,
-  max_tsv_files = 5,
-  batch_size = 5000
-)
-
-### Move all files for Pilot B`s cloud "raw" data directory to the "imported"
-### directory (in case that a file is already existing there: overwrite it)!
-paths_pilot_b <- data.frame(raw = file.path(paths$b_cloud_raw, files_pilot_b),
-                            imported = file.path(paths$b_cloud_imported, files_pilot_b)
-                            )
-
-kwb.pilot::move_nextcloud_files(paths_pilot_b, overwrite = TRUE)
+if (length(files_pilot_b) > 0L) {
+
+  tsv_paths <- list.files(
+    path = paths$b_local_raw,
+    full.names = TRUE,
+    pattern = "xls$"
+  )
+
+  kwb.pilot::write_to_influxdb_loop(
+    tsv_paths = tsv_paths,
+    paths = list(
+      site_code = paths$b_site_code,
+      raw_data_dir = paths$b_local_raw
+    ),
+    changed_only = FALSE,
+    max_tsv_files = 5,
+    batch_size = 5000
+  )
+
+  ### Move all files from Pilot B's cloud "raw" data directory to the
+  ### "imported" directory (if a file already exists there, overwrite it)!
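+  ### Each row below pairs a file's path in the cloud "raw" folder with its
+  ### target path in the "imported" folder; this data frame is the mapping
+  ### passed to kwb.pilot::move_nextcloud_files() below.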
+ paths_pilot_b <- data.frame( + raw = file.path(paths$b_cloud_raw, files_pilot_b), + imported = file.path(paths$b_cloud_imported, files_pilot_b) + ) + + kwb.pilot::move_nextcloud_files(paths_pilot_b, overwrite = TRUE) + } else { - message(sprintf("Nothing to do for '%s'! No new files in '%s'", - paths$b_site_code, - paths$b_cloud_raw)) + + message(sprintf( + "Nothing to do for '%s'! No new files in '%s'", + paths$b_site_code, + paths$b_cloud_raw + )) } ``` -
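The Pilot A and Pilot B chunks above are identical apart from the `a_`/`b_` prefix in the path names and the `max_tsv_files` limit (10 vs. 5). A minimal sketch of a shared helper that could replace both chunks follows; the function name `upload_pilot_data` and the `path_of()` lookup are hypothetical, while the `kwb.pilot` calls and their arguments are exactly those used in the chunks above:

```r
# Hypothetical helper consolidating the Pilot A/B chunks above.
# Assumes `paths` is the named list built via kwb.utils::resolve() earlier
# in this vignette, with entries such as a_cloud_raw, a_local_raw, ...
upload_pilot_data <- function(paths, prefix, max_tsv_files = 10) {

  # Resolve e.g. prefix = "a", key = "cloud_raw" to paths$a_cloud_raw
  path_of <- function(key) paths[[paste(prefix, key, sep = "_")]]

  files <- kwb.pilot::download_nextcloud_files(
    dir_cloud = path_of("cloud_raw"),
    dir_local = path_of("local_raw")
  )

  if (length(files) == 0L) {
    message(sprintf(
      "Nothing to do for '%s'! No new files in '%s'",
      path_of("site_code"), path_of("cloud_raw")
    ))
    return(invisible(NULL))
  }

  tsv_paths <- list.files(
    path = path_of("local_raw"),
    full.names = TRUE,
    pattern = "xls$"
  )

  kwb.pilot::write_to_influxdb_loop(
    tsv_paths = tsv_paths,
    paths = list(
      site_code = path_of("site_code"),
      raw_data_dir = path_of("local_raw")
    ),
    changed_only = FALSE,
    max_tsv_files = max_tsv_files,
    batch_size = 5000
  )

  # Move the processed files from the cloud "raw" to the "imported" folder
  kwb.pilot::move_nextcloud_files(
    data.frame(
      raw = file.path(path_of("cloud_raw"), files),
      imported = file.path(path_of("cloud_imported"), files)
    ),
    overwrite = TRUE
  )
}

# Equivalent to the two chunks above:
# upload_pilot_data(paths, prefix = "a", max_tsv_files = 10)
# upload_pilot_data(paths, prefix = "b", max_tsv_files = 5)
```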