From 6ae75cc970cdaadb302b0ed87fec577c3fccc428 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 5 Sep 2023 18:42:38 +0200 Subject: [PATCH 01/51] Indicate development version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index d09fd30..16c2cd4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: kwb.umberto Title: R package supporting UMERTO LCA at KWB -Version: 0.2.0 +Version: 0.2.0.9000 Authors@R: c( person("Michael", "Rustler", , "michael.rustler@kompetenz-wasser.de", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-0647-7726")), From 0b7b58c2ac60b7c750e6bdb573761fde35281de8 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 5 Sep 2023 18:45:35 +0200 Subject: [PATCH 02/51] Improve indentation --- R/data_aggregation.R | 69 +++++++++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 27 deletions(-) diff --git a/R/data_aggregation.R b/R/data_aggregation.R index 67851e1..ee106d4 100644 --- a/R/data_aggregation.R +++ b/R/data_aggregation.R @@ -25,22 +25,29 @@ #' umberto10_rawdata <- kwb.umberto::import_rawdata(csv_dir = umberto10_csv_dir) #' umberto10_data_grouped <- kwb.umberto::group_data(umberto10_rawdata) #' head(umberto10_data_grouped) -group_data <- function(raw_data, - grouping_paras = c("lci_method", "model", "process", "unit"), - grouping_function = "sum", - summarise_col = "quantity") { - +group_data <- function( + raw_data, + grouping_paras = c("lci_method", "model", "process", "unit"), + grouping_function = "sum", + summarise_col = "quantity" +) +{ summarise_col_fun <- function(summarise_col) { sprintf("%s_%s", summarise_col, grouping_function) } raw_data %>% - dplyr::group_by(dplyr::across(tidyselect::all_of(grouping_paras))) %>% - dplyr::rename_with(.fn = summarise_col_fun, - .cols = summarise_col) %>% - dplyr::summarise_at(.vars = summarise_col_fun(summarise_col), - .funs = grouping_function) - + dplyr::group_by( + dplyr::across(tidyselect::all_of(grouping_paras)) + ) %>% + dplyr::rename_with( + .fn = summarise_col_fun, + .cols = summarise_col + ) %>% + dplyr::summarise_at( + .vars = summarise_col_fun(summarise_col), + .funs = grouping_function + ) } @@ -75,15 +82,19 @@ group_data <- function(raw_data, #' umberto10_data_pivot <- kwb.umberto::pivot_data(umberto10_data_grouped) #' head(umberto10_data_pivot) #' -pivot_data <- function(rawdata_grouped, - cols_to_ignore = "unit", - key_col = "model", - value_col = "quantity_sum") { - +pivot_data <- function( + rawdata_grouped, + cols_to_ignore = "unit", + key_col = "model", + value_col = "quantity_sum" +) +{ rawdata_grouped %>% - dplyr::select(tidyselect::all_of(setdiff(names(rawdata_grouped), - cols_to_ignore)) - ) %>% + dplyr::select( + tidyselect::all_of( + setdiff(names(rawdata_grouped), cols_to_ignore) + ) + ) %>% tidyr::spread(key = key_col, value = value_col) } @@ -115,19 +126,21 @@ pivot_data <- function(rawdata_grouped, #' umberto10_data_pivot_list <- kwb.umberto::create_pivot_list(umberto10_data_pivot) #' head(umberto10_data_pivot_list) #' -create_pivot_list <- function(pivot_data, - arrange_cols = "process") { - +create_pivot_list <- function(pivot_data, arrange_cols = "process") +{ myList <- list() + lci_methods <- unique(pivot_data$lci_method) + for (i in seq_along(lci_methods)) { selected_lci_method <- unique(pivot_data$lci_method)[i] - processes <- data.frame(lci_method = selected_lci_method, - process = unique(pivot_data$process), - stringsAsFactors = FALSE) - + processes <- data.frame( + lci_method = selected_lci_method, + process = unique(pivot_data$process), + stringsAsFactors = FALSE + ) tmp_data <- pivot_data[pivot_data$lci_method == selected_lci_method,] %>% dplyr::right_join(processes) %>% @@ -135,6 +148,8 @@ create_pivot_list <- function(pivot_data, myList[[i]] <- tmp_data } + names(myList) <- sprintf("lci_method%d", seq_along(lci_methods)) + return(myList) -} \ No newline at end of file +} From fb579714a0afa67cc3a09944a24542daece789d8 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 5 Sep 2023 18:52:41 +0200 Subject: [PATCH 03/51] Use selectColumns(), add argument "method_col" --- DESCRIPTION | 1 + R/data_aggregation.R | 17 ++++++++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 16c2cd4..acf3955 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -20,6 +20,7 @@ Imports: ggforce, ggplot2, janitor, + kwb.utils, magrittr, openxlsx, readr, diff --git a/R/data_aggregation.R b/R/data_aggregation.R index ee106d4..48811bf 100644 --- a/R/data_aggregation.R +++ b/R/data_aggregation.R @@ -105,6 +105,7 @@ pivot_data <- function( #' @return a list of results, where each element contains the result table for #' one lci_method #' @importFrom dplyr right_join arrange +#' @importFrom kwb.utils selectColumns #' @export #' @examples #' @@ -126,23 +127,29 @@ pivot_data <- function( #' umberto10_data_pivot_list <- kwb.umberto::create_pivot_list(umberto10_data_pivot) #' head(umberto10_data_pivot_list) #' -create_pivot_list <- function(pivot_data, arrange_cols = "process") +create_pivot_list <- function( + pivot_data, + arrange_cols = "process", + method_col = "lci_method" +) { myList <- list() - lci_methods <- unique(pivot_data$lci_method) + method_vector <- kwb.utils::selectColumns(pivot_data, method_col) + + lci_methods <- unique(method_vector) for (i in seq_along(lci_methods)) { - selected_lci_method <- unique(pivot_data$lci_method)[i] + selected_lci_method <- lci_methods[i] processes <- data.frame( lci_method = selected_lci_method, - process = unique(pivot_data$process), + process = unique(kwb.utils::selectColumns(pivot_data, "process")), stringsAsFactors = FALSE ) - tmp_data <- pivot_data[pivot_data$lci_method == selected_lci_method,] %>% + tmp_data <- pivot_data[method_vector == selected_lci_method, ] %>% dplyr::right_join(processes) %>% dplyr::arrange_(arrange_cols) From 3642a7bd47b9889417a953146ae7d1fed5cd0955 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 5 Sep 2023 19:18:13 +0200 Subject: [PATCH 04/51] Consider column name passed as argument --- R/data_aggregation.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/R/data_aggregation.R b/R/data_aggregation.R index 48811bf..843c9d2 100644 --- a/R/data_aggregation.R +++ b/R/data_aggregation.R @@ -144,10 +144,13 @@ create_pivot_list <- function( selected_lci_method <- lci_methods[i] processes <- data.frame( - lci_method = selected_lci_method, + METHOD = selected_lci_method, process = unique(kwb.utils::selectColumns(pivot_data, "process")), stringsAsFactors = FALSE - ) + ) %>% + kwb.utils::renameColumns(list( + METHOD = method_col + )) tmp_data <- pivot_data[method_vector == selected_lci_method, ] %>% dplyr::right_join(processes) %>% From 5b7c60a740b74ac3e47c3ee16c37a34647c571cd Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 5 Sep 2023 19:30:10 +0200 Subject: [PATCH 05/51] Name list elements according to method_col and use stats::setNames() to shorten by 1 line --- R/data_aggregation.R | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/R/data_aggregation.R b/R/data_aggregation.R index 843c9d2..d6c48a6 100644 --- a/R/data_aggregation.R +++ b/R/data_aggregation.R @@ -159,7 +159,5 @@ create_pivot_list <- function( myList[[i]] <- tmp_data } - names(myList) <- sprintf("lci_method%d", seq_along(lci_methods)) - - return(myList) + stats::setNames(myList, sprintf("%s%d", method_col, seq_along(lci_methods))) } From ca07fe1faefa40083dd6a418d1e584140c745dd2 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 5 Sep 2023 19:34:37 +0200 Subject: [PATCH 06/51] Use lapply() and simplify variable name (lci_methods -> methods) --- R/data_aggregation.R | 46 ++++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/R/data_aggregation.R b/R/data_aggregation.R index d6c48a6..1bd0c49 100644 --- a/R/data_aggregation.R +++ b/R/data_aggregation.R @@ -133,31 +133,31 @@ create_pivot_list <- function( method_col = "lci_method" ) { - myList <- list() - method_vector <- kwb.utils::selectColumns(pivot_data, method_col) - lci_methods <- unique(method_vector) + methods <- unique(method_vector) - for (i in seq_along(lci_methods)) { - - selected_lci_method <- lci_methods[i] - - processes <- data.frame( - METHOD = selected_lci_method, - process = unique(kwb.utils::selectColumns(pivot_data, "process")), - stringsAsFactors = FALSE - ) %>% - kwb.utils::renameColumns(list( - METHOD = method_col - )) - - tmp_data <- pivot_data[method_vector == selected_lci_method, ] %>% - dplyr::right_join(processes) %>% - dplyr::arrange_(arrange_cols) - - myList[[i]] <- tmp_data - } + indices <- seq_along(methods) - stats::setNames(myList, sprintf("%s%d", method_col, seq_along(lci_methods))) + lapply( + X = indices, + FUN = function(i) { + + selected_lci_method <- methods[i] + + processes <- data.frame( + METHOD = selected_lci_method, + process = unique(kwb.utils::selectColumns(pivot_data, "process")), + stringsAsFactors = FALSE + ) %>% + kwb.utils::renameColumns(list( + METHOD = method_col + )) + + pivot_data[method_vector == selected_lci_method, ] %>% + dplyr::right_join(processes) %>% + dplyr::arrange_(arrange_cols) + } + ) %>% + stats::setNames(sprintf("%s%d", method_col, indices)) } From 3b44e1d98ef6a3358e48281340ae80e31eedc6c8 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 7 Sep 2023 09:07:18 +0200 Subject: [PATCH 07/51] Start adding support for json files as input --- R/import_rawdata_json.R | 356 ++++++++++++++++++++++++++++++++++++++++ R/merge_json_tables.R | 69 ++++++++ 2 files changed, 425 insertions(+) create mode 100644 R/import_rawdata_json.R create mode 100644 R/merge_json_tables.R diff --git a/R/import_rawdata_json.R b/R/import_rawdata_json.R new file mode 100644 index 0000000..55abe51 --- /dev/null +++ b/R/import_rawdata_json.R @@ -0,0 +1,356 @@ +if (FALSE) +{ + json_dir = "~/../Downloads/S/support/fabian/R-Umberto/Umberto11" + + result <- import_rawdata_json(json_dir) + + View(result) +} + +# import_rawdata_json ---------------------------------------------------------- +import_rawdata_json <- function(json_dir) +{ + contents <- read_json_files(json_dir) + + result <- lapply(contents, to_tables) + + data_frames <- lapply(result, merge_json_tables) + + do.call(rbind, data_frames) +} + +# read_json_files -------------------------------------------------------------- +read_json_files <- function(json_dir) +{ + json_files <- list_json_files_or_stop(json_dir) + + lapply(json_files, jsonlite::read_json) +} + +# list_json_files_or_stop ------------------------------------------------------ +list_json_files_or_stop <- function(json_dir) +{ + json_files <- list.files( + json_dir, + pattern = "\\.json$", + full.names = TRUE + ) + + if (length(json_files) < 1L) { + kwb.utils::stopFormatted( + "No result files (*.json) in folder '%s/'", json_dir + ) + } + + json_files +} + +# to_tables -------------------------------------------------------------------- +to_tables <- function(content) +{ + #content <- contents[[1L]] + + fetch <- kwb.utils::createAccessor(content) + + list( + products = fetch("products") %>% + convert_and_bind(to_product) %>% + prefix_columns("product_"), + entries = fetch("entries") %>% + convert_and_bind(to_entry) %>% + kwb.utils::removeColumns("exchangeFullName") %>% + prefix_columns("entry_"), + processes = fetch("processes") %>% + convert_and_bind(to_process) %>% + prefix_columns("process_"), + places = fetch("places") %>% + convert_and_bind(to_place) %>% + prefix_columns("place_"), + indicators = fetch("indicators") %>% + convert_and_bind(to_indicator) %>% + prefix_columns("indicator_"), + scenarios = fetch("scenarios") %>% + convert_and_bind(to_scenario) %>% + prefix_columns("scenario_"), + evaluationMethods = fetch("evaluationMethods") %>% + convert_and_bind(to_evaluationMethod) %>% + prefix_columns("evaluationMethod_") + ) +} + +# to_product ------------------------------------------------------------------- +to_product <- function(x) +{ + #x <- contents[[1]]$products[[1]] + + flat <- get_flat_part(x) + + lcia_list <- get_remaining(x, flat) %>% + remove_zero_length_entries() %>% + kwb.utils::selectElements("lcia") + + #str(lcia_list) + + #convert_and_bind(lcia_list, get_flat_part) + + lcia <- lcia_list %>% + convert_and_bind(to_lcia) %>% + kwb.utils::renameColumns(list( + "entryId" = "lciaEntryId" + )) + + cbind( + flat %>% + remove_uuid(), + lcia + ) +} + +# get_flat_part ---------------------------------------------------------------- +get_flat_part <- function(x) +{ + check_for_list(x) + + is_flat <- lengths(x) == 1L & !sapply(x, is.list) + + if (!any(is_flat)) { + return(NULL) + } + + as.data.frame(x[is_flat]) +} + +# check_for_list --------------------------------------------------------------- +check_for_list <- function(x) +{ + stopifnot(is.list(x)) +} + +# check_for_data_frame --------------------------------------------------------- +check_for_data_frame <- function(x) +{ + stopifnot(is.data.frame(x)) +} + +# get_remaining ---------------------------------------------------------------- +get_remaining <- function(x, flat_part) +{ + check_for_list(x) + check_for_data_frame(flat_part) + + x[setdiff(names(x), names(flat_part))] +} + +# remove_zero_length_entries --------------------------------------------------- +remove_zero_length_entries <- function(x) +{ + check_for_list(x) + + x[lengths(x) > 0L] +} + +# to_lcia ---------------------------------------------------------------------- +to_lcia <- function(x) +{ + #x <- lcia_list[[2L]] + flat <- get_flat_part(x) + + remaining <- get_remaining(x, flat) %>% + remove_zero_length_entries() + + #str(remaining, 2) + + get_inout <- function(side) { + sublist <- remaining[[paste0(side, "s")]] + if (!is.null(sublist)) { + sublist %>% + convert_and_bind(get_flat_part) %>% + cbind(side = side) + } # else NULL + } + + inputs <- get_inout("input") + outputs <- get_inout("output") + + cbind(flat, rbind(inputs, outputs)) +} + +# to_entry --------------------------------------------------------------------- +to_entry <- function(xz) +{ + #xz <- contents[[1]]$entries[[1]] + flat <- get_flat_part(xz) + + kwb.utils::removeColumns(flat, c( + "X.id", + "exchangeId", + "classificationId" + )) +} + +# to_process ------------------------------------------------------------------- +to_process <- function(x) +{ + x %>% + get_flat_part() %>% + remove_xid() +} + +# remove_xid ------------------------------------------------------------------- +remove_xid <- function(df) +{ + kwb.utils::removeColumns(df, "X.id") +} + +# remove_uuid ------------------------------------------------------------------ +remove_uuid <- function(df) +{ + kwb.utils::removeColumns(df, "uuid") +} + +# to_place --------------------------------------------------------------------- +to_place <- function(x) +{ + x %>% + get_flat_part() %>% + remove_xid() +} + +# to_scenario ------------------------------------------------------------------ +to_scenario <- function(x) +{ + x %>% + get_flat_part() %>% + remove_xid() +} + +# to_evaluationMethod ---------------------------------------------------------- +to_evaluationMethod <- function(x) +{ + x %>% + get_flat_part() %>% + remove_xid() %>% + remove_uuid() +} + +# to_indicator ----------------------------------------------------------------- +to_indicator <- function(x) +{ + #x <- contents[[1L]]$indicators[[1L]] + flat <- get_flat_part(x) + + path_parts <- get_remaining(x, flat) %>% + kwb.utils::selectElements("path") + + stopifnot(all(lengths(path_parts) == 1L)) + + flat %>% + remove_uuid() %>% + cbind(indicatorPath = paste0(path_parts[[1L]], "->", path_parts[[2L]])) +} + +# convert_and_bind ------------------------------------------------------------- +convert_and_bind <- function(x_list, converter) +{ + lapply(x_list, converter) %>% + kwb.utils::safeRowBindAll() +} + +# flatten_all ------------------------------------------------------------------ +flatten_all <- function(x) +{ + do.call(rbind, lapply(x, flatten)) +} + +# flatten ---------------------------------------------------------------------- +flatten <- function(x, depth = 0L, max_depth = 3L) +{ + if (depth > max_depth) { + return(x) + } + + check_for_list(x) + + # If the list is empty, return NULL + if (length(x) == 0L) { + return(NULL) + } + + # If the elements are not named, flatten and merge them with a + # specific function + if (is.null(names(x))) { + return(flatten_list_of_unnamed_elements(x)) + } + + # Get the part that is already flat + part_1 <- get_flat_part(x) + + remaining <- get_remaining(x, part_1) + + # Loop through the other parts + part_2 <- if (length(remaining)) { + + result <- lapply(names(remaining), function(element) { + cat(kwb.utils::indent(element, depth), "\n") + flatten( + remaining[[element]], + depth = depth + 1L, + max_depth = max_depth + ) + }) + + names(result) <- elements + + result <- kwb.utils::excludeNULL(result, dbg = FALSE) + } # else NULL + + if (is.null(part_2)) { + return(part_1) + } + + if (kwb.utils::allAreIdentical(lapply(part_2, names))) { + part_2 <- do.call(rbind, part_2) + kwb.utils::resetRowNames( + cbind(data.frame(id = rownames(part_2)), part_2) + ) + } + + if (is.null(part_1)) { + return(part_2) + } + + if ( + is.data.frame(part_1) && + is.data.frame(part_2) && + nrow(part_1) == 1L + ) { + return(cbind(part_1, part_2)) + } + + list(part_1 = part_1, part_2 = part_2) +} + +# flatten_list_of_unnamed_elements --------------------------------------------- +flatten_list_of_unnamed_elements <- function(x) +{ + stopifnot(!is.null(names(x))) + + # We expect all elements to be lists with identical names + stopifnot(all_have_identical_names(x)) + + do.call(rbind, lapply(x, flatten)) +} + +# all_have_identical_names ----------------------------------------------------- +all_have_identical_names <- function(x) +{ + suppressMessages(kwb.utils::allAreIdentical(lapply(x, names))) +} + +# prefix_columns --------------------------------------------------------------- +prefix_columns <- function(df, prefix = deparse(substitute(df))) +{ + check_for_data_frame(df) + + stats::setNames(df, paste0(prefix, names(df))) +} diff --git a/R/merge_json_tables.R b/R/merge_json_tables.R new file mode 100644 index 0000000..33cfc57 --- /dev/null +++ b/R/merge_json_tables.R @@ -0,0 +1,69 @@ +# merge_json_tables ------------------------------------------------------------ +merge_json_tables <- function(tables) +{ + fetch <- kwb.utils::createAccessor(tables) + + fetch("products") %>% + dplyr::left_join( + kwb.utils::selectColumns(fetch("entries"), c( + "entry_id", + "entry_exchange", + "entry_unit" + )), + by = c(product_entryId = "entry_id") + ) %>% + kwb.utils::renameColumns(list( + entry_exchange = "ref_flow_exchange", + entry_unit = "ref_flow_unit" + )) %>% + dplyr::left_join( + fetch("entries"), + by = c(product_lciaEntryId = "entry_id") + ) %>% + dplyr::left_join( + fetch("scenarios"), + by = c(product_scenarioId = "scenario_id") + ) %>% + dplyr::left_join( + fetch("indicators"), + by = c(product_indicatorId = "indicator_id") + ) %>% + dplyr::left_join( + fetch("processes"), + by = c(product_processId = "process_id") + ) %>% + dplyr::left_join( + fetch("places"), + by = c(product_placeId = "place_id") + ) %>% + kwb.utils::moveColumnsToFront(c( + "scenario_name" + , "ref_flow_exchange" + , "product_amount" + , "ref_flow_unit" + , "product_arrow" + , "indicator_indicatorPath" + , "indicator_name" + , "process_name" + , "place_name" + , "entry_exchange" + , "entry_exchangeContext" + , "product_quantity" + , "product_unit" + , "entry_materialType" + , "product_side" + , "entry_costItemGroup" + , "entry_exchangeBoundary" + )) %>% + move_id_columns_right() +} + +# move_id_columns_right -------------------------------------------------------- +move_id_columns_right <- function(df) +{ + columns <- names(df) + + is_id <- grepl("(Id|Index)$", columns) + + df[, c(columns[!is_id], columns[is_id]), drop = FALSE] +} From 9ee44367b158224da10528d9ff06fa66bdcf5259 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 7 Sep 2023 09:24:00 +0200 Subject: [PATCH 08/51] Fix issues reported by R CMD check --- DESCRIPTION | 5 ++++- NAMESPACE | 1 + R/import_rawdata_json.R | 4 +++- man/create_pivot_list.Rd | 6 +++++- 4 files changed, 13 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index acf3955..6eaf4b1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -20,6 +20,7 @@ Imports: ggforce, ggplot2, janitor, + jsonlite, kwb.utils, magrittr, openxlsx, @@ -33,8 +34,10 @@ Suggests: testthat (>= 3.0.0) VignetteBuilder: knitr +Remotes: + github::kwb-r/kwb.utils ByteCompile: true Encoding: UTF-8 LazyData: true -RoxygenNote: 7.2.2 +RoxygenNote: 7.2.3 Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index c52aa8c..a862fe5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,6 +18,7 @@ importFrom(dplyr,select) importFrom(dplyr,summarise_at) importFrom(ggforce,facet_wrap_paginate) importFrom(janitor,clean_names) +importFrom(kwb.utils,selectColumns) importFrom(magrittr,"%>%") importFrom(openxlsx,write.xlsx) importFrom(readr,read_csv) diff --git a/R/import_rawdata_json.R b/R/import_rawdata_json.R index 55abe51..a0f962a 100644 --- a/R/import_rawdata_json.R +++ b/R/import_rawdata_json.R @@ -289,8 +289,10 @@ flatten <- function(x, depth = 0L, max_depth = 3L) # Loop through the other parts part_2 <- if (length(remaining)) { + + elements <- names(remaining) - result <- lapply(names(remaining), function(element) { + result <- lapply(elements, function(element) { cat(kwb.utils::indent(element, depth), "\n") flatten( remaining[[element]], diff --git a/man/create_pivot_list.Rd b/man/create_pivot_list.Rd index 8249f8d..c39196c 100644 --- a/man/create_pivot_list.Rd +++ b/man/create_pivot_list.Rd @@ -4,7 +4,11 @@ \alias{create_pivot_list} \title{Create pivot list} \usage{ -create_pivot_list(pivot_data, arrange_cols = "process") +create_pivot_list( + pivot_data, + arrange_cols = "process", + method_col = "lci_method" +) } \arguments{ \item{pivot_data}{privot data as retrieved from function pivot_data()} From 825470ce1938330117e2a78aac27ec68d0d1a60c Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 7 Sep 2023 11:57:14 +0200 Subject: [PATCH 09/51] Provide the old format so that the post-processing can remain --- R/import_rawdata_json.R | 51 ++++++++++++++++++++++++++++++++++++----- R/merge_json_tables.R | 1 + 2 files changed, 46 insertions(+), 6 deletions(-) diff --git a/R/import_rawdata_json.R b/R/import_rawdata_json.R index a0f962a..0c55d4f 100644 --- a/R/import_rawdata_json.R +++ b/R/import_rawdata_json.R @@ -2,13 +2,13 @@ if (FALSE) { json_dir = "~/../Downloads/S/support/fabian/R-Umberto/Umberto11" - result <- import_rawdata_json(json_dir) - + result <- kwb.umberto:::import_rawdata_json(json_dir, add_place = TRUE) + names(result) View(result) } # import_rawdata_json ---------------------------------------------------------- -import_rawdata_json <- function(json_dir) +import_rawdata_json <- function(json_dir, old_format = TRUE, add_place = FALSE) { contents <- read_json_files(json_dir) @@ -16,7 +16,38 @@ import_rawdata_json <- function(json_dir) data_frames <- lapply(result, merge_json_tables) - do.call(rbind, data_frames) + result <- kwb.utils::rbindAll(data_frames, nameColumn = "model") + + if (!old_format) { + return(result) + } + + fetch <- kwb.utils::createAccessor(result) + + result <- data.frame( + project = "not-used", + model = fetch("model"), + net = "not-used", + timestamp = fetch("timestamp"), + product = "not-used", + product_name = fetch("ref_flow_exchange"), + product_arrow = fetch("product_arrow"), + product_flow_amount = fetch("product_amount"), + lcia_method = fetch("indicator_name"), + phase = "not-used", + process = fetch("process_name"), + material_type = fetch("entry_materialType"), + material = "not-used", + quantity = fetch("product_quantity"), + unit = fetch("product_unit"), + scenario = fetch("scenario_name") + ) + + if (add_place) { + result <- cbind(result, place = fetch("place_name")) + } + + result } # read_json_files -------------------------------------------------------------- @@ -24,7 +55,10 @@ read_json_files <- function(json_dir) { json_files <- list_json_files_or_stop(json_dir) - lapply(json_files, jsonlite::read_json) + stats::setNames( + lapply(json_files, jsonlite::read_json), + basename(json_files) # %>% kwb.utils::removeExtension() + ) } # list_json_files_or_stop ------------------------------------------------------ @@ -52,7 +86,7 @@ to_tables <- function(content) fetch <- kwb.utils::createAccessor(content) - list( + result <- list( products = fetch("products") %>% convert_and_bind(to_product) %>% prefix_columns("product_"), @@ -76,6 +110,11 @@ to_tables <- function(content) convert_and_bind(to_evaluationMethod) %>% prefix_columns("evaluationMethod_") ) + + structure( + result, + timestamp = fetch("timestamp") + ) } # to_product ------------------------------------------------------------------- diff --git a/R/merge_json_tables.R b/R/merge_json_tables.R index 33cfc57..ef98842 100644 --- a/R/merge_json_tables.R +++ b/R/merge_json_tables.R @@ -55,6 +55,7 @@ merge_json_tables <- function(tables) , "entry_costItemGroup" , "entry_exchangeBoundary" )) %>% + cbind(timestamp = kwb.utils::getAttribute(tables, "timestamp")) %>% move_id_columns_right() } From 0cb62e21a5f17f64cf6cc8563911784e43e4bdad Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 7 Sep 2023 12:04:50 +0200 Subject: [PATCH 10/51] Document argument "method_col" --- R/data_aggregation.R | 3 +++ man/create_pivot_list.Rd | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/R/data_aggregation.R b/R/data_aggregation.R index 1bd0c49..4df7a0b 100644 --- a/R/data_aggregation.R +++ b/R/data_aggregation.R @@ -102,6 +102,9 @@ pivot_data <- function( #' #' @param pivot_data privot data as retrieved from function pivot_data() #' @param arrange_cols columns used for arranging the data (default: "process") +#' @param method_col name of the column containing the method +#' (default: "lci_method"). Depending on your Umberto version you may need to +#' set method_col to "lcia_method". #' @return a list of results, where each element contains the result table for #' one lci_method #' @importFrom dplyr right_join arrange diff --git a/man/create_pivot_list.Rd b/man/create_pivot_list.Rd index c39196c..c116c0c 100644 --- a/man/create_pivot_list.Rd +++ b/man/create_pivot_list.Rd @@ -14,6 +14,10 @@ create_pivot_list( \item{pivot_data}{privot data as retrieved from function pivot_data()} \item{arrange_cols}{columns used for arranging the data (default: "process")} + +\item{method_col}{name of the column containing the method +(default: "lci_method"). Depending on your Umberto version you may need to +set method_col to "lcia_method".} } \value{ a list of results, where each element contains the result table for From cdfd04896c0f6b8a8185d3468227faf27b2334ae Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 7 Sep 2023 12:05:31 +0200 Subject: [PATCH 11/51] Document and export import_rawdata_json() --- NAMESPACE | 1 + R/import_rawdata_json.R | 11 +++++++++++ man/import_rawdata_json.Rd | 24 ++++++++++++++++++++++++ 3 files changed, 36 insertions(+) create mode 100644 man/import_rawdata_json.Rd diff --git a/NAMESPACE b/NAMESPACE index a862fe5..99b5c7d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ export("%>%") export(create_pivot_list) export(group_data) export(import_rawdata) +export(import_rawdata_json) export(pivot_data) export(plot_results) export(write_xlsx) diff --git a/R/import_rawdata_json.R b/R/import_rawdata_json.R index 0c55d4f..1f94197 100644 --- a/R/import_rawdata_json.R +++ b/R/import_rawdata_json.R @@ -8,6 +8,17 @@ if (FALSE) } # import_rawdata_json ---------------------------------------------------------- + +#' Import Umberto results from .json files +#' +#' @param json_dir path to directory containing .json files. All .json files +#' will be read. +#' @param old_format if TRUE (the default) the same columns are provided that +#' are provided by \code{\link{import_rawdata}} that imports .csv files +#' @param The old_format does not contain the "place". With add_place = TRUE +#' the "place" is contained in the result even if old_format = TRUE. +#' @return data frame +#' @export import_rawdata_json <- function(json_dir, old_format = TRUE, add_place = FALSE) { contents <- read_json_files(json_dir) diff --git a/man/import_rawdata_json.Rd b/man/import_rawdata_json.Rd new file mode 100644 index 0000000..9f2f682 --- /dev/null +++ b/man/import_rawdata_json.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/import_rawdata_json.R +\name{import_rawdata_json} +\alias{import_rawdata_json} +\title{Import Umberto results from .json files} +\usage{ +import_rawdata_json(json_dir, old_format = TRUE, add_place = FALSE) +} +\arguments{ +\item{json_dir}{path to directory containing .json files. All .json files +will be read.} + +\item{old_format}{if TRUE (the default) the same columns are provided that +are provided by \code{\link{import_rawdata}} that imports .csv files} + +\item{The}{old_format does not contain the "place". With add_place = TRUE +the "place" is contained in the result even if old_format = TRUE.} +} +\value{ +data frame +} +\description{ +Import Umberto results from .json files +} From 9c5b3ff68ca02066b1428ffc11d90fc9bf3b84d5 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 7 Sep 2023 12:40:53 +0200 Subject: [PATCH 12/51] Fix error in documentation --- R/import_rawdata_json.R | 4 ++-- man/import_rawdata_json.Rd | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/import_rawdata_json.R b/R/import_rawdata_json.R index 1f94197..b6f52a9 100644 --- a/R/import_rawdata_json.R +++ b/R/import_rawdata_json.R @@ -15,8 +15,8 @@ if (FALSE) #' will be read. #' @param old_format if TRUE (the default) the same columns are provided that #' are provided by \code{\link{import_rawdata}} that imports .csv files -#' @param The old_format does not contain the "place". With add_place = TRUE -#' the "place" is contained in the result even if old_format = TRUE. +#' @param add_place With add_place = TRUE, the "place" is contained in the +#' result even if old_format = TRUE #' @return data frame #' @export import_rawdata_json <- function(json_dir, old_format = TRUE, add_place = FALSE) diff --git a/man/import_rawdata_json.Rd b/man/import_rawdata_json.Rd index 9f2f682..c77bc48 100644 --- a/man/import_rawdata_json.Rd +++ b/man/import_rawdata_json.Rd @@ -13,8 +13,8 @@ will be read.} \item{old_format}{if TRUE (the default) the same columns are provided that are provided by \code{\link{import_rawdata}} that imports .csv files} -\item{The}{old_format does not contain the "place". With add_place = TRUE -the "place" is contained in the result even if old_format = TRUE.} +\item{add_place}{With add_place = TRUE, the "place" is contained in the +result even if old_format = TRUE} } \value{ data frame From 712b31d9bf4e6e367900f7652dbf89fff506ae22 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 7 Sep 2023 12:49:13 +0200 Subject: [PATCH 13/51] Use arrange() instead of deprecated arrange_() --- R/data_aggregation.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/data_aggregation.R b/R/data_aggregation.R index 4df7a0b..e9ce955 100644 --- a/R/data_aggregation.R +++ b/R/data_aggregation.R @@ -159,7 +159,7 @@ create_pivot_list <- function( pivot_data[method_vector == selected_lci_method, ] %>% dplyr::right_join(processes) %>% - dplyr::arrange_(arrange_cols) + dplyr::arrange(arrange_cols) } ) %>% stats::setNames(sprintf("%s%d", method_col, indices)) From 80e536e31d0b0adb83a60cf02c19606f6e6bd36f Mon Sep 17 00:00:00 2001 From: hsonne Date: Sun, 10 Sep 2023 12:58:29 +0200 Subject: [PATCH 14/51] Remove unused function flatten_all() --- R/import_rawdata_json.R | 6 ------ 1 file changed, 6 deletions(-) diff --git a/R/import_rawdata_json.R b/R/import_rawdata_json.R index b6f52a9..6e61f3f 100644 --- a/R/import_rawdata_json.R +++ b/R/import_rawdata_json.R @@ -306,12 +306,6 @@ convert_and_bind <- function(x_list, converter) kwb.utils::safeRowBindAll() } -# flatten_all ------------------------------------------------------------------ -flatten_all <- function(x) -{ - do.call(rbind, lapply(x, flatten)) -} - # flatten ---------------------------------------------------------------------- flatten <- function(x, depth = 0L, max_depth = 3L) { From b2710493d19e82fdb17d7785638390e928696302 Mon Sep 17 00:00:00 2001 From: hsonne Date: Sun, 10 Sep 2023 13:05:19 +0200 Subject: [PATCH 15/51] Move general functions to utils.R --- R/import_rawdata_json.R | 33 ------------------------------ R/utils.R | 45 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 33 deletions(-) create mode 100644 R/utils.R diff --git a/R/import_rawdata_json.R b/R/import_rawdata_json.R index 6e61f3f..d988238 100644 --- a/R/import_rawdata_json.R +++ b/R/import_rawdata_json.R @@ -170,18 +170,6 @@ get_flat_part <- function(x) as.data.frame(x[is_flat]) } -# check_for_list --------------------------------------------------------------- -check_for_list <- function(x) -{ - stopifnot(is.list(x)) -} - -# check_for_data_frame --------------------------------------------------------- -check_for_data_frame <- function(x) -{ - stopifnot(is.data.frame(x)) -} - # get_remaining ---------------------------------------------------------------- get_remaining <- function(x, flat_part) { @@ -191,14 +179,6 @@ get_remaining <- function(x, flat_part) x[setdiff(names(x), names(flat_part))] } -# remove_zero_length_entries --------------------------------------------------- -remove_zero_length_entries <- function(x) -{ - check_for_list(x) - - x[lengths(x) > 0L] -} - # to_lcia ---------------------------------------------------------------------- to_lcia <- function(x) { @@ -387,16 +367,3 @@ flatten_list_of_unnamed_elements <- function(x) do.call(rbind, lapply(x, flatten)) } -# all_have_identical_names ----------------------------------------------------- -all_have_identical_names <- function(x) -{ - suppressMessages(kwb.utils::allAreIdentical(lapply(x, names))) -} - -# prefix_columns --------------------------------------------------------------- -prefix_columns <- function(df, prefix = deparse(substitute(df))) -{ - check_for_data_frame(df) - - stats::setNames(df, paste0(prefix, names(df))) -} diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 0000000..804e37d --- /dev/null +++ b/R/utils.R @@ -0,0 +1,45 @@ +# check_for_list --------------------------------------------------------------- +check_for_list <- function(x) +{ + stopifnot(is.list(x)) +} + +# check_for_data_frame --------------------------------------------------------- +check_for_data_frame <- function(x) +{ + stopifnot(is.data.frame(x)) +} + +# all_have_identical_names ----------------------------------------------------- +all_have_identical_names <- function(x) +{ + suppressMessages(kwb.utils::allAreIdentical(lapply(x, names))) +} + +# prefix_columns --------------------------------------------------------------- +prefix_columns <- function(df, prefix = deparse(substitute(df))) +{ + check_for_data_frame(df) + + stats::setNames(df, paste0(prefix, names(df))) +} + +# remove_zero_length_entries --------------------------------------------------- +remove_zero_length_entries <- function(x) +{ + check_for_list(x) + + x[lengths(x) > 0L] +} + +# replace_null_with_na --------------------------------------------------------- +replace_null_with_na <- function(x) +{ + stopifnot(is.list(x)) + + is_null <- sapply(x, is.null) + + x[is_null] <- as.list(rep(NA, sum(is_null))) + + x +} From c6cdb8a2320883026a93b39e1c6ba84ea656e757 Mon Sep 17 00:00:00 2001 From: hsonne Date: Sun, 10 Sep 2023 13:39:52 +0200 Subject: [PATCH 16/51] Remove unused function flatten() --- R/import_rawdata_json.R | 70 ----------------------------------------- 1 file changed, 70 deletions(-) diff --git a/R/import_rawdata_json.R b/R/import_rawdata_json.R index d988238..c2bff90 100644 --- a/R/import_rawdata_json.R +++ b/R/import_rawdata_json.R @@ -286,76 +286,6 @@ convert_and_bind <- function(x_list, converter) kwb.utils::safeRowBindAll() } -# flatten ---------------------------------------------------------------------- -flatten <- function(x, depth = 0L, max_depth = 3L) -{ - if (depth > max_depth) { - return(x) - } - - check_for_list(x) - - # If the list is empty, return NULL - if (length(x) == 0L) { - return(NULL) - } - - # If the elements are not named, flatten and merge them with a - # specific function - if (is.null(names(x))) { - return(flatten_list_of_unnamed_elements(x)) - } - - # Get the part that is already flat - part_1 <- get_flat_part(x) - - remaining <- get_remaining(x, part_1) - - # Loop through the other parts - part_2 <- if (length(remaining)) { - - elements <- names(remaining) - - result <- lapply(elements, function(element) { - cat(kwb.utils::indent(element, depth), "\n") - flatten( - remaining[[element]], - depth = depth + 1L, - max_depth = max_depth - ) - }) - - names(result) <- elements - - result <- kwb.utils::excludeNULL(result, dbg = FALSE) - } # else NULL - - if (is.null(part_2)) { - return(part_1) - } - - if (kwb.utils::allAreIdentical(lapply(part_2, names))) { - part_2 <- do.call(rbind, part_2) - kwb.utils::resetRowNames( - cbind(data.frame(id = rownames(part_2)), part_2) - ) - } - - if (is.null(part_1)) { - return(part_2) - } - - if ( - is.data.frame(part_1) && - is.data.frame(part_2) && - nrow(part_1) == 1L - ) { - return(cbind(part_1, part_2)) - } - - list(part_1 = part_1, part_2 = part_2) -} - # flatten_list_of_unnamed_elements --------------------------------------------- flatten_list_of_unnamed_elements <- function(x) { From 6b4ba96c8218c2006df6a11f211b6001c74e112e Mon Sep 17 00:00:00 2001 From: hsonne Date: Sun, 10 Sep 2023 13:43:13 +0200 Subject: [PATCH 17/51] Move to_tables() and related to its own file --- R/import_rawdata_json.R | 208 ---------------------------------------- R/to_tables.R | 207 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 207 insertions(+), 208 deletions(-) create mode 100644 R/to_tables.R diff --git a/R/import_rawdata_json.R b/R/import_rawdata_json.R index c2bff90..7e3ca7c 100644 --- a/R/import_rawdata_json.R +++ b/R/import_rawdata_json.R @@ -89,211 +89,3 @@ list_json_files_or_stop <- function(json_dir) json_files } - -# to_tables -------------------------------------------------------------------- -to_tables <- function(content) -{ - #content <- contents[[1L]] - - fetch <- kwb.utils::createAccessor(content) - - result <- list( - products = fetch("products") %>% - convert_and_bind(to_product) %>% - prefix_columns("product_"), - entries = fetch("entries") %>% - convert_and_bind(to_entry) %>% - kwb.utils::removeColumns("exchangeFullName") %>% - prefix_columns("entry_"), - processes = fetch("processes") %>% - convert_and_bind(to_process) %>% - prefix_columns("process_"), - places = fetch("places") %>% - convert_and_bind(to_place) %>% - prefix_columns("place_"), - indicators = fetch("indicators") %>% - convert_and_bind(to_indicator) %>% - prefix_columns("indicator_"), - scenarios = fetch("scenarios") %>% - convert_and_bind(to_scenario) %>% - prefix_columns("scenario_"), - evaluationMethods = fetch("evaluationMethods") %>% - convert_and_bind(to_evaluationMethod) %>% - prefix_columns("evaluationMethod_") - ) - - structure( - result, - timestamp = fetch("timestamp") - ) -} - -# to_product ------------------------------------------------------------------- -to_product <- function(x) -{ - #x <- contents[[1]]$products[[1]] - - flat <- get_flat_part(x) - - lcia_list <- get_remaining(x, flat) %>% - remove_zero_length_entries() %>% - kwb.utils::selectElements("lcia") - - #str(lcia_list) - - #convert_and_bind(lcia_list, get_flat_part) - - lcia <- lcia_list %>% - convert_and_bind(to_lcia) %>% - kwb.utils::renameColumns(list( - "entryId" = "lciaEntryId" - )) - - cbind( - flat %>% - remove_uuid(), - lcia - ) -} - -# get_flat_part ---------------------------------------------------------------- -get_flat_part <- function(x) -{ - check_for_list(x) - - is_flat <- lengths(x) == 1L & !sapply(x, is.list) - - if (!any(is_flat)) { - return(NULL) - } - - as.data.frame(x[is_flat]) -} - -# get_remaining ---------------------------------------------------------------- -get_remaining <- function(x, flat_part) -{ - check_for_list(x) - check_for_data_frame(flat_part) - - x[setdiff(names(x), names(flat_part))] -} - -# to_lcia ---------------------------------------------------------------------- -to_lcia <- function(x) -{ - #x <- lcia_list[[2L]] - flat <- get_flat_part(x) - - remaining <- get_remaining(x, flat) %>% - remove_zero_length_entries() - - #str(remaining, 2) - - get_inout <- function(side) { - sublist <- remaining[[paste0(side, "s")]] - if (!is.null(sublist)) { - sublist %>% - convert_and_bind(get_flat_part) %>% - cbind(side = side) - } # else NULL - } - - inputs <- get_inout("input") - outputs <- get_inout("output") - - cbind(flat, rbind(inputs, outputs)) -} - -# to_entry --------------------------------------------------------------------- -to_entry <- function(xz) -{ - #xz <- contents[[1]]$entries[[1]] - flat <- get_flat_part(xz) - - kwb.utils::removeColumns(flat, c( - "X.id", - "exchangeId", - "classificationId" - )) -} - -# to_process ------------------------------------------------------------------- -to_process <- function(x) -{ - x %>% - get_flat_part() %>% - remove_xid() -} - -# remove_xid ------------------------------------------------------------------- -remove_xid <- function(df) -{ - kwb.utils::removeColumns(df, "X.id") -} - -# remove_uuid ------------------------------------------------------------------ -remove_uuid <- function(df) -{ - kwb.utils::removeColumns(df, "uuid") -} - -# to_place --------------------------------------------------------------------- -to_place <- function(x) -{ - x %>% - get_flat_part() %>% - remove_xid() -} - -# to_scenario ------------------------------------------------------------------ -to_scenario <- function(x) -{ - x %>% - get_flat_part() %>% - remove_xid() -} - -# to_evaluationMethod ---------------------------------------------------------- -to_evaluationMethod <- function(x) -{ - x %>% - get_flat_part() %>% - remove_xid() %>% - remove_uuid() -} - -# to_indicator ----------------------------------------------------------------- -to_indicator <- function(x) -{ - #x <- contents[[1L]]$indicators[[1L]] - flat <- get_flat_part(x) - - path_parts <- get_remaining(x, flat) %>% - kwb.utils::selectElements("path") - - stopifnot(all(lengths(path_parts) == 1L)) - - flat %>% - remove_uuid() %>% - cbind(indicatorPath = paste0(path_parts[[1L]], "->", path_parts[[2L]])) -} - -# convert_and_bind ------------------------------------------------------------- -convert_and_bind <- function(x_list, converter) -{ - lapply(x_list, converter) %>% - kwb.utils::safeRowBindAll() -} - -# flatten_list_of_unnamed_elements --------------------------------------------- -flatten_list_of_unnamed_elements <- function(x) -{ - stopifnot(!is.null(names(x))) - - # We expect all elements to be lists with identical names - stopifnot(all_have_identical_names(x)) - - do.call(rbind, lapply(x, flatten)) -} - diff --git a/R/to_tables.R b/R/to_tables.R new file mode 100644 index 0000000..d393eca --- /dev/null +++ b/R/to_tables.R @@ -0,0 +1,207 @@ +# to_tables -------------------------------------------------------------------- +to_tables <- function(content) +{ + #content <- contents[[1L]] + + fetch <- kwb.utils::createAccessor(content) + + result <- list( + products = fetch("products") %>% + convert_and_bind(to_product) %>% + prefix_columns("product_"), + entries = fetch("entries") %>% + convert_and_bind(to_entry) %>% + kwb.utils::removeColumns("exchangeFullName") %>% + prefix_columns("entry_"), + processes = fetch("processes") %>% + convert_and_bind(to_process) %>% + prefix_columns("process_"), + places = fetch("places") %>% + convert_and_bind(to_place) %>% + prefix_columns("place_"), + indicators = fetch("indicators") %>% + convert_and_bind(to_indicator) %>% + prefix_columns("indicator_"), + scenarios = fetch("scenarios") %>% + convert_and_bind(to_scenario) %>% + prefix_columns("scenario_"), + evaluationMethods = fetch("evaluationMethods") %>% + convert_and_bind(to_evaluationMethod) %>% + prefix_columns("evaluationMethod_") + ) + + structure( + result, + timestamp = fetch("timestamp") + ) +} + +# to_product ------------------------------------------------------------------- +to_product <- function(x) +{ + #x <- contents[[1]]$products[[1]] + + flat <- get_flat_part(x) + + lcia_list <- get_remaining(x, flat) %>% + remove_zero_length_entries() %>% + kwb.utils::selectElements("lcia") + + #str(lcia_list) + + #convert_and_bind(lcia_list, get_flat_part) + + lcia <- lcia_list %>% + convert_and_bind(to_lcia) %>% + kwb.utils::renameColumns(list( + "entryId" = "lciaEntryId" + )) + + cbind( + flat %>% + remove_uuid(), + lcia + ) +} + +# get_flat_part ---------------------------------------------------------------- +get_flat_part <- function(x) +{ + check_for_list(x) + + is_flat <- lengths(x) == 1L & !sapply(x, is.list) + + if (!any(is_flat)) { + return(NULL) + } + + as.data.frame(x[is_flat]) +} + +# get_remaining ---------------------------------------------------------------- +get_remaining <- function(x, flat_part) +{ + check_for_list(x) + check_for_data_frame(flat_part) + + x[setdiff(names(x), names(flat_part))] +} + +# to_lcia ---------------------------------------------------------------------- +to_lcia <- function(x) +{ + #x <- lcia_list[[2L]] + flat <- get_flat_part(x) + + remaining <- get_remaining(x, flat) %>% + remove_zero_length_entries() + + #str(remaining, 2) + + get_inout <- function(side) { + sublist <- remaining[[paste0(side, "s")]] + if (!is.null(sublist)) { + sublist %>% + convert_and_bind(get_flat_part) %>% + cbind(side = side) + } # else NULL + } + + inputs <- get_inout("input") + outputs <- get_inout("output") + + cbind(flat, rbind(inputs, outputs)) +} + +# to_entry --------------------------------------------------------------------- +to_entry <- function(xz) +{ + #xz <- contents[[1]]$entries[[1]] + flat <- get_flat_part(xz) + + kwb.utils::removeColumns(flat, c( + "X.id", + "exchangeId", + "classificationId" + )) +} + +# to_process ------------------------------------------------------------------- +to_process <- function(x) +{ + x %>% + get_flat_part() %>% + remove_xid() +} + +# remove_xid ------------------------------------------------------------------- +remove_xid <- function(df) +{ + kwb.utils::removeColumns(df, "X.id") +} + +# remove_uuid ------------------------------------------------------------------ +remove_uuid <- function(df) +{ + kwb.utils::removeColumns(df, "uuid") +} + +# to_place --------------------------------------------------------------------- +to_place <- function(x) +{ + x %>% + get_flat_part() %>% + remove_xid() +} + +# to_scenario ------------------------------------------------------------------ +to_scenario <- function(x) +{ + x %>% + get_flat_part() %>% + remove_xid() +} + +# to_evaluationMethod ---------------------------------------------------------- +to_evaluationMethod <- function(x) +{ + x %>% + get_flat_part() %>% + remove_xid() %>% + remove_uuid() +} + +# to_indicator ----------------------------------------------------------------- +to_indicator <- function(x) +{ + #x <- contents[[1L]]$indicators[[1L]] + flat <- get_flat_part(x) + + path_parts <- get_remaining(x, flat) %>% + kwb.utils::selectElements("path") + + stopifnot(all(lengths(path_parts) == 1L)) + + flat %>% + remove_uuid() %>% + cbind(indicatorPath = paste0(path_parts[[1L]], "->", path_parts[[2L]])) +} + +# convert_and_bind ------------------------------------------------------------- +convert_and_bind <- function(x_list, converter) +{ + lapply(x_list, converter) %>% + kwb.utils::safeRowBindAll() +} + +# flatten_list_of_unnamed_elements --------------------------------------------- +flatten_list_of_unnamed_elements <- function(x) +{ + stopifnot(!is.null(names(x))) + + # We expect all elements to be lists with identical names + stopifnot(all_have_identical_names(x)) + + do.call(rbind, lapply(x, flatten)) +} + From 69a7b930720f802ca3d996f0fe3b51d7f4c9b95f Mon Sep 17 00:00:00 2001 From: hsonne Date: Sun, 10 Sep 2023 13:45:27 +0200 Subject: [PATCH 18/51] Move functions to utils.R, reorder by name --- R/to_tables.R | 30 ------------------------------ R/utils.R | 42 ++++++++++++++++++++++++++++++++++++------ 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/R/to_tables.R b/R/to_tables.R index d393eca..18473e9 100644 --- a/R/to_tables.R +++ b/R/to_tables.R @@ -64,29 +64,6 @@ to_product <- function(x) ) } -# get_flat_part ---------------------------------------------------------------- -get_flat_part <- function(x) -{ - check_for_list(x) - - is_flat <- lengths(x) == 1L & !sapply(x, is.list) - - if (!any(is_flat)) { - return(NULL) - } - - as.data.frame(x[is_flat]) -} - -# get_remaining ---------------------------------------------------------------- -get_remaining <- function(x, flat_part) -{ - check_for_list(x) - check_for_data_frame(flat_part) - - x[setdiff(names(x), names(flat_part))] -} - # to_lcia ---------------------------------------------------------------------- to_lcia <- function(x) { @@ -187,13 +164,6 @@ to_indicator <- function(x) cbind(indicatorPath = paste0(path_parts[[1L]], "->", path_parts[[2L]])) } -# convert_and_bind ------------------------------------------------------------- -convert_and_bind <- function(x_list, converter) -{ - lapply(x_list, converter) %>% - kwb.utils::safeRowBindAll() -} - # flatten_list_of_unnamed_elements --------------------------------------------- flatten_list_of_unnamed_elements <- function(x) { diff --git a/R/utils.R b/R/utils.R index 804e37d..9ea6ab6 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,7 +1,7 @@ -# check_for_list --------------------------------------------------------------- -check_for_list <- function(x) +# all_have_identical_names ----------------------------------------------------- +all_have_identical_names <- function(x) { - stopifnot(is.list(x)) + suppressMessages(kwb.utils::allAreIdentical(lapply(x, names))) } # check_for_data_frame --------------------------------------------------------- @@ -10,10 +10,40 @@ check_for_data_frame <- function(x) stopifnot(is.data.frame(x)) } -# all_have_identical_names ----------------------------------------------------- -all_have_identical_names <- function(x) +# check_for_list --------------------------------------------------------------- +check_for_list <- function(x) { - suppressMessages(kwb.utils::allAreIdentical(lapply(x, names))) + stopifnot(is.list(x)) +} + +# convert_and_bind ------------------------------------------------------------- +convert_and_bind <- function(x_list, converter) +{ + lapply(x_list, converter) %>% + kwb.utils::safeRowBindAll() +} + +# get_flat_part ---------------------------------------------------------------- +get_flat_part <- function(x) +{ + check_for_list(x) + + is_flat <- lengths(x) == 1L & !sapply(x, is.list) + + if (!any(is_flat)) { + return(NULL) + } + + as.data.frame(x[is_flat]) +} + +# get_remaining ---------------------------------------------------------------- +get_remaining <- function(x, flat_part) +{ + check_for_list(x) + check_for_data_frame(flat_part) + + x[setdiff(names(x), names(flat_part))] } # prefix_columns --------------------------------------------------------------- From 0f24d9e1bb91706db5c42a57d8aaaeb067e49c90 Mon Sep 17 00:00:00 2001 From: hsonne Date: Sun, 10 Sep 2023 13:50:42 +0200 Subject: [PATCH 19/51] Remove unused flatten_list_of_unnamed_elements() --- R/to_tables.R | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/R/to_tables.R b/R/to_tables.R index 18473e9..17450b7 100644 --- a/R/to_tables.R +++ b/R/to_tables.R @@ -164,14 +164,3 @@ to_indicator <- function(x) cbind(indicatorPath = paste0(path_parts[[1L]], "->", path_parts[[2L]])) } -# flatten_list_of_unnamed_elements --------------------------------------------- -flatten_list_of_unnamed_elements <- function(x) -{ - stopifnot(!is.null(names(x))) - - # We expect all elements to be lists with identical names - stopifnot(all_have_identical_names(x)) - - do.call(rbind, lapply(x, flatten)) -} - From e9e98e654c1c8b2d1dd20203d9e255a90166430b Mon Sep 17 00:00:00 2001 From: hsonne Date: Sun, 10 Sep 2023 13:55:28 +0200 Subject: [PATCH 20/51] Move two functions to helpers.R, reorder funcs --- R/helpers.R | 11 +++++++++++ R/to_tables.R | 51 +++++++++++++++++++++++---------------------------- 2 files changed, 34 insertions(+), 28 deletions(-) create mode 100644 R/helpers.R diff --git a/R/helpers.R b/R/helpers.R new file mode 100644 index 0000000..cec6263 --- /dev/null +++ b/R/helpers.R @@ -0,0 +1,11 @@ +# remove_xid ------------------------------------------------------------------- +remove_xid <- function(df) +{ + kwb.utils::removeColumns(df, "X.id") +} + +# remove_uuid ------------------------------------------------------------------ +remove_uuid <- function(df) +{ + kwb.utils::removeColumns(df, "uuid") +} diff --git a/R/to_tables.R b/R/to_tables.R index 17450b7..96e9a72 100644 --- a/R/to_tables.R +++ b/R/to_tables.R @@ -6,25 +6,32 @@ to_tables <- function(content) fetch <- kwb.utils::createAccessor(content) result <- list( + products = fetch("products") %>% convert_and_bind(to_product) %>% prefix_columns("product_"), + entries = fetch("entries") %>% convert_and_bind(to_entry) %>% kwb.utils::removeColumns("exchangeFullName") %>% prefix_columns("entry_"), + processes = fetch("processes") %>% convert_and_bind(to_process) %>% prefix_columns("process_"), + places = fetch("places") %>% convert_and_bind(to_place) %>% prefix_columns("place_"), + indicators = fetch("indicators") %>% convert_and_bind(to_indicator) %>% prefix_columns("indicator_"), + scenarios = fetch("scenarios") %>% convert_and_bind(to_scenario) %>% prefix_columns("scenario_"), + evaluationMethods = fetch("evaluationMethods") %>% convert_and_bind(to_evaluationMethod) %>% prefix_columns("evaluationMethod_") @@ -111,18 +118,6 @@ to_process <- function(x) remove_xid() } -# remove_xid ------------------------------------------------------------------- -remove_xid <- function(df) -{ - kwb.utils::removeColumns(df, "X.id") -} - -# remove_uuid ------------------------------------------------------------------ -remove_uuid <- function(df) -{ - kwb.utils::removeColumns(df, "uuid") -} - # to_place --------------------------------------------------------------------- to_place <- function(x) { @@ -131,6 +126,22 @@ to_place <- function(x) remove_xid() } +# to_indicator ----------------------------------------------------------------- +to_indicator <- function(x) +{ + #x <- contents[[1L]]$indicators[[1L]] + flat <- get_flat_part(x) + + path_parts <- get_remaining(x, flat) %>% + kwb.utils::selectElements("path") + + stopifnot(all(lengths(path_parts) == 1L)) + + flat %>% + remove_uuid() %>% + cbind(indicatorPath = paste0(path_parts[[1L]], "->", path_parts[[2L]])) +} + # to_scenario ------------------------------------------------------------------ to_scenario <- function(x) { @@ -148,19 +159,3 @@ to_evaluationMethod <- function(x) remove_uuid() } -# to_indicator ----------------------------------------------------------------- -to_indicator <- function(x) -{ - #x <- contents[[1L]]$indicators[[1L]] - flat <- get_flat_part(x) - - path_parts <- get_remaining(x, flat) %>% - kwb.utils::selectElements("path") - - stopifnot(all(lengths(path_parts) == 1L)) - - flat %>% - remove_uuid() %>% - cbind(indicatorPath = paste0(path_parts[[1L]], "->", path_parts[[2L]])) -} - From 9ad4d6e93dba769e11135884b411d3c29360e00d Mon Sep 17 00:00:00 2001 From: hsonne Date: Sun, 10 Sep 2023 14:15:34 +0200 Subject: [PATCH 21/51] Add general flatten() function, try to use it TODO: check if I can use this more general function to simplify or shorten the existing approach. --- R/flatten.R | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++ R/to_tables.R | 2 ++ 2 files changed, 80 insertions(+) create mode 100644 R/flatten.R diff --git a/R/flatten.R b/R/flatten.R new file mode 100644 index 0000000..f21cd4e --- /dev/null +++ b/R/flatten.R @@ -0,0 +1,78 @@ +if (FALSE) +{ + xx <- kwb.umberto:::read_json_files("~/../Downloads/S/support/fabian/R-Umberto/Umberto11//") + library(magrittr) + + is_list <- sapply(xx[[1]], is.list) + + results <- lapply(xx[[1]][is_list], flatten) + + r2 <- fhpredict:::flatten_recursive_list(xx[[1]]$products) +} + +# flatten ---------------------------------------------------------------------- +flatten <- function(x, name = NULL) +{ + # x must be a list + check_for_list(x) + + # If the list is empty, return NULL + if (length(x) == 0L) { + return(NULL) + } + + # Are the list entries lists themselves? + is_list <- sapply(x, is.list) + + # Are the list elements named? + is_named <- !is.null(names(x)) + + # If no element is a list and all elements are of length one we return x, + # converted to a data frame + if (!any(is_list) && all(lengths(x) == 1L)) { + + # If the elements are named, each element becomes a column + if (is_named) { + return(do.call(kwb.utils::noFactorDataFrame, x)) + } + + # Otherwise, the "name" argument must be given. It is used as column name + # of the returned data frame + stopifnot(!is.null(name)) + + # List elements are concatenated with a separator to one string value + result <- kwb.utils::noFactorDataFrame(do.call(paste, c(x, sep = "|"))) + + # Name the (one and only) column + return(stats::setNames(result, name)) + } + + # If the elements are not named, flatten and merge them + if (!is_named) { + + stopifnot(all(sapply(x, is.list))) + stopifnot(all_have_identical_names(x)) + + return(do.call(rbind, lapply(x, flatten))) + } + + # Get the part that is already flat (get_flat_part(x)) + part_1 <- do.call( + data.frame, + replace_null_with_na(x[!is_list]) + ) + + # Names of the other elements (that are lists) + elements <- names(which(is_list)) + + # Loop through these elements, flatten and row-bind them + part_2 <- do.call(rbind, lapply( + elements, function(name) flatten(x[[name]], name) + )) + + if (is.null(part_2)) { + return(part_1) + } + + cbind(part_1, part_2) +} diff --git a/R/to_tables.R b/R/to_tables.R index 96e9a72..058b48b 100644 --- a/R/to_tables.R +++ b/R/to_tables.R @@ -11,6 +11,8 @@ to_tables <- function(content) convert_and_bind(to_product) %>% prefix_columns("product_"), + #products2 = flatten(fetch("products")), + entries = fetch("entries") %>% convert_and_bind(to_entry) %>% kwb.utils::removeColumns("exchangeFullName") %>% From 41be7023cf209272be0a4fc0edb4184429a2aab9 Mon Sep 17 00:00:00 2001 From: hsonne Date: Sun, 10 Sep 2023 14:27:28 +0200 Subject: [PATCH 22/51] Document better what could be improved --- R/import_rawdata_json.R | 2 ++ R/to_tables.R | 8 ++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/R/import_rawdata_json.R b/R/import_rawdata_json.R index 7e3ca7c..c54da28 100644 --- a/R/import_rawdata_json.R +++ b/R/import_rawdata_json.R @@ -3,7 +3,9 @@ if (FALSE) json_dir = "~/../Downloads/S/support/fabian/R-Umberto/Umberto11" result <- kwb.umberto:::import_rawdata_json(json_dir, add_place = TRUE) + names(result) + View(result) } diff --git a/R/to_tables.R b/R/to_tables.R index 058b48b..4999e72 100644 --- a/R/to_tables.R +++ b/R/to_tables.R @@ -3,6 +3,12 @@ to_tables <- function(content) { #content <- contents[[1L]] + # Try this general function for the list-type elements + # kwb.umberto:::flatten(content$products) + # kwb.umberto:::flatten(content$entries) + # kwb.umberto:::flatten(content$lifeCycleStages) + # ... + fetch <- kwb.utils::createAccessor(content) result <- list( @@ -11,8 +17,6 @@ to_tables <- function(content) convert_and_bind(to_product) %>% prefix_columns("product_"), - #products2 = flatten(fetch("products")), - entries = fetch("entries") %>% convert_and_bind(to_entry) %>% kwb.utils::removeColumns("exchangeFullName") %>% From 17b49d02f247a7bbe9580750d34e9a3625df02c6 Mon Sep 17 00:00:00 2001 From: hsonne Date: Mon, 11 Sep 2023 07:17:06 +0200 Subject: [PATCH 23/51] Add argument "sep", pass args to recursive call --- R/flatten.R | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/R/flatten.R b/R/flatten.R index f21cd4e..2a26722 100644 --- a/R/flatten.R +++ b/R/flatten.R @@ -11,7 +11,7 @@ if (FALSE) } # flatten ---------------------------------------------------------------------- -flatten <- function(x, name = NULL) +flatten <- function(x, name = NULL, sep = "|") { # x must be a list check_for_list(x) @@ -41,19 +41,18 @@ flatten <- function(x, name = NULL) stopifnot(!is.null(name)) # List elements are concatenated with a separator to one string value - result <- kwb.utils::noFactorDataFrame(do.call(paste, c(x, sep = "|"))) + result <- kwb.utils::noFactorDataFrame(do.call(paste, c(x, sep = sep))) # Name the (one and only) column return(stats::setNames(result, name)) } - # If the elements are not named, flatten and merge them + # If the elements are not named, flatten and row-bind them if (!is_named) { stopifnot(all(sapply(x, is.list))) stopifnot(all_have_identical_names(x)) - - return(do.call(rbind, lapply(x, flatten))) + return(do.call(rbind, lapply(x, flatten, name = name, sep = sep))) } # Get the part that is already flat (get_flat_part(x)) From 6df4e45202944f38bbf17772e5b0f66df74ca1bc Mon Sep 17 00:00:00 2001 From: hsonne Date: Mon, 11 Sep 2023 07:20:47 +0200 Subject: [PATCH 24/51] Handle different cases when row-binding Keep the name of the original list elements in a column if there was more than one element from which flattened data frames are joined --- R/flatten.R | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/R/flatten.R b/R/flatten.R index 2a26722..f6b6b29 100644 --- a/R/flatten.R +++ b/R/flatten.R @@ -64,11 +64,32 @@ flatten <- function(x, name = NULL, sep = "|") # Names of the other elements (that are lists) elements <- names(which(is_list)) - # Loop through these elements, flatten and row-bind them - part_2 <- do.call(rbind, lapply( - elements, function(name) flatten(x[[name]], name) - )) + # Loop through these elements, flatten them + part_2_tables <- elements %>% + lapply(function(name) flatten(x[[name]], name = name, sep = sep)) %>% + stats::setNames(elements) + n_tables <- length(part_2_tables) + + # and row-bind them + part_2 <- if (n_tables > 1L) { + + #do.call(rbind, part_2_tables) + + # Find a column name that does not yet exist + name_column <- kwb.utils::hsSafeName( + paste(elements, collapse = "_"), + names(part_2_tables[[1L]]) + ) + + kwb.utils::rbindAll(part_2_tables, name_column) + + } else if (n_tables == 1L) { + + part_2_tables[[1L]] + + } # else NULL + if (is.null(part_2)) { return(part_1) } From 7bf5e85b0f049df90af27cc9f52562b8cc75736d Mon Sep 17 00:00:00 2001 From: hsonne Date: Mon, 11 Sep 2023 07:23:49 +0200 Subject: [PATCH 25/51] Use flatten(), compare with original results --- R/import_rawdata_json.R | 9 +-- R/to_tables.R | 170 +++++++++++++++++++++++++++++----------- R/utils.R | 11 +++ 3 files changed, 139 insertions(+), 51 deletions(-) diff --git a/R/import_rawdata_json.R b/R/import_rawdata_json.R index c54da28..c49b708 100644 --- a/R/import_rawdata_json.R +++ b/R/import_rawdata_json.R @@ -1,12 +1,11 @@ if (FALSE) { json_dir = "~/../Downloads/S/support/fabian/R-Umberto/Umberto11" - - result <- kwb.umberto:::import_rawdata_json(json_dir, add_place = TRUE) - - names(result) - View(result) + result <- kwb.umberto:::import_rawdata_json(json_dir, add_place = TRUE) + + kwb.utils::assignPackageObjects("kwb.umberto") + contents <- read_json_files(json_dir) } # import_rawdata_json ---------------------------------------------------------- diff --git a/R/to_tables.R b/R/to_tables.R index 4999e72..9c93358 100644 --- a/R/to_tables.R +++ b/R/to_tables.R @@ -10,37 +10,44 @@ to_tables <- function(content) # ... fetch <- kwb.utils::createAccessor(content) + + products <- fetch("products") %>% + convert_and_bind(to_product) %>% + prefix_columns("product_") + + entries <- fetch("entries") %>% + convert_and_bind(to_entry) %>% + kwb.utils::removeColumns("exchangeFullName") %>% + prefix_columns("entry_") + + processes <- fetch("processes") %>% + convert_and_bind(to_process) %>% + prefix_columns("process_") + + places <- fetch("places") %>% + convert_and_bind(to_place) %>% + prefix_columns("place_") + + indicators <- fetch("indicators") %>% + convert_and_bind(to_indicator) %>% + prefix_columns("indicator_") + + scenarios <- fetch("scenarios") %>% + convert_and_bind(to_scenario) %>% + prefix_columns("scenario_") + + evaluationMethods <- fetch("evaluationMethods") %>% + convert_and_bind(to_evaluationMethod) %>% + prefix_columns("evaluationMethod_") result <- list( - - products = fetch("products") %>% - convert_and_bind(to_product) %>% - prefix_columns("product_"), - - entries = fetch("entries") %>% - convert_and_bind(to_entry) %>% - kwb.utils::removeColumns("exchangeFullName") %>% - prefix_columns("entry_"), - - processes = fetch("processes") %>% - convert_and_bind(to_process) %>% - prefix_columns("process_"), - - places = fetch("places") %>% - convert_and_bind(to_place) %>% - prefix_columns("place_"), - - indicators = fetch("indicators") %>% - convert_and_bind(to_indicator) %>% - prefix_columns("indicator_"), - - scenarios = fetch("scenarios") %>% - convert_and_bind(to_scenario) %>% - prefix_columns("scenario_"), - - evaluationMethods = fetch("evaluationMethods") %>% - convert_and_bind(to_evaluationMethod) %>% - prefix_columns("evaluationMethod_") + products = products, + entries = entries, + processes = processes, + places = places, + indicators = indicators, + scenarios = scenarios, + evaluationMethods = evaluationMethods ) structure( @@ -80,7 +87,7 @@ to_product <- function(x) # to_lcia ---------------------------------------------------------------------- to_lcia <- function(x) { - #x <- lcia_list[[2L]] + #x <- lcia_list[[1L]];str(x) flat <- get_flat_part(x) remaining <- get_remaining(x, flat) %>% @@ -97,45 +104,90 @@ to_lcia <- function(x) } # else NULL } - inputs <- get_inout("input") - outputs <- get_inout("output") + result_old <- cbind(flat, rbind( + get_inout("input"), + get_inout("output"), + get_inout("internal") + )) + + result <- flatten(x) %>% + kwb.utils::renameColumns(list( + inputs_outputs_internals = "side" + )) + + result$side <- gsub("s$", "", result$side) - cbind(flat, rbind(inputs, outputs)) + check_identity(result_old, result) + + result } # to_entry --------------------------------------------------------------------- -to_entry <- function(xz) +to_entry <- function(x) { - #xz <- contents[[1]]$entries[[1]] - flat <- get_flat_part(xz) + #x <- contents[[1]]$entries[[1]] + + flat <- get_flat_part(x) - kwb.utils::removeColumns(flat, c( + result_old <- kwb.utils::removeColumns(flat, c( "X.id", "exchangeId", "classificationId" - )) + )) + + result <- flatten(x) %>% + kwb.utils::removeColumns(pattern = "(\\.i|I)d$") + + check_identity( + result_old, + if (all(is.na(result$source))) { + kwb.utils::removeColumns(result, "source") + } else { + result + } + ) + + result } # to_process ------------------------------------------------------------------- to_process <- function(x) { - x %>% + #x <- contents[[1L]]$processes[[1L]];str(x) + + result_old <- x %>% get_flat_part() %>% remove_xid() + + result <- flatten(x) %>% + remove_xid() + + check_identity(result_old, result) + + result } # to_place --------------------------------------------------------------------- to_place <- function(x) { - x %>% + #x <- contents[[1L]]$places[[1L]];str(x) + + result_old <- x %>% get_flat_part() %>% remove_xid() + + result <- flatten(x) %>% + remove_xid() + + check_identity(result_old, result) + + result } # to_indicator ----------------------------------------------------------------- to_indicator <- function(x) { - #x <- contents[[1L]]$indicators[[1L]] + #x <- contents[[1L]]$indicators[[2L]];str(x) flat <- get_flat_part(x) path_parts <- get_remaining(x, flat) %>% @@ -143,25 +195,51 @@ to_indicator <- function(x) stopifnot(all(lengths(path_parts) == 1L)) - flat %>% + result_old <- flat %>% remove_uuid() %>% - cbind(indicatorPath = paste0(path_parts[[1L]], "->", path_parts[[2L]])) + cbind(indicatorPath = do.call(paste, c(path_parts, sep = "->"))) + + result <- flatten(x, sep = "->") %>% + remove_uuid() %>% + kwb.utils::renameColumns(list(path = "indicatorPath")) + + check_identity(result_old, result) + + result } # to_scenario ------------------------------------------------------------------ to_scenario <- function(x) { - x %>% + #x <- contents[[1]]$scenarios[[1]];str(x) + + result_old <- x %>% get_flat_part() %>% remove_xid() + + result <- flatten(x) %>% + remove_xid() + + check_identity(result_old, result) + + result } # to_evaluationMethod ---------------------------------------------------------- to_evaluationMethod <- function(x) { - x %>% + #x <- contents[[1]]$evaluationMethods[[2]];str(x) + + result_old <- x %>% get_flat_part() %>% remove_xid() %>% remove_uuid() + + result <- flatten(x) %>% + remove_xid() %>% + remove_uuid() + + check_identity(result_old, result) + + result } - diff --git a/R/utils.R b/R/utils.R index 9ea6ab6..b4ffb22 100644 --- a/R/utils.R +++ b/R/utils.R @@ -16,6 +16,17 @@ check_for_list <- function(x) stopifnot(is.list(x)) } +# check_identity --------------------------------------------------------------- +check_identity <- function(x, y) +{ + is_identical <- identical(x, y) + + kwb.utils::printIf(!is_identical, x, caption = deparse(substitute(x))) + kwb.utils::printIf(!is_identical, y, caption = deparse(substitute(y))) + + stopifnot(is_identical) +} + # convert_and_bind ------------------------------------------------------------- convert_and_bind <- function(x_list, converter) { From 8f0f012ff909de2a8b1b45f6b8049ed41b3334cc Mon Sep 17 00:00:00 2001 From: hsonne Date: Mon, 11 Sep 2023 07:38:15 +0200 Subject: [PATCH 26/51] Remove old processing and code for comparison --- R/to_tables.R | 104 ++++---------------------------------------------- 1 file changed, 7 insertions(+), 97 deletions(-) diff --git a/R/to_tables.R b/R/to_tables.R index 9c93358..9254604 100644 --- a/R/to_tables.R +++ b/R/to_tables.R @@ -88,28 +88,7 @@ to_product <- function(x) to_lcia <- function(x) { #x <- lcia_list[[1L]];str(x) - flat <- get_flat_part(x) - - remaining <- get_remaining(x, flat) %>% - remove_zero_length_entries() - - #str(remaining, 2) - - get_inout <- function(side) { - sublist <- remaining[[paste0(side, "s")]] - if (!is.null(sublist)) { - sublist %>% - convert_and_bind(get_flat_part) %>% - cbind(side = side) - } # else NULL - } - - result_old <- cbind(flat, rbind( - get_inout("input"), - get_inout("output"), - get_inout("internal") - )) - + result <- flatten(x) %>% kwb.utils::renameColumns(list( inputs_outputs_internals = "side" @@ -117,8 +96,6 @@ to_lcia <- function(x) result$side <- gsub("s$", "", result$side) - check_identity(result_old, result) - result } @@ -127,27 +104,8 @@ to_entry <- function(x) { #x <- contents[[1]]$entries[[1]] - flat <- get_flat_part(x) - - result_old <- kwb.utils::removeColumns(flat, c( - "X.id", - "exchangeId", - "classificationId" - )) - - result <- flatten(x) %>% + flatten(x) %>% kwb.utils::removeColumns(pattern = "(\\.i|I)d$") - - check_identity( - result_old, - if (all(is.na(result$source))) { - kwb.utils::removeColumns(result, "source") - } else { - result - } - ) - - result } # to_process ------------------------------------------------------------------- @@ -155,16 +113,8 @@ to_process <- function(x) { #x <- contents[[1L]]$processes[[1L]];str(x) - result_old <- x %>% - get_flat_part() %>% - remove_xid() - - result <- flatten(x) %>% + flatten(x) %>% remove_xid() - - check_identity(result_old, result) - - result } # to_place --------------------------------------------------------------------- @@ -172,40 +122,17 @@ to_place <- function(x) { #x <- contents[[1L]]$places[[1L]];str(x) - result_old <- x %>% - get_flat_part() %>% - remove_xid() - - result <- flatten(x) %>% + flatten(x) %>% remove_xid() - - check_identity(result_old, result) - - result } # to_indicator ----------------------------------------------------------------- to_indicator <- function(x) { #x <- contents[[1L]]$indicators[[2L]];str(x) - flat <- get_flat_part(x) - - path_parts <- get_remaining(x, flat) %>% - kwb.utils::selectElements("path") - - stopifnot(all(lengths(path_parts) == 1L)) - - result_old <- flat %>% - remove_uuid() %>% - cbind(indicatorPath = do.call(paste, c(path_parts, sep = "->"))) - - result <- flatten(x, sep = "->") %>% + flatten(x, sep = "->") %>% remove_uuid() %>% kwb.utils::renameColumns(list(path = "indicatorPath")) - - check_identity(result_old, result) - - result } # to_scenario ------------------------------------------------------------------ @@ -213,16 +140,8 @@ to_scenario <- function(x) { #x <- contents[[1]]$scenarios[[1]];str(x) - result_old <- x %>% - get_flat_part() %>% + flatten(x) %>% remove_xid() - - result <- flatten(x) %>% - remove_xid() - - check_identity(result_old, result) - - result } # to_evaluationMethod ---------------------------------------------------------- @@ -230,16 +149,7 @@ to_evaluationMethod <- function(x) { #x <- contents[[1]]$evaluationMethods[[2]];str(x) - result_old <- x %>% - get_flat_part() %>% - remove_xid() %>% - remove_uuid() - - result <- flatten(x) %>% + flatten(x) %>% remove_xid() %>% remove_uuid() - - check_identity(result_old, result) - - result } From b914b8ecb2e58df0d0e42cb8eaf159b470edfcd5 Mon Sep 17 00:00:00 2001 From: hsonne Date: Mon, 11 Sep 2023 08:07:18 +0200 Subject: [PATCH 27/51] Try to use new flatten() in to_product() --- R/to_tables.R | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/R/to_tables.R b/R/to_tables.R index 9254604..650d0c9 100644 --- a/R/to_tables.R +++ b/R/to_tables.R @@ -69,19 +69,25 @@ to_product <- function(x) #str(lcia_list) - #convert_and_bind(lcia_list, get_flat_part) + renaming <- list("entryId" = "lciaEntryId") - lcia <- lcia_list %>% + lcia_old <- lcia_list %>% convert_and_bind(to_lcia) %>% - kwb.utils::renameColumns(list( - "entryId" = "lciaEntryId" - )) - - cbind( - flat %>% - remove_uuid(), - lcia - ) + kwb.utils::renameColumns(renaming) + + lcia <- lcia_list %>% + flatten() %>% + prettify_column_side() %>% + kwb.utils::renameColumns(renaming) + + check_identity(lcia_old, lcia) + + result_old <- cbind(remove_uuid(flat), lcia) + + #result <- remove_uuid(flatten(x)) + #check_identity(result_old, result) + + result_old } # to_lcia ---------------------------------------------------------------------- @@ -89,10 +95,16 @@ to_lcia <- function(x) { #x <- lcia_list[[1L]];str(x) - result <- flatten(x) %>% - kwb.utils::renameColumns(list( - inputs_outputs_internals = "side" - )) + flatten(x) %>% + prettify_column_side() +} + +# prettify_column_side --------------------------------------------------------- +prettify_column_side <- function(data) +{ + result <- kwb.utils::renameColumns(data, list( + inputs_outputs_internals = "side" + )) result$side <- gsub("s$", "", result$side) From d00f819549d03a8e4ac717eb9418496f17e83545 Mon Sep 17 00:00:00 2001 From: hsonne Date: Mon, 11 Sep 2023 08:24:24 +0200 Subject: [PATCH 28/51] Use flatten() on a higher level, check identity --- R/to_tables.R | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/R/to_tables.R b/R/to_tables.R index 650d0c9..d179a0a 100644 --- a/R/to_tables.R +++ b/R/to_tables.R @@ -19,27 +19,72 @@ to_tables <- function(content) convert_and_bind(to_entry) %>% kwb.utils::removeColumns("exchangeFullName") %>% prefix_columns("entry_") + + entries2 <- fetch("entries") %>% + flatten() %>% + kwb.utils::removeColumns(pattern = "(\\.i|I)d$") %>% + kwb.utils::removeColumns("exchangeFullName") %>% + prefix_columns("entry_") + check_identity(entries, entries2) + processes <- fetch("processes") %>% convert_and_bind(to_process) %>% prefix_columns("process_") + + processes2 <- fetch("processes") %>% + flatten() %>% + remove_xid() %>% + prefix_columns("process_") + check_identity(processes, processes2) + places <- fetch("places") %>% convert_and_bind(to_place) %>% prefix_columns("place_") + places2 <- fetch("places") %>% + flatten() %>% + remove_xid() %>% + prefix_columns("place_") + + check_identity(places, places2) + indicators <- fetch("indicators") %>% convert_and_bind(to_indicator) %>% prefix_columns("indicator_") + + indicators2 <- fetch("indicators") %>% + flatten(x, sep = "->") %>% + remove_uuid() %>% + kwb.utils::renameColumns(list(path = "indicatorPath")) %>% + prefix_columns("indicator_") + + check_identity(indicators, indicators2) scenarios <- fetch("scenarios") %>% convert_and_bind(to_scenario) %>% prefix_columns("scenario_") + + scenarios2 <- fetch("scenarios") %>% + flatten() %>% + remove_xid() %>% + prefix_columns("scenario_") + + check_identity(scenarios, scenarios2) evaluationMethods <- fetch("evaluationMethods") %>% convert_and_bind(to_evaluationMethod) %>% prefix_columns("evaluationMethod_") + evaluationMethods2 <- fetch("evaluationMethods") %>% + flatten() %>% + remove_xid() %>% + remove_uuid() %>% + prefix_columns("evaluationMethod_") + + check_identity(evaluationMethods, evaluationMethods2) + result <- list( products = products, entries = entries, From 550ee2156dd2bc156a6b5623aed9eaf504715bcb Mon Sep 17 00:00:00 2001 From: hsonne Date: Mon, 11 Sep 2023 08:39:59 +0200 Subject: [PATCH 29/51] Remove code related to old approach and checks --- R/to_tables.R | 139 ++++---------------------------------------------- 1 file changed, 11 insertions(+), 128 deletions(-) diff --git a/R/to_tables.R b/R/to_tables.R index d179a0a..4da6ae9 100644 --- a/R/to_tables.R +++ b/R/to_tables.R @@ -14,77 +14,45 @@ to_tables <- function(content) products <- fetch("products") %>% convert_and_bind(to_product) %>% prefix_columns("product_") + + # products2 <- fetch("products") %>% + # flatten() %>% + # ... + # check_identity(products, products2) entries <- fetch("entries") %>% - convert_and_bind(to_entry) %>% - kwb.utils::removeColumns("exchangeFullName") %>% - prefix_columns("entry_") - - entries2 <- fetch("entries") %>% flatten() %>% kwb.utils::removeColumns(pattern = "(\\.i|I)d$") %>% kwb.utils::removeColumns("exchangeFullName") %>% prefix_columns("entry_") - - check_identity(entries, entries2) processes <- fetch("processes") %>% - convert_and_bind(to_process) %>% - prefix_columns("process_") - - processes2 <- fetch("processes") %>% flatten() %>% remove_xid() %>% prefix_columns("process_") - - check_identity(processes, processes2) places <- fetch("places") %>% - convert_and_bind(to_place) %>% - prefix_columns("place_") - - places2 <- fetch("places") %>% flatten() %>% remove_xid() %>% prefix_columns("place_") - check_identity(places, places2) - indicators <- fetch("indicators") %>% - convert_and_bind(to_indicator) %>% - prefix_columns("indicator_") - - indicators2 <- fetch("indicators") %>% flatten(x, sep = "->") %>% remove_uuid() %>% kwb.utils::renameColumns(list(path = "indicatorPath")) %>% prefix_columns("indicator_") - check_identity(indicators, indicators2) - scenarios <- fetch("scenarios") %>% - convert_and_bind(to_scenario) %>% - prefix_columns("scenario_") - - scenarios2 <- fetch("scenarios") %>% flatten() %>% remove_xid() %>% prefix_columns("scenario_") - check_identity(scenarios, scenarios2) - evaluationMethods <- fetch("evaluationMethods") %>% - convert_and_bind(to_evaluationMethod) %>% - prefix_columns("evaluationMethod_") - - evaluationMethods2 <- fetch("evaluationMethods") %>% flatten() %>% remove_xid() %>% remove_uuid() %>% prefix_columns("evaluationMethod_") - check_identity(evaluationMethods, evaluationMethods2) - result <- list( products = products, entries = entries, @@ -114,99 +82,14 @@ to_product <- function(x) #str(lcia_list) - renaming <- list("entryId" = "lciaEntryId") - - lcia_old <- lcia_list %>% - convert_and_bind(to_lcia) %>% - kwb.utils::renameColumns(renaming) - lcia <- lcia_list %>% flatten() %>% - prettify_column_side() %>% - kwb.utils::renameColumns(renaming) - - check_identity(lcia_old, lcia) - - result_old <- cbind(remove_uuid(flat), lcia) - - #result <- remove_uuid(flatten(x)) - #check_identity(result_old, result) - - result_old -} - -# to_lcia ---------------------------------------------------------------------- -to_lcia <- function(x) -{ - #x <- lcia_list[[1L]];str(x) - - flatten(x) %>% - prettify_column_side() -} - -# prettify_column_side --------------------------------------------------------- -prettify_column_side <- function(data) -{ - result <- kwb.utils::renameColumns(data, list( - inputs_outputs_internals = "side" - )) + kwb.utils::renameColumns(list( + entryId = "lciaEntryId", + inputs_outputs_internals = "side" + )) - result$side <- gsub("s$", "", result$side) - - result -} - -# to_entry --------------------------------------------------------------------- -to_entry <- function(x) -{ - #x <- contents[[1]]$entries[[1]] + lcia$side <- gsub("s$", "", lcia$side) - flatten(x) %>% - kwb.utils::removeColumns(pattern = "(\\.i|I)d$") -} - -# to_process ------------------------------------------------------------------- -to_process <- function(x) -{ - #x <- contents[[1L]]$processes[[1L]];str(x) - - flatten(x) %>% - remove_xid() -} - -# to_place --------------------------------------------------------------------- -to_place <- function(x) -{ - #x <- contents[[1L]]$places[[1L]];str(x) - - flatten(x) %>% - remove_xid() -} - -# to_indicator ----------------------------------------------------------------- -to_indicator <- function(x) -{ - #x <- contents[[1L]]$indicators[[2L]];str(x) - flatten(x, sep = "->") %>% - remove_uuid() %>% - kwb.utils::renameColumns(list(path = "indicatorPath")) -} - -# to_scenario ------------------------------------------------------------------ -to_scenario <- function(x) -{ - #x <- contents[[1]]$scenarios[[1]];str(x) - - flatten(x) %>% - remove_xid() -} - -# to_evaluationMethod ---------------------------------------------------------- -to_evaluationMethod <- function(x) -{ - #x <- contents[[1]]$evaluationMethods[[2]];str(x) - - flatten(x) %>% - remove_xid() %>% - remove_uuid() + cbind(remove_uuid(flat), lcia) } From b4a05905293e9f0477aa291b96c58f4bd094235a Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 14 Sep 2023 14:46:08 +0200 Subject: [PATCH 30/51] Consider possibility of zero rows in cbind() --- R/flatten.R | 6 +++++- R/to_tables.R | 7 ++++++- R/utils.R | 6 ++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/R/flatten.R b/R/flatten.R index f6b6b29..59cbed7 100644 --- a/R/flatten.R +++ b/R/flatten.R @@ -94,5 +94,9 @@ flatten <- function(x, name = NULL, sep = "|") return(part_1) } - cbind(part_1, part_2) + # We expect part_1 to have one row + check_for_exactly_one_row(part_1) + + # Consider that part_2 may have no rows! + cbind(part_1[rep.int(1L, nrow(part_2)), , drop = FALSE], part_2) } diff --git a/R/to_tables.R b/R/to_tables.R index 4da6ae9..4d7532b 100644 --- a/R/to_tables.R +++ b/R/to_tables.R @@ -91,5 +91,10 @@ to_product <- function(x) lcia$side <- gsub("s$", "", lcia$side) - cbind(remove_uuid(flat), lcia) + part_1 <- remove_uuid(flat) + + # We expect part_1 to have one row + check_for_exactly_one_row(part_1) + + cbind(part_1[rep.int(1L, nrow(lcia)), , drop = FALSE], lcia) } diff --git a/R/utils.R b/R/utils.R index b4ffb22..cdbec5b 100644 --- a/R/utils.R +++ b/R/utils.R @@ -10,6 +10,12 @@ check_for_data_frame <- function(x) stopifnot(is.data.frame(x)) } +# check_for_exactly_one_row ---------------------------------------------------- +check_for_exactly_one_row <- function(date) +{ + stopifnot(nrow(date) == 1L) +} + # check_for_list --------------------------------------------------------------- check_for_list <- function(x) { From 2b00f60ee48ca2de790603021a290b690b841942 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 2 Jan 2024 13:53:15 +0100 Subject: [PATCH 31/51] Allow to specify the paths to the .json files --- R/import_rawdata_json.R | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/R/import_rawdata_json.R b/R/import_rawdata_json.R index c49b708..ecf375e 100644 --- a/R/import_rawdata_json.R +++ b/R/import_rawdata_json.R @@ -2,8 +2,16 @@ if (FALSE) { json_dir = "~/../Downloads/S/support/fabian/R-Umberto/Umberto11" + files <- dir(json_dir, "\\.json$", full.names = TRUE) + result <- kwb.umberto:::import_rawdata_json(json_dir, add_place = TRUE) + results <- lapply(files, FUN = function(file) { + kwb.umberto:::import_rawdata_json(files = file, add_place = TRUE) + }) + + stopifnot(identical(do.call(rbind, results), result)) + kwb.utils::assignPackageObjects("kwb.umberto") contents <- read_json_files(json_dir) } @@ -18,11 +26,19 @@ if (FALSE) #' are provided by \code{\link{import_rawdata}} that imports .csv files #' @param add_place With add_place = TRUE, the "place" is contained in the #' result even if old_format = TRUE +#' @param files optional. If given and not \code{NULL} this is expected to be a +#' vector of character with the full paths to the \code{.json} files to be +#' read. #' @return data frame #' @export -import_rawdata_json <- function(json_dir, old_format = TRUE, add_place = FALSE) +import_rawdata_json <- function( + json_dir, + old_format = TRUE, + add_place = FALSE, + files = NULL +) { - contents <- read_json_files(json_dir) + contents <- read_json_files(json_dir, files = files) result <- lapply(contents, to_tables) @@ -63,14 +79,15 @@ import_rawdata_json <- function(json_dir, old_format = TRUE, add_place = FALSE) } # read_json_files -------------------------------------------------------------- -read_json_files <- function(json_dir) +read_json_files <- function(json_dir, files = NULL) { - json_files <- list_json_files_or_stop(json_dir) + if (is.null(files)) { + files <- list_json_files_or_stop(json_dir) + } - stats::setNames( - lapply(json_files, jsonlite::read_json), - basename(json_files) # %>% kwb.utils::removeExtension() - ) + files %>% + lapply(jsonlite::read_json) %>% + stats::setNames(basename(files)) } # list_json_files_or_stop ------------------------------------------------------ From 9705817247a69a1e9b8af186800ff366e98fda28 Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 5 Jan 2024 16:57:05 +0100 Subject: [PATCH 32/51] Improve formatting --- R/excel_export.R | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/R/excel_export.R b/R/excel_export.R index c8afa40..eee1ec8 100644 --- a/R/excel_export.R +++ b/R/excel_export.R @@ -1,4 +1,5 @@ #' Write results to EXCEL +#' #' @param data_pivot_list a list as retrieved by function create_pivot_list() #' @param path relative or full path to be used for exporting the results to #' EXCEL (default: "results.xlsx") @@ -27,10 +28,7 @@ #' export_path <- file.path(getwd(), "umberto10_results.xlsx") #' print(sprintf("Exporting aggregated results to %s", export_path)) #' write_xlsx(umberto10_data_pivot_list, path = export_path) -write_xlsx <- function(data_pivot_list, - path = "results.xlsx") { - - openxlsx::write.xlsx(data_pivot_list, - file = path) - -} \ No newline at end of file +write_xlsx <- function(data_pivot_list, path = "results.xlsx") +{ + openxlsx::write.xlsx(data_pivot_list, file = path) +} From 95d9783fb7287bd74aae79fd18e3614c30654fcc Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 5 Jan 2024 16:57:20 +0100 Subject: [PATCH 33/51] Add split_by_columns() --- R/utils.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/R/utils.R b/R/utils.R index cdbec5b..b6eb494 100644 --- a/R/utils.R +++ b/R/utils.R @@ -90,3 +90,9 @@ replace_null_with_na <- function(x) x } + +# split_by_columns ------------------------------------------------------------- +split_by_columns <- function(data, columns) +{ + split(x = data, f = kwb.utils::selectColumns(data, columns)) +} From 7b188babe4032194d6ce5e282915e06250f2836f Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 5 Jan 2024 16:58:01 +0100 Subject: [PATCH 34/51] Rename "result" to "result_tables" --- R/import_rawdata_json.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/import_rawdata_json.R b/R/import_rawdata_json.R index ecf375e..cd95c9e 100644 --- a/R/import_rawdata_json.R +++ b/R/import_rawdata_json.R @@ -38,11 +38,12 @@ import_rawdata_json <- function( files = NULL ) { + #kwb.utils::assignPackageObjects("kwb.umberto") contents <- read_json_files(json_dir, files = files) - result <- lapply(contents, to_tables) + result_tables <- lapply(contents, to_tables) - data_frames <- lapply(result, merge_json_tables) + data_frames <- lapply(result_tables, merge_json_tables) result <- kwb.utils::rbindAll(data_frames, nameColumn = "model") From 7f1bcc32a44811356d018a4a90c96c1dc45787a3 Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 5 Jan 2024 16:58:18 +0100 Subject: [PATCH 35/51] Fix bug in pipe --- R/to_tables.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/to_tables.R b/R/to_tables.R index 4d7532b..734dec0 100644 --- a/R/to_tables.R +++ b/R/to_tables.R @@ -37,7 +37,7 @@ to_tables <- function(content) prefix_columns("place_") indicators <- fetch("indicators") %>% - flatten(x, sep = "->") %>% + flatten(sep = "->") %>% remove_uuid() %>% kwb.utils::renameColumns(list(path = "indicatorPath")) %>% prefix_columns("indicator_") From ff9a9aafaaf43082fb959df53e60ebda2369972f Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 5 Jan 2024 16:58:58 +0100 Subject: [PATCH 36/51] Load files from Lea --- R/import_rawdata_json.R | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/R/import_rawdata_json.R b/R/import_rawdata_json.R index cd95c9e..bd22eb3 100644 --- a/R/import_rawdata_json.R +++ b/R/import_rawdata_json.R @@ -1,10 +1,14 @@ if (FALSE) { - json_dir = "~/../Downloads/S/support/fabian/R-Umberto/Umberto11" + json_dir <- "~/../Downloads/S/support/fabian/R-Umberto/Umberto11" + json_dir <- "~/../Downloads/S/support/lea/umberto" files <- dir(json_dir, "\\.json$", full.names = TRUE) - result <- kwb.umberto:::import_rawdata_json(json_dir, add_place = TRUE) + result_old_with_place <- kwb.umberto:::import_rawdata_json( + json_dir, + add_place = TRUE + ) results <- lapply(files, FUN = function(file) { kwb.umberto:::import_rawdata_json(files = file, add_place = TRUE) @@ -13,7 +17,10 @@ if (FALSE) stopifnot(identical(do.call(rbind, results), result)) kwb.utils::assignPackageObjects("kwb.umberto") + contents <- read_json_files(json_dir) + + #jsTree::jsTree } # import_rawdata_json ---------------------------------------------------------- From bfe43cdb71201fea99358eb09369773538805242 Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 5 Jan 2024 16:59:32 +0100 Subject: [PATCH 37/51] Update Rd file --- man/import_rawdata_json.Rd | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/man/import_rawdata_json.Rd b/man/import_rawdata_json.Rd index c77bc48..0fd8e21 100644 --- a/man/import_rawdata_json.Rd +++ b/man/import_rawdata_json.Rd @@ -4,7 +4,12 @@ \alias{import_rawdata_json} \title{Import Umberto results from .json files} \usage{ -import_rawdata_json(json_dir, old_format = TRUE, add_place = FALSE) +import_rawdata_json( + json_dir, + old_format = TRUE, + add_place = FALSE, + files = NULL +) } \arguments{ \item{json_dir}{path to directory containing .json files. All .json files @@ -15,6 +20,10 @@ are provided by \code{\link{import_rawdata}} that imports .csv files} \item{add_place}{With add_place = TRUE, the "place" is contained in the result even if old_format = TRUE} + +\item{files}{optional. If given and not \code{NULL} this is expected to be a +vector of character with the full paths to the \code{.json} files to be +read.} } \value{ data frame From 1ed871951783fdfbe46ce9860fd60f510e01bee0 Mon Sep 17 00:00:00 2001 From: hsonne Date: Fri, 5 Jan 2024 16:59:47 +0100 Subject: [PATCH 38/51] Add import_json_files_to_excel() --- DESCRIPTION | 3 +- NAMESPACE | 4 ++ R/import_json_files_to_excel.R | 87 +++++++++++++++++++++++++++++++ man/import_json_files_to_excel.Rd | 22 ++++++++ 4 files changed, 115 insertions(+), 1 deletion(-) create mode 100644 R/import_json_files_to_excel.R create mode 100644 man/import_json_files_to_excel.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 6eaf4b1..1299138 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,7 +26,8 @@ Imports: openxlsx, readr, tidyr, - tidyselect + tidyselect, + writexl Suggests: covr, knitr, diff --git a/NAMESPACE b/NAMESPACE index 99b5c7d..dad8478 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,7 @@ export("%>%") export(create_pivot_list) export(group_data) +export(import_json_files_to_excel) export(import_rawdata) export(import_rawdata_json) export(pivot_data) @@ -19,7 +20,9 @@ importFrom(dplyr,select) importFrom(dplyr,summarise_at) importFrom(ggforce,facet_wrap_paginate) importFrom(janitor,clean_names) +importFrom(kwb.utils,hsOpenWindowsExplorer) importFrom(kwb.utils,selectColumns) +importFrom(kwb.utils,substSpecialChars) importFrom(magrittr,"%>%") importFrom(openxlsx,write.xlsx) importFrom(readr,read_csv) @@ -27,3 +30,4 @@ importFrom(readr,read_csv2) importFrom(stats,setNames) importFrom(tidyr,spread) importFrom(tidyselect,all_of) +importFrom(writexl,write_xlsx) diff --git a/R/import_json_files_to_excel.R b/R/import_json_files_to_excel.R new file mode 100644 index 0000000..018914c --- /dev/null +++ b/R/import_json_files_to_excel.R @@ -0,0 +1,87 @@ +# import_json_files_to_excel --------------------------------------------------- + +#' Import JSON files to Excel File +#' +#' @param json_dir path to directory containing .json files +#' @param file path to Excel file to be created. Default: +#' \code{"umberto-results_.xlsx"} in \code{tempdir()} +#' @param open logical indicating whether or not to open the created Excel file +#' @return path to created Excel file +#' @importFrom kwb.utils hsOpenWindowsExplorer substSpecialChars +#' @importFrom writexl write_xlsx +#' @export +import_json_files_to_excel <- function( + json_dir, + file = NULL, + open = TRUE +) +{ + sheets <- json_dir %>% + import_rawdata_json(add_place = TRUE, old_format = FALSE) %>% + get_core_data() %>% + core_data_to_wide() %>% + split_by_columns("indicator") + + if (is.null(file)) { + base_name <- kwb.utils::substSpecialChars(basename(json_dir)) + file <- file.path( + tempdir(), + sprintf("umberto-import_%s.xlsx", base_name) + ) + } + + writexl::write_xlsx(sheets, file) + + if (open) { + try(kwb.utils::hsOpenWindowsExplorer(file)) + } + + file +} + +# get_core_data ---------------------------------------------------------------- +get_core_data <- function(data) +{ + result <- kwb.utils::renameAndSelect(data, list( + model = "model", + process_name = "process", + place_name = "place", + entry_exchange = "exchange", + indicator_name = "indicator", + product_quantity = "quantity", + product_unit = "unit" + )) + + result <- result[!grepl("^Connection", result$place), ] + + duplicates <- kwb.utils::findPartialDuplicates( + result, setdiff(names(result), "quantity") + ) + + stopifnot(is.null(duplicates)) + + result +} + +# core_data_to_wide ------------------------------------------------------------ +core_data_to_wide <- function(data) +{ + data %>% + tidyr::pivot_wider( + id_cols = c( + "indicator", + "process", + "place", + "exchange", + "unit" + ), + names_from = "model", + values_from = "quantity" + ) %>% + kwb.utils::orderBy(c( + "indicator", + "process", + "place", + "exchange" + )) +} diff --git a/man/import_json_files_to_excel.Rd b/man/import_json_files_to_excel.Rd new file mode 100644 index 0000000..7ceea1d --- /dev/null +++ b/man/import_json_files_to_excel.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/import_json_files_to_excel.R +\name{import_json_files_to_excel} +\alias{import_json_files_to_excel} +\title{Import JSON files to Excel File} +\usage{ +import_json_files_to_excel(json_dir, file = NULL, open = TRUE) +} +\arguments{ +\item{json_dir}{path to directory containing .json files} + +\item{file}{path to Excel file to be created. Default: +\code{"umberto-results_.xlsx"} in \code{tempdir()}} + +\item{open}{logical indicating whether or not to open the created Excel file} +} +\value{ +path to created Excel file +} +\description{ +Import JSON files to Excel File +} From 1977a5e4551168fbd6e6164959043194f455806e Mon Sep 17 00:00:00 2001 From: hsonne Date: Sat, 6 Jan 2024 10:29:00 +0100 Subject: [PATCH 39/51] Omit intermediate variable "base_name" --- R/import_json_files_to_excel.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/import_json_files_to_excel.R b/R/import_json_files_to_excel.R index 018914c..ab31dd5 100644 --- a/R/import_json_files_to_excel.R +++ b/R/import_json_files_to_excel.R @@ -23,11 +23,11 @@ import_json_files_to_excel <- function( split_by_columns("indicator") if (is.null(file)) { - base_name <- kwb.utils::substSpecialChars(basename(json_dir)) - file <- file.path( - tempdir(), - sprintf("umberto-import_%s.xlsx", base_name) - ) + file <- file.path(tempdir(), paste0( + "umberto-import_", + kwb.utils::substSpecialChars(basename(json_dir)), + ".xlsx" + )) } writexl::write_xlsx(sheets, file) From 95bc42286912db9e1fa6d83e07040d823550731f Mon Sep 17 00:00:00 2001 From: hsonne Date: Sat, 6 Jan 2024 11:12:18 +0100 Subject: [PATCH 40/51] Clean stop_on_differing_names() --- R/import_rawdata.R | 24 ++++++++++-------- .../test-function-stop_on_differing_names.R | 25 +++++++++++-------- 2 files changed, 28 insertions(+), 21 deletions(-) diff --git a/R/import_rawdata.R b/R/import_rawdata.R index 4b06a40..0a2ae19 100644 --- a/R/import_rawdata.R +++ b/R/import_rawdata.R @@ -81,25 +81,29 @@ stop_on_differing_names <- function(x) stopifnot(is.list(x), all(sapply(x, is.data.frame))) # Return if there are not at least two data frames - if (length(x) < 2) { - + if (length(x) < 2L) { return() } # Get the column names of the first data frame of the list - names_1 <- names(x[[1]]) + columns_of_first <- names(x[[1L]]) + + # Names of data frames in list x + df_names <- names(x) + is_empty <- df_names == "" + df_names[is_empty] <- sprintf("", which(is_empty)) # Compare with the column names of the other data frames - for (i in seq_along(x)[-1]) { + for (i in seq_along(df_names)[-1L]) { - names_i <- names(x[[i]]) + columns_of_current <- names(x[[i]]) - if (! identical(names_1, names_i)) { - + if (!identical(columns_of_first, columns_of_current)) { + pasted <- function(x) paste(x, collapse = ", ") stop_( - "There are differing column names:\n", - " ", names(x)[1], ": ", paste(names_1, collapse = ", "), - "\n ", names(x)[i], ": ", paste(names_i, collapse = ", ") + "There are differing column names:", + "\n ", df_names[1L], ": ", pasted(columns_of_first), + "\n ", df_names[i], ": ", pasted(columns_of_current) ) } } diff --git a/tests/testthat/test-function-stop_on_differing_names.R b/tests/testthat/test-function-stop_on_differing_names.R index dda10de..aa6ac09 100644 --- a/tests/testthat/test-function-stop_on_differing_names.R +++ b/tests/testthat/test-function-stop_on_differing_names.R @@ -1,16 +1,19 @@ -# -# This test file has been generated by kwb.test::create_test_files() -# launched by user mrustl on 2022-11-18 13:49:45. -# Your are strongly encouraged to modify the dummy functions -# so that real cases are tested. You should then delete this comment. -# +#library(testthat) test_that("stop_on_differing_names() works", { - expect_error( - kwb.umberto:::stop_on_differing_names() - # argument "x" is missing, with no default - ) + f <- kwb.umberto:::stop_on_differing_names + + expect_error(f()) + dfs <- list( + df1 = data.frame(a = 1, b = 2), + df2 = data.frame(a = 1, b = 2, c = 3), + data.frame(a = 2, b = 3) + ) + + expect_error(f(dfs)) + expect_error(f(dfs[-1L]), "") + + expect_silent(f(dfs[-2L])) }) - From 13db85772fe2e4949db5614fc232c0b349fd1817 Mon Sep 17 00:00:00 2001 From: hsonne Date: Sat, 6 Jan 2024 12:52:28 +0100 Subject: [PATCH 41/51] Create Excel file within input directory (by default) --- R/import_json_files_to_excel.R | 12 ++---------- man/import_json_files_to_excel.Rd | 8 ++++++-- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/R/import_json_files_to_excel.R b/R/import_json_files_to_excel.R index ab31dd5..4faa368 100644 --- a/R/import_json_files_to_excel.R +++ b/R/import_json_files_to_excel.R @@ -4,7 +4,7 @@ #' #' @param json_dir path to directory containing .json files #' @param file path to Excel file to be created. Default: -#' \code{"umberto-results_.xlsx"} in \code{tempdir()} +#' \code{"umberto-results.xlsx"} within \code{json_dir} #' @param open logical indicating whether or not to open the created Excel file #' @return path to created Excel file #' @importFrom kwb.utils hsOpenWindowsExplorer substSpecialChars @@ -12,7 +12,7 @@ #' @export import_json_files_to_excel <- function( json_dir, - file = NULL, + file = file.path(json_dir, "umberto-results.xlsx"), open = TRUE ) { @@ -22,14 +22,6 @@ import_json_files_to_excel <- function( core_data_to_wide() %>% split_by_columns("indicator") - if (is.null(file)) { - file <- file.path(tempdir(), paste0( - "umberto-import_", - kwb.utils::substSpecialChars(basename(json_dir)), - ".xlsx" - )) - } - writexl::write_xlsx(sheets, file) if (open) { diff --git a/man/import_json_files_to_excel.Rd b/man/import_json_files_to_excel.Rd index 7ceea1d..21c35eb 100644 --- a/man/import_json_files_to_excel.Rd +++ b/man/import_json_files_to_excel.Rd @@ -4,13 +4,17 @@ \alias{import_json_files_to_excel} \title{Import JSON files to Excel File} \usage{ -import_json_files_to_excel(json_dir, file = NULL, open = TRUE) +import_json_files_to_excel( + json_dir, + file = file.path(json_dir, "umberto-results.xlsx"), + open = TRUE +) } \arguments{ \item{json_dir}{path to directory containing .json files} \item{file}{path to Excel file to be created. Default: -\code{"umberto-results_.xlsx"} in \code{tempdir()}} +\code{"umberto-results.xlsx"} within \code{json_dir}} \item{open}{logical indicating whether or not to open the created Excel file} } From 604a94b41a420046240f0841bb5c9fc20b441e2e Mon Sep 17 00:00:00 2001 From: hsonne Date: Sat, 6 Jan 2024 13:11:11 +0100 Subject: [PATCH 42/51] Import kwb.utils::stopFormatted() --- NAMESPACE | 1 + R/import_rawdata_json.R | 1 + 2 files changed, 2 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index dad8478..9138b7c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -22,6 +22,7 @@ importFrom(ggforce,facet_wrap_paginate) importFrom(janitor,clean_names) importFrom(kwb.utils,hsOpenWindowsExplorer) importFrom(kwb.utils,selectColumns) +importFrom(kwb.utils,stopFormatted) importFrom(kwb.utils,substSpecialChars) importFrom(magrittr,"%>%") importFrom(openxlsx,write.xlsx) diff --git a/R/import_rawdata_json.R b/R/import_rawdata_json.R index bd22eb3..bb361df 100644 --- a/R/import_rawdata_json.R +++ b/R/import_rawdata_json.R @@ -99,6 +99,7 @@ read_json_files <- function(json_dir, files = NULL) } # list_json_files_or_stop ------------------------------------------------------ +#' @importFrom kwb.utils stopFormatted list_json_files_or_stop <- function(json_dir) { json_files <- list.files( From 4dab70818db167e4df194593bcdf4d5dda1a07f9 Mon Sep 17 00:00:00 2001 From: hsonne Date: Sat, 6 Jan 2024 13:11:48 +0100 Subject: [PATCH 43/51] Add argument "overwrite" --- R/import_json_files_to_excel.R | 26 ++++++++++++++++++++++++-- man/import_json_files_to_excel.Rd | 4 ++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/R/import_json_files_to_excel.R b/R/import_json_files_to_excel.R index 4faa368..812126f 100644 --- a/R/import_json_files_to_excel.R +++ b/R/import_json_files_to_excel.R @@ -5,6 +5,8 @@ #' @param json_dir path to directory containing .json files #' @param file path to Excel file to be created. Default: #' \code{"umberto-results.xlsx"} within \code{json_dir} +#' @param overwrite whether or not to overwrite the Excel \code{file} if it +#' exists. Default: \code{FALSE}. #' @param open logical indicating whether or not to open the created Excel file #' @return path to created Excel file #' @importFrom kwb.utils hsOpenWindowsExplorer substSpecialChars @@ -13,6 +15,7 @@ import_json_files_to_excel <- function( json_dir, file = file.path(json_dir, "umberto-results.xlsx"), + overwrite = FALSE, open = TRUE ) { @@ -22,10 +25,29 @@ import_json_files_to_excel <- function( core_data_to_wide() %>% split_by_columns("indicator") - writexl::write_xlsx(sheets, file) + file_exists <- file.exists(file) + quoted_file <- dQuote(file, '"') + + if (file_exists && !overwrite) { + stop( + "The Excel file exists:\n ", + quoted_file, + "\nPlease choose another file name or set overwrite = TRUE.", + call. = FALSE + ) + } + + kwb.utils::catAndRun( + paste( + ifelse(file_exists, "Overwriting", "Writing"), + "Excel file", + quoted_file + ), + writexl::write_xlsx(sheets, file) + ) if (open) { - try(kwb.utils::hsOpenWindowsExplorer(file)) + try(kwb.utils::hsOpenWindowsExplorer(path.expand(file))) } file diff --git a/man/import_json_files_to_excel.Rd b/man/import_json_files_to_excel.Rd index 21c35eb..e7f7534 100644 --- a/man/import_json_files_to_excel.Rd +++ b/man/import_json_files_to_excel.Rd @@ -7,6 +7,7 @@ import_json_files_to_excel( json_dir, file = file.path(json_dir, "umberto-results.xlsx"), + overwrite = FALSE, open = TRUE ) } @@ -16,6 +17,9 @@ import_json_files_to_excel( \item{file}{path to Excel file to be created. Default: \code{"umberto-results.xlsx"} within \code{json_dir}} +\item{overwrite}{whether or not to overwrite the Excel \code{file} if it +exists. Default: \code{FALSE}.} + \item{open}{logical indicating whether or not to open the created Excel file} } \value{ From 3731059a02955e7a581a0b013f9871b4f5d0da01 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 11 Jan 2024 14:47:19 +0100 Subject: [PATCH 44/51] Name sheets "m01", "m02", ... --- R/import_json_files_to_excel.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/import_json_files_to_excel.R b/R/import_json_files_to_excel.R index 812126f..620219f 100644 --- a/R/import_json_files_to_excel.R +++ b/R/import_json_files_to_excel.R @@ -23,7 +23,8 @@ import_json_files_to_excel <- function( import_rawdata_json(add_place = TRUE, old_format = FALSE) %>% get_core_data() %>% core_data_to_wide() %>% - split_by_columns("indicator") + split_by_columns("indicator") %>% + stats::setNames(sprintf("m%02d", seq_along(.))) file_exists <- file.exists(file) quoted_file <- dQuote(file, '"') From 0dad4c23ac563219fbb244982a065c925af2e892 Mon Sep 17 00:00:00 2001 From: hsonne Date: Mon, 11 Mar 2024 18:45:54 +0100 Subject: [PATCH 45/51] Add argument "expand" for same rows in sheets --- DESCRIPTION | 2 +- R/import_json_files_to_excel.R | 32 ++++++++++++++++++++++++++++--- R/import_rawdata_json.R | 2 +- man/import_json_files_to_excel.Rd | 7 ++++++- 4 files changed, 37 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1299138..f9b5fcb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -40,5 +40,5 @@ Remotes: ByteCompile: true Encoding: UTF-8 LazyData: true -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 Config/testthat/edition: 3 diff --git a/R/import_json_files_to_excel.R b/R/import_json_files_to_excel.R index 620219f..7b92695 100644 --- a/R/import_json_files_to_excel.R +++ b/R/import_json_files_to_excel.R @@ -8,6 +8,9 @@ #' @param overwrite whether or not to overwrite the Excel \code{file} if it #' exists. Default: \code{FALSE}. #' @param open logical indicating whether or not to open the created Excel file +#' @param expand logical indicating whether or not to expand all sheets to the +#' same number of rows (so that all possible combinations of values in the +#' key columns are given) #' @return path to created Excel file #' @importFrom kwb.utils hsOpenWindowsExplorer substSpecialChars #' @importFrom writexl write_xlsx @@ -16,15 +19,23 @@ import_json_files_to_excel <- function( json_dir, file = file.path(json_dir, "umberto-results.xlsx"), overwrite = FALSE, - open = TRUE + open = TRUE, + expand = TRUE ) { + #kwb.utils::assignPackageObjects("kwb.umberto");`%>%` <- magrittr::`%>%` + sheets <- json_dir %>% import_rawdata_json(add_place = TRUE, old_format = FALSE) %>% get_core_data() %>% core_data_to_wide() %>% - split_by_columns("indicator") %>% - stats::setNames(sprintf("m%02d", seq_along(.))) + split_by_columns("indicator") + + names(sheets) <- sprintf("m%02d", seq_along(sheets)) + + if (expand) { + sheets <- expand_to_all_key_combinations(sheets) + } file_exists <- file.exists(file) quoted_file <- dQuote(file, '"') @@ -100,3 +111,18 @@ core_data_to_wide <- function(data) "exchange" )) } + +# expand_to_all_key_combinations ----------------------------------------------- +expand_to_all_key_combinations <- function( + sheets, + keys = c("indicator", "process", "place", "exchange") +) +{ + key_levels <- lapply(stats::setNames(nm = keys), function(key) { + unique(unlist(lapply(sheets, kwb.utils::selectColumns, key))) + }) + + level_combis <- do.call(kwb.utils::expandGrid, key_levels) + + lapply(sheets, dplyr::right_join, level_combis, by = keys) +} diff --git a/R/import_rawdata_json.R b/R/import_rawdata_json.R index bb361df..d68c8ea 100644 --- a/R/import_rawdata_json.R +++ b/R/import_rawdata_json.R @@ -45,7 +45,7 @@ import_rawdata_json <- function( files = NULL ) { - #kwb.utils::assignPackageObjects("kwb.umberto") + #kwb.utils::assignPackageObjects("kwb.umberto");files = NULL contents <- read_json_files(json_dir, files = files) result_tables <- lapply(contents, to_tables) diff --git a/man/import_json_files_to_excel.Rd b/man/import_json_files_to_excel.Rd index e7f7534..afb6a09 100644 --- a/man/import_json_files_to_excel.Rd +++ b/man/import_json_files_to_excel.Rd @@ -8,7 +8,8 @@ import_json_files_to_excel( json_dir, file = file.path(json_dir, "umberto-results.xlsx"), overwrite = FALSE, - open = TRUE + open = TRUE, + expand = TRUE ) } \arguments{ @@ -21,6 +22,10 @@ import_json_files_to_excel( exists. Default: \code{FALSE}.} \item{open}{logical indicating whether or not to open the created Excel file} + +\item{expand}{logical indicating whether or not to expand all sheets to the +same number of rows (so that all possible combinations of values in the +key columns are given)} } \value{ path to created Excel file From 2dd80abfd2f28bb09c88e76310fdfeae71b544e8 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 12 Mar 2024 19:21:54 +0100 Subject: [PATCH 46/51] Reimplement the "expanding" of sheets --- R/import_json_files_to_excel.R | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/R/import_json_files_to_excel.R b/R/import_json_files_to_excel.R index 7b92695..9581335 100644 --- a/R/import_json_files_to_excel.R +++ b/R/import_json_files_to_excel.R @@ -8,9 +8,11 @@ #' @param overwrite whether or not to overwrite the Excel \code{file} if it #' exists. Default: \code{FALSE}. #' @param open logical indicating whether or not to open the created Excel file -#' @param expand logical indicating whether or not to expand all sheets to the -#' same number of rows (so that all possible combinations of values in the -#' key columns are given) +#' @param expand_keys If this argument is not \code{NULL} but a vector of (key) +#' column names, all sheets are expanded to the same number of rows. All +#' possible combinations of values in these key columns are then given in each +#' sheet even though there are no values for these key value combinations. +#' Default: \code{c("indicator", "process", "place", "exchange")} #' @return path to created Excel file #' @importFrom kwb.utils hsOpenWindowsExplorer substSpecialChars #' @importFrom writexl write_xlsx @@ -20,7 +22,7 @@ import_json_files_to_excel <- function( file = file.path(json_dir, "umberto-results.xlsx"), overwrite = FALSE, open = TRUE, - expand = TRUE + expand_keys = c("process", "place", "exchange") ) { #kwb.utils::assignPackageObjects("kwb.umberto");`%>%` <- magrittr::`%>%` @@ -34,7 +36,7 @@ import_json_files_to_excel <- function( names(sheets) <- sprintf("m%02d", seq_along(sheets)) if (expand) { - sheets <- expand_to_all_key_combinations(sheets) + sheets <- expand_to_all_key_combinations(sheets, keys = expand_keys) } file_exists <- file.exists(file) @@ -113,16 +115,18 @@ core_data_to_wide <- function(data) } # expand_to_all_key_combinations ----------------------------------------------- -expand_to_all_key_combinations <- function( - sheets, - keys = c("indicator", "process", "place", "exchange") -) +expand_to_all_key_combinations <- function(sheets, keys) { - key_levels <- lapply(stats::setNames(nm = keys), function(key) { - unique(unlist(lapply(sheets, kwb.utils::selectColumns, key))) + level_combis <- unique(do.call(rbind, lapply( + sheets, + FUN = function(sheet) unique(kwb.utils::selectColumns(sheet, keys)) + ))) + + lapply(names(sheets), function(indicator) { + dplyr::left_join( + x = data.frame(indicator = indicator, level_combis), + y = sheets[[indicator]], + by = c("indicator", keys) + ) }) - - level_combis <- do.call(kwb.utils::expandGrid, key_levels) - - lapply(sheets, dplyr::right_join, level_combis, by = keys) } From 90921df23c6b4b56dc11bd5c956e70b3609e2b4a Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 12 Mar 2024 19:33:23 +0100 Subject: [PATCH 47/51] Update Rd file --- man/import_json_files_to_excel.Rd | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/man/import_json_files_to_excel.Rd b/man/import_json_files_to_excel.Rd index afb6a09..02e0f95 100644 --- a/man/import_json_files_to_excel.Rd +++ b/man/import_json_files_to_excel.Rd @@ -9,7 +9,7 @@ import_json_files_to_excel( file = file.path(json_dir, "umberto-results.xlsx"), overwrite = FALSE, open = TRUE, - expand = TRUE + expand_keys = c("process", "place", "exchange") ) } \arguments{ @@ -23,9 +23,11 @@ exists. Default: \code{FALSE}.} \item{open}{logical indicating whether or not to open the created Excel file} -\item{expand}{logical indicating whether or not to expand all sheets to the -same number of rows (so that all possible combinations of values in the -key columns are given)} +\item{expand_keys}{If this argument is not \code{NULL} but a vector of (key) +column names, all sheets are expanded to the same number of rows. All +possible combinations of values in these key columns are then given in each +sheet even though there are no values for these key value combinations. +Default: \code{c("indicator", "process", "place", "exchange")}} } \value{ path to created Excel file From 105d70ecf795e442456c32f8e88673bd0fac1876 Mon Sep 17 00:00:00 2001 From: hsonne Date: Tue, 12 Mar 2024 19:33:37 +0100 Subject: [PATCH 48/51] Add test call --- R/import_rawdata_json.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/import_rawdata_json.R b/R/import_rawdata_json.R index d68c8ea..8e01828 100644 --- a/R/import_rawdata_json.R +++ b/R/import_rawdata_json.R @@ -18,6 +18,8 @@ if (FALSE) kwb.utils::assignPackageObjects("kwb.umberto") + import_json_files_to_excel(json_dir) + contents <- read_json_files(json_dir) #jsTree::jsTree From 14fff20fcdf9f3261c073bc1bb58312e3b750136 Mon Sep 17 00:00:00 2001 From: hsonne Date: Thu, 14 Mar 2024 17:45:36 +0100 Subject: [PATCH 49/51] Fix :bug: in expand_to_all_key_combinations() --- R/import_json_files_to_excel.R | 8 +++++--- R/import_rawdata_json.R | 12 +++++------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/R/import_json_files_to_excel.R b/R/import_json_files_to_excel.R index 9581335..ada519c 100644 --- a/R/import_json_files_to_excel.R +++ b/R/import_json_files_to_excel.R @@ -35,7 +35,7 @@ import_json_files_to_excel <- function( names(sheets) <- sprintf("m%02d", seq_along(sheets)) - if (expand) { + if (!is.null(expand_keys)) { sheets <- expand_to_all_key_combinations(sheets, keys = expand_keys) } @@ -122,10 +122,12 @@ expand_to_all_key_combinations <- function(sheets, keys) FUN = function(sheet) unique(kwb.utils::selectColumns(sheet, keys)) ))) - lapply(names(sheets), function(indicator) { + lapply(sheets, function(y) { + indicator <- unique(kwb.utils::selectColumns(y, "indicator")) + stopifnot(length(indicator) == 1L) dplyr::left_join( x = data.frame(indicator = indicator, level_combis), - y = sheets[[indicator]], + y = y, by = c("indicator", keys) ) }) diff --git a/R/import_rawdata_json.R b/R/import_rawdata_json.R index 8e01828..29cd878 100644 --- a/R/import_rawdata_json.R +++ b/R/import_rawdata_json.R @@ -2,7 +2,10 @@ if (FALSE) { json_dir <- "~/../Downloads/S/support/fabian/R-Umberto/Umberto11" json_dir <- "~/../Downloads/S/support/lea/umberto" - + json_dir <- "Y:/WWT_Department/Projects/Ultimate/Data_Work-packages/WP2-assessments/CS9_Kalundborg/LCAumberto11" + + result_file <- kwb.umberto:::import_json_files_to_excel(json_dir, file = tempfile(fileext = ".xlsx")) + files <- dir(json_dir, "\\.json$", full.names = TRUE) result_old_with_place <- kwb.umberto:::import_rawdata_json( @@ -14,13 +17,8 @@ if (FALSE) kwb.umberto:::import_rawdata_json(files = file, add_place = TRUE) }) - stopifnot(identical(do.call(rbind, results), result)) - - kwb.utils::assignPackageObjects("kwb.umberto") - - import_json_files_to_excel(json_dir) - contents <- read_json_files(json_dir) + contents <- kwb.umberto:::read_json_files(json_dir) #jsTree::jsTree } From 1856d46e4e50a68986ea55ef3e114f2c8e084f45 Mon Sep 17 00:00:00 2001 From: hsonne Date: Mon, 18 Mar 2024 18:58:51 +0100 Subject: [PATCH 50/51] Add comment with installation command --- R/import_rawdata_json.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/import_rawdata_json.R b/R/import_rawdata_json.R index 29cd878..ae9af54 100644 --- a/R/import_rawdata_json.R +++ b/R/import_rawdata_json.R @@ -1,5 +1,7 @@ if (FALSE) { + #remotes::install_github("kwb-r/kwb.umberto@expand") + json_dir <- "~/../Downloads/S/support/fabian/R-Umberto/Umberto11" json_dir <- "~/../Downloads/S/support/lea/umberto" json_dir <- "Y:/WWT_Department/Projects/Ultimate/Data_Work-packages/WP2-assessments/CS9_Kalundborg/LCAumberto11" @@ -17,7 +19,6 @@ if (FALSE) kwb.umberto:::import_rawdata_json(files = file, add_place = TRUE) }) - contents <- kwb.umberto:::read_json_files(json_dir) #jsTree::jsTree From 9d7182dff8ad73e6fd52a84584927f37be40ae98 Mon Sep 17 00:00:00 2001 From: hsonne Date: Mon, 18 Mar 2024 19:04:06 +0100 Subject: [PATCH 51/51] Install from "dev" after merging "expand" --- R/import_rawdata_json.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/import_rawdata_json.R b/R/import_rawdata_json.R index ae9af54..bc7c155 100644 --- a/R/import_rawdata_json.R +++ b/R/import_rawdata_json.R @@ -1,6 +1,6 @@ if (FALSE) { - #remotes::install_github("kwb-r/kwb.umberto@expand") + #remotes::install_github("kwb-r/kwb.umberto@dev") json_dir <- "~/../Downloads/S/support/fabian/R-Umberto/Umberto11" json_dir <- "~/../Downloads/S/support/lea/umberto"