KWB-R · hsonne · Sep 5, 2023 · Sep 5, 2023 · Sep 5, 2023 · Sep 5, 2023
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: kwb.umberto
 Title: R package supporting UMERTO LCA at KWB
-Version: 0.2.0
+Version: 0.2.0.9000
 Authors@R: c(
     person("Michael", "Rustler", , "michael.rustler@kompetenz-wasser.de", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0003-0647-7726")),
@@ -20,20 +20,25 @@ Imports:
     ggforce,
     ggplot2,
     janitor,
+    jsonlite,
+    kwb.utils,
     magrittr,
     openxlsx,
     readr,
     tidyr,
-    tidyselect
+    tidyselect,
+    writexl
 Suggests: 
     covr,
     knitr,
     rmarkdown,
     testthat (>= 3.0.0)
 VignetteBuilder: 
     knitr
+Remotes: 
+    github::kwb-r/kwb.utils
 ByteCompile: true
 Encoding: UTF-8
 LazyData: true
-RoxygenNote: 7.2.2
+RoxygenNote: 7.3.1
 Config/testthat/edition: 3
diff --git a/NAMESPACE b/NAMESPACE
@@ -3,7 +3,9 @@
 export("%>%")
 export(create_pivot_list)
 export(group_data)
+export(import_json_files_to_excel)
 export(import_rawdata)
+export(import_rawdata_json)
 export(pivot_data)
 export(plot_results)
 export(write_xlsx)
@@ -18,10 +20,15 @@ importFrom(dplyr,select)
 importFrom(dplyr,summarise_at)
 importFrom(ggforce,facet_wrap_paginate)
 importFrom(janitor,clean_names)
+importFrom(kwb.utils,hsOpenWindowsExplorer)
+importFrom(kwb.utils,selectColumns)
+importFrom(kwb.utils,stopFormatted)
+importFrom(kwb.utils,substSpecialChars)
 importFrom(magrittr,"%>%")
 importFrom(openxlsx,write.xlsx)
 importFrom(readr,read_csv)
 importFrom(readr,read_csv2)
 importFrom(stats,setNames)
 importFrom(tidyr,spread)
 importFrom(tidyselect,all_of)
+importFrom(writexl,write_xlsx)
diff --git a/R/data_aggregation.R b/R/data_aggregation.R
@@ -25,22 +25,29 @@
 #' umberto10_rawdata <- kwb.umberto::import_rawdata(csv_dir = umberto10_csv_dir)
 #' umberto10_data_grouped <- kwb.umberto::group_data(umberto10_rawdata)
 #' head(umberto10_data_grouped)
-group_data <- function(raw_data,
-                       grouping_paras = c("lci_method", "model", "process", "unit"),
-                       grouping_function = "sum",
-                       summarise_col = "quantity") {
-
+group_data <- function(
+    raw_data,
+    grouping_paras = c("lci_method", "model", "process", "unit"),
+    grouping_function = "sum",
+    summarise_col = "quantity"
+)
+{
   summarise_col_fun <- function(summarise_col) {
     sprintf("%s_%s", summarise_col, grouping_function)
   }
 
   raw_data %>% 
-    dplyr::group_by(dplyr::across(tidyselect::all_of(grouping_paras))) %>% 
-    dplyr::rename_with(.fn = summarise_col_fun,  
-                       .cols = summarise_col) %>% 
-    dplyr::summarise_at(.vars = summarise_col_fun(summarise_col),
-                        .funs = grouping_function)
-
+    dplyr::group_by(
+      dplyr::across(tidyselect::all_of(grouping_paras))
+    ) %>% 
+    dplyr::rename_with(
+      .fn = summarise_col_fun,  
+      .cols = summarise_col
+    ) %>% 
+    dplyr::summarise_at(
+      .vars = summarise_col_fun(summarise_col),
+      .funs = grouping_function
+    )
 }
 
 
@@ -75,25 +82,33 @@ group_data <- function(raw_data,
 #' umberto10_data_pivot <- kwb.umberto::pivot_data(umberto10_data_grouped)
 #' head(umberto10_data_pivot)
 #' 
-pivot_data <- function(rawdata_grouped, 
-                       cols_to_ignore = "unit",
-                       key_col = "model",
-                       value_col = "quantity_sum") {
-
+pivot_data <- function(
+    rawdata_grouped, 
+    cols_to_ignore = "unit",
+    key_col = "model",
+    value_col = "quantity_sum"
+)
+{
   rawdata_grouped %>% 
-    dplyr::select(tidyselect::all_of(setdiff(names(rawdata_grouped), 
-                                             cols_to_ignore))
-                  ) %>% 
+    dplyr::select(
+      tidyselect::all_of(
+        setdiff(names(rawdata_grouped), cols_to_ignore)
+      )
+    ) %>% 
     tidyr::spread(key = key_col, value = value_col)
 }
 
 #' Create pivot list
 #'
 #' @param pivot_data privot data as retrieved from function pivot_data()
 #' @param arrange_cols columns used for arranging the data (default: "process")
+#' @param method_col name of the column containing the method 
+#'   (default: "lci_method"). Depending on your Umberto version you may need to
+#'   set method_col to "lcia_method".
 #' @return a list of results, where each element contains the result table for 
 #' one lci_method
 #' @importFrom dplyr right_join arrange
+#' @importFrom kwb.utils selectColumns
 #' @export
 #' @examples 
 #' 
@@ -115,26 +130,37 @@ pivot_data <- function(rawdata_grouped,
 #' umberto10_data_pivot_list <- kwb.umberto::create_pivot_list(umberto10_data_pivot)
 #' head(umberto10_data_pivot_list)
 #' 
-create_pivot_list <- function(pivot_data, 
-                              arrange_cols = "process") {
+create_pivot_list <- function(
+    pivot_data, 
+    arrange_cols = "process", 
+    method_col = "lci_method"
+)
+{
+  method_vector <- kwb.utils::selectColumns(pivot_data, method_col)
 
-  myList <- list()
-  lci_methods <- unique(pivot_data$lci_method)
-  for (i in seq_along(lci_methods)) {
-
-    selected_lci_method <- unique(pivot_data$lci_method)[i]
-
-    processes <- data.frame(lci_method = selected_lci_method,
-                            process = unique(pivot_data$process),
-                            stringsAsFactors = FALSE)
-
-
-    tmp_data <- pivot_data[pivot_data$lci_method ==  selected_lci_method,] %>% 
-      dplyr::right_join(processes) %>% 
-      dplyr::arrange_(arrange_cols)
-
-    myList[[i]] <- tmp_data
-  }
-  names(myList) <- sprintf("lci_method%d", seq_along(lci_methods))
-  return(myList)
-}
+  methods <- unique(method_vector)
+
+  indices <- seq_along(methods)
+
+  lapply(
+    X = indices, 
+    FUN = function(i) {
+
+      selected_lci_method <- methods[i]
+
+      processes <- data.frame(
+        METHOD = selected_lci_method,
+        process = unique(kwb.utils::selectColumns(pivot_data, "process")),
+        stringsAsFactors = FALSE
+      ) %>%
+        kwb.utils::renameColumns(list(
+          METHOD = method_col
+        ))
+
+      pivot_data[method_vector == selected_lci_method, ] %>% 
+        dplyr::right_join(processes) %>% 
+        dplyr::arrange(arrange_cols)
+    }
+  ) %>% 
+    stats::setNames(sprintf("%s%d", method_col, indices))
+}
diff --git a/R/excel_export.R b/R/excel_export.R
@@ -1,4 +1,5 @@
 #' Write results to EXCEL
+#' 
 #' @param data_pivot_list a list as retrieved by function create_pivot_list()
 #' @param path  relative or full path to be used for exporting the results to 
 #' EXCEL (default: "results.xlsx")
@@ -27,10 +28,7 @@
 #' export_path <- file.path(getwd(), "umberto10_results.xlsx")
 #' print(sprintf("Exporting aggregated results to %s", export_path))
 #' write_xlsx(umberto10_data_pivot_list, path = export_path)
-write_xlsx <- function(data_pivot_list,
-                       path = "results.xlsx") {
-
-  openxlsx::write.xlsx(data_pivot_list,
-                       file = path)
-
-}
+write_xlsx <- function(data_pivot_list, path = "results.xlsx")
+{
+  openxlsx::write.xlsx(data_pivot_list, file = path)
+}
diff --git a/R/flatten.R b/R/flatten.R
@@ -0,0 +1,102 @@
+if (FALSE)
+{
+  xx <- kwb.umberto:::read_json_files("~/../Downloads/S/support/fabian/R-Umberto/Umberto11//")
+  library(magrittr)
+
+  is_list <- sapply(xx[[1]], is.list)
+
+  results <- lapply(xx[[1]][is_list], flatten)
+
+  r2 <- fhpredict:::flatten_recursive_list(xx[[1]]$products)
+}
+
+# flatten ----------------------------------------------------------------------
+flatten <- function(x, name = NULL, sep = "|")
+{
+  # x must be a list
+  check_for_list(x)
+
+  # If the list is empty, return NULL
+  if (length(x) == 0L) {
+    return(NULL)
+  }
+
+  # Are the list entries lists themselves?
+  is_list <- sapply(x, is.list)
+
+  # Are the list elements named?
+  is_named <- !is.null(names(x))
+
+  # If no element is a list and all elements are of length one we return x, 
+  # converted to a data frame
+  if (!any(is_list) && all(lengths(x) == 1L)) {
+
+    # If the elements are named, each element becomes a column
+    if (is_named) {
+      return(do.call(kwb.utils::noFactorDataFrame, x))
+    } 
+
+    # Otherwise, the "name" argument must be given. It is used as column name 
+    # of the returned data frame
+    stopifnot(!is.null(name))
+
+    # List elements are concatenated with a separator to one string value
+    result <- kwb.utils::noFactorDataFrame(do.call(paste, c(x, sep = sep)))
+
+    # Name the (one and only) column
+    return(stats::setNames(result, name))
+  }
+
+  # If the elements are not named, flatten and row-bind them 
+  if (!is_named) {
+
+    stopifnot(all(sapply(x, is.list)))
+    stopifnot(all_have_identical_names(x))
+    return(do.call(rbind, lapply(x, flatten, name = name, sep = sep)))
+  }
+
+  # Get the part that is already flat (get_flat_part(x))
+  part_1 <- do.call(
+    data.frame, 
+    replace_null_with_na(x[!is_list])
+  )
+
+  # Names of the other elements (that are lists)
+  elements <- names(which(is_list))
+
+  # Loop through these elements, flatten them 
+  part_2_tables <- elements %>%
+    lapply(function(name) flatten(x[[name]], name = name, sep = sep)) %>%
+    stats::setNames(elements)
+
+  n_tables <- length(part_2_tables)
+
+  # and row-bind them
+  part_2 <- if (n_tables > 1L) {
+
+    #do.call(rbind, part_2_tables)
+
+    # Find a column name that does not yet exist
+    name_column <- kwb.utils::hsSafeName(
+      paste(elements, collapse = "_"),
+      names(part_2_tables[[1L]])
+    )
+
+    kwb.utils::rbindAll(part_2_tables, name_column)
+
+  } else if (n_tables == 1L) {
+
+    part_2_tables[[1L]]
+
+  } # else NULL
+
+  if (is.null(part_2)) {
+    return(part_1)
+  }
+
+  # We expect part_1 to have one row
+  check_for_exactly_one_row(part_1)
+
+  # Consider that part_2 may have no rows!
+  cbind(part_1[rep.int(1L, nrow(part_2)), , drop = FALSE], part_2)
+}
diff --git a/R/helpers.R b/R/helpers.R
@@ -0,0 +1,11 @@
+# remove_xid -------------------------------------------------------------------
+remove_xid <- function(df)
+{
+  kwb.utils::removeColumns(df, "X.id")
+}
+
+# remove_uuid ------------------------------------------------------------------
+remove_uuid <- function(df)
+{
+  kwb.utils::removeColumns(df, "uuid")
+}