From d2b8819837a94437fd516fe63a4f4b25b342122a Mon Sep 17 00:00:00 2001 From: mmyrte <24587121+mmyrte@users.noreply.github.com> Date: Wed, 3 Dec 2025 17:39:08 +0100 Subject: [PATCH 01/23] initial commit dinamica dependencies --- .devcontainer/Dockerfile | 19 + .devcontainer/devcontainer.json | 52 +++ .devcontainer/install_pkg_deps.r | 19 + .devcontainer/install_vscode_devtools.sh | 35 ++ R/utils_dinamica.r | 220 +++++++++ inst/dinamica_model/evoland.ego-decoded | 434 ++++++++++++++++++ .../AllocateTransitions.ego-decoded | 120 +++++ .../CalcSimilarityOfDifferences.ego-decoded | 163 +++++++ .../CreateCubeOfProbabilityMaps.ego-decoded | 185 ++++++++ .../ExpandTableToUniqueKeys.ego-decoded | 40 ++ .../ListFilenames.ego-decoded | 68 +++ 11 files changed, 1355 insertions(+) create mode 100644 .devcontainer/Dockerfile create mode 100644 .devcontainer/devcontainer.json create mode 100755 .devcontainer/install_pkg_deps.r create mode 100755 .devcontainer/install_vscode_devtools.sh create mode 100644 R/utils_dinamica.r create mode 100644 inst/dinamica_model/evoland.ego-decoded create mode 100644 inst/dinamica_model/evoland_ego_Submodels/AllocateTransitions.ego-decoded create mode 100644 inst/dinamica_model/evoland_ego_Submodels/CalcSimilarityOfDifferences.ego-decoded create mode 100644 inst/dinamica_model/evoland_ego_Submodels/CreateCubeOfProbabilityMaps.ego-decoded create mode 100644 inst/dinamica_model/evoland_ego_Submodels/ExpandTableToUniqueKeys.ego-decoded create mode 100644 inst/dinamica_model/evoland_ego_Submodels/ListFilenames.ego-decoded diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile new file mode 100644 index 0000000..a9ed3d7 --- /dev/null +++ b/.devcontainer/Dockerfile @@ -0,0 +1,19 @@ +FROM --platform=linux/amd64 ghcr.io/mmyrte/evoland:latest +LABEL authors="Carlson Büth, Jan Hartman" \ + version="8.3" \ + description="Docker image for Dinamica EGO." + +# Vignettes are being built with quarto +RUN /rocker_scripts/install_quarto.sh + +# Hand-knitted dependency discovery from DESCRIPTION avoids cache invalidation versus a +# remotes::install_deps based solution. +WORKDIR /builddir +COPY DESCRIPTION /builddir/DESCRIPTION +COPY .devcontainer/install_pkg_deps.r /builddir/install_pkg_deps.r +RUN /builddir/install_pkg_deps.r && rm -r /builddir +WORKDIR / + +# Install development dependencies for vscode-style development. +COPY .devcontainer/install_vscode_devtools.sh /rocker_scripts/install_vscode_devtools.sh +RUN /rocker_scripts/install_vscode_devtools.sh diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..808abf2 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,52 @@ +// For format details, see https://aka.ms/devcontainer.json. 
For config options, see the README at: +// https://github.com/microsoft/vscode-dev-containers/tree/v0.191.1/containers/debian +{ + "name": "evoland-devcontainer", + "build": { + "dockerfile": "Dockerfile", + "context": "..", + "args": { + "platform": "linux/amd64" + } + }, + "mounts": [ + { + "source": "${localEnv:EVOLAND_CACHEDIR}", + "target": "/mnt/evoland_cache", + "type": "bind" + }, + { + "source": "${localEnv:EVOLAND_CALIBRATION_DIR}", + "target": "/mnt/calibration", + "type": "bind" + }, + { + "source": "${localEnv:EVOLAND_SIMULATION_DIR}", + "target": "/mnt/simulation", + "type": "bind" + } + ], + "containerEnv": { + "EVOLAND_DATA_BASEPATH": "/mnt/evoland-data", + "EVOLAND_CALIBRATION_DIR": "/mnt/calibration", + "EVOLAND_SIMULATION_DIR": "/mnt/simulation" + }, + "customizations": { + "vscode": { + "settings": { + "git.path": "/usr/bin/git", + "r.rterm.linux": "/usr/local/bin/radian", + "shellcheck.executablePath": "/usr/bin/shellcheck", + "r.plot.useHttpgd": true, + "r.bracketedPaste": true, + "rewrap.wrappingColumn": 88 + }, + "extensions": [ + "RDebugger.r-debugger", + "REditorSupport.r", + "quarto.quarto", + "timonwong.shellcheck" + ] + } + } +} diff --git a/.devcontainer/install_pkg_deps.r b/.devcontainer/install_pkg_deps.r new file mode 100755 index 0000000..95a8fe0 --- /dev/null +++ b/.devcontainer/install_pkg_deps.r @@ -0,0 +1,19 @@ +#!/usr/bin/env Rscript + +# Reusing logic from remotes::local_package_deps +# MIT licensed, source at https://github.com/r-lib/remotes/ + +desc <- remotes:::read_dcf("DESCRIPTION") + +dependencies <- c("Depends", "Imports", "LinkingTo") +dependencies <- intersect(dependencies, names(desc)) +pkg_deps <- + lapply(desc[dependencies], remotes:::parse_deps) |> + lapply(`[[`, "name") |> + unlist(use.names = FALSE) + +pkgs_to_install <- setdiff(pkg_deps, installed.packages()[, 1]) +install.packages( + pkgs_to_install, + Ncpus = max(1L, parallel::detectCores()) +) diff --git a/.devcontainer/install_vscode_devtools.sh b/.devcontainer/install_vscode_devtools.sh new file mode 100755 index 0000000..b651bd9 --- /dev/null +++ b/.devcontainer/install_vscode_devtools.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash + +# Abort script if any command exits with non-zero status. Not foolproof. +set -e + +NCPUS=${NCPUS:-"-1"} + +apt-get update -qq +apt-get -y --no-install-recommends install \ + git \ + htop \ + libfontconfig1-dev \ + libfribidi-dev \ + libgit2-dev \ + libharfbuzz-dev \ + pipx \ + shellcheck + +rm -rf /var/lib/apt/lists/* + +PIPX_BIN_DIR=/usr/local/bin pipx install radian + +install2.r --error --skipinstalled -n $NCPUS \ + covr \ + devtools \ + languageserver \ + microbenchmark \ + profvis \ + quarto \ + rlang + +# httpgd is currently off of CRAN because of c++ compiler conflicts +# https://github.com/nx10/httpgd/issues/218 + +R -e "remotes::install_github('nx10/httpgd')" diff --git a/R/utils_dinamica.r b/R/utils_dinamica.r new file mode 100644 index 0000000..906269b --- /dev/null +++ b/R/utils_dinamica.r @@ -0,0 +1,220 @@ +#' Dinamica Utility Functions +#' +#' Interact with Dinamica from R, see **Functions** section below. +#' +#' @name dinamica_utils +NULL + +#' @describeIn dinamica_utils Execute a Dinamica .ego file using `DinamicaConsole` +#' @param model_path Path to the .ego model file to run. 
Any submodels must be included
+#'   in a directory of the exact form `basename(modelpath)_ego_Submodels`, [see
+#'   wiki](https://csr.ufmg.br/dinamica/dokuwiki/doku.php?id=submodels)
+#' @param disable_parallel Whether to disable parallel steps (default TRUE)
+#' @param log_level Logging level (1-7, default NULL)
+#' @param additional_args Additional arguments to pass to DinamicaConsole, see
+#'   `DinamicaConsole -help`
+#' @param write_logfile bool, write stdout&stderr to a file?
+#' @param echo bool, direct echo to console?
+#'
+#' @export
+
+exec_dinamica <- function(
+  model_path,
+  disable_parallel = TRUE,
+  log_level = NULL,
+  additional_args = NULL,
+  write_logfile = TRUE,
+  echo = FALSE
+) {
+  args <- character()
+  if (disable_parallel) {
+    args <- c(args, "-disable-parallel-steps")
+  }
+  if (!is.null(log_level)) {
+    args <- c(args, "-log-level", log_level) # separate argv entries; processx does no shell-style word splitting
+  }
+  if (!is.null(additional_args)) {
+    args <- c(args, additional_args)
+  }
+  args <- c(args, model_path)
+
+  if (write_logfile) {
+    logfile_path <- fs::path(
+      fs::path_dir(model_path),
+      format(Sys.time(), "%Y-%m-%d_%Hh%Mm%Ss_dinamica.log")
+    )
+    cli::cli_inform(
+      "Logging to {.file {logfile_path}}"
+    )
+    logfile_con <- file(
+      description = logfile_path,
+      open = "a"
+    )
+    on.exit(close(logfile_con))
+    # callback should have irrelevant overhead versus launching a shell, tee-ing a pipe,
+    # and stripping escape sequences with sed
+    stdout_cb <- function(chunk, process) {
+      cli::ansi_strip(chunk) |>
+        cat(file = logfile_con)
+    }
+  } else {
+    # register empty callback
+    stdout_cb <- function(chunk, process) {
+      NULL
+    }
+  }
+
+  res <- processx::run(
+    # If called directly, DinamicaConsole does not flush its buffer upon SIGTERM.
+    # stdbuf -oL forces flushing the stdout buffer after every line.
+    command = "stdbuf",
+    args = c(
+      "-oL",
+      "DinamicaConsole", # assume that $PATH is complete
+      args
+    ),
+    error_on_status = FALSE,
+    echo = echo,
+    stdout_callback = stdout_cb,
+    stderr_callback = stdout_cb,
+    spinner = TRUE,
+    env = c(
+      "current",
+      DINAMICA_HOME = fs::path_dir(model_path)
+    )
+  )
+
+  if (res[["status"]] != 0L) {
+    cli::cli_abort(
+      c(
+        "Dinamica registered an error.",
+        "Rerun with echo = TRUE write_logfile = TRUE to see what went wrong."
+      ),
+      class = "dinamicaconsole_error",
+      body = res[["stderr"]]
+    )
+  }
+
+  invisible(res)
+}
+
+#' @describeIn dinamica_utils Set up evoland-specific Dinamica EGO files; execute using
+#'   [exec_dinamica()]
+#' @param run_modelprechecks bool, Validate that everything's in place for a model run.
+#'   Will never be run if calibration.
+#' @param config List of config params
+#' @param calibration bool, Is this a calibration run?
+#' @param work_dir Working dir, where to place ego files and control table
+#' @param ... passed to [exec_dinamica()]
+#' @export
+run_evoland_dinamica_sim <- function(
+  run_modelprechecks = TRUE,
+  config = get_config(),
+  work_dir = format(Sys.time(), "%Y-%m-%d_%Hh%Mm%Ss"),
+  calibration = FALSE,
+  ...
+) {
+  if (run_modelprechecks && !calibration) {
+    stopifnot(lulcc.modelprechecks())
+  }
+
+  # find raw ego files with decoded R/Python code chunks
+  decoded_files <- fs::dir_ls(
+    path = system.file("dinamica_model", package = "evoland"),
+    regexp = "evoland.*\\.ego-decoded$",
+    recurse = TRUE
+  )
+
+  purrr::walk(decoded_files, function(decoded_file) {
+    # Determine relative path and new output path with .ego extension
+    rel_path <- fs::path_rel(
+      path = decoded_file,
+      start = system.file("dinamica_model", package = "evoland")
+    )
+    out_path <- fs::path_ext_set(fs::path(work_dir, rel_path), "ego")
+    fs::dir_create(fs::path_dir(out_path))
+    process_dinamica_script(decoded_file, out_path)
+  })
+
+  # move simulation control csv into place
+  fs::file_copy(
+    ifelse(
+      calibration,
+      config[["calibration_ctrl_tbl_path"]],
+      config[["ctrl_tbl_path"]]
+    ),
+    fs::path(work_dir, "simulation_control.csv"),
+    overwrite = TRUE
+  )
+
+  cli::cli_inform("Starting to run model with Dinamica EGO")
+  exec_dinamica(
+    model_path = fs::path(work_dir, "evoland.ego"),
+    ...
+  )
+}
+
+#' @describeIn dinamica_utils Encode or decode raw R and Python code chunks in .ego
+#'   files and their submodels to/from base64
+#' @param infile Input file path. Treated as input if passed AsIs using `base::I()`
+#' @param outfile Output file path (optional)
+#' @param mode Character, either "encode" or "decode"
+#' @param check Default TRUE, simple check to ensure that you're handling what you're expecting
+
+process_dinamica_script <- function(infile, outfile, mode = "encode", check = TRUE) {
+  mode <- rlang::arg_match(mode, c("encode", "decode"))
+  if (inherits(infile, "AsIs")) {
+    file_text <- infile
+  } else {
+    # read the input file as a single string
+    file_text <- readChar(infile, file.info(infile)$size)
+  }
+
+  # match the Calculate R or Python Expression blocks - guesswork involved
+  pattern <- ':= Calculate(?:Python|R)Expression "(\\X*?)" (?:\\.no )?\\{\\{'
+  # extracts both full match [,1] and capture group [,2]
+  matches <- stringr::str_match_all(file_text, pattern)[[1]]
+
+  if (check) {
+    non_base64_chars_present <- stringr::str_detect(matches[, 2], "[^A-Za-z0-9+=\\n/]")
+    if (mode == "encode" && any(!non_base64_chars_present)) {
+      stop(
+        "There are no non-base64 chars in one of the matched patterns, which seems ",
+        "unlikely for an unencoded code chunk. Override this check with ",
+        "check = FALSE if you're sure that this is an unencoded file."
+      )
+    } else if (mode == "decode" && any(non_base64_chars_present)) {
+      stop(
+        "There are non-base64 chars in one of the matched patterns, which seems ",
+        "unlikely for an encoded code chunk. Override this check with ",
+        "check = FALSE if you're sure that this is an encoded file."
+ ) + } + } + + if (nrow(matches) > 0) { + encoder_decoder <- ifelse( + mode == "encode", + \(code) base64enc::base64encode(charToRaw(code)), + \(code) rawToChar(base64enc::base64decode(code)) + ) + # matches[,2] contains the captured R/python code OR base64-encoded code + encoded_vec <- purrr::map_chr(matches[, 2], encoder_decoder) + # replace each original code with its base64 encoded version + for (i in seq_along(encoded_vec)) { + file_text <- stringr::str_replace( + string = file_text, + pattern = stringr::fixed(matches[i, 2]), + replacement = encoded_vec[i] + ) + } + } + + # Write to outfile if specified, otherwise return the substituted string + if (!missing(outfile)) { + writeChar(file_text, outfile, eos = NULL) + invisible(outfile) + } else { + file_text + } +} diff --git a/inst/dinamica_model/evoland.ego-decoded b/inst/dinamica_model/evoland.ego-decoded new file mode 100644 index 0000000..7266d06 --- /dev/null +++ b/inst/dinamica_model/evoland.ego-decoded @@ -0,0 +1,434 @@ +@charset = UTF-8 +@date = 2024-Jan-21 16:00:58 +@version = 7.2.0.20221230 +@submodel.import = CreateCubeOfProbabilityMaps { { \"transitionMatrix\" : Table, \"inputFolder\" : Folder } { } { \"probabilityMapsRasterCube\" : Map } }; AllocateTransitions { { \"lanscape\" : CategoricalMap, \"probabilities\" : Map, \"transitionMatrix\" : TransitionMatrix, \"percentOfTransitionsByExpansion\" : PercentMatrix, \"patchExpansionParameters\" : TransitionFunctionParameterMatrix, \"patchGenerationParameters\" : TransitionFunctionParameterMatrix } { \"printTransitionInfo\" : BooleanValue } { \"resultingLanscape\" : CategoricalMap } }; CalcSimilarityOfDifferences { { \"initialMap\" : CategoricalMap, \"observedMap\" : CategoricalMap, \"simulatedMap\" : CategoricalMap } { \"useExponentialDecay\" : BooleanValue, \"windowSize\" : PositiveIntegerValue, \"printSimilarities\" : BooleanValue, \"exponentialDecayDivisor\" : RealValue } { \"similarityMap\" : Map, \"similarity\" : RealValue } } +Script {{ + @collapsed = no + @alias = Load control table + _ := Group .none {{ + getEnvironmentVariables := CalculateRExpression " + message(Sys.time(), " - getEnvironmentVariables") + outputString("work_dir", getwd()) + " .no {{ }}; + + workingDirectory := ExtractStructString getEnvironmentVariables $"(work_dir)"; + + // Subset control table. The ForEach logic in Dinamica takes the first column + // for the simulationNumber, which R then uses to subset by csv row. Danger! + @collapsed = no + @alias = Subset control table to only simulations yet to be completed (i.e. 
modelled) + incompleteSimulationsRExpression := CalculateRExpression " + message(Sys.time(), " - incompleteSimulationsRExpression") + outputTable("remaining_simulations", evoland::get_remaining_simulations()) + " .no {{ }}; + + @viewer.table = yes + subsettedControlTable := ExtractStructTable incompleteSimulationsRExpression $"(remaining_simulations)"; + }}; + + // Iterate over each row of the control table performing the extrapolation + @collapsed = no + @alias = Simulation iteration + _ := ForEach subsettedControlTable subsettedControlTable {{ + step = step; + + @viewer.step = yes + simulationNumber := Step step; + + @collapsed = no + @alias = Set working directory + Workdir workingDirectory {{ + workdir = workdir; + + @collapsed = no + @viewer.executionCompletedSucessfully = yes + skipOnError1399 := SkipOnError .yes {{ + @collapsed = no + @alias = Initialise scenario + _ := Group .none {{ + // Prepare maps, create folders and pass back file paths for current simulation + @collapsed = no + @viewer.result = yes + initializeCurrentSimulation := CalculateRExpression " + message(Sys.time(), " - initializeCurrentSimulation") + config <- evoland::get_config() + sim_details <- evoland::get_simulation_params(simulation_id = v1) + evoland::create_init_lulc_raster(params = sim_details) + + # step length to construct simulatedLulcSavePath + outputDouble("step_length", config[["step_length"]]) + + # lookup table to iterate over (timeStepTable) + timesteps <- evoland::get_simulation_timesteps(params = sim_details) + outputLookupTable("simulation_time_steps", timesteps[["key"]], timesteps[["value"]]) + + # send simulated LULC folder path to Dinamica receiver: sim_results_path + outputString("sim_results_path", sim_details[["sim_results_path"]]) + + # send initial LULC map file path to Dinamica receiver: + outputString("initial_lulc_path", sim_details[["initial_lulc_path"]]) + + # send folder path as string to Dinamica receiver: allocation_params_folder_path + outputString("allocation_params_folder_path", sim_details[["params_folder_dinamica"]]) + " .no {{ + @alias = Row of current simulation in table + NumberValue simulationNumber 1; + }}; + + @viewer.string = yes + receiveInitialLulcFilePath := ExtractStructString initializeCurrentSimulation $"(initial_lulc_path)"; + + @viewer.lookupTable = yes + timeStepTable := ExtractStructLookupTable initializeCurrentSimulation $"(simulation_time_steps)"; + + stepLength := ExtractStructNumber initializeCurrentSimulation $"(step_length)"; + + @viewer.string = yes + allocationParamsFolderPath := ExtractStructString initializeCurrentSimulation $"(allocation_params_folder_path)"; + + @viewer.string = yes + receiveSimResultsPath := ExtractStructString initializeCurrentSimulation $"(sim_results_path)"; + }}; + + @collapsed = no + scenarioTimeStepIteration := ForEach timeStepTable timeStepTable {{ + step0 = step; + + // Receives current model time step + @viewer.step = yes + timeStep := Step step0; + + @alias = Transition rate table control + _ := Group .none {{ + + // Load Scenario/time specific transition matrix + @collapsed = no + loadTransitionRateTable := CalculateRExpression " + message(Sys.time(), " - loadTransitionRateTable") + outputTable("trans_rate_table", evoland::dinamica_load_trans_table( + simulation_id = v2, + year = v1 + )) + " .no {{ + @alias = Time step + NumberValue timeStep 1; + + @alias = Simulation number + NumberValue simulationNumber 2; + }}; + + transitionRatesTable := ExtractStructTable loadTransitionRateTable $"(trans_rate_table)"; + }}; + + 
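+      // The initial LULC map is read from disk only once per simulation;
+      // MuxCategoricalMap below returns it on the first time step and the
+      // fed-back updatedLulcMap (loadUpdatedLulcMap, defined after the
+      // deterministic transitions) on every later step.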
@viewer.map = yes + loadInitialLulcMap := LoadCategoricalMap { + filename = receiveInitialLulcFilePath, + nullValue = .none, + storageMode = .default, + suffixDigits = 0, + step = step0, + workdir = workdir + }; + + // simulated LULC map updated for each time step + @viewer.map = yes + updatedLulcMap := MuxCategoricalMap loadInitialLulcMap loadUpdatedLulcMap; + + @alias = Load allocation parameters + _ := Group .none {{ + @collapsed = no + @viewer.result = yes + // Modify file path for allocation parameters table for time step + allocationParametersPathForTimeStep := CreateString allocationParamsFolderPath {{ + @alias = Time step + NumberValue timeStep 1; + }}; + + @viewer.table = yes + loadTableOfAllocationParameters := LoadTable { + filename = allocationParametersPathForTimeStep, + uniqueIdKeyColumnCreation = .ifNecessary, + suffixDigits = 0, + step = step0, + workdir = workdir + }; + + @collapsed = no + @viewer.result = yes + splitParamTable := CalculateRExpression " + message(Sys.time(), " - splitParamTable") + param_table <- t1 + + # the third positional argument `, 2` is necessary for Dinamica to register that the + # table has 2 key columns (From and To) - could not find documentation for this + outputTable( + "expander_table", + param_table[, c("From", "To", "Perc_expander")], + 2 + ) + outputTable( + "patch_table", + param_table[, !(colnames(param_table) %in% c("Perc_expander", "Perc_patcher"))], + 2 + ) + " .no {{ + NumberTable loadTableOfAllocationParameters 1; + }}; + + @alias = % expansion table + @viewer.table = yes + expansionTable := ExtractStructTable splitParamTable $"(expander_table)"; + + @viewer.table = yes + patchRelatedParams := ExtractStructTable splitParamTable $"(patch_table)"; + }}; + + @collapsed = no + @alias = Transition potential calculation + _ := Group .none {{ + @collapsed = no + @alias = Transition potential R script + transitionPotentialRScript := CalculateRExpression " + message(Sys.time(), " - transitionPotentialRScript") + + prob_map_folder <- evoland::dinamica_trans_potent_calc( + simulation_num = v2, + time_step = v1 + ) + + outputString("probmap_folder_path", prob_map_folder) + " .no {{ + @alias = time_step + NumberValue timeStep 1; + + @alias = Current simulation number + NumberValue simulationNumber 2; + }}; + + @viewer.string = yes + folderPathForProbabilityMaps := ExtractStructString transitionPotentialRScript $"(probmap_folder_path)"; + + @viewer.probabilityMapsRasterCube = yes + createCubeOfProbabilityMaps7123 := CreateCubeOfProbabilityMaps transitionRatesTable folderPathForProbabilityMaps; + }}; + + @alias = Transition allocation + _ := Group .none {{ + @collapsed = no + createSimulatedLulcSavePath := CalculateRExpression " + message(Sys.time(), " - createSimulatedLulcSavePath") + simulated_lulc_year <- v1 + v2 + final_lulc_path <- file.path( + s1, + paste0(simulated_lulc_year, ".tif") + ) + + outputString("simulated_lulc_save_path", final_lulc_path) + #output the simulated map year for use in the implementation of deterministic transitions + outputDouble("simulated_lulc_year", simulated_lulc_year) + " .no {{ + @alias = Simulated LULC map base path + NumberString receiveSimResultsPath 1; + + @alias = Current time + NumberValue timeStep 1; + + @alias = Model step length + NumberValue stepLength 2; + }}; + + @viewer.string = yes + simulatedLulcSavePath := ExtractStructString createSimulatedLulcSavePath $"(simulated_lulc_save_path)"; + simulatedLulcYear := ExtractStructNumber createSimulatedLulcSavePath $"(simulated_lulc_year)"; + + 
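+      // Expander first consumes the share of each transition given in
+      // expansionTable; the remainder is handed to Patcher as new patches.
+      // Both draw their patch geometry parameters from the same
+      // patchRelatedParams table (see the AllocateTransitions submodel).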
@viewer.resultingLanscape = yes + allocateTransitions1454 := AllocateTransitions { + lanscape = updatedLulcMap, + probabilities = createCubeOfProbabilityMaps7123, + transitionMatrix = transitionRatesTable, + percentOfTransitionsByExpansion = expansionTable, + patchExpansionParameters = patchRelatedParams, + patchGenerationParameters = patchRelatedParams, + printTransitionInfo = .yes + }; + + + // Simulated LULC map saved at every time step. Receives time info from control + // operator and adds a suffix to the file path. + @alias = Save simulated LULC + SaveMap { + map = allocateTransitions1454, + filename = simulatedLulcSavePath, + suffixDigits = 0, + step = 0, + useCompression = .yes, + workdir = workdir, + ignoreCostlySparseCategories = .yes + }; + }}; + + @alias = Implement deterministic transitions + _ := Group .none {{ + @collapsed = no + _ := CalculateRExpression " + message(Sys.time(), " - deterministicTransitions") + + evoland::dinamica_deterministic_trans( + simulated_lulc_save_path = s1, + simulation_id = v1, + simulated_lulc_year = v2 + ) + " .no {{ + @alias = Simulated LULC save path + NumberString simulatedLulcSavePath 1; + + @alias = Simulation number + NumberValue simulationNumber 1; + + @alias = Simulated LULC year + NumberValue simulatedLulcYear 2; + }}; + + // + // === + // Load simulated LULC map that has had deterministic transition incorporated + loadUpdatedLulcMap := LoadCategoricalMap { + filename = simulatedLulcSavePath, + nullValue = .none, + storageMode = .default, + suffixDigits = 0, + step = step0, + workdir = workdir + }; + }}; + }}; + + // + // === + // If simulating during calibration period validate outputs + @collapsed = no + @alias = Calibration period validation + _ := Group scenarioTimeStepIteration {{ + @collapsed = no + validateCalibration := CalculateRExpression " + message(Sys.time(), " - validateCalibration") + validation_params <- evoland::dinamica_use_validation( + simulation_id = v1, + sim_results_path = s1 + ) + + # double as bool: 0 -> don't validate, 1 -> do validate + outputDouble("Use_validation", validation_params[["validation_condition"]]) + + # where to write validation results + outputString("Val_map_path", validation_params[["Validation_map_path"]]) + outputString("Val_result_path", validation_params[["Validation_result_path"]]) + + # final observed and simulated years, respectively + outputString("Obs_LULC_path", validation_params[["Final_LULC_path"]]) + outputString("Sim_LULC_path", validation_params[["Sim_final_LULC_path"]]) + " .no {{ + @alias = Send simulation ID + NumberValue simulationNumber 1; + + @alias = Send file path for simulated LULC map + NumberString receiveSimResultsPath 1; + + }}; + + @viewer.string = yes + filePathLastObservedLulcYear := ExtractStructString validateCalibration $"(Obs_LULC_path)"; + + filePathValidationResults := ExtractStructString validateCalibration $"(Val_result_path)"; + + filePathLastSimulatedLulcYear := ExtractStructString validateCalibration $"(Sim_LULC_path)"; + + // returning 0 does not initiate validation, returning 1 does + @viewer.number = yes + useValidationCondition := ExtractStructNumber validateCalibration $"(Use_validation)"; + + filePathValidationMap := ExtractStructString validateCalibration $"(Val_map_path)"; + + _ := IfThen useValidationCondition .none {{ + loadObservedLulcMapForFinalYearOfSimulation := LoadCategoricalMap { + filename = filePathLastObservedLulcYear, + nullValue = .none, + storageMode = .default, + suffixDigits = 0, + step = step, + workdir = workdir + }; + + 
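+        // The initial map is loaded a second time here (as loadInitialLulcMap0):
+        // the loadInitialLulcMap functor above is nested inside the time-step
+        // ForEach and cannot be referenced from this validation group.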
@alias = Load Initial Lulc Map + loadInitialLulcMap0 := LoadCategoricalMap { + filename = receiveInitialLulcFilePath, + nullValue = .none, + storageMode = .default, + suffixDigits = 0, + step = step, + workdir = workdir + }; + + loadSimulatedLulcMap := LoadCategoricalMap { + filename = filePathLastSimulatedLulcYear, + nullValue = .none, + storageMode = .default, + suffixDigits = 0, + step = step, + workdir = workdir + }; + + // can this be replaced by logic in R? + @viewer.similarity = yes + @viewer.similarityMap = yes + similarityMap similarity := CalcSimilarityOfDifferences { + initialMap = loadInitialLulcMap0, + observedMap = loadObservedLulcMapForFinalYearOfSimulation, + simulatedMap = loadSimulatedLulcMap, + useExponentialDecay = .yes, + windowSize = 11, + printSimilarities = .yes, + exponentialDecayDivisor = 2 + }; + + SaveTextFile { + text = similarity, + filename = filePathValidationResults, + suffixDigits = 2, + step = step, + workdir = workdir + }; + + SaveMap { + map = similarityMap, + filename = filePathValidationMap, + suffixDigits = 2, + step = step, + useCompression = .yes, + workdir = workdir, + ignoreCostlySparseCategories = .yes + }; + }}; + }}; + }}; + + @alias = Update control table to reflect simulation complete + _ := Group .none {{ + @collapsed = no + _ := CalculateRExpression " + message(Sys.time(), " - update control table") + evoland::dinamica_update_control_table( + success = s1, + simulation_num = v1 + ) + " .no {{ + @alias = Completion boolean + NumberString skipOnError1399 1; + + @alias = Simulation number + NumberValue simulationNumber 1; + }}; + }}; + }}; + }}; +}}; diff --git a/inst/dinamica_model/evoland_ego_Submodels/AllocateTransitions.ego-decoded b/inst/dinamica_model/evoland_ego_Submodels/AllocateTransitions.ego-decoded new file mode 100644 index 0000000..8688704 --- /dev/null +++ b/inst/dinamica_model/evoland_ego_Submodels/AllocateTransitions.ego-decoded @@ -0,0 +1,120 @@ +@charset = UTF-8 +@submodel.name = AllocateTransitions +@author = Dinamica Team +@organization = CSR / UFMG +@submodel.description = Performs transition allocation on a landscape map using Expander and Patcher according to a transition matrix specifying the net rates, a probability map and other parameters defining patch geometry. +@submodel.group = Simulation +@showproperties = yes +@date = 2023-Nov-22 15:03:10 +@version = 7.6.0.20231102 +Script {{ + // The landscape map. + @submodel.in.constant.advanced = no + @submodel.in.constant.description = The initial landscape map. + @submodel.in.constant.name = lanscape + @submodel.in.constant.optional = no + @submodel.in.constant.order = 1 + landscape := CategoricalMap .UNBOUND; + + // This matriz defines the percentage of total transitions performed by expansion + // of existent patches (using Expander operator). The complement of this matrix + // defines the percentage of transitions performed by generation of new patches + // (using Patcher operator). + @submodel.in.constant.advanced = no + @submodel.in.constant.description = This matriz defines the percentage of total transitions performed by expansion of existent patches (using Expander operator). The complement of this matrix defines the percentage of transitions performed by generation of new patches (using Patcher operator). + @submodel.in.constant.name = percentOfTransitionsByExpansion + @submodel.in.constant.optional = no + @submodel.in.constant.order = 4 + percentOfTransitionsByExpansion := PercentMatrix .UNBOUND; + + // The transition matrix defining net transition rates. 
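+  // CalcChangeMatrix below converts these rates into the absolute quantity of
+  // cells to change for each transition.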
+ @submodel.in.constant.advanced = no + @submodel.in.constant.description = The matriz defining the net rate of each transition. + @submodel.in.constant.name = transitionMatrix + @submodel.in.constant.optional = no + @submodel.in.constant.order = 3 + transitionMatrix := TransitionMatrix .UNBOUND; + + // The parameters used to expand existent patches. The parameters are used by + // Expander operator. + @submodel.in.constant.advanced = no + @submodel.in.constant.description = The parameters used to expand existent patches. The parameters are used by Expander operator. + @submodel.in.constant.name = patchExpansionParameters + @submodel.in.constant.optional = no + @submodel.in.constant.order = 5 + patchExpansionParameters := TransitionFunctionParameterMatrix .UNBOUND; + + // The parameters used to generate new patches. These parameters are used by + // Patcher operator. + @submodel.in.constant.advanced = no + @submodel.in.constant.description = The parameters used to generate new patches. These parameters are used by Patcher operator. + @submodel.in.constant.name = patchGenerationParameters + @submodel.in.constant.optional = no + @submodel.in.constant.order = 6 + patchGenerationParameters := TransitionFunctionParameterMatrix .UNBOUND; + + // The probability map. + @submodel.in.constant.advanced = no + @submodel.in.constant.description = The map defining the probability of occurrence of each transition. + @submodel.in.constant.name = probabilities + @submodel.in.constant.optional = no + @submodel.in.constant.order = 2 + probabilities := Map .UNBOUND; + + // If true, print allocation info on the application console. This is useful to + // help identify problems in the transition rates and probability maps. + @alias = print transition info? + @submodel.in.constant.advanced = no + @submodel.in.constant.description = If true, print allocation info on the application console. This is useful to help identify problems in the transition rates and probability maps. + @submodel.in.constant.name = printTransitionInfo + @submodel.in.constant.optional = yes + @submodel.in.constant.order = 7 + printTransitionInfo := BooleanValue .yes; + + // Calculate the quantity of changes to be executed. + @collapsed = no + _ := Group .none {{ + transitionRates := CalcChangeMatrix landscape transitionMatrix; + + @alias = split transition rates + modulatedChanges complementaryChanges := ModulateChangeMatrix transitionRates percentOfTransitionsByExpansion; + }}; + + // Execute the transition functions. + @collapsed = no + _ := Group .none {{ + LogPolicy .debug2 printTransitionInfo {{ + @alias = landscape expander + changedLandscape corrodedProbabilities remainingChanges := Expander { + landscape = landscape, + probabilities = probabilities, + changes = modulatedChanges, + transitionParameters = patchExpansionParameters, + neighborWindowLines = 3, + neighborWindowColumns = 3, + pruneFactor = 10 + }; + }}; + + combinedTransitionRates := AddChangeMatrix complementaryChanges remainingChanges; + + LogPolicy .debug2 printTransitionInfo {{ + landscapePatcher _ _ := Patcher { + landscape = changedLandscape, + probabilities = corrodedProbabilities, + changes = combinedTransitionRates, + transitionParameters = patchGenerationParameters, + neighborWindowLines = 3, + neighborWindowColumns = 3, + pruneFactor = 10 + }; + }}; + }}; + + // The landscape map resulting from the transition allocation. + @alias = resulting landscape + @submodel.out.object.description = The resulting landscape map. 
+  @submodel.out.object.name = resultingLanscape
+  @submodel.out.object.order = 1
+  _ := CategoricalMap landscapePatcher;
+}};
diff --git a/inst/dinamica_model/evoland_ego_Submodels/CalcSimilarityOfDifferences.ego-decoded b/inst/dinamica_model/evoland_ego_Submodels/CalcSimilarityOfDifferences.ego-decoded
new file mode 100644
index 0000000..70cda48
--- /dev/null
+++ b/inst/dinamica_model/evoland_ego_Submodels/CalcSimilarityOfDifferences.ego-decoded
@@ -0,0 +1,163 @@
+@charset = UTF-8
+@submodel.title = Calc Similarity Of Differences
+@author = Dinamica Team
+@organization = CSR / UFMG
+@submodel.description = Calculate the minimum fuzzy similarity using maps of changes. This operator calculates the map of differences between an initial map (first one of a time series) and an observed map (last one of a time series) and the differences between an initial map and a simulated map (the simulated one corresponding to the observed map). Then, these maps of differences are used by CalcReciprocalSimilarityMap to derive the minimum similarity value.
+@submodel.group = Validation
+@notes = See the help for a detailed description of this method.
+@showproperties = yes
+@date = 2023-Nov-22 15:03:10
+@version = 7.6.0.20231102
+Script {{
+  // Initial map.
+  @submodel.in.constant.description = Initial Map.
+  @submodel.in.constant.optional = no
+  @submodel.in.constant.order = 0
+  @submodel.in.constant.title = Initial Map
+  initialMap := CategoricalMap .UNBOUND;
+
+  // Simulated map.
+  @submodel.in.constant.description = Simulated Map.
+  @submodel.in.constant.optional = no
+  @submodel.in.constant.order = 2
+  @submodel.in.constant.title = Simulated Map
+  simulatedMap := CategoricalMap .UNBOUND;
+
+  // Observed (reference) map.
+  @submodel.in.constant.description = Observed (reference) map.
+  @submodel.in.constant.optional = no
+  @submodel.in.constant.order = 1
+  @submodel.in.constant.title = Observed Map
+  observedMap := CategoricalMap .UNBOUND;
+
+  // If true, print the values of the mean similarities on the application console.
+  @alias = print similarities?
+  @submodel.in.constant.description = If true, print the values of the mean similarities on the application console.
+  @submodel.in.constant.optional = yes
+  @submodel.in.constant.order = 5
+  @submodel.in.constant.title = Print Similarities
+  printSimilarities := BooleanValue .yes;
+
+  // Only odd numbers are accepted.
+  @submodel.in.constant.description = Only odd numbers are accepted.
+  @submodel.in.constant.optional = yes
+  @submodel.in.constant.order = 4
+  @submodel.in.constant.title = Window Size
+  windowSize := PositiveIntegerValue 11;
+
+  // If true, the similarity is calculated using an exponential decay function
+  // truncated by the window size. Otherwise, a constant function is used within the
+  // specified window.
+  @submodel.in.constant.description = If true, the similarity is calculated using an exponential decay function truncated by the window size. Otherwise, a constant function is used within the specified window.
+  @submodel.in.constant.optional = yes
+  @submodel.in.constant.order = 3
+  @submodel.in.constant.title = Use Exponential Decay
+  useExponentialDecay := BooleanValue .yes;
+
+  //
+  // ===
+  // Calculate map of observed changes.
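+  // (a cell becomes null where the initial and observed classes agree, and
+  // keeps the observed class where they differ)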
+ observedChanges := CalculateCategoricalMap { + expression = [ + if i1 = i2 then + null + else + i2 + ], + cellType = .int32, + nullValue = .default, + resultIsSparse = .no, + resultFormat = .none + } {{ + NumberMap initialMap 1; + + NumberMap observedMap 2; + }}; + + // + // === + // Calculate map of simulated changes. + simulatedChanges := CalculateCategoricalMap { + expression = [ + if i1 = i2 then + null + else + i2 + ], + cellType = .int32, + nullValue = .default, + resultIsSparse = .no, + resultFormat = .none + } {{ + NumberMap initialMap 1; + + NumberMap simulatedMap 2; + }}; + + // Attenuation factor of the exponential decay function. + @submodel.in.constant.advanced = yes + @submodel.in.constant.description = Attenuation factor of the exponential decay function. + @submodel.in.constant.optional = yes + @submodel.in.constant.order = 6 + @submodel.in.constant.title = Exponential Decay Divisor + exponentialDecayDivisor := RealValue 2; + + LogPolicy .result printSimilarities {{ + @alias = similarities + firstSimilarity firstMean secondSimilarity secondMean := CalcReciprocalSimilarityMap { + firstMap = observedChanges, + secondMap = simulatedChanges, + windowSize = windowSize, + useExponentialDecay = useExponentialDecay, + cellType = .float32, + nullValue = .default, + exponentialDecayDivisor = exponentialDecayDivisor + }; + }}; + + minimumSimilarity := CalculateValue [ + min(v1, v2) + ] .none {{ + NumberValue firstMean 1; + + NumberValue secondMean 2; + }}; + + // Select the minimum similarity map. + @collapsed = yes + _ := Group .none {{ + @alias = is 1st similarity less than 2nd similarity? + is1StSimilarityLessThan2NdSimilarity := CalculateValue [ + v1 < v2 + ] .none {{ + NumberValue firstMean 1; + + NumberValue secondMean 2; + }}; + + _ := IfThen is1StSimilarityLessThan2NdSimilarity .none {{ + firstSimilarityMap := Map firstSimilarity; + }}; + + @collapsed = no + _ := IfNotThen is1StSimilarityLessThan2NdSimilarity .none {{ + secondSimilarityMap := Map secondSimilarity; + }}; + + minimumSimilarityMap := MapJunction firstSimilarityMap secondSimilarityMap; + }}; + + @alias = similarity map + @submodel.out.object.description = The minimum similarity map. + @submodel.out.object.optional = no + @submodel.out.object.order = 0 + @submodel.out.object.title = Similarity Map + _ := Map minimumSimilarityMap; + + @alias = similarity + @submodel.out.object.description = The minimum similarity index. + @submodel.out.object.optional = no + @submodel.out.object.order = 1 + @submodel.out.object.title = Similarity + _ := RealValue minimumSimilarity; +}}; \ No newline at end of file diff --git a/inst/dinamica_model/evoland_ego_Submodels/CreateCubeOfProbabilityMaps.ego-decoded b/inst/dinamica_model/evoland_ego_Submodels/CreateCubeOfProbabilityMaps.ego-decoded new file mode 100644 index 0000000..3881e0f --- /dev/null +++ b/inst/dinamica_model/evoland_ego_Submodels/CreateCubeOfProbabilityMaps.ego-decoded @@ -0,0 +1,185 @@ +@charset = UTF-8 +@submodel.name = CreateCubeOfProbabilityMaps +@submodel.description = "Stack various probabilities maps into a map cube renaming each one according to the transitions present in the transition matrix. Probability maps must be ordered using a prefix like 1_ , e.g 1_filename.tif according to the number of the row of the respective transition. 
+ +The map cube is used as input for both patcher and expander operators" +@submodel.group = Map Algebra +@submodel.import = ListFilenames { { \"folder\" : Folder, \"filePattern\" : String } { \"searchSubFoldersRecursively\" : BooleanValue } { \"files\" : Table } }; ExpandTableToUniqueKeys { { \"constant\" : Table } { } { \"table\" : Table } } +@date = 2023-Nov-22 15:03:10 +@version = 7.6.0.20231102 +Script {{ + // Folder containing files of probability maps, they must be ordered according to + // the number of rows of the transition matrix using as a prefix a number, e.g. + // 1_name.tif. Each map will be a layer in the cube raster set + @submodel.in.constant.advanced = no + @submodel.in.constant.description = Folder containing files of probability maps, they must be ordered according to the number of rows of the transition matrix using as a prefix a number, e.g. 1_name.tif. Each map will be a layer in the cube raster set + @submodel.in.constant.name = inputFolder + @submodel.in.constant.optional = no + @submodel.in.constant.order = 2 + inputFolder := Folder ".."; + + listFilenames6935 := ListFilenames { + folder = inputFolder, + filePattern = $"(*.tif)", + searchSubFoldersRecursively = .no + }; + + getFileNames := GetTableColumn listFilenames6935 2; + + // the transiton matrix + @submodel.in.constant.advanced = no + @submodel.in.constant.description = the transiton matrix + @submodel.in.constant.name = transitionMatrix + @submodel.in.constant.optional = no + @submodel.in.constant.order = 1 + transitionMatrix := Table .UNBOUND; + + expandTableToUniqueKeys15358 := ExpandTableToUniqueKeys transitionMatrix; + + @collapsed = no + _ := ForEach expandTableToUniqueKeys15358 .none {{ + step = step; + + row := Step step; + + getTo := GetTableValue { + table = expandTableToUniqueKeys15358, + keys = row, + column = "To", + valueIfNotFound = .none + }; + + getFrom := GetTableValue { + table = expandTableToUniqueKeys15358, + keys = row, + column = "From", + valueIfNotFound = .none + }; + + muxTableLayerNames := MuxTable [ + "Key*", "layer_sufix#str", + ] setTableCellValue15385; + + createLayerName := CreateString $"()" {{ + NumberString $"(_)" 1; + + NumberString $"(to)" 2; + + NumberString $"(probability)" 3; + + NumberValue getFrom 1; + + NumberValue getTo 2; + }}; + + setTableCellValue15385 := SetTableCellValue { + table = muxTableLayerNames, + column = 2, + keys = row, + value = createLayerName + }; + }}; + + getTableValue13738 := GetTableValue { + table = getFileNames, + keys = [ 1 ], + column = 2, + valueIfNotFound = .none + }; + + loadFirstMap := LoadMap { + filename = getTableValue13738, + nullValue = .none, + storageMode = .default, + suffixDigits = 0, + step = .none, + workdir = .none + }; + + convertToDouble := CalculateMap { + expression = [ + i1 + ], + cellType = .float32, + nullValue = .default, + resultIsSparse = .no, + resultFormat = .none + } {{ + NumberMap loadFirstMap 1; + }}; + + tableJunction906 := TableJunction setTableCellValue15385 [ + "Key*", "Value", + ]; + + @collapsed = no + _ := ForEach (TableJunction setTableCellValue15385 [ + "Key*", "Value", + ]) .none {{ + step0 = step; + + layer := Step step0; + + getTableValue92360 := GetTableValue { + table = getFileNames, + keys = layer, + column = 2, + valueIfNotFound = .none + }; + + loadEvidence := LoadMap { + filename = getTableValue92360, + nullValue = .none, + storageMode = .default, + suffixDigits = 0, + step = step0, + workdir = .none + }; + + muxMap27567 := MuxMap convertToDouble insertMapLayer27563; + + firstLayer := 
CalculateValue [ + if v1 = 1 then + 1 + else + 0 + ] .none {{ + NumberValue layer 1; + }}; + + getTableValue9236 := GetTableValue { + table = tableJunction906, + keys = layer, + column = 2, + valueIfNotFound = .none + }; + + @alias = convert to double + convertToDouble0 := CalculateMap { + expression = [ + i1 + ], + cellType = .float32, + nullValue = .default, + resultIsSparse = .no, + resultFormat = .none + } {{ + NumberMap loadEvidence 1; + }}; + + insertMapLayer27563 := InsertMapLayer { + map = muxMap27567, + layer = convertToDouble0, + layerPosition = layer, + layerName = getTableValue9236, + replaceLayer = firstLayer + }; + }}; + + // Value or constant representing a map provided as the functor input. + @alias = Object + @submodel.out.object.description = Value or constant representing a map provided as the functor input. + @submodel.out.object.name = probabilityMapsRasterCube + @submodel.out.object.order = 1 + _ := Map (MapJunction insertMapLayer27563 convertToDouble); +}}; diff --git a/inst/dinamica_model/evoland_ego_Submodels/ExpandTableToUniqueKeys.ego-decoded b/inst/dinamica_model/evoland_ego_Submodels/ExpandTableToUniqueKeys.ego-decoded new file mode 100644 index 0000000..19c08b6 --- /dev/null +++ b/inst/dinamica_model/evoland_ego_Submodels/ExpandTableToUniqueKeys.ego-decoded @@ -0,0 +1,40 @@ +@charset = UTF-8 +@submodel.name = ExpandTableToUniqueKeys +@submodel.description = Create a new Table containing the original table plus a key column with an unique index for each row of that Table. +@submodel.group = Table +@date = 2023-Nov-22 15:03:10 +@version = 7.6.0.20231102 +Script {{ + @submodel.in.constant.advanced = no + @submodel.in.constant.name = constant + @submodel.in.constant.optional = no + @submodel.in.constant.order = 1 + constant := Table .UNBOUND; + + @collapsed = no + calculatePythonExpression475 := CalculatePythonExpression "newHeader = dinamica.inputs["t1"][ 0 ] +newColumnName = "UniqueIndex" +while newColumnName in newHeader: + newColumnName += "_" + +for c in newHeader: + c.replace( "*", "" ) + +newHeader.insert(0, newColumnName) +newTable = [ newHeader ] +uniqueIndex = 1 +for rowIndex in range(1, len(dinamica.inputs["t1"])): + row = dinamica.inputs["t1"][ rowIndex ] + row.insert(0, uniqueIndex) + newTable.append(row) + uniqueIndex += 1 + +dinamica.outputs["table"] = newTable" {{ + NumberTable constant 1; + }}; + + @submodel.out.table.description = Extracted table. + @submodel.out.table.name = table + @submodel.out.table.order = 1 + _ := ExtractStructTable calculatePythonExpression475 $"(table)"; +}}; diff --git a/inst/dinamica_model/evoland_ego_Submodels/ListFilenames.ego-decoded b/inst/dinamica_model/evoland_ego_Submodels/ListFilenames.ego-decoded new file mode 100644 index 0000000..ce3fb37 --- /dev/null +++ b/inst/dinamica_model/evoland_ego_Submodels/ListFilenames.ego-decoded @@ -0,0 +1,68 @@ +@charset = UTF-8 +@submodel.name = ListFilenames +@author = Hermann Rodrigues +@organization = CSR / UFMG +@submodel.description = "List the filenames in the given folder (and maybe its sub-folders) that match the given file pattern. The resulting filenames include their corresponding file paths. +The functionality provided by this submodel is similar to the one provided by the \"List Filenames In Folder\" submodel, except that this implementation used Python internally. However, this version is simpler to use." +@submodel.group = Files +@date = 2023-Nov-22 15:03:10 +@version = 7.6.0.20231102 +Script {{ + // Folder where the files are located. 
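+  // The folder path is concatenated with the file pattern and handed to
+  // Python's glob below, so it should not itself contain wildcard characters.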
+ @submodel.in.constant.advanced = no + @submodel.in.constant.description = Folder where the files are located. + @submodel.in.constant.name = folder + @submodel.in.constant.optional = no + @submodel.in.constant.order = 1 + folder := Folder .UNBOUND; + + // If true, also search for files in the sub-folders of the given folder + // recursively. Otherwise, the result only includes files from the given folder. + @submodel.in.constant.advanced = yes + @submodel.in.constant.description = If true, also search for files in the sub-folders of the given folder recursively. Otherwise, the result only includes files from the given folder. + @submodel.in.constant.name = searchSubFoldersRecursively + @submodel.in.constant.optional = yes + @submodel.in.constant.order = 3 + searchSubFoldersRecursively := BooleanValue .no; + + // Pattern of file names that will be returned. The pattern can include all + // wildcards supported by the OS shell. Ex: *.*, *.csv, my_image[0-9].tif, etc + @submodel.in.constant.advanced = no + @submodel.in.constant.description = Pattern of file names that will be returned. The pattern can include all wildcards supported by the OS shell. Ex: *.*, *.csv, my_image[0-9].tif, etc + @submodel.in.constant.name = filePattern + @submodel.in.constant.optional = no + @submodel.in.constant.order = 2 + filePattern := String .UNBOUND; + + calculatePythonExpression13799 := CalculatePythonExpression "dinamica.package("glob") + +folder = dinamica.inputs["s1"] +extension = dinamica.inputs["s2"] + +recurse_subfolders = dinamica.inputs["v1"] != 0 + +if recurse_subfolders: + full_filename_pattern = folder + "/**/" + extension; +else: + full_filename_pattern = folder + "/" + extension; + +files = glob.glob(full_filename_pattern, recursive=recurse_subfolders) +# sort files alphabetically +files = sorted(files) +files = [["Indices*#real", "Filenames#string"]] + list(enumerate(files, 1)) +print("Prepared output for Dinamica: ", files) + +dinamica.outputs["files"] = dinamica.prepareTable(files, 1) +" {{ + NumberString folder 1; + + NumberString filePattern 2; + + NumberValue searchSubFoldersRecursively 1; + }}; + + @submodel.out.table.description = The list of filename (and their corresponding paths) found in the given folder that match the given extension. 
+ @submodel.out.table.name = files + @submodel.out.table.order = 1 + _ := ExtractStructTable calculatePythonExpression13799 $"(files)"; +}}; From 03405b0bf88a7158b76ae2ed183e03a4228a2514 Mon Sep 17 00:00:00 2001 From: mmyrte <24587121+mmyrte@users.noreply.github.com> Date: Thu, 4 Dec 2025 09:46:21 +0100 Subject: [PATCH 02/23] getting rid of purrr, rlang, cli, fs --- DESCRIPTION | 3 +- NAMESPACE | 2 + R/coords_t.R | 6 +- R/intrv_meta_t.R | 9 +- R/pred_meta_t.R | 13 ++- R/util_download.R | 11 +-- R/utils_dinamica.r | 144 ++++++++++++++++---------------- man/dinamica_utils.Rd | 76 +++++++++++++++++ vignettes/evoland.qmd | 2 +- vignettes/ingest-predictors.qmd | 2 +- 10 files changed, 177 insertions(+), 91 deletions(-) create mode 100644 man/dinamica_utils.Rd diff --git a/DESCRIPTION b/DESCRIPTION index c35060f..e41d486 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -19,11 +19,9 @@ Imports: DBI, duckdb, glue, - purrr, qs2, R6, Rcpp, - rlang, stringi, terra Suggests: @@ -64,3 +62,4 @@ Collate: 'util.R' 'util_download.R' 'util_terra.R' + 'utils_dinamica.r' diff --git a/NAMESPACE b/NAMESPACE index bf39ddc..5d387f5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -57,6 +57,7 @@ export(create_pred_meta_t) export(create_trans_meta_t) export(download_and_verify) export(evoland_db) +export(exec_dinamica) export(extract_using_coords_t) export(fit_glm) export(fit_ranger) @@ -65,6 +66,7 @@ export(gof_ranger) export(grrf_filter) export(parquet_duckdb) export(print_rowwise_yaml) +export(run_evoland_dinamica_sim) export(validate) importFrom(Rcpp,sourceCpp) importFrom(data.table,":=") diff --git a/R/coords_t.R b/R/coords_t.R index e99609a..d3b1359 100644 --- a/R/coords_t.R +++ b/R/coords_t.R @@ -91,10 +91,12 @@ print.coords_t <- function(x, nrow = 10, ...) { #' @describeIn coords_t Create a set of square coordinates #' @export create_coords_t_square <- function(epsg, extent, resolution, ...) 
{ - if (!rlang::is_scalar_integerish(epsg)) { + if ( + !(length(epsg) == 1L && (is.integer(epsg) || (is.numeric(epsg) && epsg == as.integer(epsg)))) + ) { stop("epsg must be scalar integerish") } - if (!rlang::is_scalar_double(resolution)) { + if (!(length(resolution) == 1L && is.double(resolution))) { stop("resolution must be scalar double") } if (!inherits(extent, "SpatExtent")) { diff --git a/R/intrv_meta_t.R b/R/intrv_meta_t.R index 0588de4..63b6f98 100644 --- a/R/intrv_meta_t.R +++ b/R/intrv_meta_t.R @@ -121,12 +121,13 @@ create_intrv_meta_t_row <- function( params ) { stopifnot( - rlang::is_scalar_character(name), - rlang::is_scalar_character(pretty_name), - rlang::is_scalar_character(description), + "name is not scalar character" = length(name) == 1L && is.character(name), + "pretty_name is not scalar character" = length(pretty_name) == 1L && is.character(pretty_name), + "description is not scalar character" = length(description) == 1L && is.character(description), is.integer(id_period_list), is.integer(id_trans_list), - rlang::is_scalar_logical(pre_allocation), + "pre_allocation is not scalar logical" = length(pre_allocation) == 1L && + is.logical(pre_allocation), inherits(sources, "data.frame"), ) check_missing_names(sources, c("url", "md5sum")) diff --git a/R/pred_meta_t.R b/R/pred_meta_t.R index 6fa15c9..291f040 100644 --- a/R/pred_meta_t.R +++ b/R/pred_meta_t.R @@ -83,13 +83,18 @@ create_pred_meta_t <- function(pred_spec) { # path is pred_spec > pred_name > leaf_name # we pluck each element, then replace potential null using %||% pretty_name = unlist( - purrr::map2(pluck_wildcard(pred_spec, NA, "pretty_name"), pred_names, ~ .x %||% .y) + mapply( + function(x, y) x %||% y, + pluck_wildcard(pred_spec, NA, "pretty_name"), + pred_names, + SIMPLIFY = FALSE + ) ), description = unlist( - purrr::map(pluck_wildcard(pred_spec, NA, "description"), ~ .x %||% NA_character_) + lapply(pluck_wildcard(pred_spec, NA, "description"), function(x) x %||% NA_character_) ), orig_format = unlist( - purrr::map(pluck_wildcard(pred_spec, NA, "orig_format"), ~ .x %||% NA_character_) + lapply(pluck_wildcard(pred_spec, NA, "orig_format"), function(x) x %||% NA_character_) ), sources = lapply( pred_spec, @@ -102,7 +107,7 @@ create_pred_meta_t <- function(pred_spec) { } ), unit = unlist( - purrr::map(pluck_wildcard(pred_spec, NA, "unit"), ~ .x %||% NA_character_) + lapply(pluck_wildcard(pred_spec, NA, "unit"), function(x) x %||% NA_character_) ), factor_levels = pluck_wildcard(pred_spec, NA, "factor_levels") ) diff --git a/R/util_download.R b/R/util_download.R index ed94b50..7f19f17 100644 --- a/R/util_download.R +++ b/R/util_download.R @@ -32,12 +32,12 @@ download_and_verify <- function( .( url, md5sum, - found_files = purrr::map(file.path(target_dir, md5sum), list.files, full.names = TRUE) + found_files = lapply(file.path(target_dir, md5sum), list.files, full.names = TRUE) ) ] # can only handle one file per md5 folder - all_dt[, no_found_files := purrr::map_int(found_files, length)] + all_dt[, no_found_files := vapply(found_files, length, integer(1))] if (nrow(too_many_files <- all_dt[no_found_files > 1])) { stop( "Investigate: found more than one file for \n ", @@ -54,9 +54,10 @@ download_and_verify <- function( # if download, fetch to temp file in target_dir, then move to md5 folder once known # if not download, just return details on existing file, not rechecking md5sum - downloaded_dt <- data.table::rbindlist(purrr::pmap( - .l = all_dt, - .f = function(url, found_files, to_download, md5sum, ...) 
{
+  downloaded_dt <- data.table::rbindlist(.mapply(
+    dots = all_dt,
+    MoreArgs = NULL,
+    FUN = function(url, found_files, to_download, md5sum, ...) {
       if (to_download) {
         message("Downloading: ", url)
         temp_file <- tempfile(tmpdir = target_dir)
diff --git a/R/utils_dinamica.r b/R/utils_dinamica.r
index 906269b..bacd984 100644
--- a/R/utils_dinamica.r
+++ b/R/utils_dinamica.r
@@ -39,60 +39,65 @@ exec_dinamica <- function(
   args <- c(args, model_path)
 
   if (write_logfile) {
-    logfile_path <- fs::path(
-      fs::path_dir(model_path),
+    logfile_path <- file.path(
+      dirname(model_path),
       format(Sys.time(), "%Y-%m-%d_%Hh%Mm%Ss_dinamica.log")
     )
-    cli::cli_inform(
-      "Logging to {.file {logfile_path}}"
-    )
-    logfile_con <- file(
-      description = logfile_path,
-      open = "a"
+    message("Logging to ", logfile_path)
+
+    # Pipe through sed (strip ANSI codes) and tee (write logfile) instead of R
+    # callbacks; set pipefail so tee cannot mask DinamicaConsole's exit status.
+    res <- processx::run(
+      command = "bash",
+      args = c(
+        "-c",
+        sprintf(
+          "set -o pipefail; stdbuf -oL DinamicaConsole %s 2>&1 | sed 's/\\x1b\\[[0-9;]*m//g' | tee '%s'",
+          paste(shQuote(args), collapse = " "),
+          logfile_path
+        )
+      ),
+      error_on_status = FALSE,
+      echo = echo,
+      spinner = TRUE,
+      env = c(
+        "current",
+        DINAMICA_HOME = dirname(model_path)
+      )
     )
-    on.exit(close(logfile_con))
-    # callback should have irrelevant overhead versus launching a shell, tee-ing a pipe,
-    # and stripping escape sequences with sed
-    stdout_cb <- function(chunk, process) {
-      cli::ansi_strip(chunk) |>
-        cat(file = logfile_con)
-    }
   } else {
-    # register empty callback
-    stdout_cb <- function(chunk, process) {
-      NULL
-    }
-  }
-
-  res <- processx::run(
-    # If called directly, DinamicaConsole does not flush its buffer upon SIGTERM.
-    # stdbuf -oL forces flushing the stdout buffer after every line.
-    command = "stdbuf",
-    args = c(
-      "-oL",
-      "DinamicaConsole", # assume that $PATH is complete
-      args
-    ),
-    error_on_status = FALSE,
-    echo = echo,
-    stdout_callback = stdout_cb,
-    stderr_callback = stdout_cb,
-    spinner = TRUE,
-    env = c(
-      "current",
-      DINAMICA_HOME = fs::path_dir(model_path)
+    res <- processx::run(
+      # If called directly, DinamicaConsole does not flush its buffer upon SIGTERM.
+      # stdbuf -oL forces flushing the stdout buffer after every line.
+      command = "stdbuf",
+      args = c(
+        "-oL",
+        "DinamicaConsole", # assume that $PATH is complete
+        args
+      ),
+      error_on_status = FALSE,
+      echo = echo,
+      spinner = TRUE,
+      env = c(
+        "current",
+        DINAMICA_HOME = dirname(model_path)
+      )
     )
-  )
+  }
 
   if (res[["status"]] != 0L) {
-    cli::cli_abort(
-      c(
-        "Dinamica registered an error.",
-        "Rerun with echo = TRUE write_logfile = TRUE to see what went wrong."
+ err <- structure( + list( + message = paste( + "Dinamica registered an error.", + "Rerun with echo = TRUE and write_logfile = TRUE to see what went wrong.", + sep = "\n" + ), + stderr = res[["stderr"]] ), - class = "dinamicaconsole_error", - body = res[["stderr"]] + class = c("dinamicaconsole_error", "error", "condition") ) + stop(err) } invisible(res) @@ -119,37 +124,32 @@ run_evoland_dinamica_sim <- function( } # find raw ego files with decoded R/Python code chunks - decoded_files <- fs::dir_ls( + decoded_files <- list.files( path = system.file("dinamica_model", package = "evoland"), - regexp = "evoland.*\\.ego-decoded$", - recurse = TRUE + pattern = "evoland.*\\.ego-decoded$", + full.names = TRUE, + recursive = TRUE ) - purrr::walk(decoded_files, function(decoded_file) { + invisible(lapply(decoded_files, function(decoded_file) { # Determine relative path and new output path with .ego extension - rel_path <- fs::path_rel( - path = decoded_file, - start = system.file("dinamica_model", package = "evoland") - ) - out_path <- fs::path_ext_set(fs::path(work_dir, rel_path), "ego") - fs::dir_create(fs::path_dir(out_path)) + base_dir <- system.file("dinamica_model", package = "evoland") + rel_path <- substring(decoded_file, nchar(base_dir) + 2) + out_path <- sub("\\.ego-decoded$", ".ego", file.path(work_dir, rel_path)) + dir.create(dirname(out_path), showWarnings = FALSE, recursive = TRUE) process_dinamica_script(decoded_file, out_path) - }) + })) # move simulation control csv into place - fs::file_copy( - ifelse( - calibration, - config[["calibration_ctrl_tbl_path"]], - config[["ctrl_tbl_path"]] - ), - fs::path(work_dir, "simulation_control.csv"), + file.copy( + if (calibration) config[["calibration_ctrl_tbl_path"]] else config[["ctrl_tbl_path"]], + file.path(work_dir, "simulation_control.csv"), overwrite = TRUE ) - cli::cli_inform("Starting to run model with Dinamica EGO") + message("Starting to run model with Dinamica EGO") exec_dinamica( - model_path = fs::path(work_dir, "evoland.ego"), + model_path = file.path(work_dir, "evoland.ego"), ... 
) } @@ -162,7 +162,7 @@ run_evoland_dinamica_sim <- function( #' @param check Default TRUE, simple check to ensure that you're handling what you're expecting process_dinamica_script <- function(infile, outfile, mode = "encode", check = TRUE) { - mode <- rlang::arg_match(mode, c("encode", "decode")) + mode <- match.arg(mode, c("encode", "decode")) if (inherits(infile, "AsIs")) { file_text <- infile } else { @@ -193,13 +193,13 @@ process_dinamica_script <- function(infile, outfile, mode = "encode", check = TR } if (nrow(matches) > 0) { - encoder_decoder <- ifelse( - mode == "encode", - \(code) base64enc::base64encode(charToRaw(code)), - \(code) rawToChar(base64enc::base64decode(code)) - ) + encoder_decoder <- if (mode == "encode") { + function(code) base64enc::base64encode(charToRaw(code)) + } else { + function(code) rawToChar(base64enc::base64decode(code)) + } # matches[,2] contains the captured R/python code OR base64-encoded code - encoded_vec <- purrr::map_chr(matches[, 2], encoder_decoder) + encoded_vec <- vapply(matches[, 2], encoder_decoder, character(1), USE.NAMES = FALSE) # replace each original code with its base64 encoded version for (i in seq_along(encoded_vec)) { file_text <- stringr::str_replace( diff --git a/man/dinamica_utils.Rd b/man/dinamica_utils.Rd new file mode 100644 index 0000000..f1c9dc4 --- /dev/null +++ b/man/dinamica_utils.Rd @@ -0,0 +1,76 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils_dinamica.r +\name{dinamica_utils} +\alias{dinamica_utils} +\alias{exec_dinamica} +\alias{run_evoland_dinamica_sim} +\alias{process_dinamica_script} +\title{Dinamica Utility Functions} +\usage{ +exec_dinamica( + model_path, + disable_parallel = TRUE, + log_level = NULL, + additional_args = NULL, + write_logfile = TRUE, + echo = FALSE +) + +run_evoland_dinamica_sim( + run_modelprechecks = TRUE, + config = get_config(), + work_dir = format(Sys.time(), "\%Y-\%m-\%d_\%Hh\%Mm\%Ss"), + calibration = FALSE, + ... +) + +process_dinamica_script(infile, outfile, mode = "encode", check = TRUE) +} +\arguments{ +\item{model_path}{Path to the .ego model file to run. Any submodels must be included +in a directory of the exact form \verb{basename(modelpath)_ego_Submodels}, \href{https://csr.ufmg.br/dinamica/dokuwiki/doku.php?id=submodels}{see wiki}} + +\item{disable_parallel}{Whether to disable parallel steps (default TRUE)} + +\item{log_level}{Logging level (1-7, default NULL)} + +\item{additional_args}{Additional arguments to pass to DinamicaConsole, see +\code{DinamicaConsole -help}} + +\item{write_logfile}{bool, write stdout&stderr to a file?} + +\item{echo}{bool, direct echo to console?} + +\item{run_modelprechecks}{bool, Validate that everything's in place for a model run. +Will never be run if calibration.} + +\item{config}{List of config params} + +\item{work_dir}{Working dir, where to place ego files and control table} + +\item{calibration}{bool, Is this a calibration run?} + +\item{...}{passed to \code{\link[=exec_dinamica]{exec_dinamica()}}} + +\item{infile}{Input file path. Treated as input if passed AsIs using \code{base::I()}} + +\item{outfile}{Output file path (optional)} + +\item{mode}{Character, either "encode" or "decode"} + +\item{check}{Default TRUE, simple check to ensure that you're handling what you're expecting} +} +\description{ +Interact with Dinamica from R, see \strong{Functions} section below. 
+} +\section{Functions}{ +\itemize{ +\item \code{exec_dinamica()}: Execute a Dinamica .ego file using \code{DinamicaConsole} + +\item \code{run_evoland_dinamica_sim()}: Set up evoland-specific Dinamica EGO files; execute using +\code{\link[=exec_dinamica]{exec_dinamica()}} + +\item \code{process_dinamica_script()}: Encode or decode raw R and Python code chunks in .ego +files and their submodels to/from base64 + +}} diff --git a/vignettes/evoland.qmd b/vignettes/evoland.qmd index 37da973..c12bf91 100644 --- a/vignettes/evoland.qmd +++ b/vignettes/evoland.qmd @@ -120,7 +120,7 @@ zippath <- file.path( # find singular csv csv_file <- unzip(zippath, list = TRUE) |> - purrr::pluck("Name") |> + (\(x) x[["Name"]])() |> stringi::stri_subset_fixed(".csv") stopifnot(length(csv_file) == 1L) ``` diff --git a/vignettes/ingest-predictors.qmd b/vignettes/ingest-predictors.qmd index 46d0ef2..27fb60d 100644 --- a/vignettes/ingest-predictors.qmd +++ b/vignettes/ingest-predictors.qmd @@ -86,7 +86,7 @@ sonbase_sources <- extent_wide <- db$extent |> terra::extend(1000) sonbase_max <- - purrr::map( + lapply( sonbase_sources$local_path, \(x) terra::rast(x) |> terra::crop(extent_wide) ) |> From 9d7191853790874a684bd55409431ac5de2d5d45 Mon Sep 17 00:00:00 2001 From: mmyrte <24587121+mmyrte@users.noreply.github.com> Date: Thu, 4 Dec 2025 12:05:04 +0100 Subject: [PATCH 03/23] dev env setup --- .devcontainer/devcontainer.json | 16 ++-------------- .devcontainer/install_pkg_deps.r | 2 +- .devcontainer/install_vscode_devtools.sh | 1 + .gitignore | 8 ++++++++ R/RcppExports.R | 1 - 5 files changed, 12 insertions(+), 16 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 808abf2..57aa81e 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -12,24 +12,12 @@ "mounts": [ { "source": "${localEnv:EVOLAND_CACHEDIR}", - "target": "/mnt/evoland_cache", - "type": "bind" - }, - { - "source": "${localEnv:EVOLAND_CALIBRATION_DIR}", - "target": "/mnt/calibration", - "type": "bind" - }, - { - "source": "${localEnv:EVOLAND_SIMULATION_DIR}", - "target": "/mnt/simulation", + "target": "/mnt/evoland-cache", "type": "bind" } ], "containerEnv": { - "EVOLAND_DATA_BASEPATH": "/mnt/evoland-data", - "EVOLAND_CALIBRATION_DIR": "/mnt/calibration", - "EVOLAND_SIMULATION_DIR": "/mnt/simulation" + "EVOLAND_CACHEDIR": "/mnt/evoland-cache" }, "customizations": { "vscode": { diff --git a/.devcontainer/install_pkg_deps.r b/.devcontainer/install_pkg_deps.r index 95a8fe0..9568070 100755 --- a/.devcontainer/install_pkg_deps.r +++ b/.devcontainer/install_pkg_deps.r @@ -5,7 +5,7 @@ desc <- remotes:::read_dcf("DESCRIPTION") -dependencies <- c("Depends", "Imports", "LinkingTo") +dependencies <- c("Depends", "Imports", "LinkingTo", "Suggests") dependencies <- intersect(dependencies, names(desc)) pkg_deps <- lapply(desc[dependencies], remotes:::parse_deps) |> diff --git a/.devcontainer/install_vscode_devtools.sh b/.devcontainer/install_vscode_devtools.sh index b651bd9..f5a75b8 100755 --- a/.devcontainer/install_vscode_devtools.sh +++ b/.devcontainer/install_vscode_devtools.sh @@ -24,6 +24,7 @@ install2.r --error --skipinstalled -n $NCPUS \ covr \ devtools \ languageserver \ + lobstr \ microbenchmark \ profvis \ quarto \ diff --git a/.gitignore b/.gitignore index 5b5e683..ef5fafe 100644 --- a/.gitignore +++ b/.gitignore @@ -56,3 +56,11 @@ docs # generally ignore databases, like .evolanddb, .duckdb, anything.db *db + +# IDE specific artefacts +*compile_commands* +*cobertura* +.cache/ + +# 
ignore drafts +*suddel* diff --git a/R/RcppExports.R b/R/RcppExports.R index 395824b..207be93 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -4,4 +4,3 @@ distance_neighbors_cpp <- function(coords_t, max_distance, resolution = 100.0, quiet = FALSE) { .Call(`_evoland_distance_neighbors_cpp`, coords_t, max_distance, resolution, quiet) } - From 7a596e6f31f309f57de6c35e17b851606fbb9097 Mon Sep 17 00:00:00 2001 From: mmyrte <24587121+mmyrte@users.noreply.github.com> Date: Thu, 4 Dec 2025 18:04:44 +0100 Subject: [PATCH 04/23] smaller ego logic --- inst/dinamica_model/evoland.ego-decoded | 496 ++++-------------------- 1 file changed, 66 insertions(+), 430 deletions(-) diff --git a/inst/dinamica_model/evoland.ego-decoded b/inst/dinamica_model/evoland.ego-decoded index 7266d06..e0012fa 100644 --- a/inst/dinamica_model/evoland.ego-decoded +++ b/inst/dinamica_model/evoland.ego-decoded @@ -1,434 +1,70 @@ @charset = UTF-8 -@date = 2024-Jan-21 16:00:58 -@version = 7.2.0.20221230 +@date = 2025-Dec-04 16:00:58 +@version = 8.7.0.20250814 @submodel.import = CreateCubeOfProbabilityMaps { { \"transitionMatrix\" : Table, \"inputFolder\" : Folder } { } { \"probabilityMapsRasterCube\" : Map } }; AllocateTransitions { { \"lanscape\" : CategoricalMap, \"probabilities\" : Map, \"transitionMatrix\" : TransitionMatrix, \"percentOfTransitionsByExpansion\" : PercentMatrix, \"patchExpansionParameters\" : TransitionFunctionParameterMatrix, \"patchGenerationParameters\" : TransitionFunctionParameterMatrix } { \"printTransitionInfo\" : BooleanValue } { \"resultingLanscape\" : CategoricalMap } }; CalcSimilarityOfDifferences { { \"initialMap\" : CategoricalMap, \"observedMap\" : CategoricalMap, \"simulatedMap\" : CategoricalMap } { \"useExponentialDecay\" : BooleanValue, \"windowSize\" : PositiveIntegerValue, \"printSimilarities\" : BooleanValue, \"exponentialDecayDivisor\" : RealValue } { \"similarityMap\" : Map, \"similarity\" : RealValue } } Script {{ - @collapsed = no - @alias = Load control table - _ := Group .none {{ - getEnvironmentVariables := CalculateRExpression " - message(Sys.time(), " - getEnvironmentVariables") - outputString("work_dir", getwd()) - " .no {{ }}; - - workingDirectory := ExtractStructString getEnvironmentVariables $"(work_dir)"; - - // Subset control table. The ForEach logic in Dinamica takes the first column - // for the simulationNumber, which R then uses to subset by csv row. Danger! - @collapsed = no - @alias = Subset control table to only simulations yet to be completed (i.e. 
modelled) - incompleteSimulationsRExpression := CalculateRExpression " - message(Sys.time(), " - incompleteSimulationsRExpression") - outputTable("remaining_simulations", evoland::get_remaining_simulations()) - " .no {{ }}; - - @viewer.table = yes - subsettedControlTable := ExtractStructTable incompleteSimulationsRExpression $"(remaining_simulations)"; - }}; - - // Iterate over each row of the control table performing the extrapolation - @collapsed = no - @alias = Simulation iteration - _ := ForEach subsettedControlTable subsettedControlTable {{ - step = step; - - @viewer.step = yes - simulationNumber := Step step; - - @collapsed = no - @alias = Set working directory - Workdir workingDirectory {{ - workdir = workdir; - - @collapsed = no - @viewer.executionCompletedSucessfully = yes - skipOnError1399 := SkipOnError .yes {{ - @collapsed = no - @alias = Initialise scenario - _ := Group .none {{ - // Prepare maps, create folders and pass back file paths for current simulation - @collapsed = no - @viewer.result = yes - initializeCurrentSimulation := CalculateRExpression " - message(Sys.time(), " - initializeCurrentSimulation") - config <- evoland::get_config() - sim_details <- evoland::get_simulation_params(simulation_id = v1) - evoland::create_init_lulc_raster(params = sim_details) - - # step length to construct simulatedLulcSavePath - outputDouble("step_length", config[["step_length"]]) - - # lookup table to iterate over (timeStepTable) - timesteps <- evoland::get_simulation_timesteps(params = sim_details) - outputLookupTable("simulation_time_steps", timesteps[["key"]], timesteps[["value"]]) - - # send simulated LULC folder path to Dinamica receiver: sim_results_path - outputString("sim_results_path", sim_details[["sim_results_path"]]) - - # send initial LULC map file path to Dinamica receiver: - outputString("initial_lulc_path", sim_details[["initial_lulc_path"]]) - - # send folder path as string to Dinamica receiver: allocation_params_folder_path - outputString("allocation_params_folder_path", sim_details[["params_folder_dinamica"]]) - " .no {{ - @alias = Row of current simulation in table - NumberValue simulationNumber 1; - }}; - - @viewer.string = yes - receiveInitialLulcFilePath := ExtractStructString initializeCurrentSimulation $"(initial_lulc_path)"; - - @viewer.lookupTable = yes - timeStepTable := ExtractStructLookupTable initializeCurrentSimulation $"(simulation_time_steps)"; - - stepLength := ExtractStructNumber initializeCurrentSimulation $"(step_length)"; - - @viewer.string = yes - allocationParamsFolderPath := ExtractStructString initializeCurrentSimulation $"(allocation_params_folder_path)"; - - @viewer.string = yes - receiveSimResultsPath := ExtractStructString initializeCurrentSimulation $"(sim_results_path)"; - }}; - - @collapsed = no - scenarioTimeStepIteration := ForEach timeStepTable timeStepTable {{ - step0 = step; - - // Receives current model time step - @viewer.step = yes - timeStep := Step step0; - - @alias = Transition rate table control - _ := Group .none {{ - - // Load Scenario/time specific transition matrix - @collapsed = no - loadTransitionRateTable := CalculateRExpression " - message(Sys.time(), " - loadTransitionRateTable") - outputTable("trans_rate_table", evoland::dinamica_load_trans_table( - simulation_id = v2, - year = v1 - )) - " .no {{ - @alias = Time step - NumberValue timeStep 1; - - @alias = Simulation number - NumberValue simulationNumber 2; - }}; - - transitionRatesTable := ExtractStructTable loadTransitionRateTable $"(trans_rate_table)"; - }}; - - 
@viewer.map = yes - loadInitialLulcMap := LoadCategoricalMap { - filename = receiveInitialLulcFilePath, - nullValue = .none, - storageMode = .default, - suffixDigits = 0, - step = step0, - workdir = workdir - }; - - // simulated LULC map updated for each time step - @viewer.map = yes - updatedLulcMap := MuxCategoricalMap loadInitialLulcMap loadUpdatedLulcMap; - - @alias = Load allocation parameters - _ := Group .none {{ - @collapsed = no - @viewer.result = yes - // Modify file path for allocation parameters table for time step - allocationParametersPathForTimeStep := CreateString allocationParamsFolderPath {{ - @alias = Time step - NumberValue timeStep 1; - }}; - - @viewer.table = yes - loadTableOfAllocationParameters := LoadTable { - filename = allocationParametersPathForTimeStep, - uniqueIdKeyColumnCreation = .ifNecessary, - suffixDigits = 0, - step = step0, - workdir = workdir - }; - - @collapsed = no - @viewer.result = yes - splitParamTable := CalculateRExpression " - message(Sys.time(), " - splitParamTable") - param_table <- t1 - - # the third positional argument `, 2` is necessary for Dinamica to register that the - # table has 2 key columns (From and To) - could not find documentation for this - outputTable( - "expander_table", - param_table[, c("From", "To", "Perc_expander")], - 2 - ) - outputTable( - "patch_table", - param_table[, !(colnames(param_table) %in% c("Perc_expander", "Perc_patcher"))], - 2 - ) - " .no {{ - NumberTable loadTableOfAllocationParameters 1; - }}; - - @alias = % expansion table - @viewer.table = yes - expansionTable := ExtractStructTable splitParamTable $"(expander_table)"; - - @viewer.table = yes - patchRelatedParams := ExtractStructTable splitParamTable $"(patch_table)"; - }}; - - @collapsed = no - @alias = Transition potential calculation - _ := Group .none {{ - @collapsed = no - @alias = Transition potential R script - transitionPotentialRScript := CalculateRExpression " - message(Sys.time(), " - transitionPotentialRScript") - - prob_map_folder <- evoland::dinamica_trans_potent_calc( - simulation_num = v2, - time_step = v1 - ) - - outputString("probmap_folder_path", prob_map_folder) - " .no {{ - @alias = time_step - NumberValue timeStep 1; - - @alias = Current simulation number - NumberValue simulationNumber 2; - }}; - - @viewer.string = yes - folderPathForProbabilityMaps := ExtractStructString transitionPotentialRScript $"(probmap_folder_path)"; - - @viewer.probabilityMapsRasterCube = yes - createCubeOfProbabilityMaps7123 := CreateCubeOfProbabilityMaps transitionRatesTable folderPathForProbabilityMaps; - }}; - - @alias = Transition allocation - _ := Group .none {{ - @collapsed = no - createSimulatedLulcSavePath := CalculateRExpression " - message(Sys.time(), " - createSimulatedLulcSavePath") - simulated_lulc_year <- v1 + v2 - final_lulc_path <- file.path( - s1, - paste0(simulated_lulc_year, ".tif") - ) - - outputString("simulated_lulc_save_path", final_lulc_path) - #output the simulated map year for use in the implementation of deterministic transitions - outputDouble("simulated_lulc_year", simulated_lulc_year) - " .no {{ - @alias = Simulated LULC map base path - NumberString receiveSimResultsPath 1; - - @alias = Current time - NumberValue timeStep 1; - - @alias = Model step length - NumberValue stepLength 2; - }}; - - @viewer.string = yes - simulatedLulcSavePath := ExtractStructString createSimulatedLulcSavePath $"(simulated_lulc_save_path)"; - simulatedLulcYear := ExtractStructNumber createSimulatedLulcSavePath $"(simulated_lulc_year)"; - - 
@viewer.resultingLanscape = yes - allocateTransitions1454 := AllocateTransitions { - lanscape = updatedLulcMap, - probabilities = createCubeOfProbabilityMaps7123, - transitionMatrix = transitionRatesTable, - percentOfTransitionsByExpansion = expansionTable, - patchExpansionParameters = patchRelatedParams, - patchGenerationParameters = patchRelatedParams, - printTransitionInfo = .yes - }; - - - // Simulated LULC map saved at every time step. Receives time info from control - // operator and adds a suffix to the file path. - @alias = Save simulated LULC - SaveMap { - map = allocateTransitions1454, - filename = simulatedLulcSavePath, - suffixDigits = 0, - step = 0, - useCompression = .yes, - workdir = workdir, - ignoreCostlySparseCategories = .yes - }; - }}; - - @alias = Implement deterministic transitions - _ := Group .none {{ - @collapsed = no - _ := CalculateRExpression " - message(Sys.time(), " - deterministicTransitions") - - evoland::dinamica_deterministic_trans( - simulated_lulc_save_path = s1, - simulation_id = v1, - simulated_lulc_year = v2 - ) - " .no {{ - @alias = Simulated LULC save path - NumberString simulatedLulcSavePath 1; - - @alias = Simulation number - NumberValue simulationNumber 1; - - @alias = Simulated LULC year - NumberValue simulatedLulcYear 2; - }}; - - // - // === - // Load simulated LULC map that has had deterministic transition incorporated - loadUpdatedLulcMap := LoadCategoricalMap { - filename = simulatedLulcSavePath, - nullValue = .none, - storageMode = .default, - suffixDigits = 0, - step = step0, - workdir = workdir - }; - }}; - }}; - - // - // === - // If simulating during calibration period validate outputs - @collapsed = no - @alias = Calibration period validation - _ := Group scenarioTimeStepIteration {{ - @collapsed = no - validateCalibration := CalculateRExpression " - message(Sys.time(), " - validateCalibration") - validation_params <- evoland::dinamica_use_validation( - simulation_id = v1, - sim_results_path = s1 - ) - - # double as bool: 0 -> don't validate, 1 -> do validate - outputDouble("Use_validation", validation_params[["validation_condition"]]) - - # where to write validation results - outputString("Val_map_path", validation_params[["Validation_map_path"]]) - outputString("Val_result_path", validation_params[["Validation_result_path"]]) - - # final observed and simulated years, respectively - outputString("Obs_LULC_path", validation_params[["Final_LULC_path"]]) - outputString("Sim_LULC_path", validation_params[["Sim_final_LULC_path"]]) - " .no {{ - @alias = Send simulation ID - NumberValue simulationNumber 1; - - @alias = Send file path for simulated LULC map - NumberString receiveSimResultsPath 1; - - }}; - - @viewer.string = yes - filePathLastObservedLulcYear := ExtractStructString validateCalibration $"(Obs_LULC_path)"; - - filePathValidationResults := ExtractStructString validateCalibration $"(Val_result_path)"; - - filePathLastSimulatedLulcYear := ExtractStructString validateCalibration $"(Sim_LULC_path)"; - - // returning 0 does not initiate validation, returning 1 does - @viewer.number = yes - useValidationCondition := ExtractStructNumber validateCalibration $"(Use_validation)"; - - filePathValidationMap := ExtractStructString validateCalibration $"(Val_map_path)"; - - _ := IfThen useValidationCondition .none {{ - loadObservedLulcMapForFinalYearOfSimulation := LoadCategoricalMap { - filename = filePathLastObservedLulcYear, - nullValue = .none, - storageMode = .default, - suffixDigits = 0, - step = step, - workdir = workdir - }; - - 
@alias = Load Initial Lulc Map - loadInitialLulcMap0 := LoadCategoricalMap { - filename = receiveInitialLulcFilePath, - nullValue = .none, - storageMode = .default, - suffixDigits = 0, - step = step, - workdir = workdir - }; - - loadSimulatedLulcMap := LoadCategoricalMap { - filename = filePathLastSimulatedLulcYear, - nullValue = .none, - storageMode = .default, - suffixDigits = 0, - step = step, - workdir = workdir - }; - - // can this be replaced by logic in R? - @viewer.similarity = yes - @viewer.similarityMap = yes - similarityMap similarity := CalcSimilarityOfDifferences { - initialMap = loadInitialLulcMap0, - observedMap = loadObservedLulcMapForFinalYearOfSimulation, - simulatedMap = loadSimulatedLulcMap, - useExponentialDecay = .yes, - windowSize = 11, - printSimilarities = .yes, - exponentialDecayDivisor = 2 - }; - - SaveTextFile { - text = similarity, - filename = filePathValidationResults, - suffixDigits = 2, - step = step, - workdir = workdir - }; - - SaveMap { - map = similarityMap, - filename = filePathValidationMap, - suffixDigits = 2, - step = step, - useCompression = .yes, - workdir = workdir, - ignoreCostlySparseCategories = .yes - }; - }}; - }}; - }}; - - @alias = Update control table to reflect simulation complete - _ := Group .none {{ - @collapsed = no - _ := CalculateRExpression " - message(Sys.time(), " - update control table") - evoland::dinamica_update_control_table( - success = s1, - simulation_num = v1 - ) - " .no {{ - @alias = Completion boolean - NumberString skipOnError1399 1; - - @alias = Simulation number - NumberValue simulationNumber 1; - }}; - }}; - }}; - }}; + getVariables := CalculateRExpression " + message(Sys.time(), " - getEnvironmentVariables") + outputString("probabilityMapDir", file.path(getwd(), "probability_map_dir")) + outputString("anteriorMapPath", file.path(getwd(), "anterior.tif")) + outputString("posteriorMapPath", file.path(getwd(), "posterior.tif")) + + # the 2 is for two key columns, which i don't know how to set when using LoadTable + outputTable( + "expansionTable", + read.csv(file.path(getwd(), "expansion_table.csv")), + 2 + ) + outputTable( + "patcherTable", + read.csv(file.path(getwd(), "patcher_table.csv")), + 2 + ) + outputTable( + "transitionRatesTable", + read.csv(file.path(getwd(), "trans_rates.csv")), + 2 + ) + " .no {{ }}; + + probabilityMapDir := ExtractStructString getVariables $"(probabilityMapDir)"; + anteriorMapPath := ExtractStructString getVariables $"(anteriorMapPath)"; + posteriorMapPath := ExtractStructString getVariables $"(posteriorMapPath)"; + + // this includes From*, To*, Rate + transitionRatesTable := ExtractStructTable getVariables $"(transitionRatesTable)"; + // this includes From*, To*, Perc_expander + expansionTable := ExtractStructTable getVariables $"(expansionTable)"; + // this includes From*, To*, Mean_Patch_Size, Patch_Size_Variance, Patch_Isometry + patcherTable := ExtractStructTable getVariables $"(patcherTable)"; + + anteriorMap := LoadCategoricalMap { + filename = anteriorMapPath, + nullValue = .none, + storageMode = .default + }; + + @alias = Create cube of probability maps + cubeOfProbMaps := CreateCubeOfProbabilityMaps transitionRatesTable probabilityMapDir; + + @alias = Allocate transitions + posteriorMap := AllocateTransitions { + lanscape = anteriorMap, + probabilities = cubeOfProbMaps, + transitionMatrix = transitionRatesTable, + percentOfTransitionsByExpansion = expansionTable, + patchExpansionParameters = patcherTable, + patchGenerationParameters = patcherTable, + printTransitionInfo = 
.yes
+  };
+
+  // Simulated LULC map saved once per run; the file name and location are fixed
+  // by posteriorMapPath rather than suffixed per time step.
+  @alias = Save simulated LULC
+  SaveMap {
+    map = posteriorMap,
+    filename = posteriorMapPath,
+    useCompression = .yes
+  };
 }};
+

From f1c8e824d3676a322c4b96da66dcc1a33dca7f9f Mon Sep 17 00:00:00 2001
From: mmyrte <24587121+mmyrte@users.noreply.github.com>
Date: Thu, 4 Dec 2025 19:15:49 +0100
Subject: [PATCH 05/23] test dinamica, rename allocation

---
 R/RcppExports.R | 1 +
 R/utils_dinamica.r | 4 +-
 .../allocation.ego-decoded} | 0
 .../AllocateTransitions.ego-decoded | 0
 .../CalcSimilarityOfDifferences.ego-decoded | 0
 .../CreateCubeOfProbabilityMaps.ego-decoded | 0
 .../ExpandTableToUniqueKeys.ego-decoded | 0
 .../ListFilenames.ego-decoded | 0
 inst/tinytest/test_util_dinamica.R | 129 ++++++++++++++++++
 9 files changed, 132 insertions(+), 2 deletions(-)
 rename inst/{dinamica_model/evoland.ego-decoded => dinamica_models/allocation.ego-decoded} (100%)
 rename inst/{dinamica_model/evoland_ego_Submodels => dinamica_models/allocation_ego_Submodels}/AllocateTransitions.ego-decoded (100%)
 rename inst/{dinamica_model/evoland_ego_Submodels => dinamica_models/allocation_ego_Submodels}/CalcSimilarityOfDifferences.ego-decoded (100%)
 rename inst/{dinamica_model/evoland_ego_Submodels => dinamica_models/allocation_ego_Submodels}/CreateCubeOfProbabilityMaps.ego-decoded (100%)
 rename inst/{dinamica_model/evoland_ego_Submodels => dinamica_models/allocation_ego_Submodels}/ExpandTableToUniqueKeys.ego-decoded (100%)
 rename inst/{dinamica_model/evoland_ego_Submodels => dinamica_models/allocation_ego_Submodels}/ListFilenames.ego-decoded (100%)
 create mode 100644 inst/tinytest/test_util_dinamica.R

diff --git a/R/RcppExports.R b/R/RcppExports.R
index 207be93..395824b 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -4,3 +4,4 @@
 distance_neighbors_cpp <- function(coords_t, max_distance, resolution = 100.0, quiet = FALSE) {
     .Call(`_evoland_distance_neighbors_cpp`, coords_t, max_distance, resolution, quiet)
 }
+
diff --git a/R/utils_dinamica.r b/R/utils_dinamica.r
index bacd984..09f5ab3 100644
--- a/R/utils_dinamica.r
+++ b/R/utils_dinamica.r
@@ -52,7 +52,7 @@ exec_dinamica <- function(
       args = c(
         "-c",
         sprintf(
-          "stdbuf -oL DinamicaConsole %s 2>&1 | sed 's/\\x1b\\[[0-9;]*m//g' | tee '%s'",
+          "set -o pipefail; stdbuf -oL DinamicaConsole %s 2>&1 | sed 's/\\x1b\\[[0-9;]*m//g' | tee '%s'; exit ${PIPESTATUS[0]}",
           paste(shQuote(args), collapse = " "),
           logfile_path
         )
@@ -90,7 +90,7 @@ exec_dinamica <- function(
     list(
       message = paste(
         "Dinamica registered an error.",
-        "Rerun with echo = TRUE and write_logfile = TRUE to see what went wrong.",
+        "Rerun with echo = TRUE or write_logfile = TRUE to see what went wrong.",
         sep = "\n"
       ),
       stderr = res[["stderr"]]
diff --git a/inst/dinamica_model/evoland.ego-decoded b/inst/dinamica_models/allocation.ego-decoded
similarity index 100%
rename from inst/dinamica_model/evoland.ego-decoded
rename to inst/dinamica_models/allocation.ego-decoded
diff --git a/inst/dinamica_model/evoland_ego_Submodels/AllocateTransitions.ego-decoded b/inst/dinamica_models/allocation_ego_Submodels/AllocateTransitions.ego-decoded
similarity index 100%
rename from inst/dinamica_model/evoland_ego_Submodels/AllocateTransitions.ego-decoded
rename to inst/dinamica_models/allocation_ego_Submodels/AllocateTransitions.ego-decoded
diff --git a/inst/dinamica_model/evoland_ego_Submodels/CalcSimilarityOfDifferences.ego-decoded 
b/inst/dinamica_models/allocation_ego_Submodels/CalcSimilarityOfDifferences.ego-decoded similarity index 100% rename from inst/dinamica_model/evoland_ego_Submodels/CalcSimilarityOfDifferences.ego-decoded rename to inst/dinamica_models/allocation_ego_Submodels/CalcSimilarityOfDifferences.ego-decoded diff --git a/inst/dinamica_model/evoland_ego_Submodels/CreateCubeOfProbabilityMaps.ego-decoded b/inst/dinamica_models/allocation_ego_Submodels/CreateCubeOfProbabilityMaps.ego-decoded similarity index 100% rename from inst/dinamica_model/evoland_ego_Submodels/CreateCubeOfProbabilityMaps.ego-decoded rename to inst/dinamica_models/allocation_ego_Submodels/CreateCubeOfProbabilityMaps.ego-decoded diff --git a/inst/dinamica_model/evoland_ego_Submodels/ExpandTableToUniqueKeys.ego-decoded b/inst/dinamica_models/allocation_ego_Submodels/ExpandTableToUniqueKeys.ego-decoded similarity index 100% rename from inst/dinamica_model/evoland_ego_Submodels/ExpandTableToUniqueKeys.ego-decoded rename to inst/dinamica_models/allocation_ego_Submodels/ExpandTableToUniqueKeys.ego-decoded diff --git a/inst/dinamica_model/evoland_ego_Submodels/ListFilenames.ego-decoded b/inst/dinamica_models/allocation_ego_Submodels/ListFilenames.ego-decoded similarity index 100% rename from inst/dinamica_model/evoland_ego_Submodels/ListFilenames.ego-decoded rename to inst/dinamica_models/allocation_ego_Submodels/ListFilenames.ego-decoded diff --git a/inst/tinytest/test_util_dinamica.R b/inst/tinytest/test_util_dinamica.R new file mode 100644 index 0000000..7ca7dd7 --- /dev/null +++ b/inst/tinytest/test_util_dinamica.R @@ -0,0 +1,129 @@ +library(tinytest) + +# nolint start +sample_dinamica_script_encoded <- ' +@charset = UTF-8 +@date = 2023-Oct-13 15:04:52 +@version = 8.3 +Script {{ + realValue1652 := RealValue 2; + + @collapsed = no + calculateRexpression1653 := CalculateRExpression "b3V0cHV0IDwtIHYxKjIKb3V0cHV0RG91YmxlKCJvdXRwdXRfbnVtYmVyIiwgb3V0cHV0KQ==" .no {{ + NumberValue realValue1652 1; + }}; + + @viewer.number = yes + _ := ExtractStructNumber calculateRexpression1653 $"(output_number)"; +}}; +' + +sample_dinamica_script_decoded <- ' +@charset = UTF-8 +@date = 2023-Oct-13 15:04:52 +@version = 8.3 +Script {{ + realValue1652 := RealValue 2; + + @collapsed = no + calculateRexpression1653 := CalculateRExpression "stop("runcible spoon")" .no {{ + NumberValue realValue1652 1; + }}; + + @viewer.number = yes + _ := ExtractStructNumber calculateRexpression1653 $"(output_number)"; +}}; +' +# nolint end + +# Test: process_dinamica_script encodes correctly +expect_error( + process_dinamica_script( + I(sample_dinamica_script_encoded), + mode = "encode" + ), + pattern = "seems unlikely for an unencoded code chunk" +) + +expect_match( + process_dinamica_script( + I(sample_dinamica_script_encoded), + mode = "decode" + ), + 'output <- v1\\*2\\noutputDouble\\("output_number", output\\)' +) + +# Test: process_dinamica_script decodes correctly +expect_error( + process_dinamica_script( + I(sample_dinamica_script_decoded), + mode = "decode" + ), + pattern = "seems unlikely for an encoded code chunk" +) + +expect_match( + process_dinamica_script( + I(sample_dinamica_script_decoded), + mode = "encode" + ), + "c3RvcCgicnVuY2libGUgc3Bvb24iKQ==" +) + +# Test: process_dinamica_script is idempotent +expect_equal( + { + sample_dinamica_script_encoded |> + I() |> + process_dinamica_script(mode = "decode") |> + I() |> + process_dinamica_script(mode = "encode") + }, + sample_dinamica_script_encoded +) + +expect_equal( + { + sample_dinamica_script_decoded |> + I() 
|> + process_dinamica_script(mode = "encode") |> + I() |> + process_dinamica_script(mode = "decode") + }, + sample_dinamica_script_decoded +) + +# Test: exec_dinamica works +tmpfile_ego <- tempfile(fileext = ".ego") +writeChar( + sample_dinamica_script_encoded, + tmpfile_ego, + eos = NULL +) +expect_identical( + exec_dinamica(tmpfile_ego)[["status"]], + 0L +) +unlink(tmpfile_ego) + +# Test: exec_dinamica with echo +tmpfile_ego <- tempfile(fileext = ".ego") +writeChar( + sample_dinamica_script_encoded, + tmpfile_ego, + eos = NULL +) +expect_stdout( + exec_dinamica(tmpfile_ego, echo = TRUE, write_logfile = FALSE), + "Running model script" +) +unlink(tmpfile_ego) + +# Test: exec_dinamica fails +tmpfile_ego <- tempfile(fileext = ".ego") +process_dinamica_script(I(sample_dinamica_script_decoded), tmpfile_ego) +expect_error( + exec_dinamica(tmpfile_ego, echo = TRUE), + pattern = "runcible spoon" +) +unlink(tmpfile_ego) From 9cc464327f4c40a4365caa84739308f95eee3b3a Mon Sep 17 00:00:00 2001 From: mmyrte <24587121+mmyrte@users.noreply.github.com> Date: Thu, 4 Dec 2025 19:57:01 +0100 Subject: [PATCH 06/23] non-exported issue / more better log suppression --- inst/tinytest/test_util_dinamica.R | 41 +++++++++++++++++------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/inst/tinytest/test_util_dinamica.R b/inst/tinytest/test_util_dinamica.R index 7ca7dd7..720b580 100644 --- a/inst/tinytest/test_util_dinamica.R +++ b/inst/tinytest/test_util_dinamica.R @@ -36,9 +36,9 @@ Script {{ ' # nolint end -# Test: process_dinamica_script encodes correctly +# Test: evoland:::process_dinamica_script encodes correctly expect_error( - process_dinamica_script( + evoland:::process_dinamica_script( I(sample_dinamica_script_encoded), mode = "encode" ), @@ -46,16 +46,16 @@ expect_error( ) expect_match( - process_dinamica_script( + evoland:::process_dinamica_script( I(sample_dinamica_script_encoded), mode = "decode" ), 'output <- v1\\*2\\noutputDouble\\("output_number", output\\)' ) -# Test: process_dinamica_script decodes correctly +# Test: evoland:::process_dinamica_script decodes correctly expect_error( - process_dinamica_script( + evoland:::process_dinamica_script( I(sample_dinamica_script_decoded), mode = "decode" ), @@ -63,21 +63,21 @@ expect_error( ) expect_match( - process_dinamica_script( + evoland:::process_dinamica_script( I(sample_dinamica_script_decoded), mode = "encode" ), "c3RvcCgicnVuY2libGUgc3Bvb24iKQ==" ) -# Test: process_dinamica_script is idempotent +# Test: evoland:::process_dinamica_script is idempotent expect_equal( { sample_dinamica_script_encoded |> I() |> - process_dinamica_script(mode = "decode") |> + evoland:::process_dinamica_script(mode = "decode") |> I() |> - process_dinamica_script(mode = "encode") + evoland:::process_dinamica_script(mode = "encode") }, sample_dinamica_script_encoded ) @@ -86,9 +86,9 @@ expect_equal( { sample_dinamica_script_decoded |> I() |> - process_dinamica_script(mode = "encode") |> + evoland:::process_dinamica_script(mode = "encode") |> I() |> - process_dinamica_script(mode = "decode") + evoland:::process_dinamica_script(mode = "decode") }, sample_dinamica_script_decoded ) @@ -100,9 +100,12 @@ writeChar( tmpfile_ego, eos = NULL ) -expect_identical( - exec_dinamica(tmpfile_ego)[["status"]], - 0L +expect_message( + expect_identical( + exec_dinamica(tmpfile_ego)[["status"]], + 0L + ), + "Logging to" ) unlink(tmpfile_ego) @@ -121,9 +124,13 @@ unlink(tmpfile_ego) # Test: exec_dinamica fails tmpfile_ego <- tempfile(fileext = ".ego") 
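These expectations compose from the inside out: the innermost call evaluates the expression, and each wrapper captures one channel (stdout, messages, or the error). A minimal sketch of the same nesting with a hypothetical `f()`, assuming only the tinytest package:

library(tinytest)
f <- function() {
  cat("details on stdout\n") # captured by expect_stdout()
  message("progress note")   # captured (and silenced) by expect_message()
  stop("boom")               # intercepted by expect_error()
}
expect_stdout(
  expect_message(expect_error(f(), "boom"), "progress note"),
  pattern = "details on stdout"
)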
-process_dinamica_script(I(sample_dinamica_script_decoded), tmpfile_ego) -expect_error( - exec_dinamica(tmpfile_ego, echo = TRUE), +evoland:::process_dinamica_script(I(sample_dinamica_script_decoded), tmpfile_ego) + +expect_stdout( + expect_error( + exec_dinamica(tmpfile_ego, echo = TRUE), + "Dinamica registered an error" + ), pattern = "runcible spoon" ) unlink(tmpfile_ego) From b43d70a001097fa9c10c8f9f209693b74f6af3ad Mon Sep 17 00:00:00 2001 From: mmyrte <24587121+mmyrte@users.noreply.github.com> Date: Thu, 4 Dec 2025 21:31:33 +0100 Subject: [PATCH 07/23] add r-cmd-check for multiplatform surety --- .github/workflows/R-CMD-check.yaml | 51 ++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 .github/workflows/R-CMD-check.yaml diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml new file mode 100644 index 0000000..562fe0f --- /dev/null +++ b/.github/workflows/R-CMD-check.yaml @@ -0,0 +1,51 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + +name: R-CMD-check.yaml + +permissions: read-all + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: macos-latest, r: 'release'} + - {os: windows-latest, r: 'release'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'oldrel-1'} + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true + build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' From b1663b23bf15e6199a88e18ef405752924e5cf06 Mon Sep 17 00:00:00 2001 From: mmyrte <24587121+mmyrte@users.noreply.github.com> Date: Thu, 4 Dec 2025 21:36:55 +0100 Subject: [PATCH 08/23] check for DinamicaConsole on PATH --- R/utils_dinamica.r | 6 +++ inst/tinytest/test_util_dinamica.R | 84 +++++++++++++++--------------- 2 files changed, 49 insertions(+), 41 deletions(-) diff --git a/R/utils_dinamica.r b/R/utils_dinamica.r index 09f5ab3..4265127 100644 --- a/R/utils_dinamica.r +++ b/R/utils_dinamica.r @@ -26,6 +26,12 @@ exec_dinamica <- function( write_logfile = TRUE, echo = FALSE ) { + if (Sys.which("DinamicaConsole") == "") { + stop( + "DinamicaConsole not found on PATH. ", + "Please ensure Dinamica EGO is installed and DinamicaConsole is available." 
+ ) + } args <- character() if (disable_parallel) { args <- c(args, "-disable-parallel-steps") diff --git a/inst/tinytest/test_util_dinamica.R b/inst/tinytest/test_util_dinamica.R index 720b580..c880605 100644 --- a/inst/tinytest/test_util_dinamica.R +++ b/inst/tinytest/test_util_dinamica.R @@ -93,44 +93,46 @@ expect_equal( sample_dinamica_script_decoded ) -# Test: exec_dinamica works -tmpfile_ego <- tempfile(fileext = ".ego") -writeChar( - sample_dinamica_script_encoded, - tmpfile_ego, - eos = NULL -) -expect_message( - expect_identical( - exec_dinamica(tmpfile_ego)[["status"]], - 0L - ), - "Logging to" -) -unlink(tmpfile_ego) - -# Test: exec_dinamica with echo -tmpfile_ego <- tempfile(fileext = ".ego") -writeChar( - sample_dinamica_script_encoded, - tmpfile_ego, - eos = NULL -) -expect_stdout( - exec_dinamica(tmpfile_ego, echo = TRUE, write_logfile = FALSE), - "Running model script" -) -unlink(tmpfile_ego) - -# Test: exec_dinamica fails -tmpfile_ego <- tempfile(fileext = ".ego") -evoland:::process_dinamica_script(I(sample_dinamica_script_decoded), tmpfile_ego) - -expect_stdout( - expect_error( - exec_dinamica(tmpfile_ego, echo = TRUE), - "Dinamica registered an error" - ), - pattern = "runcible spoon" -) -unlink(tmpfile_ego) +if (length(Sys.which("DinamicaConsole")) == 1L) { + # Test: exec_dinamica works + tmpfile_ego <- tempfile(fileext = ".ego") + writeChar( + sample_dinamica_script_encoded, + tmpfile_ego, + eos = NULL + ) + expect_message( + expect_identical( + exec_dinamica(tmpfile_ego)[["status"]], + 0L + ), + "Logging to" + ) + unlink(tmpfile_ego) + + # Test: exec_dinamica with echo + tmpfile_ego <- tempfile(fileext = ".ego") + writeChar( + sample_dinamica_script_encoded, + tmpfile_ego, + eos = NULL + ) + expect_stdout( + exec_dinamica(tmpfile_ego, echo = TRUE, write_logfile = FALSE), + "Running model script" + ) + unlink(tmpfile_ego) + + # Test: exec_dinamica fails + tmpfile_ego <- tempfile(fileext = ".ego") + evoland:::process_dinamica_script(I(sample_dinamica_script_decoded), tmpfile_ego) + + expect_stdout( + expect_error( + exec_dinamica(tmpfile_ego, echo = TRUE), + "Dinamica registered an error" + ), + pattern = "runcible spoon" + ) + unlink(tmpfile_ego) +} From 6d0c2c48f0f69155ce2fa3ef0591c9c422036baa Mon Sep 17 00:00:00 2001 From: mmyrte <24587121+mmyrte@users.noreply.github.com> Date: Fri, 5 Dec 2025 10:25:06 +0100 Subject: [PATCH 09/23] fix failing tests on macos --- R/util_terra.R | 4 ++-- inst/tinytest/test_util_dinamica.R | 2 +- inst/tinytest/test_util_terra.R | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/util_terra.R b/R/util_terra.R index 463c772..4921173 100644 --- a/R/util_terra.R +++ b/R/util_terra.R @@ -20,7 +20,7 @@ extract_using_coords_t.SpatRaster <- function(x, coords_t, na_omit = TRUE) { pts <- coords_t[, .(id_coord, lon, lat)] |> data.table::as.data.table() |> - terra::vect() + terra::vect(crs = terra::crs(x)) # TODO we need to add an EPSG attr to coords out <- terra::extract( @@ -52,7 +52,7 @@ extract_using_coords_t.SpatVector <- function(x, coords_t, na_omit = TRUE) { pts <- coords_t[, .(lon, lat)] |> as.matrix() |> - terra::vect() + terra::vect(crs = terra::crs(x)) # TODO we need to add an EPSG attr to coords tmp <- terra::extract( diff --git a/inst/tinytest/test_util_dinamica.R b/inst/tinytest/test_util_dinamica.R index c880605..729293a 100644 --- a/inst/tinytest/test_util_dinamica.R +++ b/inst/tinytest/test_util_dinamica.R @@ -93,7 +93,7 @@ expect_equal( sample_dinamica_script_decoded ) -if (length(Sys.which("DinamicaConsole")) 
== 1L) { +if (Sys.which("DinamicaConsole") != "") { # Test: exec_dinamica works tmpfile_ego <- tempfile(fileext = ".ego") writeChar( diff --git a/inst/tinytest/test_util_terra.R b/inst/tinytest/test_util_terra.R index 35a0bab..1d6a566 100644 --- a/inst/tinytest/test_util_terra.R +++ b/inst/tinytest/test_util_terra.R @@ -83,7 +83,7 @@ expect_silent(vector_result <- extract_using_coords_t(vect_data, coords_t)) expect_equal( - vector_result[id_coord %in% 1], + vector_result[id_coord == 1][order(attribute, -rank(value))], data.table::data.table( id_coord = 1L, attribute = factor( From 51dc76f4cfcbeadfee71924a4c8e5ffb328df9bb Mon Sep 17 00:00:00 2001 From: mmyrte <24587121+mmyrte@users.noreply.github.com> Date: Mon, 8 Dec 2025 15:12:51 +0100 Subject: [PATCH 10/23] better dinamica testing? --- DESCRIPTION | 8 ++-- R/utils_dinamica.r | 73 ++++++++++++++++++++---------- inst/tinytest/test_util_dinamica.R | 27 ++++++++--- 3 files changed, 73 insertions(+), 35 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e41d486..906a133 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -10,7 +10,7 @@ URL: https://ethzplus.github.io/evoland-plus, https://github.com/ethzplus/evolan BugReports: https://github.com/ethzplus/evoland-plus/issues Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 Depends: R (>= 4.2) Imports: @@ -25,11 +25,13 @@ Imports: stringi, terra Suggests: + base64enc, butcher, pROC, - tinytest, + processx, quarto, - ranger + ranger, + tinytest VignetteBuilder: quarto Config/testthat/edition: 3 LinkingTo: diff --git a/R/utils_dinamica.r b/R/utils_dinamica.r index 4265127..ecd0ad2 100644 --- a/R/utils_dinamica.r +++ b/R/utils_dinamica.r @@ -26,6 +26,14 @@ exec_dinamica <- function( write_logfile = TRUE, echo = FALSE ) { + if (!requireNamespace("processx", quietly = TRUE)) { + stop( + "Package 'processx' is required for this function. ", + "Please install it with: install.packages('processx')", + call. = FALSE + ) + } + if (Sys.which("DinamicaConsole") == "") { stop( "DinamicaConsole not found on PATH. ", @@ -52,7 +60,6 @@ exec_dinamica <- function( message("Logging to ", logfile_path) # Use bash process substitution with sed to strip ANSI codes and tee to logfile - # This avoids the overhead of R callbacks for every chunk res <- processx::run( command = "bash", args = c( @@ -91,19 +98,14 @@ exec_dinamica <- function( ) } - if (res[["status"]] != 0L) { - err <- structure( - list( - message = paste( - "Dinamica registered an error.", - "Rerun with echo = TRUE or write_logfile = TRUE to see what went wrong.", - sep = "\n" - ), - stderr = res[["stderr"]] - ), - class = c("dinamicaconsole_error", "error", "condition") + if ( + res[["status"]] != 0L || + grepl("Dinamica EGO exited with an error", res[["stdout"]]) + ) { + stop( + "Dinamica registered an error. \n", + "Rerun with echo = TRUE or check logfile to see what went wrong." ) - stop(err) } invisible(res) @@ -168,21 +170,41 @@ run_evoland_dinamica_sim <- function( #' @param check Default TRUE, simple check to ensure that you're handling what you're expecting process_dinamica_script <- function(infile, outfile, mode = "encode", check = TRUE) { + if (!requireNamespace("base64enc", quietly = TRUE)) { + stop( + "Package 'base64enc' is required for this function. ", + "Please install it with: install.packages('base64enc')", + call. 
= FALSE + ) + } + mode <- match.arg(mode, c("encode", "decode")) if (inherits(infile, "AsIs")) { - file_text <- infile + file_text <- unclass(infile) } else { # read the input file as a single string file_text <- readChar(infile, file.info(infile)$size) } # match the Calculate R or Python Expression blocks - guesswork involved - pattern <- ':= Calculate(?:Python|R)Expression "(\\X*?)" (?:\\.no )?\\{\\{' - # extracts both full match [,1] and capture group [,2] - matches <- stringr::str_match_all(file_text, pattern)[[1]] + pattern <- r'(:= Calculate(?:Python|R)Expression "(\X*?)" (?:\.no )?\{\{)' + match_positions <- gregexpr(pattern, file_text, perl = TRUE)[[1]] + if (match_positions[1] == -1) { + # none found (position -1) + matches <- character(0) + } else { + # extracts first the full match as char vect and then the captured group + full_matches <- regmatches(file_text, match_positions) + # Extract the first capture group for each match + all_matches <- lapply(full_matches, function(m) { + cap <- regmatches(m, regexec(pattern, m, perl = TRUE))[[1]] + cap[2] + }) + matches <- do.call(c, all_matches) + } if (check) { - non_base64_chars_present <- stringr::str_detect(matches[, 2], "[^A-Za-z0-9+=\\n/]") + non_base64_chars_present <- grepl("[^A-Za-z0-9+=\\n/]", matches) if (mode == "encode" && any(!non_base64_chars_present)) { stop( "There are no non-base64 chars in one of the matched patterns, which seems ", @@ -198,20 +220,21 @@ process_dinamica_script <- function(infile, outfile, mode = "encode", check = TR } } - if (nrow(matches) > 0) { + if (length(matches) > 0) { encoder_decoder <- if (mode == "encode") { function(code) base64enc::base64encode(charToRaw(code)) } else { function(code) rawToChar(base64enc::base64decode(code)) } - # matches[,2] contains the captured R/python code OR base64-encoded code - encoded_vec <- vapply(matches[, 2], encoder_decoder, character(1), USE.NAMES = FALSE) + # matches contains the captured R/python code OR base64-encoded code + encoded_vec <- vapply(matches, encoder_decoder, character(1), USE.NAMES = FALSE) # replace each original code with its base64 encoded version for (i in seq_along(encoded_vec)) { - file_text <- stringr::str_replace( - string = file_text, - pattern = stringr::fixed(matches[i, 2]), - replacement = encoded_vec[i] + file_text <- sub( + pattern = matches[i], + replacement = encoded_vec[i], + x = file_text, + fixed = TRUE ) } } diff --git a/inst/tinytest/test_util_dinamica.R b/inst/tinytest/test_util_dinamica.R index 729293a..f5220c6 100644 --- a/inst/tinytest/test_util_dinamica.R +++ b/inst/tinytest/test_util_dinamica.R @@ -94,6 +94,14 @@ expect_equal( ) if (Sys.which("DinamicaConsole") != "") { + stopifnot( + !is.na(Sys.getenv("DINAMICA_EGO_8_INSTALLATION_DIRECTORY", unset = NA)), + !is.na(Sys.getenv("DINAMICA_EGO_CLI", unset = NA)), + !is.na(Sys.getenv("DINAMICA_EGO_8_HOME", unset = NA)) + ) + Sys.setenv( + "DINAMICA_EGO_8_TEMP_DIR" = tempdir() + ) # Test: exec_dinamica works tmpfile_ego <- tempfile(fileext = ".ego") writeChar( @@ -102,9 +110,9 @@ if (Sys.which("DinamicaConsole") != "") { eos = NULL ) expect_message( - expect_identical( - exec_dinamica(tmpfile_ego)[["status"]], - 0L + expect_length( + exec_dinamica(tmpfile_ego), + 4 # list of status, stdout, stderr, timeout ), "Logging to" ) @@ -127,12 +135,17 @@ if (Sys.which("DinamicaConsole") != "") { tmpfile_ego <- tempfile(fileext = ".ego") evoland:::process_dinamica_script(I(sample_dinamica_script_decoded), tmpfile_ego) + # capture the R error for the Dinamica CalculateRExpression 
(via stdout) expect_stdout( - expect_error( - exec_dinamica(tmpfile_ego, echo = TRUE), - "Dinamica registered an error" + # silence the logging message within this calling process + expect_message( + # capture the R error within this calling process + expect_error( + exec_dinamica(tmpfile_ego, echo = TRUE), + "Dinamica registered an error" + ) ), - pattern = "runcible spoon" + pattern = "Error caught in R execution: 'runcible spoon'" ) unlink(tmpfile_ego) } From 19435193960c82d6f5efe6f285b668e5691cd204 Mon Sep 17 00:00:00 2001 From: mmyrte <24587121+mmyrte@users.noreply.github.com> Date: Mon, 8 Dec 2025 15:13:09 +0100 Subject: [PATCH 11/23] reduced testing matrix --- .github/workflows/R-CMD-check.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 562fe0f..03c0032 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -19,11 +19,11 @@ jobs: fail-fast: false matrix: config: - - {os: macos-latest, r: 'release'} - - {os: windows-latest, r: 'release'} - - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - - {os: ubuntu-latest, r: 'release'} - - {os: ubuntu-latest, r: 'oldrel-1'} + - { os: macos-latest, r: "release" } + # - {os: windows-latest, r: 'release'} + # - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - { os: ubuntu-latest, r: "release" } + # - {os: ubuntu-latest, r: 'oldrel-1'} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} From bd33ed7a57fab238273765160bcaa083f776ef13 Mon Sep 17 00:00:00 2001 From: mmyrte <24587121+mmyrte@users.noreply.github.com> Date: Mon, 8 Dec 2025 15:50:25 +0100 Subject: [PATCH 12/23] github runners: ubuntu 24.04 on ARM + macOS system deps --- .github/workflows/R-CMD-check.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 03c0032..776e959 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -23,6 +23,7 @@ jobs: # - {os: windows-latest, r: 'release'} # - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - { os: ubuntu-latest, r: "release" } + - { os: ubuntu-24.04-arm, r: "release" } # - {os: ubuntu-latest, r: 'oldrel-1'} env: @@ -40,6 +41,10 @@ jobs: http-user-agent: ${{ matrix.config.http-user-agent }} use-public-rspm: true + - name: Install macOS system dependencies + if: runner.os == 'macos' + run: brew install gdal proj + - uses: r-lib/actions/setup-r-dependencies@v2 with: extra-packages: any::rcmdcheck From 9535759d94e0b0328f5e442eb0f330c48ec9d66c Mon Sep 17 00:00:00 2001 From: mmyrte <24587121+mmyrte@users.noreply.github.com> Date: Mon, 8 Dec 2025 15:55:12 +0100 Subject: [PATCH 13/23] don't run actions on every PR push --- .github/workflows/R-CMD-check.yaml | 1 - .github/workflows/pkgdown.yaml | 1 - 2 files changed, 2 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 776e959..a4cef02 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -3,7 +3,6 @@ on: push: branches: [main, master] - pull_request: name: R-CMD-check.yaml diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index dda4b15..497bbe3 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -3,7 +3,6 @@ on: push: branches: [main, master] - pull_request: release: types: [published] workflow_dispatch: From 5b4631085773694ee71b598071d3f97ebb419ea9 Mon Sep 17 
00:00:00 2001 From: mmyrte <24587121+mmyrte@users.noreply.github.com> Date: Wed, 10 Dec 2025 16:59:46 +0100 Subject: [PATCH 14/23] disable ARM tests --- .Rbuildignore | 1 + .github/workflows/R-CMD-check.yaml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.Rbuildignore b/.Rbuildignore index 1890937..0c10af5 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -9,3 +9,4 @@ air.toml ^_pkgdown\.yml$ ^docs$ ^pkgdown$ +^.devcontainer$ diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index a4cef02..28a29fd 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -22,7 +22,7 @@ jobs: # - {os: windows-latest, r: 'release'} # - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - { os: ubuntu-latest, r: "release" } - - { os: ubuntu-24.04-arm, r: "release" } + # - { os: ubuntu-24.04-arm, r: "release" } # - {os: ubuntu-latest, r: 'oldrel-1'} env: From eb7f5422cbbae4f34e941073ccd54c1c46c45cc9 Mon Sep 17 00:00:00 2001 From: mmyrte <24587121+mmyrte@users.noreply.github.com> Date: Wed, 10 Dec 2025 17:04:29 +0100 Subject: [PATCH 15/23] add JSON format support --- R/evoland_db.R | 6 +-- R/evoland_db_tables.R | 23 ++++++---- R/parquet_duckdb.R | 67 +++++++++++++++++++---------- inst/tinytest/test_parquet_duckdb.R | 24 +++++------ inst/tinytest/test_trans_models_t.R | 5 ++- inst/tinytest/test_trans_preds_t.R | 6 +-- man/evoland_db.Rd | 6 +-- man/parquet_duckdb.Rd | 10 +++-- 8 files changed, 90 insertions(+), 57 deletions(-) diff --git a/R/evoland_db.R b/R/evoland_db.R index 9aebd16..cd4cf50 100644 --- a/R/evoland_db.R +++ b/R/evoland_db.R @@ -2,7 +2,7 @@ #' #' @description #' An R6 class that provides an interface to a folder-based data storage system -#' for the evoland package. Each table is stored as a parquet (or CSV) file. +#' for the evoland package. Each table is stored as a parquet (or JSON) file. #' This class uses DuckDB for in-memory SQL operations while persisting data #' to disk in parquet format for better compression. #' @@ -27,14 +27,14 @@ evoland_db <- R6::R6Class( #' @description #' Initialize a new evoland_db object #' @param path Character string. Path to the data folder. - #' @param default_format Character. Default file format ("parquet" or "csv"). + #' @param default_format Character. Default file format ("parquet" or "json"). #' Default is "parquet". #' @param ... passed on to `set_report` #' #' @return A new `evoland_db` object initialize = function( path, - default_format = c("parquet", "csv"), + default_format = c("parquet", "json"), ... ) { # Initialize parent class with spatial extension diff --git a/R/evoland_db_tables.R b/R/evoland_db_tables.R index 42821c0..5895cf2 100644 --- a/R/evoland_db_tables.R +++ b/R/evoland_db_tables.R @@ -33,10 +33,9 @@ create_table_binding <- function( key_cols = NULL, autoincrement_cols = NULL, map_cols = NULL, + format = NULL, ... ) { - extra_args <- list(...) 
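Replacing the captured `extra_args` with direct `...` forwarding is equivalent for the constant arguments used here, while avoiding the extra list and `do.call()` dispatch. A standalone sketch of the two styles (hypothetical `f_old`/`f_new`, base R only):

f_old <- function(x, ...) {
  extra_args <- list(...)
  do.call(paste, c(list(x), extra_args))
}
f_new <- function(x, ...) paste(x, ...)
stopifnot(identical(f_old("a", "b", sep = "-"), f_new("a", "b", sep = "-"))) # both "a-b"

One behavioural nuance: `list(...)` forces the dots when the binding is created, whereas plain forwarding leaves them lazy until the inner call runs.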
-
   function(x) {
     if (missing(x)) {
       fetched <- self$fetch(table_name)
@@ -45,7 +44,7 @@ create_table_binding <- function(
       fetched <- convert_list_cols(fetched, map_cols, kv_df_to_list)
     }
 
-    return(do.call(as_fn, c(list(fetched), extra_args)))
+    return(as_fn(fetched, ...))
   }
 
   stopifnot(inherits(x, table_name))
@@ -56,7 +55,8 @@ create_table_binding <- function(
     key_cols = key_cols,
     autoincrement_cols = autoincrement_cols,
     map_cols = map_cols,
-    method = "upsert"
+    method = "upsert",
+    format = format
   )
 }
}
@@ -84,7 +84,8 @@ evoland_db$set("active", "lulc_meta_t", function(x) {
   self,
   "lulc_meta_t",
   as_lulc_meta_t,
-  key_cols = "id_lulc"
+  key_cols = "id_lulc",
+  format = "json"
 )(x)
})
@@ -135,7 +136,8 @@ evoland_db$set("active", "pred_meta_t", function(x) {
   "pred_meta_t",
   as_pred_meta_t,
   key_cols = "name",
-  autoincrement_cols = "id_pred"
+  autoincrement_cols = "id_pred",
+  format = "json"
 )(x)
})
@@ -145,7 +147,8 @@ evoland_db$set("active", "trans_meta_t", function(x) {
   "trans_meta_t",
   as_trans_meta_t,
   key_cols = c("id_lulc_anterior", "id_lulc_posterior"),
-  autoincrement_cols = "id_trans"
+  autoincrement_cols = "id_trans",
+  format = "json"
 )(x)
})
@@ -164,7 +167,8 @@ evoland_db$set("active", "intrv_meta_t", function(x) {
   "intrv_meta_t",
   as_intrv_meta_t,
   key_cols = "id_intrv",
-  map_cols = "params"
+  map_cols = "params",
+  format = "json"
 )(x)
})
@@ -193,7 +197,8 @@ evoland_db$set("active", "alloc_params_t", function(x) {
   "alloc_params_t",
   as_alloc_params_t,
   key_cols = c("id_trans", "id_period"),
-  map_cols = c("alloc_params", "goodness_of_fit")
+  map_cols = c("alloc_params", "goodness_of_fit"),
+  format = "json"
 )(x)
})
diff --git a/R/parquet_duckdb.R b/R/parquet_duckdb.R
index 74c4486..b167c9b 100644
--- a/R/parquet_duckdb.R
+++ b/R/parquet_duckdb.R
@@ -28,14 +28,14 @@ parquet_duckdb <- R6::R6Class(
   #' @description
   #' Initialize a new parquet_duckdb object
   #' @param path Character string. Path to the data folder.
-  #' @param default_format Character. Default file format ("parquet" or "csv").
+  #' @param default_format Character. Default file format ("parquet" or "json").
   #' Default is "parquet". 
#' @param extensions Character vector of DuckDB extensions to load (e.g., "spatial") #' #' @return A new `parquet_duckdb` object initialize = function( path, - default_format = c("parquet", "csv"), + default_format = c("parquet", "json"), extensions = character(0) ) { # Create folder if it doesn't exist @@ -43,12 +43,7 @@ parquet_duckdb <- R6::R6Class( # Set format / writeopts self$default_format <- match.arg(default_format) - self$writeopts <- switch( - self$default_format, - parquet = "format parquet, compression zstd", - csv = "format csv", - stop(glue::glue("Unsupported format: {self$default_format}")) - ) + self$writeopts <- private$format_to_writeopts(self$default_format) # Create in-memory connection for SQL operations self$connection <- DBI::dbConnect(duckdb::duckdb(), dbdir = ":memory:") @@ -153,7 +148,7 @@ parquet_duckdb <- R6::R6Class( #' List all tables (files) in storage #' @return Character vector of table names list_tables = function() { - list.files(self$path, pattern = "\\.(parquet|csv)$", full.names = FALSE) |> + list.files(self$path, pattern = "\\.(parquet|json)$", full.names = FALSE) |> tools::file_path_sans_ext() |> unique() |> sort() @@ -242,13 +237,16 @@ parquet_duckdb <- R6::R6Class( return(count_before) } + # Use the same format as the existing file + writeopts <- private$format_to_writeopts(file_info$format) + self$execute(glue::glue( r"{ copy ( select * from read_{file_info$format}('{file_info$path}') where not ({where}) ) - to '{file_info$path}' ({self$writeopts}) + to '{file_info$path}' ({writeopts}) }" )) @@ -268,6 +266,8 @@ parquet_duckdb <- R6::R6Class( #' @param map_cols Character vector of columns to convert to MAP format #' @param method Character, one of "overwrite", "append", "upsert" (upsert being an #' update for existing rows, and insert for new rows) + #' @param format Character. Optional format override ("parquet" or "json"). If NULL, + #' uses the existing file's format or default_format for new files. 
#' @return Invisible NULL (called for side effects) commit = function( x, @@ -275,7 +275,8 @@ parquet_duckdb <- R6::R6Class( key_cols, autoincrement_cols = character(0), map_cols = character(0), - method = c("overwrite", "append", "upsert") + method = c("overwrite", "append", "upsert"), + format = NULL ) { method <- match.arg(method) @@ -285,7 +286,7 @@ parquet_duckdb <- R6::R6Class( "select column_name from (describe new_data_v)" )[[1]] - file_info <- private$get_file_path(table_name) + file_info <- private$get_file_path(table_name, format) if (method == "overwrite" || !file_info$exists) { # in case overwrite explicitly required, or no previously existing data to @@ -455,12 +456,13 @@ parquet_duckdb <- R6::R6Class( sep = ",\n " ) + writeopts <- private$format_to_writeopts(file_info$format) self$execute(glue::glue( r"{ copy ( select {select_expr} from new_data_v - ) to '{file_info$path}' ({self$writeopts}) + ) to '{file_info$path}' ({writeopts}) }" )) }, @@ -487,6 +489,8 @@ parquet_duckdb <- R6::R6Class( sep = ",\n " ) + writeopts <- private$format_to_writeopts(file_info$format) + # Concatenation using UNION ALL; "by name" handles missing columns self$execute(glue::glue( r"{ @@ -496,7 +500,7 @@ parquet_duckdb <- R6::R6Class( select {select_new} from new_data_v ) - to '{file_info$path}' ({self$writeopts}) + to '{file_info$path}' ({writeopts}) }" )) }, @@ -568,36 +572,53 @@ parquet_duckdb <- R6::R6Class( }" )) - self$execute(glue::glue("copy {table_name} to '{file_info$path}' ({self$writeopts})")) + writeopts <- private$format_to_writeopts(file_info$format) + + self$execute(glue::glue("copy {table_name} to '{file_info$path}' ({writeopts})")) }, # Get file path and format for a table # # @param table_name Character string table name + # @param format_override Character. Optional format override # @return List with path, format, and exists flag - get_file_path = function(table_name) { - # Check for parquet first, then csv + get_file_path = function(table_name, format_override = NULL) { + # Check for parquet first, then json parquet_path <- file.path(self$path, paste0(table_name, ".parquet")) - csv_path <- file.path(self$path, paste0(table_name, ".csv")) + json_path <- file.path(self$path, paste0(table_name, ".json")) if (file.exists(parquet_path)) { return(list(path = parquet_path, format = "parquet", exists = TRUE)) - } else if (file.exists(csv_path)) { - return(list(path = csv_path, format = "csv", exists = TRUE)) + } else if (file.exists(json_path)) { + return(list(path = json_path, format = "json", exists = TRUE)) } else { - # Return default format for new file + # Return format override or default format for new file + use_format <- format_override %||% self$default_format default_path <- file.path( self$path, - paste0(table_name, ".", self$default_format) + paste0(table_name, ".", use_format) ) return(list( path = default_path, - format = self$default_format, + format = use_format, exists = FALSE )) } }, + # Convert format to writeopts + # + # @param format Character. Format name ("parquet" or "json") + # @return Character. 
DuckDB write options string + format_to_writeopts = function(format) { + switch( + format, + parquet = "format parquet, compression zstd", + json = "format json", + stop(glue::glue("Unsupported format: {format}")) + ) + }, + # Register new_data_v table, optionally converting MAP columns # # @param x Data to register diff --git a/inst/tinytest/test_parquet_duckdb.R b/inst/tinytest/test_parquet_duckdb.R index 5f1c3db..97649a6 100644 --- a/inst/tinytest/test_parquet_duckdb.R +++ b/inst/tinytest/test_parquet_duckdb.R @@ -318,24 +318,24 @@ expect_true(inherits(result, "data.table")) expect_true("max_id" %in% names(result)) db$detach_table("test_attach") -# Test 31: CSV format support -test_dir_csv <- tempfile("parquet_duckdb_csv_") -on.exit(unlink(test_dir_csv, recursive = TRUE), add = TRUE) +# Test 31: JSON format support +test_dir_json <- tempfile("parquet_duckdb_json_") +on.exit(unlink(test_dir_json, recursive = TRUE), add = TRUE) -db_csv <- parquet_duckdb$new( - path = test_dir_csv, - default_format = "csv" +db_json <- parquet_duckdb$new( + path = test_dir_json, + default_format = "json" ) -expect_equal(db_csv$default_format, "csv") +expect_equal(db_json$default_format, "json") -test_csv_data <- data.table::data.table( +test_json_data <- data.table::data.table( id = 1:3, name = c("a", "b", "c") ) -db_csv$commit(test_csv_data, "csv_table", method = "overwrite") -expect_true("csv_table" %in% db_csv$list_tables()) -retrieved <- db_csv$fetch("csv_table") -expect_equal(retrieved, test_csv_data) +db_json$commit(test_json_data, "json_table", method = "overwrite") +expect_true("json_table" %in% db_json$list_tables()) +retrieved <- db_json$fetch("json_table") +expect_equal(retrieved, test_json_data) # Test 32: Extension loading test_dir_ext <- tempfile("parquet_duckdb_ext_") diff --git a/inst/tinytest/test_trans_models_t.R b/inst/tinytest/test_trans_models_t.R index b5fcb02..fc6aedd 100644 --- a/inst/tinytest/test_trans_models_t.R +++ b/inst/tinytest/test_trans_models_t.R @@ -103,7 +103,10 @@ pred_spec_tm <- list( sources = list(list(url = "https://example.com/roads.gpkg", md5sum = "ghi789")) ) ) -db_tm$pred_meta_t <- create_pred_meta_t(pred_spec_tm) +expect_warning( + db_tm$pred_meta_t <- create_pred_meta_t(pred_spec_tm), + "Overriding existing IDs" +) # Add predictor data - mix of static and time-varying set.seed(43) diff --git a/inst/tinytest/test_trans_preds_t.R b/inst/tinytest/test_trans_preds_t.R index 6cee99a..59d1bc7 100644 --- a/inst/tinytest/test_trans_preds_t.R +++ b/inst/tinytest/test_trans_preds_t.R @@ -147,10 +147,10 @@ pred_data_int <- data.table::data.table( ) db_tps$pred_data_t_int <- as_pred_data_t(pred_data_int, type = "int") -expect_true(setequal( - db_tps$trans_pred_data_v(1L)[, id_coord], +expect_equal( + sort(db_tps$trans_pred_data_v(1L)[, id_coord]), c(1, 2, 2, 4, 4, 7, 8, 10, 10, 12, 12, 13, 13, 16, 16, 17, 17, 18, 20, 21, 23, 24) -)) +) # Test pruning expect_message( diff --git a/man/evoland_db.Rd b/man/evoland_db.Rd index 590d1ad..93d91aa 100644 --- a/man/evoland_db.Rd +++ b/man/evoland_db.Rd @@ -5,7 +5,7 @@ \title{R6 Class for Folder-Based Data Storage Interface} \description{ An R6 class that provides an interface to a folder-based data storage system -for the evoland package. Each table is stored as a parquet (or CSV) file. +for the evoland package. Each table is stored as a parquet (or JSON) file. This class uses DuckDB for in-memory SQL operations while persisting data to disk in parquet format for better compression. 
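For illustration, a minimal sketch of how the per-table format override introduced in this patch is meant to be used; the folder, table name, and data below are hypothetical and not part of the test suite:

    library(evoland)

    db <- parquet_duckdb$new(tempfile("fmt_demo_"), default_format = "parquet")
    x <- data.table::data.table(id = 1:2, name = c("a", "b"))

    # format = "json" overrides the parquet default for this one table; a later
    # commit() finds the existing .json file and keeps writing JSON to it
    db$commit(x, "demo_t", key_cols = "id", method = "overwrite", format = "json")
    db$fetch("demo_t")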
@@ -95,7 +95,7 @@ Additional methods and active bindings are added to this class in separate files
\subsection{Method \code{new()}}{
Initialize a new evoland_db object
\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{evoland_db$new(path, default_format = c("parquet", "csv"), ...)}\if{html}{\out{</div>}}
+\if{html}{\out{<div class="r">}}\preformatted{evoland_db$new(path, default_format = c("parquet", "json"), ...)}\if{html}{\out{
}} } \subsection{Arguments}{ @@ -103,7 +103,7 @@ Initialize a new evoland_db object \describe{ \item{\code{path}}{Character string. Path to the data folder.} -\item{\code{default_format}}{Character. Default file format ("parquet" or "csv"). +\item{\code{default_format}}{Character. Default file format ("parquet" or "json"). Default is "parquet".} \item{\code{...}}{passed on to \code{set_report}} diff --git a/man/parquet_duckdb.Rd b/man/parquet_duckdb.Rd index cb41afa..58f2f22 100644 --- a/man/parquet_duckdb.Rd +++ b/man/parquet_duckdb.Rd @@ -48,7 +48,7 @@ Initialize a new parquet_duckdb object \subsection{Usage}{ \if{html}{\out{
}}\preformatted{parquet_duckdb$new( path, - default_format = c("parquet", "csv"), + default_format = c("parquet", "json"), extensions = character(0) )}\if{html}{\out{
}} } @@ -58,7 +58,7 @@ Initialize a new parquet_duckdb object \describe{ \item{\code{path}}{Character string. Path to the data folder.} -\item{\code{default_format}}{Character. Default file format ("parquet" or "csv"). +\item{\code{default_format}}{Character. Default file format ("parquet" or "json"). Default is "parquet".} \item{\code{extensions}}{Character vector of DuckDB extensions to load (e.g., "spatial")} @@ -270,7 +270,8 @@ identity columns, and list-to-MAP conversion. key_cols, autoincrement_cols = character(0), map_cols = character(0), - method = c("overwrite", "append", "upsert") + method = c("overwrite", "append", "upsert"), + format = NULL )}\if{html}{\out{}} } @@ -290,6 +291,9 @@ use all columns starting with \code{id_}} \item{\code{method}}{Character, one of "overwrite", "append", "upsert" (upsert being an update for existing rows, and insert for new rows)} + +\item{\code{format}}{Character. Optional format override ("parquet" or "json"). If NULL, +uses the existing file's format or default_format for new files.} } \if{html}{\out{}} } From af53413e870237db6266b24724f20f6fdc442e26 Mon Sep 17 00:00:00 2001 From: mmyrte <24587121+mmyrte@users.noreply.github.com> Date: Wed, 10 Dec 2025 17:09:09 +0100 Subject: [PATCH 16/23] R CMD check: add workflow_dispatch --- .github/workflows/R-CMD-check.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 28a29fd..156d157 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -3,6 +3,7 @@ on: push: branches: [main, master] + workflow_dispatch: name: R-CMD-check.yaml From e710ff7177b82e8de31da1094ddf8dc8ac2e64a2 Mon Sep 17 00:00:00 2001 From: mmyrte <24587121+mmyrte@users.noreply.github.com> Date: Wed, 10 Dec 2025 17:22:00 +0100 Subject: [PATCH 17/23] move reporting_t into class --- DESCRIPTION | 1 + NAMESPACE | 2 ++ R/evoland_db.R | 25 +--------------- R/evoland_db_tables.R | 9 ++++++ R/reporting_t.R | 66 +++++++++++++++++++++++++++++++++++++++++++ man/reporting_t.Rd | 20 +++++++++++++ 6 files changed, 99 insertions(+), 24 deletions(-) create mode 100644 R/reporting_t.R create mode 100644 man/reporting_t.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 906a133..98e7c6b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -56,6 +56,7 @@ Collate: 'periods_t.R' 'pred_data_t.R' 'pred_meta_t.R' + 'reporting_t.R' 'trans_meta_t.R' 'trans_models_glm.R' 'trans_models_rf.R' diff --git a/NAMESPACE b/NAMESPACE index 5d387f5..6734768 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -30,6 +30,7 @@ S3method(validate,pred_data_t_bool) S3method(validate,pred_data_t_float) S3method(validate,pred_data_t_int) S3method(validate,pred_meta_t) +S3method(validate,reporting_t) S3method(validate,trans_meta_t) S3method(validate,trans_models_t) S3method(validate,trans_preds_t) @@ -43,6 +44,7 @@ export(as_neighbors_t) export(as_periods_t) export(as_pred_data_t) export(as_pred_meta_t) +export(as_reporting_t) export(as_trans_meta_t) export(as_trans_models_t) export(as_trans_preds_t) diff --git a/R/evoland_db.R b/R/evoland_db.R index cd4cf50..6c5a692 100644 --- a/R/evoland_db.R +++ b/R/evoland_db.R @@ -83,30 +83,7 @@ evoland_db <- R6::R6Class( #' @param ... each named argument is entered into the table with the argument name #' as its key set_report = function(...) { - params <- list(...) 
- if (self$row_count("reporting_t") == 0L) { - # only upsert if these values are missing upon DB init - params[["report_name"]] <- - params[["report_name"]] %||% "evoland_scenario" - params[["report_name_pretty"]] <- - params[["report_name_pretty"]] %||% "Default Evoland Scenario" - params[["report_include_date"]] <- - params[["report_include_date"]] %||% "TRUE" - params[["creator_username"]] <- - params[["creator_username"]] %||% Sys.getenv("USER", unset = "unknown") - } - params[["last_opened"]] <- format(Sys.time(), "%Y-%m-%dT%H:%M:%SZ", tz = "UTC") - params[["last_opened_username"]] <- Sys.getenv("USER", unset = "unknown") - - self$commit( - data.table::as.data.table(list( - key = names(params), # cannot name a column "key" in data.table() - value = unlist(params) - )), - table_name = "reporting_t", - key_cols = "key", - method = "upsert" - ) + db_set_report(self, ...) }, #' @description diff --git a/R/evoland_db_tables.R b/R/evoland_db_tables.R index 5895cf2..c9ad4c5 100644 --- a/R/evoland_db_tables.R +++ b/R/evoland_db_tables.R @@ -210,3 +210,12 @@ evoland_db$set("active", "neighbors_t", function(x) { key_cols = c("id_coord_origin", "id_coord_neighbor") )(x) }) + +evoland_db$set("active", "reporting_t", function(x) { + create_table_binding( + self, + "reporting_t", + as_reporting_t, + key_cols = "key" + )(x) +}) diff --git a/R/reporting_t.R b/R/reporting_t.R new file mode 100644 index 0000000..24627eb --- /dev/null +++ b/R/reporting_t.R @@ -0,0 +1,66 @@ +#' Create Reporting Table +#' +#' The reporting table holds information handy for writing out reports (tables, +#' graphs...) +#' +#' @name reporting_t +#' +#' @return A data.table of class "reporting_t" with columns: +#' - `key`: Unique character key +#' - `value`: Value as character +#' @export +as_reporting_t <- function(x) { + if (missing(x)) { + x <- data.table::data.table( + key = character(), + value = character() + ) + } + new_evoland_table( + x, + "reporting_t", + "key" + ) +} + +db_set_report <- function(self, ...) { + params <- list(...) + if (self$row_count("reporting_t") == 0L) { + # only upsert if these values are missing upon DB init + params[["report_name"]] <- + params[["report_name"]] %||% "evoland_scenario" + params[["report_name_pretty"]] <- + params[["report_name_pretty"]] %||% "Default Evoland Scenario" + params[["report_include_date"]] <- + params[["report_include_date"]] %||% "TRUE" + params[["creator_username"]] <- + params[["creator_username"]] %||% Sys.getenv("USER", unset = "unknown") + } + params[["last_opened"]] <- format(Sys.time(), "%Y-%m-%dT%H:%M:%SZ", tz = "UTC") + params[["last_opened_username"]] <- Sys.getenv("USER", unset = "unknown") + + self$reporting_t <- as_reporting_t(list( + key = names(params), # cannot name a column "key" in data.table() + value = unlist(params) + )) +} + + +#' @export +validate.reporting_t <- function(x, ...) 
{ + NextMethod() + + data.table::setcolorder( + x, + c( + "key", + "value" + ) + ) + + stopifnot( + !anyDuplicated(x[["key"]]) + ) + + return(x) +} diff --git a/man/reporting_t.Rd b/man/reporting_t.Rd new file mode 100644 index 0000000..0a5d592 --- /dev/null +++ b/man/reporting_t.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/reporting_t.R +\name{reporting_t} +\alias{reporting_t} +\alias{as_reporting_t} +\title{Create Reporting Table} +\usage{ +as_reporting_t(x) +} +\value{ +A data.table of class "reporting_t" with columns: +\itemize{ +\item \code{key}: Unique character key +\item \code{value}: Value as character +} +} +\description{ +The reporting table holds information handy for writing out reports (tables, +graphs...) +} From 99338fd110988412afd423f7de7a94b15d8977cf Mon Sep 17 00:00:00 2001 From: mmyrte <24587121+mmyrte@users.noreply.github.com> Date: Wed, 10 Dec 2025 22:23:14 +0100 Subject: [PATCH 18/23] remove evoland_db$fetch special case --- R/evoland_db.R | 27 --------------------------- inst/tinytest/test_parquet_duckdb.R | 6 ++---- inst/tinytest/test_trans_models_t.R | 7 +++---- inst/tinytest/test_trans_preds_t.R | 3 +-- man/evoland_db.Rd | 26 +------------------------- 5 files changed, 7 insertions(+), 62 deletions(-) diff --git a/R/evoland_db.R b/R/evoland_db.R index 6c5a692..e0e22ca 100644 --- a/R/evoland_db.R +++ b/R/evoland_db.R @@ -50,33 +50,6 @@ evoland_db <- R6::R6Class( invisible(self) }, - #' @description - #' Fetch data from storage with evoland-specific view support - #' @param table_name Character string. Name of the table to query. - #' @param where Character string. Optional WHERE clause for the SQL query. - #' @param limit integerish, limit the amount of rows to return - #' - #' @return A data.table - fetch = function(table_name, where = NULL, limit = NULL) { - # Check if this is a view (active binding) - if ( - # TODO these should probably not be active bindings, but instead methods with - # predefined query parameters - table_name %in% - c("lulc_meta_long_v", "pred_sources_v", "transitions_v", "extent", "coords_minimal") - ) { - return(self[[table_name]]) - } - - file_info <- private$get_file_path(table_name) - - if (!file_info$exists) { - stop("Table `", table_name, "` does not exist") - } - - super$fetch(table_name, where, limit) - }, - ### Setter methods ---- #' @description #' Set reporting metadata diff --git a/inst/tinytest/test_parquet_duckdb.R b/inst/tinytest/test_parquet_duckdb.R index 97649a6..caaa973 100644 --- a/inst/tinytest/test_parquet_duckdb.R +++ b/inst/tinytest/test_parquet_duckdb.R @@ -21,10 +21,8 @@ expect_true(inherits(db$connection, "duckdb_connection")) # Test 2: Initial state - no tables expect_identical(db$list_tables(), character(0)) -# Test 3: Fetch from non-existent table returns empty data.table -result <- db$fetch("nonexistent_table") -expect_true(inherits(result, "data.table")) -expect_equal(nrow(result), 0L) +# Test 3: Fetch for nonexistent errors out +expect_error(db$fetch("nonexistent_table"), "does not exist") # Test 4: Row count for non-existent table expect_equal(db$row_count("nonexistent_table"), 0L) diff --git a/inst/tinytest/test_trans_models_t.R b/inst/tinytest/test_trans_models_t.R index fc6aedd..53dba30 100644 --- a/inst/tinytest/test_trans_models_t.R +++ b/inst/tinytest/test_trans_models_t.R @@ -76,7 +76,7 @@ lulc_data <- data.table::rbindlist(list( db_tm$lulc_data_t <- as_lulc_data_t(lulc_data) # Create transition metadata -transitions <- 
db_tm$fetch("transitions_v") +transitions <- db_tm$transitions_v db_tm$trans_meta_t <- create_trans_meta_t( transitions, min_cardinality_abs = 5L @@ -103,9 +103,8 @@ pred_spec_tm <- list( sources = list(list(url = "https://example.com/roads.gpkg", md5sum = "ghi789")) ) ) -expect_warning( - db_tm$pred_meta_t <- create_pred_meta_t(pred_spec_tm), - "Overriding existing IDs" +expect_silent( + db_tm$pred_meta_t <- create_pred_meta_t(pred_spec_tm) ) # Add predictor data - mix of static and time-varying diff --git a/inst/tinytest/test_trans_preds_t.R b/inst/tinytest/test_trans_preds_t.R index 59d1bc7..1b6f66a 100644 --- a/inst/tinytest/test_trans_preds_t.R +++ b/inst/tinytest/test_trans_preds_t.R @@ -57,9 +57,8 @@ lulc_data <- data.table::rbindlist(list( db_tps$lulc_data_t <- as_lulc_data_t(lulc_data) # Create transition metadata -transitions <- db_tps$fetch("transitions_v") db_tps$trans_meta_t <- create_trans_meta_t( - transitions, + db_tps$transitions_v, min_cardinality_abs = 5L ) diff --git a/man/evoland_db.Rd b/man/evoland_db.Rd index 93d91aa..2fced8e 100644 --- a/man/evoland_db.Rd +++ b/man/evoland_db.Rd @@ -29,7 +29,6 @@ Additional methods and active bindings are added to this class in separate files \item \href{#method-evoland_db-generate_neighbor_predictors}{\code{evoland_db$generate_neighbor_predictors()}} \item \href{#method-evoland_db-trans_pred_data_v}{\code{evoland_db$trans_pred_data_v()}} \item \href{#method-evoland_db-new}{\code{evoland_db$new()}} -\item \href{#method-evoland_db-fetch}{\code{evoland_db$fetch()}} \item \href{#method-evoland_db-set_report}{\code{evoland_db$set_report()}} \item \href{#method-evoland_db-set_coords}{\code{evoland_db$set_coords()}} \item \href{#method-evoland_db-set_periods}{\code{evoland_db$set_periods()}} @@ -49,6 +48,7 @@ Additional methods and active bindings are added to this class in separate files
  • evoland::parquet_duckdb$delete_from()
  • evoland::parquet_duckdb$detach_table()
  • evoland::parquet_duckdb$execute()
  • + evoland::parquet_duckdb$fetch()
  • evoland::parquet_duckdb$get_query()
  • evoland::parquet_duckdb$list_tables()
  • evoland::parquet_duckdb$print()
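For illustration, the access pattern this patch standardizes on: views are read through their active bindings, while plain tables go through the fetch() method now inherited from parquet_duckdb. The path below is hypothetical:

    library(evoland)

    db <- evoland_db$new(tempfile("evoland_demo_"))

    # views are read via active bindings rather than a fetch() special case
    transitions <- db$transitions_v

    # fetch() on a missing table now errors instead of returning an empty data.table
    try(db$fetch("nonexistent_table"))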
@@ -115,30 +115,6 @@ A new \code{evoland_db} object
}
}
\if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-evoland_db-fetch"></a>}}
-\if{latex}{\out{\hypertarget{method-evoland_db-fetch}{}}}
-\subsection{Method \code{fetch()}}{
-Fetch data from storage with evoland-specific view support
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{evoland_db$fetch(table_name, where = NULL, limit = NULL)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{table_name}}{Character string. Name of the table to query.}
-
-\item{\code{where}}{Character string. Optional WHERE clause for the SQL query.}
-
-\item{\code{limit}}{integerish, limit the amount of rows to return}
-}
-\if{html}{\out{</div>}}
-}
-\subsection{Returns}{
-A data.table
-}
-}
-\if{html}{\out{
    }} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-evoland_db-set_report}{}}} \subsection{Method \code{set_report()}}{ From f7283f570a3351d628642a77a646ddff36ee393b Mon Sep 17 00:00:00 2001 From: mmyrte <24587121+mmyrte@users.noreply.github.com> Date: Wed, 10 Dec 2025 22:23:44 +0100 Subject: [PATCH 19/23] add json-specific col specs --- R/evoland_db_tables.R | 64 +++++++++++++++++++++++++++------ R/parquet_duckdb.R | 33 ++++++++++++++--- inst/tinytest/test_evoland_db.R | 2 +- man/parquet_duckdb.Rd | 12 ++++++- 4 files changed, 95 insertions(+), 16 deletions(-) diff --git a/R/evoland_db_tables.R b/R/evoland_db_tables.R index c9ad4c5..cbc50b7 100644 --- a/R/evoland_db_tables.R +++ b/R/evoland_db_tables.R @@ -34,15 +34,16 @@ create_table_binding <- function( autoincrement_cols = NULL, map_cols = NULL, format = NULL, + json_colspec = NULL, ... ) { function(x) { if (missing(x)) { - fetched <- self$fetch(table_name) - - if (!is.null(map_cols) && nrow(fetched) > 0) { - fetched <- convert_list_cols(fetched, map_cols, kv_df_to_list) - } + fetched <- self$fetch( + table_name = table_name, + map_cols = map_cols, + json_colspec = json_colspec + ) return(as_fn(fetched, ...)) } @@ -85,7 +86,14 @@ evoland_db$set("active", "lulc_meta_t", function(x) { "lulc_meta_t", as_lulc_meta_t, key_cols = "id_lulc", - form = "json" + form = "json", + json_colspec = r"[ + id_lulc: 'int', + name: 'varchar', + pretty_name: 'varchar', + description: 'varchar', + src_classes: 'int[]' + ]" )(x) }) @@ -137,7 +145,17 @@ evoland_db$set("active", "pred_meta_t", function(x) { as_pred_meta_t, key_cols = "name", autoincrement_cols = "id_pred", - format = "json" + form = "json", + json_colspec = r"[ + id_pred: 'int', + name: 'varchar', + pretty_name: 'varchar', + description: 'varchar', + orig_format: 'varchar', + sources: 'struct(url varchar, md5sum varchar)[]', + unit: 'varchar', + factor_levels: 'map(integer, varchar)' + ]" )(x) }) @@ -148,7 +166,16 @@ evoland_db$set("active", "trans_meta_t", function(x) { as_trans_meta_t, key_cols = c("id_lulc_anterior", "id_lulc_posterior"), autoincrement_cols = "id_trans", - format = "json" + form = "json", + json_colspec = r"[ + id_trans: 'int', + id_lulc_anterior: 'int', + id_lulc_posterior: 'int', + cardinality: 'int', + frequency_rel: 'float', + frequency_abs: 'float', + is_viable: 'bool' + ]" )(x) }) @@ -168,7 +195,18 @@ evoland_db$set("active", "intrv_meta_t", function(x) { as_intrv_meta_t, key_cols = "id_intrv", map_cols = "params", - format = "json" + form = "json", + json_colspec = r"[ + id_intrv: 'int', + id_period_list: 'int[]', + id_trans_list: 'int[]', + pre_allocation: 'bool', + name: 'varchar', + pretty_name: 'varchar', + description: 'varchar', + sources: 'struct(url varchar, md5sum varchar)[]', + params: 'map(varchar, varchar)' + ]" )(x) }) @@ -198,7 +236,13 @@ evoland_db$set("active", "alloc_params_t", function(x) { as_alloc_params_t, key_cols = c("id_trans", "id_period"), map_cols = c("alloc_params", "goodness_of_fit"), - format = "json" + form = "json", + json_colspec = r"[ + id_trans: 'int', + id_period: 'int', + alloc_params: 'map(varchar, double)', + goodness_of_fit: 'map(varchar, double)' + ]" )(x) }) diff --git a/R/parquet_duckdb.R b/R/parquet_duckdb.R index b167c9b..87cb174 100644 --- a/R/parquet_duckdb.R +++ b/R/parquet_duckdb.R @@ -196,17 +196,36 @@ parquet_duckdb <- R6::R6Class( #' @param table_name Character string. Name of the table to query. #' @param where Character string. Optional WHERE clause for the SQL query. #' @param limit Integer. 
Optional limit on number of rows to return.
+    #' @param map_cols Vector of columns to be converted from key/value structs to R lists
+    #' @param json_colspec For JSON files, an optional column type specification
     #'
     #' @return A data.table
-    fetch = function(table_name, where = NULL, limit = NULL) {
+    fetch = function(
+      table_name,
+      where = NULL,
+      limit = NULL,
+      map_cols = NULL,
+      json_colspec = NULL
+    ) {
       file_info <- private$get_file_path(table_name)

       if (!file_info$exists) {
-        return(data.table::data.table())
+        stop("Table `", table_name, "` does not exist")
       }

       # build sql query
-      sql <- glue::glue("select * from read_{file_info$format}('{file_info$path}')")
+      if (!is.null(json_colspec)) {
+        if (file_info$format != "json") {
+          stop("Cannot pass json_colspec if format is not json")
+        }
+        sql <- glue::glue(
+          "from read_<file_info$format>('<file_info$path>', columns = {<json_colspec>})",
+          .open = "<",
+          .close = ">"
+        )
+      } else {
+        sql <- glue::glue("from read_{file_info$format}('{file_info$path}')")
+      }

       if (!is.null(where)) {
         sql <- glue::glue("{sql} where {where}")
@@ -215,7 +234,13 @@
         sql <- glue::glue("{sql} limit {limit}")
       }

-      self$get_query(sql)
+      res <- self$get_query(sql)
+
+      if (!is.null(map_cols) && nrow(res) > 0) {
+        return(convert_list_cols(res, map_cols, kv_df_to_list))
+      }
+
+      res
     },

     #' @description
diff --git a/inst/tinytest/test_evoland_db.R b/inst/tinytest/test_evoland_db.R
index 6b139b9..a66b146 100644
--- a/inst/tinytest/test_evoland_db.R
+++ b/inst/tinytest/test_evoland_db.R
@@ -227,7 +227,7 @@ expect_equal(db$intrv_meta_t, intrv_meta_t)

 # Test 11: Active bindings - trans_meta_t
 expect_silent(db$trans_meta_t <- trans_meta_t)
-expect_equal(db$trans_meta_t[, c(-1)], trans_meta_t)
+expect_equal(db$trans_meta_t[, c(-1)], trans_meta_t, tolerance = 1e-7)

 # Test 13: Active bindings - alloc_params_t (with MAP columns)
 expect_silent(db$alloc_params_t <- alloc_params_t)
diff --git a/man/parquet_duckdb.Rd b/man/parquet_duckdb.Rd
index 58f2f22..90fcd92 100644
--- a/man/parquet_duckdb.Rd
+++ b/man/parquet_duckdb.Rd
@@ -217,7 +217,13 @@ Result of func
 \subsection{Method \code{fetch()}}{
 Fetch data from a table
 \subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{parquet_duckdb$fetch(table_name, where = NULL, limit = NULL)}\if{html}{\out{</div>}}
+\if{html}{\out{<div class="r">}}\preformatted{parquet_duckdb$fetch(
+  table_name,
+  where = NULL,
+  limit = NULL,
+  map_cols = NULL,
+  json_colspec = NULL
+)}\if{html}{\out{
    }} } \subsection{Arguments}{ @@ -228,6 +234,10 @@ Fetch data from a table \item{\code{where}}{Character string. Optional WHERE clause for the SQL query.} \item{\code{limit}}{Integer. Optional limit on number of rows to return.} + +\item{\code{map_cols}}{Vector of columns to be converted from key/value structs to R lists} + +\item{\code{json_colspec}}{For JSON files, an optional column type specification} } \if{html}{\out{}} } From 32fc5ca8850bdad0c559d1919a7354627cdb2e6e Mon Sep 17 00:00:00 2001 From: mmyrte <24587121+mmyrte@users.noreply.github.com> Date: Thu, 11 Dec 2025 13:40:04 +0100 Subject: [PATCH 20/23] load json, just in case --- R/evoland_db.R | 2 +- R/parquet_duckdb.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/evoland_db.R b/R/evoland_db.R index e0e22ca..fc91b49 100644 --- a/R/evoland_db.R +++ b/R/evoland_db.R @@ -41,7 +41,7 @@ evoland_db <- R6::R6Class( super$initialize( path = path, default_format = default_format, - extensions = "spatial" + extensions = c("spatial", "json") ) # Set evoland-specific reporting metadata diff --git a/R/parquet_duckdb.R b/R/parquet_duckdb.R index 87cb174..df016fc 100644 --- a/R/parquet_duckdb.R +++ b/R/parquet_duckdb.R @@ -36,7 +36,7 @@ parquet_duckdb <- R6::R6Class( initialize = function( path, default_format = c("parquet", "json"), - extensions = character(0) + extensions = c("json") ) { # Create folder if it doesn't exist self$path <- ensure_dir(path) From dda68f0cc1b922f3a7daf9350fa1f1adc122036f Mon Sep 17 00:00:00 2001 From: mmyrte <24587121+mmyrte@users.noreply.github.com> Date: Thu, 11 Dec 2025 18:09:12 +0100 Subject: [PATCH 21/23] duckdb 1.4.3 solves bug, nothing to do with parquet -> fully hardcode parquet --- DESCRIPTION | 2 +- R/evoland_db.R | 6 +- R/evoland_db_neighbors.R | 2 +- R/evoland_db_tables.R | 66 ++---------- R/parquet_duckdb.R | 154 +++++++--------------------- inst/tinytest/test_parquet_duckdb.R | 27 +---- man/evoland_db.Rd | 5 +- man/parquet_duckdb.Rd | 34 +----- 8 files changed, 56 insertions(+), 240 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 98e7c6b..c8d9752 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -17,7 +17,7 @@ Imports: curl, data.table, DBI, - duckdb, + duckdb (>= 1.4.3), glue, qs2, R6, diff --git a/R/evoland_db.R b/R/evoland_db.R index fc91b49..c352e5e 100644 --- a/R/evoland_db.R +++ b/R/evoland_db.R @@ -27,21 +27,17 @@ evoland_db <- R6::R6Class( #' @description #' Initialize a new evoland_db object #' @param path Character string. Path to the data folder. - #' @param default_format Character. Default file format ("parquet" or "json"). - #' Default is "parquet". #' @param ... passed on to `set_report` #' #' @return A new `evoland_db` object initialize = function( path, - default_format = c("parquet", "json"), ... 
) { # Initialize parent class with spatial extension super$initialize( path = path, - default_format = default_format, - extensions = c("spatial", "json") + extensions = "spatial" ) # Set evoland-specific reporting metadata diff --git a/R/evoland_db_neighbors.R b/R/evoland_db_neighbors.R index e264fcd..72ee112 100644 --- a/R/evoland_db_neighbors.R +++ b/R/evoland_db_neighbors.R @@ -110,7 +110,7 @@ evoland_db$set("public", "generate_neighbor_predictors", function() { }" ) self$execute( - "create or replace view pred_meta_upsert_v as + "create or replace view pred_meta_upsert_v as select name, pretty_name, description, orig_format, sources, unit, factor_levels from pred_meta_neighbors_t" ) diff --git a/R/evoland_db_tables.R b/R/evoland_db_tables.R index cbc50b7..30a7d8a 100644 --- a/R/evoland_db_tables.R +++ b/R/evoland_db_tables.R @@ -33,16 +33,13 @@ create_table_binding <- function( key_cols = NULL, autoincrement_cols = NULL, map_cols = NULL, - format = NULL, - json_colspec = NULL, ... ) { function(x) { if (missing(x)) { fetched <- self$fetch( table_name = table_name, - map_cols = map_cols, - json_colspec = json_colspec + map_cols = map_cols ) return(as_fn(fetched, ...)) @@ -56,8 +53,7 @@ create_table_binding <- function( key_cols = key_cols, autoincrement_cols = autoincrement_cols, map_cols = map_cols, - method = "upsert", - format = format + method = "upsert" ) } } @@ -85,15 +81,7 @@ evoland_db$set("active", "lulc_meta_t", function(x) { self, "lulc_meta_t", as_lulc_meta_t, - key_cols = "id_lulc", - form = "json", - json_colspec = r"[ - id_lulc: 'int', - name: 'varchar', - pretty_name: 'varchar', - description: 'varchar', - src_classes: 'int[]' - ]" + key_cols = "id_lulc" )(x) }) @@ -144,18 +132,7 @@ evoland_db$set("active", "pred_meta_t", function(x) { "pred_meta_t", as_pred_meta_t, key_cols = "name", - autoincrement_cols = "id_pred", - form = "json", - json_colspec = r"[ - id_pred: 'int', - name: 'varchar', - pretty_name: 'varchar', - description: 'varchar', - orig_format: 'varchar', - sources: 'struct(url varchar, md5sum varchar)[]', - unit: 'varchar', - factor_levels: 'map(integer, varchar)' - ]" + autoincrement_cols = "id_pred" )(x) }) @@ -165,17 +142,7 @@ evoland_db$set("active", "trans_meta_t", function(x) { "trans_meta_t", as_trans_meta_t, key_cols = c("id_lulc_anterior", "id_lulc_posterior"), - autoincrement_cols = "id_trans", - form = "json", - json_colspec = r"[ - id_trans: 'int', - id_lulc_anterior: 'int', - id_lulc_posterior: 'int', - cardinality: 'int', - frequency_rel: 'float', - frequency_abs: 'float', - is_viable: 'bool' - ]" + autoincrement_cols = "id_trans" )(x) }) @@ -194,19 +161,7 @@ evoland_db$set("active", "intrv_meta_t", function(x) { "intrv_meta_t", as_intrv_meta_t, key_cols = "id_intrv", - map_cols = "params", - form = "json", - json_colspec = r"[ - id_intrv: 'int', - id_period_list: 'int[]', - id_trans_list: 'int[]', - pre_allocation: 'bool', - name: 'varchar', - pretty_name: 'varchar', - description: 'varchar', - sources: 'struct(url varchar, md5sum varchar)[]', - params: 'map(varchar, varchar)' - ]" + map_cols = "params" )(x) }) @@ -235,14 +190,7 @@ evoland_db$set("active", "alloc_params_t", function(x) { "alloc_params_t", as_alloc_params_t, key_cols = c("id_trans", "id_period"), - map_cols = c("alloc_params", "goodness_of_fit"), - form = "json", - json_colspec = r"[ - id_trans: 'int', - id_period: 'int', - alloc_params: 'map(varchar, double)', - goodness_of_fit: 'map(varchar, double)' - ]" + map_cols = c("alloc_params", "goodness_of_fit") )(x) }) diff --git 
a/R/parquet_duckdb.R b/R/parquet_duckdb.R index df016fc..573080a 100644 --- a/R/parquet_duckdb.R +++ b/R/parquet_duckdb.R @@ -19,32 +19,22 @@ parquet_duckdb <- R6::R6Class( #' @field path Character string path to the data folder path = NULL, - #' @field default_format Default file format for new tables - default_format = NULL, - - #' @field writeopts Default write options for DuckDB - writeopts = NULL, + #' @field writeopts Write options for DuckDB parquet output + writeopts = "format parquet, compression zstd", #' @description #' Initialize a new parquet_duckdb object #' @param path Character string. Path to the data folder. - #' @param default_format Character. Default file format ("parquet" or "json"). - #' Default is "parquet". #' @param extensions Character vector of DuckDB extensions to load (e.g., "spatial") #' #' @return A new `parquet_duckdb` object initialize = function( path, - default_format = c("parquet", "json"), - extensions = c("json") + extensions = character(0) ) { # Create folder if it doesn't exist self$path <- ensure_dir(path) - # Set format / writeopts - self$default_format <- match.arg(default_format) - self$writeopts <- private$format_to_writeopts(self$default_format) - # Create in-memory connection for SQL operations self$connection <- DBI::dbConnect(duckdb::duckdb(), dbdir = ":memory:") @@ -92,15 +82,15 @@ parquet_duckdb <- R6::R6Class( }, #' @description - #' Attach a table from parquet/CSV file as a temporary table in DuckDB + #' Attach a table from parquet file as a temporary table in DuckDB #' @param table_name Character. Name of table to attach. #' @param columns Character vector. Optional SQL column selection, defaults to "*" #' @param where Character. Optional SQL WHERE clause to subset the table. #' @return Invisible NULL (called for side effects) attach_table = function(table_name, columns = "*", where = NULL) { - file_info <- private$get_file_path(table_name) + file_path <- private$get_file_path(table_name) - if (!file_info$exists) { + if (!file.exists(file_path)) { stop(glue::glue("Table '{table_name}' does not exist at path: {self$path}")) } @@ -108,7 +98,7 @@ parquet_duckdb <- R6::R6Class( sql <- glue::glue( "create temp table {table_name} as ", "select {paste(columns, collapse = ', ')} ", - "from read_{file_info$format}('{file_info$path}')" + "from read_parquet('{file_path}')" ) if (!is.null(where)) { @@ -133,14 +123,14 @@ parquet_duckdb <- R6::R6Class( #' @param table_name Character string. Name of the table to query. #' @return Integer number of rows row_count = function(table_name) { - file_info <- private$get_file_path(table_name) + file_path <- private$get_file_path(table_name) - if (!file_info$exists) { + if (!file.exists(file_path)) { return(0L) } self$get_query( - glue::glue("select count(*) as n from read_{file_info$format}('{file_info$path}')") + glue::glue("select count(*) as n from read_parquet('{file_path}')") )[[1]] }, @@ -148,9 +138,8 @@ parquet_duckdb <- R6::R6Class( #' List all tables (files) in storage #' @return Character vector of table names list_tables = function() { - list.files(self$path, pattern = "\\.(parquet|json)$", full.names = FALSE) |> + list.files(self$path, pattern = "\\.parquet$", full.names = FALSE) |> tools::file_path_sans_ext() |> - unique() |> sort() }, @@ -197,35 +186,22 @@ parquet_duckdb <- R6::R6Class( #' @param where Character string. Optional WHERE clause for the SQL query. #' @param limit Integer. Optional limit on number of rows to return. 
#' @param map_cols Vector of columns to be converted from key/value structs to R lists - #' @param json_colspec For JSON files, an optional column type specification #' #' @return A data.table fetch = function( table_name, where = NULL, limit = NULL, - map_cols = NULL, - json_colspec = NULL + map_cols = NULL ) { - file_info <- private$get_file_path(table_name) + file_path <- private$get_file_path(table_name) - if (!file_info$exists) { + if (!file.exists(file_path)) { stop("Table `", table_name, "` does not exist") } # build sql query - if (!is.null(json_colspec)) { - if (file_info$format != "json") { - stop("Cannot pass json_colspec if format is not json") - } - sql <- glue::glue( - "from read_('', columns = {})", - .open = "<", - .close = ">" - ) - } else { - sql <- glue::glue("from read_{file_info$format}('{file_info$path}')") - } + sql <- glue::glue("from read_parquet('{file_path}')") if (!is.null(where)) { sql <- glue::glue("{sql} where {where}") @@ -249,29 +225,26 @@ parquet_duckdb <- R6::R6Class( #' @param where Character string. Optional WHERE clause; if NULL, deletes all rows. #' @return Number of rows deleted delete_from = function(table_name, where = NULL) { - file_info <- private$get_file_path(table_name) + file_path <- private$get_file_path(table_name) - if (!file_info$exists) { + if (!file.exists(file_path)) { return(0L) } count_before <- self$row_count(table_name) if (is.null(where)) { - file.remove(file_info$path) + file.remove(file_path) return(count_before) } - # Use the same format as the existing file - writeopts <- private$format_to_writeopts(file_info$format) - self$execute(glue::glue( r"{ copy ( - select * from read_{file_info$format}('{file_info$path}') + select * from read_parquet('{file_path}') where not ({where}) ) - to '{file_info$path}' ({writeopts}) + to '{file_path}' ({self$writeopts}) }" )) @@ -291,8 +264,6 @@ parquet_duckdb <- R6::R6Class( #' @param map_cols Character vector of columns to convert to MAP format #' @param method Character, one of "overwrite", "append", "upsert" (upsert being an #' update for existing rows, and insert for new rows) - #' @param format Character. Optional format override ("parquet" or "json"). If NULL, - #' uses the existing file's format or default_format for new files. 
#' @return Invisible NULL (called for side effects) commit = function( x, @@ -300,8 +271,7 @@ parquet_duckdb <- R6::R6Class( key_cols, autoincrement_cols = character(0), map_cols = character(0), - method = c("overwrite", "append", "upsert"), - format = NULL + method = c("overwrite", "append", "upsert") ) { method <- match.arg(method) @@ -311,16 +281,16 @@ parquet_duckdb <- R6::R6Class( "select column_name from (describe new_data_v)" )[[1]] - file_info <- private$get_file_path(table_name, format) + file_path <- private$get_file_path(table_name) - if (method == "overwrite" || !file_info$exists) { + if (method == "overwrite" || !file.exists(file_path)) { # in case overwrite explicitly required, or no previously existing data to # append or upsert to; rest of logic can be skipped return(private$commit_overwrite( table_name = table_name, all_cols = all_cols, autoincrement_cols = autoincrement_cols, - file_info = file_info + file_path = file_path )) } @@ -338,7 +308,7 @@ parquet_duckdb <- R6::R6Class( table_name = table_name, all_cols = all_cols, autoincrement_cols = autoincrement_cols, - file_info = file_info + file_path = file_path ) } else { private$commit_upsert( @@ -346,7 +316,7 @@ parquet_duckdb <- R6::R6Class( all_cols = all_cols, key_cols = key_cols, autoincrement_cols = autoincrement_cols, - file_info = file_info + file_path = file_path ) } }, @@ -400,16 +370,10 @@ parquet_duckdb <- R6::R6Class( cat(classes[1], "Object. Inherits from", toString(classes[-1]), "\n") } - compression <- if (grepl("compression\\s+(\\w+)", self$writeopts)) { - sub(".*compression\\s+(\\w+).*", "\\1", self$writeopts) - } else { - "none" - } - # Database info on one line cat( glue::glue( - "Database: {self$path} | Format: {self$default_format} | Compression: {compression}" + "Database: {self$path} | Write Options: {self$writeopts}" ), "\n\n" ) @@ -458,13 +422,12 @@ parquet_duckdb <- R6::R6Class( #' param x Data frame to commit. If character, in-duckdb-memory table. #' param table_name Character string table name #' param autoincrement_cols Character vector of column names to auto-increment - #' param map_cols Character vector of columns to convert to MAP format #' return Invisible NULL (called for side effects) commit_overwrite = function( table_name, all_cols, autoincrement_cols = character(0), - file_info + file_path ) { # Warn if overriding existing IDs if (length(intersect(autoincrement_cols, all_cols)) > 0) { @@ -481,13 +444,12 @@ parquet_duckdb <- R6::R6Class( sep = ",\n " ) - writeopts <- private$format_to_writeopts(file_info$format) self$execute(glue::glue( r"{ copy ( select {select_expr} from new_data_v - ) to '{file_info$path}' ({writeopts}) + ) to '{file_path}' ({self$writeopts}) }" )) }, @@ -495,13 +457,12 @@ parquet_duckdb <- R6::R6Class( #' param x Data frame to commit. If character, in-duckdb-memory table. 
#' param table_name Character string table name #' param autoincrement_cols Character vector of column names to auto-increment - #' param map_cols Character vector of columns to convert to MAP format #' return Invisible NULL (called for side effects) commit_append = function( table_name, all_cols, autoincrement_cols = character(0), - file_info + file_path ) { ordinary_cols <- setdiff(all_cols, autoincrement_cols) select_new <- glue::glue_collapse( @@ -514,8 +475,6 @@ parquet_duckdb <- R6::R6Class( sep = ",\n " ) - writeopts <- private$format_to_writeopts(file_info$format) - # Concatenation using UNION ALL; "by name" handles missing columns self$execute(glue::glue( r"{ @@ -525,7 +484,7 @@ parquet_duckdb <- R6::R6Class( select {select_new} from new_data_v ) - to '{file_info$path}' ({writeopts}) + to '{file_path}' ({self$writeopts}) }" )) }, @@ -535,14 +494,13 @@ parquet_duckdb <- R6::R6Class( #' param key_cols Character vector of columns that define uniqueness. If missing, #' use all columns starting with `id_` #' param autoincrement_cols Character vector of column names to auto-increment - #' param map_cols Character vector of columns to convert to MAP format #' return Invisible NULL (called for side effects) commit_upsert = function( table_name, all_cols, key_cols, autoincrement_cols = character(0), - file_info + file_path ) { # Update existing data ordinary_cols <- setdiff(all_cols, union(key_cols, autoincrement_cols)) @@ -597,51 +555,15 @@ parquet_duckdb <- R6::R6Class( }" )) - writeopts <- private$format_to_writeopts(file_info$format) - - self$execute(glue::glue("copy {table_name} to '{file_info$path}' ({writeopts})")) + self$execute(glue::glue("copy {table_name} to '{file_path}' ({self$writeopts})")) }, - # Get file path and format for a table + # Get file path for a table # # @param table_name Character string table name - # @param format_override Character. Optional format override - # @return List with path, format, and exists flag - get_file_path = function(table_name, format_override = NULL) { - # Check for parquet first, then json - parquet_path <- file.path(self$path, paste0(table_name, ".parquet")) - json_path <- file.path(self$path, paste0(table_name, ".json")) - - if (file.exists(parquet_path)) { - return(list(path = parquet_path, format = "parquet", exists = TRUE)) - } else if (file.exists(json_path)) { - return(list(path = json_path, format = "json", exists = TRUE)) - } else { - # Return format override or default format for new file - use_format <- format_override %||% self$default_format - default_path <- file.path( - self$path, - paste0(table_name, ".", use_format) - ) - return(list( - path = default_path, - format = use_format, - exists = FALSE - )) - } - }, - - # Convert format to writeopts - # - # @param format Character. Format name ("parquet" or "json") - # @return Character. 
DuckDB write options string - format_to_writeopts = function(format) { - switch( - format, - parquet = "format parquet, compression zstd", - json = "format json", - stop(glue::glue("Unsupported format: {format}")) - ) + # @return Character path to parquet file + get_file_path = function(table_name) { + file.path(self$path, paste0(table_name, ".parquet")) }, # Register new_data_v table, optionally converting MAP columns @@ -651,7 +573,6 @@ parquet_duckdb <- R6::R6Class( # @return NULL (called for side effects) register_new_data_v = function(x, map_cols = character(0)) { if (is.character(x)) { - # TODO add tests self$execute(glue::glue("create view new_data_v as from {x}")) return(invisible(NULL)) } @@ -685,7 +606,6 @@ parquet_duckdb <- R6::R6Class( # Cleanup new_data_v and related tables # - # @param map_cols Character vector indicating if MAP conversion was used # @return NULL (called for side effects) cleanup_new_data_v = function() { try(duckdb::duckdb_unregister(self$connection, "new_data_v"), silent = TRUE) diff --git a/inst/tinytest/test_parquet_duckdb.R b/inst/tinytest/test_parquet_duckdb.R index caaa973..60f1401 100644 --- a/inst/tinytest/test_parquet_duckdb.R +++ b/inst/tinytest/test_parquet_duckdb.R @@ -8,13 +8,11 @@ on.exit(unlink(test_dir, recursive = TRUE), add = TRUE) # Test 1: Initialization expect_silent( db <- parquet_duckdb$new( - path = test_dir, - default_format = "parquet" + path = test_dir ) ) expect_true(inherits(db, "parquet_duckdb")) expect_true(dir.exists(test_dir)) -expect_equal(db$default_format, "parquet") expect_true(!is.null(db$connection)) expect_true(inherits(db$connection, "duckdb_connection")) @@ -316,26 +314,7 @@ expect_true(inherits(result, "data.table")) expect_true("max_id" %in% names(result)) db$detach_table("test_attach") -# Test 31: JSON format support -test_dir_json <- tempfile("parquet_duckdb_json_") -on.exit(unlink(test_dir_json, recursive = TRUE), add = TRUE) - -db_json <- parquet_duckdb$new( - path = test_dir_json, - default_format = "json" -) -expect_equal(db_json$default_format, "json") - -test_json_data <- data.table::data.table( - id = 1:3, - name = c("a", "b", "c") -) -db_json$commit(test_json_data, "json_table", method = "overwrite") -expect_true("json_table" %in% db_json$list_tables()) -retrieved <- db_json$fetch("json_table") -expect_equal(retrieved, test_json_data) - -# Test 32: Extension loading +# Test 31: Extension loading test_dir_ext <- tempfile("parquet_duckdb_ext_") on.exit(unlink(test_dir_ext, recursive = TRUE), add = TRUE) @@ -348,7 +327,7 @@ expect_silent( db_ext$get_query("SELECT ST_Point(0, 0) as geom") ) -# Test 33: Persistence across connections +# Test 32: Persistence across connections db$commit( method = "overwrite", test_data_1, diff --git a/man/evoland_db.Rd b/man/evoland_db.Rd index 2fced8e..7d59a3e 100644 --- a/man/evoland_db.Rd +++ b/man/evoland_db.Rd @@ -95,7 +95,7 @@ Additional methods and active bindings are added to this class in separate files \subsection{Method \code{new()}}{ Initialize a new evoland_db object \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{evoland_db$new(path, default_format = c("parquet", "json"), ...)}\if{html}{\out{</div>}}
+\if{html}{\out{<div class="r">}}\preformatted{evoland_db$new(path, ...)}\if{html}{\out{
    }} } \subsection{Arguments}{ @@ -103,9 +103,6 @@ Initialize a new evoland_db object \describe{ \item{\code{path}}{Character string. Path to the data folder.} -\item{\code{default_format}}{Character. Default file format ("parquet" or "json"). -Default is "parquet".} - \item{\code{...}}{passed on to \code{set_report}} } \if{html}{\out{}} diff --git a/man/parquet_duckdb.Rd b/man/parquet_duckdb.Rd index 90fcd92..5e437aa 100644 --- a/man/parquet_duckdb.Rd +++ b/man/parquet_duckdb.Rd @@ -16,9 +16,7 @@ domain-specific database classes. \item{\code{path}}{Character string path to the data folder} -\item{\code{default_format}}{Default file format for new tables} - -\item{\code{writeopts}}{Default write options for DuckDB} +\item{\code{writeopts}}{Write options for DuckDB parquet output} } \if{html}{\out{}} } @@ -46,11 +44,7 @@ domain-specific database classes. \subsection{Method \code{new()}}{ Initialize a new parquet_duckdb object \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{parquet_duckdb$new(
-  path,
-  default_format = c("parquet", "json"),
-  extensions = character(0)
-)}\if{html}{\out{</div>}}
+\if{html}{\out{<div class="r">}}\preformatted{parquet_duckdb$new(path, extensions = character(0))}\if{html}{\out{
    }} } \subsection{Arguments}{ @@ -58,9 +52,6 @@ Initialize a new parquet_duckdb object \describe{ \item{\code{path}}{Character string. Path to the data folder.} -\item{\code{default_format}}{Character. Default file format ("parquet" or "json"). -Default is "parquet".} - \item{\code{extensions}}{Character vector of DuckDB extensions to load (e.g., "spatial")} } \if{html}{\out{}} @@ -113,7 +104,7 @@ A data.table with query results \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-parquet_duckdb-attach_table}{}}} \subsection{Method \code{attach_table()}}{ -Attach a table from parquet/CSV file as a temporary table in DuckDB +Attach a table from parquet file as a temporary table in DuckDB \subsection{Usage}{ \if{html}{\out{
}}\preformatted{parquet_duckdb$attach_table(table_name, columns = "*", where = NULL)}\if{html}{\out{</div>}}
}
@@ -217,13 +208,7 @@ Result of func
\subsection{Method \code{fetch()}}{
Fetch data from a table
\subsection{Usage}{
-\if{html}{\out{
}}\preformatted{parquet_duckdb$fetch(
-  table_name,
-  where = NULL,
-  limit = NULL,
-  map_cols = NULL,
-  json_colspec = NULL
-)}\if{html}{\out{</div>}}
+\if{html}{\out{<div class="r">}}\preformatted{parquet_duckdb$fetch(table_name, where = NULL, limit = NULL, map_cols = NULL)}\if{html}{\out{
    }} } \subsection{Arguments}{ @@ -236,8 +221,6 @@ Fetch data from a table \item{\code{limit}}{Integer. Optional limit on number of rows to return.} \item{\code{map_cols}}{Vector of columns to be converted from key/value structs to R lists} - -\item{\code{json_colspec}}{For JSON files, an optional column type specification} } \if{html}{\out{}} } @@ -280,8 +263,7 @@ identity columns, and list-to-MAP conversion. key_cols, autoincrement_cols = character(0), map_cols = character(0), - method = c("overwrite", "append", "upsert"), - format = NULL + method = c("overwrite", "append", "upsert") )}\if{html}{\out{}} } @@ -301,9 +283,6 @@ use all columns starting with \code{id_}} \item{\code{method}}{Character, one of "overwrite", "append", "upsert" (upsert being an update for existing rows, and insert for new rows)} - -\item{\code{format}}{Character. Optional format override ("parquet" or "json"). If NULL, -uses the existing file's format or default_format for new files.} } \if{html}{\out{}} } @@ -332,19 +311,16 @@ self (invisibly) param x Data frame to commit. If character, in-duckdb-memory table. param table_name Character string table name param autoincrement_cols Character vector of column names to auto-increment -param map_cols Character vector of columns to convert to MAP format return Invisible NULL (called for side effects) param x Data frame to commit. If character, in-duckdb-memory table. param table_name Character string table name param autoincrement_cols Character vector of column names to auto-increment -param map_cols Character vector of columns to convert to MAP format return Invisible NULL (called for side effects) param x Data frame to commit. If character, in-duckdb-memory table. param table_name Character string table name param key_cols Character vector of columns that define uniqueness. 
If missing, use all columns starting with \code{id_} param autoincrement_cols Character vector of column names to auto-increment -param map_cols Character vector of columns to convert to MAP format return Invisible NULL (called for side effects) } } From e18df587e84c4109cf647bdf7a3cfc5a352f5487 Mon Sep 17 00:00:00 2001 From: mmyrte <24587121+mmyrte@users.noreply.github.com> Date: Fri, 12 Dec 2025 08:41:03 +0100 Subject: [PATCH 22/23] add back reminder --- R/parquet_duckdb.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/parquet_duckdb.R b/R/parquet_duckdb.R index 573080a..20f463e 100644 --- a/R/parquet_duckdb.R +++ b/R/parquet_duckdb.R @@ -573,6 +573,7 @@ parquet_duckdb <- R6::R6Class( # @return NULL (called for side effects) register_new_data_v = function(x, map_cols = character(0)) { if (is.character(x)) { + # TODO add tests self$execute(glue::glue("create view new_data_v as from {x}")) return(invisible(NULL)) } From 2d5ca98b7ed21bbde6ece39e1e3d7a02d739b55b Mon Sep 17 00:00:00 2001 From: mmyrte <24587121+mmyrte@users.noreply.github.com> Date: Fri, 12 Dec 2025 08:46:03 +0100 Subject: [PATCH 23/23] align util_dinamica with other utils --- DESCRIPTION | 2 +- R/{utils_dinamica.r => util_dinamica.r} | 8 ++++---- man/{dinamica_utils.Rd => util_dinamica.Rd} | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) rename R/{utils_dinamica.r => util_dinamica.r} (96%) rename man/{dinamica_utils.Rd => util_dinamica.Rd} (96%) diff --git a/DESCRIPTION b/DESCRIPTION index c8d9752..61d78a0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -63,6 +63,6 @@ Collate: 'trans_models_t.R' 'trans_preds_t.R' 'util.R' + 'util_dinamica.r' 'util_download.R' 'util_terra.R' - 'utils_dinamica.r' diff --git a/R/utils_dinamica.r b/R/util_dinamica.r similarity index 96% rename from R/utils_dinamica.r rename to R/util_dinamica.r index ecd0ad2..6b84823 100644 --- a/R/utils_dinamica.r +++ b/R/util_dinamica.r @@ -2,10 +2,10 @@ #' #' Interact with Dinamica from R, see **Functions** section below. #' -#' @name dinamica_utils +#' @name util_dinamica NULL -#' @describeIn dinamica_utils Execute a Dinamica .ego file using `DinamicaConsole` +#' @describeIn util_dinamica Execute a Dinamica .ego file using `DinamicaConsole` #' @param model_path Path to the .ego model file to run. Any submodels must be included #' in a directory of the exact form `basename(modelpath)_ego_Submodels`, [see #' wiki](https://csr.ufmg.br/dinamica/dokuwiki/doku.php?id=submodels) @@ -111,7 +111,7 @@ exec_dinamica <- function( invisible(res) } -#' @describeIn dinamica_utils Set up evoland-specific Dinamica EGO files; execute using +#' @describeIn util_dinamica Set up evoland-specific Dinamica EGO files; execute using #' [exec_dinamica()] #' @param run_modelprechecks bool, Validate that everything's in place for a model run. #' Will never be run if calibration. @@ -162,7 +162,7 @@ run_evoland_dinamica_sim <- function( ) } -#' @describeIn dinamica_utils Encode or decode raw R and Python code chunks in .ego +#' @describeIn util_dinamica Encode or decode raw R and Python code chunks in .ego #' files and their submodels to/from base64 #' @param infile Input file path. 
Treated as input if passed AsIs using `base::I()` #' @param outfile Output file path (optional) diff --git a/man/dinamica_utils.Rd b/man/util_dinamica.Rd similarity index 96% rename from man/dinamica_utils.Rd rename to man/util_dinamica.Rd index f1c9dc4..d15d4da 100644 --- a/man/dinamica_utils.Rd +++ b/man/util_dinamica.Rd @@ -1,7 +1,7 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils_dinamica.r -\name{dinamica_utils} -\alias{dinamica_utils} +% Please edit documentation in R/util_dinamica.r +\name{util_dinamica} +\alias{util_dinamica} \alias{exec_dinamica} \alias{run_evoland_dinamica_sim} \alias{process_dinamica_script}
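For illustration, a sketch of calling the renamed helper; the file paths are hypothetical, and the encode/decode direction is left to the function as documented above:

    library(evoland)

    # round-trip the base64-encoded R and Python chunks of a model script
    process_dinamica_script(
      infile = "evoland.ego",
      outfile = "evoland.ego-decoded"
    )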