diff --git a/.Rbuildignore b/.Rbuildignore index 1890937..0c10af5 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -9,3 +9,4 @@ air.toml ^_pkgdown\.yml$ ^docs$ ^pkgdown$ +^.devcontainer$ diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile new file mode 100644 index 0000000..a9ed3d7 --- /dev/null +++ b/.devcontainer/Dockerfile @@ -0,0 +1,19 @@ +FROM --platform=linux/amd64 ghcr.io/mmyrte/evoland:latest +LABEL authors="Carlson Büth, Jan Hartman" \ + version="8.3" \ + description="Docker image for Dinamica EGO." + +# Vignettes are being built with quarto +RUN /rocker_scripts/install_quarto.sh + +# Hand-knitted dependency discovery from DESCRIPTION avoids cache invalidation versus a +# remotes::install_deps based solution. +WORKDIR /builddir +COPY DESCRIPTION /builddir/DESCRIPTION +COPY .devcontainer/install_pkg_deps.r /builddir/install_pkg_deps.r +RUN /builddir/install_pkg_deps.r && rm -r /builddir +WORKDIR / + +# Install development dependencies for vscode-style development. +COPY .devcontainer/install_vscode_devtools.sh /rocker_scripts/install_vscode_devtools.sh +RUN /rocker_scripts/install_vscode_devtools.sh diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..57aa81e --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,40 @@ +// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at: +// https://github.com/microsoft/vscode-dev-containers/tree/v0.191.1/containers/debian +{ + "name": "evoland-devcontainer", + "build": { + "dockerfile": "Dockerfile", + "context": "..", + "args": { + "platform": "linux/amd64" + } + }, + "mounts": [ + { + "source": "${localEnv:EVOLAND_CACHEDIR}", + "target": "/mnt/evoland-cache", + "type": "bind" + } + ], + "containerEnv": { + "EVOLAND_CACHEDIR": "/mnt/evoland-cache" + }, + "customizations": { + "vscode": { + "settings": { + "git.path": "/usr/bin/git", + "r.rterm.linux": "/usr/local/bin/radian", + "shellcheck.executablePath": "/usr/bin/shellcheck", + "r.plot.useHttpgd": true, + "r.bracketedPaste": true, + "rewrap.wrappingColumn": 88 + }, + "extensions": [ + "RDebugger.r-debugger", + "REditorSupport.r", + "quarto.quarto", + "timonwong.shellcheck" + ] + } + } +} diff --git a/.devcontainer/install_pkg_deps.r b/.devcontainer/install_pkg_deps.r new file mode 100755 index 0000000..9568070 --- /dev/null +++ b/.devcontainer/install_pkg_deps.r @@ -0,0 +1,19 @@ +#!/usr/bin/env Rscript + +# Reusing logic from remotes::local_package_deps +# MIT licensed, source at https://github.com/r-lib/remotes/ + +desc <- remotes:::read_dcf("DESCRIPTION") + +dependencies <- c("Depends", "Imports", "LinkingTo", "Suggests") +dependencies <- intersect(dependencies, names(desc)) +pkg_deps <- + lapply(desc[dependencies], remotes:::parse_deps) |> + lapply(`[[`, "name") |> + unlist(use.names = FALSE) + +pkgs_to_install <- setdiff(pkg_deps, installed.packages()[, 1]) +install.packages( + pkgs_to_install, + Ncpus = max(1L, parallel::detectCores()) +) diff --git a/.devcontainer/install_vscode_devtools.sh b/.devcontainer/install_vscode_devtools.sh new file mode 100755 index 0000000..f5a75b8 --- /dev/null +++ b/.devcontainer/install_vscode_devtools.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash + +# Abort script if any command exits with non-zero status. Not foolproof. 
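For comparison, the `remotes`-based alternative that the Dockerfile comment above rejects would look roughly like the sketch below. The hand-rolled `install_pkg_deps.r` additionally skips packages already present in the base image via `setdiff()` against `installed.packages()`, which keeps the Docker layer stable.

```r
# Hypothetical remotes-based variant of install_pkg_deps.r, for contrast only.
if (!requireNamespace("remotes", quietly = TRUE)) {
  install.packages("remotes")
}
# Reads Depends/Imports/LinkingTo (and, with dependencies = TRUE, Suggests)
# from DESCRIPTION and installs them.
remotes::install_deps(".", dependencies = TRUE)
```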
+set -e + +NCPUS=${NCPUS:-"-1"} + +apt-get update -qq +apt-get -y --no-install-recommends install \ + git \ + htop \ + libfontconfig1-dev \ + libfribidi-dev \ + libgit2-dev \ + libharfbuzz-dev \ + pipx \ + shellcheck + +rm -rf /var/lib/apt/lists/* + +PIPX_BIN_DIR=/usr/local/bin pipx install radian + +install2.r --error --skipinstalled -n $NCPUS \ + covr \ + devtools \ + languageserver \ + lobstr \ + microbenchmark \ + profvis \ + quarto \ + rlang + +# httpgd is currently off of CRAN because of c++ compiler conflicts +# https://github.com/nx10/httpgd/issues/218 + +R -e "remotes::install_github('nx10/httpgd')" diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml new file mode 100644 index 0000000..156d157 --- /dev/null +++ b/.github/workflows/R-CMD-check.yaml @@ -0,0 +1,56 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + workflow_dispatch: + +name: R-CMD-check.yaml + +permissions: read-all + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - { os: macos-latest, r: "release" } + # - {os: windows-latest, r: 'release'} + # - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - { os: ubuntu-latest, r: "release" } + # - { os: ubuntu-24.04-arm, r: "release" } + # - {os: ubuntu-latest, r: 'oldrel-1'} + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + + - name: Install macOS system dependencies + if: runner.os == 'macos' + run: brew install gdal proj + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true + build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index dda4b15..497bbe3 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -3,7 +3,6 @@ on: push: branches: [main, master] - pull_request: release: types: [published] workflow_dispatch: diff --git a/.gitignore b/.gitignore index 5b5e683..ef5fafe 100644 --- a/.gitignore +++ b/.gitignore @@ -56,3 +56,11 @@ docs # generally ignore databases, like .evolanddb, .duckdb, anything.db *db + +# IDE specific artefacts +*compile_commands* +*cobertura* +.cache/ + +# ignore drafts +*suddel* diff --git a/DESCRIPTION b/DESCRIPTION index c35060f..61d78a0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -10,28 +10,28 @@ URL: https://ethzplus.github.io/evoland-plus, https://github.com/ethzplus/evolan BugReports: https://github.com/ethzplus/evoland-plus/issues Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 Depends: R (>= 4.2) Imports: curl, data.table, DBI, - duckdb, + duckdb (>= 1.4.3), glue, - purrr, qs2, R6, Rcpp, - rlang, stringi, terra Suggests: + base64enc, butcher, pROC, - tinytest, + processx, quarto, - ranger + ranger, + tinytest VignetteBuilder: quarto Config/testthat/edition: 3 LinkingTo: @@ -56,11 +56,13 @@ Collate: 'periods_t.R' 'pred_data_t.R' 'pred_meta_t.R' + 'reporting_t.R' 
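The DESCRIPTION hunk above drops purrr and rlang from Imports and moves processx and base64enc into Suggests. Suggests-only packages must be guarded at the call site; this PR uses the standard `requireNamespace()` idiom (visible later in `util_dinamica.r`), sketched here with an illustrative function body:

```r
# Guard pattern for optional (Suggests) dependencies such as 'processx'.
# The guard mirrors util_dinamica.r; the processx call is just a placeholder.
run_guarded <- function() {
  if (!requireNamespace("processx", quietly = TRUE)) {
    stop(
      "Package 'processx' is required for this function. ",
      "Please install it with: install.packages('processx')",
      call. = FALSE
    )
  }
  processx::run("echo", "ok")
}
```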
'trans_meta_t.R' 'trans_models_glm.R' 'trans_models_rf.R' 'trans_models_t.R' 'trans_preds_t.R' 'util.R' + 'util_dinamica.r' 'util_download.R' 'util_terra.R' diff --git a/NAMESPACE b/NAMESPACE index bf39ddc..6734768 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -30,6 +30,7 @@ S3method(validate,pred_data_t_bool) S3method(validate,pred_data_t_float) S3method(validate,pred_data_t_int) S3method(validate,pred_meta_t) +S3method(validate,reporting_t) S3method(validate,trans_meta_t) S3method(validate,trans_models_t) S3method(validate,trans_preds_t) @@ -43,6 +44,7 @@ export(as_neighbors_t) export(as_periods_t) export(as_pred_data_t) export(as_pred_meta_t) +export(as_reporting_t) export(as_trans_meta_t) export(as_trans_models_t) export(as_trans_preds_t) @@ -57,6 +59,7 @@ export(create_pred_meta_t) export(create_trans_meta_t) export(download_and_verify) export(evoland_db) +export(exec_dinamica) export(extract_using_coords_t) export(fit_glm) export(fit_ranger) @@ -65,6 +68,7 @@ export(gof_ranger) export(grrf_filter) export(parquet_duckdb) export(print_rowwise_yaml) +export(run_evoland_dinamica_sim) export(validate) importFrom(Rcpp,sourceCpp) importFrom(data.table,":=") diff --git a/R/coords_t.R b/R/coords_t.R index e99609a..d3b1359 100644 --- a/R/coords_t.R +++ b/R/coords_t.R @@ -91,10 +91,12 @@ print.coords_t <- function(x, nrow = 10, ...) { #' @describeIn coords_t Create a set of square coordinates #' @export create_coords_t_square <- function(epsg, extent, resolution, ...) { - if (!rlang::is_scalar_integerish(epsg)) { + if ( + !(length(epsg) == 1L && (is.integer(epsg) || (is.numeric(epsg) && epsg == as.integer(epsg)))) + ) { stop("epsg must be scalar integerish") } - if (!rlang::is_scalar_double(resolution)) { + if (!(length(resolution) == 1L && is.double(resolution))) { stop("resolution must be scalar double") } if (!inherits(extent, "SpatExtent")) { diff --git a/R/evoland_db.R b/R/evoland_db.R index 9aebd16..c352e5e 100644 --- a/R/evoland_db.R +++ b/R/evoland_db.R @@ -2,7 +2,7 @@ #' #' @description #' An R6 class that provides an interface to a folder-based data storage system -#' for the evoland package. Each table is stored as a parquet (or CSV) file. +#' for the evoland package. Each table is stored as a parquet (or JSON) file. #' This class uses DuckDB for in-memory SQL operations while persisting data #' to disk in parquet format for better compression. #' @@ -27,20 +27,16 @@ evoland_db <- R6::R6Class( #' @description #' Initialize a new evoland_db object #' @param path Character string. Path to the data folder. - #' @param default_format Character. Default file format ("parquet" or "csv"). - #' Default is "parquet". #' @param ... passed on to `set_report` #' #' @return A new `evoland_db` object initialize = function( path, - default_format = c("parquet", "csv"), ... ) { # Initialize parent class with spatial extension super$initialize( path = path, - default_format = default_format, extensions = "spatial" ) @@ -50,63 +46,13 @@ evoland_db <- R6::R6Class( invisible(self) }, - #' @description - #' Fetch data from storage with evoland-specific view support - #' @param table_name Character string. Name of the table to query. - #' @param where Character string. Optional WHERE clause for the SQL query. 
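The coords_t.R hunk replaces `rlang::is_scalar_integerish()` with an inline condition. Wrapped as a standalone helper for illustration (the helper name is hypothetical; the condition is verbatim from the diff):

```r
is_scalar_integerish <- function(epsg) {
  length(epsg) == 1L &&
    (is.integer(epsg) || (is.numeric(epsg) && epsg == as.integer(epsg)))
}
is_scalar_integerish(2056L)  # TRUE  - integer scalar
is_scalar_integerish(2056)   # TRUE  - whole-valued double
is_scalar_integerish(2056.5) # FALSE - fractional part
is_scalar_integerish(1:2)    # FALSE - not length one
```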
- #' @param limit integerish, limit the amount of rows to return - #' - #' @return A data.table - fetch = function(table_name, where = NULL, limit = NULL) { - # Check if this is a view (active binding) - if ( - # TODO these should probably not be active bindings, but instead methods with - # predefined query parameters - table_name %in% - c("lulc_meta_long_v", "pred_sources_v", "transitions_v", "extent", "coords_minimal") - ) { - return(self[[table_name]]) - } - - file_info <- private$get_file_path(table_name) - - if (!file_info$exists) { - stop("Table `", table_name, "` does not exist") - } - - super$fetch(table_name, where, limit) - }, - ### Setter methods ---- #' @description #' Set reporting metadata #' @param ... each named argument is entered into the table with the argument name #' as its key set_report = function(...) { - params <- list(...) - if (self$row_count("reporting_t") == 0L) { - # only upsert if these values are missing upon DB init - params[["report_name"]] <- - params[["report_name"]] %||% "evoland_scenario" - params[["report_name_pretty"]] <- - params[["report_name_pretty"]] %||% "Default Evoland Scenario" - params[["report_include_date"]] <- - params[["report_include_date"]] %||% "TRUE" - params[["creator_username"]] <- - params[["creator_username"]] %||% Sys.getenv("USER", unset = "unknown") - } - params[["last_opened"]] <- format(Sys.time(), "%Y-%m-%dT%H:%M:%SZ", tz = "UTC") - params[["last_opened_username"]] <- Sys.getenv("USER", unset = "unknown") - - self$commit( - data.table::as.data.table(list( - key = names(params), # cannot name a column "key" in data.table() - value = unlist(params) - )), - table_name = "reporting_t", - key_cols = "key", - method = "upsert" - ) + db_set_report(self, ...) }, #' @description diff --git a/R/evoland_db_neighbors.R b/R/evoland_db_neighbors.R index e264fcd..72ee112 100644 --- a/R/evoland_db_neighbors.R +++ b/R/evoland_db_neighbors.R @@ -110,7 +110,7 @@ evoland_db$set("public", "generate_neighbor_predictors", function() { }" ) self$execute( - "create or replace view pred_meta_upsert_v as + "create or replace view pred_meta_upsert_v as select name, pretty_name, description, orig_format, sources, unit, factor_levels from pred_meta_neighbors_t" ) diff --git a/R/evoland_db_tables.R b/R/evoland_db_tables.R index 42821c0..30a7d8a 100644 --- a/R/evoland_db_tables.R +++ b/R/evoland_db_tables.R @@ -35,17 +35,14 @@ create_table_binding <- function( map_cols = NULL, ... ) { - extra_args <- list(...) 
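`set_report()` now delegates to `db_set_report()`, which seeds default keys only when the reporting table is empty and stamps `last_opened` on every call. A minimal usage sketch, assuming a throwaway database path:

```r
# Reporting metadata round-trip through the refactored set_report().
db <- evoland_db$new(tempfile("evoland-db-"))  # initialize() calls set_report()
db$set_report(report_name = "ch_2060", report_name_pretty = "Switzerland 2060")
db$reporting_t  # key/value table incl. last_opened, creator_username, ...
```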
- function(x) { if (missing(x)) { - fetched <- self$fetch(table_name) - - if (!is.null(map_cols) && nrow(fetched) > 0) { - fetched <- convert_list_cols(fetched, map_cols, kv_df_to_list) - } + fetched <- self$fetch( + table_name = table_name, + map_cols = map_cols + ) - return(do.call(as_fn, c(list(fetched), extra_args))) + return(as_fn(fetched, ...)) } stopifnot(inherits(x, table_name)) @@ -205,3 +202,12 @@ evoland_db$set("active", "neighbors_t", function(x) { key_cols = c("id_coord_origin", "id_coord_neighbor") )(x) }) + +evoland_db$set("active", "reporting_t", function(x) { + create_table_binding( + self, + "reporting_t", + as_reporting_t, + key_cols = "key" + )(x) +}) diff --git a/R/intrv_meta_t.R b/R/intrv_meta_t.R index 0588de4..63b6f98 100644 --- a/R/intrv_meta_t.R +++ b/R/intrv_meta_t.R @@ -121,12 +121,13 @@ create_intrv_meta_t_row <- function( params ) { stopifnot( - rlang::is_scalar_character(name), - rlang::is_scalar_character(pretty_name), - rlang::is_scalar_character(description), + "name is not scalar character" = length(name) == 1L && is.character(name), + "pretty_name is not scalar character" = length(pretty_name) == 1L && is.character(pretty_name), + "description is not scalar character" = length(description) == 1L && is.character(description), is.integer(id_period_list), is.integer(id_trans_list), - rlang::is_scalar_logical(pre_allocation), + "pre_allocation is not scalar logical" = length(pre_allocation) == 1L && + is.logical(pre_allocation), inherits(sources, "data.frame"), ) check_missing_names(sources, c("url", "md5sum")) diff --git a/R/parquet_duckdb.R b/R/parquet_duckdb.R index 74c4486..20f463e 100644 --- a/R/parquet_duckdb.R +++ b/R/parquet_duckdb.R @@ -19,37 +19,22 @@ parquet_duckdb <- R6::R6Class( #' @field path Character string path to the data folder path = NULL, - #' @field default_format Default file format for new tables - default_format = NULL, - - #' @field writeopts Default write options for DuckDB - writeopts = NULL, + #' @field writeopts Write options for DuckDB parquet output + writeopts = "format parquet, compression zstd", #' @description #' Initialize a new parquet_duckdb object #' @param path Character string. Path to the data folder. - #' @param default_format Character. Default file format ("parquet" or "csv"). - #' Default is "parquet". #' @param extensions Character vector of DuckDB extensions to load (e.g., "spatial") #' #' @return A new `parquet_duckdb` object initialize = function( path, - default_format = c("parquet", "csv"), extensions = character(0) ) { # Create folder if it doesn't exist self$path <- ensure_dir(path) - # Set format / writeopts - self$default_format <- match.arg(default_format) - self$writeopts <- switch( - self$default_format, - parquet = "format parquet, compression zstd", - csv = "format csv", - stop(glue::glue("Unsupported format: {self$default_format}")) - ) - # Create in-memory connection for SQL operations self$connection <- DBI::dbConnect(duckdb::duckdb(), dbdir = ":memory:") @@ -97,15 +82,15 @@ parquet_duckdb <- R6::R6Class( }, #' @description - #' Attach a table from parquet/CSV file as a temporary table in DuckDB + #' Attach a table from parquet file as a temporary table in DuckDB #' @param table_name Character. Name of table to attach. #' @param columns Character vector. Optional SQL column selection, defaults to "*" #' @param where Character. Optional SQL WHERE clause to subset the table. 
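The intrv_meta_t.R hunk above swaps rlang's type assertions for `stopifnot()` with named conditions, where the name doubles as the error message. A minimal illustration of the idiom (wrapper name and argument are hypothetical):

```r
check_scalar_character <- function(name) {
  stopifnot(
    "name is not scalar character" = length(name) == 1L && is.character(name)
  )
  invisible(name)
}
check_scalar_character("densification")
# check_scalar_character(c("a", "b"))  # Error: name is not scalar character
```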
#' @return Invisible NULL (called for side effects) attach_table = function(table_name, columns = "*", where = NULL) { - file_info <- private$get_file_path(table_name) + file_path <- private$get_file_path(table_name) - if (!file_info$exists) { + if (!file.exists(file_path)) { stop(glue::glue("Table '{table_name}' does not exist at path: {self$path}")) } @@ -113,7 +98,7 @@ parquet_duckdb <- R6::R6Class( sql <- glue::glue( "create temp table {table_name} as ", "select {paste(columns, collapse = ', ')} ", - "from read_{file_info$format}('{file_info$path}')" + "from read_parquet('{file_path}')" ) if (!is.null(where)) { @@ -138,14 +123,14 @@ parquet_duckdb <- R6::R6Class( #' @param table_name Character string. Name of the table to query. #' @return Integer number of rows row_count = function(table_name) { - file_info <- private$get_file_path(table_name) + file_path <- private$get_file_path(table_name) - if (!file_info$exists) { + if (!file.exists(file_path)) { return(0L) } self$get_query( - glue::glue("select count(*) as n from read_{file_info$format}('{file_info$path}')") + glue::glue("select count(*) as n from read_parquet('{file_path}')") )[[1]] }, @@ -153,9 +138,8 @@ parquet_duckdb <- R6::R6Class( #' List all tables (files) in storage #' @return Character vector of table names list_tables = function() { - list.files(self$path, pattern = "\\.(parquet|csv)$", full.names = FALSE) |> + list.files(self$path, pattern = "\\.parquet$", full.names = FALSE) |> tools::file_path_sans_ext() |> - unique() |> sort() }, @@ -201,17 +185,23 @@ parquet_duckdb <- R6::R6Class( #' @param table_name Character string. Name of the table to query. #' @param where Character string. Optional WHERE clause for the SQL query. #' @param limit Integer. Optional limit on number of rows to return. + #' @param map_cols Vector of columns to be converted from key/value structs to R lists #' #' @return A data.table - fetch = function(table_name, where = NULL, limit = NULL) { - file_info <- private$get_file_path(table_name) + fetch = function( + table_name, + where = NULL, + limit = NULL, + map_cols = NULL + ) { + file_path <- private$get_file_path(table_name) - if (!file_info$exists) { - return(data.table::data.table()) + if (!file.exists(file_path)) { + stop("Table `", table_name, "` does not exist") } # build sql query - sql <- glue::glue("select * from read_{file_info$format}('{file_info$path}')") + sql <- glue::glue("from read_parquet('{file_path}')") if (!is.null(where)) { sql <- glue::glue("{sql} where {where}") @@ -220,7 +210,13 @@ parquet_duckdb <- R6::R6Class( sql <- glue::glue("{sql} limit {limit}") } - self$get_query(sql) + res <- self$get_query(sql) + + if (!is.null(map_cols) && nrow(res) > 0) { + return(convert_list_cols(res, map_cols, kv_df_to_list)) + } + + res }, #' @description @@ -229,26 +225,26 @@ parquet_duckdb <- R6::R6Class( #' @param where Character string. Optional WHERE clause; if NULL, deletes all rows. 
#' @return Number of rows deleted delete_from = function(table_name, where = NULL) { - file_info <- private$get_file_path(table_name) + file_path <- private$get_file_path(table_name) - if (!file_info$exists) { + if (!file.exists(file_path)) { return(0L) } count_before <- self$row_count(table_name) if (is.null(where)) { - file.remove(file_info$path) + file.remove(file_path) return(count_before) } self$execute(glue::glue( r"{ copy ( - select * from read_{file_info$format}('{file_info$path}') + select * from read_parquet('{file_path}') where not ({where}) ) - to '{file_info$path}' ({self$writeopts}) + to '{file_path}' ({self$writeopts}) }" )) @@ -285,16 +281,16 @@ parquet_duckdb <- R6::R6Class( "select column_name from (describe new_data_v)" )[[1]] - file_info <- private$get_file_path(table_name) + file_path <- private$get_file_path(table_name) - if (method == "overwrite" || !file_info$exists) { + if (method == "overwrite" || !file.exists(file_path)) { # in case overwrite explicitly required, or no previously existing data to # append or upsert to; rest of logic can be skipped return(private$commit_overwrite( table_name = table_name, all_cols = all_cols, autoincrement_cols = autoincrement_cols, - file_info = file_info + file_path = file_path )) } @@ -312,7 +308,7 @@ parquet_duckdb <- R6::R6Class( table_name = table_name, all_cols = all_cols, autoincrement_cols = autoincrement_cols, - file_info = file_info + file_path = file_path ) } else { private$commit_upsert( @@ -320,7 +316,7 @@ parquet_duckdb <- R6::R6Class( all_cols = all_cols, key_cols = key_cols, autoincrement_cols = autoincrement_cols, - file_info = file_info + file_path = file_path ) } }, @@ -374,16 +370,10 @@ parquet_duckdb <- R6::R6Class( cat(classes[1], "Object. Inherits from", toString(classes[-1]), "\n") } - compression <- if (grepl("compression\\s+(\\w+)", self$writeopts)) { - sub(".*compression\\s+(\\w+).*", "\\1", self$writeopts) - } else { - "none" - } - # Database info on one line cat( glue::glue( - "Database: {self$path} | Format: {self$default_format} | Compression: {compression}" + "Database: {self$path} | Write Options: {self$writeopts}" ), "\n\n" ) @@ -432,13 +422,12 @@ parquet_duckdb <- R6::R6Class( #' param x Data frame to commit. If character, in-duckdb-memory table. #' param table_name Character string table name #' param autoincrement_cols Character vector of column names to auto-increment - #' param map_cols Character vector of columns to convert to MAP format #' return Invisible NULL (called for side effects) commit_overwrite = function( table_name, all_cols, autoincrement_cols = character(0), - file_info + file_path ) { # Warn if overriding existing IDs if (length(intersect(autoincrement_cols, all_cols)) > 0) { @@ -460,7 +449,7 @@ parquet_duckdb <- R6::R6Class( copy ( select {select_expr} from new_data_v - ) to '{file_info$path}' ({self$writeopts}) + ) to '{file_path}' ({self$writeopts}) }" )) }, @@ -468,13 +457,12 @@ parquet_duckdb <- R6::R6Class( #' param x Data frame to commit. If character, in-duckdb-memory table. 
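With CSV support removed, `parquet_duckdb` now reads and writes parquet exclusively. A sketch of the storage API after this refactor; table and column names are illustrative, while `commit()`, `fetch()`, and `delete_from()` are the methods shown in the hunks above:

```r
db <- parquet_duckdb$new(tempfile("pq-"))
dt <- data.table::data.table(key = c("a", "b"), value = c("1", "2"))
db$commit(dt, table_name = "kv_t", key_cols = "key", method = "overwrite")
db$commit(
  data.table::data.table(key = "b", value = "99"),
  table_name = "kv_t", key_cols = "key", method = "upsert"
)
db$fetch("kv_t", where = "key = 'b'")  # value is now "99"
db$delete_from("kv_t", where = "key = 'a'")
# db$fetch("missing_t")  # now errors instead of returning an empty data.table
```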
#' param table_name Character string table name #' param autoincrement_cols Character vector of column names to auto-increment - #' param map_cols Character vector of columns to convert to MAP format #' return Invisible NULL (called for side effects) commit_append = function( table_name, all_cols, autoincrement_cols = character(0), - file_info + file_path ) { ordinary_cols <- setdiff(all_cols, autoincrement_cols) select_new <- glue::glue_collapse( @@ -496,7 +484,7 @@ parquet_duckdb <- R6::R6Class( select {select_new} from new_data_v ) - to '{file_info$path}' ({self$writeopts}) + to '{file_path}' ({self$writeopts}) }" )) }, @@ -506,14 +494,13 @@ parquet_duckdb <- R6::R6Class( #' param key_cols Character vector of columns that define uniqueness. If missing, #' use all columns starting with `id_` #' param autoincrement_cols Character vector of column names to auto-increment - #' param map_cols Character vector of columns to convert to MAP format #' return Invisible NULL (called for side effects) commit_upsert = function( table_name, all_cols, key_cols, autoincrement_cols = character(0), - file_info + file_path ) { # Update existing data ordinary_cols <- setdiff(all_cols, union(key_cols, autoincrement_cols)) @@ -568,34 +555,15 @@ parquet_duckdb <- R6::R6Class( }" )) - self$execute(glue::glue("copy {table_name} to '{file_info$path}' ({self$writeopts})")) + self$execute(glue::glue("copy {table_name} to '{file_path}' ({self$writeopts})")) }, - # Get file path and format for a table + # Get file path for a table # # @param table_name Character string table name - # @return List with path, format, and exists flag + # @return Character path to parquet file get_file_path = function(table_name) { - # Check for parquet first, then csv - parquet_path <- file.path(self$path, paste0(table_name, ".parquet")) - csv_path <- file.path(self$path, paste0(table_name, ".csv")) - - if (file.exists(parquet_path)) { - return(list(path = parquet_path, format = "parquet", exists = TRUE)) - } else if (file.exists(csv_path)) { - return(list(path = csv_path, format = "csv", exists = TRUE)) - } else { - # Return default format for new file - default_path <- file.path( - self$path, - paste0(table_name, ".", self$default_format) - ) - return(list( - path = default_path, - format = self$default_format, - exists = FALSE - )) - } + file.path(self$path, paste0(table_name, ".parquet")) }, # Register new_data_v table, optionally converting MAP columns @@ -639,7 +607,6 @@ parquet_duckdb <- R6::R6Class( # Cleanup new_data_v and related tables # - # @param map_cols Character vector indicating if MAP conversion was used # @return NULL (called for side effects) cleanup_new_data_v = function() { try(duckdb::duckdb_unregister(self$connection, "new_data_v"), silent = TRUE) diff --git a/R/pred_meta_t.R b/R/pred_meta_t.R index 6fa15c9..291f040 100644 --- a/R/pred_meta_t.R +++ b/R/pred_meta_t.R @@ -83,13 +83,18 @@ create_pred_meta_t <- function(pred_spec) { # path is pred_spec > pred_name > leaf_name # we pluck each element, then replace potential null using %||% pretty_name = unlist( - purrr::map2(pluck_wildcard(pred_spec, NA, "pretty_name"), pred_names, ~ .x %||% .y) + mapply( + function(x, y) x %||% y, + pluck_wildcard(pred_spec, NA, "pretty_name"), + pred_names, + SIMPLIFY = FALSE + ) ), description = unlist( - purrr::map(pluck_wildcard(pred_spec, NA, "description"), ~ .x %||% NA_character_) + lapply(pluck_wildcard(pred_spec, NA, "description"), function(x) x %||% NA_character_) ), orig_format = unlist( - 
purrr::map(pluck_wildcard(pred_spec, NA, "orig_format"), ~ .x %||% NA_character_) + lapply(pluck_wildcard(pred_spec, NA, "orig_format"), function(x) x %||% NA_character_) ), sources = lapply( pred_spec, @@ -102,7 +107,7 @@ create_pred_meta_t <- function(pred_spec) { } ), unit = unlist( - purrr::map(pluck_wildcard(pred_spec, NA, "unit"), ~ .x %||% NA_character_) + lapply(pluck_wildcard(pred_spec, NA, "unit"), function(x) x %||% NA_character_) ), factor_levels = pluck_wildcard(pred_spec, NA, "factor_levels") ) diff --git a/R/reporting_t.R b/R/reporting_t.R new file mode 100644 index 0000000..24627eb --- /dev/null +++ b/R/reporting_t.R @@ -0,0 +1,66 @@ +#' Create Reporting Table +#' +#' The reporting table holds information handy for writing out reports (tables, +#' graphs...) +#' +#' @name reporting_t +#' +#' @return A data.table of class "reporting_t" with columns: +#' - `key`: Unique character key +#' - `value`: Value as character +#' @export +as_reporting_t <- function(x) { + if (missing(x)) { + x <- data.table::data.table( + key = character(), + value = character() + ) + } + new_evoland_table( + x, + "reporting_t", + "key" + ) +} + +db_set_report <- function(self, ...) { + params <- list(...) + if (self$row_count("reporting_t") == 0L) { + # only upsert if these values are missing upon DB init + params[["report_name"]] <- + params[["report_name"]] %||% "evoland_scenario" + params[["report_name_pretty"]] <- + params[["report_name_pretty"]] %||% "Default Evoland Scenario" + params[["report_include_date"]] <- + params[["report_include_date"]] %||% "TRUE" + params[["creator_username"]] <- + params[["creator_username"]] %||% Sys.getenv("USER", unset = "unknown") + } + params[["last_opened"]] <- format(Sys.time(), "%Y-%m-%dT%H:%M:%SZ", tz = "UTC") + params[["last_opened_username"]] <- Sys.getenv("USER", unset = "unknown") + + self$reporting_t <- as_reporting_t(list( + key = names(params), # cannot name a column "key" in data.table() + value = unlist(params) + )) +} + + +#' @export +validate.reporting_t <- function(x, ...) { + NextMethod() + + data.table::setcolorder( + x, + c( + "key", + "value" + ) + ) + + stopifnot( + !anyDuplicated(x[["key"]]) + ) + + return(x) +} diff --git a/R/util_dinamica.r b/R/util_dinamica.r new file mode 100644 index 0000000..6b84823 --- /dev/null +++ b/R/util_dinamica.r @@ -0,0 +1,249 @@ +#' Dinamica Utility Functions +#' +#' Interact with Dinamica from R, see **Functions** section below. +#' +#' @name util_dinamica +NULL + +#' @describeIn util_dinamica Execute a Dinamica .ego file using `DinamicaConsole` +#' @param model_path Path to the .ego model file to run. Any submodels must be included +#' in a directory of the exact form `basename(modelpath)_ego_Submodels`, [see +#' wiki](https://csr.ufmg.br/dinamica/dokuwiki/doku.php?id=submodels) +#' @param disable_parallel Whether to disable parallel steps (default TRUE) +#' @param log_level Logging level (1-7, default NULL) +#' @param additional_args Additional arguments to pass to DinamicaConsole, see +#' `DinamicaConsole -help` +#' @param write_logfile bool, write stdout&stderr to a file? +#' @param echo bool, direct echo to console? +#' +#' @export + +exec_dinamica <- function( + model_path, + disable_parallel = TRUE, + log_level = NULL, + additional_args = NULL, + write_logfile = TRUE, + echo = FALSE +) { + if (!requireNamespace("processx", quietly = TRUE)) { + stop( + "Package 'processx' is required for this function. ", + "Please install it with: install.packages('processx')", + call. 
= FALSE + ) + } + + if (Sys.which("DinamicaConsole") == "") { + stop( + "DinamicaConsole not found on PATH. ", + "Please ensure Dinamica EGO is installed and DinamicaConsole is available." + ) + } + args <- character() + if (disable_parallel) { + args <- c(args, "-disable-parallel-steps") + } + if (!is.null(log_level)) { + args <- c(args, paste0("-log-level ", log_level)) + } + if (!is.null(additional_args)) { + args <- c(args, additional_args) + } + args <- c(args, model_path) + + if (write_logfile) { + logfile_path <- file.path( + dirname(model_path), + format(Sys.time(), "%Y-%m-%d_%Hh%Mm%Ss_dinamica.log") + ) + message("Logging to ", logfile_path) + + # Use bash process substitution with sed to strip ANSI codes and tee to logfile + res <- processx::run( + command = "bash", + args = c( + "-c", + sprintf( + "set -o pipefail; stdbuf -oL DinamicaConsole %s 2>&1 | sed 's/\\x1b\\[[0-9;]*m//g' | tee '%s'; exit ${PIPESTATUS[0]}", + paste(shQuote(args), collapse = " "), + logfile_path + ) + ), + error_on_status = FALSE, + echo = echo, + spinner = TRUE, + env = c( + "current", + DINAMICA_HOME = dirname(model_path) + ) + ) + } else { + res <- processx::run( + # If called directly, DinamicaConsole does not flush its buffer upon SIGTERM. + # stdbuf -oL forces flushing the stdout buffer after every line. + command = "stdbuf", + args = c( + "-oL", + "DinamicaConsole", # assume that $PATH is complete + args + ), + error_on_status = FALSE, + echo = echo, + spinner = TRUE, + env = c( + "current", + DINAMICA_HOME = dirname(model_path) + ) + ) + } + + if ( + res[["status"]] != 0L || + grepl("Dinamica EGO exited with an error", res[["stdout"]]) + ) { + stop( + "Dinamica registered an error. \n", + "Rerun with echo = TRUE or check logfile to see what went wrong." + ) + } + + invisible(res) +} + +#' @describeIn util_dinamica Set up evoland-specific Dinamica EGO files; execute using +#' [exec_dinamica()] +#' @param run_modelprechecks bool, Validate that everything's in place for a model run. +#' Will never be run if calibration. +#' @param config List of config params +#' @param calibration bool, Is this a calibration run? +#' @param work_dir Working dir, where to place ego files and control table +#' @param ... passed to [exec_dinamica()] +#' @export +run_evoland_dinamica_sim <- function( + run_modelprechecks = TRUE, + config = get_config(), + work_dir = format(Sys.time(), "%Y-%m-%d_%Hh%Mm%Ss"), + calibration = FALSE, + ... 
+) { + if (run_modelprechecks && !calibration) { + stopifnot(lulcc.modelprechecks()) + } + + # find raw ego files with decoded R/Python code chunks + decoded_files <- list.files( + path = system.file("dinamica_model", package = "evoland"), + pattern = "evoland.*\\.ego-decoded$", + full.names = TRUE, + recursive = TRUE + ) + + invisible(lapply(decoded_files, function(decoded_file) { + # Determine relative path and new output path with .ego extension + base_dir <- system.file("dinamica_model", package = "evoland") + rel_path <- substring(decoded_file, nchar(base_dir) + 2) + out_path <- sub("\\.ego-decoded$", ".ego", file.path(work_dir, rel_path)) + dir.create(dirname(out_path), showWarnings = FALSE, recursive = TRUE) + process_dinamica_script(decoded_file, out_path) + })) + + # move simulation control csv into place + file.copy( + if (calibration) config[["calibration_ctrl_tbl_path"]] else config[["ctrl_tbl_path"]], + file.path(work_dir, "simulation_control.csv"), + overwrite = TRUE + ) + + message("Starting to run model with Dinamica EGO") + exec_dinamica( + model_path = file.path(work_dir, "evoland.ego"), + ... + ) +} + +#' @describeIn util_dinamica Encode or decode raw R and Python code chunks in .ego +#' files and their submodels to/from base64 +#' @param infile Input file path. Treated as input if passed AsIs using `base::I()` +#' @param outfile Output file path (optional) +#' @param mode Character, either "encode" or "decode" +#' @param check Default TRUE, simple check to ensure that you're handling what you're expecting + +process_dinamica_script <- function(infile, outfile, mode = "encode", check = TRUE) { + if (!requireNamespace("base64enc", quietly = TRUE)) { + stop( + "Package 'base64enc' is required for this function. ", + "Please install it with: install.packages('base64enc')", + call. = FALSE + ) + } + + mode <- match.arg(mode, c("encode", "decode")) + if (inherits(infile, "AsIs")) { + file_text <- unclass(infile) + } else { + # read the input file as a single string + file_text <- readChar(infile, file.info(infile)$size) + } + + # match the Calculate R or Python Expression blocks - guesswork involved + pattern <- r'(:= Calculate(?:Python|R)Expression "(\X*?)" (?:\.no )?\{\{)' + match_positions <- gregexpr(pattern, file_text, perl = TRUE)[[1]] + if (match_positions[1] == -1) { + # none found (position -1) + matches <- character(0) + } else { + # extracts first the full match as char vect and then the captured group + full_matches <- regmatches(file_text, match_positions) + # Extract the first capture group for each match + all_matches <- lapply(full_matches, function(m) { + cap <- regmatches(m, regexec(pattern, m, perl = TRUE))[[1]] + cap[2] + }) + matches <- do.call(c, all_matches) + } + + if (check) { + non_base64_chars_present <- grepl("[^A-Za-z0-9+=\\n/]", matches) + if (mode == "encode" && any(!non_base64_chars_present)) { + stop( + "There are no non-base64 chars in one of the matched patterns, which seems ", + "unlikely for an unencoded code chunk. Override this check with ", + "check = FALSE if you're sure that this is an unencoded file." + ) + } else if (mode == "decode" && any(non_base64_chars_present)) { + stop( + "There are non-base64 chars in one of the matched patterns, which seems ", + "unlikely for an encoded code chunk. Override this check with ", + "check = FALSE if you're sure that this is an unencoded file." 
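A usage sketch for the two exported Dinamica entry points; the paths and log level are placeholders, `DinamicaConsole` must be on `PATH`, and processx must be installed:

```r
# Run a staged model directly; stdout is tee'd to a timestamped logfile.
res <- exec_dinamica(
  model_path = "sim_run/evoland.ego",
  disable_parallel = TRUE,
  log_level = 4,
  write_logfile = TRUE,
  echo = FALSE
)
# Or let run_evoland_dinamica_sim() stage the .ego files and control table first:
run_evoland_dinamica_sim(work_dir = "sim_run", calibration = FALSE, echo = TRUE)
```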
+ ) + } + } + + if (length(matches) > 0) { + encoder_decoder <- if (mode == "encode") { + function(code) base64enc::base64encode(charToRaw(code)) + } else { + function(code) rawToChar(base64enc::base64decode(code)) + } + # matches contains the captured R/python code OR base64-encoded code + encoded_vec <- vapply(matches, encoder_decoder, character(1), USE.NAMES = FALSE) + # replace each original code with its base64 encoded version + for (i in seq_along(encoded_vec)) { + file_text <- sub( + pattern = matches[i], + replacement = encoded_vec[i], + x = file_text, + fixed = TRUE + ) + } + } + + # Write to outfile if specified, otherwise return the substituted string + if (!missing(outfile)) { + writeChar(file_text, outfile, eos = NULL) + invisible(outfile) + } else { + file_text + } +} diff --git a/R/util_download.R b/R/util_download.R index ed94b50..7f19f17 100644 --- a/R/util_download.R +++ b/R/util_download.R @@ -32,12 +32,12 @@ download_and_verify <- function( .( url, md5sum, - found_files = purrr::map(file.path(target_dir, md5sum), list.files, full.names = TRUE) + found_files = lapply(file.path(target_dir, md5sum), list.files, full.names = TRUE) ) ] # can only handle one file per md5 folder - all_dt[, no_found_files := purrr::map_int(found_files, length)] + all_dt[, no_found_files := vapply(found_files, length, integer(1))] if (nrow(too_many_files <- all_dt[no_found_files > 1])) { stop( "Investigate: found more than one file for \n ", @@ -54,9 +54,10 @@ download_and_verify <- function( # if download, fetch to temp file in target_dir, then move to md5 folder once known # if not download, just return details on existing file, not rechecking md5sum - downloaded_dt <- data.table::rbindlist(purrr::pmap( - .l = all_dt, - .f = function(url, found_files, to_download, md5sum, ...) { + downloaded_dt <- data.table::rbindlist(.mapply( + dots = all_dt, + MoreArgs = NULL, + FUN = function(url, found_files, to_download, md5sum, ...) 
{ if (to_download) { message("Downloading: ", url) temp_file <- tempfile(tmpdir = target_dir) diff --git a/R/util_terra.R b/R/util_terra.R index 463c772..4921173 100644 --- a/R/util_terra.R +++ b/R/util_terra.R @@ -20,7 +20,7 @@ extract_using_coords_t.SpatRaster <- function(x, coords_t, na_omit = TRUE) { pts <- coords_t[, .(id_coord, lon, lat)] |> data.table::as.data.table() |> - terra::vect() + terra::vect(crs = terra::crs(x)) # TODO we need to add an EPSG attr to coords out <- terra::extract( @@ -52,7 +52,7 @@ extract_using_coords_t.SpatVector <- function(x, coords_t, na_omit = TRUE) { pts <- coords_t[, .(lon, lat)] |> as.matrix() |> - terra::vect() + terra::vect(crs = terra::crs(x)) # TODO we need to add an EPSG attr to coords tmp <- terra::extract( diff --git a/inst/dinamica_models/allocation.ego-decoded b/inst/dinamica_models/allocation.ego-decoded new file mode 100644 index 0000000..e0012fa --- /dev/null +++ b/inst/dinamica_models/allocation.ego-decoded @@ -0,0 +1,70 @@ +@charset = UTF-8 +@date = 2025-Dec-04 16:00:58 +@version = 8.7.0.20250814 +@submodel.import = CreateCubeOfProbabilityMaps { { \"transitionMatrix\" : Table, \"inputFolder\" : Folder } { } { \"probabilityMapsRasterCube\" : Map } }; AllocateTransitions { { \"lanscape\" : CategoricalMap, \"probabilities\" : Map, \"transitionMatrix\" : TransitionMatrix, \"percentOfTransitionsByExpansion\" : PercentMatrix, \"patchExpansionParameters\" : TransitionFunctionParameterMatrix, \"patchGenerationParameters\" : TransitionFunctionParameterMatrix } { \"printTransitionInfo\" : BooleanValue } { \"resultingLanscape\" : CategoricalMap } }; CalcSimilarityOfDifferences { { \"initialMap\" : CategoricalMap, \"observedMap\" : CategoricalMap, \"simulatedMap\" : CategoricalMap } { \"useExponentialDecay\" : BooleanValue, \"windowSize\" : PositiveIntegerValue, \"printSimilarities\" : BooleanValue, \"exponentialDecayDivisor\" : RealValue } { \"similarityMap\" : Map, \"similarity\" : RealValue } } +Script {{ + getVariables := CalculateRExpression " + message(Sys.time(), " - getEnvironmentVariables") + outputString("probabilityMapDir", file.path(getwd(), "probability_map_dir")) + outputString("anteriorMapPath", file.path(getwd(), "anterior.tif")) + outputString("posteriorMapPath", file.path(getwd(), "posterior.tif")) + + # the 2 is for two key columns, which i don't know how to set when using LoadTable + outputTable( + "expansionTable", + read.csv(file.path(getwd(), "expansion_table.csv")), + 2 + ) + outputTable( + "patcherTable", + read.csv(file.path(getwd(), "patcher_table.csv")), + 2 + ) + outputTable( + "transitionRatesTable", + read.csv(file.path(getwd(), "trans_rates.csv")), + 2 + ) + " .no {{ }}; + + probabilityMapDir := ExtractStructString getVariables $"(probabilityMapDir)"; + anteriorMapPath := ExtractStructString getVariables $"(anteriorMapPath)"; + posteriorMapPath := ExtractStructString getVariables $"(posteriorMapPath)"; + + // this includes From*, To*, Rate + transitionRatesTable := ExtractStructTable getVariables $"(transitionRatesTable)"; + // this includes From*, To*, Perc_expander + expansionTable := ExtractStructTable getVariables $"(expansionTable)"; + // this includes From*, To*, Mean_Patch_Size, Patch_Size_Variance, Patch_Isometry + patcherTable := ExtractStructTable getVariables $"(patcherTable)"; + + anteriorMap := LoadCategoricalMap { + filename = anteriorMapPath, + nullValue = .none, + storageMode = .default + }; + + @alias = Create cube of probability maps + cubeOfProbMaps := CreateCubeOfProbabilityMaps 
transitionRatesTable probabilityMapDir; + + @alias = Allocate transitions + posteriorMap := AllocateTransitions { + lanscape = anteriorMap, + probabilities = cubeOfProbMaps, + transitionMatrix = transitionRatesTable, + percentOfTransitionsByExpansion = expansionTable, + patchExpansionParameters = patcherTable, + patchGenerationParameters = patcherTable, + printTransitionInfo = .yes + }; + + // Simulated LULC map saved at every time step. Receives time info from control + // operator and adds a suffix to the file path. + @alias = Save simulated LULC + SaveMap { + map = posteriorMap, + filename = posteriorMapPath, + useCompression = .yes + }; +}}; + diff --git a/inst/dinamica_models/allocation_ego_Submodels/AllocateTransitions.ego-decoded b/inst/dinamica_models/allocation_ego_Submodels/AllocateTransitions.ego-decoded new file mode 100644 index 0000000..8688704 --- /dev/null +++ b/inst/dinamica_models/allocation_ego_Submodels/AllocateTransitions.ego-decoded @@ -0,0 +1,120 @@ +@charset = UTF-8 +@submodel.name = AllocateTransitions +@author = Dinamica Team +@organization = CSR / UFMG +@submodel.description = Performs transition allocation on a landscape map using Expander and Patcher according to a transition matrix specifying the net rates, a probability map and other parameters defining patch geometry. +@submodel.group = Simulation +@showproperties = yes +@date = 2023-Nov-22 15:03:10 +@version = 7.6.0.20231102 +Script {{ + // The landscape map. + @submodel.in.constant.advanced = no + @submodel.in.constant.description = The initial landscape map. + @submodel.in.constant.name = lanscape + @submodel.in.constant.optional = no + @submodel.in.constant.order = 1 + landscape := CategoricalMap .UNBOUND; + + // This matriz defines the percentage of total transitions performed by expansion + // of existent patches (using Expander operator). The complement of this matrix + // defines the percentage of transitions performed by generation of new patches + // (using Patcher operator). + @submodel.in.constant.advanced = no + @submodel.in.constant.description = This matriz defines the percentage of total transitions performed by expansion of existent patches (using Expander operator). The complement of this matrix defines the percentage of transitions performed by generation of new patches (using Patcher operator). + @submodel.in.constant.name = percentOfTransitionsByExpansion + @submodel.in.constant.optional = no + @submodel.in.constant.order = 4 + percentOfTransitionsByExpansion := PercentMatrix .UNBOUND; + + // The transition matrix defining net transition rates. + @submodel.in.constant.advanced = no + @submodel.in.constant.description = The matriz defining the net rate of each transition. + @submodel.in.constant.name = transitionMatrix + @submodel.in.constant.optional = no + @submodel.in.constant.order = 3 + transitionMatrix := TransitionMatrix .UNBOUND; + + // The parameters used to expand existent patches. The parameters are used by + // Expander operator. + @submodel.in.constant.advanced = no + @submodel.in.constant.description = The parameters used to expand existent patches. The parameters are used by Expander operator. + @submodel.in.constant.name = patchExpansionParameters + @submodel.in.constant.optional = no + @submodel.in.constant.order = 5 + patchExpansionParameters := TransitionFunctionParameterMatrix .UNBOUND; + + // The parameters used to generate new patches. These parameters are used by + // Patcher operator. 
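The `.ego-decoded` files in these hunks are the human-readable form that `run_evoland_dinamica_sim()` re-encodes via the internal `process_dinamica_script()` helper before execution. A round-trip sketch; the helper is not exported (hence `:::`), and the file path is illustrative:

```r
src <- "inst/dinamica_models/allocation.ego-decoded"
decoded <- readChar(src, file.info(src)$size)
encoded <- evoland:::process_dinamica_script(I(decoded), mode = "encode")
roundtrip <- evoland:::process_dinamica_script(I(encoded), mode = "decode")
identical(roundtrip, decoded)  # TRUE if encode/decode are inverses
```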
+ @submodel.in.constant.advanced = no + @submodel.in.constant.description = The parameters used to generate new patches. These parameters are used by Patcher operator. + @submodel.in.constant.name = patchGenerationParameters + @submodel.in.constant.optional = no + @submodel.in.constant.order = 6 + patchGenerationParameters := TransitionFunctionParameterMatrix .UNBOUND; + + // The probability map. + @submodel.in.constant.advanced = no + @submodel.in.constant.description = The map defining the probability of occurrence of each transition. + @submodel.in.constant.name = probabilities + @submodel.in.constant.optional = no + @submodel.in.constant.order = 2 + probabilities := Map .UNBOUND; + + // If true, print allocation info on the application console. This is useful to + // help identify problems in the transition rates and probability maps. + @alias = print transition info? + @submodel.in.constant.advanced = no + @submodel.in.constant.description = If true, print allocation info on the application console. This is useful to help identify problems in the transition rates and probability maps. + @submodel.in.constant.name = printTransitionInfo + @submodel.in.constant.optional = yes + @submodel.in.constant.order = 7 + printTransitionInfo := BooleanValue .yes; + + // Calculate the quantity of changes to be executed. + @collapsed = no + _ := Group .none {{ + transitionRates := CalcChangeMatrix landscape transitionMatrix; + + @alias = split transition rates + modulatedChanges complementaryChanges := ModulateChangeMatrix transitionRates percentOfTransitionsByExpansion; + }}; + + // Execute the transition functions. + @collapsed = no + _ := Group .none {{ + LogPolicy .debug2 printTransitionInfo {{ + @alias = landscape expander + changedLandscape corrodedProbabilities remainingChanges := Expander { + landscape = landscape, + probabilities = probabilities, + changes = modulatedChanges, + transitionParameters = patchExpansionParameters, + neighborWindowLines = 3, + neighborWindowColumns = 3, + pruneFactor = 10 + }; + }}; + + combinedTransitionRates := AddChangeMatrix complementaryChanges remainingChanges; + + LogPolicy .debug2 printTransitionInfo {{ + landscapePatcher _ _ := Patcher { + landscape = changedLandscape, + probabilities = corrodedProbabilities, + changes = combinedTransitionRates, + transitionParameters = patchGenerationParameters, + neighborWindowLines = 3, + neighborWindowColumns = 3, + pruneFactor = 10 + }; + }}; + }}; + + // The landscape map resulting from the transition allocation. + @alias = resulting landscape + @submodel.out.object.description = The resulting landscape map. + @submodel.out.object.name = resultingLanscape + @submodel.out.object.order = 1 + _ := CategoricalMap landscapePatcher; +}}; diff --git a/inst/dinamica_models/allocation_ego_Submodels/CalcSimilarityOfDifferences.ego-decoded b/inst/dinamica_models/allocation_ego_Submodels/CalcSimilarityOfDifferences.ego-decoded new file mode 100644 index 0000000..70cda48 --- /dev/null +++ b/inst/dinamica_models/allocation_ego_Submodels/CalcSimilarityOfDifferences.ego-decoded @@ -0,0 +1,163 @@ +@charset = UTF-8 +@submodel.title = Calc Similarity Of Differences +@author = Dinamica Team +@organization = CSR / UFMG +@submodel.description = Calculate the minimum fuzzy similarity using maps of changes. 
This operator calculates the map of differences between a initial map (first one of a time series) and an observed map (last one of a time series) and the differences between a initial map and a simulated map (the simulated one corresponding to the observed map). Then, these maps of differences are used by CalcReciprocalSimilarityMap to derive the minimum similary value. +@submodel.group = Validation +@notes = See the help for a detailed description of this method. +@showproperties = yes +@date = 2023-Nov-22 15:03:10 +@version = 7.6.0.20231102 +Script {{ + // Initial map. + @submodel.in.constant.description = Initial Map. + @submodel.in.constant.optional = no + @submodel.in.constant.order = 0 + @submodel.in.constant.title = Initial Map + initialMap := CategoricalMap .UNBOUND; + + // Simulated map. + @submodel.in.constant.description = Simulated Map. + @submodel.in.constant.optional = no + @submodel.in.constant.order = 2 + @submodel.in.constant.title = Simulated Map + simulatedMap := CategoricalMap .UNBOUND; + + // Observed (reference) map. + @submodel.in.constant.description = Observed (reference) map. + @submodel.in.constant.optional = no + @submodel.in.constant.order = 1 + @submodel.in.constant.title = Observed Map + observedMap := CategoricalMap .UNBOUND; + + // If true, print the value of the mean similarities on the application console. + @alias = print similarities? + @submodel.in.constant.description = If true, print the values of the mean similarities on the application console. + @submodel.in.constant.optional = yes + @submodel.in.constant.order = 5 + @submodel.in.constant.title = Print Similarities + printSimilarities := BooleanValue .yes; + + // Only odd numbers are accepted. + @submodel.in.constant.description = Only odd numbers are accepted. + @submodel.in.constant.optional = yes + @submodel.in.constant.order = 4 + @submodel.in.constant.title = Window Size + windowSize := PositiveIntegerValue 11; + + // If true, the similarity is calculated using an exponential decay function + // truncated by the window size. Otherwise, a constant function is used within the + // specified window. + @submodel.in.constant.description = If true, the similarity is calculated using an exponential decay function truncated by the window size. Otherwise, a constant function is used within the specified window. + @submodel.in.constant.optional = yes + @submodel.in.constant.order = 3 + @submodel.in.constant.title = Use Exponential Decay + useExponentialDecay := BooleanValue .yes; + + // + // === + // Calculate map of observed changes. + observedChanges := CalculateCategoricalMap { + expression = [ + if i1 = i2 then + null + else + i2 + ], + cellType = .int32, + nullValue = .default, + resultIsSparse = .no, + resultFormat = .none + } {{ + NumberMap initialMap 1; + + NumberMap observedMap 2; + }}; + + // + // === + // Calculate map of simulated changes. + simulatedChanges := CalculateCategoricalMap { + expression = [ + if i1 = i2 then + null + else + i2 + ], + cellType = .int32, + nullValue = .default, + resultIsSparse = .no, + resultFormat = .none + } {{ + NumberMap initialMap 1; + + NumberMap simulatedMap 2; + }}; + + // Attenuation factor of the exponential decay function. + @submodel.in.constant.advanced = yes + @submodel.in.constant.description = Attenuation factor of the exponential decay function. 
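CalcSimilarityOfDifferences compares maps of *changes* rather than raw landscapes: cells where the initial and final class agree are nulled, and changed cells keep the new class. The same expression in terra, for readers validating outside Dinamica (function name is hypothetical):

```r
library(terra)
# Equivalent of Dinamica's `if i1 = i2 then null else i2` change-map expression.
changes_map <- function(initial, final) {
  ifel(initial == final, NA, final)
}
```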
+ @submodel.in.constant.optional = yes + @submodel.in.constant.order = 6 + @submodel.in.constant.title = Exponential Decay Divisor + exponentialDecayDivisor := RealValue 2; + + LogPolicy .result printSimilarities {{ + @alias = similarities + firstSimilarity firstMean secondSimilarity secondMean := CalcReciprocalSimilarityMap { + firstMap = observedChanges, + secondMap = simulatedChanges, + windowSize = windowSize, + useExponentialDecay = useExponentialDecay, + cellType = .float32, + nullValue = .default, + exponentialDecayDivisor = exponentialDecayDivisor + }; + }}; + + minimumSimilarity := CalculateValue [ + min(v1, v2) + ] .none {{ + NumberValue firstMean 1; + + NumberValue secondMean 2; + }}; + + // Select the minimum similarity map. + @collapsed = yes + _ := Group .none {{ + @alias = is 1st similarity less than 2nd similarity? + is1StSimilarityLessThan2NdSimilarity := CalculateValue [ + v1 < v2 + ] .none {{ + NumberValue firstMean 1; + + NumberValue secondMean 2; + }}; + + _ := IfThen is1StSimilarityLessThan2NdSimilarity .none {{ + firstSimilarityMap := Map firstSimilarity; + }}; + + @collapsed = no + _ := IfNotThen is1StSimilarityLessThan2NdSimilarity .none {{ + secondSimilarityMap := Map secondSimilarity; + }}; + + minimumSimilarityMap := MapJunction firstSimilarityMap secondSimilarityMap; + }}; + + @alias = similarity map + @submodel.out.object.description = The minimum similarity map. + @submodel.out.object.optional = no + @submodel.out.object.order = 0 + @submodel.out.object.title = Similarity Map + _ := Map minimumSimilarityMap; + + @alias = similarity + @submodel.out.object.description = The minimum similarity index. + @submodel.out.object.optional = no + @submodel.out.object.order = 1 + @submodel.out.object.title = Similarity + _ := RealValue minimumSimilarity; +}}; \ No newline at end of file diff --git a/inst/dinamica_models/allocation_ego_Submodels/CreateCubeOfProbabilityMaps.ego-decoded b/inst/dinamica_models/allocation_ego_Submodels/CreateCubeOfProbabilityMaps.ego-decoded new file mode 100644 index 0000000..3881e0f --- /dev/null +++ b/inst/dinamica_models/allocation_ego_Submodels/CreateCubeOfProbabilityMaps.ego-decoded @@ -0,0 +1,185 @@ +@charset = UTF-8 +@submodel.name = CreateCubeOfProbabilityMaps +@submodel.description = "Stack various probabilities maps into a map cube renaming each one according to the transitions present in the transition matrix. Probability maps must be ordered using a prefix like 1_ , e.g 1_filename.tif according to the number of the row of the respective transition. + +The map cube is used as input for both patcher and expander operators" +@submodel.group = Map Algebra +@submodel.import = ListFilenames { { \"folder\" : Folder, \"filePattern\" : String } { \"searchSubFoldersRecursively\" : BooleanValue } { \"files\" : Table } }; ExpandTableToUniqueKeys { { \"constant\" : Table } { } { \"table\" : Table } } +@date = 2023-Nov-22 15:03:10 +@version = 7.6.0.20231102 +Script {{ + // Folder containing files of probability maps, they must be ordered according to + // the number of rows of the transition matrix using as a prefix a number, e.g. + // 1_name.tif. Each map will be a layer in the cube raster set + @submodel.in.constant.advanced = no + @submodel.in.constant.description = Folder containing files of probability maps, they must be ordered according to the number of rows of the transition matrix using as a prefix a number, e.g. 1_name.tif. 
Each map will be a layer in the cube raster set + @submodel.in.constant.name = inputFolder + @submodel.in.constant.optional = no + @submodel.in.constant.order = 2 + inputFolder := Folder ".."; + + listFilenames6935 := ListFilenames { + folder = inputFolder, + filePattern = $"(*.tif)", + searchSubFoldersRecursively = .no + }; + + getFileNames := GetTableColumn listFilenames6935 2; + + // the transiton matrix + @submodel.in.constant.advanced = no + @submodel.in.constant.description = the transiton matrix + @submodel.in.constant.name = transitionMatrix + @submodel.in.constant.optional = no + @submodel.in.constant.order = 1 + transitionMatrix := Table .UNBOUND; + + expandTableToUniqueKeys15358 := ExpandTableToUniqueKeys transitionMatrix; + + @collapsed = no + _ := ForEach expandTableToUniqueKeys15358 .none {{ + step = step; + + row := Step step; + + getTo := GetTableValue { + table = expandTableToUniqueKeys15358, + keys = row, + column = "To", + valueIfNotFound = .none + }; + + getFrom := GetTableValue { + table = expandTableToUniqueKeys15358, + keys = row, + column = "From", + valueIfNotFound = .none + }; + + muxTableLayerNames := MuxTable [ + "Key*", "layer_sufix#str", + ] setTableCellValue15385; + + createLayerName := CreateString $"()" {{ + NumberString $"(_)" 1; + + NumberString $"(to)" 2; + + NumberString $"(probability)" 3; + + NumberValue getFrom 1; + + NumberValue getTo 2; + }}; + + setTableCellValue15385 := SetTableCellValue { + table = muxTableLayerNames, + column = 2, + keys = row, + value = createLayerName + }; + }}; + + getTableValue13738 := GetTableValue { + table = getFileNames, + keys = [ 1 ], + column = 2, + valueIfNotFound = .none + }; + + loadFirstMap := LoadMap { + filename = getTableValue13738, + nullValue = .none, + storageMode = .default, + suffixDigits = 0, + step = .none, + workdir = .none + }; + + convertToDouble := CalculateMap { + expression = [ + i1 + ], + cellType = .float32, + nullValue = .default, + resultIsSparse = .no, + resultFormat = .none + } {{ + NumberMap loadFirstMap 1; + }}; + + tableJunction906 := TableJunction setTableCellValue15385 [ + "Key*", "Value", + ]; + + @collapsed = no + _ := ForEach (TableJunction setTableCellValue15385 [ + "Key*", "Value", + ]) .none {{ + step0 = step; + + layer := Step step0; + + getTableValue92360 := GetTableValue { + table = getFileNames, + keys = layer, + column = 2, + valueIfNotFound = .none + }; + + loadEvidence := LoadMap { + filename = getTableValue92360, + nullValue = .none, + storageMode = .default, + suffixDigits = 0, + step = step0, + workdir = .none + }; + + muxMap27567 := MuxMap convertToDouble insertMapLayer27563; + + firstLayer := CalculateValue [ + if v1 = 1 then + 1 + else + 0 + ] .none {{ + NumberValue layer 1; + }}; + + getTableValue9236 := GetTableValue { + table = tableJunction906, + keys = layer, + column = 2, + valueIfNotFound = .none + }; + + @alias = convert to double + convertToDouble0 := CalculateMap { + expression = [ + i1 + ], + cellType = .float32, + nullValue = .default, + resultIsSparse = .no, + resultFormat = .none + } {{ + NumberMap loadEvidence 1; + }}; + + insertMapLayer27563 := InsertMapLayer { + map = muxMap27567, + layer = convertToDouble0, + layerPosition = layer, + layerName = getTableValue9236, + replaceLayer = firstLayer + }; + }}; + + // Value or constant representing a map provided as the functor input. + @alias = Object + @submodel.out.object.description = Value or constant representing a map provided as the functor input. 
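CreateCubeOfProbabilityMaps matches files to transitions purely by the numeric prefix, ordered like the rows of the transition matrix. A sketch of generating conforming file names from a rates table; the From/To/Rate columns follow `trans_rates.csv` in allocation.ego-decoded, while the name suffix is arbitrary:

```r
trans_rates <- data.frame(From = c(1L, 1L), To = c(2L, 3L), Rate = c(0.02, 0.01))
sprintf(
  "probability_map_dir/%d_from%d_to%d.tif",
  seq_len(nrow(trans_rates)), trans_rates$From, trans_rates$To
)
#> [1] "probability_map_dir/1_from1_to2.tif" "probability_map_dir/2_from1_to3.tif"
```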
+    @submodel.out.object.name = probabilityMapsRasterCube
+    @submodel.out.object.order = 1
+    _ := Map (MapJunction insertMapLayer27563 convertToDouble);
+}};
diff --git a/inst/dinamica_models/allocation_ego_Submodels/ExpandTableToUniqueKeys.ego-decoded b/inst/dinamica_models/allocation_ego_Submodels/ExpandTableToUniqueKeys.ego-decoded new file mode 100644 index 0000000..19c08b6 --- /dev/null +++ b/inst/dinamica_models/allocation_ego_Submodels/ExpandTableToUniqueKeys.ego-decoded @@ -0,0 +1,40 @@
+@charset = UTF-8
+@submodel.name = ExpandTableToUniqueKeys
+@submodel.description = Create a new Table containing the original table plus a key column with a unique index for each row of that Table.
+@submodel.group = Table
+@date = 2023-Nov-22 15:03:10
+@version = 7.6.0.20231102
+Script {{
+    @submodel.in.constant.advanced = no
+    @submodel.in.constant.name = constant
+    @submodel.in.constant.optional = no
+    @submodel.in.constant.order = 1
+    constant := Table .UNBOUND;
+
+    @collapsed = no
+    calculatePythonExpression475 := CalculatePythonExpression "newHeader = dinamica.inputs["t1"][ 0 ]
+newColumnName = "UniqueIndex"
+while newColumnName in newHeader:
+    newColumnName += "_"
+
+for i, c in enumerate(newHeader):
+    newHeader[i] = c.replace( "*", "" )
+
+newHeader.insert(0, newColumnName)
+newTable = [ newHeader ]
+uniqueIndex = 1
+for rowIndex in range(1, len(dinamica.inputs["t1"])):
+    row = dinamica.inputs["t1"][ rowIndex ]
+    row.insert(0, uniqueIndex)
+    newTable.append(row)
+    uniqueIndex += 1
+
+dinamica.outputs["table"] = newTable" {{
+        NumberTable constant 1;
+    }};
+
+    @submodel.out.table.description = Extracted table.
+    @submodel.out.table.name = table
+    @submodel.out.table.order = 1
+    _ := ExtractStructTable calculatePythonExpression475 $"(table)";
+}};
diff --git a/inst/dinamica_models/allocation_ego_Submodels/ListFilenames.ego-decoded b/inst/dinamica_models/allocation_ego_Submodels/ListFilenames.ego-decoded new file mode 100644 index 0000000..ce3fb37 --- /dev/null +++ b/inst/dinamica_models/allocation_ego_Submodels/ListFilenames.ego-decoded @@ -0,0 +1,68 @@
+@charset = UTF-8
+@submodel.name = ListFilenames
+@author = Hermann Rodrigues
+@organization = CSR / UFMG
+@submodel.description = "List the filenames in the given folder (and maybe its sub-folders) that match the given file pattern. The resulting filenames include their corresponding file paths.
+The functionality provided by this submodel is similar to the one provided by the \"List Filenames In Folder\" submodel, except that this implementation uses Python internally. However, this version is simpler to use."
+@submodel.group = Files
+@date = 2023-Nov-22 15:03:10
+@version = 7.6.0.20231102
+Script {{
+    // Folder where the files are located.
+    @submodel.in.constant.advanced = no
+    @submodel.in.constant.description = Folder where the files are located.
+    @submodel.in.constant.name = folder
+    @submodel.in.constant.optional = no
+    @submodel.in.constant.order = 1
+    folder := Folder .UNBOUND;
+
+    // If true, also search for files in the sub-folders of the given folder
+    // recursively. Otherwise, the result only includes files from the given folder.
+    @submodel.in.constant.advanced = yes
+    @submodel.in.constant.description = If true, also search for files in the sub-folders of the given folder recursively. Otherwise, the result only includes files from the given folder. 
+    @submodel.in.constant.name = searchSubFoldersRecursively
+    @submodel.in.constant.optional = yes
+    @submodel.in.constant.order = 3
+    searchSubFoldersRecursively := BooleanValue .no;
+
+    // Pattern of file names that will be returned. The pattern can include all
+    // wildcards supported by the OS shell. Ex: *.*, *.csv, my_image[0-9].tif, etc.
+    @submodel.in.constant.advanced = no
+    @submodel.in.constant.description = Pattern of file names that will be returned. The pattern can include all wildcards supported by the OS shell. Ex: *.*, *.csv, my_image[0-9].tif, etc.
+    @submodel.in.constant.name = filePattern
+    @submodel.in.constant.optional = no
+    @submodel.in.constant.order = 2
+    filePattern := String .UNBOUND;
+
+    calculatePythonExpression13799 := CalculatePythonExpression "dinamica.package("glob")
+
+folder = dinamica.inputs["s1"]
+extension = dinamica.inputs["s2"]
+
+recurse_subfolders = dinamica.inputs["v1"] != 0
+
+if recurse_subfolders:
+    full_filename_pattern = folder + "/**/" + extension
+else:
+    full_filename_pattern = folder + "/" + extension
+
+files = glob.glob(full_filename_pattern, recursive=recurse_subfolders)
+# sort files alphabetically
+files = sorted(files)
+files = [["Indices*#real", "Filenames#string"]] + list(enumerate(files, 1))
+print("Prepared output for Dinamica: ", files)
+
+dinamica.outputs["files"] = dinamica.prepareTable(files, 1)
+" {{
+        NumberString folder 1;
+
+        NumberString filePattern 2;
+
+        NumberValue searchSubFoldersRecursively 1;
+    }};
+
+    @submodel.out.table.description = The list of filenames (and their corresponding paths) found in the given folder that match the given pattern.
+    @submodel.out.table.name = files
+    @submodel.out.table.order = 1
+    _ := ExtractStructTable calculatePythonExpression13799 $"(files)";
+}};
diff --git a/inst/tinytest/test_evoland_db.R b/inst/tinytest/test_evoland_db.R index 6b139b9..a66b146 100644 --- a/inst/tinytest/test_evoland_db.R +++ b/inst/tinytest/test_evoland_db.R
@@ -227,7 +227,7 @@ expect_equal(db$intrv_meta_t, intrv_meta_t)
 # Test 11: Active bindings - trans_meta_t
 expect_silent(db$trans_meta_t <- trans_meta_t)
-expect_equal(db$trans_meta_t[, c(-1)], trans_meta_t)
+expect_equal(db$trans_meta_t[, c(-1)], trans_meta_t, tolerance = 1e-7)
 
 # Test 13: Active bindings - alloc_params_t (with MAP columns)
 expect_silent(db$alloc_params_t <- alloc_params_t)
diff --git a/inst/tinytest/test_parquet_duckdb.R b/inst/tinytest/test_parquet_duckdb.R index 5f1c3db..60f1401 100644 --- a/inst/tinytest/test_parquet_duckdb.R +++ b/inst/tinytest/test_parquet_duckdb.R
@@ -8,23 +8,19 @@ on.exit(unlink(test_dir, recursive = TRUE), add = TRUE)
 # Test 1: Initialization
 expect_silent(
   db <- parquet_duckdb$new(
-    path = test_dir,
-    default_format = "parquet"
+    path = test_dir
   )
 )
 expect_true(inherits(db, "parquet_duckdb"))
 expect_true(dir.exists(test_dir))
-expect_equal(db$default_format, "parquet")
 expect_true(!is.null(db$connection))
 expect_true(inherits(db$connection, "duckdb_connection"))
 
 # Test 2: Initial state - no tables
 expect_identical(db$list_tables(), character(0))
 
-# Test 3: Fetch from non-existent table returns empty data.table
-result <- db$fetch("nonexistent_table")
-expect_true(inherits(result, "data.table"))
-expect_equal(nrow(result), 0L)
+# Test 3: Fetch from a non-existent table errors out
+expect_error(db$fetch("nonexistent_table"), "does not exist")
 
 # Test 4: Row count for non-existent table
 expect_equal(db$row_count("nonexistent_table"), 0L)
@@ -318,26 +314,7 @@ expect_true(inherits(result, "data.table"))
 expect_true("max_id" %in% 
names(result)) db$detach_table("test_attach") -# Test 31: CSV format support -test_dir_csv <- tempfile("parquet_duckdb_csv_") -on.exit(unlink(test_dir_csv, recursive = TRUE), add = TRUE) - -db_csv <- parquet_duckdb$new( - path = test_dir_csv, - default_format = "csv" -) -expect_equal(db_csv$default_format, "csv") - -test_csv_data <- data.table::data.table( - id = 1:3, - name = c("a", "b", "c") -) -db_csv$commit(test_csv_data, "csv_table", method = "overwrite") -expect_true("csv_table" %in% db_csv$list_tables()) -retrieved <- db_csv$fetch("csv_table") -expect_equal(retrieved, test_csv_data) - -# Test 32: Extension loading +# Test 31: Extension loading test_dir_ext <- tempfile("parquet_duckdb_ext_") on.exit(unlink(test_dir_ext, recursive = TRUE), add = TRUE) @@ -350,7 +327,7 @@ expect_silent( db_ext$get_query("SELECT ST_Point(0, 0) as geom") ) -# Test 33: Persistence across connections +# Test 32: Persistence across connections db$commit( method = "overwrite", test_data_1, diff --git a/inst/tinytest/test_trans_models_t.R b/inst/tinytest/test_trans_models_t.R index b5fcb02..53dba30 100644 --- a/inst/tinytest/test_trans_models_t.R +++ b/inst/tinytest/test_trans_models_t.R @@ -76,7 +76,7 @@ lulc_data <- data.table::rbindlist(list( db_tm$lulc_data_t <- as_lulc_data_t(lulc_data) # Create transition metadata -transitions <- db_tm$fetch("transitions_v") +transitions <- db_tm$transitions_v db_tm$trans_meta_t <- create_trans_meta_t( transitions, min_cardinality_abs = 5L @@ -103,7 +103,9 @@ pred_spec_tm <- list( sources = list(list(url = "https://example.com/roads.gpkg", md5sum = "ghi789")) ) ) -db_tm$pred_meta_t <- create_pred_meta_t(pred_spec_tm) +expect_silent( + db_tm$pred_meta_t <- create_pred_meta_t(pred_spec_tm) +) # Add predictor data - mix of static and time-varying set.seed(43) diff --git a/inst/tinytest/test_trans_preds_t.R b/inst/tinytest/test_trans_preds_t.R index 6cee99a..1b6f66a 100644 --- a/inst/tinytest/test_trans_preds_t.R +++ b/inst/tinytest/test_trans_preds_t.R @@ -57,9 +57,8 @@ lulc_data <- data.table::rbindlist(list( db_tps$lulc_data_t <- as_lulc_data_t(lulc_data) # Create transition metadata -transitions <- db_tps$fetch("transitions_v") db_tps$trans_meta_t <- create_trans_meta_t( - transitions, + db_tps$transitions_v, min_cardinality_abs = 5L ) @@ -147,10 +146,10 @@ pred_data_int <- data.table::data.table( ) db_tps$pred_data_t_int <- as_pred_data_t(pred_data_int, type = "int") -expect_true(setequal( - db_tps$trans_pred_data_v(1L)[, id_coord], +expect_equal( + sort(db_tps$trans_pred_data_v(1L)[, id_coord]), c(1, 2, 2, 4, 4, 7, 8, 10, 10, 12, 12, 13, 13, 16, 16, 17, 17, 18, 20, 21, 23, 24) -)) +) # Test pruning expect_message( diff --git a/inst/tinytest/test_util_dinamica.R b/inst/tinytest/test_util_dinamica.R new file mode 100644 index 0000000..f5220c6 --- /dev/null +++ b/inst/tinytest/test_util_dinamica.R @@ -0,0 +1,151 @@ +library(tinytest) + +# nolint start +sample_dinamica_script_encoded <- ' +@charset = UTF-8 +@date = 2023-Oct-13 15:04:52 +@version = 8.3 +Script {{ + realValue1652 := RealValue 2; + + @collapsed = no + calculateRexpression1653 := CalculateRExpression "b3V0cHV0IDwtIHYxKjIKb3V0cHV0RG91YmxlKCJvdXRwdXRfbnVtYmVyIiwgb3V0cHV0KQ==" .no {{ + NumberValue realValue1652 1; + }}; + + @viewer.number = yes + _ := ExtractStructNumber calculateRexpression1653 $"(output_number)"; +}}; +' + +sample_dinamica_script_decoded <- ' +@charset = UTF-8 +@date = 2023-Oct-13 15:04:52 +@version = 8.3 +Script {{ + realValue1652 := RealValue 2; + + @collapsed = no + calculateRexpression1653 
:= CalculateRExpression "stop("runcible spoon")" .no {{ + NumberValue realValue1652 1; + }}; + + @viewer.number = yes + _ := ExtractStructNumber calculateRexpression1653 $"(output_number)"; +}}; +' +# nolint end + +# Test: evoland:::process_dinamica_script encodes correctly +expect_error( + evoland:::process_dinamica_script( + I(sample_dinamica_script_encoded), + mode = "encode" + ), + pattern = "seems unlikely for an unencoded code chunk" +) + +expect_match( + evoland:::process_dinamica_script( + I(sample_dinamica_script_encoded), + mode = "decode" + ), + 'output <- v1\\*2\\noutputDouble\\("output_number", output\\)' +) + +# Test: evoland:::process_dinamica_script decodes correctly +expect_error( + evoland:::process_dinamica_script( + I(sample_dinamica_script_decoded), + mode = "decode" + ), + pattern = "seems unlikely for an encoded code chunk" +) + +expect_match( + evoland:::process_dinamica_script( + I(sample_dinamica_script_decoded), + mode = "encode" + ), + "c3RvcCgicnVuY2libGUgc3Bvb24iKQ==" +) + +# Test: evoland:::process_dinamica_script is idempotent +expect_equal( + { + sample_dinamica_script_encoded |> + I() |> + evoland:::process_dinamica_script(mode = "decode") |> + I() |> + evoland:::process_dinamica_script(mode = "encode") + }, + sample_dinamica_script_encoded +) + +expect_equal( + { + sample_dinamica_script_decoded |> + I() |> + evoland:::process_dinamica_script(mode = "encode") |> + I() |> + evoland:::process_dinamica_script(mode = "decode") + }, + sample_dinamica_script_decoded +) + +if (Sys.which("DinamicaConsole") != "") { + stopifnot( + !is.na(Sys.getenv("DINAMICA_EGO_8_INSTALLATION_DIRECTORY", unset = NA)), + !is.na(Sys.getenv("DINAMICA_EGO_CLI", unset = NA)), + !is.na(Sys.getenv("DINAMICA_EGO_8_HOME", unset = NA)) + ) + Sys.setenv( + "DINAMICA_EGO_8_TEMP_DIR" = tempdir() + ) + # Test: exec_dinamica works + tmpfile_ego <- tempfile(fileext = ".ego") + writeChar( + sample_dinamica_script_encoded, + tmpfile_ego, + eos = NULL + ) + expect_message( + expect_length( + exec_dinamica(tmpfile_ego), + 4 # list of status, stdout, stderr, timeout + ), + "Logging to" + ) + unlink(tmpfile_ego) + + # Test: exec_dinamica with echo + tmpfile_ego <- tempfile(fileext = ".ego") + writeChar( + sample_dinamica_script_encoded, + tmpfile_ego, + eos = NULL + ) + expect_stdout( + exec_dinamica(tmpfile_ego, echo = TRUE, write_logfile = FALSE), + "Running model script" + ) + unlink(tmpfile_ego) + + # Test: exec_dinamica fails + tmpfile_ego <- tempfile(fileext = ".ego") + evoland:::process_dinamica_script(I(sample_dinamica_script_decoded), tmpfile_ego) + + # capture the R error for the Dinamica CalculateRExpression (via stdout) + expect_stdout( + # silence the logging message within this calling process + expect_message( + # capture the R error within this calling process + expect_error( + exec_dinamica(tmpfile_ego, echo = TRUE), + "Dinamica registered an error" + ) + ), + pattern = "Error caught in R execution: 'runcible spoon'" + ) + unlink(tmpfile_ego) +} diff --git a/inst/tinytest/test_util_terra.R b/inst/tinytest/test_util_terra.R index 35a0bab..1d6a566 100644 --- a/inst/tinytest/test_util_terra.R +++ b/inst/tinytest/test_util_terra.R @@ -83,7 +83,7 @@ expect_silent(vector_result <- extract_using_coords_t(vect_data, coords_t)) expect_equal( - vector_result[id_coord %in% 1], + vector_result[id_coord == 1][order(attribute, -rank(value))], data.table::data.table( id_coord = 1L, attribute = factor( diff --git a/man/evoland_db.Rd b/man/evoland_db.Rd index 590d1ad..7d59a3e 100644 --- 
a/man/evoland_db.Rd +++ b/man/evoland_db.Rd @@ -5,7 +5,7 @@ \title{R6 Class for Folder-Based Data Storage Interface} \description{ An R6 class that provides an interface to a folder-based data storage system -for the evoland package. Each table is stored as a parquet (or CSV) file. +for the evoland package. Each table is stored as a parquet (or JSON) file. This class uses DuckDB for in-memory SQL operations while persisting data to disk in parquet format for better compression. @@ -29,7 +29,6 @@ Additional methods and active bindings are added to this class in separate files \item \href{#method-evoland_db-generate_neighbor_predictors}{\code{evoland_db$generate_neighbor_predictors()}} \item \href{#method-evoland_db-trans_pred_data_v}{\code{evoland_db$trans_pred_data_v()}} \item \href{#method-evoland_db-new}{\code{evoland_db$new()}} -\item \href{#method-evoland_db-fetch}{\code{evoland_db$fetch()}} \item \href{#method-evoland_db-set_report}{\code{evoland_db$set_report()}} \item \href{#method-evoland_db-set_coords}{\code{evoland_db$set_coords()}} \item \href{#method-evoland_db-set_periods}{\code{evoland_db$set_periods()}} @@ -49,6 +48,7 @@ Additional methods and active bindings are added to this class in separate files
  • evoland::parquet_duckdb$delete_from()
  • evoland::parquet_duckdb$detach_table()
  • evoland::parquet_duckdb$execute()
+  • evoland::parquet_duckdb$fetch()
  • evoland::parquet_duckdb$get_query()
  • evoland::parquet_duckdb$list_tables()
  • evoland::parquet_duckdb$print()
@@ -95,7 +95,7 @@ Additional methods and active bindings are added to this class in separate files
 \subsection{Method \code{new()}}{
 Initialize a new evoland_db object
 \subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{evoland_db$new(path, default_format = c("parquet", "csv"), ...)}\if{html}{\out{</div>}}
+\if{html}{\out{<div class="r">}}\preformatted{evoland_db$new(path, ...)}\if{html}{\out{</div>}}
 }
 
 \subsection{Arguments}{
@@ -103,9 +103,6 @@ Initialize a new evoland_db object
 \describe{
 \item{\code{path}}{Character string. Path to the data folder.}
 
-\item{\code{default_format}}{Character. Default file format ("parquet" or "csv").
-Default is "parquet".}
-
 \item{\code{...}}{passed on to \code{set_report}}
 }
 \if{html}{\out{</div>}}
@@ -115,30 +112,6 @@ A new \code{evoland_db} object
 }
 }
 \if{html}{\out{<hr>}}
-\if{html}{\out{<a id="method-evoland_db-fetch"></a>}}
-\if{latex}{\out{\hypertarget{method-evoland_db-fetch}{}}}
-\subsection{Method \code{fetch()}}{
-Fetch data from storage with evoland-specific view support
-\subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{evoland_db$fetch(table_name, where = NULL, limit = NULL)}\if{html}{\out{</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{<div class="arguments">}}
-\describe{
-\item{\code{table_name}}{Character string. Name of the table to query.}
-
-\item{\code{where}}{Character string. Optional WHERE clause for the SQL query.}
-
-\item{\code{limit}}{integerish, limit the amount of rows to return}
-}
-\if{html}{\out{</div>}}
-}
-\subsection{Returns}{
-A data.table
-}
-}
-\if{html}{\out{<hr>}}
 \if{html}{\out{<a id="method-evoland_db-set_report"></a>}}
 \if{latex}{\out{\hypertarget{method-evoland_db-set_report}{}}}
 \subsection{Method \code{set_report()}}{
diff --git a/man/parquet_duckdb.Rd b/man/parquet_duckdb.Rd index cb41afa..5e437aa 100644 --- a/man/parquet_duckdb.Rd +++ b/man/parquet_duckdb.Rd
@@ -16,9 +16,7 @@ domain-specific database classes.
 
 \item{\code{path}}{Character string path to the data folder}
 
-\item{\code{default_format}}{Default file format for new tables}
-
-\item{\code{writeopts}}{Default write options for DuckDB}
+\item{\code{writeopts}}{Write options for DuckDB parquet output}
 }
 \if{html}{\out{</div>}}
 }
@@ -46,11 +44,7 @@ domain-specific database classes.
 \subsection{Method \code{new()}}{
 Initialize a new parquet_duckdb object
 \subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{parquet_duckdb$new(
-  path,
-  default_format = c("parquet", "csv"),
-  extensions = character(0)
-)}\if{html}{\out{</div>}}
+\if{html}{\out{<div class="r">}}\preformatted{parquet_duckdb$new(path, extensions = character(0))}\if{html}{\out{</div>}}
 }
 
 \subsection{Arguments}{
@@ -58,9 +52,6 @@ Initialize a new parquet_duckdb object
 \describe{
 \item{\code{path}}{Character string. Path to the data folder.}
 
-\item{\code{default_format}}{Character. Default file format ("parquet" or "csv").
-Default is "parquet".}
-
 \item{\code{extensions}}{Character vector of DuckDB extensions to load (e.g., "spatial")}
 }
 \if{html}{\out{</div>}}
 }
@@ -113,7 +104,7 @@ A data.table with query results
 \if{html}{\out{<a id="method-parquet_duckdb-attach_table"></a>}}
 \if{latex}{\out{\hypertarget{method-parquet_duckdb-attach_table}{}}}
 \subsection{Method \code{attach_table()}}{
-Attach a table from parquet/CSV file as a temporary table in DuckDB
+Attach a table from parquet file as a temporary table in DuckDB
 \subsection{Usage}{
 \if{html}{\out{<div class="r">}}\preformatted{parquet_duckdb$attach_table(table_name, columns = "*", where = NULL)}\if{html}{\out{</div>}}
 }
@@ -217,7 +208,7 @@ Result of func
 \subsection{Method \code{fetch()}}{
 Fetch data from a table
 \subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{parquet_duckdb$fetch(table_name, where = NULL, limit = NULL)}\if{html}{\out{</div>}}
+\if{html}{\out{<div class="r">}}\preformatted{parquet_duckdb$fetch(table_name, where = NULL, limit = NULL, map_cols = NULL)}\if{html}{\out{</div>}}
 }
 
 \subsection{Arguments}{
@@ -228,6 +219,8 @@ Fetch data from a table
 \item{\code{where}}{Character string. Optional WHERE clause for the SQL query.}
 
 \item{\code{limit}}{Integer. Optional limit on number of rows to return.}
+
+\item{\code{map_cols}}{Vector of columns to be converted from key/value structs to R lists}
 }
 \if{html}{\out{</div>}}
 }
@@ -318,19 +311,16 @@ self (invisibly)
 param x Data frame to commit. If character, in-duckdb-memory table.
 param table_name Character string table name
 param autoincrement_cols Character vector of column names to auto-increment
-param map_cols Character vector of columns to convert to MAP format
 return Invisible NULL (called for side effects)
 param x Data frame to commit. If character, in-duckdb-memory table.
 param table_name Character string table name
 param autoincrement_cols Character vector of column names to auto-increment
-param map_cols Character vector of columns to convert to MAP format
 return Invisible NULL (called for side effects)
 param x Data frame to commit. If character, in-duckdb-memory table.
 param table_name Character string table name
 param key_cols Character vector of columns that define uniqueness. If missing,
 use all columns starting with \code{id_}
 param autoincrement_cols Character vector of column names to auto-increment
-param map_cols Character vector of columns to convert to MAP format
 return Invisible NULL (called for side effects)
 }
 }
diff --git a/man/reporting_t.Rd b/man/reporting_t.Rd new file mode 100644 index 0000000..0a5d592 --- /dev/null +++ b/man/reporting_t.Rd @@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/reporting_t.R
+\name{reporting_t}
+\alias{reporting_t}
+\alias{as_reporting_t}
+\title{Create Reporting Table}
+\usage{
+as_reporting_t(x)
+}
+\value{
+A data.table of class "reporting_t" with columns:
+\itemize{
+\item \code{key}: Unique character key
+\item \code{value}: Value as character
+}
+}
+\description{
+The reporting table holds information handy for writing out reports (tables,
+graphs...)
+}
diff --git a/man/util_dinamica.Rd b/man/util_dinamica.Rd new file mode 100644 index 0000000..d15d4da --- /dev/null +++ b/man/util_dinamica.Rd @@ -0,0 +1,76 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/util_dinamica.r
+\name{util_dinamica}
+\alias{util_dinamica}
+\alias{exec_dinamica}
+\alias{run_evoland_dinamica_sim}
+\alias{process_dinamica_script}
+\title{Dinamica Utility Functions}
+\usage{
+exec_dinamica(
+  model_path,
+  disable_parallel = TRUE,
+  log_level = NULL,
+  additional_args = NULL,
+  write_logfile = TRUE,
+  echo = FALSE
+)
+
+run_evoland_dinamica_sim(
+  run_modelprechecks = TRUE,
+  config = get_config(),
+  work_dir = format(Sys.time(), "\%Y-\%m-\%d_\%Hh\%Mm\%Ss"),
+  calibration = FALSE,
+  ...
+)
+
+process_dinamica_script(infile, outfile, mode = "encode", check = TRUE)
+}
+\arguments{
+\item{model_path}{Path to the .ego model file to run. Any submodels must be included
+in a directory of the exact form \verb{basename(modelpath)_ego_Submodels}, \href{https://csr.ufmg.br/dinamica/dokuwiki/doku.php?id=submodels}{see wiki}}
+
+\item{disable_parallel}{Whether to disable parallel steps (default TRUE)}
+
+\item{log_level}{Logging level (1-7, default NULL)}
+
+\item{additional_args}{Additional arguments to pass to DinamicaConsole, see
+\code{DinamicaConsole -help}}
+
+\item{write_logfile}{bool, write stdout & stderr to a log file?}
+
+\item{echo}{bool, echo output directly to the console?}
+
+\item{run_modelprechecks}{bool, validate that everything is in place for a model run.
+Never run when \code{calibration} is \code{TRUE}.}
+
+\item{config}{List of config params}
+
+\item{work_dir}{Working directory, where the .ego files and control table are placed}
+
+\item{calibration}{bool, is this a calibration run?}
+
+\item{...}{passed to \code{\link[=exec_dinamica]{exec_dinamica()}}}
+
+\item{infile}{Input file path, or the script content itself when passed AsIs using \code{base::I()}}
+
+\item{outfile}{Output file path (optional)}
+
+\item{mode}{Character, either "encode" or "decode"}
+
+\item{check}{Default TRUE; a simple sanity check that the input matches the requested \code{mode}}
+}
+\description{
+Interact with Dinamica from R, see \strong{Functions} section below.
+}
+\section{Functions}{
+\itemize{
+\item \code{exec_dinamica()}: Execute a Dinamica .ego file using \code{DinamicaConsole}
+
+\item \code{run_evoland_dinamica_sim()}: Set up evoland-specific Dinamica EGO files; execute using
+\code{\link[=exec_dinamica]{exec_dinamica()}}
+
+\item \code{process_dinamica_script()}: Encode or decode raw R and Python code chunks in .ego
+files and their submodels to/from base64
+
+}}
diff --git a/vignettes/evoland.qmd b/vignettes/evoland.qmd index 37da973..c12bf91 100644 --- a/vignettes/evoland.qmd +++ b/vignettes/evoland.qmd
@@ -120,7 +120,7 @@ zippath <- file.path(
 
 # find singular csv
 csv_file <- unzip(zippath, list = TRUE) |>
-  purrr::pluck("Name") |>
+  (\(x) x[["Name"]])() |>
   stringi::stri_subset_fixed(".csv")
 stopifnot(length(csv_file) == 1L)
 ```
diff --git a/vignettes/ingest-predictors.qmd b/vignettes/ingest-predictors.qmd index 46d0ef2..27fb60d 100644 --- a/vignettes/ingest-predictors.qmd +++ b/vignettes/ingest-predictors.qmd
@@ -86,7 +86,7 @@ sonbase_sources <-
 extent_wide <- db$extent |> terra::extend(1000)
 
 sonbase_max <-
-  purrr::map(
+  lapply(
     sonbase_sources$local_path,
     \(x) terra::rast(x) |> terra::crop(extent_wide)
   ) |>
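
Usage sketch for the API surface this diff introduces. This is a minimal, hedged example, assuming an installed evoland package with DinamicaConsole on the PATH; the model path, table name, and toy data are hypothetical, and process_dinamica_script() is unexported (hence the :::). All calls mirror the signatures exercised by the tests above.

    library(evoland)

    # Round-trip the base64-encoded R/Python chunks of a model script;
    # the tests above show decode -> encode is idempotent.
    model <- "allocation.ego" # hypothetical path
    decoded <- evoland:::process_dinamica_script(
      I(paste(readLines(model), collapse = "\n")),
      mode = "decode"
    )
    reencoded <- evoland:::process_dinamica_script(I(decoded), mode = "encode")

    # Execute the model, echoing DinamicaConsole output instead of writing a
    # logfile; returns a list of status, stdout, stderr, and timeout.
    res <- exec_dinamica(model, echo = TRUE, write_logfile = FALSE)

    # Parquet-backed storage: fetch() now errors for missing tables, and
    # map_cols converts DuckDB key/value structs back into R lists.
    db <- parquet_duckdb$new(path = tempfile("evoland_"))
    db$commit(data.table::data.table(id = 1:3), "toy", method = "overwrite")
    toy <- db$fetch("toy", where = "id > 1", limit = 1L)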