diff --git a/NEWS.md b/NEWS.md index 27afe61..f864642 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,8 +1,13 @@ # lobstr (development version) -* `obj_size()` no longer errors with "bad binding access" when inspecting - environments with non-standard bindings such as those created by `for` loops - or immediate bindings (#48). +* `obj_size()`, `obj_addrs()`, and `sxp()` no longer error with "bad binding + access" when inspecting environments with non-standard bindings such as + those created by `for` loops or immediate bindings (#48). + +* `sxp(expand = "environment")` no longer shows the internal `_frame` and + `_hashtab` structures. Instead, it now shows promise expressions without + forcing them. This change was necessary to make lobstr compliant with R's + public C API. # lobstr 1.1.3 diff --git a/R/sxp.R b/R/sxp.R index 17bb877..c90fb0e 100644 --- a/R/sxp.R +++ b/R/sxp.R @@ -17,7 +17,7 @@ #' suppressed. Use: #' #' * "character" to show underlying entries in the global string pool. -#' * "environment" to show the underlying hashtables. +#' * "environment" to show binding components without any side effect (e.g. promise). #' * "altrep" to show the underlying data. #' * "call" to show the full AST (but [ast()] is usually superior) #' * "bytecode" to show generated bytecode. @@ -43,14 +43,13 @@ #' sxp(x) #' sxp(x, expand = "altrep") #' -#' # Expand environmnets to see the underlying implementation details -#' e1 <- new.env(hash = FALSE, parent = emptyenv(), size = 3L) -#' e2 <- new.env(hash = TRUE, parent = emptyenv(), size = 3L) -#' e1$x <- e2$x <- 1:10 +#' # Expand environments to see promise expressions without forcing +#' e <- new.env(parent = emptyenv()) +#' delayedAssign("x", 1 + 1, assign.env = e) #' -#' sxp(e1) -#' sxp(e1, expand = "environment") -#' sxp(e2, expand = "environment") +#' sxp(e) +#' sxp(e, expand = "environment") + sxp <- function(x, expand = character(), max_depth = 5L) { opts <- c("character", "altrep", "environment", "call", "bytecode") if (any(!expand %in% opts)) { @@ -78,18 +77,17 @@ format.lobstr_inspector <- function(x, ..., depth = 0, name = NA) { indent <- paste0(rep(" ", depth), collapse = "") id <- crayon::bold(attr(x, "id")) - if (!is_testing()) { + if (!is_testing() && !is_placeholder(x)) { addr <- paste0(":", crayon::silver(attr(x, "addr"))) } else { addr <- "" } - if (attr(x, "type") == 0) { - desc <- crayon::silver("") - } else if (attr(x, "has_seen")) { + type <- attr(x, "type") + + if (attr(x, "has_seen")) { desc <- paste0("[", attr(x, "id"), addr, "]") } else { - type <- sexp_type(attr(x, "type")) if (sexp_is_vector(type)) { length <- paste0("[", attr(x, "length"), "]") } else { @@ -102,7 +100,7 @@ format.lobstr_inspector <- function(x, ..., depth = 0, name = NA) { value <- NULL } - if (!is_testing()) { + if (!is_testing() && !is_placeholder(x)) { no_references <- attr(x, "no_references") maybe_shared <- attr(x, "maybe_shared") if (no_references == 1) { @@ -123,20 +121,33 @@ format.lobstr_inspector <- function(x, ..., depth = 0, name = NA) { references ) - desc <- paste0( - "[", - id, - addr, - "] ", - "<", - crayon::cyan(type), - length, - value, - "> ", - "(", - sxpinfo, - ")" - ) + # Placeholders don't show sxpinfo + if (is_placeholder(x)) { + desc <- paste0( + "[", + id, + "] ", + "<", + crayon::cyan(type), + value, + ">" + ) + } else { + desc <- paste0( + "[", + id, + addr, + "] ", + "<", + crayon::cyan(type), + length, + value, + "> ", + "(", + sxpinfo, + ")" + ) + } } name <- if (!identical(name, "")) { @@ -190,10 +201,6 @@ sxp_view <- function(x, expand = character()) { # helpers ----------------------------------------------------------------- -sexp_type <- function(x) { - unname(SEXPTYPE[as.character(x)]) -} - sexp_is_vector <- function(x) { x %in% c( @@ -208,32 +215,8 @@ sexp_is_vector <- function(x) { ) } -SEXPTYPE <- c( - "0" = "NILSXP", - "1" = "SYMSXP", - "2" = "LISTSXP", - "3" = "CLOSXP", - "4" = "ENVSXP", - "5" = "PROMSXP", - "6" = "LANGSXP", - "7" = "SPECIALSXP", - "8" = "BUILTINSXP", - "9" = "CHARSXP", - "10" = "LGLSXP", - "13" = "INTSXP", - "14" = "REALSXP", - "15" = "CPLXSXP", - "16" = "STRSXP", - "17" = "DOTSXP", - "18" = "ANYSXP", - "19" = "VECSXP", - "20" = "EXPRSXP", - "21" = "BCODESXP", - "22" = "EXTPTRSXP", - "23" = "WEAKREFSXP", - "24" = "RAWSXP", - "25" = "S4SXP", - "30" = "NEWSXP", - "31" = "FREESXP", - "99" = "FUNSXP" -) +# Placeholder nodes do not have any inspectable properties such as refcount or +# address +is_placeholder <- function(x) { + !nzchar(attr(x, "addr")) || identical(attr(x, "type"), "NILSXP") +} diff --git a/man/sxp.Rd b/man/sxp.Rd index 0f5511f..2600918 100644 --- a/man/sxp.Rd +++ b/man/sxp.Rd @@ -13,7 +13,7 @@ sxp(x, expand = character(), max_depth = 5L) suppressed. Use: \itemize{ \item "character" to show underlying entries in the global string pool. -\item "environment" to show the underlying hashtables. +\item "environment" to show binding components without any side effect (e.g. promise). \item "altrep" to show the underlying data. \item "call" to show the full AST (but \code{\link[=ast]{ast()}} is usually superior) \item "bytecode" to show generated bytecode. @@ -54,14 +54,12 @@ x <- 1:10 sxp(x) sxp(x, expand = "altrep") -# Expand environmnets to see the underlying implementation details -e1 <- new.env(hash = FALSE, parent = emptyenv(), size = 3L) -e2 <- new.env(hash = TRUE, parent = emptyenv(), size = 3L) -e1$x <- e2$x <- 1:10 +# Expand environments to see promise expressions without forcing +e <- new.env(parent = emptyenv()) +delayedAssign("x", 1 + 1, assign.env = e) -sxp(e1) -sxp(e1, expand = "environment") -sxp(e2, expand = "environment") +sxp(e) +sxp(e, expand = "environment") } \seealso{ Other object inspectors: diff --git a/src/address.cpp b/src/address.cpp index 363705b..101e496 100644 --- a/src/address.cpp +++ b/src/address.cpp @@ -1,24 +1,18 @@ #include "utils.h" #include +#include #include +extern "C" { +#include +} + [[cpp11::register]] std::string obj_addr_(SEXP name, cpp11::environment env) { return obj_addr_(Rf_eval(name, env)); } -void frame_addresses(SEXP frame, std::vector* refs) { - for(SEXP cur = frame; cur != R_NilValue; cur = CDR(cur)) { - SEXP obj = CAR(cur); - if (obj != R_UnboundValue) - refs->push_back(obj_addr_(obj)); - } -} -void hash_table_addresses(SEXP table, std::vector* refs) { - int n = Rf_length(table); - for (int i = 0; i < n; ++i) - frame_addresses(VECTOR_ELT(table, i), refs); -} + [[cpp11::register]] std::vector obj_addrs_(SEXP x) { @@ -39,14 +33,36 @@ std::vector obj_addrs_(SEXP x) { break; case ENVSXP: { - // Using node-based object accessors: CAR for FRAME, and TAG for HASHTAB. - // TODO: Iterate over environments using environment accessors. - // We won't be able to provide an address for things like promises though. - bool isHashed = TAG(x) != R_NilValue; - if (isHashed) { - hash_table_addresses(TAG(x), &out); - } else { - frame_addresses(CAR(x), &out); + cpp11::sexp syms(r_env_syms(x)); + R_xlen_t n_bindings = Rf_xlength(syms); + + for (R_xlen_t i = 0; i < n_bindings; ++i) { + SEXP sym = VECTOR_ELT(syms, i); + enum r_env_binding_type type = r_env_binding_type(x, sym); + + switch (type) { + case R_ENV_BINDING_TYPE_missing: + break; + + case R_ENV_BINDING_TYPE_value: + out.push_back(obj_addr_(r_env_get(x, sym))); + break; + + case R_ENV_BINDING_TYPE_delayed: + out.push_back(obj_addr_(r_env_binding_delayed_expr(x, sym))); + break; + + case R_ENV_BINDING_TYPE_forced: + out.push_back(obj_addr_(r_env_binding_forced_value(x, sym))); + break; + + case R_ENV_BINDING_TYPE_active: + out.push_back(obj_addr_(r_env_binding_active_fn(x, sym))); + break; + + case R_ENV_BINDING_TYPE_unbound: + break; + } } break; } diff --git a/src/inspect.cpp b/src/inspect.cpp index 9e0d958..bf1bb7f 100644 --- a/src/inspect.cpp +++ b/src/inspect.cpp @@ -1,10 +1,15 @@ #include #include +#include #include #include #include #include "utils.h" +extern "C" { +#include +} + struct Expand { bool alrep; bool charsxp; @@ -50,6 +55,92 @@ class GrowableList { SEXP obj_children_(SEXP x, std::map& seen, double max_depth, Expand expand); bool is_namespace(cpp11::environment env); +// Convert SEXPTYPE to uppercase name (e.g. REALSXP, ENVSXP) +const char* sexptype_name(SEXPTYPE type) { + switch (type) { + case NILSXP: return "NILSXP"; + case SYMSXP: return "SYMSXP"; + case LISTSXP: return "LISTSXP"; + case CLOSXP: return "CLOSXP"; + case ENVSXP: return "ENVSXP"; + case PROMSXP: return "PROMSXP"; + case LANGSXP: return "LANGSXP"; + case SPECIALSXP: return "SPECIALSXP"; + case BUILTINSXP: return "BUILTINSXP"; + case CHARSXP: return "CHARSXP"; + case LGLSXP: return "LGLSXP"; + case INTSXP: return "INTSXP"; + case REALSXP: return "REALSXP"; + case CPLXSXP: return "CPLXSXP"; + case STRSXP: return "STRSXP"; + case DOTSXP: return "DOTSXP"; + case ANYSXP: return "ANYSXP"; + case VECSXP: return "VECSXP"; + case EXPRSXP: return "EXPRSXP"; + case BCODESXP: return "BCODESXP"; + case EXTPTRSXP: return "EXTPTRSXP"; + case WEAKREFSXP: return "WEAKREFSXP"; + case RAWSXP: return "RAWSXP"; + case S4SXP: return "S4SXP"; + default: return "UNKNOWN"; + } +} + +struct InspectorParams { + // Empty string indicates a placeholder node (synthetic entry, not a real R object). + // The R formatter uses this to skip address and refs display. + const char* addr = ""; + int id = 0; + bool has_seen = false; + // Type string (e.g. "ENVSXP", "missing"). Use Rf_type2char() for real objects. + const char* type = "NILSXP"; + double length = 0; + bool altrep = false; + int maybe_shared = 0; + int no_references = 0; + bool object = false; + // Shown as `` (e.g. symbol name, env name) + const char* value = NULL; +}; + +SEXP new_inspector_node(SEXP children, const InspectorParams& params) { + Rf_setAttrib(children, Rf_install("addr"), PROTECT(Rf_mkString(params.addr))); + Rf_setAttrib(children, Rf_install("has_seen"), PROTECT(Rf_ScalarLogical(params.has_seen))); + Rf_setAttrib(children, Rf_install("id"), PROTECT(Rf_ScalarInteger(params.id))); + Rf_setAttrib(children, Rf_install("type"), PROTECT(Rf_mkString(params.type))); + Rf_setAttrib(children, Rf_install("length"), PROTECT(Rf_ScalarReal(params.length))); + Rf_setAttrib(children, Rf_install("altrep"), PROTECT(Rf_ScalarLogical(params.altrep))); + Rf_setAttrib(children, Rf_install("maybe_shared"), PROTECT(Rf_ScalarInteger(params.maybe_shared))); + Rf_setAttrib(children, Rf_install("no_references"), PROTECT(Rf_ScalarInteger(params.no_references))); + Rf_setAttrib(children, Rf_install("object"), PROTECT(Rf_ScalarInteger(params.object))); + Rf_setAttrib(children, Rf_install("class"), PROTECT(Rf_mkString("lobstr_inspector"))); + UNPROTECT(10); + + if (params.value != NULL) { + Rf_setAttrib(children, Rf_install("value"), PROTECT(Rf_mkString(params.value))); + UNPROTECT(1); + } + + return children; +} + +// Create a placeholder inspector node for synthetic entries (e.g. promise bindings) +SEXP new_placeholder_inspector( + const char* type, + std::map& seen, + const char* value = NULL) { + SEXP out = PROTECT(Rf_allocVector(VECSXP, 0)); + + InspectorParams params; + params.id = seen.size() + 1; + params.type = type; + params.value = value; + new_inspector_node(out, params); + + UNPROTECT(1); + return out; +} + bool is_altrep(SEXP x) { #if defined(R_VERSION) && R_VERSION >= R_Version(3, 5, 0) return ALTREP(x); @@ -77,18 +168,7 @@ SEXP obj_inspect_(SEXP x, children = PROTECT(obj_children_(x, seen, max_depth, expand)); } - // don't store object directly to avoid increasing refcount - Rf_setAttrib(children, Rf_install("addr"), PROTECT(Rf_mkString(obj_addr_(x).c_str()))); - Rf_setAttrib(children, Rf_install("has_seen"), PROTECT(Rf_ScalarLogical(has_seen))); - Rf_setAttrib(children, Rf_install("id"), PROTECT(Rf_ScalarInteger(id))); - Rf_setAttrib(children, Rf_install("type"), PROTECT(Rf_ScalarInteger(TYPEOF(x)))); - Rf_setAttrib(children, Rf_install("length"), PROTECT(Rf_ScalarReal(sxp_length(x)))); - Rf_setAttrib(children, Rf_install("altrep"), PROTECT(Rf_ScalarLogical(is_altrep(x)))); - Rf_setAttrib(children, Rf_install("maybe_shared"), PROTECT(Rf_ScalarInteger(MAYBE_SHARED(x)))); - Rf_setAttrib(children, Rf_install("no_references"), PROTECT(Rf_ScalarInteger(NO_REFERENCES(x)))); - Rf_setAttrib(children, Rf_install("object"), PROTECT(Rf_ScalarInteger(Rf_isObject(x)))); - UNPROTECT(9); - + // Compute optional value for display const char* value = NULL; if (TYPEOF(x) == SYMSXP && PRINTNAME(x) != R_NilValue) { value = CHAR(PRINTNAME(x)); @@ -104,13 +184,21 @@ SEXP obj_inspect_(SEXP x, value = CHAR(STRING_ELT(R_PackageEnvName(x), 0)); } } - if (value != NULL) { - Rf_setAttrib(children, Rf_install("value"), PROTECT(Rf_mkString(value))); - UNPROTECT(1); - } - Rf_setAttrib(children, Rf_install("class"), PROTECT(Rf_mkString("lobstr_inspector"))); - UNPROTECT(1); + std::string addr = obj_addr_(x); + + InspectorParams params; + params.addr = addr.c_str(); + params.id = id; + params.has_seen = has_seen; + params.type = sexptype_name(TYPEOF(x)); + params.length = sxp_length(x); + params.altrep = is_altrep(x); + params.maybe_shared = MAYBE_SHARED(x); + params.no_references = NO_REFERENCES(x); + params.object = Rf_isObject(x); + params.value = value; + new_inspector_node(children, params); UNPROTECT(1); return children; @@ -251,38 +339,73 @@ SEXP obj_children_( break; // Environments - case ENVSXP: + case ENVSXP: { if (x == R_BaseEnv || x == R_GlobalEnv || x == R_EmptyEnv || is_namespace(x)) break; - if (expand.env) { - // Using node-based object accessors: CAR for FRAME, and TAG for HASHTAB. - // TODO: Iterate manually over the environment using environment accessors. - recurse(&children, seen, "_frame", CAR(x), max_depth, expand); - recurse(&children, seen, "_hashtab", TAG(x), max_depth, expand); - } else { - SEXP names = PROTECT(R_lsInternal3(x, /* all= */ TRUE, /* sorted= */ FALSE)); - for (R_xlen_t i = 0; i < XLENGTH(names); ++i) { - const char* name = CHAR(STRING_ELT(names, i)); - SEXP sym = PROTECT(Rf_install(name)); - - if (R_BindingIsActive(sym, x)) { - SEXP sym = PROTECT(Rf_install("_active_binding")); - SEXP active = PROTECT(obj_inspect_(sym, seen, max_depth, expand)); - children.push_back(name, active); - UNPROTECT(2); - } else { - SEXP obj = PROTECT(Rf_findVarInFrame(x, sym)); - recurse(&children, seen, name, obj, max_depth, expand); - UNPROTECT(1); + cpp11::sexp syms(r_env_syms(x)); + R_xlen_t n_bindings = Rf_xlength(syms); + + for (R_xlen_t i = 0; i < n_bindings; ++i) { + SEXP sym = VECTOR_ELT(syms, i); + const char* name = CHAR(PRINTNAME(sym)); + enum r_env_binding_type type = r_env_binding_type(x, sym); + + switch (type) { + case R_ENV_BINDING_TYPE_value: + recurse(&children, seen, name, r_env_get(x, sym), max_depth, expand); + break; + + case R_ENV_BINDING_TYPE_missing: { + SEXP missing = PROTECT(new_placeholder_inspector("missing", seen)); + children.push_back(name, missing); + UNPROTECT(1); + break; + } + + case R_ENV_BINDING_TYPE_delayed: { + SEXP promise = PROTECT(new_placeholder_inspector("PROMSXP", seen)); + children.push_back(name, promise); + UNPROTECT(1); + + if (expand.env) { + recurse(&children, seen, "_code", r_env_binding_delayed_expr(x, sym), max_depth, expand); + recurse(&children, seen, "_env", r_env_binding_delayed_env(x, sym), max_depth, expand); } + break; + } + + case R_ENV_BINDING_TYPE_forced: { + SEXP promise = PROTECT(new_placeholder_inspector("PROMSXP", seen)); + children.push_back(name, promise); + UNPROTECT(1); + + if (expand.env) { + recurse(&children, seen, "_value", r_env_binding_forced_value(x, sym), max_depth, expand); + recurse(&children, seen, "_code", r_env_binding_forced_expr(x, sym), max_depth, expand); + } + break; + } + + case R_ENV_BINDING_TYPE_active: { + SEXP active = PROTECT(new_placeholder_inspector("CLOSXP", seen, "active")); + children.push_back(name, active); UNPROTECT(1); + + if (expand.env) { + recurse(&children, seen, "_fn", r_env_binding_active_fn(x, sym), max_depth, expand); + } + break; + } + + case R_ENV_BINDING_TYPE_unbound: + break; } - UNPROTECT(1); } - recurse(&children, seen, "_enclos", R_ParentEnv(x), max_depth, expand); + recurse(&children, seen, "_enclos", r_env_parent(x), max_depth, expand); break; + } // Functions case CLOSXP: diff --git a/tests/testthat/_snaps/sxp.md b/tests/testthat/_snaps/sxp.md index 0ea91a6..64f064c 100644 --- a/tests/testthat/_snaps/sxp.md +++ b/tests/testthat/_snaps/sxp.md @@ -1,3 +1,48 @@ +# snapshots environment binding types + + Code + print(sxp(e)) + Output + [1] () + active [2] + forced [2] + delayed [2] + missing [2] + value [2] () + _enclos [3] () + Code + print(sxp(e, expand = "environment", max_depth = 6L)) + Output + [1] () + active [2] + _fn [2] () + _formals [3] + _body [4] () + _env [5] () + e [1] + _enclos [6] () + _enclos [7] () + _enclos [8] () + ... + _attrib [9] () + srcref [10] (object ) + _attrib [11] () + srcfile [12] (object ) + ... + class [13] () + ... + forced [14] + _value [14] () + _code [15] () + ... + delayed [16] + _code [16] () + ... + _env [5] + missing [17] + value [17] () + _enclos [18] () + # can inspect all atomic vectors Code @@ -39,24 +84,10 @@ print(sxp(e2, expand = "environment", max_depth = 5L)) Output [1] () - _frame - _hashtab [3] () - - - - - - _enclos [4] () - _frame - _hashtab [5] () - [6] () - x [7] () - [8] () - y [4] - - - - _enclos [9] () + _enclos [2] () + x [3] () + y [2] + _enclos [4] () # can expand altrep @@ -71,7 +102,7 @@ [5] () [6] () _data1 [7] () - _data2 + _data2 [8] # can inspect cons cells diff --git a/tests/testthat/test-sxp.R b/tests/testthat/test-sxp.R index aee66ce..273ce53 100644 --- a/tests/testthat/test-sxp.R +++ b/tests/testthat/test-sxp.R @@ -41,6 +41,29 @@ test_that("can inspect active bindings", { expect_named(x, c("f", "_enclos")) }) +test_that("snapshots environment binding types", { + e <- new.env(parent = emptyenv(), hash = FALSE) + + # value + e$value <- 1 + + # missing argument binding + env_bind(e, missing = missing_arg()) + + # delayed and forced promise bindings + delayedAssign("delayed", 1 + 1, assign.env = e) + delayedAssign("forced", 1 + 1, assign.env = e) + invisible(e$forced) + + # active binding + env_bind_active(e, active = function() 42) + + expect_snapshot({ + print(sxp(e)) + print(sxp(e, expand = "environment", max_depth = 6L)) + }) +}) + # Regression tests -------------------------------------------------------- test_that("can inspect all atomic vectors", {