diff --git a/NAMESPACE b/NAMESPACE index ddf8065..ce28f19 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,11 +7,17 @@ S3method(format,lobstr_inspector) S3method(print,lobstr_bytes) S3method(print,lobstr_inspector) S3method(print,lobstr_raw) +S3method(print,lobstr_srcref) S3method(tree_label,"NULL") S3method(tree_label,"function") S3method(tree_label,character) S3method(tree_label,default) S3method(tree_label,environment) +S3method(tree_label,lobstr_srcfile_ref) +S3method(tree_label,lobstr_srcref) +S3method(tree_label,lobstr_srcref_location) +S3method(tree_label,srcfile) +S3method(tree_label,srcref) export(ast) export(cst) export(mem_used) @@ -20,6 +26,7 @@ export(obj_addrs) export(obj_size) export(obj_sizes) export(ref) +export(src) export(sxp) export(tree) export(tree_label) diff --git a/R/src.R b/R/src.R new file mode 100644 index 0000000..b2241bc --- /dev/null +++ b/R/src.R @@ -0,0 +1,723 @@ +#' Display tree of source references +#' +#' View source reference metadata attached to R objects in a tree structure. +#' Shows source file information, line/column locations, and lines of source code. +#' +#' @param x An R object with source references. Can be: +#' - A `srcref` object +#' - A list of `srcref` objects +#' - A expression vector with attached source references +#' - An evaluated closure with attached source references +#' - A quoted call with attached source references +#' @param max_depth Maximum depth to traverse nested structures (default 5) +#' @param max_length Maximum number of srcref nodes to display (default 100) +#' @param ... Additional arguments passed to [tree()] +#' +#' @return Returns a structured list containing the source reference +#' information. Print it to view the formatted tree. +#' +#' @section Overview: +#' +#' Source references are made of two kinds of objects: +#' - `srcref` objects, which contain information about a specific +#' location within the source file, such as the line and column numbers. +#' - `srcfile` objects, which contain metadata about the source file +#' such as its name, path, and encoding. +#' +#' +#' ## Where and when are source references created? +#' +#' Ultimately the R parser creates source references. The main two entry points +#' to the parser are: +#' - The R function `parse()`. +#' - The frontend hook `ReadConsole`, which powers the console input parser in +#' the R CLI and in IDEs. This C-level parser can also be accessed from C code +#' via `R_ParseVector()`. +#' +#' In principle, anything that calls `parse()` may create source references, but +#' here are the important direct and indirect callers: +#' - `source()` and `sys.source()` which parse and evaluate code. +#' - `loadNamespace()` calls `sys.source()` when loading a _source_ package: +#' . +#' - `R CMD install` creates a lazy-load database from a source package. +#' The first step is to call `loadNamespace()`: +#' +#' +#' By default source references are not created but can be enabled by: +#' +#' - Passing `keep.source = TRUE` explicitly to `parse()`, `source()`, +#' `sys.source()`, or `loadNamespace()`. +#' - Setting `options(keep.source = TRUE)`. This affects the default arguments +#' of the aforementioned functions, as well as the console input parser. +#' In interactive sessions, `keep.source` is set to `TRUE` by default: +#' . +#' - Setting `options(keep.source.pkgs = TRUE)`. This affects loading a package +#' from source, and installing a package from source. +#' +#' +#' ## `srcref` objects +#' +#' `srcref` objects are compact integer vectors describing a character range +#' in a source. It records start/end lines and byte/column positions and, +#' optionally, the parsed-line numbers if `#line` directives were used. +#' +#' Lengths of 4, 6, or 8 are allowed: +#' - 4: basic (first_line, first_byte, last_line, last_byte) +#' - 6: adds columns in Unicode codepoints (first_col, last_col) +#' - 8: adds parsed-line numbers (first_parsed, last_parsed) +#' +#' The "column" information does not represent grapheme clusters, but Unicode +#' codepoints. The column cursor is incremented at every UTF-8 lead byte and +#' there is no support for encodings other than UTF-8. +#' +#' The srcref columns are right-boundary positions, meaning that for an +#' expression starting at the start of the file, the column will be 1. Note that +#' `wholeSrcref` (see below) on the other hand starts at 0, before the first +#' character. It might also end 1 character after the last srcref column. +#' +#' They are attached as attributes (e.g. `attr(x, "srcref")` or `attr(x, +#' "wholeSrcref")`), possibly wrapped in a list, to the following objects: +#' +#' - Expression vectors returned by `parse()` (wrapped in a list) +#' - Quoted function calls (unwrapped) +#' - Quoted `{` calls (wrapped in a list) +#' - Evaluated closures (unwrapped) +#' +#' They have a `srcfile` attribute that points to the source file. +#' +#' Methods: +#' - `as.character()`: Retrieves relevant source lines from the `srcfile` +#' reference. +#' +#' +#' ### `wholeSrcref` attributes +#' +#' These are `srcref` objects stored in the `wholeSrcref` attributes of: +#' +#' - Expression vectors returned by `parse()`, which seems to be the intended +#' usage. +#' - `{` calls, which seems unintended. +#' +#' For expression vectors, the `wholeSrcref` spans from the first position +#' to the last position and represents the entire document. For braces, they +#' span from the first position to the location of the closing brace. There is +#' no way to know the location of the opening brace without reparsing, which +#' seems odd. It's probably an overlook from `xxexprlist()` calling +#' `attachSrcrefs()` in +#' . That +#' function is also called at the end of parsing, where it's intended for the +#' `wholeSrcref` attribute to be attached. +#' +#' For evaluated closures, the `wholeSrcref` attribute on the body has the same +#' unreliable start positions as `{` nodes. +#' +#' +#' ## `srcfile` objects +#' +#' `srcfile` objects are environments representing information about a +#' source file that a source reference points to. They typically refer to +#' a file on disk and store the filename, working directory, a timestamp, +#' and encoding information. +#' +#' While it is possible to create bare `srcfile` objects, specialized subclasses +#' are much more common. +#' +#' +#' ### `srcfile` +#' +#' A bare `srcfile` object does not contain any data apart from the file path. +#' It lazily loads lines from the file on disk, without any caching. +#' +#' Fields common to all `srcfile` objects: +#' +#' - `filename`: The filename of the source file. If relative, the path is +#' resolved against `wd`. +#' +#' - `wd`: The working directory (`getwd()`) at the time the srcfile was created, +#' generally at the time of parsing). +#' +#' - `timestamp`: The timestamp of the source file. Retrieved from `filename` +#' with `file.mtime()`. +#' +#' - `encoding`: The encoding of the source file. +#' +#' - `Enc`: The encoding of output lines. Used by `getSrcLines()`, which +#' calls `iconv()` when `Enc` does not match `encoding`. +#' +#' - `parseData` (optional): Parser information saved when `keep.source.data` is #' set to `TRUE`. +#' +#' Implementations: +#' - `print()` and `summary()` to print information about the source file. +#' - `open()` and `close()` to access the underlying file as a connection. +#' +#' Helpers: +#' - `getSrcLines()`: Retrieves source lines from a `srcfile`. +#' +#' +#' ### `srcfilecopy` +#' +#' A `srcfilecopy` stores the actual source lines in memory in `$lines`. +#' `srcfilecopy` is useful when the original file may change or does not +#' exist, because it preserves the exact text used by the parser. +#' +#' This type of srcfile is the most common. It's created by: +#' +#' - The R-level `parse()` function when `text` is supplied: +#' +#' ```r +#' # Creates a `""` non-file `srcfilecopy` +#' parse(text = "...", keep.source = TRUE) +#' ``` +#' +#' - The console's input parser when `getOption("keep.source")` is `TRUE`. +#' +#' - `sys.source()` when `keep.source = TRUE`: +#' +#' ```r +#' sys.source(file, keep.source = TRUE) +#' ``` +#' +#' The `srcfilecopy` object is timestamped with the file's last modification time. +#' +#' +#' Fields: +#' +#' - `filename`: The filename of the source file. If `isFile` is `FALSE`, +#' the field is non meaningful. For instance `parse(text = )` sets it to +#' `""`, and the console input parser sets it to `""`. +#' +#' - `isFile`: A logical indicating whether the source file exists. +#' +#' - `fixedNewlines`: If `TRUE`, `lines` is a character vector of lines with +#' no embedded `\n` characters. The `getSrcLines()` helper regularises `lines` +#' in this way and sets `fixedNewlines` to `TRUE`. +#' +#' +#' Note that the C-level parser (used directly mainly when parsing console input) +#' does not call the R-level constructor and only instantiates the `filename` +#' (set to `""`) and `lines` fields. +#' +#' ### `srcfilealias` +#' +#' This object wraps an existing `srcfile` object (stored in `original`). It +#' allows exposing a different `filename` while delegating the open/close/get +#' lines operations to the `srcfile` stored in `original`. +#' +#' The typical way aliases are created is via `#line *line* *filename*` +#' directives where the optional `*filename*` argument is supplied. These +#' directives remap the srcref and srcfile of parsed code to a different +#' location, for example from a temporary file or generated file to the original +#' location on disk. +#' +#' Called by `install.packages()` when installing a _source_ package with `keep.source.pkgs` set to `TRUE` (see +#' ), but +#' [only when](https://github.com/r-devel/r-svn/blob/52affc16/src/library/tools/R/admin.R#L308): +#' +#' - `Encoding` was supplied in `DESCRIPTION` +#' - The system locale is not "C" or "POSIX". +#' +#' The source files are converted to the encoding of the system locale, then +#' collated in a single source file with `#line` directives mapping them to their +#' original file names (with full paths): +#' . +#' +#' Note that the `filename` of the `original` srcfile incorrectly points to the +#' package path in the install destination. +#' +#' +#' Fields: +#' +#' - `filename`: The virtual file name (or full path) of the parsed code. +#' - `original`: The actual `srcfile` the code was parsed from. +#' +#' @seealso +#' - [srcfile()]: Base documentation for `srcref` and `srcfile` objects. +#' - [getParseData()]: Parse information stored when `keep.source.data` is `TRUE`. +#' - Source References (R Journal): +#' +#' @export +#' @family object inspectors +src <- function( + x, + max_depth = 5L, + max_length = 100L, + ... +) { + seen_srcfiles <- new.env(parent = emptyenv()) + seen_srcfiles$.counter <- 0L + + result <- src_extract(x, seen_srcfiles) + if (is.null(result)) { + return(invisible(NULL)) + } + + # Ensure result has proper type for tree display + if (is.null(attr(result, "srcref_type"))) { + result <- as_srcref_tree(result, from = x) + } + + structure( + result, + max_depth = max_depth, + max_length = max_length, + tree_args = list(...), + class = c("lobstr_srcref", class(result)) + ) +} + +#' @export +print.lobstr_srcref <- function(x, ...) { + max_depth <- attr(x, "max_depth") %||% 5L + max_length <- attr(x, "max_length") %||% 100L + tree_args <- attr(x, "tree_args") %||% list() + + # Strip attributes before printing + attr(x, "max_depth") <- NULL + attr(x, "max_length") <- NULL + attr(x, "tree_args") <- NULL + + # Defaults for `tree()` arguments that are not directly exposed by `src()` + tree_args$max_vec_len <- tree_args$max_vec_len %||% 3L + + inject(tree( + x = x, + max_depth = max_depth, + max_length = max_length, + !!!tree_args + )) + + invisible(x) +} + +#' @export +tree_label.lobstr_srcref <- function(x, opts) { + type <- attr(x, "srcref_type") + + switch( + type, + body = "", + block = "<{>", + srcfile = srcfile_label(x), + paste0("<", type, ">") + ) +} + +#' @export +tree_label.lobstr_srcref_location <- function(x, opts) { + as.character(x) +} + +#' @export +tree_label.srcref <- function(x, opts) { + loc <- srcref_location(x) + paste0("") +} + +#' @export +tree_label.srcfile <- function(x, opts) { + paste0("<", class(x)[1], ": ", utils::getSrcFilename(x), ">") +} + +#' @export +tree_label.lobstr_srcfile_ref <- function(x, opts) { + paste0("@", as.character(x)) +} + + +# Main extraction logic -------------------------------------------------------- + +src_extract <- function(x, seen_srcfiles) { + # Srcref object + if (inherits(x, "srcref")) { + return(srcref_node(x, seen_srcfiles)) + } + + # List of srcrefs + if ( + is.list(x) && + length(x) > 0 && + all(vapply(x, inherits, logical(1), "srcref")) + ) { + return(srcref_list_node(x, seen_srcfiles)) + } + + # Evaluated closures + if (is_closure(x)) { + return(function_node(x, seen_srcfiles)) + } + + # Expressions and language objects + if (is.expression(x) || is.language(x)) { + return(expr_node(x, seen_srcfiles)) + } + + NULL +} + +# Extract standard srcref-related attributes from any object +extract_srcref_attrs <- function(x, seen_srcfiles) { + attrs <- list() + + if (!is.null(srcref <- attr(x, "srcref"))) { + attrs$`attr("srcref")` <- srcref_attr_node( + srcref, + seen_srcfiles + ) + } + + if (!is.null(srcfile <- attr(x, "srcfile"))) { + attrs$`attr("srcfile")` <- srcfile_node(srcfile, seen_srcfiles) + } + + if (!is.null(whole <- attr(x, "wholeSrcref"))) { + attrs$`attr("wholeSrcref")` <- srcref_attr_node(whole, seen_srcfiles) + } + + attrs +} + +# A srcref attribute may be a srcref object or a list of srcref objects +srcref_attr_node <- function(srcref, seen_srcfiles) { + if (inherits(srcref, "srcref")) { + return(srcref_node(srcref, seen_srcfiles)) + } + + if (is.list(srcref)) { + return(srcref_list_node(srcref, seen_srcfiles)) + } + + NULL +} + +srcref_node <- function(srcref, seen_srcfiles) { + info <- srcref_info(srcref) + node <- list(location = info$location) + + if (!is.null(info$bytes)) { + node$bytes <- info$bytes + } + if (!is.null(info$parsed)) { + node$parsed <- info$parsed + } + + # Just for completeness but we really don't expect srcref attributes on srcrefs + attrs <- extract_srcref_attrs(srcref, seen_srcfiles) + node <- c(node, attrs) + + new_srcref_tree(node, type = "srcref") +} + +srcref_list_node <- function(srcref_list, seen_srcfiles) { + srcrefs <- lapply(srcref_list, srcref_node, seen_srcfiles) + names(srcrefs) <- paste0("[[", seq_along(srcrefs), "]]") + + attrs <- extract_srcref_attrs(srcref_list, seen_srcfiles) + node <- c(srcrefs, attrs) + + new_srcref_tree(node, type = "list") +} + +function_node <- function(fun, seen_srcfiles) { + node <- extract_srcref_attrs(fun, seen_srcfiles) + body <- src_extract(body(fun), seen_srcfiles) + + if (!is.null(body)) { + node$`body()` <- as_srcref_tree(body, from = body(fun)) + } + + if (length(node) == 0) { + return(NULL) + } + + new_srcref_tree(node, type = "closure") +} + +expr_node <- function(x, seen_srcfiles) { + attrs <- extract_srcref_attrs(x, seen_srcfiles) + nested <- extract_nested_srcrefs(x, seen_srcfiles) + + if (length(attrs) > 0) { + # Node has attributes: wrap with proper type + node <- c(attrs, nested) + return(new_srcref_tree(node, type = node_type(x))) + } + + # No attributes: return bare list for path collapsing, or NULL if empty + if (length(nested) > 0) { + nested + } else { + NULL + } +} + +extract_nested_srcrefs <- function(x, seen_srcfiles) { + if (!is_traversable(x)) { + return(list()) + } + + nested <- list() + for (i in seq_along(x)) { + child <- src_extract(x[[i]], seen_srcfiles) + + if (!is.null(child)) { + nested <- merge_child_result(nested, child, i) + } + } + + nested +} + +merge_child_result <- function(nested, child, index) { + path <- paste0("[[", index, "]]") + + if (is_wrapped_node(child)) { + nested[[path]] <- child + } else { + # Collapse paths for bare lists + for (name in names(child)) { + nested[[paste0(path, name)]] <- child[[name]] + } + } + + nested +} + +is_traversable <- function(x) { + (is.expression(x) || is.call(x)) && length(x) > 0 +} + +is_wrapped_node <- function(x) { + !is.null(attr(x, "srcref_type")) +} + +node_type <- function(x) { + if (is.expression(x)) { + "expression" + } else if (is.call(x) && length(x) > 0) { + if (identical(x[[1]], as.symbol("function"))) { + "quoted_function" + } else if (identical(x[[1]], as.symbol("{"))) { + "block" + } else { + "language" + } + } else { + "language" + } +} + +as_srcref_tree <- function(data, ..., from) { + if (is_wrapped_node(data)) { + data + } else { + new_srcref_tree(data, type = node_type(from)) + } +} + + +# Srcfile handling ------------------------------------------------------------- + +srcfile_node <- function(srcfile, seen_srcfiles) { + if (is.null(srcfile)) { + return(NULL) + } + + addr <- obj_addr(srcfile) + srcfile_class <- class(srcfile)[[1]] + + # Check if already seen + id <- seen_srcfiles[[addr]] + if (!is_null(id)) { + return(new_srcfile_ref(id, srcfile_class)) + } + + # First occurrence - assign sequential ID + seen_srcfiles$.counter <- seen_srcfiles$.counter + 1L + id <- sprintf("%03d", seen_srcfiles$.counter) + seen_srcfiles[[addr]] <- id + + info <- as.list.environment(srcfile, all.names = TRUE, sorted = TRUE) + + # Format timestamp for readability + if (!is.null(info$timestamp)) { + info$timestamp <- format(info$timestamp) + } + + # Process nested srcfile objects (e.g., 'original' in srcfilealias) + if (!is.null(info$original) && inherits(info$original, "srcfile")) { + info$original <- srcfile_node(info$original, seen_srcfiles) + } + + # Add source preview for plain srcfiles + if (!inherits(srcfile, "srcfilecopy") && !is.null(srcref)) { + snippet <- srcfile_lines(srcfile, srcref) + if (length(snippet) > 0) { + info$`lines (from file)` <- snippet + } + } + + # Check for srcref attributes even on srcfile objects + attrs <- extract_srcref_attrs(srcfile, seen_srcfiles) + info <- c(info, attrs) + + new_srcref_tree( + info, + type = "srcfile", + srcfile_class = srcfile_class %||% "srcfile", + srcfile_id = id + ) +} + +srcfile_lines <- function(srcfile, srcref) { + if (is.null(srcfile) || !is_srcref(srcref)) { + return(character(0)) + } + + max_lines <- 3L + + first_line <- srcref[[1]] + last_line <- min(srcref[[3]], first_line + max_lines - 1L) + + # Try embedded lines first + lines <- srcfile$lines + if (!is.null(lines) && length(lines) >= last_line) { + return(lines[first_line:last_line]) + } + + # Try reading from file + filename <- srcfile$filename + directory <- srcfile$wd + + if (!is.null(filename) && !is.null(directory)) { + filepath <- file.path(directory, filename) + + if (file.exists(filepath)) { + all_lines <- tryCatch( + readLines(filepath, warn = FALSE), + error = function(e) NULL + ) + + if (!is.null(all_lines) && length(all_lines) >= last_line) { + return(all_lines[first_line:last_line]) + } + } + } + + character(0) +} + +srcfile_label <- function(x) { + class <- attr(x, "srcfile_class") + label <- paste0("<", class, ">") + + id <- attr(x, "srcfile_id") + if (!is.null(id)) { + label <- paste0(label, " @", id) + } + + label +} + + +# Srcref information extraction ------------------------------------------------ + +srcref_info <- function(srcref) { + if (!inherits(srcref, "srcref")) { + abort("Expected a srcref object") + } + + len <- length(srcref) + if (!len %in% c(4, 6, 8)) { + abort(sprintf("Unexpected srcref length: %d", len)) + } + + first_line <- srcref[[1]] + first_byte <- srcref[[2]] + last_line <- srcref[[3]] + last_byte <- srcref[[4]] + first_col <- if (len >= 6) srcref[[5]] else first_byte + last_col <- if (len >= 6) srcref[[6]] else last_byte + first_parsed <- if (len == 8) srcref[[7]] else first_line + last_parsed <- if (len == 8) srcref[[8]] else last_line + + info <- list( + location = new_srcref_location(srcref_location(srcref)) + ) + + # Add byte info if different from columns + if (first_byte != first_col || last_byte != last_col) { + info$bytes <- sprintf("%d - %d", first_byte, last_byte) + } + + # Add parsed info if different from actual lines + if (first_parsed != first_line || last_parsed != last_line) { + info$parsed <- new_srcref_location(sprintf( + "%d:%d - %d:%d", + first_parsed, + first_col, + last_parsed, + last_col + )) + } + + info +} + +srcref_location <- function(x) { + first_line <- x[[1]] + last_line <- x[[3]] + first_col <- if (length(x) >= 6) x[[5]] else x[[2]] + last_col <- if (length(x) >= 6) x[[6]] else x[[4]] + + sprintf("%d:%d - %d:%d", first_line, first_col, last_line, last_col) +} + + +# Helper functions ------------------------------------------------------------- + +has_srcref <- function(x) { + !is.null(attr(x, "srcref")) || + !is.null(attr(x, "wholeSrcref")) || + !is.null(attr(x, "srcfile")) +} + +new_srcref_tree <- function(x, type = NULL, ..., class = NULL) { + type <- type %||% attr(x, "srcref_type") + type <- arg_match( + type, + c( + "block", + "body", + "closure", + "expression", + "language", + "list", + "quoted_function", + "srcfile", + "srcref" + ) + ) + + structure( + x, + srcref_type = type, + ..., + class = c(class, "lobstr_srcref") + ) +} + +new_srcref_location <- function(x) { + structure(x, class = c("lobstr_srcref_location", "character")) +} + +new_srcfile_ref <- function(id, srcfile_class = "srcfile") { + structure( + id, + srcfile_class = srcfile_class, + class = "lobstr_srcfile_ref" + ) +} + +is_srcref <- function(x) { + is.integer(x) && inherits(x, "srcref") && length(x) %in% c(4L, 6L, 8L) +} diff --git a/R/tree.R b/R/tree.R index 6510e5a..28e8f66 100644 --- a/R/tree.R +++ b/R/tree.R @@ -9,6 +9,7 @@ #' @param max_depth How far down the tree structure should be printed. E.g. `1` #' means only direct children of the root element will be shown. Useful for #' very deep lists. +#' @param max_vec_len How many elements should be printed for vectors? #' @param show_environments Should environments be treated like normal lists and #' recursed into? #' @param hide_scalar_types Should atomic scalars be printed with type and @@ -82,6 +83,7 @@ tree <- function( index_unnamed = FALSE, max_depth = 10L, max_length = 1000L, + max_vec_len = 10L, show_environments = TRUE, hide_scalar_types = TRUE, val_printer = crayon::blue, @@ -99,6 +101,7 @@ tree <- function( index_unnamed = index_unnamed, max_depth = max_depth, max_length = max_length, + max_vec_len = max_vec_len, show_envs = show_environments, hide_scalar_types = hide_scalar_types, val_printer = val_printer, @@ -354,7 +357,7 @@ tree_label.character <- function(x, opts) { #' @export tree_label.default <- function(x, opts) { if (rlang::is_atomic(x)) { - opts$val_printer(collapse_and_truncate_vec(x, 10)) + opts$val_printer(collapse_and_truncate_vec(x, opts$max_vec_len)) } else if (rlang::is_function(x)) { # Lots of times function-like functions don't actually trigger the s3 method # for function because they dont have function in their class-list. This diff --git a/man/ast.Rd b/man/ast.Rd index 8b46d01..9c52ee1 100644 --- a/man/ast.Rd +++ b/man/ast.Rd @@ -43,6 +43,7 @@ ast(!1 + !1) \seealso{ Other object inspectors: \code{\link{ref}()}, +\code{\link{src}()}, \code{\link{sxp}()} } \concept{object inspectors} diff --git a/man/ref.Rd b/man/ref.Rd index 37fc80c..3524c82 100644 --- a/man/ref.Rd +++ b/man/ref.Rd @@ -39,6 +39,7 @@ ref(c("x", "x", "y"), character = TRUE) \seealso{ Other object inspectors: \code{\link{ast}()}, +\code{\link{src}()}, \code{\link{sxp}()} } \concept{object inspectors} diff --git a/man/src.Rd b/man/src.Rd new file mode 100644 index 0000000..2073bee --- /dev/null +++ b/man/src.Rd @@ -0,0 +1,269 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/src.R +\name{src} +\alias{src} +\title{Display tree of source references} +\usage{ +src(x, max_depth = 5L, max_length = 100L, ...) +} +\arguments{ +\item{x}{An R object with source references. Can be: +\itemize{ +\item A \code{srcref} object +\item A list of \code{srcref} objects +\item A expression vector with attached source references +\item An evaluated closure with attached source references +\item A quoted call with attached source references +}} + +\item{max_depth}{Maximum depth to traverse nested structures (default 5)} + +\item{max_length}{Maximum number of srcref nodes to display (default 100)} + +\item{...}{Additional arguments passed to \code{\link[=tree]{tree()}}} +} +\value{ +Returns a structured list containing the source reference +information. Print it to view the formatted tree. +} +\description{ +View source reference metadata attached to R objects in a tree structure. +Shows source file information, line/column locations, and lines of source code. +} +\section{Overview}{ + + +Source references are made of two kinds of objects: +\itemize{ +\item \code{srcref} objects, which contain information about a specific +location within the source file, such as the line and column numbers. +\item \code{srcfile} objects, which contain metadata about the source file +such as its name, path, and encoding. +} +\subsection{Where and when are source references created?}{ + +Ultimately the R parser creates source references. The main two entry points +to the parser are: +\itemize{ +\item The R function \code{parse()}. +\item The frontend hook \code{ReadConsole}, which powers the console input parser in +the R CLI and in IDEs. This C-level parser can also be accessed from C code +via \code{R_ParseVector()}. +} + +In principle, anything that calls \code{parse()} may create source references, but +here are the important direct and indirect callers: +\itemize{ +\item \code{source()} and \code{sys.source()} which parse and evaluate code. +\item \code{loadNamespace()} calls \code{sys.source()} when loading a \emph{source} package: +\url{https://github.com/r-devel/r-svn/blob/acd196be/src/library/base/R/namespace.R#L573}. +\item \verb{R CMD install} creates a lazy-load database from a source package. +The first step is to call \code{loadNamespace()}: +\url{https://github.com/r-devel/r-svn/blob/acd196be/src/library/tools/R/makeLazyLoad.R#L32} +} + +By default source references are not created but can be enabled by: +\itemize{ +\item Passing \code{keep.source = TRUE} explicitly to \code{parse()}, \code{source()}, +\code{sys.source()}, or \code{loadNamespace()}. +\item Setting \code{options(keep.source = TRUE)}. This affects the default arguments +of the aforementioned functions, as well as the console input parser. +In interactive sessions, \code{keep.source} is set to \code{TRUE} by default: +\url{https://github.com/r-devel/r-svn/blob/3a4745af/src/library/profile/Common.R#L26}. +\item Setting \code{options(keep.source.pkgs = TRUE)}. This affects loading a package +from source, and installing a package from source. +} +} + +\subsection{\code{srcref} objects}{ + +\code{srcref} objects are compact integer vectors describing a character range +in a source. It records start/end lines and byte/column positions and, +optionally, the parsed-line numbers if \verb{#line} directives were used. + +Lengths of 4, 6, or 8 are allowed: +\itemize{ +\item 4: basic (first_line, first_byte, last_line, last_byte) +\item 6: adds columns in Unicode codepoints (first_col, last_col) +\item 8: adds parsed-line numbers (first_parsed, last_parsed) +} + +The "column" information does not represent grapheme clusters, but Unicode +codepoints. The column cursor is incremented at every UTF-8 lead byte and +there is no support for encodings other than UTF-8. + +The srcref columns are right-boundary positions, meaning that for an +expression starting at the start of the file, the column will be 1. Note that +\code{wholeSrcref} (see below) on the other hand starts at 0, before the first +character. It might also end 1 character after the last srcref column. + +They are attached as attributes (e.g. \code{attr(x, "srcref")} or \code{attr(x, "wholeSrcref")}), possibly wrapped in a list, to the following objects: +\itemize{ +\item Expression vectors returned by \code{parse()} (wrapped in a list) +\item Quoted function calls (unwrapped) +\item Quoted \verb{\{} calls (wrapped in a list) +\item Evaluated closures (unwrapped) +} + +They have a \code{srcfile} attribute that points to the source file. + +Methods: +\itemize{ +\item \code{as.character()}: Retrieves relevant source lines from the \code{srcfile} +reference. +} +\subsection{\code{wholeSrcref} attributes}{ + +These are \code{srcref} objects stored in the \code{wholeSrcref} attributes of: +\itemize{ +\item Expression vectors returned by \code{parse()}, which seems to be the intended +usage. +\item \verb{\{} calls, which seems unintended. +} + +For expression vectors, the \code{wholeSrcref} spans from the first position +to the last position and represents the entire document. For braces, they +span from the first position to the location of the closing brace. There is +no way to know the location of the opening brace without reparsing, which +seems odd. It's probably an overlook from \code{xxexprlist()} calling +\code{attachSrcrefs()} in +\url{https://github.com/r-devel/r-svn/blob/52affc16/src/main/gram.y#L1380}. That +function is also called at the end of parsing, where it's intended for the +\code{wholeSrcref} attribute to be attached. + +For evaluated closures, the \code{wholeSrcref} attribute on the body has the same +unreliable start positions as \verb{\{} nodes. +} + +} + +\subsection{\code{srcfile} objects}{ + +\code{srcfile} objects are environments representing information about a +source file that a source reference points to. They typically refer to +a file on disk and store the filename, working directory, a timestamp, +and encoding information. + +While it is possible to create bare \code{srcfile} objects, specialized subclasses +are much more common. +\subsection{\code{srcfile}}{ + +A bare \code{srcfile} object does not contain any data apart from the file path. +It lazily loads lines from the file on disk, without any caching. + +Fields common to all \code{srcfile} objects: +\itemize{ +\item \code{filename}: The filename of the source file. If relative, the path is +resolved against \code{wd}. +\item \code{wd}: The working directory (\code{getwd()}) at the time the srcfile was created, +generally at the time of parsing). +\item \code{timestamp}: The timestamp of the source file. Retrieved from \code{filename} +with \code{file.mtime()}. +\item \code{encoding}: The encoding of the source file. +\item \code{Enc}: The encoding of output lines. Used by \code{getSrcLines()}, which +calls \code{iconv()} when \code{Enc} does not match \code{encoding}. +\item \code{parseData} (optional): Parser information saved when \code{keep.source.data} is #' set to \code{TRUE}. +} + +Implementations: +\itemize{ +\item \code{print()} and \code{summary()} to print information about the source file. +\item \code{open()} and \code{close()} to access the underlying file as a connection. +} + +Helpers: +\itemize{ +\item \code{getSrcLines()}: Retrieves source lines from a \code{srcfile}. +} +} + +\subsection{\code{srcfilecopy}}{ + +A \code{srcfilecopy} stores the actual source lines in memory in \verb{$lines}. +\code{srcfilecopy} is useful when the original file may change or does not +exist, because it preserves the exact text used by the parser. + +This type of srcfile is the most common. It's created by: +\itemize{ +\item The R-level \code{parse()} function when \code{text} is supplied: + +\if{html}{\out{
}}\preformatted{# Creates a `""` non-file `srcfilecopy` +parse(text = "...", keep.source = TRUE) +}\if{html}{\out{
}} +\item The console's input parser when \code{getOption("keep.source")} is \code{TRUE}. +\item \code{sys.source()} when \code{keep.source = TRUE}: + +\if{html}{\out{
}}\preformatted{sys.source(file, keep.source = TRUE) +}\if{html}{\out{
}} + +The \code{srcfilecopy} object is timestamped with the file's last modification time. +\url{https://github.com/r-devel/r-svn/blob/52affc16/src/library/base/R/source.R#L273-L276} +} + +Fields: +\itemize{ +\item \code{filename}: The filename of the source file. If \code{isFile} is \code{FALSE}, +the field is non meaningful. For instance \code{parse(text = )} sets it to +\code{""}, and the console input parser sets it to \code{""}. +\item \code{isFile}: A logical indicating whether the source file exists. +\item \code{fixedNewlines}: If \code{TRUE}, \code{lines} is a character vector of lines with +no embedded \verb{\\n} characters. The \code{getSrcLines()} helper regularises \code{lines} +in this way and sets \code{fixedNewlines} to \code{TRUE}. +} + +Note that the C-level parser (used directly mainly when parsing console input) +does not call the R-level constructor and only instantiates the \code{filename} +(set to \code{""}) and \code{lines} fields. +} + +\subsection{\code{srcfilealias}}{ + +This object wraps an existing \code{srcfile} object (stored in \code{original}). It +allows exposing a different \code{filename} while delegating the open/close/get +lines operations to the \code{srcfile} stored in \code{original}. + +The typical way aliases are created is via \verb{#line *line* *filename*} +directives where the optional \verb{*filename*} argument is supplied. These +directives remap the srcref and srcfile of parsed code to a different +location, for example from a temporary file or generated file to the original +location on disk. + +Called by \code{install.packages()} when installing a \emph{source} package with \code{keep.source.pkgs} set to \code{TRUE} (see +\url{https://github.com/r-devel/r-svn/blob/52affc16/src/library/tools/R/install.R#L545}), but +\href{https://github.com/r-devel/r-svn/blob/52affc16/src/library/tools/R/admin.R#L308}{only when}: +\itemize{ +\item \code{Encoding} was supplied in \code{DESCRIPTION} +\item The system locale is not "C" or "POSIX". +} + +The source files are converted to the encoding of the system locale, then +collated in a single source file with \verb{#line} directives mapping them to their +original file names (with full paths): +\url{https://github.com/r-devel/r-svn/blob/52affc16/src/library/tools/R/admin.R#L342}. + +Note that the \code{filename} of the \code{original} srcfile incorrectly points to the +package path in the install destination. + +Fields: +\itemize{ +\item \code{filename}: The virtual file name (or full path) of the parsed code. +\item \code{original}: The actual \code{srcfile} the code was parsed from. +} +} + +} +} + +\seealso{ +\itemize{ +\item \code{\link[=srcfile]{srcfile()}}: Base documentation for \code{srcref} and \code{srcfile} objects. +\item \code{\link[=getParseData]{getParseData()}}: Parse information stored when \code{keep.source.data} is \code{TRUE}. +\item Source References (R Journal): \url{https://journal.r-project.org/articles/RJ-2010-010} +} + +Other object inspectors: +\code{\link{ast}()}, +\code{\link{ref}()}, +\code{\link{sxp}()} +} +\concept{object inspectors} diff --git a/man/sxp.Rd b/man/sxp.Rd index 0f5511f..bd70bff 100644 --- a/man/sxp.Rd +++ b/man/sxp.Rd @@ -66,6 +66,7 @@ sxp(e2, expand = "environment") \seealso{ Other object inspectors: \code{\link{ast}()}, -\code{\link{ref}()} +\code{\link{ref}()}, +\code{\link{src}()} } \concept{object inspectors} diff --git a/man/tree.Rd b/man/tree.Rd index 884c0be..99deac4 100644 --- a/man/tree.Rd +++ b/man/tree.Rd @@ -10,6 +10,7 @@ tree( index_unnamed = FALSE, max_depth = 10L, max_length = 1000L, + max_vec_len = 10L, show_environments = TRUE, hide_scalar_types = TRUE, val_printer = crayon::blue, @@ -34,6 +35,8 @@ very deep lists.} \item{max_length}{How many elements should be printed? This is useful in case you try and print an object with 100,000 items in it.} +\item{max_vec_len}{How many elements should be printed for vectors?} + \item{show_environments}{Should environments be treated like normal lists and recursed into?} diff --git a/tests/testthat/_snaps/src.md b/tests/testthat/_snaps/src.md new file mode 100644 index 0000000..6549395 --- /dev/null +++ b/tests/testthat/_snaps/src.md @@ -0,0 +1,790 @@ +# src() shows closure with srcref and wholeSrcref + + Code + f <- simple_function_with_srcref() + src(f) + Output + + ├─attr("srcref"): + │ ├─location: 1:14 - 3:1 + │ └─attr("srcfile"): @001 + │ ├─Enc: "unknown" + │ ├─filename: "" + │ ├─fixedNewlines: TRUE + │ ├─isFile: TRUE + │ ├─lines: "test_func <-...", " x + y", "}" + │ ├─parseData: 1, 1, 1, ... + │ ├─timestamp: "" + │ └─wd: "" + └─body(): <{> + ├─attr("srcref"): + │ ├─[[1]]: + │ │ ├─location: 1:29 - 1:29 + │ │ └─attr("srcfile"): @001 + │ └─[[2]]: + │ ├─location: 2:3 - 2:7 + │ └─attr("srcfile"): @001 + ├─attr("srcfile"): @001 + └─attr("wholeSrcref"): + ├─location: 1:0 - 3:1 + └─attr("srcfile"): @001 + +# src() shows multi-statement function + + Code + f <- multi_statement_function_with_srcref() + src(f) + Output + + ├─attr("srcref"): + │ ├─location: 1:15 - 6:1 + │ └─attr("srcfile"): @001 + │ ├─Enc: "unknown" + │ ├─filename: "" + │ ├─fixedNewlines: TRUE + │ ├─isFile: TRUE + │ ├─lines: "multi_func <...", " a <- x + 1", " b <- a * 2", ... + │ ├─parseData: 1, 1, 1, ... + │ ├─timestamp: "" + │ └─wd: "" + └─body(): <{> + ├─attr("srcref"): + │ ├─[[1]]: + │ │ ├─location: 1:27 - 1:27 + │ │ └─attr("srcfile"): @001 + │ ├─[[2]]: + │ │ ├─location: 2:3 - 2:12 + │ │ └─attr("srcfile"): @001 + │ ├─[[3]]: + │ │ ├─location: 3:3 - 3:12 + │ │ └─attr("srcfile"): @001 + │ ├─[[4]]: + │ │ ├─location: 4:3 - 4:12 + │ │ └─attr("srcfile"): @001 + │ └─[[5]]: + │ ├─location: 5:3 - 5:3 + │ └─attr("srcfile"): @001 + ├─attr("srcfile"): @001 + └─attr("wholeSrcref"): + ├─location: 1:0 - 6:1 + └─attr("srcfile"): @001 + +# src() shows quoted function with nested body + + Code + with_srcref("x <- quote(function() {})") + src(x) + Output + + ├─[[3]]: <{> + │ ├─attr("srcref"): + │ │ └─[[1]]: + │ │ ├─location: 1:23 - 1:23 + │ │ └─attr("srcfile"): @001 + │ │ ├─Enc: "unknown" + │ │ ├─filename: "" + │ │ ├─fixedNewlines: TRUE + │ │ ├─isFile: TRUE + │ │ ├─lines: "x <- quote(function() {})" + │ │ ├─parseData: 1, 1, 1, ...... + │ │ ├─timestamp: "" + │ │ └─wd: "" + │ ├─attr("srcfile"): @001 + │ └─attr("wholeSrcref"): + │ ├─location: 1:0 - 1:24 + │ └─attr("srcfile"): @001 + └─[[4]]: + ├─location: 1:12 - 1:24 + └─attr("srcfile"): @001 + +# src() shows quoted function body directly + + Code + with_srcref("x <- quote(function() {})") + src(x[[3]]) + Output + <{> + ├─attr("srcref"): + │ └─[[1]]: + │ ├─location: 1:23 - 1:23 + │ └─attr("srcfile"): @001 + │ ├─Enc: "unknown" + │ ├─filename: "" + │ ├─fixedNewlines: TRUE + │ ├─isFile: TRUE + │ ├─lines: "x <- quote(function() {})" + │ ├─parseData: 1, 1, 1, ... + │ ├─timestamp: "" + │ └─wd: "" + ├─attr("srcfile"): @001 + └─attr("wholeSrcref"): + ├─location: 1:0 - 1:24 + └─attr("srcfile"): @001 + +# src() shows quoted function with arguments + + Code + with_srcref("x <- quote(function(a, b) {})") + src(x) + Output + + ├─[[3]]: <{> + │ ├─attr("srcref"): + │ │ └─[[1]]: + │ │ ├─location: 1:27 - 1:27 + │ │ └─attr("srcfile"): @001 + │ │ ├─Enc: "unknown" + │ │ ├─filename: "" + │ │ ├─fixedNewlines: TRUE + │ │ ├─isFile: TRUE + │ │ ├─lines: "x <- quote(function(a, b) {})" + │ │ ├─parseData: 1, 1, 1, ...... + │ │ ├─timestamp: "" + │ │ └─wd: "" + │ ├─attr("srcfile"): @001 + │ └─attr("wholeSrcref"): + │ ├─location: 1:0 - 1:28 + │ └─attr("srcfile"): @001 + └─[[4]]: + ├─location: 1:12 - 1:28 + └─attr("srcfile"): @001 + +# src() shows srcref with parsed field when positions differ + + Code + srcfile <- srcfilecopy("test.R", c("x <- function() {", + " # A long comment that spans", " # multiple lines", " y <- 1", "}")) + synthetic_srcref <- structure(c(2L, 3L, 4L, 8L, 3L, 8L, 1L, 5L), class = "srcref", + srcfile = srcfile) + src(synthetic_srcref) + Output + + ├─location: 2:3 - 4:8 + ├─parsed: 1:3 - 5:8 + └─attr("srcfile"): @001 + ├─Enc: "unknown" + ├─filename: "" + ├─fixedNewlines: TRUE + ├─isFile: FALSE + ├─lines: "x <- functio...", " # A long c...", " # multiple...", ... + ├─timestamp: "" + └─wd: "" + +# src() shows expression with single element + + Code + x <- parse(text = "x + 1", keep.source = TRUE) + src(x) + Output + + ├─attr("srcref"): + │ └─[[1]]: + │ ├─location: 1:1 - 1:5 + │ └─attr("srcfile"): @001 + │ ├─Enc: "unknown" + │ ├─filename: "" + │ ├─fixedNewlines: TRUE + │ ├─isFile: FALSE + │ ├─lines: "x + 1" + │ ├─parseData: 1, 1, 1, ... + │ ├─timestamp: "" + │ └─wd: "" + ├─attr("srcfile"): @001 + └─attr("wholeSrcref"): + ├─location: 1:0 - 2:0 + └─attr("srcfile"): @001 + +# src() shows expression with multiple elements + + Code + x <- parse(text = c("x + 1", "y + 2", "z + 3"), keep.source = TRUE) + src(x) + Output + + ├─attr("srcref"): + │ ├─[[1]]: + │ │ ├─location: 1:1 - 1:5 + │ │ └─attr("srcfile"): @001 + │ │ ├─Enc: "unknown" + │ │ ├─filename: "" + │ │ ├─fixedNewlines: TRUE + │ │ ├─isFile: FALSE + │ │ ├─lines: "x + 1", "y + 2", "z + 3" + │ │ ├─parseData: 1, 1, 1, ... + │ │ ├─timestamp: "" + │ │ └─wd: "" + │ ├─[[2]]: + │ │ ├─location: 2:1 - 2:5 + │ │ └─attr("srcfile"): @001 + │ └─[[3]]: + │ ├─location: 3:1 - 3:5 + │ └─attr("srcfile"): @001 + ├─attr("srcfile"): @001 + └─attr("wholeSrcref"): + ├─location: 1:0 - 4:0 + └─attr("srcfile"): @001 + +# src() shows expression with nested block and wholeSrcref + + Code + x <- parse(text = "{\n 1\n}", keep.source = TRUE) + src(x) + Output + + ├─attr("srcref"): + │ └─[[1]]: + │ ├─location: 1:1 - 3:1 + │ └─attr("srcfile"): @001 + │ ├─Enc: "unknown" + │ ├─filename: "" + │ ├─fixedNewlines: TRUE + │ ├─isFile: FALSE + │ ├─lines: "{", " 1", "}" + │ ├─parseData: 1, 1, 1, ... + │ ├─timestamp: "" + │ └─wd: "" + ├─attr("srcfile"): @001 + ├─attr("wholeSrcref"): + │ ├─location: 1:0 - 4:0 + │ └─attr("srcfile"): @001 + └─[[1]]: <{> + ├─attr("srcref"): + │ ├─[[1]]: + │ │ ├─location: 1:1 - 1:1 + │ │ └─attr("srcfile"): @001 + │ └─[[2]]: + │ ├─location: 2:3 - 2:3 + │ └─attr("srcfile"): @001 + ├─attr("srcfile"): @001 + └─attr("wholeSrcref"): + ├─location: 1:0 - 3:1 + └─attr("srcfile"): @001 + +# src() shows nested block element directly + + Code + x <- parse(text = "{\n 1\n}", keep.source = TRUE) + src(x[[1]]) + Output + <{> + ├─attr("srcref"): + │ ├─[[1]]: + │ │ ├─location: 1:1 - 1:1 + │ │ └─attr("srcfile"): @001 + │ │ ├─Enc: "unknown" + │ │ ├─filename: "" + │ │ ├─fixedNewlines: TRUE + │ │ ├─isFile: FALSE + │ │ ├─lines: "{", " 1", "}" + │ │ ├─parseData: 1, 1, 1, ... + │ │ ├─timestamp: "" + │ │ └─wd: "" + │ └─[[2]]: + │ ├─location: 2:3 - 2:3 + │ └─attr("srcfile"): @001 + ├─attr("srcfile"): @001 + └─attr("wholeSrcref"): + ├─location: 1:0 - 3:1 + └─attr("srcfile"): @001 + +# src() shows block with srcref list and wholeSrcref + + Code + x <- parse(text = "{\n a <- 1\n b <- 2\n}", keep.source = TRUE) + src(x[[1]]) + Output + <{> + ├─attr("srcref"): + │ ├─[[1]]: + │ │ ├─location: 1:1 - 1:1 + │ │ └─attr("srcfile"): @001 + │ │ ├─Enc: "unknown" + │ │ ├─filename: "" + │ │ ├─fixedNewlines: TRUE + │ │ ├─isFile: FALSE + │ │ ├─lines: "{", " a <- 1", " b <- 2", ... + │ │ ├─parseData: 1, 1, 1, ... + │ │ ├─timestamp: "" + │ │ └─wd: "" + │ ├─[[2]]: + │ │ ├─location: 2:3 - 2:8 + │ │ └─attr("srcfile"): @001 + │ └─[[3]]: + │ ├─location: 3:3 - 3:8 + │ └─attr("srcfile"): @001 + ├─attr("srcfile"): @001 + └─attr("wholeSrcref"): + ├─location: 1:0 - 4:1 + └─attr("srcfile"): @001 + +# src() shows single srcref + + Code + x <- parse(text = "x + 1", keep.source = TRUE) + sr <- attr(x, "srcref")[[1]] + src(sr) + Output + + ├─location: 1:1 - 1:5 + └─attr("srcfile"): @001 + ├─Enc: "unknown" + ├─filename: "" + ├─fixedNewlines: TRUE + ├─isFile: FALSE + ├─lines: "x + 1" + ├─parseData: 1, 1, 1, ... + ├─timestamp: "" + └─wd: "" + +# src() shows list of srcrefs with count + + Code + x <- parse(text = c("x + 1", "y + 2"), keep.source = TRUE) + sr_list <- attr(x, "srcref") + src(sr_list) + Output + + ├─[[1]]: + │ ├─location: 1:1 - 1:5 + │ └─attr("srcfile"): @001 + │ ├─Enc: "unknown" + │ ├─filename: "" + │ ├─fixedNewlines: TRUE + │ ├─isFile: FALSE + │ ├─lines: "x + 1", "y + 2" + │ ├─parseData: 1, 1, 1, ... + │ ├─timestamp: "" + │ └─wd: "" + └─[[2]]: + ├─location: 2:1 - 2:5 + └─attr("srcfile"): @001 + +# src() reveals srcref list structure with index notation + + Code + with_srcref("x <- quote(function() { 1 })") + src(x[[3]]) + Output + <{> + ├─attr("srcref"): + │ ├─[[1]]: + │ │ ├─location: 1:23 - 1:23 + │ │ └─attr("srcfile"): @001 + │ │ ├─Enc: "unknown" + │ │ ├─filename: "" + │ │ ├─fixedNewlines: TRUE + │ │ ├─isFile: TRUE + │ │ ├─lines: "x <- quote(function() { 1 })" + │ │ ├─parseData: 1, 1, 1, ... + │ │ ├─timestamp: "" + │ │ └─wd: "" + │ └─[[2]]: + │ ├─location: 1:25 - 1:25 + │ └─attr("srcfile"): @001 + ├─attr("srcfile"): @001 + └─attr("wholeSrcref"): + ├─location: 1:0 - 1:27 + └─attr("srcfile"): @001 + +# src() handles srcrefs nested in language calls + + Code + x <- parse(text = "foo({ if (1) bar({ 2 }) })", keep.source = TRUE) + src(x, max_depth = 10) + Output + + ├─attr("srcref"): + │ └─[[1]]: + │ ├─location: 1:1 - 1:26 + │ └─attr("srcfile"): @001 + │ ├─Enc: "unknown" + │ ├─filename: "" + │ ├─fixedNewlines: TRUE + │ ├─isFile: FALSE + │ ├─lines: "foo({ if (1) bar({ 2 }) })" + │ ├─parseData: 1, 1, 1, ... + │ ├─timestamp: "" + │ └─wd: "" + ├─attr("srcfile"): @001 + ├─attr("wholeSrcref"): + │ ├─location: 1:0 - 2:0 + │ └─attr("srcfile"): @001 + └─[[1]][[2]]: <{> + ├─attr("srcref"): + │ ├─[[1]]: + │ │ ├─location: 1:5 - 1:5 + │ │ └─attr("srcfile"): @001 + │ └─[[2]]: + │ ├─location: 1:7 - 1:23 + │ └─attr("srcfile"): @001 + ├─attr("srcfile"): @001 + ├─attr("wholeSrcref"): + │ ├─location: 1:0 - 1:25 + │ └─attr("srcfile"): @001 + └─[[2]][[3]][[2]]: <{> + ├─attr("srcref"): + │ ├─[[1]]: + │ │ ├─location: 1:18 - 1:18 + │ │ └─attr("srcfile"): @001 + │ └─[[2]]: + │ ├─location: 1:20 - 1:20 + │ └─attr("srcfile"): @001 + ├─attr("srcfile"): @001 + └─attr("wholeSrcref"): + ├─location: 1:0 - 1:22 + └─attr("srcfile"): @001 + +# src() handles srcrefs nested in function bodies + + Code + with_srcref("f <- function() foo({ if (1) bar({ 2 }) })") + src(f, max_depth = 10) + Output + + ├─attr("srcref"): + │ ├─location: 1:6 - 1:42 + │ └─attr("srcfile"): @001 + │ ├─Enc: "unknown" + │ ├─filename: "" + │ ├─fixedNewlines: TRUE + │ ├─isFile: TRUE + │ ├─lines: "f <- function() foo({ if (1) bar..." + │ ├─parseData: 1, 1, 1, ... + │ ├─timestamp: "" + │ └─wd: "" + └─body(): + └─[[2]]: <{> + ├─attr("srcref"): + │ ├─[[1]]: + │ │ ├─location: 1:21 - 1:21 + │ │ └─attr("srcfile"): @001 + │ └─[[2]]: + │ ├─location: 1:23 - 1:39 + │ └─attr("srcfile"): @001 + ├─attr("srcfile"): @001 + ├─attr("wholeSrcref"): + │ ├─location: 1:0 - 1:41 + │ └─attr("srcfile"): @001 + └─[[2]][[3]][[2]]: <{> + ├─attr("srcref"): + │ ├─[[1]]: + │ │ ├─location: 1:34 - 1:34 + │ │ └─attr("srcfile"): @001 + │ └─[[2]]: + │ ├─location: 1:36 - 1:36 + │ └─attr("srcfile"): @001 + ├─attr("srcfile"): @001 + └─attr("wholeSrcref"): + ├─location: 1:0 - 1:38 + └─attr("srcfile"): @001 + +# src() currently shows duplicate srcfile objects + + Code + f <- simple_function_with_srcref() + src(f) + Output + + ├─attr("srcref"): + │ ├─location: 1:14 - 3:1 + │ └─attr("srcfile"): @001 + │ ├─Enc: "unknown" + │ ├─filename: "" + │ ├─fixedNewlines: TRUE + │ ├─isFile: TRUE + │ ├─lines: "test_func <-...", " x + y", "}" + │ ├─parseData: 1, 1, 1, ... + │ ├─timestamp: "" + │ └─wd: "" + └─body(): <{> + ├─attr("srcref"): + │ ├─[[1]]: + │ │ ├─location: 1:29 - 1:29 + │ │ └─attr("srcfile"): @001 + │ └─[[2]]: + │ ├─location: 2:3 - 2:7 + │ └─attr("srcfile"): @001 + ├─attr("srcfile"): @001 + └─attr("wholeSrcref"): + ├─location: 1:0 - 3:1 + └─attr("srcfile"): @001 + +# src() shows many duplicate srcfiles in nested expression + + Code + x <- parse(text = "{\n 1\n 2\n}", keep.source = TRUE) + src(x) + Output + + ├─attr("srcref"): + │ └─[[1]]: + │ ├─location: 1:1 - 4:1 + │ └─attr("srcfile"): @001 + │ ├─Enc: "unknown" + │ ├─filename: "" + │ ├─fixedNewlines: TRUE + │ ├─isFile: FALSE + │ ├─lines: "{", " 1", " 2", ... + │ ├─parseData: 1, 1, 1, ... + │ ├─timestamp: "" + │ └─wd: "" + ├─attr("srcfile"): @001 + ├─attr("wholeSrcref"): + │ ├─location: 1:0 - 5:0 + │ └─attr("srcfile"): @001 + └─[[1]]: <{> + ├─attr("srcref"): + │ ├─[[1]]: + │ │ ├─location: 1:1 - 1:1 + │ │ └─attr("srcfile"): @001 + │ ├─[[2]]: + │ │ ├─location: 2:3 - 2:3 + │ │ └─attr("srcfile"): @001 + │ └─[[3]]: + │ ├─location: 3:3 - 3:3 + │ └─attr("srcfile"): @001 + ├─attr("srcfile"): @001 + └─attr("wholeSrcref"): + ├─location: 1:0 - 4:1 + └─attr("srcfile"): @001 + +# src() handles empty block + + Code + x <- parse(text = "{}", keep.source = TRUE) + src(x[[1]]) + Output + <{> + ├─attr("srcref"): + │ └─[[1]]: + │ ├─location: 1:1 - 1:1 + │ └─attr("srcfile"): @001 + │ ├─Enc: "unknown" + │ ├─filename: "" + │ ├─fixedNewlines: TRUE + │ ├─isFile: FALSE + │ ├─lines: "{}" + │ ├─parseData: 1, 1, 1, ... + │ ├─timestamp: "" + │ └─wd: "" + ├─attr("srcfile"): @001 + └─attr("wholeSrcref"): + ├─location: 1:0 - 1:2 + └─attr("srcfile"): @001 + +# src() handles function without arguments + + Code + with_srcref("f <- function() { NULL }") + src(f) + Output + + ├─attr("srcref"): + │ ├─location: 1:6 - 1:24 + │ └─attr("srcfile"): @001 + │ ├─Enc: "unknown" + │ ├─filename: "" + │ ├─fixedNewlines: TRUE + │ ├─isFile: TRUE + │ ├─lines: "f <- function() { NULL }" + │ ├─parseData: 1, 1, 1, ... + │ ├─timestamp: "" + │ └─wd: "" + └─body(): <{> + ├─attr("srcref"): + │ ├─[[1]]: + │ │ ├─location: 1:17 - 1:17 + │ │ └─attr("srcfile"): @001 + │ └─[[2]]: + │ ├─location: 1:19 - 1:22 + │ └─attr("srcfile"): @001 + ├─attr("srcfile"): @001 + └─attr("wholeSrcref"): + ├─location: 1:0 - 1:24 + └─attr("srcfile"): @001 + +# src() handles if statement with blocks + + Code + x <- parse(text = "if (TRUE) { 1 } else { 2 }", keep.source = TRUE) + src(x) + Output + + ├─attr("srcref"): + │ └─[[1]]: + │ ├─location: 1:1 - 1:26 + │ └─attr("srcfile"): @001 + │ ├─Enc: "unknown" + │ ├─filename: "" + │ ├─fixedNewlines: TRUE + │ ├─isFile: FALSE + │ ├─lines: "if (TRUE) { 1 } else { 2 }" + │ ├─parseData: 1, 1, 1, ... + │ ├─timestamp: "" + │ └─wd: "" + ├─attr("srcfile"): @001 + ├─attr("wholeSrcref"): + │ ├─location: 1:0 - 2:0 + │ └─attr("srcfile"): @001 + ├─[[1]][[3]]: <{> + │ ├─attr("srcref"): + │ │ ├─[[1]]: + │ │ │ ├─location: 1:11 - 1:11 + │ │ │ └─attr("srcfile"): @001 + │ │ └─[[2]]: + │ │ ├─location: 1:13 - 1:13 + │ │ └─attr("srcfile"): @001 + │ ├─attr("srcfile"): @001 + │ └─attr("wholeSrcref"): + │ ├─location: 1:0 - 1:15 + │ └─attr("srcfile"): @001 + └─[[1]][[4]]: <{> + ├─attr("srcref"): + │ ├─[[1]]: + │ │ ├─location: 1:22 - 1:22 + │ │ └─attr("srcfile"): @001 + │ └─[[2]]: + │ ├─location: 1:24 - 1:24 + │ └─attr("srcfile"): @001 + ├─attr("srcfile"): @001 + └─attr("wholeSrcref"): + ├─location: 1:0 - 1:26 + └─attr("srcfile"): @001 + +# src() respects show_source_lines parameter + + Code + f <- simple_function_with_srcref() + src(f) + Output + + ├─attr("srcref"): + │ ├─location: 1:14 - 3:1 + │ └─attr("srcfile"): @001 + │ ├─Enc: "unknown" + │ ├─filename: "" + │ ├─fixedNewlines: TRUE + │ ├─isFile: TRUE + │ ├─lines: "test_func <-...", " x + y", "}" + │ ├─parseData: 1, 1, 1, ... + │ ├─timestamp: "" + │ └─wd: "" + └─body(): <{> + ├─attr("srcref"): + │ ├─[[1]]: + │ │ ├─location: 1:29 - 1:29 + │ │ └─attr("srcfile"): @001 + │ └─[[2]]: + │ ├─location: 2:3 - 2:7 + │ └─attr("srcfile"): @001 + ├─attr("srcfile"): @001 + └─attr("wholeSrcref"): + ├─location: 1:0 - 3:1 + └─attr("srcfile"): @001 + +# src() shows expression with multiple nested blocks + + Code + x <- parse(text = "{\n {\n 1\n }\n {\n 2\n }\n}", keep.source = TRUE) + src(x) + Output + + ├─attr("srcref"): + │ └─[[1]]: + │ ├─location: 1:1 - 8:1 + │ └─attr("srcfile"): @001 + │ ├─Enc: "unknown" + │ ├─filename: "" + │ ├─fixedNewlines: TRUE + │ ├─isFile: FALSE + │ ├─lines: "{", " {", " 1", ... + │ ├─parseData: 1, 1, 1, ... + │ ├─timestamp: "" + │ └─wd: "" + ├─attr("srcfile"): @001 + ├─attr("wholeSrcref"): + │ ├─location: 1:0 - 9:0 + │ └─attr("srcfile"): @001 + └─[[1]]: <{> + ├─attr("srcref"): + │ ├─[[1]]: + │ │ ├─location: 1:1 - 1:1 + │ │ └─attr("srcfile"): @001 + │ ├─[[2]]: + │ │ ├─location: 2:3 - 4:3 + │ │ └─attr("srcfile"): @001 + │ └─[[3]]: + │ ├─location: 5:3 - 7:3 + │ └─attr("srcfile"): @001 + ├─attr("srcfile"): @001 + ├─attr("wholeSrcref"): + │ ├─location: 1:0 - 8:1 + │ └─attr("srcfile"): @001 + ├─[[2]]: <{> + │ ├─attr("srcref"): + │ │ ├─[[1]]: + │ │ │ ├─location: 2:3 - 2:3 + │ │ │ └─attr("srcfile"): @001 + │ │ └─[[2]]: + │ │ ├─location: 3:5 - 3:5 + │ │ └─attr("srcfile"): @001 + │ ├─attr("srcfile"): @001 + │ └─attr("wholeSrcref"): + │ ├─location: 1:0 - 4:3 + │ └─attr("srcfile"): @001 + └─[[3]]: <{> + ├─attr("srcref"): + │ ├─[[1]]: + │ │ ├─location: 5:3 - 5:3 + │ │ └─attr("srcfile"): @001 + │ └─[[2]]: + │ ├─location: 6:5 - 6:5 + │ └─attr("srcfile"): @001 + ├─attr("srcfile"): @001 + └─attr("wholeSrcref"): + ├─location: 1:0 - 7:3 + └─attr("srcfile"): @001 + +# src() shows function with nested block in body + + Code + with_srcref("f <- function(x) {\n if (x) {\n 1\n }\n}") + src(f) + Output + + ├─attr("srcref"): + │ ├─location: 1:6 - 5:1 + │ └─attr("srcfile"): @001 + │ ├─Enc: "unknown" + │ ├─filename: "" + │ ├─fixedNewlines: TRUE + │ ├─isFile: TRUE + │ ├─lines: "f <- functio...", " if (x) {", " 1", ... + │ ├─parseData: 1, 1, 1, ... + │ ├─timestamp: "" + │ └─wd: "" + └─body(): <{> + ├─attr("srcref"): + │ ├─[[1]]: + │ │ ├─location: 1:18 - 1:18 + │ │ └─attr("srcfile"): @001 + │ └─[[2]]: + │ ├─location: 2:3 - 4:3 + │ └─attr("srcfile"): @001 + ├─attr("srcfile"): @001 + ├─attr("wholeSrcref"): + │ ├─location: 1:0 - 5:1 + │ └─attr("srcfile"): @001 + └─[[2]][[3]]: <{> + ├─attr("srcref"): + │ ├─[[1]]: + │ │ ├─location: 2:10 - 2:10 + │ │ └─attr("srcfile"): @001 + │ └─[[2]]: + │ ├─location: 3:5 - 3:5 + │ └─attr("srcfile"): @001 + ├─attr("srcfile"): @001 + └─attr("wholeSrcref"): + ├─location: 1:0 - 4:3 + └─attr("srcfile"): @001 + diff --git a/tests/testthat/helper-src.R b/tests/testthat/helper-src.R new file mode 100644 index 0000000..6fafafc --- /dev/null +++ b/tests/testthat/helper-src.R @@ -0,0 +1,49 @@ +# Snapshot transformer to scrub src() output for deterministic snapshots +scrub_src_transform <- function(lines) { + lines <- gsub('filename: "[^"]+"', 'filename: ""', lines) + lines <- gsub('directory: "[^"]+"', 'directory: ""', lines) + lines <- gsub('timestamp: "[^"]+"', 'timestamp: ""', lines) + lines <- gsub('wd: "[^"]+"', 'wd: ""', lines) + lines +} + +with_srcref <- function(code, env = parent.frame(), file = NULL) { + if (is.null(file)) { + file <- tempfile("test_srcref", fileext = ".R") + on.exit(unlink(file), add = TRUE) + } + + writeLines(code, file) + source(file, local = env, keep.source = TRUE) +} + +parse_with_srcref <- function(code) { + parse(text = code, keep.source = TRUE) +} + +simple_function_with_srcref <- function() { + code <- c( + "test_func <- function(x, y) {", + " x + y", + "}" + ) + + env <- new.env(parent = baseenv()) + with_srcref(code, env = env) + env$test_func +} + +multi_statement_function_with_srcref <- function() { + code <- c( + "multi_func <- function(x) {", + " a <- x + 1", + " b <- a * 2", + " c <- b - 3", + " c", + "}" + ) + + env <- new.env(parent = baseenv()) + with_srcref(code, env = env) + env$multi_func +} diff --git a/tests/testthat/test-src.R b/tests/testthat/test-src.R new file mode 100644 index 0000000..0d420b5 --- /dev/null +++ b/tests/testthat/test-src.R @@ -0,0 +1,1045 @@ +# Test: Closures (evaluated functions) ------------------------------------------ + +if (utils::packageVersion("base") < "4.2.0") { + # Tree characters are ASCII on old Windows R + skip_on_os("windows") +} + +test_that("src() shows closure with srcref and wholeSrcref", { + expect_snapshot(transform = scrub_src_transform, { + f <- simple_function_with_srcref() + src(f) + }) +}) + +test_that("src() shows multi-statement function", { + expect_snapshot(transform = scrub_src_transform, { + f <- multi_statement_function_with_srcref() + src(f) + }) +}) + +# Test: Quoted functions -------------------------------------------------------- + +test_that("src() shows quoted function with nested body", { + expect_snapshot(transform = scrub_src_transform, { + with_srcref("x <- quote(function() {})") + src(x) + }) +}) + +test_that("src() shows quoted function body directly", { + expect_snapshot(transform = scrub_src_transform, { + with_srcref("x <- quote(function() {})") + src(x[[3]]) + }) +}) + +test_that("src() shows quoted function with arguments", { + expect_snapshot(transform = scrub_src_transform, { + with_srcref("x <- quote(function(a, b) {})") + src(x) + }) +}) + +test_that("src() shows srcref with parsed field when positions differ", { + expect_snapshot(transform = scrub_src_transform, { + # Create a synthetic 8-element srcref where parsed positions differ + # Format: c(first_line, first_byte, last_line, last_byte, + # first_col, last_col, first_parsed, last_parsed) + # This simulates a case where R's parser reports different positions + # than the actual source locations (e.g., due to string continuations) + srcfile <- srcfilecopy( + "test.R", + c( + "x <- function() {", + " # A long comment that spans", + " # multiple lines", + " y <- 1", + "}" + ) + ) + + synthetic_srcref <- structure( + c(2L, 3L, 4L, 8L, 3L, 8L, 1L, 5L), + class = "srcref", + srcfile = srcfile + ) + + src(synthetic_srcref) + }) +}) + +# Test: Expression objects ------------------------------------------------------ + +test_that("src() shows expression with single element", { + expect_snapshot(transform = scrub_src_transform, { + x <- parse(text = "x + 1", keep.source = TRUE) + src(x) + }) +}) + +test_that("src() shows expression with multiple elements", { + expect_snapshot(transform = scrub_src_transform, { + x <- parse(text = c("x + 1", "y + 2", "z + 3"), keep.source = TRUE) + src(x) + }) +}) + +test_that("src() shows expression with nested block and wholeSrcref", { + expect_snapshot(transform = scrub_src_transform, { + x <- parse(text = "{\n 1\n}", keep.source = TRUE) + src(x) + }) +}) + +test_that("src() shows nested block element directly", { + expect_snapshot(transform = scrub_src_transform, { + x <- parse(text = "{\n 1\n}", keep.source = TRUE) + src(x[[1]]) + }) +}) + +# Test: Blocks with wholeSrcref ------------------------------------------------- + +test_that("src() shows block with srcref list and wholeSrcref", { + expect_snapshot(transform = scrub_src_transform, { + x <- parse(text = "{\n a <- 1\n b <- 2\n}", keep.source = TRUE) + src(x[[1]]) + }) +}) + +# Test: Single srcref objects --------------------------------------------------- + +test_that("src() shows single srcref", { + expect_snapshot(transform = scrub_src_transform, { + x <- parse(text = "x + 1", keep.source = TRUE) + sr <- attr(x, "srcref")[[1]] + src(sr) + }) +}) + +# Test: List of srcrefs --------------------------------------------------------- + +test_that("src() shows list of srcrefs with count", { + expect_snapshot(transform = scrub_src_transform, { + x <- parse(text = c("x + 1", "y + 2"), keep.source = TRUE) + sr_list <- attr(x, "srcref") + src(sr_list) + }) +}) + +# Test: Srcref lists shown as with [[1]], [[2]] notation ---------------- + +test_that("src() reveals srcref list structure with index notation", { + expect_snapshot(transform = scrub_src_transform, { + with_srcref("x <- quote(function() { 1 })") + src(x[[3]]) + }) +}) + +test_that("src() handles srcrefs nested in language calls", { + expect_snapshot(transform = scrub_src_transform, { + x <- parse(text = "foo({ if (1) bar({ 2 }) })", keep.source = TRUE) + src(x, max_depth = 10) + }) +}) + +test_that("src() handles srcrefs nested in function bodies", { + expect_snapshot(transform = scrub_src_transform, { + with_srcref("f <- function() foo({ if (1) bar({ 2 }) })") + src(f, max_depth = 10) + }) +}) + +# Test: Type labels ------------------------------------------------------------- + +test_that("src() uses correct type labels", { + # Closure + f <- simple_function_with_srcref() + result_closure <- src(f) + expect_equal(attr(result_closure, "srcref_type"), "closure") + + # Quoted function + with_srcref("x <- quote(function() {})") + result_quoted <- src(x) + expect_equal(attr(result_quoted, "srcref_type"), "quoted_function") + + # Expression + expr <- parse(text = "1 + 1", keep.source = TRUE) + result_expr <- src(expr) + expect_equal(attr(result_expr, "srcref_type"), "expression") + + # Block + block <- parse(text = "{1}", keep.source = TRUE)[[1]] + result_block <- src(block) + expect_equal(attr(result_block, "srcref_type"), "block") +}) + +# Test: Srcfile duplication (current behavior - will change in Phase 1) -------- + +test_that("src() currently shows duplicate srcfile objects", { + expect_snapshot(transform = scrub_src_transform, { + # Current behavior: srcfile appears twice (in srcref and wholeSrcref) + # After Phase 1: should use reference notation like @abc123 + f <- simple_function_with_srcref() + src(f) + }) +}) + +test_that("src() shows many duplicate srcfiles in nested expression", { + expect_snapshot(transform = scrub_src_transform, { + # Current behavior: same srcfile appears many times + # After Phase 1: these should be deduplicated + x <- parse(text = "{\n 1\n 2\n}", keep.source = TRUE) + src(x) + }) +}) + +# Test: Edge cases -------------------------------------------------------------- + +test_that("src() handles empty block", { + expect_snapshot(transform = scrub_src_transform, { + x <- parse(text = "{}", keep.source = TRUE) + src(x[[1]]) + }) +}) + +test_that("src() handles function without arguments", { + expect_snapshot(transform = scrub_src_transform, { + with_srcref("f <- function() { NULL }") + src(f) + }) +}) + +test_that("src() handles if statement with blocks", { + expect_snapshot(transform = scrub_src_transform, { + x <- parse(text = "if (TRUE) { 1 } else { 2 }", keep.source = TRUE) + src(x) + }) +}) + +# Test: Parameters -------------------------------------------------------------- + +test_that("src() respects show_source_lines parameter", { + expect_snapshot(transform = scrub_src_transform, { + f <- simple_function_with_srcref() + src(f) + }) +}) + +# Test: Complex nested structures ----------------------------------------------- + +test_that("src() shows expression with multiple nested blocks", { + expect_snapshot(transform = scrub_src_transform, { + x <- parse( + text = "{\n {\n 1\n }\n {\n 2\n }\n}", + keep.source = TRUE + ) + src(x) + }) +}) + +test_that("src() shows function with nested block in body", { + expect_snapshot(transform = scrub_src_transform, { + with_srcref("f <- function(x) {\n if (x) {\n 1\n }\n}") + src(f) + }) +}) +# Tests for src() function and helpers + +# Helper function tests -------------------------------------------------------- + +test_that("extract_srcref_info handles 4-element srcrefs", { + # Create a simple expression with srcref + expr <- parse(text = "x + 1", keep.source = TRUE) + srcref <- attr(expr, "srcref")[[1]] + + # Manually create a 4-element srcref for testing + # Note: In practice, 4-element srcrefs are rare in modern R + srcref_4 <- structure( + c(1L, 1L, 1L, 5L), + class = "srcref", + srcfile = attr(srcref, "srcfile") + ) + + info <- srcref_info(srcref_4) + + expect_s3_class(info$location, "lobstr_srcref_location") + expect_equal(as.character(info$location), "1:1 - 1:5") +}) + +test_that("extract_srcref_info handles 6-element srcrefs", { + # Create a 6-element srcref + expr <- parse(text = "x + 1", keep.source = TRUE) + srcref_base <- attr(expr, "srcref")[[1]] + + srcref_6 <- structure( + c(1L, 1L, 1L, 5L, 1L, 5L), + class = "srcref", + srcfile = attr(srcref_base, "srcfile") + ) + + info <- srcref_info(srcref_6) + + expect_s3_class(info$location, "lobstr_srcref_location") + expect_equal(as.character(info$location), "1:1 - 1:5") +}) + +test_that("extract_srcref_info handles 8-element srcrefs", { + expr <- parse(text = "x + 1", keep.source = TRUE) + srcref <- attr(expr, "srcref")[[1]] + + # Most modern srcrefs are 8-element + info <- srcref_info(srcref) + + expect_s3_class(info$location, "lobstr_srcref_location") + expect_match(as.character(info$location), "\\d+:\\d+ - \\d+:\\d+") +}) + +test_that("extract_srcref_info shows encoding details when requested", { + expr <- parse(text = "x + 1", keep.source = TRUE) + srcref <- attr(expr, "srcref")[[1]] + + info <- srcref_info(srcref) + + expect_true("location" %in% names(info)) +}) + +test_that("extract_srcref_info errors on invalid srcref length", { + # Create an invalid srcref with wrong number of elements + bad_srcref <- structure(c(1L, 2L, 3L), class = "srcref") + + expect_error( + srcref_info(bad_srcref), + "Unexpected srcref length" + ) +}) + +test_that("srcfile_node handles srcfilecopy", { + expr <- parse(text = "x + 1", keep.source = TRUE) + srcfile <- attr(attr(expr, "srcref")[[1]], "srcfile") + srcref <- attr(expr, "srcref")[[1]] + seen_srcfiles <- new.env(parent = emptyenv()) + + info <- srcfile_node(srcfile, seen_srcfiles) + + expect_equal(attr(info, "srcfile_class"), class(srcfile)[1]) + expect_type(info$filename, "character") + expect_type(info$Enc, "character") +}) + +test_that("srcfile_node handles NULL gracefully", { + seen_srcfiles <- new.env(parent = emptyenv()) + info <- srcfile_node(NULL, seen_srcfiles) + expect_null(info) +}) + +test_that("srcfile_lines extracts from srcfilecopy", { + code <- c("x <- 1", "y <- 2", "z <- 3") + expr <- parse(text = code, keep.source = TRUE) + srcref <- attr(expr, "srcref")[[1]] + srcfile <- attr(srcref, "srcfile") + + snippet <- srcfile_lines(srcfile, srcref) + + expect_type(snippet, "character") + expect_true(length(snippet) >= 1) +}) + +test_that("srcfile_lines respects max_lines", { + code <- c("x <- 1", "y <- 2", "z <- 3", "a <- 4", "b <- 5") + expr <- parse(text = paste(code, collapse = "\n"), keep.source = TRUE) + + srcfile <- attr(attr(expr, "srcref")[[1]], "srcfile") + srcref <- structure( + c(1L, 1L, 5L, 10L, 1L, 10L, 1L, 5L), + class = "srcref", + srcfile = srcfile + ) + + snippet <- srcfile_lines(srcfile, srcref) + + expect_type(snippet, "character") + expect_lte(length(snippet), 3) +}) + +test_that("srcref_location works correctly", { + srcref <- structure( + c(1L, 5L, 3L, 20L, 5L, 20L, 1L, 3L), + class = "srcref" + ) + loc <- srcref_location(srcref) + expect_equal(loc, "1:5 - 3:20") +}) + +# Integration tests for src() -------------------------------------------------- + +test_that("src works with functions with source references", { + fun <- simple_function_with_srcref() + + result <- src(fun) + + expect_type(result, "list") + expect_equal(attr(result, "srcref_type"), "closure") +}) + +test_that("src works with single srcref objects", { + expr <- parse(text = "x + 1", keep.source = TRUE) + srcref <- attr(expr, "srcref")[[1]] + + result <- src(srcref) + + expect_type(result, "list") + expect_equal(attr(result, "srcref_type"), "srcref") + expect_true("location" %in% names(result)) +}) + +test_that("src works with list of srcrefs", { + expr <- parse(text = c("x + 1", "y + 2"), keep.source = TRUE) + srcref_list <- attr(expr, "srcref") + + result <- src(srcref_list) + + expect_type(result, "list") + expect_equal(attr(result, "srcref_type"), "list") +}) + +test_that("src works with expressions", { + expr <- parse(text = "x + 1", keep.source = TRUE) + + result <- src(expr) + + expect_type(result, "list") +}) + +test_that("src works for objects without srcrefs", { + fun <- function(x) x + 1 + attr(fun, "srcref") <- NULL + expect_null(src(fun)) + expect_null(src(new.env())) + expect_null(src(list())) + expect_null(src(sum)) +}) + +test_that("src respects max_lines_preview parameter", { + fun <- multi_statement_function_with_srcref() + + result <- src(fun, max_lines_preview = 1) + + expect_type(result, "list") + expect_equal(attr(result, "srcref_type"), "closure") +}) + +test_that("src returns structure and print method works", { + fun <- simple_function_with_srcref() + + # src() returns visibly (with S3 class) + result <- src(fun) + expect_s3_class(result, "lobstr_srcref") + + # print method returns invisibly and outputs to console + expect_output( + expect_invisible(print(result)), + "" + ) +}) + +# S3 method tests -------------------------------------------------------------- + +test_that("tree_label.srcref formats correctly", { + expr <- parse(text = "x + 1", keep.source = TRUE) + srcref <- attr(expr, "srcref")[[1]] + + # Call the method directly since srcref has proper class + label <- tree_label.srcref(srcref, list()) + + expect_type(label, "character") + expect_match(label, "= 1) # Should be hex ID (up to 6 chars) + expect_true(nchar(id) <= 6) + + # Check that wholeSrcref uses a reference + whole_srcfile <- result$`body()`$`attr("wholeSrcref")`$`attr("srcfile")` + expect_s3_class(whole_srcfile, "lobstr_srcfile_ref") + expect_equal(as.character(whole_srcfile), id) +}) + +test_that("srcfile deduplication - multiple statement srcrefs share one srcfile", { + # Create a function with multiple statements + code <- parse( + text = "f <- function(x) { a <- x + 1; b <- a * 2; b }", + keep.source = TRUE + ) + f <- eval(code[[1]]) + + result <- src(f) + + # Get the ID from the first occurrence + first_srcfile <- result$`attr("srcref")`$`attr("srcfile")` + id <- attr(first_srcfile, "srcfile_id") + + # Check that all statement srcrefs use references + stmt_list <- result$`body()`$`attr("srcref")` + + for (i in seq_along(stmt_list)) { + stmt_name <- paste0("[[", i, "]]") + stmt_srcfile <- stmt_list[[stmt_name]]$`attr("srcfile")` + + # Should be a reference + expect_s3_class(stmt_srcfile, "lobstr_srcfile_ref") + expect_equal(as.character(stmt_srcfile), id) + } +}) + +test_that("srcfile deduplication - IDs are stable within a single src() call", { + # Parse the same code twice to get two different srcfile objects + code1 <- parse(text = "f <- function(x) { x + 1 }", keep.source = TRUE) + code2 <- parse(text = "g <- function(y) { y * 2 }", keep.source = TRUE) + + f <- eval(code1[[1]]) + g <- eval(code2[[1]]) + + # Call src() on first function + result_f <- src(f) + id_f <- attr(result_f$`attr("srcref")`$`attr("srcfile")`, "srcfile_id") + + # Call src() on second function (different call, different seen_srcfiles) + result_g <- src(g) + id_g <- attr(result_g$`attr("srcref")`$`attr("srcfile")`, "srcfile_id") + + # IDs are sequential and start fresh for each src() call + expect_type(id_f, "character") + expect_type(id_g, "character") + + # Both should be 3-digit sequential IDs starting at "001" + expect_equal(id_f, "001") + expect_equal(id_g, "001") +}) + +test_that("srcfile deduplication - multiple files means no cross-file deduplication", { + # Parse two separate code snippets (different srcfile objects) + code1 <- parse(text = "f <- function(x) { x + 1 }", keep.source = TRUE) + code2 <- parse(text = "g <- function(y) { y * 2 }", keep.source = TRUE) + + f <- eval(code1[[1]]) + g <- eval(code2[[1]]) + + # Get the srcfile addresses + srcfile_f <- attr(attr(f, "srcref"), "srcfile") + srcfile_g <- attr(attr(g, "srcref"), "srcfile") + + addr_f <- lobstr::obj_addr(srcfile_f) + addr_g <- lobstr::obj_addr(srcfile_g) + + # Different srcfiles should have different addresses + expect_false(addr_f == addr_g) + + # If we call src() on each separately, they each get their own ID + result_f <- src(f) + result_g <- src(g) + + id_f <- attr(result_f$`attr("srcref")`$`attr("srcfile")`, "srcfile_id") + id_g <- attr(result_g$`attr("srcref")`$`attr("srcfile")`, "srcfile_id") + + # IDs are sequential and start fresh for each src() call, so both get "001" + # This ensures deterministic snapshots + expect_equal(id_f, "001") + expect_equal(id_g, "001") +}) + +test_that("srcfile deduplication - nested functions from same file", { + # Create code with a nested function + code <- " + outer <- function() { + inner <- function(x) { x + 1 } + inner(5) + } + " + parsed <- parse(text = code, keep.source = TRUE) + eval(parsed[[1]]) + + result <- src(outer) + + # The outer function should have a srcfile with an ID + outer_srcfile <- result$`attr("srcref")`$`attr("srcfile")` + expect_true(!is.null(attr(outer_srcfile, "srcfile_id"))) + + # All other references should use the same ID + id <- attr(outer_srcfile, "srcfile_id") + + # Check wholeSrcref reference + whole_srcfile <- result$`body()`$`attr("wholeSrcref")`$`attr("srcfile")` + expect_s3_class(whole_srcfile, "lobstr_srcfile_ref") + expect_equal(as.character(whole_srcfile), id) +}) + +test_that("srcfile deduplication - reference notation displays correctly", { + code <- parse(text = "f <- function(x) { x + 1 }", keep.source = TRUE) + f <- eval(code[[1]]) + + # Capture the output + output <- capture.output(print(src(f))) + + # Should see the full srcfile once with @id notation + full_srcfile_lines <- grep(" @[0-9a-f]+", output) + expect_true(length(full_srcfile_lines) >= 1) + + # Should see reference notation (just @id without class) + ref_lines <- grep("^[^<]*@[0-9a-f]+\\s*$", output, perl = TRUE) + expect_true(length(ref_lines) >= 1) + + # Extract the ID from both to verify they match + full_line <- output[full_srcfile_lines[1]] + id_from_full <- regmatches(full_line, regexpr("@[0-9a-f]+", full_line)) + + ref_line <- output[ref_lines[1]] + id_from_ref <- regmatches(ref_line, regexpr("@[0-9a-f]+", ref_line)) + + expect_equal(id_from_full, id_from_ref) +}) + +test_that("lobstr_srcfile_ref class has correct structure", { + # Create a reference object directly + ref <- new_srcfile_ref("abc123", "srcfilecopy") + + expect_s3_class(ref, "lobstr_srcfile_ref") + expect_equal(as.character(ref), "abc123") + expect_equal(attr(ref, "srcfile_class"), "srcfilecopy") + + # Tree label should show just @id + label <- tree_label.lobstr_srcfile_ref(ref, list()) + expect_equal(label, "@abc123") +}) + +test_that("srcfile deduplication - expression with multiple elements", { + # Parse an expression with multiple top-level elements + code <- parse(text = c("x <- 1", "y <- 2", "z <- 3"), keep.source = TRUE) + + result <- src(code) + + # The expression should have an srcref list + srcref_list <- result$`attr("srcref")` + + # Get the first srcfile + first_srcfile <- srcref_list$`[[1]]`$`attr("srcfile")` + id <- attr(first_srcfile, "srcfile_id") + expect_type(id, "character") + + # Other elements should reference the same srcfile + second_srcfile <- srcref_list$`[[2]]`$`attr("srcfile")` + expect_s3_class(second_srcfile, "lobstr_srcfile_ref") + expect_equal(as.character(second_srcfile), id) + + third_srcfile <- srcref_list$`[[3]]`$`attr("srcfile")` + expect_s3_class(third_srcfile, "lobstr_srcfile_ref") + expect_equal(as.character(third_srcfile), id) +}) + +# Deep nesting tests ---------------------------------------------------------- + +test_that("deep nesting - for loop with nested block", { + code <- parse( + text = " + f <- function(x) { + for (i in 1:x) { + print(i) + } + } + ", + keep.source = TRUE + ) + + eval(code[[1]]) + result <- src(f) + + # Should have body with nested block + expect_true("body()" %in% names(result)) + + # Should show nested block with path notation [[2]][[4]] + # (element 2 of body is the for loop, element 4 is the body block) + nested_block_path <- grep( + "^\\[\\[2\\]\\]\\[\\[4\\]\\]", + names(result$`body()`), + value = TRUE + ) + expect_true(length(nested_block_path) >= 1) + + # The nested block should have srcref attributes + nested_block <- result$`body()`[[nested_block_path[1]]] + expect_s3_class(nested_block, "lobstr_srcref") + expect_true( + !is.null(nested_block$`attr("srcref")`) || + !is.null(nested_block$`attr("wholeSrcref")`) + ) +}) + +test_that("deep nesting - if/else with blocks", { + code <- parse( + text = " + g <- function(x) { + if (x > 0) { + y <- x + 1 + } else { + y <- x - 1 + } + y + } + ", + keep.source = TRUE + ) + + eval(code[[1]]) + result <- src(g) + + # Should show both if and else blocks with path notation + body_names <- names(result$`body()`) + + # Look for nested blocks (should be [[2]][[3]] and [[2]][[4]]) + if_block_paths <- grep( + "^\\[\\[2\\]\\]\\[\\[3\\]\\]", + body_names, + value = TRUE + ) + else_block_paths <- grep( + "^\\[\\[2\\]\\]\\[\\[4\\]\\]", + body_names, + value = TRUE + ) + + expect_true(length(if_block_paths) >= 1) + expect_true(length(else_block_paths) >= 1) + + # Both blocks should be srcref objects + if_block <- result$`body()`[[if_block_paths[1]]] + expect_s3_class(if_block, "lobstr_srcref") + + else_block <- result$`body()`[[else_block_paths[1]]] + expect_s3_class(else_block, "lobstr_srcref") +}) + +test_that("deep nesting - nested blocks { { { } } }", { + code <- parse( + text = " + h <- function() { + { + { + x <- 1 + } + } + } + ", + keep.source = TRUE + ) + + eval(code[[1]]) + result <- src(h) + + # Should have nested blocks + expect_true("body()" %in% names(result)) + + # Should have at least one nested [[2]] block + expect_true("[[2]]" %in% names(result$`body()`)) + + # That nested block should have further nesting + nested_block <- result$`body()`$`[[2]]` + expect_s3_class(nested_block, "lobstr_srcref") + expect_true("[[2]]" %in% names(nested_block)) +}) + +test_that("deep nesting - multiple top-level statements", { + code <- parse( + text = " + f <- function(x) { + a <- x + 1 + b <- a * 2 + for (i in 1:b) { + print(i) + } + b + } + ", + keep.source = TRUE + ) + + eval(code[[1]]) + result <- src(f) + + # Should have body with statement srcrefs + expect_true("body()" %in% names(result)) + expect_true("attr(\"srcref\")" %in% names(result$`body()`)) + + # Should show the nested for loop block + body_names <- names(result$`body()`) + for_block_paths <- grep( + "\\[\\[4\\]\\]\\[\\[4\\]\\]", + body_names, + value = TRUE + ) + expect_true(length(for_block_paths) >= 1) +}) + +test_that("deep nesting - empty function", { + code <- parse(text = "f <- function() {}", keep.source = TRUE) + + eval(code[[1]]) + result <- src(f) + + # Should still have structure + expect_type(result, "list") + expect_s3_class(result, "lobstr_srcref") + expect_true("attr(\"srcref\")" %in% names(result)) +}) + +test_that("deep nesting - very deep structure respects max_depth", { + # Create deeply nested structure + code <- parse( + text = " + f <- function(x) { + for (i in 1:x) { + if (i > 0) { + while (i < 10) { + for (j in 1:i) { + print(j) + } + break + } + } + } + } + ", + keep.source = TRUE + ) + + eval(code[[1]]) + + # With low max_depth, should truncate + result_shallow <- src(f, max_depth = 2) + expect_type(result_shallow, "list") + + # With high max_depth, should show more nesting + result_deep <- src(f, max_depth = 10) + expect_type(result_deep, "list") + + # Deep version should have more nested paths + shallow_paths <- names(result_shallow) + deep_paths <- names(result_deep) + + # Both should be valid results + expect_true(length(shallow_paths) > 0) + expect_true(length(deep_paths) > 0) +}) + +test_that("deep nesting - intermediate calls without srcrefs are omitted", { + code <- parse( + text = " + f <- function(x) { + for (i in 1:x) { + print(i) + } + } + ", + keep.source = TRUE + ) + + eval(code[[1]]) + + # Capture output + output <- capture.output(print(src(f))) + + # Should NOT see intermediate nodes + language_nodes <- grep("", output, value = TRUE) + expect_equal(length(language_nodes), 0) + + # Should see collapsed path notation + collapsed_paths <- grep( + "\\[\\[\\d+\\]\\]\\[\\[\\d+\\]\\]", + output, + value = TRUE + ) + expect_true(length(collapsed_paths) > 0) +}) + +test_that("deep nesting - while loop with nested block", { + code <- parse( + text = " + f <- function(x) { + i <- 0 + while (i < x) { + print(i) + i <- i + 1 + } + } + ", + keep.source = TRUE + ) + + eval(code[[1]]) + result <- src(f) + + # Should have nested block for while body + body_names <- names(result$`body()`) + while_block_paths <- grep( + "\\[\\[3\\]\\]\\[\\[3\\]\\]", + body_names, + value = TRUE + ) + expect_true(length(while_block_paths) >= 1) +}) + +test_that("deep nesting - repeat loop with nested block", { + code <- parse( + text = " + f <- function(x) { + repeat { + print(x) + break + } + } + ", + keep.source = TRUE + ) + + eval(code[[1]]) + result <- src(f) + + # Should have nested block for repeat body + expect_true("body()" %in% names(result)) + body_names <- names(result$`body()`) + + # Look for nested block path + nested_paths <- grep("^\\[\\[2\\]\\]\\[\\[2\\]\\]", body_names, value = TRUE) + expect_true(length(nested_paths) >= 1) +}) + +test_that("deep nesting - switch statement with blocks", { + code <- parse( + text = ' + f <- function(x) { + switch(x, + a = { + print("a") + }, + b = { + print("b") + } + ) + } + ', + keep.source = TRUE + ) + + eval(code[[1]]) + result <- src(f) + + # Should have structure + expect_type(result, "list") + expect_s3_class(result, "lobstr_srcref") +}) + +test_that("deep nesting - quoted expressions with nested blocks", { + code <- parse( + text = " + x <- quote({ + for (i in 1:3) { + print(i) + } + }) + ", + keep.source = TRUE + ) + + eval(code[[1]]) + result <- src(x) + + # Should show nested structure + expect_type(result, "list") + + # Should have nested block paths + if (!is.null(result) && length(result) > 0) { + all_names <- names(unlist(result, recursive = TRUE)) + nested_paths <- grep( + "\\[\\[\\d+\\]\\]\\[\\[\\d+\\]\\]", + all_names, + value = TRUE + ) + # Might or might not have nested paths depending on how quote() preserves srcrefs + expect_true(length(nested_paths) >= 0) + } +})