From 984511adacfc23b89611286a58e8ac8e7a688776 Mon Sep 17 00:00:00 2001 From: Alex Breskin Date: Tue, 20 Jan 2026 16:51:19 -0500 Subject: [PATCH 1/4] adding collect argument to TblSqlSource execute_query method --- pkg-r/R/TblSqlSource.R | 14 ++++++++++--- pkg-r/man/TblSqlSource.Rd | 9 +++++++-- pkg-r/tests/testthat/test-TblSqlSource.R | 25 ++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 5 deletions(-) diff --git a/pkg-r/R/TblSqlSource.R b/pkg-r/R/TblSqlSource.R index 51e2d007..6c7a8de1 100644 --- a/pkg-r/R/TblSqlSource.R +++ b/pkg-r/R/TblSqlSource.R @@ -117,10 +117,18 @@ TblSqlSource <- R6::R6Class( #' Execute a SQL query and return results #' #' @param query SQL query string to execute - #' @return A data frame containing query results - execute_query = function(query) { + #' @param collect If `TRUE`, collects the results into a local data frame + #' using [dplyr::collect()]. If `FALSE` (default), returns a lazy SQL + #' tibble. + #' @return A data frame (if `collect = TRUE`) or a lazy SQL tibble (if + #' `collect = FALSE`) + execute_query = function(query, collect = FALSE) { sql_query <- self$prep_query(query) - dplyr::tbl(private$conn, dplyr::sql(sql_query)) + result <- dplyr::tbl(private$conn, dplyr::sql(sql_query)) + if (collect) { + result <- dplyr::collect(result) + } + result }, #' @description diff --git a/pkg-r/man/TblSqlSource.Rd b/pkg-r/man/TblSqlSource.Rd index 71e98a6a..154e69f1 100644 --- a/pkg-r/man/TblSqlSource.Rd +++ b/pkg-r/man/TblSqlSource.Rd @@ -117,18 +117,23 @@ A string containing schema information formatted for LLM prompts \subsection{Method \code{execute_query()}}{ Execute a SQL query and return results \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{TblSqlSource$execute_query(query)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{TblSqlSource$execute_query(query, collect = FALSE)}\if{html}{\out{
}} } \subsection{Arguments}{ \if{html}{\out{
}} \describe{ \item{\code{query}}{SQL query string to execute} + +\item{\code{collect}}{If \code{TRUE}, collects the results into a local data frame +using \code{\link[dplyr:compute]{dplyr::collect()}}. If \code{FALSE} (default), returns a lazy SQL +tibble.} } \if{html}{\out{
}} } \subsection{Returns}{ -A data frame containing query results +A data frame (if \code{collect = TRUE}) or a lazy SQL tibble (if +\code{collect = FALSE}) } } \if{html}{\out{
}} diff --git a/pkg-r/tests/testthat/test-TblSqlSource.R b/pkg-r/tests/testthat/test-TblSqlSource.R index d1abcedf..04d70b88 100644 --- a/pkg-r/tests/testthat/test-TblSqlSource.R +++ b/pkg-r/tests/testthat/test-TblSqlSource.R @@ -35,6 +35,31 @@ describe("TblSqlSource$new()", { expect_equal(collected$value, c(30, 40, 50)) }) + it("returns lazy tibble from execute_query() when collect = FALSE", { + source <- local_tbl_sql_source() + + result <- source$execute_query( + "SELECT * FROM test_table WHERE value > 25", + collect = FALSE + ) + expect_s3_class(result, "tbl_sql") + expect_s3_class(result, "tbl_lazy") + }) + + it("returns data frame from execute_query() when collect = TRUE", { + source <- local_tbl_sql_source() + + result <- source$execute_query( + "SELECT * FROM test_table WHERE value > 25", + collect = TRUE + ) + expect_s3_class(result, "data.frame") + expect_false(inherits(result, "tbl_sql")) + expect_false(inherits(result, "tbl_lazy")) + expect_equal(nrow(result), 3) + expect_equal(result$value, c(30, 40, 50)) + }) + it("returns data frame from test_query()", { source <- local_tbl_sql_source() From 14e67a3cc7f21dc7f460435d72c60f47a3ebef63 Mon Sep 17 00:00:00 2001 From: Alex Breskin Date: Wed, 21 Jan 2026 10:40:32 -0500 Subject: [PATCH 2/4] Changing to default to collect = TRUE --- pkg-r/R/TblSqlSource.R | 6 +++--- pkg-r/man/TblSqlSource.Rd | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pkg-r/R/TblSqlSource.R b/pkg-r/R/TblSqlSource.R index 6c7a8de1..91903d90 100644 --- a/pkg-r/R/TblSqlSource.R +++ b/pkg-r/R/TblSqlSource.R @@ -117,12 +117,12 @@ TblSqlSource <- R6::R6Class( #' Execute a SQL query and return results #' #' @param query SQL query string to execute - #' @param collect If `TRUE`, collects the results into a local data frame - #' using [dplyr::collect()]. If `FALSE` (default), returns a lazy SQL + #' @param collect If `TRUE` (default), collects the results into a local data frame + #' using [dplyr::collect()]. If `FALSE`, returns a lazy SQL #' tibble. #' @return A data frame (if `collect = TRUE`) or a lazy SQL tibble (if #' `collect = FALSE`) - execute_query = function(query, collect = FALSE) { + execute_query = function(query, collect = TRUE) { sql_query <- self$prep_query(query) result <- dplyr::tbl(private$conn, dplyr::sql(sql_query)) if (collect) { diff --git a/pkg-r/man/TblSqlSource.Rd b/pkg-r/man/TblSqlSource.Rd index 154e69f1..78f54fb2 100644 --- a/pkg-r/man/TblSqlSource.Rd +++ b/pkg-r/man/TblSqlSource.Rd @@ -117,7 +117,7 @@ A string containing schema information formatted for LLM prompts \subsection{Method \code{execute_query()}}{ Execute a SQL query and return results \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{TblSqlSource$execute_query(query, collect = FALSE)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{TblSqlSource$execute_query(query, collect = TRUE)}\if{html}{\out{
}} } \subsection{Arguments}{ @@ -125,8 +125,8 @@ Execute a SQL query and return results \describe{ \item{\code{query}}{SQL query string to execute} -\item{\code{collect}}{If \code{TRUE}, collects the results into a local data frame -using \code{\link[dplyr:compute]{dplyr::collect()}}. If \code{FALSE} (default), returns a lazy SQL +\item{\code{collect}}{If \code{TRUE} (default), collects the results into a local data frame +using \code{\link[dplyr:compute]{dplyr::collect()}}. If \code{FALSE}, returns a lazy SQL tibble.} } \if{html}{\out{}} From 4e28e8a19d8f567f521369ed1abb42080bc6a0c6 Mon Sep 17 00:00:00 2001 From: Alex Breskin Date: Wed, 21 Jan 2026 11:22:44 -0500 Subject: [PATCH 3/4] Fixing test errors --- pkg-r/tests/testthat/helper-fixtures.R | 3 ++- pkg-r/tests/testthat/test-TblSqlSource.R | 12 +++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/pkg-r/tests/testthat/helper-fixtures.R b/pkg-r/tests/testthat/helper-fixtures.R index d800b010..111ed784 100644 --- a/pkg-r/tests/testthat/helper-fixtures.R +++ b/pkg-r/tests/testthat/helper-fixtures.R @@ -117,7 +117,8 @@ local_tbl_sql_source <- function( DBI::dbWriteTable(conn, table_name, data, overwrite = TRUE) tbl <- dplyr::tbl(conn, table_name) - tbl <- tbl_transform(tbl) + tbl <- tbl_transform(tbl) |> + dplyr::compute("test_table") TblSqlSource$new(tbl, table_name) } diff --git a/pkg-r/tests/testthat/test-TblSqlSource.R b/pkg-r/tests/testthat/test-TblSqlSource.R index 04d70b88..d19a2325 100644 --- a/pkg-r/tests/testthat/test-TblSqlSource.R +++ b/pkg-r/tests/testthat/test-TblSqlSource.R @@ -22,10 +22,10 @@ describe("TblSqlSource$new()", { }) }) - it("returns lazy tibble from execute_query()", { + it("returns lazy tibble from execute_query() when collect = FALSE", { source <- local_tbl_sql_source() - result <- source$execute_query("SELECT * FROM test_table WHERE value > 25") + result <- source$execute_query("SELECT * FROM test_table WHERE value > 25", collect = FALSE) expect_s3_class(result, "tbl_sql") expect_s3_class(result, "tbl_lazy") @@ -84,7 +84,7 @@ describe("TblSqlSource with transformed tbl (CTE mode)", { ) # CTE should be used since tbl is transformed - result <- source$execute_query("SELECT * FROM test_table") + result <- source$execute_query("SELECT * FROM test_table", collect = FALSE) collected <- dplyr::collect(result) expect_equal(nrow(collected), 3) expect_true(all(collected$value > 20)) @@ -216,7 +216,8 @@ describe("TblSqlSource edge cases - Category B: Column Naming Issues", { # SELECT with explicit duplicate column names from JOIN # DuckDB allows duplicate names but tibble rejects them on collect result <- source$execute_query( - "SELECT table_a.id, table_b.id FROM table_a JOIN table_b ON table_a.id = table_b.id" + "SELECT table_a.id, table_b.id FROM table_a JOIN table_b ON table_a.id = table_b.id", + collect = FALSE ) expect_error( dplyr::collect(result), @@ -297,7 +298,8 @@ describe("TblSqlSource edge cases - Category B: Column Naming Issues", { # SELECT * from JOIN produces duplicate 'id' columns # tibble rejects duplicate names on collect result <- source$execute_query( - "SELECT * FROM table_a JOIN table_b ON table_a.id = table_b.id" + "SELECT * FROM table_a JOIN table_b ON table_a.id = table_b.id", + collect = FALSE ) expect_error( dplyr::collect(result), From 79dfb604974dd237a0771478842c92555818346a Mon Sep 17 00:00:00 2001 From: Alex Breskin <44781144+asb2111@users.noreply.github.com> Date: Wed, 21 Jan 2026 11:50:29 -0500 Subject: [PATCH 4/4] Update pkg-r/tests/testthat/test-TblSqlSource.R Co-authored-by: Garrick Aden-Buie --- pkg-r/tests/testthat/test-TblSqlSource.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pkg-r/tests/testthat/test-TblSqlSource.R b/pkg-r/tests/testthat/test-TblSqlSource.R index d19a2325..45cebce3 100644 --- a/pkg-r/tests/testthat/test-TblSqlSource.R +++ b/pkg-r/tests/testthat/test-TblSqlSource.R @@ -25,7 +25,10 @@ describe("TblSqlSource$new()", { it("returns lazy tibble from execute_query() when collect = FALSE", { source <- local_tbl_sql_source() - result <- source$execute_query("SELECT * FROM test_table WHERE value > 25", collect = FALSE) + result <- source$execute_query( + "SELECT * FROM test_table WHERE value > 25", + collect = FALSE + ) expect_s3_class(result, "tbl_sql") expect_s3_class(result, "tbl_lazy")