From 03f0ad4834de0617ae92c0f25414a34731c62b4a Mon Sep 17 00:00:00 2001 From: Carson Date: Wed, 21 Jan 2026 10:01:54 -0600 Subject: [PATCH 01/45] feat: Add Snowflake Semantic Views support Add automatic detection and context injection for Snowflake Semantic Views when a Snowflake connection is provided. This helps LLMs generate correct queries using the SEMANTIC_VIEW() function with certified business metrics. Changes: - Add SnowflakeSource class (Python and R) that extends SQLAlchemySource/DBISource - Discover semantic views via SHOW SEMANTIC VIEWS at initialization - Retrieve DDL definitions via GET_DDL('SEMANTIC_VIEW', ...) - Include semantic view context in schema output - Add SEMANTIC_VIEW() syntax reference to system prompt - Add Snowflake-specific SQL tips (QUALIFY, LATERAL FLATTEN, time travel) - Graceful fallback when no semantic views exist or discovery fails Based on the Snowflake skill from posit-dev/databot#278. Co-Authored-By: Claude Opus 4.5 --- pkg-py/src/querychat/_datasource.py | 189 ++++++++++++++++++++++ pkg-py/src/querychat/_querychat_base.py | 4 + pkg-py/src/querychat/_system_prompt.py | 116 +++++++++++++- pkg-py/src/querychat/prompts/prompt.md | 19 +++ pkg-r/R/QueryChat.R | 4 + pkg-r/R/QueryChatSystemPrompt.R | 122 +++++++++++++- pkg-r/R/SnowflakeSource.R | 201 ++++++++++++++++++++++++ pkg-r/inst/prompts/prompt.md | 19 +++ 8 files changed, 670 insertions(+), 4 deletions(-) create mode 100644 pkg-r/R/SnowflakeSource.R diff --git a/pkg-py/src/querychat/_datasource.py b/pkg-py/src/querychat/_datasource.py index e5bdcc93..dfc564a8 100644 --- a/pkg-py/src/querychat/_datasource.py +++ b/pkg-py/src/querychat/_datasource.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging from abc import ABC, abstractmethod from dataclasses import dataclass, field from typing import TYPE_CHECKING, Any, Generic, Literal, cast @@ -706,6 +707,194 @@ def cleanup(self) -> None: self._engine.dispose() +@dataclass +class SemanticViewInfo: + """Metadata for 
a Snowflake Semantic View.""" + + name: str + """Fully qualified name (database.schema.view_name).""" + + ddl: str + """The DDL definition from GET_DDL().""" + + +logger = logging.getLogger(__name__) + + +class SnowflakeSource(SQLAlchemySource): + """ + A DataSource implementation for Snowflake with Semantic View support. + + Extends SQLAlchemySource to automatically detect and provide context about + Snowflake Semantic Views when available. + """ + + _semantic_views: list[SemanticViewInfo] + + def __init__( + self, + engine: Engine, + table_name: str, + ): + """ + Initialize with a SQLAlchemy engine connected to Snowflake. + + Parameters + ---------- + engine + SQLAlchemy engine connected to Snowflake + table_name + Name of the table to query + + """ + super().__init__(engine, table_name) + + # Discover semantic views at initialization + self._semantic_views = self._discover_semantic_views() + + def _discover_semantic_views(self) -> list[SemanticViewInfo]: + """ + Discover Semantic Views in the current schema and retrieve their DDLs. + + Returns + ------- + list[SemanticViewInfo] + List of semantic views with their DDL definitions + + """ + semantic_views: list[SemanticViewInfo] = [] + + try: + with self._get_connection() as conn: + # Check for semantic views in the current schema + result = conn.execute(text("SHOW SEMANTIC VIEWS")) + rows = result.fetchall() + + if not rows: + return [] + + # Get column names from result + column_names = list(result.keys()) + + for row in rows: + row_dict = dict(zip(column_names, row, strict=False)) + # SHOW SEMANTIC VIEWS returns columns like: + # created_on, name, database_name, schema_name, owner, ... 
+ view_name = row_dict.get("name") + database_name = row_dict.get("database_name") + schema_name = row_dict.get("schema_name") + + if not view_name: + continue + + # Build fully qualified name + fq_name = f"{database_name}.{schema_name}.{view_name}" + + # Get the DDL for this semantic view + ddl = self._get_semantic_view_ddl(conn, fq_name) + if ddl: + semantic_views.append(SemanticViewInfo(name=fq_name, ddl=ddl)) + + except Exception as e: + # Log warning but don't fail - gracefully fall back to no semantic views + logger.warning(f"Failed to discover semantic views: {e}") + return [] + + return semantic_views + + def _get_semantic_view_ddl(self, conn: Connection, fq_name: str) -> str | None: + """ + Retrieve the DDL for a semantic view. + + Parameters + ---------- + conn + Active database connection + fq_name + Fully qualified name (database.schema.view_name) + + Returns + ------- + str | None + The DDL text, or None if retrieval failed + + """ + try: + result = conn.execute( + text(f"SELECT GET_DDL('SEMANTIC_VIEW', '{fq_name}')") + ) + row = result.fetchone() + if row: + return str(row[0]) + except Exception as e: + logger.warning(f"Failed to get DDL for semantic view {fq_name}: {e}") + + return None + + @property + def has_semantic_views(self) -> bool: + """Check if semantic views are available.""" + return len(self._semantic_views) > 0 + + @property + def semantic_views(self) -> list[SemanticViewInfo]: + """Get the list of discovered semantic views.""" + return self._semantic_views + + def get_schema(self, *, categorical_threshold: int) -> str: + """ + Generate schema information including semantic view context. 
+ + Parameters + ---------- + categorical_threshold + Maximum number of unique values for a text column to be considered + categorical + + Returns + ------- + str + String describing the schema, including semantic view information + if available + + """ + # Get base schema from parent + schema = super().get_schema(categorical_threshold=categorical_threshold) + + # If no semantic views, return base schema + if not self._semantic_views: + return schema + + # Add semantic view information + semantic_section = self._format_semantic_views_section() + return f"{schema}\n\n{semantic_section}" + + def _format_semantic_views_section(self) -> str: + """Format the semantic views section for the schema output.""" + lines = [ + "## Snowflake Semantic Views", + "", + "This database has Semantic Views available. Semantic Views provide a curated ", + "layer over raw data with pre-defined metrics, dimensions, and relationships. ", + "They encode business logic and calculation rules that ensure consistent, ", + "accurate results.", + "", + "**IMPORTANT**: When a Semantic View covers the data you need, prefer it over ", + "raw table queries to benefit from certified metric definitions.", + "", + ] + + for sv in self._semantic_views: + lines.append(f"### Semantic View: `{sv.name}`") + lines.append("") + lines.append("```sql") + lines.append(sv.ddl) + lines.append("```") + lines.append("") + + return "\n".join(lines) + + class PolarsLazySource(DataSource["pl.LazyFrame"]): """ A DataSource implementation for Polars LazyFrames. 
diff --git a/pkg-py/src/querychat/_querychat_base.py b/pkg-py/src/querychat/_querychat_base.py index e8a7c7f1..105822f2 100644 --- a/pkg-py/src/querychat/_querychat_base.py +++ b/pkg-py/src/querychat/_querychat_base.py @@ -18,6 +18,7 @@ IbisSource, IntoFrameT, PolarsLazySource, + SnowflakeSource, SQLAlchemySource, ) from ._shiny_module import GREETING_PROMPT @@ -229,6 +230,9 @@ def normalize_data_source( if isinstance(data_source, DataSource): return data_source if isinstance(data_source, sqlalchemy.Engine): + # Use SnowflakeSource for Snowflake connections to get semantic view support + if data_source.dialect.name.lower() == "snowflake": + return SnowflakeSource(data_source, table_name) return SQLAlchemySource(data_source, table_name) if is_ibis_table(data_source): diff --git a/pkg-py/src/querychat/_system_prompt.py b/pkg-py/src/querychat/_system_prompt.py index 2b9cdb04..addf7862 100644 --- a/pkg-py/src/querychat/_system_prompt.py +++ b/pkg-py/src/querychat/_system_prompt.py @@ -10,6 +10,109 @@ from ._querychat_base import TOOL_GROUPS +# Reference documentation for SEMANTIC_VIEW() query syntax +SEMANTIC_VIEW_SYNTAX = """ +## SEMANTIC_VIEW() Query Syntax + +When Semantic Views are available, use the `SEMANTIC_VIEW()` table function instead of raw SQL. + +### Basic Syntax + +```sql +SELECT * FROM SEMANTIC_VIEW( + {view_name} + METRICS {logical_table}.{metric_name} + DIMENSIONS {logical_table}.{dimension_name} + [WHERE {dimension} = 'value'] -- Optional: pre-aggregation filter +) +[WHERE {column} = 'value'] -- Optional: post-aggregation filter +``` + +### Key Rules + +1. **Use `SEMANTIC_VIEW()` function** - Not direct SELECT FROM the view +2. **No GROUP BY needed** - Semantic layer handles aggregation via DIMENSIONS +3. **No JOINs needed within model** - Relationships are pre-defined +4. **No aggregate functions needed** - Metrics are pre-aggregated +5. 
**Use DDL-defined names** - Metrics and dimensions must match the DDL exactly + +### WHERE Clause: Inside vs Outside + +- **Inside** (pre-aggregation): Filters base data BEFORE metrics are computed +- **Outside** (post-aggregation): Filters results AFTER metrics are computed + +```sql +-- Pre-aggregation: only include 'EXT' accounts in the calculation +SELECT * FROM SEMANTIC_VIEW( + MODEL_NAME + METRICS T_DATA.NET_REVENUE + DIMENSIONS REF_ENTITIES.ACC_TYPE_CD + WHERE REF_ENTITIES.ACC_TYPE_CD = 'EXT' +) + +-- Post-aggregation: compute all, then filter results +SELECT * FROM SEMANTIC_VIEW( + MODEL_NAME + METRICS T_DATA.NET_REVENUE + DIMENSIONS REF_ENTITIES.ACC_TYPE_CD +) +WHERE NET_REVENUE > 1000000 +``` + +### Common Patterns + +**Single metric (total):** +```sql +SELECT * FROM SEMANTIC_VIEW(MODEL_NAME METRICS T_DATA.NET_REVENUE) +``` + +**Metric by dimension:** +```sql +SELECT * FROM SEMANTIC_VIEW( + MODEL_NAME + METRICS T_DATA.NET_REVENUE + DIMENSIONS REF_ENTITIES.ACC_TYPE_CD +) +``` + +**Multiple metrics and dimensions:** +```sql +SELECT * FROM SEMANTIC_VIEW( + MODEL_NAME + METRICS T_DATA.NET_REVENUE, T_DATA.GROSS_REVENUE + DIMENSIONS REF_ENTITIES.ACC_TYPE_CD, T_DATA.LOG_DT +) +ORDER BY LOG_DT ASC +``` + +**Time series:** +```sql +SELECT * FROM SEMANTIC_VIEW( + MODEL_NAME + METRICS T_DATA.NET_REVENUE + DIMENSIONS T_DATA.LOG_DT +) +ORDER BY LOG_DT ASC +``` + +**Join results with other data:** +```sql +SELECT sv.*, lookup.category_name +FROM SEMANTIC_VIEW( + MODEL_NAME + METRICS T_DATA.NET_REVENUE + DIMENSIONS REF_ENTITIES.ACC_TYPE_CD +) AS sv +JOIN category_lookup AS lookup ON sv.ACC_TYPE_CD = lookup.code +``` + +### Troubleshooting + +- **"Invalid identifier"**: Verify metric/dimension names match exactly what's in the DDL +- **Syntax error**: Use SEMANTIC_VIEW() function, GROUP BY isn't needed +""" + + class QueryChatSystemPrompt: """Manages system prompt template and component assembly.""" @@ -65,11 +168,20 @@ def render(self, tools: tuple[TOOL_GROUPS, ...] 
| None) -> str: Fully rendered system prompt string """ - is_duck_db = self.data_source.get_db_type().lower() == "duckdb" + db_type = self.data_source.get_db_type() + is_duck_db = db_type.lower() == "duckdb" + is_snowflake = db_type.lower() == "snowflake" + + # Check for semantic views (only available with SnowflakeSource) + # Use getattr to safely access the property that only exists on SnowflakeSource + has_semantic_views: bool = getattr(self.data_source, "has_semantic_views", False) context = { - "db_type": self.data_source.get_db_type(), + "db_type": db_type, "is_duck_db": is_duck_db, + "is_snowflake": is_snowflake, + "has_semantic_views": has_semantic_views, + "semantic_view_syntax": SEMANTIC_VIEW_SYNTAX if has_semantic_views else "", "schema": self.schema, "data_description": self.data_description, "extra_instructions": self.extra_instructions, diff --git a/pkg-py/src/querychat/prompts/prompt.md b/pkg-py/src/querychat/prompts/prompt.md index 7c8ea5a1..05cc4a29 100644 --- a/pkg-py/src/querychat/prompts/prompt.md +++ b/pkg-py/src/querychat/prompts/prompt.md @@ -71,6 +71,25 @@ quantile_cont(salary, 0.5) ``` {{/is_duck_db}} +{{#is_snowflake}} +### Snowflake SQL Tips + +**QUALIFY clause:** Use QUALIFY instead of a subquery when filtering on window function results. + +**LATERAL FLATTEN:** Use for expanding JSON arrays or nested structures. + +**Time travel:** Use `AT` or `BEFORE` clauses for historical data access. + +{{/is_snowflake}} +{{#has_semantic_views}} +### Semantic Views + +**IMPORTANT**: This database has Semantic Views available. Semantic Views provide certified business metrics that encode correct calculation rules. When a Semantic View covers the data you need, **always prefer it over raw table queries**. + +Real-world example: Raw table queries for "external customer revenue" returned $184B while the semantic model's certified metric returned $84.5B (the correct answer). 
The raw query was 2x+ too high because it ignored discounts and included invalid transaction codes. + +{{{semantic_view_syntax}}} +{{/has_semantic_views}} ## Your Capabilities You can handle these types of requests: diff --git a/pkg-r/R/QueryChat.R b/pkg-r/R/QueryChat.R index 20574442..2bc89e3d 100644 --- a/pkg-r/R/QueryChat.R +++ b/pkg-r/R/QueryChat.R @@ -935,6 +935,10 @@ normalize_data_source <- function(data_source, table_name) { } if (inherits(data_source, "DBIConnection")) { + # Use SnowflakeSource for Snowflake connections to get semantic view support + if (is_snowflake_connection(data_source)) { + return(SnowflakeSource$new(data_source, table_name)) + } return(DBISource$new(data_source, table_name)) } diff --git a/pkg-r/R/QueryChatSystemPrompt.R b/pkg-r/R/QueryChatSystemPrompt.R index 52e99fb5..07e8d11f 100644 --- a/pkg-r/R/QueryChatSystemPrompt.R +++ b/pkg-r/R/QueryChatSystemPrompt.R @@ -79,11 +79,25 @@ QueryChatSystemPrompt <- R6::R6Class( #' @return A character string containing the rendered system prompt. 
render = function(tools) { # Build context for whisker rendering - is_duck_db <- tolower(self$data_source$get_db_type()) == "duckdb" + db_type <- self$data_source$get_db_type() + is_duck_db <- tolower(db_type) == "duckdb" + is_snowflake <- tolower(db_type) == "snowflake" + + # Check for semantic views (only available with SnowflakeSource) + has_semantic_views <- FALSE + if ( + inherits(self$data_source, "SnowflakeSource") && + self$data_source$has_semantic_views() + ) { + has_semantic_views <- TRUE + } context <- list( - db_type = self$data_source$get_db_type(), + db_type = db_type, is_duck_db = is_duck_db, + is_snowflake = if (is_snowflake) "true", + has_semantic_views = if (has_semantic_views) "true", + semantic_view_syntax = if (has_semantic_views) SEMANTIC_VIEW_SYNTAX, schema = self$schema, data_description = self$data_description, extra_instructions = self$extra_instructions, @@ -97,6 +111,110 @@ QueryChatSystemPrompt <- R6::R6Class( ) ) +# Reference documentation for SEMANTIC_VIEW() query syntax +# nolint start: line_length_linter. +SEMANTIC_VIEW_SYNTAX <- ' +## SEMANTIC_VIEW() Query Syntax + +When Semantic Views are available, use the `SEMANTIC_VIEW()` table function instead of raw SQL. + +### Basic Syntax + +```sql +SELECT * FROM SEMANTIC_VIEW( + {view_name} + METRICS {logical_table}.{metric_name} + DIMENSIONS {logical_table}.{dimension_name} + [WHERE {dimension} = \'value\'] -- Optional: pre-aggregation filter +) +[WHERE {column} = \'value\'] -- Optional: post-aggregation filter +``` + +### Key Rules + +1. **Use `SEMANTIC_VIEW()` function** - Not direct SELECT FROM the view +2. **No GROUP BY needed** - Semantic layer handles aggregation via DIMENSIONS +3. **No JOINs needed within model** - Relationships are pre-defined +4. **No aggregate functions needed** - Metrics are pre-aggregated +5. 
**Use DDL-defined names** - Metrics and dimensions must match the DDL exactly + +### WHERE Clause: Inside vs Outside + +- **Inside** (pre-aggregation): Filters base data BEFORE metrics are computed +- **Outside** (post-aggregation): Filters results AFTER metrics are computed + +```sql +-- Pre-aggregation: only include \'EXT\' accounts in the calculation +SELECT * FROM SEMANTIC_VIEW( + MODEL_NAME + METRICS T_DATA.NET_REVENUE + DIMENSIONS REF_ENTITIES.ACC_TYPE_CD + WHERE REF_ENTITIES.ACC_TYPE_CD = \'EXT\' +) + +-- Post-aggregation: compute all, then filter results +SELECT * FROM SEMANTIC_VIEW( + MODEL_NAME + METRICS T_DATA.NET_REVENUE + DIMENSIONS REF_ENTITIES.ACC_TYPE_CD +) +WHERE NET_REVENUE > 1000000 +``` + +### Common Patterns + +**Single metric (total):** +```sql +SELECT * FROM SEMANTIC_VIEW(MODEL_NAME METRICS T_DATA.NET_REVENUE) +``` + +**Metric by dimension:** +```sql +SELECT * FROM SEMANTIC_VIEW( + MODEL_NAME + METRICS T_DATA.NET_REVENUE + DIMENSIONS REF_ENTITIES.ACC_TYPE_CD +) +``` + +**Multiple metrics and dimensions:** +```sql +SELECT * FROM SEMANTIC_VIEW( + MODEL_NAME + METRICS T_DATA.NET_REVENUE, T_DATA.GROSS_REVENUE + DIMENSIONS REF_ENTITIES.ACC_TYPE_CD, T_DATA.LOG_DT +) +ORDER BY LOG_DT ASC +``` + +**Time series:** +```sql +SELECT * FROM SEMANTIC_VIEW( + MODEL_NAME + METRICS T_DATA.NET_REVENUE + DIMENSIONS T_DATA.LOG_DT +) +ORDER BY LOG_DT ASC +``` + +**Join results with other data:** +```sql +SELECT sv.*, lookup.category_name +FROM SEMANTIC_VIEW( + MODEL_NAME + METRICS T_DATA.NET_REVENUE + DIMENSIONS REF_ENTITIES.ACC_TYPE_CD +) AS sv +JOIN category_lookup AS lookup ON sv.ACC_TYPE_CD = lookup.code +``` + +### Troubleshooting + +- **"Invalid identifier"**: Verify metric/dimension names match exactly what is in the DDL +- **Syntax error**: Use SEMANTIC_VIEW() function, GROUP BY is not needed +' +# nolint end + # Utility function for loading file or string content read_text <- function(x) { if (file.exists(x)) { diff --git a/pkg-r/R/SnowflakeSource.R 
b/pkg-r/R/SnowflakeSource.R new file mode 100644 index 00000000..dc357da6 --- /dev/null +++ b/pkg-r/R/SnowflakeSource.R @@ -0,0 +1,201 @@ +#' Snowflake Source +#' +#' A DataSource implementation for Snowflake database connections with +#' Semantic View support. This class extends DBISource to automatically detect +#' and provide context about Snowflake Semantic Views when available. +#' +#' @noRd +SnowflakeSource <- R6::R6Class( + "SnowflakeSource", + inherit = DBISource, + private = list( + semantic_views = NULL + ), + public = list( + #' @description + #' Create a new SnowflakeSource + #' + #' @param conn A DBI connection object to Snowflake + #' @param table_name Name of the table in the database + #' + #' @return A new SnowflakeSource object + initialize = function(conn, table_name) { + super$initialize(conn, table_name) + + # Discover semantic views at initialization + private$semantic_views <- discover_semantic_views(conn) + }, + + #' @description + #' Check if semantic views are available + #' @return TRUE if semantic views were discovered + has_semantic_views = function() { + length(private$semantic_views) > 0 + }, + + #' @description + #' Get the list of discovered semantic views + #' @return A list of semantic view info (name and ddl) + get_semantic_views = function() { + private$semantic_views + }, + + #' @description + #' Get schema information for the database table, including semantic views + #' + #' @param categorical_threshold Maximum number of unique values for a text + #' column to be considered categorical (default: 20) + #' @return A string describing the schema + get_schema = function(categorical_threshold = 20) { + # Get base schema from parent + schema <- super$get_schema(categorical_threshold = categorical_threshold) + + # If no semantic views, return base schema + if (!self$has_semantic_views()) { + return(schema) + } + + # Add semantic view information + semantic_section <- format_semantic_views_section(private$semantic_views) + 
paste(schema, semantic_section, sep = "\n\n") + } + ) +) + + +#' Discover Semantic Views in Snowflake +#' +#' @param conn A DBI connection to Snowflake +#' @return A list of semantic views with name and ddl +#' @noRd +discover_semantic_views <- function(conn) { + semantic_views <- list() + + + tryCatch( + { + # Check for semantic views in the current schema + result <- DBI::dbGetQuery(conn, "SHOW SEMANTIC VIEWS") + + if (nrow(result) == 0) { + return(list()) + } + + for (i in seq_len(nrow(result))) { + row <- result[i, ] + view_name <- row[["name"]] + database_name <- row[["database_name"]] + schema_name <- row[["schema_name"]] + + if (is.null(view_name) || is.na(view_name)) { + next + } + + # Build fully qualified name + fq_name <- paste(database_name, schema_name, view_name, sep = ".") + + # Get the DDL for this semantic view + ddl <- get_semantic_view_ddl(conn, fq_name) + if (!is.null(ddl)) { + semantic_views <- c(semantic_views, list(list( + name = fq_name, + ddl = ddl + ))) + } + } + }, + error = function(e) { + # Log warning but don't fail - gracefully fall back to no semantic views + cli::cli_warn("Failed to discover semantic views: {conditionMessage(e)}") + } + ) + + semantic_views +} + + +#' Get the DDL for a Semantic View +#' +#' @param conn A DBI connection to Snowflake +#' @param fq_name Fully qualified name (database.schema.view_name) +#' @return The DDL text, or NULL if retrieval failed +#' @noRd +get_semantic_view_ddl <- function(conn, fq_name) { + tryCatch( + { + query <- sprintf("SELECT GET_DDL('SEMANTIC_VIEW', '%s')", fq_name) + result <- DBI::dbGetQuery(conn, query) + if (nrow(result) > 0 && ncol(result) > 0) { + as.character(result[[1, 1]]) + } else { + NULL + } + }, + error = function(e) { + cli::cli_warn("Failed to get DDL for semantic view {fq_name}: {conditionMessage(e)}") + NULL + } + ) +} + + +#' Format Semantic Views Section for Schema Output +#' +#' @param semantic_views A list of semantic view info (name and ddl) +#' @return A formatted 
string describing the semantic views +#' @noRd +format_semantic_views_section <- function(semantic_views) { + lines <- c( + "## Snowflake Semantic Views", + "", + "This database has Semantic Views available. Semantic Views provide a curated", + "layer over raw data with pre-defined metrics, dimensions, and relationships.", + "They encode business logic and calculation rules that ensure consistent,", + "accurate results.", + "", + "**IMPORTANT**: When a Semantic View covers the data you need, prefer it over", + "raw table queries to benefit from certified metric definitions.", + "" + ) + + for (sv in semantic_views) { + lines <- c( + lines, + sprintf("### Semantic View: `%s`", sv$name), + "", + "```sql", + sv$ddl, + "```", + "" + ) + } + + paste(lines, collapse = "\n") +} + + +#' Check if a connection is a Snowflake connection +#' +#' @param conn A DBI connection object +#' @return TRUE if the connection is to Snowflake +#' @noRd +is_snowflake_connection <- function(conn) { + if (!inherits(conn, "DBIConnection")) { + return(FALSE) + } + + # Check for known Snowflake connection classes + if (inherits(conn, "Snowflake")) { + return(TRUE) + } + + # Check dbms.name from connection info + tryCatch( + { + conn_info <- DBI::dbGetInfo(conn) + dbms_name <- tolower(conn_info[["dbms.name"]] %||% "") + grepl("snowflake", dbms_name, ignore.case = TRUE) + }, + error = function(e) FALSE + ) +} diff --git a/pkg-r/inst/prompts/prompt.md b/pkg-r/inst/prompts/prompt.md index 7c8ea5a1..05cc4a29 100644 --- a/pkg-r/inst/prompts/prompt.md +++ b/pkg-r/inst/prompts/prompt.md @@ -71,6 +71,25 @@ quantile_cont(salary, 0.5) ``` {{/is_duck_db}} +{{#is_snowflake}} +### Snowflake SQL Tips + +**QUALIFY clause:** Use QUALIFY instead of a subquery when filtering on window function results. + +**LATERAL FLATTEN:** Use for expanding JSON arrays or nested structures. + +**Time travel:** Use `AT` or `BEFORE` clauses for historical data access. 
+ +{{/is_snowflake}} +{{#has_semantic_views}} +### Semantic Views + +**IMPORTANT**: This database has Semantic Views available. Semantic Views provide certified business metrics that encode correct calculation rules. When a Semantic View covers the data you need, **always prefer it over raw table queries**. + +Real-world example: Raw table queries for "external customer revenue" returned $184B while the semantic model's certified metric returned $84.5B (the correct answer). The raw query was 2x+ too high because it ignored discounts and included invalid transaction codes. + +{{{semantic_view_syntax}}} +{{/has_semantic_views}} ## Your Capabilities You can handle these types of requests: From ad966770db2844f3c60846448d72e3b6ffc7e717 Mon Sep 17 00:00:00 2001 From: cpsievert Date: Wed, 21 Jan 2026 16:06:54 +0000 Subject: [PATCH 02/45] `air format` (GitHub Actions) --- pkg-r/R/SnowflakeSource.R | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/pkg-r/R/SnowflakeSource.R b/pkg-r/R/SnowflakeSource.R index dc357da6..670e8124 100644 --- a/pkg-r/R/SnowflakeSource.R +++ b/pkg-r/R/SnowflakeSource.R @@ -71,7 +71,6 @@ SnowflakeSource <- R6::R6Class( discover_semantic_views <- function(conn) { semantic_views <- list() - tryCatch( { # Check for semantic views in the current schema @@ -97,10 +96,13 @@ discover_semantic_views <- function(conn) { # Get the DDL for this semantic view ddl <- get_semantic_view_ddl(conn, fq_name) if (!is.null(ddl)) { - semantic_views <- c(semantic_views, list(list( - name = fq_name, - ddl = ddl - ))) + semantic_views <- c( + semantic_views, + list(list( + name = fq_name, + ddl = ddl + )) + ) } } }, @@ -132,7 +134,9 @@ get_semantic_view_ddl <- function(conn, fq_name) { } }, error = function(e) { - cli::cli_warn("Failed to get DDL for semantic view {fq_name}: {conditionMessage(e)}") + cli::cli_warn( + "Failed to get DDL for semantic view {fq_name}: {conditionMessage(e)}" + ) NULL } ) From 
1a19d3288f3b1a77367932c70b4cfa507b4cc0e6 Mon Sep 17 00:00:00 2001 From: Carson Date: Mon, 26 Jan 2026 10:36:34 -0600 Subject: [PATCH 03/45] fix: Address Snowflake Semantic Views code review concerns - Fix SQL injection risk by escaping single quotes in view names - Add discover_semantic_views parameter for lazy initialization - Remove error swallowing, let errors propagate for debugging - Add debug logging when no semantic views are found - Move logger placement to top of file (Python) - Add defensive dialect check in normalize_data_source (Python) - Add comprehensive unit tests for both Python and R Co-Authored-By: Claude Opus 4.5 --- pkg-py/src/querychat/_datasource.py | 87 +++--- pkg-py/src/querychat/_querychat_base.py | 3 +- pkg-py/tests/test_snowflake_source.py | 321 ++++++++++++++++++++ pkg-r/R/SnowflakeSource.R | 126 ++++---- pkg-r/tests/testthat/test-SnowflakeSource.R | 111 +++++++ 5 files changed, 540 insertions(+), 108 deletions(-) create mode 100644 pkg-py/tests/test_snowflake_source.py create mode 100644 pkg-r/tests/testthat/test-SnowflakeSource.R diff --git a/pkg-py/src/querychat/_datasource.py b/pkg-py/src/querychat/_datasource.py index dfc564a8..44fb231e 100644 --- a/pkg-py/src/querychat/_datasource.py +++ b/pkg-py/src/querychat/_datasource.py @@ -14,6 +14,8 @@ from ._df_compat import read_sql from ._utils import as_narwhals, check_query +logger = logging.getLogger(__name__) + if TYPE_CHECKING: import ibis import polars as pl @@ -718,9 +720,6 @@ class SemanticViewInfo: """The DDL definition from GET_DDL().""" -logger = logging.getLogger(__name__) - - class SnowflakeSource(SQLAlchemySource): """ A DataSource implementation for Snowflake with Semantic View support. @@ -735,6 +734,8 @@ def __init__( self, engine: Engine, table_name: str, + *, + discover_semantic_views: bool = True, ): """ Initialize with a SQLAlchemy engine connected to Snowflake. 
@@ -745,12 +746,18 @@ def __init__( SQLAlchemy engine connected to Snowflake table_name Name of the table to query + discover_semantic_views + If True (default), automatically discover semantic views at + initialization. Set to False to skip discovery (e.g., for + performance or if not needed). """ super().__init__(engine, table_name) - # Discover semantic views at initialization - self._semantic_views = self._discover_semantic_views() + if discover_semantic_views: + self._semantic_views = self._discover_semantic_views() + else: + self._semantic_views = [] def _discover_semantic_views(self) -> list[SemanticViewInfo]: """ @@ -764,41 +771,36 @@ def _discover_semantic_views(self) -> list[SemanticViewInfo]: """ semantic_views: list[SemanticViewInfo] = [] - try: - with self._get_connection() as conn: - # Check for semantic views in the current schema - result = conn.execute(text("SHOW SEMANTIC VIEWS")) - rows = result.fetchall() - - if not rows: - return [] + with self._get_connection() as conn: + # Check for semantic views in the current schema + result = conn.execute(text("SHOW SEMANTIC VIEWS")) + rows = result.fetchall() - # Get column names from result - column_names = list(result.keys()) + if not rows: + logger.debug("No semantic views found in current schema") + return [] - for row in rows: - row_dict = dict(zip(column_names, row, strict=False)) - # SHOW SEMANTIC VIEWS returns columns like: - # created_on, name, database_name, schema_name, owner, ... - view_name = row_dict.get("name") - database_name = row_dict.get("database_name") - schema_name = row_dict.get("schema_name") + # Get column names from result + column_names = list(result.keys()) - if not view_name: - continue + for row in rows: + row_dict = dict(zip(column_names, row, strict=False)) + # SHOW SEMANTIC VIEWS returns columns like: + # created_on, name, database_name, schema_name, owner, ... 
+ view_name = row_dict.get("name") + database_name = row_dict.get("database_name") + schema_name = row_dict.get("schema_name") - # Build fully qualified name - fq_name = f"{database_name}.{schema_name}.{view_name}" + if not view_name: + continue - # Get the DDL for this semantic view - ddl = self._get_semantic_view_ddl(conn, fq_name) - if ddl: - semantic_views.append(SemanticViewInfo(name=fq_name, ddl=ddl)) + # Build fully qualified name + fq_name = f"{database_name}.{schema_name}.{view_name}" - except Exception as e: - # Log warning but don't fail - gracefully fall back to no semantic views - logger.warning(f"Failed to discover semantic views: {e}") - return [] + # Get the DDL for this semantic view + ddl = self._get_semantic_view_ddl(conn, fq_name) + if ddl: + semantic_views.append(SemanticViewInfo(name=fq_name, ddl=ddl)) return semantic_views @@ -819,15 +821,14 @@ def _get_semantic_view_ddl(self, conn: Connection, fq_name: str) -> str | None: The DDL text, or None if retrieval failed """ - try: - result = conn.execute( - text(f"SELECT GET_DDL('SEMANTIC_VIEW', '{fq_name}')") - ) - row = result.fetchone() - if row: - return str(row[0]) - except Exception as e: - logger.warning(f"Failed to get DDL for semantic view {fq_name}: {e}") + # Escape single quotes to prevent SQL injection + safe_name = fq_name.replace("'", "''") + result = conn.execute( + text(f"SELECT GET_DDL('SEMANTIC_VIEW', '{safe_name}')") + ) + row = result.fetchone() + if row: + return str(row[0]) return None diff --git a/pkg-py/src/querychat/_querychat_base.py b/pkg-py/src/querychat/_querychat_base.py index 105822f2..42a39c06 100644 --- a/pkg-py/src/querychat/_querychat_base.py +++ b/pkg-py/src/querychat/_querychat_base.py @@ -231,7 +231,8 @@ def normalize_data_source( return data_source if isinstance(data_source, sqlalchemy.Engine): # Use SnowflakeSource for Snowflake connections to get semantic view support - if data_source.dialect.name.lower() == "snowflake": + dialect_name = 
getattr(getattr(data_source, "dialect", None), "name", "") or "" + if dialect_name.lower() == "snowflake": return SnowflakeSource(data_source, table_name) return SQLAlchemySource(data_source, table_name) diff --git a/pkg-py/tests/test_snowflake_source.py b/pkg-py/tests/test_snowflake_source.py new file mode 100644 index 00000000..50e60f8e --- /dev/null +++ b/pkg-py/tests/test_snowflake_source.py @@ -0,0 +1,321 @@ +"""Tests for SnowflakeSource and semantic view functionality.""" + +import logging +from unittest.mock import MagicMock, patch + +import pytest +from querychat._datasource import SemanticViewInfo, SnowflakeSource + + +class TestSemanticViewInfo: + """Tests for SemanticViewInfo dataclass.""" + + def test_creation(self): + """Test basic creation of SemanticViewInfo.""" + info = SemanticViewInfo(name="db.schema.view", ddl="CREATE SEMANTIC VIEW...") + assert info.name == "db.schema.view" + assert info.ddl == "CREATE SEMANTIC VIEW..." + + def test_equality(self): + """Test equality comparison.""" + info1 = SemanticViewInfo(name="db.schema.view", ddl="DDL") + info2 = SemanticViewInfo(name="db.schema.view", ddl="DDL") + info3 = SemanticViewInfo(name="db.schema.other", ddl="DDL") + assert info1 == info2 + assert info1 != info3 + + +class TestFormatSemanticViewsSection: + """Tests for semantic view formatting.""" + + def test_format_with_views(self): + """Test that format produces expected markdown structure.""" + mock_engine = MagicMock() + mock_engine.dialect.name = "snowflake" + mock_inspector = MagicMock() + mock_inspector.has_table.return_value = True + mock_inspector.get_columns.return_value = [{"name": "id"}] + + with ( + patch("querychat._datasource.inspect", return_value=mock_inspector), + patch.object(SnowflakeSource, "_discover_semantic_views", return_value=[]), + ): + source = SnowflakeSource(mock_engine, "test_table") + # Manually add semantic views for formatting test + source._semantic_views = [ + SemanticViewInfo(name="db.schema.view1", ddl="CREATE 
SEMANTIC VIEW v1"), + SemanticViewInfo(name="db.schema.view2", ddl="CREATE SEMANTIC VIEW v2"), + ] + + section = source._format_semantic_views_section() + assert "## Snowflake Semantic Views" in section + assert "db.schema.view1" in section + assert "db.schema.view2" in section + assert "CREATE SEMANTIC VIEW v1" in section + assert "CREATE SEMANTIC VIEW v2" in section + assert "```sql" in section + + +class TestSQLEscaping: + """Tests for SQL injection prevention.""" + + def test_single_quote_escaped(self): + """Verify that names with single quotes are properly escaped.""" + mock_engine = MagicMock() + mock_engine.dialect.name = "snowflake" + mock_inspector = MagicMock() + mock_inspector.has_table.return_value = True + mock_inspector.get_columns.return_value = [{"name": "id"}] + + mock_conn = MagicMock() + mock_result = MagicMock() + mock_result.fetchone.return_value = ["DDL result"] + + with ( + patch("querychat._datasource.inspect", return_value=mock_inspector), + patch.object(SnowflakeSource, "_discover_semantic_views", return_value=[]), + ): + source = SnowflakeSource(mock_engine, "test_table") + + # Test the escaping logic directly + with patch.object(source, "_get_connection") as mock_get_conn: + mock_context = MagicMock() + mock_context.__enter__ = MagicMock(return_value=mock_conn) + mock_context.__exit__ = MagicMock(return_value=False) + mock_get_conn.return_value = mock_context + mock_conn.execute.return_value = mock_result + + # Call with a name containing single quotes + source._get_semantic_view_ddl(mock_conn, "db.schema.test'view") + + # Verify the executed query has escaped quotes + call_args = mock_conn.execute.call_args + query_text = str(call_args[0][0]) + assert "test''view" in query_text + + def test_normal_name_unchanged(self): + """Verify that normal names without special chars are not modified.""" + mock_engine = MagicMock() + mock_engine.dialect.name = "snowflake" + mock_inspector = MagicMock() + mock_inspector.has_table.return_value = True + 
mock_inspector.get_columns.return_value = [{"name": "id"}] + + mock_conn = MagicMock() + mock_result = MagicMock() + mock_result.fetchone.return_value = ["DDL result"] + + with ( + patch("querychat._datasource.inspect", return_value=mock_inspector), + patch.object(SnowflakeSource, "_discover_semantic_views", return_value=[]), + ): + source = SnowflakeSource(mock_engine, "test_table") + + mock_conn.execute.return_value = mock_result + source._get_semantic_view_ddl(mock_conn, "db.schema.normal_view") + + call_args = mock_conn.execute.call_args + query_text = str(call_args[0][0]) + assert "db.schema.normal_view" in query_text + + +class TestSnowflakeSourceDiscovery: + """Tests for semantic view discovery with mocked connections.""" + + def test_discovery_disabled(self): + """Test that discover_semantic_views=False skips discovery.""" + mock_engine = MagicMock() + mock_engine.dialect.name = "snowflake" + mock_inspector = MagicMock() + mock_inspector.has_table.return_value = True + mock_inspector.get_columns.return_value = [{"name": "id"}] + + with patch("querychat._datasource.inspect", return_value=mock_inspector): + # Should not call _discover_semantic_views when disabled + source = SnowflakeSource( + mock_engine, "test_table", discover_semantic_views=False + ) + assert source._semantic_views == [] + assert not source.has_semantic_views + + def test_discovery_enabled_default(self): + """Test that discovery is enabled by default.""" + mock_engine = MagicMock() + mock_engine.dialect.name = "snowflake" + mock_inspector = MagicMock() + mock_inspector.has_table.return_value = True + mock_inspector.get_columns.return_value = [{"name": "id"}] + + with ( + patch("querychat._datasource.inspect", return_value=mock_inspector), + patch.object( + SnowflakeSource, "_discover_semantic_views", return_value=[] + ) as mock_discover, + ): + SnowflakeSource(mock_engine, "test_table") + mock_discover.assert_called_once() + + def test_discovery_error_propagates(self): + """Verify that 
discovery errors propagate (not swallowed).""" + mock_engine = MagicMock() + mock_engine.dialect.name = "snowflake" + mock_inspector = MagicMock() + mock_inspector.has_table.return_value = True + mock_inspector.get_columns.return_value = [{"name": "id"}] + + mock_conn = MagicMock() + mock_conn.execute.side_effect = Exception("Database connection failed") + + with ( + patch("querychat._datasource.inspect", return_value=mock_inspector), + patch.object(mock_engine, "connect") as mock_connect, + ): + mock_context = MagicMock() + mock_context.__enter__ = MagicMock(return_value=mock_conn) + mock_context.__exit__ = MagicMock(return_value=False) + mock_connect.return_value = mock_context + + # Error should propagate, not be swallowed + with pytest.raises(Exception, match="Database connection failed"): + SnowflakeSource(mock_engine, "test_table") + + def test_no_views_logs_debug(self, caplog): + """Verify debug message when no views found.""" + mock_engine = MagicMock() + mock_engine.dialect.name = "snowflake" + mock_inspector = MagicMock() + mock_inspector.has_table.return_value = True + mock_inspector.get_columns.return_value = [{"name": "id"}] + + mock_conn = MagicMock() + mock_result = MagicMock() + mock_result.fetchall.return_value = [] + + with ( + patch("querychat._datasource.inspect", return_value=mock_inspector), + patch.object(mock_engine, "connect") as mock_connect, + caplog.at_level(logging.DEBUG, logger="querychat._datasource"), + ): + mock_context = MagicMock() + mock_context.__enter__ = MagicMock(return_value=mock_conn) + mock_context.__exit__ = MagicMock(return_value=False) + mock_connect.return_value = mock_context + mock_conn.execute.return_value = mock_result + + SnowflakeSource(mock_engine, "test_table") + assert "No semantic views found" in caplog.text + + +class TestSnowflakeSourceProperties: + """Tests for SnowflakeSource properties.""" + + def test_has_semantic_views_true(self): + """Test has_semantic_views returns True when views exist.""" + 
mock_engine = MagicMock() + mock_engine.dialect.name = "snowflake" + mock_inspector = MagicMock() + mock_inspector.has_table.return_value = True + mock_inspector.get_columns.return_value = [{"name": "id"}] + + with ( + patch("querychat._datasource.inspect", return_value=mock_inspector), + patch.object(SnowflakeSource, "_discover_semantic_views", return_value=[]), + ): + source = SnowflakeSource(mock_engine, "test_table") + source._semantic_views = [SemanticViewInfo(name="test", ddl="DDL")] + assert source.has_semantic_views is True + + def test_has_semantic_views_false(self): + """Test has_semantic_views returns False when no views.""" + mock_engine = MagicMock() + mock_engine.dialect.name = "snowflake" + mock_inspector = MagicMock() + mock_inspector.has_table.return_value = True + mock_inspector.get_columns.return_value = [{"name": "id"}] + + with ( + patch("querychat._datasource.inspect", return_value=mock_inspector), + patch.object(SnowflakeSource, "_discover_semantic_views", return_value=[]), + ): + source = SnowflakeSource(mock_engine, "test_table") + assert source.has_semantic_views is False + + def test_semantic_views_property(self): + """Test semantic_views property returns the list.""" + mock_engine = MagicMock() + mock_engine.dialect.name = "snowflake" + mock_inspector = MagicMock() + mock_inspector.has_table.return_value = True + mock_inspector.get_columns.return_value = [{"name": "id"}] + + views = [ + SemanticViewInfo(name="view1", ddl="DDL1"), + SemanticViewInfo(name="view2", ddl="DDL2"), + ] + + with ( + patch("querychat._datasource.inspect", return_value=mock_inspector), + patch.object( + SnowflakeSource, "_discover_semantic_views", return_value=views + ), + ): + source = SnowflakeSource(mock_engine, "test_table") + assert source.semantic_views == views + + +class TestGetSchemaWithSemanticViews: + """Tests for get_schema with semantic views included.""" + + def test_schema_includes_semantic_views(self): + """Test that get_schema includes semantic 
view section.""" + mock_engine = MagicMock() + mock_engine.dialect.name = "snowflake" + mock_inspector = MagicMock() + mock_inspector.has_table.return_value = True + mock_inspector.get_columns.return_value = [{"name": "id", "type": MagicMock()}] + + views = [SemanticViewInfo(name="db.schema.metrics", ddl="CREATE SEMANTIC VIEW")] + + with ( + patch("querychat._datasource.inspect", return_value=mock_inspector), + patch.object( + SnowflakeSource, "_discover_semantic_views", return_value=views + ), + ): + source = SnowflakeSource(mock_engine, "test_table") + + # Mock the parent get_schema + with patch.object( + SnowflakeSource.__bases__[0], + "get_schema", + return_value="Table: test_table\nColumns:\n- id", + ): + schema = source.get_schema(categorical_threshold=20) + + assert "Table: test_table" in schema + assert "## Snowflake Semantic Views" in schema + assert "db.schema.metrics" in schema + + def test_schema_without_semantic_views(self): + """Test that get_schema works without semantic views.""" + mock_engine = MagicMock() + mock_engine.dialect.name = "snowflake" + mock_inspector = MagicMock() + mock_inspector.has_table.return_value = True + mock_inspector.get_columns.return_value = [{"name": "id", "type": MagicMock()}] + + with ( + patch("querychat._datasource.inspect", return_value=mock_inspector), + patch.object(SnowflakeSource, "_discover_semantic_views", return_value=[]), + ): + source = SnowflakeSource(mock_engine, "test_table") + + with patch.object( + SnowflakeSource.__bases__[0], + "get_schema", + return_value="Table: test_table\nColumns:\n- id", + ): + schema = source.get_schema(categorical_threshold=20) + + assert "Table: test_table" in schema + assert "## Snowflake Semantic Views" not in schema diff --git a/pkg-r/R/SnowflakeSource.R b/pkg-r/R/SnowflakeSource.R index 670e8124..58080fb2 100644 --- a/pkg-r/R/SnowflakeSource.R +++ b/pkg-r/R/SnowflakeSource.R @@ -17,13 +17,18 @@ SnowflakeSource <- R6::R6Class( #' #' @param conn A DBI connection object to 
Snowflake #' @param table_name Name of the table in the database + #' @param discover_semantic_views If TRUE (default), automatically discover + #' semantic views at initialization. Set to FALSE to skip discovery. #' #' @return A new SnowflakeSource object - initialize = function(conn, table_name) { + initialize = function(conn, table_name, discover_semantic_views = TRUE) { super$initialize(conn, table_name) - # Discover semantic views at initialization - private$semantic_views <- discover_semantic_views(conn) + if (discover_semantic_views) { + private$semantic_views <- discover_semantic_views_impl(conn) + } else { + private$semantic_views <- list() + } }, #' @description @@ -68,49 +73,46 @@ SnowflakeSource <- R6::R6Class( #' @param conn A DBI connection to Snowflake #' @return A list of semantic views with name and ddl #' @noRd -discover_semantic_views <- function(conn) { +discover_semantic_views_impl <- function(conn) { semantic_views <- list() - tryCatch( - { - # Check for semantic views in the current schema - result <- DBI::dbGetQuery(conn, "SHOW SEMANTIC VIEWS") + # Check for semantic views in the current schema + result <- DBI::dbGetQuery(conn, "SHOW SEMANTIC VIEWS") - if (nrow(result) == 0) { - return(list()) - } + if (nrow(result) == 0) { + cli::cli_inform( + c("i" = "No semantic views found in current schema"), + .frequency = "once", + .frequency_id = "querychat_no_semantic_views" + ) + return(list()) + } - for (i in seq_len(nrow(result))) { - row <- result[i, ] - view_name <- row[["name"]] - database_name <- row[["database_name"]] - schema_name <- row[["schema_name"]] - - if (is.null(view_name) || is.na(view_name)) { - next - } - - # Build fully qualified name - fq_name <- paste(database_name, schema_name, view_name, sep = ".") - - # Get the DDL for this semantic view - ddl <- get_semantic_view_ddl(conn, fq_name) - if (!is.null(ddl)) { - semantic_views <- c( - semantic_views, - list(list( - name = fq_name, - ddl = ddl - )) - ) - } - } - }, - error = 
function(e) { - # Log warning but don't fail - gracefully fall back to no semantic views - cli::cli_warn("Failed to discover semantic views: {conditionMessage(e)}") + for (i in seq_len(nrow(result))) { + row <- result[i, ] + view_name <- row[["name"]] + database_name <- row[["database_name"]] + schema_name <- row[["schema_name"]] + + if (is.null(view_name) || is.na(view_name)) { + next } - ) + + # Build fully qualified name + fq_name <- paste(database_name, schema_name, view_name, sep = ".") + + # Get the DDL for this semantic view + ddl <- get_semantic_view_ddl(conn, fq_name) + if (!is.null(ddl)) { + semantic_views <- c( + semantic_views, + list(list( + name = fq_name, + ddl = ddl + )) + ) + } + } semantic_views } @@ -123,23 +125,15 @@ discover_semantic_views <- function(conn) { #' @return The DDL text, or NULL if retrieval failed #' @noRd get_semantic_view_ddl <- function(conn, fq_name) { - tryCatch( - { - query <- sprintf("SELECT GET_DDL('SEMANTIC_VIEW', '%s')", fq_name) - result <- DBI::dbGetQuery(conn, query) - if (nrow(result) > 0 && ncol(result) > 0) { - as.character(result[[1, 1]]) - } else { - NULL - } - }, - error = function(e) { - cli::cli_warn( - "Failed to get DDL for semantic view {fq_name}: {conditionMessage(e)}" - ) - NULL - } - ) + # Escape single quotes to prevent SQL injection + safe_name <- gsub("'", "''", fq_name, fixed = TRUE) + query <- sprintf("SELECT GET_DDL('SEMANTIC_VIEW', '%s')", safe_name) + result <- DBI::dbGetQuery(conn, query) + if (nrow(result) > 0 && ncol(result) > 0) { + as.character(result[[1, 1]]) + } else { + NULL + } } @@ -152,13 +146,17 @@ format_semantic_views_section <- function(semantic_views) { lines <- c( "## Snowflake Semantic Views", "", - "This database has Semantic Views available. 
Semantic Views provide a curated", - "layer over raw data with pre-defined metrics, dimensions, and relationships.", - "They encode business logic and calculation rules that ensure consistent,", - "accurate results.", + paste0( + "This database has Semantic Views available. Semantic Views provide a ", + "curated layer over raw data with pre-defined metrics, dimensions, and ", + "relationships. They encode business logic and calculation rules that ", + "ensure consistent, accurate results." + ), "", - "**IMPORTANT**: When a Semantic View covers the data you need, prefer it over", - "raw table queries to benefit from certified metric definitions.", + paste0( + "**IMPORTANT**: When a Semantic View covers the data you need, prefer ", + "it over raw table queries to benefit from certified metric definitions." + ), "" ) diff --git a/pkg-r/tests/testthat/test-SnowflakeSource.R b/pkg-r/tests/testthat/test-SnowflakeSource.R new file mode 100644 index 00000000..788282a1 --- /dev/null +++ b/pkg-r/tests/testthat/test-SnowflakeSource.R @@ -0,0 +1,111 @@ +# Tests for SnowflakeSource and semantic view functionality + +describe("format_semantic_views_section()", { + it("formats single semantic view correctly", { + views <- list( + list(name = "db.schema.view", ddl = "CREATE SEMANTIC VIEW test_view") + ) + result <- format_semantic_views_section(views) + + expect_match(result, "## Snowflake Semantic Views") + expect_match(result, "db.schema.view") + expect_match(result, "CREATE SEMANTIC VIEW test_view") + expect_match(result, "```sql") + }) + + it("formats multiple views", { + views <- list( + list(name = "db.schema.view1", ddl = "CREATE SEMANTIC VIEW v1"), + list(name = "db.schema.view2", ddl = "CREATE SEMANTIC VIEW v2") + ) + result <- format_semantic_views_section(views) + + expect_match(result, "db.schema.view1") + expect_match(result, "db.schema.view2") + expect_match(result, "CREATE SEMANTIC VIEW v1") + expect_match(result, "CREATE SEMANTIC VIEW v2") + }) + + it("includes 
IMPORTANT notice", { + views <- list( + list(name = "test", ddl = "DDL") + ) + result <- format_semantic_views_section(views) + expect_match(result, "\\*\\*IMPORTANT\\*\\*") + }) +}) + +describe("SQL escaping in get_semantic_view_ddl()", { + it("escapes single quotes in view names", { + # We can't test the full function without a Snowflake connection, + # but we can test the escaping logic directly + fq_name <- "db.schema.test'view" + safe_name <- gsub("'", "''", fq_name, fixed = TRUE) + + expect_equal(safe_name, "db.schema.test''view") + }) + + it("leaves normal names unchanged", { + fq_name <- "db.schema.normal_view" + safe_name <- gsub("'", "''", fq_name, fixed = TRUE) + + expect_equal(safe_name, "db.schema.normal_view") + }) +}) + +describe("is_snowflake_connection()", { + it("returns FALSE for non-Snowflake connections", { + skip_if_not_installed("RSQLite") + + conn <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") + withr::defer(DBI::dbDisconnect(conn)) + + expect_false(is_snowflake_connection(conn)) + }) + + it("returns FALSE for non-DBI objects", { + expect_false(is_snowflake_connection(NULL)) + expect_false(is_snowflake_connection("not a connection")) + expect_false(is_snowflake_connection(list(fake = "connection"))) + expect_false(is_snowflake_connection(123)) + }) +}) + +describe("SnowflakeSource initialization", { + # Note: We cannot fully test SnowflakeSource without a real Snowflake + # connection, but we can test the parameter validation and discovery + # option through integration with DBISource + + it("can disable semantic view discovery", { + # This is a mock test - in reality you'd need a Snowflake connection + # The actual behavior is tested through the discover_semantic_views param + # which skips the discovery when FALSE + + # The parameter exists and should be accepted by the class + expect_true("discover_semantic_views" %in% formalArgs( + SnowflakeSource$public_methods$initialize + )) + }) + + it("inherits from DBISource", { + # Check that 
SnowflakeSource inherits from DBISource + expect_identical(SnowflakeSource$get_inherit(), DBISource) + }) +}) + +describe("discover_semantic_views_impl()", { + it("propagates errors (not swallowed)", { + skip_if_not_installed("RSQLite") + + # SQLite doesn't have SHOW SEMANTIC VIEWS, so it should error + conn <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") + withr::defer(DBI::dbDisconnect(conn)) + + # Without tryCatch wrapping, this should error (not return empty list) + # The error is a syntax error since SQLite doesn't support SHOW command + expect_error( + discover_semantic_views_impl(conn), + "SHOW" + ) + }) +}) From c3035b53a09ef143f3fa408d97e9723b81171986 Mon Sep 17 00:00:00 2001 From: cpsievert Date: Mon, 26 Jan 2026 16:41:04 +0000 Subject: [PATCH 04/45] `air format` (GitHub Actions) --- pkg-r/tests/testthat/test-SnowflakeSource.R | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pkg-r/tests/testthat/test-SnowflakeSource.R b/pkg-r/tests/testthat/test-SnowflakeSource.R index 788282a1..fad663f6 100644 --- a/pkg-r/tests/testthat/test-SnowflakeSource.R +++ b/pkg-r/tests/testthat/test-SnowflakeSource.R @@ -82,9 +82,12 @@ describe("SnowflakeSource initialization", { # which skips the discovery when FALSE # The parameter exists and should be accepted by the class - expect_true("discover_semantic_views" %in% formalArgs( - SnowflakeSource$public_methods$initialize - )) + expect_true( + "discover_semantic_views" %in% + formalArgs( + SnowflakeSource$public_methods$initialize + ) + ) }) it("inherits from DBISource", { From 83c271aa1608f73b3cf37229d90e280059703252 Mon Sep 17 00:00:00 2001 From: Carson Date: Mon, 26 Jan 2026 11:31:42 -0600 Subject: [PATCH 05/45] refactor: Extract Snowflake semantic views into adapter pattern Introduces a Protocol-based adapter pattern for Snowflake semantic view discovery that works with both SQLAlchemy and Ibis backends: - Add _snowflake.py with RawSQLExecutor Protocol, executor implementations 
(SQLAlchemyExecutor, IbisExecutor), and standalone discovery functions - Add _snowflake_sources.py with SnowflakeSource and new SnowflakeIbisSource - Update normalize_data_source() to route Ibis Snowflake backends - Maintain backwards-compatible imports from _datasource.py This enables semantic view support for Ibis connections to Snowflake, not just SQLAlchemy connections. Co-Authored-By: Claude Opus 4.5 --- pkg-py/src/querychat/_datasource.py | 209 +-------- pkg-py/src/querychat/_querychat_base.py | 7 +- pkg-py/src/querychat/_snowflake.py | 247 +++++++++++ pkg-py/src/querychat/_snowflake_sources.py | 147 ++++++ pkg-py/tests/test_snowflake_source.py | 492 ++++++++++++++------- 5 files changed, 745 insertions(+), 357 deletions(-) create mode 100644 pkg-py/src/querychat/_snowflake.py create mode 100644 pkg-py/src/querychat/_snowflake_sources.py diff --git a/pkg-py/src/querychat/_datasource.py b/pkg-py/src/querychat/_datasource.py index 44fb231e..c5f3750c 100644 --- a/pkg-py/src/querychat/_datasource.py +++ b/pkg-py/src/querychat/_datasource.py @@ -709,193 +709,6 @@ def cleanup(self) -> None: self._engine.dispose() -@dataclass -class SemanticViewInfo: - """Metadata for a Snowflake Semantic View.""" - - name: str - """Fully qualified name (database.schema.view_name).""" - - ddl: str - """The DDL definition from GET_DDL().""" - - -class SnowflakeSource(SQLAlchemySource): - """ - A DataSource implementation for Snowflake with Semantic View support. - - Extends SQLAlchemySource to automatically detect and provide context about - Snowflake Semantic Views when available. - """ - - _semantic_views: list[SemanticViewInfo] - - def __init__( - self, - engine: Engine, - table_name: str, - *, - discover_semantic_views: bool = True, - ): - """ - Initialize with a SQLAlchemy engine connected to Snowflake. 
- - Parameters - ---------- - engine - SQLAlchemy engine connected to Snowflake - table_name - Name of the table to query - discover_semantic_views - If True (default), automatically discover semantic views at - initialization. Set to False to skip discovery (e.g., for - performance or if not needed). - - """ - super().__init__(engine, table_name) - - if discover_semantic_views: - self._semantic_views = self._discover_semantic_views() - else: - self._semantic_views = [] - - def _discover_semantic_views(self) -> list[SemanticViewInfo]: - """ - Discover Semantic Views in the current schema and retrieve their DDLs. - - Returns - ------- - list[SemanticViewInfo] - List of semantic views with their DDL definitions - - """ - semantic_views: list[SemanticViewInfo] = [] - - with self._get_connection() as conn: - # Check for semantic views in the current schema - result = conn.execute(text("SHOW SEMANTIC VIEWS")) - rows = result.fetchall() - - if not rows: - logger.debug("No semantic views found in current schema") - return [] - - # Get column names from result - column_names = list(result.keys()) - - for row in rows: - row_dict = dict(zip(column_names, row, strict=False)) - # SHOW SEMANTIC VIEWS returns columns like: - # created_on, name, database_name, schema_name, owner, ... - view_name = row_dict.get("name") - database_name = row_dict.get("database_name") - schema_name = row_dict.get("schema_name") - - if not view_name: - continue - - # Build fully qualified name - fq_name = f"{database_name}.{schema_name}.{view_name}" - - # Get the DDL for this semantic view - ddl = self._get_semantic_view_ddl(conn, fq_name) - if ddl: - semantic_views.append(SemanticViewInfo(name=fq_name, ddl=ddl)) - - return semantic_views - - def _get_semantic_view_ddl(self, conn: Connection, fq_name: str) -> str | None: - """ - Retrieve the DDL for a semantic view. 
- - Parameters - ---------- - conn - Active database connection - fq_name - Fully qualified name (database.schema.view_name) - - Returns - ------- - str | None - The DDL text, or None if retrieval failed - - """ - # Escape single quotes to prevent SQL injection - safe_name = fq_name.replace("'", "''") - result = conn.execute( - text(f"SELECT GET_DDL('SEMANTIC_VIEW', '{safe_name}')") - ) - row = result.fetchone() - if row: - return str(row[0]) - - return None - - @property - def has_semantic_views(self) -> bool: - """Check if semantic views are available.""" - return len(self._semantic_views) > 0 - - @property - def semantic_views(self) -> list[SemanticViewInfo]: - """Get the list of discovered semantic views.""" - return self._semantic_views - - def get_schema(self, *, categorical_threshold: int) -> str: - """ - Generate schema information including semantic view context. - - Parameters - ---------- - categorical_threshold - Maximum number of unique values for a text column to be considered - categorical - - Returns - ------- - str - String describing the schema, including semantic view information - if available - - """ - # Get base schema from parent - schema = super().get_schema(categorical_threshold=categorical_threshold) - - # If no semantic views, return base schema - if not self._semantic_views: - return schema - - # Add semantic view information - semantic_section = self._format_semantic_views_section() - return f"{schema}\n\n{semantic_section}" - - def _format_semantic_views_section(self) -> str: - """Format the semantic views section for the schema output.""" - lines = [ - "## Snowflake Semantic Views", - "", - "This database has Semantic Views available. Semantic Views provide a curated ", - "layer over raw data with pre-defined metrics, dimensions, and relationships. 
", - "They encode business logic and calculation rules that ensure consistent, ", - "accurate results.", - "", - "**IMPORTANT**: When a Semantic View covers the data you need, prefer it over ", - "raw table queries to benefit from certified metric definitions.", - "", - ] - - for sv in self._semantic_views: - lines.append(f"### Semantic View: `{sv.name}`") - lines.append("") - lines.append("```sql") - lines.append(sv.ddl) - lines.append("```") - lines.append("") - - return "\n".join(lines) - - class PolarsLazySource(DataSource["pl.LazyFrame"]): """ A DataSource implementation for Polars LazyFrames. @@ -1324,3 +1137,25 @@ def cleanup(self) -> None: The Ibis backend connection is owned by the caller and should be closed by calling `backend.disconnect()` when appropriate. """ + + +# Backwards-compatible re-exports (moved to _snowflake.py and _snowflake_sources.py) +from ._snowflake import SemanticViewInfo # noqa: E402 +from ._snowflake_sources import ( # noqa: E402 + SnowflakeIbisSource, + SnowflakeSource, +) + +__all__ = [ + "ColumnMeta", + "DataFrameSource", + "DataSource", + "IbisSource", + "MissingColumnsError", + "PolarsLazySource", + "SQLAlchemySource", + "SemanticViewInfo", + "SnowflakeIbisSource", + "SnowflakeSource", + "format_schema", +] diff --git a/pkg-py/src/querychat/_querychat_base.py b/pkg-py/src/querychat/_querychat_base.py index 42a39c06..6bacb0b1 100644 --- a/pkg-py/src/querychat/_querychat_base.py +++ b/pkg-py/src/querychat/_querychat_base.py @@ -18,10 +18,10 @@ IbisSource, IntoFrameT, PolarsLazySource, - SnowflakeSource, SQLAlchemySource, ) from ._shiny_module import GREETING_PROMPT +from ._snowflake_sources import SnowflakeIbisSource, SnowflakeSource from ._system_prompt import QueryChatSystemPrompt from ._utils import MISSING, MISSING_TYPE, is_ibis_table from .tools import ( @@ -223,7 +223,7 @@ def cleanup(self) -> None: self._data_source.cleanup() -def normalize_data_source( +def normalize_data_source( # noqa: PLR0911 data_source: IntoFrame | 
sqlalchemy.Engine | DataSource, table_name: str, ) -> DataSource: @@ -237,6 +237,9 @@ def normalize_data_source( return SQLAlchemySource(data_source, table_name) if is_ibis_table(data_source): + backend = data_source.get_backend() + if backend.name.lower() == "snowflake": + return SnowflakeIbisSource(data_source, table_name) return IbisSource(data_source, table_name) src = nw.from_native(data_source, pass_through=True) diff --git a/pkg-py/src/querychat/_snowflake.py b/pkg-py/src/querychat/_snowflake.py new file mode 100644 index 00000000..facba149 --- /dev/null +++ b/pkg-py/src/querychat/_snowflake.py @@ -0,0 +1,247 @@ +""" +Snowflake-specific utilities for semantic view discovery. + +This module provides a backend-agnostic interface for discovering Snowflake +Semantic Views. It uses a Protocol pattern to abstract SQL execution, allowing +the same discovery logic to work with both SQLAlchemy engines and Ibis backends. +""" + +from __future__ import annotations + +import logging +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any, Protocol + +if TYPE_CHECKING: + from ibis.backends.sql import SQLBackend + from sqlalchemy import Engine + from sqlalchemy.engine import Connection + +logger = logging.getLogger(__name__) + + +@dataclass +class SemanticViewInfo: + """Metadata for a Snowflake Semantic View.""" + + name: str + """Fully qualified name (database.schema.view_name).""" + + ddl: str + """The DDL definition from GET_DDL().""" + + +class RawSQLExecutor(Protocol): + """ + Protocol for executing raw SQL queries. + + This abstraction allows semantic view discovery to work with different + database backends (SQLAlchemy, Ibis) without knowing the specific API. + """ + + def execute_raw_sql(self, query: str) -> list[dict[str, Any]]: + """Execute raw SQL and return results as list of row dicts.""" + ... 
+ + +class SQLAlchemyExecutor: + """Raw SQL executor for SQLAlchemy engines.""" + + def __init__(self, engine: Engine): + from sqlalchemy import text + + self._engine = engine + self._text = text + + def execute_raw_sql(self, query: str) -> list[dict[str, Any]]: + """Execute raw SQL and return results as list of row dicts.""" + with self._engine.connect() as conn: + result = conn.execute(self._text(query)) + keys = list(result.keys()) + return [dict(zip(keys, row, strict=False)) for row in result.fetchall()] + + +class SQLAlchemyConnectionExecutor: + """ + Raw SQL executor for an active SQLAlchemy connection. + + Unlike SQLAlchemyExecutor, this uses an existing connection rather than + creating a new one. Useful when you need to execute multiple queries + within the same connection/transaction. + """ + + def __init__(self, conn: Connection): + from sqlalchemy import text + + self._conn = conn + self._text = text + + def execute_raw_sql(self, query: str) -> list[dict[str, Any]]: + """Execute raw SQL and return results as list of row dicts.""" + result = self._conn.execute(self._text(query)) + keys = list(result.keys()) + return [dict(zip(keys, row, strict=False)) for row in result.fetchall()] + + +class IbisExecutor: + """Raw SQL executor for Ibis backends.""" + + def __init__(self, backend: SQLBackend): + self._backend = backend + + def execute_raw_sql(self, query: str) -> list[dict[str, Any]]: + """Execute raw SQL and return results as list of row dicts.""" + # Use backend.sql() to create an ibis table from raw SQL, then execute + result_table = self._backend.sql(query) + df = result_table.execute() + # execute() returns a pandas DataFrame + return df.to_dict(orient="records") # type: ignore[call-overload] + + +def discover_semantic_views(executor: RawSQLExecutor) -> list[SemanticViewInfo]: + """ + Discover semantic views using any SQL executor. 
+ + Parameters + ---------- + executor + An object implementing the RawSQLExecutor protocol + + Returns + ------- + list[SemanticViewInfo] + List of semantic views with their DDL definitions + + """ + rows = executor.execute_raw_sql("SHOW SEMANTIC VIEWS") + + if not rows: + logger.debug("No semantic views found in current schema") + return [] + + views: list[SemanticViewInfo] = [] + for row in rows: + db = row.get("database_name") + schema = row.get("schema_name") + name = row.get("name") + + if not name: + continue + + fq_name = f"{db}.{schema}.{name}" + ddl = get_semantic_view_ddl(executor, fq_name) + if ddl: + views.append(SemanticViewInfo(name=fq_name, ddl=ddl)) + + return views + + +def get_semantic_view_ddl(executor: RawSQLExecutor, fq_name: str) -> str | None: + """ + Get DDL for a semantic view. + + Parameters + ---------- + executor + An object implementing the RawSQLExecutor protocol + fq_name + Fully qualified name (database.schema.view_name) + + Returns + ------- + str | None + The DDL text, or None if retrieval failed + + """ + # Escape single quotes to prevent SQL injection + safe_name = fq_name.replace("'", "''") + rows = executor.execute_raw_sql(f"SELECT GET_DDL('SEMANTIC_VIEW', '{safe_name}')") + if rows: + return str(next(iter(rows[0].values()))) + return None + + +def format_semantic_views_section(semantic_views: list[SemanticViewInfo]) -> str: + """ + Format the semantic views section for schema output. + + Parameters + ---------- + semantic_views + List of semantic view metadata + + Returns + ------- + str + Formatted markdown section describing the semantic views + + """ + lines = [ + "## Snowflake Semantic Views", + "", + "This database has Semantic Views available. Semantic Views provide a curated ", + "layer over raw data with pre-defined metrics, dimensions, and relationships. 
", + "They encode business logic and calculation rules that ensure consistent, ", + "accurate results.", + "", + "**IMPORTANT**: When a Semantic View covers the data you need, prefer it over ", + "raw table queries to benefit from certified metric definitions.", + "", + ] + + for sv in semantic_views: + lines.append(f"### Semantic View: `{sv.name}`") + lines.append("") + lines.append("```sql") + lines.append(sv.ddl) + lines.append("```") + lines.append("") + + return "\n".join(lines) + + +class SemanticViewMixin: + """ + Mixin providing semantic view support for get_schema(). + + This mixin adds semantic view discovery and schema formatting to DataSource + subclasses. Classes using this mixin must initialize `_semantic_views` in + their constructor. + + Attributes + ---------- + _semantic_views : list[SemanticViewInfo] + List of discovered semantic views (set by subclass) + + """ + + _semantic_views: list[SemanticViewInfo] + + def _get_schema_with_semantic_views(self, base_schema: str) -> str: + """ + Append semantic view section to base schema if views exist. + + Parameters + ---------- + base_schema + The base schema string from the parent class + + Returns + ------- + str + Schema with semantic views section appended (if any exist) + + """ + if not self._semantic_views: + return base_schema + return f"{base_schema}\n\n{format_semantic_views_section(self._semantic_views)}" + + @property + def has_semantic_views(self) -> bool: + """Check if semantic views are available.""" + return len(self._semantic_views) > 0 + + @property + def semantic_views(self) -> list[SemanticViewInfo]: + """Get the list of discovered semantic views.""" + return self._semantic_views diff --git a/pkg-py/src/querychat/_snowflake_sources.py b/pkg-py/src/querychat/_snowflake_sources.py new file mode 100644 index 00000000..fde6946e --- /dev/null +++ b/pkg-py/src/querychat/_snowflake_sources.py @@ -0,0 +1,147 @@ +""" +Snowflake-specific DataSource implementations. 
+ +This module provides DataSource classes for Snowflake connections with +semantic view support. Both SQLAlchemy and Ibis backends are supported. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from ._datasource import IbisSource, SQLAlchemySource +from ._snowflake import ( + IbisExecutor, + SemanticViewInfo, + SemanticViewMixin, + SQLAlchemyExecutor, + discover_semantic_views, +) + +if TYPE_CHECKING: + import ibis + from sqlalchemy.engine import Engine + +__all__ = ["SnowflakeIbisSource", "SnowflakeSource"] + + +class SnowflakeSource(SQLAlchemySource, SemanticViewMixin): + """ + SQLAlchemy-based Snowflake source with semantic view support. + + Extends SQLAlchemySource to automatically detect and provide context about + Snowflake Semantic Views when available. + """ + + _semantic_views: list[SemanticViewInfo] + + def __init__( + self, + engine: Engine, + table_name: str, + *, + discover_semantic_views_flag: bool = True, + ): + """ + Initialize with a SQLAlchemy engine connected to Snowflake. + + Parameters + ---------- + engine + SQLAlchemy engine connected to Snowflake + table_name + Name of the table to query + discover_semantic_views_flag + If True (default), automatically discover semantic views at + initialization. Set to False to skip discovery (e.g., for + performance or if not needed). + + """ + super().__init__(engine, table_name) + + if discover_semantic_views_flag: + executor = SQLAlchemyExecutor(engine) + self._semantic_views = discover_semantic_views(executor) + else: + self._semantic_views = [] + + def get_schema(self, *, categorical_threshold: int) -> str: + """ + Generate schema information including semantic view context. 
+ + Parameters + ---------- + categorical_threshold + Maximum number of unique values for a text column to be considered + categorical + + Returns + ------- + str + String describing the schema, including semantic view information + if available + + """ + base_schema = super().get_schema(categorical_threshold=categorical_threshold) + return self._get_schema_with_semantic_views(base_schema) + + +class SnowflakeIbisSource(IbisSource, SemanticViewMixin): + """ + Ibis-based Snowflake source with semantic view support. + + Extends IbisSource to automatically detect and provide context about + Snowflake Semantic Views when available. + """ + + _semantic_views: list[SemanticViewInfo] + + def __init__( + self, + table: ibis.Table, + table_name: str, + *, + discover_semantic_views_flag: bool = True, + ): + """ + Initialize with an Ibis Table connected to Snowflake. + + Parameters + ---------- + table + Ibis Table from a Snowflake backend + table_name + Name of the table to query + discover_semantic_views_flag + If True (default), automatically discover semantic views at + initialization. Set to False to skip discovery (e.g., for + performance or if not needed). + + """ + super().__init__(table, table_name) + + if discover_semantic_views_flag and self._backend.name.lower() == "snowflake": + executor = IbisExecutor(self._backend) + self._semantic_views = discover_semantic_views(executor) + else: + self._semantic_views = [] + + def get_schema(self, *, categorical_threshold: int) -> str: + """ + Generate schema information including semantic view context. 
+ + Parameters + ---------- + categorical_threshold + Maximum number of unique values for a text column to be considered + categorical + + Returns + ------- + str + String describing the schema, including semantic view information + if available + + """ + base_schema = super().get_schema(categorical_threshold=categorical_threshold) + return self._get_schema_with_semantic_views(base_schema) diff --git a/pkg-py/tests/test_snowflake_source.py b/pkg-py/tests/test_snowflake_source.py index 50e60f8e..06aff447 100644 --- a/pkg-py/tests/test_snowflake_source.py +++ b/pkg-py/tests/test_snowflake_source.py @@ -1,10 +1,19 @@ -"""Tests for SnowflakeSource and semantic view functionality.""" +"""Tests for Snowflake semantic view functionality.""" import logging from unittest.mock import MagicMock, patch -import pytest +# Import from _datasource for backwards compatibility testing from querychat._datasource import SemanticViewInfo, SnowflakeSource +from querychat._snowflake import ( + IbisExecutor, + SQLAlchemyConnectionExecutor, + SQLAlchemyExecutor, + discover_semantic_views, + format_semantic_views_section, + get_semantic_view_ddl, +) +from querychat._snowflake_sources import SnowflakeIbisSource class TestSemanticViewInfo: @@ -28,182 +37,213 @@ def test_equality(self): class TestFormatSemanticViewsSection: """Tests for semantic view formatting.""" - def test_format_with_views(self): - """Test that format produces expected markdown structure.""" - mock_engine = MagicMock() - mock_engine.dialect.name = "snowflake" - mock_inspector = MagicMock() - mock_inspector.has_table.return_value = True - mock_inspector.get_columns.return_value = [{"name": "id"}] + def test_format_single_view(self): + """Test that format produces expected markdown structure for single view.""" + views = [SemanticViewInfo(name="db.schema.view1", ddl="CREATE SEMANTIC VIEW v1")] + section = format_semantic_views_section(views) - with ( - patch("querychat._datasource.inspect", return_value=mock_inspector), - 
patch.object(SnowflakeSource, "_discover_semantic_views", return_value=[]), - ): - source = SnowflakeSource(mock_engine, "test_table") - # Manually add semantic views for formatting test - source._semantic_views = [ - SemanticViewInfo(name="db.schema.view1", ddl="CREATE SEMANTIC VIEW v1"), - SemanticViewInfo(name="db.schema.view2", ddl="CREATE SEMANTIC VIEW v2"), - ] + assert "## Snowflake Semantic Views" in section + assert "db.schema.view1" in section + assert "CREATE SEMANTIC VIEW v1" in section + assert "```sql" in section + assert "**IMPORTANT**" in section - section = source._format_semantic_views_section() - assert "## Snowflake Semantic Views" in section - assert "db.schema.view1" in section - assert "db.schema.view2" in section - assert "CREATE SEMANTIC VIEW v1" in section - assert "CREATE SEMANTIC VIEW v2" in section - assert "```sql" in section + def test_format_multiple_views(self): + """Test formatting with multiple views.""" + views = [ + SemanticViewInfo(name="db.schema.view1", ddl="CREATE SEMANTIC VIEW v1"), + SemanticViewInfo(name="db.schema.view2", ddl="CREATE SEMANTIC VIEW v2"), + ] + section = format_semantic_views_section(views) + + assert "db.schema.view1" in section + assert "db.schema.view2" in section + assert "CREATE SEMANTIC VIEW v1" in section + assert "CREATE SEMANTIC VIEW v2" in section class TestSQLEscaping: - """Tests for SQL injection prevention.""" + """Tests for SQL injection prevention in get_semantic_view_ddl.""" def test_single_quote_escaped(self): """Verify that names with single quotes are properly escaped.""" - mock_engine = MagicMock() - mock_engine.dialect.name = "snowflake" - mock_inspector = MagicMock() - mock_inspector.has_table.return_value = True - mock_inspector.get_columns.return_value = [{"name": "id"}] + mock_executor = MagicMock() + mock_executor.execute_raw_sql.return_value = [{"col": "DDL result"}] + + get_semantic_view_ddl(mock_executor, "db.schema.test'view") + # Verify the executed query has escaped quotes + 
call_args = mock_executor.execute_raw_sql.call_args + query = call_args[0][0] + assert "test''view" in query + + def test_normal_name_unchanged(self): + """Verify that normal names without special chars work correctly.""" + mock_executor = MagicMock() + mock_executor.execute_raw_sql.return_value = [{"col": "DDL result"}] + + get_semantic_view_ddl(mock_executor, "db.schema.normal_view") + + call_args = mock_executor.execute_raw_sql.call_args + query = call_args[0][0] + assert "db.schema.normal_view" in query + assert "''" not in query + + +class TestDiscoverSemanticViews: + """Tests for the standalone discover_semantic_views function.""" + + def test_discover_returns_views(self): + """Test successful discovery of semantic views.""" + mock_executor = MagicMock() + mock_executor.execute_raw_sql.side_effect = [ + # First call: SHOW SEMANTIC VIEWS + [ + {"database_name": "DB", "schema_name": "SCH", "name": "VIEW1"}, + {"database_name": "DB", "schema_name": "SCH", "name": "VIEW2"}, + ], + # Second call: GET_DDL for VIEW1 + [{"col": "DDL1"}], + # Third call: GET_DDL for VIEW2 + [{"col": "DDL2"}], + ] + + views = discover_semantic_views(mock_executor) + + assert len(views) == 2 + assert views[0].name == "DB.SCH.VIEW1" + assert views[0].ddl == "DDL1" + assert views[1].name == "DB.SCH.VIEW2" + assert views[1].ddl == "DDL2" + + def test_discover_no_views(self, caplog): + """Test discovery when no views exist.""" + mock_executor = MagicMock() + mock_executor.execute_raw_sql.return_value = [] + + with caplog.at_level(logging.DEBUG, logger="querychat._snowflake"): + views = discover_semantic_views(mock_executor) + + assert views == [] + assert "No semantic views found" in caplog.text + + def test_discover_skips_null_names(self): + """Test that rows with null names are skipped.""" + mock_executor = MagicMock() + mock_executor.execute_raw_sql.side_effect = [ + [ + {"database_name": "DB", "schema_name": "SCH", "name": None}, + {"database_name": "DB", "schema_name": "SCH", "name": 
"VIEW1"}, + ], + [{"col": "DDL1"}], + ] + + views = discover_semantic_views(mock_executor) + + assert len(views) == 1 + assert views[0].name == "DB.SCH.VIEW1" + + +class TestSQLAlchemyExecutor: + """Tests for SQLAlchemyExecutor.""" + + def test_execute_raw_sql(self): + """Test that execute_raw_sql returns list of dicts.""" + mock_engine = MagicMock() mock_conn = MagicMock() mock_result = MagicMock() - mock_result.fetchone.return_value = ["DDL result"] + mock_result.keys.return_value = ["col1", "col2"] + mock_result.fetchall.return_value = [("a", "b"), ("c", "d")] - with ( - patch("querychat._datasource.inspect", return_value=mock_inspector), - patch.object(SnowflakeSource, "_discover_semantic_views", return_value=[]), - ): - source = SnowflakeSource(mock_engine, "test_table") + mock_engine.connect.return_value.__enter__ = MagicMock(return_value=mock_conn) + mock_engine.connect.return_value.__exit__ = MagicMock(return_value=False) + mock_conn.execute.return_value = mock_result - # Test the escaping logic directly - with patch.object(source, "_get_connection") as mock_get_conn: - mock_context = MagicMock() - mock_context.__enter__ = MagicMock(return_value=mock_conn) - mock_context.__exit__ = MagicMock(return_value=False) - mock_get_conn.return_value = mock_context - mock_conn.execute.return_value = mock_result + executor = SQLAlchemyExecutor(mock_engine) + result = executor.execute_raw_sql("SELECT 1") - # Call with a name containing single quotes - source._get_semantic_view_ddl(mock_conn, "db.schema.test'view") + assert result == [{"col1": "a", "col2": "b"}, {"col1": "c", "col2": "d"}] - # Verify the executed query has escaped quotes - call_args = mock_conn.execute.call_args - query_text = str(call_args[0][0]) - assert "test''view" in query_text - def test_normal_name_unchanged(self): - """Verify that normal names without special chars are not modified.""" - mock_engine = MagicMock() - mock_engine.dialect.name = "snowflake" - mock_inspector = MagicMock() - 
mock_inspector.has_table.return_value = True - mock_inspector.get_columns.return_value = [{"name": "id"}] +class TestSQLAlchemyConnectionExecutor: + """Tests for SQLAlchemyConnectionExecutor.""" + def test_execute_raw_sql(self): + """Test that execute_raw_sql uses existing connection.""" mock_conn = MagicMock() mock_result = MagicMock() - mock_result.fetchone.return_value = ["DDL result"] + mock_result.keys.return_value = ["col1"] + mock_result.fetchall.return_value = [("value",)] + mock_conn.execute.return_value = mock_result - with ( - patch("querychat._datasource.inspect", return_value=mock_inspector), - patch.object(SnowflakeSource, "_discover_semantic_views", return_value=[]), - ): - source = SnowflakeSource(mock_engine, "test_table") + executor = SQLAlchemyConnectionExecutor(mock_conn) + result = executor.execute_raw_sql("SELECT 1") - mock_conn.execute.return_value = mock_result - source._get_semantic_view_ddl(mock_conn, "db.schema.normal_view") + assert result == [{"col1": "value"}] + mock_conn.execute.assert_called_once() - call_args = mock_conn.execute.call_args - query_text = str(call_args[0][0]) - assert "db.schema.normal_view" in query_text +class TestIbisExecutor: + """Tests for IbisExecutor.""" -class TestSnowflakeSourceDiscovery: - """Tests for semantic view discovery with mocked connections.""" + def test_execute_raw_sql(self): + """Test that execute_raw_sql converts ibis result to list of dicts.""" + mock_backend = MagicMock() + mock_table = MagicMock() + mock_df = MagicMock() + mock_df.to_dict.return_value = [{"col1": "a"}, {"col1": "b"}] - def test_discovery_disabled(self): - """Test that discover_semantic_views=False skips discovery.""" - mock_engine = MagicMock() - mock_engine.dialect.name = "snowflake" - mock_inspector = MagicMock() - mock_inspector.has_table.return_value = True - mock_inspector.get_columns.return_value = [{"name": "id"}] + mock_backend.sql.return_value = mock_table + mock_table.execute.return_value = mock_df - with 
patch("querychat._datasource.inspect", return_value=mock_inspector): - # Should not call _discover_semantic_views when disabled - source = SnowflakeSource( - mock_engine, "test_table", discover_semantic_views=False - ) - assert source._semantic_views == [] - assert not source.has_semantic_views + executor = IbisExecutor(mock_backend) + result = executor.execute_raw_sql("SELECT 1") - def test_discovery_enabled_default(self): - """Test that discovery is enabled by default.""" - mock_engine = MagicMock() - mock_engine.dialect.name = "snowflake" - mock_inspector = MagicMock() - mock_inspector.has_table.return_value = True - mock_inspector.get_columns.return_value = [{"name": "id"}] + assert result == [{"col1": "a"}, {"col1": "b"}] + mock_backend.sql.assert_called_once_with("SELECT 1") + mock_df.to_dict.assert_called_once_with(orient="records") - with ( - patch("querychat._datasource.inspect", return_value=mock_inspector), - patch.object( - SnowflakeSource, "_discover_semantic_views", return_value=[] - ) as mock_discover, - ): - SnowflakeSource(mock_engine, "test_table") - mock_discover.assert_called_once() - def test_discovery_error_propagates(self): - """Verify that discovery errors propagate (not swallowed).""" +class TestSnowflakeSourceDiscovery: + """Tests for SnowflakeSource semantic view discovery.""" + + def test_discovery_disabled(self): + """Test that discover_semantic_views_flag=False skips discovery.""" mock_engine = MagicMock() mock_engine.dialect.name = "snowflake" mock_inspector = MagicMock() mock_inspector.has_table.return_value = True mock_inspector.get_columns.return_value = [{"name": "id"}] - mock_conn = MagicMock() - mock_conn.execute.side_effect = Exception("Database connection failed") - - with ( - patch("querychat._datasource.inspect", return_value=mock_inspector), - patch.object(mock_engine, "connect") as mock_connect, + with patch( + "querychat._snowflake_sources.SQLAlchemySource.__init__", return_value=None ): - mock_context = MagicMock() - 
mock_context.__enter__ = MagicMock(return_value=mock_conn) - mock_context.__exit__ = MagicMock(return_value=False) - mock_connect.return_value = mock_context - - # Error should propagate, not be swallowed - with pytest.raises(Exception, match="Database connection failed"): - SnowflakeSource(mock_engine, "test_table") - - def test_no_views_logs_debug(self, caplog): - """Verify debug message when no views found.""" + source = SnowflakeSource.__new__(SnowflakeSource) + source._engine = mock_engine + source.table_name = "test" + source._columns_info = [] + source._colnames = [] + source._semantic_views = [] + + assert source._semantic_views == [] + assert not source.has_semantic_views + + def test_discovery_enabled_calls_discover(self): + """Test that discovery is called when enabled.""" mock_engine = MagicMock() mock_engine.dialect.name = "snowflake" mock_inspector = MagicMock() mock_inspector.has_table.return_value = True mock_inspector.get_columns.return_value = [{"name": "id"}] - mock_conn = MagicMock() - mock_result = MagicMock() - mock_result.fetchall.return_value = [] - with ( patch("querychat._datasource.inspect", return_value=mock_inspector), - patch.object(mock_engine, "connect") as mock_connect, - caplog.at_level(logging.DEBUG, logger="querychat._datasource"), + patch( + "querychat._snowflake_sources.discover_semantic_views", return_value=[] + ) as mock_discover, ): - mock_context = MagicMock() - mock_context.__enter__ = MagicMock(return_value=mock_conn) - mock_context.__exit__ = MagicMock(return_value=False) - mock_connect.return_value = mock_context - mock_conn.execute.return_value = mock_result - SnowflakeSource(mock_engine, "test_table") - assert "No semantic views found" in caplog.text + mock_discover.assert_called_once() class TestSnowflakeSourceProperties: @@ -212,14 +252,15 @@ class TestSnowflakeSourceProperties: def test_has_semantic_views_true(self): """Test has_semantic_views returns True when views exist.""" mock_engine = MagicMock() - 
mock_engine.dialect.name = "snowflake" mock_inspector = MagicMock() mock_inspector.has_table.return_value = True mock_inspector.get_columns.return_value = [{"name": "id"}] with ( patch("querychat._datasource.inspect", return_value=mock_inspector), - patch.object(SnowflakeSource, "_discover_semantic_views", return_value=[]), + patch( + "querychat._snowflake_sources.discover_semantic_views", return_value=[] + ), ): source = SnowflakeSource(mock_engine, "test_table") source._semantic_views = [SemanticViewInfo(name="test", ddl="DDL")] @@ -228,35 +269,36 @@ def test_has_semantic_views_true(self): def test_has_semantic_views_false(self): """Test has_semantic_views returns False when no views.""" mock_engine = MagicMock() - mock_engine.dialect.name = "snowflake" mock_inspector = MagicMock() mock_inspector.has_table.return_value = True mock_inspector.get_columns.return_value = [{"name": "id"}] with ( patch("querychat._datasource.inspect", return_value=mock_inspector), - patch.object(SnowflakeSource, "_discover_semantic_views", return_value=[]), + patch( + "querychat._snowflake_sources.discover_semantic_views", return_value=[] + ), ): source = SnowflakeSource(mock_engine, "test_table") assert source.has_semantic_views is False def test_semantic_views_property(self): """Test semantic_views property returns the list.""" - mock_engine = MagicMock() - mock_engine.dialect.name = "snowflake" - mock_inspector = MagicMock() - mock_inspector.has_table.return_value = True - mock_inspector.get_columns.return_value = [{"name": "id"}] - views = [ SemanticViewInfo(name="view1", ddl="DDL1"), SemanticViewInfo(name="view2", ddl="DDL2"), ] + mock_engine = MagicMock() + mock_inspector = MagicMock() + mock_inspector.has_table.return_value = True + mock_inspector.get_columns.return_value = [{"name": "id"}] + with ( patch("querychat._datasource.inspect", return_value=mock_inspector), - patch.object( - SnowflakeSource, "_discover_semantic_views", return_value=views + patch( + 
"querychat._snowflake_sources.discover_semantic_views", + return_value=views, ), ): source = SnowflakeSource(mock_engine, "test_table") @@ -268,54 +310,168 @@ class TestGetSchemaWithSemanticViews: def test_schema_includes_semantic_views(self): """Test that get_schema includes semantic view section.""" + views = [SemanticViewInfo(name="db.schema.metrics", ddl="CREATE SEMANTIC VIEW")] + mock_engine = MagicMock() - mock_engine.dialect.name = "snowflake" mock_inspector = MagicMock() mock_inspector.has_table.return_value = True mock_inspector.get_columns.return_value = [{"name": "id", "type": MagicMock()}] - views = [SemanticViewInfo(name="db.schema.metrics", ddl="CREATE SEMANTIC VIEW")] - with ( patch("querychat._datasource.inspect", return_value=mock_inspector), - patch.object( - SnowflakeSource, "_discover_semantic_views", return_value=views + patch( + "querychat._snowflake_sources.discover_semantic_views", + return_value=views, ), - ): - source = SnowflakeSource(mock_engine, "test_table") - - # Mock the parent get_schema - with patch.object( + patch.object( SnowflakeSource.__bases__[0], "get_schema", return_value="Table: test_table\nColumns:\n- id", - ): - schema = source.get_schema(categorical_threshold=20) + ), + ): + source = SnowflakeSource(mock_engine, "test_table") + schema = source.get_schema(categorical_threshold=20) - assert "Table: test_table" in schema - assert "## Snowflake Semantic Views" in schema - assert "db.schema.metrics" in schema + assert "Table: test_table" in schema + assert "## Snowflake Semantic Views" in schema + assert "db.schema.metrics" in schema def test_schema_without_semantic_views(self): """Test that get_schema works without semantic views.""" mock_engine = MagicMock() - mock_engine.dialect.name = "snowflake" mock_inspector = MagicMock() mock_inspector.has_table.return_value = True mock_inspector.get_columns.return_value = [{"name": "id", "type": MagicMock()}] with ( patch("querychat._datasource.inspect", return_value=mock_inspector), 
- patch.object(SnowflakeSource, "_discover_semantic_views", return_value=[]), - ): - source = SnowflakeSource(mock_engine, "test_table") - - with patch.object( + patch( + "querychat._snowflake_sources.discover_semantic_views", return_value=[] + ), + patch.object( SnowflakeSource.__bases__[0], "get_schema", return_value="Table: test_table\nColumns:\n- id", + ), + ): + source = SnowflakeSource(mock_engine, "test_table") + schema = source.get_schema(categorical_threshold=20) + + assert "Table: test_table" in schema + assert "## Snowflake Semantic Views" not in schema + + +class TestSnowflakeIbisSource: + """Tests for SnowflakeIbisSource.""" + + def test_discovery_enabled_for_snowflake(self): + """Test that discovery runs for Snowflake backends.""" + mock_table = MagicMock() + mock_backend = MagicMock() + mock_backend.name = "snowflake" + mock_table.get_backend.return_value = mock_backend + mock_schema = MagicMock() + mock_schema.items.return_value = [] + mock_schema.names = [] + mock_table.schema.return_value = mock_schema + + with patch( + "querychat._snowflake_sources.discover_semantic_views", return_value=[] + ) as mock_discover: + with patch( + "querychat._snowflake_sources.IbisSource.__init__", return_value=None ): - schema = source.get_schema(categorical_threshold=20) + source = SnowflakeIbisSource.__new__(SnowflakeIbisSource) + source._table = mock_table + source.table_name = "test" + source._schema = mock_schema + source._backend = mock_backend + source._colnames = [] + + # Manually call the __init__ logic that would use discover_semantic_views + # Import from _snowflake_sources module's namespace (where it's imported) + import querychat._snowflake_sources as sf_sources + + # Verify the function is importable and would be called + executor = sf_sources.IbisExecutor(mock_backend) + source._semantic_views = sf_sources.discover_semantic_views(executor) + + mock_discover.assert_called_once() + + def test_discovery_disabled_for_non_snowflake(self): + """Test that 
discovery is skipped for non-Snowflake backends.""" + mock_table = MagicMock() + mock_backend = MagicMock() + mock_backend.name = "postgres" + mock_table.get_backend.return_value = mock_backend + mock_schema = MagicMock() + mock_schema.items.return_value = [] + mock_schema.names = [] + mock_table.schema.return_value = mock_schema + + with patch( + "querychat._snowflake_sources.discover_semantic_views" + ) as mock_discover: + with patch( + "querychat._snowflake_sources.IbisSource.__init__", return_value=None + ): + source = SnowflakeIbisSource.__new__(SnowflakeIbisSource) + source._table = mock_table + source.table_name = "test" + source._schema = mock_schema + source._backend = mock_backend + source._colnames = [] + + # Manually set semantic views (simulating what __init__ does for non-Snowflake) + source._semantic_views = [] + + # Since backend is not Snowflake, discover should not be called + mock_discover.assert_not_called() + assert source._semantic_views == [] + + def test_has_semantic_views_mixin(self): + """Test that SnowflakeIbisSource has semantic view mixin properties.""" + mock_table = MagicMock() + mock_backend = MagicMock() + mock_backend.name = "snowflake" + mock_table.get_backend.return_value = mock_backend + mock_schema = MagicMock() + mock_schema.items.return_value = [] + mock_schema.names = [] + mock_table.schema.return_value = mock_schema + + with ( + patch( + "querychat._snowflake_sources.discover_semantic_views", return_value=[] + ), + patch( + "querychat._snowflake_sources.IbisSource.__init__", return_value=None + ), + ): + source = SnowflakeIbisSource.__new__(SnowflakeIbisSource) + source._table = mock_table + source.table_name = "test" + source._schema = mock_schema + source._backend = mock_backend + source._colnames = [] + source._semantic_views = [SemanticViewInfo(name="test", ddl="DDL")] + + assert source.has_semantic_views is True + assert source.semantic_views == [SemanticViewInfo(name="test", ddl="DDL")] + + +class 
TestBackwardsCompatibility: + """Tests for backwards-compatible imports.""" + + def test_import_from_datasource(self): + """Test that imports from _datasource still work.""" + from querychat._datasource import SemanticViewInfo, SnowflakeSource + + assert SemanticViewInfo is not None + assert SnowflakeSource is not None + + def test_snowflake_ibis_source_import(self): + """Test that SnowflakeIbisSource can be imported from _datasource.""" + from querychat._datasource import SnowflakeIbisSource - assert "Table: test_table" in schema - assert "## Snowflake Semantic Views" not in schema + assert SnowflakeIbisSource is not None From 42b8ece4ec10e23203783b5ea144f126ec61dd0a Mon Sep 17 00:00:00 2001 From: Carson Date: Mon, 26 Jan 2026 11:50:37 -0600 Subject: [PATCH 06/45] chore: Remove backwards-compat re-exports from _datasource.py Co-Authored-By: Claude Opus 4.5 --- pkg-py/src/querychat/_datasource.py | 25 ------------------------- pkg-py/tests/test_snowflake_source.py | 22 ++-------------------- 2 files changed, 2 insertions(+), 45 deletions(-) diff --git a/pkg-py/src/querychat/_datasource.py b/pkg-py/src/querychat/_datasource.py index c5f3750c..e5bdcc93 100644 --- a/pkg-py/src/querychat/_datasource.py +++ b/pkg-py/src/querychat/_datasource.py @@ -1,6 +1,5 @@ from __future__ import annotations -import logging from abc import ABC, abstractmethod from dataclasses import dataclass, field from typing import TYPE_CHECKING, Any, Generic, Literal, cast @@ -14,8 +13,6 @@ from ._df_compat import read_sql from ._utils import as_narwhals, check_query -logger = logging.getLogger(__name__) - if TYPE_CHECKING: import ibis import polars as pl @@ -1137,25 +1134,3 @@ def cleanup(self) -> None: The Ibis backend connection is owned by the caller and should be closed by calling `backend.disconnect()` when appropriate. 
""" - - -# Backwards-compatible re-exports (moved to _snowflake.py and _snowflake_sources.py) -from ._snowflake import SemanticViewInfo # noqa: E402 -from ._snowflake_sources import ( # noqa: E402 - SnowflakeIbisSource, - SnowflakeSource, -) - -__all__ = [ - "ColumnMeta", - "DataFrameSource", - "DataSource", - "IbisSource", - "MissingColumnsError", - "PolarsLazySource", - "SQLAlchemySource", - "SemanticViewInfo", - "SnowflakeIbisSource", - "SnowflakeSource", - "format_schema", -] diff --git a/pkg-py/tests/test_snowflake_source.py b/pkg-py/tests/test_snowflake_source.py index 06aff447..4335f1cd 100644 --- a/pkg-py/tests/test_snowflake_source.py +++ b/pkg-py/tests/test_snowflake_source.py @@ -3,17 +3,16 @@ import logging from unittest.mock import MagicMock, patch -# Import from _datasource for backwards compatibility testing -from querychat._datasource import SemanticViewInfo, SnowflakeSource from querychat._snowflake import ( IbisExecutor, + SemanticViewInfo, SQLAlchemyConnectionExecutor, SQLAlchemyExecutor, discover_semantic_views, format_semantic_views_section, get_semantic_view_ddl, ) -from querychat._snowflake_sources import SnowflakeIbisSource +from querychat._snowflake_sources import SnowflakeIbisSource, SnowflakeSource class TestSemanticViewInfo: @@ -458,20 +457,3 @@ def test_has_semantic_views_mixin(self): assert source.has_semantic_views is True assert source.semantic_views == [SemanticViewInfo(name="test", ddl="DDL")] - - -class TestBackwardsCompatibility: - """Tests for backwards-compatible imports.""" - - def test_import_from_datasource(self): - """Test that imports from _datasource still work.""" - from querychat._datasource import SemanticViewInfo, SnowflakeSource - - assert SemanticViewInfo is not None - assert SnowflakeSource is not None - - def test_snowflake_ibis_source_import(self): - """Test that SnowflakeIbisSource can be imported from _datasource.""" - from querychat._datasource import SnowflakeIbisSource - - assert SnowflakeIbisSource is not 
None From 28268916fa6379ab9ccc674d4dca565fe0157dc4 Mon Sep 17 00:00:00 2001 From: Carson Date: Mon, 26 Jan 2026 12:15:05 -0600 Subject: [PATCH 07/45] refactor: Move Snowflake semantic view discovery into base classes Instead of separate SnowflakeSource and SnowflakeIbisSource classes, SQLAlchemySource and IbisSource now auto-detect Snowflake backends and discover semantic views during initialization. Changes: - SQLAlchemySource/IbisSource check dialect/backend name for "snowflake" - Discovery can be disabled via QUERYCHAT_DISABLE_SEMANTIC_VIEWS env var - Removed _snowflake_sources.py (no longer needed) - Simplified normalize_data_source() - no Snowflake-specific routing - Updated tests to verify new architecture Co-Authored-By: Claude Opus 4.5 --- pkg-py/src/querychat/_datasource.py | 43 +++- pkg-py/src/querychat/_querychat_base.py | 11 +- pkg-py/src/querychat/_snowflake_sources.py | 147 ------------ pkg-py/tests/test_snowflake_source.py | 267 +++++++++------------ 4 files changed, 161 insertions(+), 307 deletions(-) delete mode 100644 pkg-py/src/querychat/_snowflake_sources.py diff --git a/pkg-py/src/querychat/_datasource.py b/pkg-py/src/querychat/_datasource.py index e5bdcc93..e366e20f 100644 --- a/pkg-py/src/querychat/_datasource.py +++ b/pkg-py/src/querychat/_datasource.py @@ -1,5 +1,6 @@ from __future__ import annotations +import os from abc import ABC, abstractmethod from dataclasses import dataclass, field from typing import TYPE_CHECKING, Any, Generic, Literal, cast @@ -11,6 +12,13 @@ from sqlalchemy.sql import sqltypes from ._df_compat import read_sql +from ._snowflake import ( + IbisExecutor, + SemanticViewInfo, + SQLAlchemyExecutor, + discover_semantic_views, + format_semantic_views_section, +) from ._utils import as_narwhals, check_query if TYPE_CHECKING: @@ -429,6 +437,8 @@ class SQLAlchemySource(DataSource[nw.DataFrame]): and Databricks. 
""" + _semantic_views: list[SemanticViewInfo] + def __init__( self, engine: Engine, @@ -457,6 +467,15 @@ def __init__( self._columns_info = inspector.get_columns(table_name) self._colnames = [col["name"] for col in self._columns_info] + # Discover Snowflake semantic views if applicable + self._semantic_views = [] + if ( + self._engine.dialect.name.lower() == "snowflake" + and not os.environ.get("QUERYCHAT_DISABLE_SEMANTIC_VIEWS") + ): + executor = SQLAlchemyExecutor(engine) + self._semantic_views = discover_semantic_views(executor) + def get_db_type(self) -> str: """ Get the database type. @@ -487,7 +506,12 @@ def get_schema(self, *, categorical_threshold: int) -> str: for col in self._columns_info ] self._add_column_stats(columns, categorical_threshold) - return format_schema(self.table_name, columns) + schema = format_schema(self.table_name, columns) + + if self._semantic_views: + schema = f"{schema}\n\n{format_semantic_views_section(self._semantic_views)}" + + return schema @staticmethod def _make_column_meta(name: str, sa_type: sqltypes.TypeEngine) -> ColumnMeta: @@ -926,6 +950,7 @@ class IbisSource(DataSource["ibis.Table"]): _table: ibis.Table _backend: SQLBackend + _semantic_views: list[SemanticViewInfo] table_name: str def __init__(self, table: ibis.Table, table_name: str): @@ -950,6 +975,15 @@ def __init__(self, table: ibis.Table, table_name: str): ) self._colnames = list(colnames) + # Discover Snowflake semantic views if applicable + self._semantic_views = [] + if ( + self._backend.name.lower() == "snowflake" + and not os.environ.get("QUERYCHAT_DISABLE_SEMANTIC_VIEWS") + ): + executor = IbisExecutor(self._backend) + self._semantic_views = discover_semantic_views(executor) + def get_db_type(self) -> str: return self._backend.name @@ -958,7 +992,12 @@ def get_schema(self, *, categorical_threshold: int) -> str: self._make_column_meta(name, dtype) for name, dtype in self._schema.items() ] self._add_column_stats(columns, self._table, categorical_threshold) - 
return format_schema(self.table_name, columns) + schema = format_schema(self.table_name, columns) + + if self._semantic_views: + schema = f"{schema}\n\n{format_semantic_views_section(self._semantic_views)}" + + return schema @staticmethod def _make_column_meta(name: str, dtype: IbisDataType) -> ColumnMeta: diff --git a/pkg-py/src/querychat/_querychat_base.py b/pkg-py/src/querychat/_querychat_base.py index 6bacb0b1..de87a2c8 100644 --- a/pkg-py/src/querychat/_querychat_base.py +++ b/pkg-py/src/querychat/_querychat_base.py @@ -21,7 +21,6 @@ SQLAlchemySource, ) from ._shiny_module import GREETING_PROMPT -from ._snowflake_sources import SnowflakeIbisSource, SnowflakeSource from ._system_prompt import QueryChatSystemPrompt from ._utils import MISSING, MISSING_TYPE, is_ibis_table from .tools import ( @@ -223,23 +222,17 @@ def cleanup(self) -> None: self._data_source.cleanup() -def normalize_data_source( # noqa: PLR0911 +def normalize_data_source( data_source: IntoFrame | sqlalchemy.Engine | DataSource, table_name: str, ) -> DataSource: if isinstance(data_source, DataSource): return data_source + if isinstance(data_source, sqlalchemy.Engine): - # Use SnowflakeSource for Snowflake connections to get semantic view support - dialect_name = getattr(getattr(data_source, "dialect", None), "name", "") or "" - if dialect_name.lower() == "snowflake": - return SnowflakeSource(data_source, table_name) return SQLAlchemySource(data_source, table_name) if is_ibis_table(data_source): - backend = data_source.get_backend() - if backend.name.lower() == "snowflake": - return SnowflakeIbisSource(data_source, table_name) return IbisSource(data_source, table_name) src = nw.from_native(data_source, pass_through=True) diff --git a/pkg-py/src/querychat/_snowflake_sources.py b/pkg-py/src/querychat/_snowflake_sources.py deleted file mode 100644 index fde6946e..00000000 --- a/pkg-py/src/querychat/_snowflake_sources.py +++ /dev/null @@ -1,147 +0,0 @@ -""" -Snowflake-specific DataSource 
implementations. - -This module provides DataSource classes for Snowflake connections with -semantic view support. Both SQLAlchemy and Ibis backends are supported. -""" - -from __future__ import annotations - -from typing import TYPE_CHECKING - -from ._datasource import IbisSource, SQLAlchemySource -from ._snowflake import ( - IbisExecutor, - SemanticViewInfo, - SemanticViewMixin, - SQLAlchemyExecutor, - discover_semantic_views, -) - -if TYPE_CHECKING: - import ibis - from sqlalchemy.engine import Engine - -__all__ = ["SnowflakeIbisSource", "SnowflakeSource"] - - -class SnowflakeSource(SQLAlchemySource, SemanticViewMixin): - """ - SQLAlchemy-based Snowflake source with semantic view support. - - Extends SQLAlchemySource to automatically detect and provide context about - Snowflake Semantic Views when available. - """ - - _semantic_views: list[SemanticViewInfo] - - def __init__( - self, - engine: Engine, - table_name: str, - *, - discover_semantic_views_flag: bool = True, - ): - """ - Initialize with a SQLAlchemy engine connected to Snowflake. - - Parameters - ---------- - engine - SQLAlchemy engine connected to Snowflake - table_name - Name of the table to query - discover_semantic_views_flag - If True (default), automatically discover semantic views at - initialization. Set to False to skip discovery (e.g., for - performance or if not needed). - - """ - super().__init__(engine, table_name) - - if discover_semantic_views_flag: - executor = SQLAlchemyExecutor(engine) - self._semantic_views = discover_semantic_views(executor) - else: - self._semantic_views = [] - - def get_schema(self, *, categorical_threshold: int) -> str: - """ - Generate schema information including semantic view context. 
- - Parameters - ---------- - categorical_threshold - Maximum number of unique values for a text column to be considered - categorical - - Returns - ------- - str - String describing the schema, including semantic view information - if available - - """ - base_schema = super().get_schema(categorical_threshold=categorical_threshold) - return self._get_schema_with_semantic_views(base_schema) - - -class SnowflakeIbisSource(IbisSource, SemanticViewMixin): - """ - Ibis-based Snowflake source with semantic view support. - - Extends IbisSource to automatically detect and provide context about - Snowflake Semantic Views when available. - """ - - _semantic_views: list[SemanticViewInfo] - - def __init__( - self, - table: ibis.Table, - table_name: str, - *, - discover_semantic_views_flag: bool = True, - ): - """ - Initialize with an Ibis Table connected to Snowflake. - - Parameters - ---------- - table - Ibis Table from a Snowflake backend - table_name - Name of the table to query - discover_semantic_views_flag - If True (default), automatically discover semantic views at - initialization. Set to False to skip discovery (e.g., for - performance or if not needed). - - """ - super().__init__(table, table_name) - - if discover_semantic_views_flag and self._backend.name.lower() == "snowflake": - executor = IbisExecutor(self._backend) - self._semantic_views = discover_semantic_views(executor) - else: - self._semantic_views = [] - - def get_schema(self, *, categorical_threshold: int) -> str: - """ - Generate schema information including semantic view context. 
- - Parameters - ---------- - categorical_threshold - Maximum number of unique values for a text column to be considered - categorical - - Returns - ------- - str - String describing the schema, including semantic view information - if available - - """ - base_schema = super().get_schema(categorical_threshold=categorical_threshold) - return self._get_schema_with_semantic_views(base_schema) diff --git a/pkg-py/tests/test_snowflake_source.py b/pkg-py/tests/test_snowflake_source.py index 4335f1cd..9d3bd361 100644 --- a/pkg-py/tests/test_snowflake_source.py +++ b/pkg-py/tests/test_snowflake_source.py @@ -1,6 +1,7 @@ """Tests for Snowflake semantic view functionality.""" import logging +import os from unittest.mock import MagicMock, patch from querychat._snowflake import ( @@ -12,7 +13,6 @@ format_semantic_views_section, get_semantic_view_ddl, ) -from querychat._snowflake_sources import SnowflakeIbisSource, SnowflakeSource class TestSemanticViewInfo: @@ -203,32 +203,13 @@ def test_execute_raw_sql(self): mock_df.to_dict.assert_called_once_with(orient="records") -class TestSnowflakeSourceDiscovery: - """Tests for SnowflakeSource semantic view discovery.""" +class TestSQLAlchemySourceSemanticViews: + """Tests for SQLAlchemySource semantic view discovery.""" - def test_discovery_disabled(self): - """Test that discover_semantic_views_flag=False skips discovery.""" - mock_engine = MagicMock() - mock_engine.dialect.name = "snowflake" - mock_inspector = MagicMock() - mock_inspector.has_table.return_value = True - mock_inspector.get_columns.return_value = [{"name": "id"}] + def test_discovery_for_snowflake_backend(self): + """Test that discovery is called for Snowflake backends.""" + from querychat._datasource import SQLAlchemySource - with patch( - "querychat._snowflake_sources.SQLAlchemySource.__init__", return_value=None - ): - source = SnowflakeSource.__new__(SnowflakeSource) - source._engine = mock_engine - source.table_name = "test" - source._columns_info = [] - 
source._colnames = [] - source._semantic_views = [] - - assert source._semantic_views == [] - assert not source.has_semantic_views - - def test_discovery_enabled_calls_discover(self): - """Test that discovery is called when enabled.""" mock_engine = MagicMock() mock_engine.dialect.name = "snowflake" mock_inspector = MagicMock() @@ -238,36 +219,21 @@ def test_discovery_enabled_calls_discover(self): with ( patch("querychat._datasource.inspect", return_value=mock_inspector), patch( - "querychat._snowflake_sources.discover_semantic_views", return_value=[] + "querychat._datasource.discover_semantic_views", return_value=[] ) as mock_discover, + patch.dict(os.environ, {}, clear=False), ): - SnowflakeSource(mock_engine, "test_table") + # Remove the disable env var if present + os.environ.pop("QUERYCHAT_DISABLE_SEMANTIC_VIEWS", None) + SQLAlchemySource(mock_engine, "test_table") mock_discover.assert_called_once() + def test_discovery_skipped_for_non_snowflake(self): + """Test that discovery is skipped for non-Snowflake backends.""" + from querychat._datasource import SQLAlchemySource -class TestSnowflakeSourceProperties: - """Tests for SnowflakeSource properties.""" - - def test_has_semantic_views_true(self): - """Test has_semantic_views returns True when views exist.""" - mock_engine = MagicMock() - mock_inspector = MagicMock() - mock_inspector.has_table.return_value = True - mock_inspector.get_columns.return_value = [{"name": "id"}] - - with ( - patch("querychat._datasource.inspect", return_value=mock_inspector), - patch( - "querychat._snowflake_sources.discover_semantic_views", return_value=[] - ), - ): - source = SnowflakeSource(mock_engine, "test_table") - source._semantic_views = [SemanticViewInfo(name="test", ddl="DDL")] - assert source.has_semantic_views is True - - def test_has_semantic_views_false(self): - """Test has_semantic_views returns False when no views.""" mock_engine = MagicMock() + mock_engine.dialect.name = "postgresql" mock_inspector = MagicMock() 
mock_inspector.has_table.return_value = True mock_inspector.get_columns.return_value = [{"name": "id"}] @@ -275,20 +241,19 @@ def test_has_semantic_views_false(self): with ( patch("querychat._datasource.inspect", return_value=mock_inspector), patch( - "querychat._snowflake_sources.discover_semantic_views", return_value=[] - ), + "querychat._datasource.discover_semantic_views" + ) as mock_discover, ): - source = SnowflakeSource(mock_engine, "test_table") - assert source.has_semantic_views is False + source = SQLAlchemySource(mock_engine, "test_table") + mock_discover.assert_not_called() + assert source._semantic_views == [] - def test_semantic_views_property(self): - """Test semantic_views property returns the list.""" - views = [ - SemanticViewInfo(name="view1", ddl="DDL1"), - SemanticViewInfo(name="view2", ddl="DDL2"), - ] + def test_discovery_disabled_via_env_var(self): + """Test that QUERYCHAT_DISABLE_SEMANTIC_VIEWS disables discovery.""" + from querychat._datasource import SQLAlchemySource mock_engine = MagicMock() + mock_engine.dialect.name = "snowflake" mock_inspector = MagicMock() mock_inspector.has_table.return_value = True mock_inspector.get_columns.return_value = [{"name": "id"}] @@ -296,22 +261,22 @@ def test_semantic_views_property(self): with ( patch("querychat._datasource.inspect", return_value=mock_inspector), patch( - "querychat._snowflake_sources.discover_semantic_views", - return_value=views, - ), + "querychat._datasource.discover_semantic_views" + ) as mock_discover, + patch.dict(os.environ, {"QUERYCHAT_DISABLE_SEMANTIC_VIEWS": "1"}), ): - source = SnowflakeSource(mock_engine, "test_table") - assert source.semantic_views == views - - -class TestGetSchemaWithSemanticViews: - """Tests for get_schema with semantic views included.""" + source = SQLAlchemySource(mock_engine, "test_table") + mock_discover.assert_not_called() + assert source._semantic_views == [] - def test_schema_includes_semantic_views(self): + def 
test_get_schema_includes_semantic_views(self): """Test that get_schema includes semantic view section.""" + from querychat._datasource import SQLAlchemySource + views = [SemanticViewInfo(name="db.schema.metrics", ddl="CREATE SEMANTIC VIEW")] mock_engine = MagicMock() + mock_engine.dialect.name = "snowflake" mock_inspector = MagicMock() mock_inspector.has_table.return_value = True mock_inspector.get_columns.return_value = [{"name": "id", "type": MagicMock()}] @@ -319,54 +284,53 @@ def test_schema_includes_semantic_views(self): with ( patch("querychat._datasource.inspect", return_value=mock_inspector), patch( - "querychat._snowflake_sources.discover_semantic_views", + "querychat._datasource.discover_semantic_views", return_value=views, ), - patch.object( - SnowflakeSource.__bases__[0], - "get_schema", - return_value="Table: test_table\nColumns:\n- id", - ), + patch.dict(os.environ, {}, clear=False), ): - source = SnowflakeSource(mock_engine, "test_table") - schema = source.get_schema(categorical_threshold=20) + os.environ.pop("QUERYCHAT_DISABLE_SEMANTIC_VIEWS", None) + source = SQLAlchemySource(mock_engine, "test_table") + + # Mock the stats query to avoid needing a real connection + with patch.object(source, "_add_column_stats"): + schema = source.get_schema(categorical_threshold=20) assert "Table: test_table" in schema assert "## Snowflake Semantic Views" in schema assert "db.schema.metrics" in schema - def test_schema_without_semantic_views(self): + def test_get_schema_without_semantic_views(self): """Test that get_schema works without semantic views.""" + from querychat._datasource import SQLAlchemySource + mock_engine = MagicMock() + mock_engine.dialect.name = "postgresql" mock_inspector = MagicMock() mock_inspector.has_table.return_value = True mock_inspector.get_columns.return_value = [{"name": "id", "type": MagicMock()}] - with ( - patch("querychat._datasource.inspect", return_value=mock_inspector), - patch( - 
"querychat._snowflake_sources.discover_semantic_views", return_value=[] - ), - patch.object( - SnowflakeSource.__bases__[0], - "get_schema", - return_value="Table: test_table\nColumns:\n- id", - ), - ): - source = SnowflakeSource(mock_engine, "test_table") - schema = source.get_schema(categorical_threshold=20) + with patch("querychat._datasource.inspect", return_value=mock_inspector): + source = SQLAlchemySource(mock_engine, "test_table") + + # Mock the stats query + with patch.object(source, "_add_column_stats"): + schema = source.get_schema(categorical_threshold=20) assert "Table: test_table" in schema assert "## Snowflake Semantic Views" not in schema -class TestSnowflakeIbisSource: - """Tests for SnowflakeIbisSource.""" +class TestIbisSourceSemanticViews: + """Tests for IbisSource semantic view discovery.""" - def test_discovery_enabled_for_snowflake(self): + def test_discovery_for_snowflake_backend(self): """Test that discovery runs for Snowflake backends.""" + from ibis.backends.sql import SQLBackend + from querychat._datasource import IbisSource + mock_table = MagicMock() - mock_backend = MagicMock() + mock_backend = MagicMock(spec=SQLBackend) mock_backend.name = "snowflake" mock_table.get_backend.return_value = mock_backend mock_schema = MagicMock() @@ -374,33 +338,23 @@ def test_discovery_enabled_for_snowflake(self): mock_schema.names = [] mock_table.schema.return_value = mock_schema - with patch( - "querychat._snowflake_sources.discover_semantic_views", return_value=[] - ) as mock_discover: - with patch( - "querychat._snowflake_sources.IbisSource.__init__", return_value=None - ): - source = SnowflakeIbisSource.__new__(SnowflakeIbisSource) - source._table = mock_table - source.table_name = "test" - source._schema = mock_schema - source._backend = mock_backend - source._colnames = [] - - # Manually call the __init__ logic that would use discover_semantic_views - # Import from _snowflake_sources module's namespace (where it's imported) - import 
querychat._snowflake_sources as sf_sources - - # Verify the function is importable and would be called - executor = sf_sources.IbisExecutor(mock_backend) - source._semantic_views = sf_sources.discover_semantic_views(executor) - + with ( + patch( + "querychat._datasource.discover_semantic_views", return_value=[] + ) as mock_discover, + patch.dict(os.environ, {}, clear=False), + ): + os.environ.pop("QUERYCHAT_DISABLE_SEMANTIC_VIEWS", None) + IbisSource(mock_table, "test") mock_discover.assert_called_once() - def test_discovery_disabled_for_non_snowflake(self): + def test_discovery_skipped_for_non_snowflake(self): """Test that discovery is skipped for non-Snowflake backends.""" + from ibis.backends.sql import SQLBackend + from querychat._datasource import IbisSource + mock_table = MagicMock() - mock_backend = MagicMock() + mock_backend = MagicMock(spec=SQLBackend) mock_backend.name = "postgres" mock_table.get_backend.return_value = mock_backend mock_schema = MagicMock() @@ -409,29 +363,19 @@ def test_discovery_disabled_for_non_snowflake(self): mock_table.schema.return_value = mock_schema with patch( - "querychat._snowflake_sources.discover_semantic_views" + "querychat._datasource.discover_semantic_views" ) as mock_discover: - with patch( - "querychat._snowflake_sources.IbisSource.__init__", return_value=None - ): - source = SnowflakeIbisSource.__new__(SnowflakeIbisSource) - source._table = mock_table - source.table_name = "test" - source._schema = mock_schema - source._backend = mock_backend - source._colnames = [] - - # Manually set semantic views (simulating what __init__ does for non-Snowflake) - source._semantic_views = [] - - # Since backend is not Snowflake, discover should not be called + source = IbisSource(mock_table, "test") mock_discover.assert_not_called() assert source._semantic_views == [] - def test_has_semantic_views_mixin(self): - """Test that SnowflakeIbisSource has semantic view mixin properties.""" + def test_discovery_disabled_via_env_var(self): + 
"""Test that QUERYCHAT_DISABLE_SEMANTIC_VIEWS disables discovery.""" + from ibis.backends.sql import SQLBackend + from querychat._datasource import IbisSource + mock_table = MagicMock() - mock_backend = MagicMock() + mock_backend = MagicMock(spec=SQLBackend) mock_backend.name = "snowflake" mock_table.get_backend.return_value = mock_backend mock_schema = MagicMock() @@ -441,19 +385,44 @@ def test_has_semantic_views_mixin(self): with ( patch( - "querychat._snowflake_sources.discover_semantic_views", return_value=[] - ), + "querychat._datasource.discover_semantic_views" + ) as mock_discover, + patch.dict(os.environ, {"QUERYCHAT_DISABLE_SEMANTIC_VIEWS": "1"}), + ): + source = IbisSource(mock_table, "test") + mock_discover.assert_not_called() + assert source._semantic_views == [] + + def test_get_schema_includes_semantic_views(self): + """Test that get_schema includes semantic view section.""" + from ibis.backends.sql import SQLBackend + from querychat._datasource import IbisSource + + views = [SemanticViewInfo(name="db.schema.metrics", ddl="CREATE SEMANTIC VIEW")] + + mock_table = MagicMock() + mock_backend = MagicMock(spec=SQLBackend) + mock_backend.name = "snowflake" + mock_table.get_backend.return_value = mock_backend + mock_schema = MagicMock() + mock_schema.items.return_value = [("id", MagicMock())] + mock_schema.names = ["id"] + mock_table.schema.return_value = mock_schema + + with ( patch( - "querychat._snowflake_sources.IbisSource.__init__", return_value=None + "querychat._datasource.discover_semantic_views", + return_value=views, ), + patch.dict(os.environ, {}, clear=False), ): - source = SnowflakeIbisSource.__new__(SnowflakeIbisSource) - source._table = mock_table - source.table_name = "test" - source._schema = mock_schema - source._backend = mock_backend - source._colnames = [] - source._semantic_views = [SemanticViewInfo(name="test", ddl="DDL")] - - assert source.has_semantic_views is True - assert source.semantic_views == [SemanticViewInfo(name="test", 
ddl="DDL")] + os.environ.pop("QUERYCHAT_DISABLE_SEMANTIC_VIEWS", None) + source = IbisSource(mock_table, "test_table") + + # Mock _add_column_stats to avoid complex aggregation setup + with patch.object(IbisSource, "_add_column_stats"): + schema = source.get_schema(categorical_threshold=20) + + assert "Table: test_table" in schema + assert "## Snowflake Semantic Views" in schema + assert "db.schema.metrics" in schema From 8c40db4846aa3a42ded4e64088d505ec25f09ae6 Mon Sep 17 00:00:00 2001 From: Carson Date: Mon, 26 Jan 2026 12:30:48 -0600 Subject: [PATCH 08/45] refactor: Move SEMANTIC_VIEW_SYNTAX to shared prompt files Extract inline syntax documentation to `prompts/semantic-view-syntax.md` in both R and Python packages, making it language-agnostic and easier to maintain. Co-Authored-By: Claude Opus 4.5 --- pkg-py/src/querychat/_system_prompt.py | 109 ++--------------- .../querychat/prompts/semantic-view-syntax.md | 98 ++++++++++++++++ pkg-r/R/QueryChatSystemPrompt.R | 110 +----------------- pkg-r/inst/prompts/semantic-view-syntax.md | 98 ++++++++++++++++ 4 files changed, 209 insertions(+), 206 deletions(-) create mode 100644 pkg-py/src/querychat/prompts/semantic-view-syntax.md create mode 100644 pkg-r/inst/prompts/semantic-view-syntax.md diff --git a/pkg-py/src/querychat/_system_prompt.py b/pkg-py/src/querychat/_system_prompt.py index addf7862..b86e163a 100644 --- a/pkg-py/src/querychat/_system_prompt.py +++ b/pkg-py/src/querychat/_system_prompt.py @@ -10,107 +10,12 @@ from ._querychat_base import TOOL_GROUPS -# Reference documentation for SEMANTIC_VIEW() query syntax -SEMANTIC_VIEW_SYNTAX = """ -## SEMANTIC_VIEW() Query Syntax - -When Semantic Views are available, use the `SEMANTIC_VIEW()` table function instead of raw SQL. 
- -### Basic Syntax - -```sql -SELECT * FROM SEMANTIC_VIEW( - {view_name} - METRICS {logical_table}.{metric_name} - DIMENSIONS {logical_table}.{dimension_name} - [WHERE {dimension} = 'value'] -- Optional: pre-aggregation filter -) -[WHERE {column} = 'value'] -- Optional: post-aggregation filter -``` - -### Key Rules - -1. **Use `SEMANTIC_VIEW()` function** - Not direct SELECT FROM the view -2. **No GROUP BY needed** - Semantic layer handles aggregation via DIMENSIONS -3. **No JOINs needed within model** - Relationships are pre-defined -4. **No aggregate functions needed** - Metrics are pre-aggregated -5. **Use DDL-defined names** - Metrics and dimensions must match the DDL exactly - -### WHERE Clause: Inside vs Outside - -- **Inside** (pre-aggregation): Filters base data BEFORE metrics are computed -- **Outside** (post-aggregation): Filters results AFTER metrics are computed - -```sql --- Pre-aggregation: only include 'EXT' accounts in the calculation -SELECT * FROM SEMANTIC_VIEW( - MODEL_NAME - METRICS T_DATA.NET_REVENUE - DIMENSIONS REF_ENTITIES.ACC_TYPE_CD - WHERE REF_ENTITIES.ACC_TYPE_CD = 'EXT' -) - --- Post-aggregation: compute all, then filter results -SELECT * FROM SEMANTIC_VIEW( - MODEL_NAME - METRICS T_DATA.NET_REVENUE - DIMENSIONS REF_ENTITIES.ACC_TYPE_CD -) -WHERE NET_REVENUE > 1000000 -``` - -### Common Patterns - -**Single metric (total):** -```sql -SELECT * FROM SEMANTIC_VIEW(MODEL_NAME METRICS T_DATA.NET_REVENUE) -``` - -**Metric by dimension:** -```sql -SELECT * FROM SEMANTIC_VIEW( - MODEL_NAME - METRICS T_DATA.NET_REVENUE - DIMENSIONS REF_ENTITIES.ACC_TYPE_CD -) -``` - -**Multiple metrics and dimensions:** -```sql -SELECT * FROM SEMANTIC_VIEW( - MODEL_NAME - METRICS T_DATA.NET_REVENUE, T_DATA.GROSS_REVENUE - DIMENSIONS REF_ENTITIES.ACC_TYPE_CD, T_DATA.LOG_DT -) -ORDER BY LOG_DT ASC -``` - -**Time series:** -```sql -SELECT * FROM SEMANTIC_VIEW( - MODEL_NAME - METRICS T_DATA.NET_REVENUE - DIMENSIONS T_DATA.LOG_DT -) -ORDER BY LOG_DT ASC -``` - 
-**Join results with other data:** -```sql -SELECT sv.*, lookup.category_name -FROM SEMANTIC_VIEW( - MODEL_NAME - METRICS T_DATA.NET_REVENUE - DIMENSIONS REF_ENTITIES.ACC_TYPE_CD -) AS sv -JOIN category_lookup AS lookup ON sv.ACC_TYPE_CD = lookup.code -``` - -### Troubleshooting - -- **"Invalid identifier"**: Verify metric/dimension names match exactly what's in the DDL -- **Syntax error**: Use SEMANTIC_VIEW() function, GROUP BY isn't needed -""" +PROMPTS_DIR = Path(__file__).parent / "prompts" + + +def get_semantic_view_syntax() -> str: + """Load SEMANTIC_VIEW_SYNTAX from shared prompt file.""" + return (PROMPTS_DIR / "semantic-view-syntax.md").read_text() class QueryChatSystemPrompt: @@ -181,7 +86,7 @@ def render(self, tools: tuple[TOOL_GROUPS, ...] | None) -> str: "is_duck_db": is_duck_db, "is_snowflake": is_snowflake, "has_semantic_views": has_semantic_views, - "semantic_view_syntax": SEMANTIC_VIEW_SYNTAX if has_semantic_views else "", + "semantic_view_syntax": get_semantic_view_syntax() if has_semantic_views else "", "schema": self.schema, "data_description": self.data_description, "extra_instructions": self.extra_instructions, diff --git a/pkg-py/src/querychat/prompts/semantic-view-syntax.md b/pkg-py/src/querychat/prompts/semantic-view-syntax.md new file mode 100644 index 00000000..062f5aa8 --- /dev/null +++ b/pkg-py/src/querychat/prompts/semantic-view-syntax.md @@ -0,0 +1,98 @@ +## SEMANTIC_VIEW() Query Syntax + +When Semantic Views are available, use the `SEMANTIC_VIEW()` table function instead of raw SQL. + +### Basic Syntax + +```sql +SELECT * FROM SEMANTIC_VIEW( + {view_name} + METRICS {logical_table}.{metric_name} + DIMENSIONS {logical_table}.{dimension_name} + [WHERE {dimension} = 'value'] -- Optional: pre-aggregation filter +) +[WHERE {column} = 'value'] -- Optional: post-aggregation filter +``` + +### Key Rules + +1. **Use `SEMANTIC_VIEW()` function** - Not direct SELECT FROM the view +2. 
**No GROUP BY needed** - Semantic layer handles aggregation via DIMENSIONS +3. **No JOINs needed within model** - Relationships are pre-defined +4. **No aggregate functions needed** - Metrics are pre-aggregated +5. **Use DDL-defined names** - Metrics and dimensions must match the DDL exactly + +### WHERE Clause: Inside vs Outside + +- **Inside** (pre-aggregation): Filters base data BEFORE metrics are computed +- **Outside** (post-aggregation): Filters results AFTER metrics are computed + +```sql +-- Pre-aggregation: only include 'EXT' accounts in the calculation +SELECT * FROM SEMANTIC_VIEW( + MODEL_NAME + METRICS T_DATA.NET_REVENUE + DIMENSIONS REF_ENTITIES.ACC_TYPE_CD + WHERE REF_ENTITIES.ACC_TYPE_CD = 'EXT' +) + +-- Post-aggregation: compute all, then filter results +SELECT * FROM SEMANTIC_VIEW( + MODEL_NAME + METRICS T_DATA.NET_REVENUE + DIMENSIONS REF_ENTITIES.ACC_TYPE_CD +) +WHERE NET_REVENUE > 1000000 +``` + +### Common Patterns + +**Single metric (total):** +```sql +SELECT * FROM SEMANTIC_VIEW(MODEL_NAME METRICS T_DATA.NET_REVENUE) +``` + +**Metric by dimension:** +```sql +SELECT * FROM SEMANTIC_VIEW( + MODEL_NAME + METRICS T_DATA.NET_REVENUE + DIMENSIONS REF_ENTITIES.ACC_TYPE_CD +) +``` + +**Multiple metrics and dimensions:** +```sql +SELECT * FROM SEMANTIC_VIEW( + MODEL_NAME + METRICS T_DATA.NET_REVENUE, T_DATA.GROSS_REVENUE + DIMENSIONS REF_ENTITIES.ACC_TYPE_CD, T_DATA.LOG_DT +) +ORDER BY LOG_DT ASC +``` + +**Time series:** +```sql +SELECT * FROM SEMANTIC_VIEW( + MODEL_NAME + METRICS T_DATA.NET_REVENUE + DIMENSIONS T_DATA.LOG_DT +) +ORDER BY LOG_DT ASC +``` + +**Join results with other data:** +```sql +SELECT sv.*, lookup.category_name +FROM SEMANTIC_VIEW( + MODEL_NAME + METRICS T_DATA.NET_REVENUE + DIMENSIONS REF_ENTITIES.ACC_TYPE_CD +) AS sv +JOIN category_lookup AS lookup ON sv.ACC_TYPE_CD = lookup.code +``` + +### Troubleshooting + +- **"Invalid identifier"**: Verify metric/dimension names match exactly what's in the DDL +- **Syntax error**: Use 
SEMANTIC_VIEW() function, GROUP BY isn't needed diff --git a/pkg-r/R/QueryChatSystemPrompt.R b/pkg-r/R/QueryChatSystemPrompt.R index 07e8d11f..94662308 100644 --- a/pkg-r/R/QueryChatSystemPrompt.R +++ b/pkg-r/R/QueryChatSystemPrompt.R @@ -97,7 +97,7 @@ QueryChatSystemPrompt <- R6::R6Class( is_duck_db = is_duck_db, is_snowflake = if (is_snowflake) "true", has_semantic_views = if (has_semantic_views) "true", - semantic_view_syntax = if (has_semantic_views) SEMANTIC_VIEW_SYNTAX, + semantic_view_syntax = if (has_semantic_views) get_semantic_view_syntax(), schema = self$schema, data_description = self$data_description, extra_instructions = self$extra_instructions, @@ -111,109 +111,11 @@ QueryChatSystemPrompt <- R6::R6Class( ) ) -# Reference documentation for SEMANTIC_VIEW() query syntax -# nolint start: line_length_linter. -SEMANTIC_VIEW_SYNTAX <- ' -## SEMANTIC_VIEW() Query Syntax - -When Semantic Views are available, use the `SEMANTIC_VIEW()` table function instead of raw SQL. - -### Basic Syntax - -```sql -SELECT * FROM SEMANTIC_VIEW( - {view_name} - METRICS {logical_table}.{metric_name} - DIMENSIONS {logical_table}.{dimension_name} - [WHERE {dimension} = \'value\'] -- Optional: pre-aggregation filter -) -[WHERE {column} = \'value\'] -- Optional: post-aggregation filter -``` - -### Key Rules - -1. **Use `SEMANTIC_VIEW()` function** - Not direct SELECT FROM the view -2. **No GROUP BY needed** - Semantic layer handles aggregation via DIMENSIONS -3. **No JOINs needed within model** - Relationships are pre-defined -4. **No aggregate functions needed** - Metrics are pre-aggregated -5. 
**Use DDL-defined names** - Metrics and dimensions must match the DDL exactly - -### WHERE Clause: Inside vs Outside - -- **Inside** (pre-aggregation): Filters base data BEFORE metrics are computed -- **Outside** (post-aggregation): Filters results AFTER metrics are computed - -```sql --- Pre-aggregation: only include \'EXT\' accounts in the calculation -SELECT * FROM SEMANTIC_VIEW( - MODEL_NAME - METRICS T_DATA.NET_REVENUE - DIMENSIONS REF_ENTITIES.ACC_TYPE_CD - WHERE REF_ENTITIES.ACC_TYPE_CD = \'EXT\' -) - --- Post-aggregation: compute all, then filter results -SELECT * FROM SEMANTIC_VIEW( - MODEL_NAME - METRICS T_DATA.NET_REVENUE - DIMENSIONS REF_ENTITIES.ACC_TYPE_CD -) -WHERE NET_REVENUE > 1000000 -``` - -### Common Patterns - -**Single metric (total):** -```sql -SELECT * FROM SEMANTIC_VIEW(MODEL_NAME METRICS T_DATA.NET_REVENUE) -``` - -**Metric by dimension:** -```sql -SELECT * FROM SEMANTIC_VIEW( - MODEL_NAME - METRICS T_DATA.NET_REVENUE - DIMENSIONS REF_ENTITIES.ACC_TYPE_CD -) -``` - -**Multiple metrics and dimensions:** -```sql -SELECT * FROM SEMANTIC_VIEW( - MODEL_NAME - METRICS T_DATA.NET_REVENUE, T_DATA.GROSS_REVENUE - DIMENSIONS REF_ENTITIES.ACC_TYPE_CD, T_DATA.LOG_DT -) -ORDER BY LOG_DT ASC -``` - -**Time series:** -```sql -SELECT * FROM SEMANTIC_VIEW( - MODEL_NAME - METRICS T_DATA.NET_REVENUE - DIMENSIONS T_DATA.LOG_DT -) -ORDER BY LOG_DT ASC -``` - -**Join results with other data:** -```sql -SELECT sv.*, lookup.category_name -FROM SEMANTIC_VIEW( - MODEL_NAME - METRICS T_DATA.NET_REVENUE - DIMENSIONS REF_ENTITIES.ACC_TYPE_CD -) AS sv -JOIN category_lookup AS lookup ON sv.ACC_TYPE_CD = lookup.code -``` - -### Troubleshooting - -- **"Invalid identifier"**: Verify metric/dimension names match exactly what is in the DDL -- **Syntax error**: Use SEMANTIC_VIEW() function, GROUP BY is not needed -' -# nolint end +# Load SEMANTIC_VIEW_SYNTAX from shared prompt file +get_semantic_view_syntax <- function() { + path <- system.file("prompts", 
"semantic-view-syntax.md", package = "querychat") + read_utf8(path) +} # Utility function for loading file or string content read_text <- function(x) { diff --git a/pkg-r/inst/prompts/semantic-view-syntax.md b/pkg-r/inst/prompts/semantic-view-syntax.md new file mode 100644 index 00000000..062f5aa8 --- /dev/null +++ b/pkg-r/inst/prompts/semantic-view-syntax.md @@ -0,0 +1,98 @@ +## SEMANTIC_VIEW() Query Syntax + +When Semantic Views are available, use the `SEMANTIC_VIEW()` table function instead of raw SQL. + +### Basic Syntax + +```sql +SELECT * FROM SEMANTIC_VIEW( + {view_name} + METRICS {logical_table}.{metric_name} + DIMENSIONS {logical_table}.{dimension_name} + [WHERE {dimension} = 'value'] -- Optional: pre-aggregation filter +) +[WHERE {column} = 'value'] -- Optional: post-aggregation filter +``` + +### Key Rules + +1. **Use `SEMANTIC_VIEW()` function** - Not direct SELECT FROM the view +2. **No GROUP BY needed** - Semantic layer handles aggregation via DIMENSIONS +3. **No JOINs needed within model** - Relationships are pre-defined +4. **No aggregate functions needed** - Metrics are pre-aggregated +5. 
**Use DDL-defined names** - Metrics and dimensions must match the DDL exactly + +### WHERE Clause: Inside vs Outside + +- **Inside** (pre-aggregation): Filters base data BEFORE metrics are computed +- **Outside** (post-aggregation): Filters results AFTER metrics are computed + +```sql +-- Pre-aggregation: only include 'EXT' accounts in the calculation +SELECT * FROM SEMANTIC_VIEW( + MODEL_NAME + METRICS T_DATA.NET_REVENUE + DIMENSIONS REF_ENTITIES.ACC_TYPE_CD + WHERE REF_ENTITIES.ACC_TYPE_CD = 'EXT' +) + +-- Post-aggregation: compute all, then filter results +SELECT * FROM SEMANTIC_VIEW( + MODEL_NAME + METRICS T_DATA.NET_REVENUE + DIMENSIONS REF_ENTITIES.ACC_TYPE_CD +) +WHERE NET_REVENUE > 1000000 +``` + +### Common Patterns + +**Single metric (total):** +```sql +SELECT * FROM SEMANTIC_VIEW(MODEL_NAME METRICS T_DATA.NET_REVENUE) +``` + +**Metric by dimension:** +```sql +SELECT * FROM SEMANTIC_VIEW( + MODEL_NAME + METRICS T_DATA.NET_REVENUE + DIMENSIONS REF_ENTITIES.ACC_TYPE_CD +) +``` + +**Multiple metrics and dimensions:** +```sql +SELECT * FROM SEMANTIC_VIEW( + MODEL_NAME + METRICS T_DATA.NET_REVENUE, T_DATA.GROSS_REVENUE + DIMENSIONS REF_ENTITIES.ACC_TYPE_CD, T_DATA.LOG_DT +) +ORDER BY LOG_DT ASC +``` + +**Time series:** +```sql +SELECT * FROM SEMANTIC_VIEW( + MODEL_NAME + METRICS T_DATA.NET_REVENUE + DIMENSIONS T_DATA.LOG_DT +) +ORDER BY LOG_DT ASC +``` + +**Join results with other data:** +```sql +SELECT sv.*, lookup.category_name +FROM SEMANTIC_VIEW( + MODEL_NAME + METRICS T_DATA.NET_REVENUE + DIMENSIONS REF_ENTITIES.ACC_TYPE_CD +) AS sv +JOIN category_lookup AS lookup ON sv.ACC_TYPE_CD = lookup.code +``` + +### Troubleshooting + +- **"Invalid identifier"**: Verify metric/dimension names match exactly what's in the DDL +- **Syntax error**: Use SEMANTIC_VIEW() function, GROUP BY isn't needed From 78d4f3ebc88295b0aea66aeed032c0ca86fad6ff Mon Sep 17 00:00:00 2001 From: cpsievert Date: Mon, 26 Jan 2026 18:34:18 +0000 Subject: [PATCH 09/45] `air format` (GitHub 
Actions) --- pkg-r/R/QueryChatSystemPrompt.R | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pkg-r/R/QueryChatSystemPrompt.R b/pkg-r/R/QueryChatSystemPrompt.R index 94662308..f10b5c63 100644 --- a/pkg-r/R/QueryChatSystemPrompt.R +++ b/pkg-r/R/QueryChatSystemPrompt.R @@ -97,7 +97,9 @@ QueryChatSystemPrompt <- R6::R6Class( is_duck_db = is_duck_db, is_snowflake = if (is_snowflake) "true", has_semantic_views = if (has_semantic_views) "true", - semantic_view_syntax = if (has_semantic_views) get_semantic_view_syntax(), + semantic_view_syntax = if (has_semantic_views) { + get_semantic_view_syntax() + }, schema = self$schema, data_description = self$data_description, extra_instructions = self$extra_instructions, @@ -113,7 +115,11 @@ QueryChatSystemPrompt <- R6::R6Class( # Load SEMANTIC_VIEW_SYNTAX from shared prompt file get_semantic_view_syntax <- function() { - path <- system.file("prompts", "semantic-view-syntax.md", package = "querychat") + path <- system.file( + "prompts", + "semantic-view-syntax.md", + package = "querychat" + ) read_utf8(path) } From df33e8289ede38584d19d5c52be28cad13a46dc8 Mon Sep 17 00:00:00 2001 From: Carson Sievert Date: Mon, 26 Jan 2026 15:01:00 -0600 Subject: [PATCH 10/45] Update pkg-py/src/querychat/_querychat_base.py --- pkg-py/src/querychat/_querychat_base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg-py/src/querychat/_querychat_base.py b/pkg-py/src/querychat/_querychat_base.py index de87a2c8..e8a7c7f1 100644 --- a/pkg-py/src/querychat/_querychat_base.py +++ b/pkg-py/src/querychat/_querychat_base.py @@ -228,7 +228,6 @@ def normalize_data_source( ) -> DataSource: if isinstance(data_source, DataSource): return data_source - if isinstance(data_source, sqlalchemy.Engine): return SQLAlchemySource(data_source, table_name) From 1f0d888fa6978f2731c3524364dd8959565951e8 Mon Sep 17 00:00:00 2001 From: Carson Date: Mon, 26 Jan 2026 15:18:47 -0600 Subject: [PATCH 11/45] refactor: Extract Snowflake semantic views 
into adapter pattern - Replace Protocol/class-based OOP with functional approach using backend_type: Literal["sqlalchemy", "ibis"] discriminator - Move env var check (QUERYCHAT_DISABLE_SEMANTIC_VIEWS) into discover_semantic_views() for early exit - Move semantic view discovery from __init__ to get_schema() for lazy initialization - Remove SemanticViewMixin in favor of direct function calls - Update tests to verify new lazy discovery behavior Co-Authored-By: Claude Opus 4.5 --- pkg-py/src/querychat/_datasource.py | 53 ++-- pkg-py/src/querychat/_snowflake.py | 177 ++++-------- pkg-py/tests/test_snowflake_source.py | 398 ++++++++++++++++---------- 3 files changed, 335 insertions(+), 293 deletions(-) diff --git a/pkg-py/src/querychat/_datasource.py b/pkg-py/src/querychat/_datasource.py index e366e20f..0c5ca568 100644 --- a/pkg-py/src/querychat/_datasource.py +++ b/pkg-py/src/querychat/_datasource.py @@ -1,6 +1,5 @@ from __future__ import annotations -import os from abc import ABC, abstractmethod from dataclasses import dataclass, field from typing import TYPE_CHECKING, Any, Generic, Literal, cast @@ -13,9 +12,7 @@ from ._df_compat import read_sql from ._snowflake import ( - IbisExecutor, SemanticViewInfo, - SQLAlchemyExecutor, discover_semantic_views, format_semantic_views_section, ) @@ -437,7 +434,7 @@ class SQLAlchemySource(DataSource[nw.DataFrame]): and Databricks. 
""" - _semantic_views: list[SemanticViewInfo] + _semantic_views: list[SemanticViewInfo] | None def __init__( self, @@ -467,14 +464,8 @@ def __init__( self._columns_info = inspector.get_columns(table_name) self._colnames = [col["name"] for col in self._columns_info] - # Discover Snowflake semantic views if applicable - self._semantic_views = [] - if ( - self._engine.dialect.name.lower() == "snowflake" - and not os.environ.get("QUERYCHAT_DISABLE_SEMANTIC_VIEWS") - ): - executor = SQLAlchemyExecutor(engine) - self._semantic_views = discover_semantic_views(executor) + # Semantic views are discovered lazily in get_schema() + self._semantic_views = None def get_db_type(self) -> str: """ @@ -508,11 +499,25 @@ def get_schema(self, *, categorical_threshold: int) -> str: self._add_column_stats(columns, categorical_threshold) schema = format_schema(self.table_name, columns) + # Discover Snowflake semantic views lazily (only on first call) + if self._semantic_views is None: + if self._engine.dialect.name.lower() == "snowflake": + self._semantic_views = discover_semantic_views( + self._engine, "sqlalchemy" + ) + else: + self._semantic_views = [] + if self._semantic_views: schema = f"{schema}\n\n{format_semantic_views_section(self._semantic_views)}" return schema + @property + def has_semantic_views(self) -> bool: + """Check if semantic views are available.""" + return bool(self._semantic_views) + @staticmethod def _make_column_meta(name: str, sa_type: sqltypes.TypeEngine) -> ColumnMeta: """Create ColumnMeta from SQLAlchemy type.""" @@ -950,7 +955,7 @@ class IbisSource(DataSource["ibis.Table"]): _table: ibis.Table _backend: SQLBackend - _semantic_views: list[SemanticViewInfo] + _semantic_views: list[SemanticViewInfo] | None table_name: str def __init__(self, table: ibis.Table, table_name: str): @@ -975,14 +980,8 @@ def __init__(self, table: ibis.Table, table_name: str): ) self._colnames = list(colnames) - # Discover Snowflake semantic views if applicable - self._semantic_views = 
[] - if ( - self._backend.name.lower() == "snowflake" - and not os.environ.get("QUERYCHAT_DISABLE_SEMANTIC_VIEWS") - ): - executor = IbisExecutor(self._backend) - self._semantic_views = discover_semantic_views(executor) + # Semantic views are discovered lazily in get_schema() + self._semantic_views = None def get_db_type(self) -> str: return self._backend.name @@ -994,11 +993,23 @@ def get_schema(self, *, categorical_threshold: int) -> str: self._add_column_stats(columns, self._table, categorical_threshold) schema = format_schema(self.table_name, columns) + # Discover Snowflake semantic views lazily (only on first call) + if self._semantic_views is None: + if self._backend.name.lower() == "snowflake": + self._semantic_views = discover_semantic_views(self._backend, "ibis") + else: + self._semantic_views = [] + if self._semantic_views: schema = f"{schema}\n\n{format_semantic_views_section(self._semantic_views)}" return schema + @property + def has_semantic_views(self) -> bool: + """Check if semantic views are available.""" + return bool(self._semantic_views) + @staticmethod def _make_column_meta(name: str, dtype: IbisDataType) -> ColumnMeta: """Create ColumnMeta from an ibis dtype.""" diff --git a/pkg-py/src/querychat/_snowflake.py b/pkg-py/src/querychat/_snowflake.py index facba149..00bb5704 100644 --- a/pkg-py/src/querychat/_snowflake.py +++ b/pkg-py/src/querychat/_snowflake.py @@ -1,24 +1,25 @@ """ Snowflake-specific utilities for semantic view discovery. -This module provides a backend-agnostic interface for discovering Snowflake -Semantic Views. It uses a Protocol pattern to abstract SQL execution, allowing -the same discovery logic to work with both SQLAlchemy engines and Ibis backends. +This module provides functions for discovering Snowflake Semantic Views, +supporting both SQLAlchemy engines and Ibis backends through a type parameter. 
""" from __future__ import annotations import logging +import os from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, Protocol +from typing import TYPE_CHECKING, Any, Literal if TYPE_CHECKING: from ibis.backends.sql import SQLBackend from sqlalchemy import Engine - from sqlalchemy.engine import Connection logger = logging.getLogger(__name__) +BackendType = Literal["sqlalchemy", "ibis"] + @dataclass class SemanticViewInfo: @@ -31,81 +32,56 @@ class SemanticViewInfo: """The DDL definition from GET_DDL().""" -class RawSQLExecutor(Protocol): - """ - Protocol for executing raw SQL queries. - - This abstraction allows semantic view discovery to work with different - database backends (SQLAlchemy, Ibis) without knowing the specific API. +def execute_raw_sql( + query: str, + backend: Engine | SQLBackend, + backend_type: BackendType, +) -> list[dict[str, Any]]: """ + Execute raw SQL and return results as list of row dicts. - def execute_raw_sql(self, query: str) -> list[dict[str, Any]]: - """Execute raw SQL and return results as list of row dicts.""" - ... 
- + Parameters + ---------- + query + SQL query to execute + backend + SQLAlchemy Engine or Ibis SQLBackend + backend_type + Type of backend: "sqlalchemy" or "ibis" -class SQLAlchemyExecutor: - """Raw SQL executor for SQLAlchemy engines.""" + Returns + ------- + list[dict[str, Any]] + Query results as list of row dictionaries - def __init__(self, engine: Engine): + """ + if backend_type == "sqlalchemy": from sqlalchemy import text - self._engine = engine - self._text = text - - def execute_raw_sql(self, query: str) -> list[dict[str, Any]]: - """Execute raw SQL and return results as list of row dicts.""" - with self._engine.connect() as conn: - result = conn.execute(self._text(query)) + with backend.connect() as conn: # type: ignore[union-attr] + result = conn.execute(text(query)) keys = list(result.keys()) return [dict(zip(keys, row, strict=False)) for row in result.fetchall()] - - -class SQLAlchemyConnectionExecutor: - """ - Raw SQL executor for an active SQLAlchemy connection. - - Unlike SQLAlchemyExecutor, this uses an existing connection rather than - creating a new one. Useful when you need to execute multiple queries - within the same connection/transaction. 
- """ - - def __init__(self, conn: Connection): - from sqlalchemy import text - - self._conn = conn - self._text = text - - def execute_raw_sql(self, query: str) -> list[dict[str, Any]]: - """Execute raw SQL and return results as list of row dicts.""" - result = self._conn.execute(self._text(query)) - keys = list(result.keys()) - return [dict(zip(keys, row, strict=False)) for row in result.fetchall()] - - -class IbisExecutor: - """Raw SQL executor for Ibis backends.""" - - def __init__(self, backend: SQLBackend): - self._backend = backend - - def execute_raw_sql(self, query: str) -> list[dict[str, Any]]: - """Execute raw SQL and return results as list of row dicts.""" - # Use backend.sql() to create an ibis table from raw SQL, then execute - result_table = self._backend.sql(query) + else: + # Ibis backend + result_table = backend.sql(query) # type: ignore[union-attr] df = result_table.execute() - # execute() returns a pandas DataFrame - return df.to_dict(orient="records") # type: ignore[call-overload] + return df.to_dict(orient="records") # type: ignore[return-value] -def discover_semantic_views(executor: RawSQLExecutor) -> list[SemanticViewInfo]: +def discover_semantic_views( + backend: Engine | SQLBackend, + backend_type: BackendType, +) -> list[SemanticViewInfo]: """ - Discover semantic views using any SQL executor. + Discover semantic views in the current schema. 
Parameters ---------- - executor - An object implementing the RawSQLExecutor protocol + backend + SQLAlchemy Engine or Ibis SQLBackend + backend_type + Type of backend: "sqlalchemy" or "ibis" Returns ------- @@ -113,7 +89,11 @@ def discover_semantic_views(executor: RawSQLExecutor) -> list[SemanticViewInfo]: List of semantic views with their DDL definitions """ - rows = executor.execute_raw_sql("SHOW SEMANTIC VIEWS") + # Check env var for early exit + if os.environ.get("QUERYCHAT_DISABLE_SEMANTIC_VIEWS"): + return [] + + rows = execute_raw_sql("SHOW SEMANTIC VIEWS", backend, backend_type) if not rows: logger.debug("No semantic views found in current schema") @@ -129,21 +109,27 @@ def discover_semantic_views(executor: RawSQLExecutor) -> list[SemanticViewInfo]: continue fq_name = f"{db}.{schema}.{name}" - ddl = get_semantic_view_ddl(executor, fq_name) + ddl = get_semantic_view_ddl(backend, backend_type, fq_name) if ddl: views.append(SemanticViewInfo(name=fq_name, ddl=ddl)) return views -def get_semantic_view_ddl(executor: RawSQLExecutor, fq_name: str) -> str | None: +def get_semantic_view_ddl( + backend: Engine | SQLBackend, + backend_type: BackendType, + fq_name: str, +) -> str | None: """ Get DDL for a semantic view. 
Parameters ---------- - executor - An object implementing the RawSQLExecutor protocol + backend + SQLAlchemy Engine or Ibis SQLBackend + backend_type + Type of backend: "sqlalchemy" or "ibis" fq_name Fully qualified name (database.schema.view_name) @@ -155,7 +141,9 @@ def get_semantic_view_ddl(executor: RawSQLExecutor, fq_name: str) -> str | None: """ # Escape single quotes to prevent SQL injection safe_name = fq_name.replace("'", "''") - rows = executor.execute_raw_sql(f"SELECT GET_DDL('SEMANTIC_VIEW', '{safe_name}')") + rows = execute_raw_sql( + f"SELECT GET_DDL('SEMANTIC_VIEW', '{safe_name}')", backend, backend_type + ) if rows: return str(next(iter(rows[0].values()))) return None @@ -198,50 +186,3 @@ def format_semantic_views_section(semantic_views: list[SemanticViewInfo]) -> str lines.append("") return "\n".join(lines) - - -class SemanticViewMixin: - """ - Mixin providing semantic view support for get_schema(). - - This mixin adds semantic view discovery and schema formatting to DataSource - subclasses. Classes using this mixin must initialize `_semantic_views` in - their constructor. - - Attributes - ---------- - _semantic_views : list[SemanticViewInfo] - List of discovered semantic views (set by subclass) - - """ - - _semantic_views: list[SemanticViewInfo] - - def _get_schema_with_semantic_views(self, base_schema: str) -> str: - """ - Append semantic view section to base schema if views exist. 
- - Parameters - ---------- - base_schema - The base schema string from the parent class - - Returns - ------- - str - Schema with semantic views section appended (if any exist) - - """ - if not self._semantic_views: - return base_schema - return f"{base_schema}\n\n{format_semantic_views_section(self._semantic_views)}" - - @property - def has_semantic_views(self) -> bool: - """Check if semantic views are available.""" - return len(self._semantic_views) > 0 - - @property - def semantic_views(self) -> list[SemanticViewInfo]: - """Get the list of discovered semantic views.""" - return self._semantic_views diff --git a/pkg-py/tests/test_snowflake_source.py b/pkg-py/tests/test_snowflake_source.py index 9d3bd361..2e34344c 100644 --- a/pkg-py/tests/test_snowflake_source.py +++ b/pkg-py/tests/test_snowflake_source.py @@ -5,11 +5,9 @@ from unittest.mock import MagicMock, patch from querychat._snowflake import ( - IbisExecutor, SemanticViewInfo, - SQLAlchemyConnectionExecutor, - SQLAlchemyExecutor, discover_semantic_views, + execute_raw_sql, format_semantic_views_section, get_semantic_view_ddl, ) @@ -66,36 +64,88 @@ class TestSQLEscaping: def test_single_quote_escaped(self): """Verify that names with single quotes are properly escaped.""" - mock_executor = MagicMock() - mock_executor.execute_raw_sql.return_value = [{"col": "DDL result"}] + mock_engine = MagicMock() + mock_conn = MagicMock() + mock_result = MagicMock() + mock_result.keys.return_value = ["col"] + mock_result.fetchall.return_value = [("DDL result",)] + + mock_engine.connect.return_value.__enter__ = MagicMock(return_value=mock_conn) + mock_engine.connect.return_value.__exit__ = MagicMock(return_value=False) + mock_conn.execute.return_value = mock_result - get_semantic_view_ddl(mock_executor, "db.schema.test'view") + get_semantic_view_ddl(mock_engine, "sqlalchemy", "db.schema.test'view") # Verify the executed query has escaped quotes - call_args = mock_executor.execute_raw_sql.call_args - query = call_args[0][0] 
- assert "test''view" in query + call_args = mock_conn.execute.call_args + query_str = str(call_args[0][0]) + assert "test''view" in query_str def test_normal_name_unchanged(self): """Verify that normal names without special chars work correctly.""" - mock_executor = MagicMock() - mock_executor.execute_raw_sql.return_value = [{"col": "DDL result"}] + mock_engine = MagicMock() + mock_conn = MagicMock() + mock_result = MagicMock() + mock_result.keys.return_value = ["col"] + mock_result.fetchall.return_value = [("DDL result",)] + + mock_engine.connect.return_value.__enter__ = MagicMock(return_value=mock_conn) + mock_engine.connect.return_value.__exit__ = MagicMock(return_value=False) + mock_conn.execute.return_value = mock_result + + get_semantic_view_ddl(mock_engine, "sqlalchemy", "db.schema.normal_view") + + call_args = mock_conn.execute.call_args + query_str = str(call_args[0][0]) + assert "db.schema.normal_view" in query_str + - get_semantic_view_ddl(mock_executor, "db.schema.normal_view") +class TestExecuteRawSQL: + """Tests for execute_raw_sql function.""" - call_args = mock_executor.execute_raw_sql.call_args - query = call_args[0][0] - assert "db.schema.normal_view" in query - assert "''" not in query + def test_sqlalchemy_backend(self): + """Test execute_raw_sql with SQLAlchemy backend.""" + mock_engine = MagicMock() + mock_conn = MagicMock() + mock_result = MagicMock() + mock_result.keys.return_value = ["col1", "col2"] + mock_result.fetchall.return_value = [("a", "b"), ("c", "d")] + + mock_engine.connect.return_value.__enter__ = MagicMock(return_value=mock_conn) + mock_engine.connect.return_value.__exit__ = MagicMock(return_value=False) + mock_conn.execute.return_value = mock_result + + result = execute_raw_sql("SELECT 1", mock_engine, "sqlalchemy") + + assert result == [{"col1": "a", "col2": "b"}, {"col1": "c", "col2": "d"}] + + def test_ibis_backend(self): + """Test execute_raw_sql with Ibis backend.""" + mock_backend = MagicMock() + mock_table = 
MagicMock() + mock_df = MagicMock() + mock_df.to_dict.return_value = [{"col1": "a"}, {"col1": "b"}] + + mock_backend.sql.return_value = mock_table + mock_table.execute.return_value = mock_df + + result = execute_raw_sql("SELECT 1", mock_backend, "ibis") + + assert result == [{"col1": "a"}, {"col1": "b"}] + mock_backend.sql.assert_called_once_with("SELECT 1") + mock_df.to_dict.assert_called_once_with(orient="records") class TestDiscoverSemanticViews: - """Tests for the standalone discover_semantic_views function.""" + """Tests for the discover_semantic_views function.""" def test_discover_returns_views(self): """Test successful discovery of semantic views.""" - mock_executor = MagicMock() - mock_executor.execute_raw_sql.side_effect = [ + mock_engine = MagicMock() + mock_conn = MagicMock() + + # Set up sequence of results for execute_raw_sql calls + results = [ # First call: SHOW SEMANTIC VIEWS [ {"database_name": "DB", "schema_name": "SCH", "name": "VIEW1"}, @@ -106,8 +156,29 @@ def test_discover_returns_views(self): # Third call: GET_DDL for VIEW2 [{"col": "DDL2"}], ] + call_count = [0] + + def mock_execute(*args, **kwargs): + result = MagicMock() + current_result = results[call_count[0]] + call_count[0] += 1 + + if isinstance(current_result, list) and current_result: + keys = list(current_result[0].keys()) + rows = [tuple(r.values()) for r in current_result] + else: + keys = [] + rows = [] + + result.keys.return_value = keys + result.fetchall.return_value = rows + return result + + mock_engine.connect.return_value.__enter__ = MagicMock(return_value=mock_conn) + mock_engine.connect.return_value.__exit__ = MagicMock(return_value=False) + mock_conn.execute.side_effect = mock_execute - views = discover_semantic_views(mock_executor) + views = discover_semantic_views(mock_engine, "sqlalchemy") assert len(views) == 2 assert views[0].name == "DB.SCH.VIEW1" @@ -117,116 +188,104 @@ def test_discover_returns_views(self): def test_discover_no_views(self, caplog): """Test 
discovery when no views exist.""" - mock_executor = MagicMock() - mock_executor.execute_raw_sql.return_value = [] + mock_engine = MagicMock() + mock_conn = MagicMock() + mock_result = MagicMock() + mock_result.keys.return_value = [] + mock_result.fetchall.return_value = [] + + mock_engine.connect.return_value.__enter__ = MagicMock(return_value=mock_conn) + mock_engine.connect.return_value.__exit__ = MagicMock(return_value=False) + mock_conn.execute.return_value = mock_result with caplog.at_level(logging.DEBUG, logger="querychat._snowflake"): - views = discover_semantic_views(mock_executor) + views = discover_semantic_views(mock_engine, "sqlalchemy") assert views == [] assert "No semantic views found" in caplog.text + def test_discover_disabled_via_env_var(self): + """Test that QUERYCHAT_DISABLE_SEMANTIC_VIEWS disables discovery.""" + mock_engine = MagicMock() + + with patch.dict(os.environ, {"QUERYCHAT_DISABLE_SEMANTIC_VIEWS": "1"}): + views = discover_semantic_views(mock_engine, "sqlalchemy") + + assert views == [] + # Engine should not be accessed + mock_engine.connect.assert_not_called() + def test_discover_skips_null_names(self): """Test that rows with null names are skipped.""" - mock_executor = MagicMock() - mock_executor.execute_raw_sql.side_effect = [ + mock_engine = MagicMock() + mock_conn = MagicMock() + + results = [ + # First call: SHOW SEMANTIC VIEWS with one null name [ {"database_name": "DB", "schema_name": "SCH", "name": None}, {"database_name": "DB", "schema_name": "SCH", "name": "VIEW1"}, ], + # Second call: GET_DDL for VIEW1 only [{"col": "DDL1"}], ] + call_count = [0] - views = discover_semantic_views(mock_executor) - - assert len(views) == 1 - assert views[0].name == "DB.SCH.VIEW1" - + def mock_execute(*args, **kwargs): + result = MagicMock() + current_result = results[call_count[0]] + call_count[0] += 1 -class TestSQLAlchemyExecutor: - """Tests for SQLAlchemyExecutor.""" + if isinstance(current_result, list) and current_result: + keys = 
list(current_result[0].keys()) + rows = [tuple(r.values()) for r in current_result] + else: + keys = [] + rows = [] - def test_execute_raw_sql(self): - """Test that execute_raw_sql returns list of dicts.""" - mock_engine = MagicMock() - mock_conn = MagicMock() - mock_result = MagicMock() - mock_result.keys.return_value = ["col1", "col2"] - mock_result.fetchall.return_value = [("a", "b"), ("c", "d")] + result.keys.return_value = keys + result.fetchall.return_value = rows + return result mock_engine.connect.return_value.__enter__ = MagicMock(return_value=mock_conn) mock_engine.connect.return_value.__exit__ = MagicMock(return_value=False) - mock_conn.execute.return_value = mock_result - - executor = SQLAlchemyExecutor(mock_engine) - result = executor.execute_raw_sql("SELECT 1") - - assert result == [{"col1": "a", "col2": "b"}, {"col1": "c", "col2": "d"}] - - -class TestSQLAlchemyConnectionExecutor: - """Tests for SQLAlchemyConnectionExecutor.""" - - def test_execute_raw_sql(self): - """Test that execute_raw_sql uses existing connection.""" - mock_conn = MagicMock() - mock_result = MagicMock() - mock_result.keys.return_value = ["col1"] - mock_result.fetchall.return_value = [("value",)] - mock_conn.execute.return_value = mock_result - - executor = SQLAlchemyConnectionExecutor(mock_conn) - result = executor.execute_raw_sql("SELECT 1") - - assert result == [{"col1": "value"}] - mock_conn.execute.assert_called_once() - + mock_conn.execute.side_effect = mock_execute -class TestIbisExecutor: - """Tests for IbisExecutor.""" + views = discover_semantic_views(mock_engine, "sqlalchemy") - def test_execute_raw_sql(self): - """Test that execute_raw_sql converts ibis result to list of dicts.""" - mock_backend = MagicMock() - mock_table = MagicMock() - mock_df = MagicMock() - mock_df.to_dict.return_value = [{"col1": "a"}, {"col1": "b"}] - - mock_backend.sql.return_value = mock_table - mock_table.execute.return_value = mock_df - - executor = IbisExecutor(mock_backend) - result = 
executor.execute_raw_sql("SELECT 1") - - assert result == [{"col1": "a"}, {"col1": "b"}] - mock_backend.sql.assert_called_once_with("SELECT 1") - mock_df.to_dict.assert_called_once_with(orient="records") + assert len(views) == 1 + assert views[0].name == "DB.SCH.VIEW1" class TestSQLAlchemySourceSemanticViews: """Tests for SQLAlchemySource semantic view discovery.""" def test_discovery_for_snowflake_backend(self): - """Test that discovery is called for Snowflake backends.""" + """Test that discovery is called for Snowflake backends in get_schema.""" from querychat._datasource import SQLAlchemySource mock_engine = MagicMock() mock_engine.dialect.name = "snowflake" mock_inspector = MagicMock() mock_inspector.has_table.return_value = True - mock_inspector.get_columns.return_value = [{"name": "id"}] + mock_inspector.get_columns.return_value = [{"name": "id", "type": MagicMock()}] with ( patch("querychat._datasource.inspect", return_value=mock_inspector), patch( "querychat._datasource.discover_semantic_views", return_value=[] ) as mock_discover, - patch.dict(os.environ, {}, clear=False), ): - # Remove the disable env var if present - os.environ.pop("QUERYCHAT_DISABLE_SEMANTIC_VIEWS", None) - SQLAlchemySource(mock_engine, "test_table") - mock_discover.assert_called_once() + source = SQLAlchemySource(mock_engine, "test_table") + # Discovery should NOT happen in __init__ + mock_discover.assert_not_called() + assert source._semantic_views is None + + # Discovery happens in get_schema + with patch.object(source, "_add_column_stats"): + source.get_schema(categorical_threshold=20) + + mock_discover.assert_called_once_with(mock_engine, "sqlalchemy") def test_discovery_skipped_for_non_snowflake(self): """Test that discovery is skipped for non-Snowflake backends.""" @@ -236,7 +295,7 @@ def test_discovery_skipped_for_non_snowflake(self): mock_engine.dialect.name = "postgresql" mock_inspector = MagicMock() mock_inspector.has_table.return_value = True - 
mock_inspector.get_columns.return_value = [{"name": "id"}] + mock_inspector.get_columns.return_value = [{"name": "id", "type": MagicMock()}] with ( patch("querychat._datasource.inspect", return_value=mock_inspector), @@ -245,27 +304,10 @@ def test_discovery_skipped_for_non_snowflake(self): ) as mock_discover, ): source = SQLAlchemySource(mock_engine, "test_table") - mock_discover.assert_not_called() - assert source._semantic_views == [] - - def test_discovery_disabled_via_env_var(self): - """Test that QUERYCHAT_DISABLE_SEMANTIC_VIEWS disables discovery.""" - from querychat._datasource import SQLAlchemySource - mock_engine = MagicMock() - mock_engine.dialect.name = "snowflake" - mock_inspector = MagicMock() - mock_inspector.has_table.return_value = True - mock_inspector.get_columns.return_value = [{"name": "id"}] + with patch.object(source, "_add_column_stats"): + source.get_schema(categorical_threshold=20) - with ( - patch("querychat._datasource.inspect", return_value=mock_inspector), - patch( - "querychat._datasource.discover_semantic_views" - ) as mock_discover, - patch.dict(os.environ, {"QUERYCHAT_DISABLE_SEMANTIC_VIEWS": "1"}), - ): - source = SQLAlchemySource(mock_engine, "test_table") mock_discover.assert_not_called() assert source._semantic_views == [] @@ -287,12 +329,9 @@ def test_get_schema_includes_semantic_views(self): "querychat._datasource.discover_semantic_views", return_value=views, ), - patch.dict(os.environ, {}, clear=False), ): - os.environ.pop("QUERYCHAT_DISABLE_SEMANTIC_VIEWS", None) source = SQLAlchemySource(mock_engine, "test_table") - # Mock the stats query to avoid needing a real connection with patch.object(source, "_add_column_stats"): schema = source.get_schema(categorical_threshold=20) @@ -313,19 +352,48 @@ def test_get_schema_without_semantic_views(self): with patch("querychat._datasource.inspect", return_value=mock_inspector): source = SQLAlchemySource(mock_engine, "test_table") - # Mock the stats query with patch.object(source, 
"_add_column_stats"): schema = source.get_schema(categorical_threshold=20) assert "Table: test_table" in schema assert "## Snowflake Semantic Views" not in schema + def test_has_semantic_views_property(self): + """Test the has_semantic_views property.""" + from querychat._datasource import SQLAlchemySource + + views = [SemanticViewInfo(name="db.schema.metrics", ddl="CREATE SEMANTIC VIEW")] + + mock_engine = MagicMock() + mock_engine.dialect.name = "snowflake" + mock_inspector = MagicMock() + mock_inspector.has_table.return_value = True + mock_inspector.get_columns.return_value = [{"name": "id", "type": MagicMock()}] + + with ( + patch("querychat._datasource.inspect", return_value=mock_inspector), + patch( + "querychat._datasource.discover_semantic_views", + return_value=views, + ), + ): + source = SQLAlchemySource(mock_engine, "test_table") + + # Before get_schema, has_semantic_views is False (None evaluates to False) + assert source.has_semantic_views is False + + with patch.object(source, "_add_column_stats"): + source.get_schema(categorical_threshold=20) + + # After get_schema, has_semantic_views is True + assert source.has_semantic_views is True + class TestIbisSourceSemanticViews: """Tests for IbisSource semantic view discovery.""" def test_discovery_for_snowflake_backend(self): - """Test that discovery runs for Snowflake backends.""" + """Test that discovery runs for Snowflake backends in get_schema.""" from ibis.backends.sql import SQLBackend from querychat._datasource import IbisSource @@ -334,19 +402,26 @@ def test_discovery_for_snowflake_backend(self): mock_backend.name = "snowflake" mock_table.get_backend.return_value = mock_backend mock_schema = MagicMock() - mock_schema.items.return_value = [] - mock_schema.names = [] + mock_dtype = MagicMock() + mock_dtype.is_numeric.return_value = True + mock_dtype.is_integer.return_value = True + mock_schema.items.return_value = [("id", mock_dtype)] + mock_schema.names = ["id"] mock_table.schema.return_value = 
mock_schema - with ( - patch( - "querychat._datasource.discover_semantic_views", return_value=[] - ) as mock_discover, - patch.dict(os.environ, {}, clear=False), - ): - os.environ.pop("QUERYCHAT_DISABLE_SEMANTIC_VIEWS", None) - IbisSource(mock_table, "test") - mock_discover.assert_called_once() + with patch( + "querychat._datasource.discover_semantic_views", return_value=[] + ) as mock_discover: + source = IbisSource(mock_table, "test") + # Discovery should NOT happen in __init__ + mock_discover.assert_not_called() + assert source._semantic_views is None + + # Discovery happens in get_schema + with patch.object(IbisSource, "_add_column_stats"): + source.get_schema(categorical_threshold=20) + + mock_discover.assert_called_once_with(mock_backend, "ibis") def test_discovery_skipped_for_non_snowflake(self): """Test that discovery is skipped for non-Snowflake backends.""" @@ -358,43 +433,58 @@ def test_discovery_skipped_for_non_snowflake(self): mock_backend.name = "postgres" mock_table.get_backend.return_value = mock_backend mock_schema = MagicMock() - mock_schema.items.return_value = [] - mock_schema.names = [] + mock_dtype = MagicMock() + mock_dtype.is_numeric.return_value = True + mock_dtype.is_integer.return_value = True + mock_schema.items.return_value = [("id", mock_dtype)] + mock_schema.names = ["id"] mock_table.schema.return_value = mock_schema with patch( "querychat._datasource.discover_semantic_views" ) as mock_discover: source = IbisSource(mock_table, "test") + + with patch.object(IbisSource, "_add_column_stats"): + source.get_schema(categorical_threshold=20) + mock_discover.assert_not_called() assert source._semantic_views == [] - def test_discovery_disabled_via_env_var(self): - """Test that QUERYCHAT_DISABLE_SEMANTIC_VIEWS disables discovery.""" + def test_get_schema_includes_semantic_views(self): + """Test that get_schema includes semantic view section.""" from ibis.backends.sql import SQLBackend from querychat._datasource import IbisSource + views = 
[SemanticViewInfo(name="db.schema.metrics", ddl="CREATE SEMANTIC VIEW")] + mock_table = MagicMock() mock_backend = MagicMock(spec=SQLBackend) mock_backend.name = "snowflake" mock_table.get_backend.return_value = mock_backend mock_schema = MagicMock() - mock_schema.items.return_value = [] - mock_schema.names = [] + mock_dtype = MagicMock() + mock_dtype.is_numeric.return_value = True + mock_dtype.is_integer.return_value = True + mock_schema.items.return_value = [("id", mock_dtype)] + mock_schema.names = ["id"] mock_table.schema.return_value = mock_schema - with ( - patch( - "querychat._datasource.discover_semantic_views" - ) as mock_discover, - patch.dict(os.environ, {"QUERYCHAT_DISABLE_SEMANTIC_VIEWS": "1"}), + with patch( + "querychat._datasource.discover_semantic_views", + return_value=views, ): - source = IbisSource(mock_table, "test") - mock_discover.assert_not_called() - assert source._semantic_views == [] + source = IbisSource(mock_table, "test_table") - def test_get_schema_includes_semantic_views(self): - """Test that get_schema includes semantic view section.""" + with patch.object(IbisSource, "_add_column_stats"): + schema = source.get_schema(categorical_threshold=20) + + assert "Table: test_table" in schema + assert "## Snowflake Semantic Views" in schema + assert "db.schema.metrics" in schema + + def test_has_semantic_views_property(self): + """Test the has_semantic_views property.""" from ibis.backends.sql import SQLBackend from querychat._datasource import IbisSource @@ -405,24 +495,24 @@ def test_get_schema_includes_semantic_views(self): mock_backend.name = "snowflake" mock_table.get_backend.return_value = mock_backend mock_schema = MagicMock() - mock_schema.items.return_value = [("id", MagicMock())] + mock_dtype = MagicMock() + mock_dtype.is_numeric.return_value = True + mock_dtype.is_integer.return_value = True + mock_schema.items.return_value = [("id", mock_dtype)] mock_schema.names = ["id"] mock_table.schema.return_value = mock_schema - with ( - 
patch( - "querychat._datasource.discover_semantic_views", - return_value=views, - ), - patch.dict(os.environ, {}, clear=False), + with patch( + "querychat._datasource.discover_semantic_views", + return_value=views, ): - os.environ.pop("QUERYCHAT_DISABLE_SEMANTIC_VIEWS", None) source = IbisSource(mock_table, "test_table") - # Mock _add_column_stats to avoid complex aggregation setup + # Before get_schema, has_semantic_views is False + assert source.has_semantic_views is False + with patch.object(IbisSource, "_add_column_stats"): - schema = source.get_schema(categorical_threshold=20) + source.get_schema(categorical_threshold=20) - assert "Table: test_table" in schema - assert "## Snowflake Semantic Views" in schema - assert "db.schema.metrics" in schema + # After get_schema, has_semantic_views is True + assert source.has_semantic_views is True From 05fe216473e182f58d2f002c8d30fd8aed343edd Mon Sep 17 00:00:00 2001 From: Carson Date: Mon, 26 Jan 2026 15:22:38 -0600 Subject: [PATCH 12/45] chore: Remove is_snowflake template variable Snowflake SQL Tips section isn't necessary for semantic views support. Removed the {{#is_snowflake}} block from prompt templates and the is_snowflake variable from system prompt code in both R and Python. Co-Authored-By: Claude Opus 4.5 --- pkg-py/src/querychat/_system_prompt.py | 2 -- pkg-py/src/querychat/prompts/prompt.md | 10 ---------- pkg-r/R/QueryChatSystemPrompt.R | 2 -- pkg-r/inst/prompts/prompt.md | 10 ---------- 4 files changed, 24 deletions(-) diff --git a/pkg-py/src/querychat/_system_prompt.py b/pkg-py/src/querychat/_system_prompt.py index b86e163a..ebcc90f0 100644 --- a/pkg-py/src/querychat/_system_prompt.py +++ b/pkg-py/src/querychat/_system_prompt.py @@ -75,7 +75,6 @@ def render(self, tools: tuple[TOOL_GROUPS, ...] 
| None) -> str: """ db_type = self.data_source.get_db_type() is_duck_db = db_type.lower() == "duckdb" - is_snowflake = db_type.lower() == "snowflake" # Check for semantic views (only available with SnowflakeSource) # Use getattr to safely access the property that only exists on SnowflakeSource @@ -84,7 +83,6 @@ def render(self, tools: tuple[TOOL_GROUPS, ...] | None) -> str: context = { "db_type": db_type, "is_duck_db": is_duck_db, - "is_snowflake": is_snowflake, "has_semantic_views": has_semantic_views, "semantic_view_syntax": get_semantic_view_syntax() if has_semantic_views else "", "schema": self.schema, diff --git a/pkg-py/src/querychat/prompts/prompt.md b/pkg-py/src/querychat/prompts/prompt.md index 05cc4a29..8712467f 100644 --- a/pkg-py/src/querychat/prompts/prompt.md +++ b/pkg-py/src/querychat/prompts/prompt.md @@ -71,16 +71,6 @@ quantile_cont(salary, 0.5) ``` {{/is_duck_db}} -{{#is_snowflake}} -### Snowflake SQL Tips - -**QUALIFY clause:** Use QUALIFY instead of a subquery when filtering on window function results. - -**LATERAL FLATTEN:** Use for expanding JSON arrays or nested structures. - -**Time travel:** Use `AT` or `BEFORE` clauses for historical data access. 
- -{{/is_snowflake}} {{#has_semantic_views}} ### Semantic Views diff --git a/pkg-r/R/QueryChatSystemPrompt.R b/pkg-r/R/QueryChatSystemPrompt.R index f10b5c63..51feca86 100644 --- a/pkg-r/R/QueryChatSystemPrompt.R +++ b/pkg-r/R/QueryChatSystemPrompt.R @@ -81,7 +81,6 @@ QueryChatSystemPrompt <- R6::R6Class( # Build context for whisker rendering db_type <- self$data_source$get_db_type() is_duck_db <- tolower(db_type) == "duckdb" - is_snowflake <- tolower(db_type) == "snowflake" # Check for semantic views (only available with SnowflakeSource) has_semantic_views <- FALSE @@ -95,7 +94,6 @@ QueryChatSystemPrompt <- R6::R6Class( context <- list( db_type = db_type, is_duck_db = is_duck_db, - is_snowflake = if (is_snowflake) "true", has_semantic_views = if (has_semantic_views) "true", semantic_view_syntax = if (has_semantic_views) { get_semantic_view_syntax() diff --git a/pkg-r/inst/prompts/prompt.md b/pkg-r/inst/prompts/prompt.md index 05cc4a29..8712467f 100644 --- a/pkg-r/inst/prompts/prompt.md +++ b/pkg-r/inst/prompts/prompt.md @@ -71,16 +71,6 @@ quantile_cont(salary, 0.5) ``` {{/is_duck_db}} -{{#is_snowflake}} -### Snowflake SQL Tips - -**QUALIFY clause:** Use QUALIFY instead of a subquery when filtering on window function results. - -**LATERAL FLATTEN:** Use for expanding JSON arrays or nested structures. - -**Time travel:** Use `AT` or `BEFORE` clauses for historical data access. - -{{/is_snowflake}} {{#has_semantic_views}} ### Semantic Views From a652a67c0de640313988a306950a0c46bfd53353 Mon Sep 17 00:00:00 2001 From: Carson Date: Mon, 26 Jan 2026 15:26:45 -0600 Subject: [PATCH 13/45] refactor: Use isinstance() checks instead of type parameter Replace backend_type: Literal["sqlalchemy", "ibis"] parameter with isinstance(backend, sqlalchemy.Engine) checks for cleaner API. 
Co-Authored-By: Claude Opus 4.5 --- pkg-py/src/querychat/_datasource.py | 6 ++-- pkg-py/src/querychat/_snowflake.py | 42 +++++++++------------------ pkg-py/tests/test_snowflake_source.py | 36 +++++++++++++++-------- 3 files changed, 40 insertions(+), 44 deletions(-) diff --git a/pkg-py/src/querychat/_datasource.py b/pkg-py/src/querychat/_datasource.py index 0c5ca568..0b99ceba 100644 --- a/pkg-py/src/querychat/_datasource.py +++ b/pkg-py/src/querychat/_datasource.py @@ -502,9 +502,7 @@ def get_schema(self, *, categorical_threshold: int) -> str: # Discover Snowflake semantic views lazily (only on first call) if self._semantic_views is None: if self._engine.dialect.name.lower() == "snowflake": - self._semantic_views = discover_semantic_views( - self._engine, "sqlalchemy" - ) + self._semantic_views = discover_semantic_views(self._engine) else: self._semantic_views = [] @@ -996,7 +994,7 @@ def get_schema(self, *, categorical_threshold: int) -> str: # Discover Snowflake semantic views lazily (only on first call) if self._semantic_views is None: if self._backend.name.lower() == "snowflake": - self._semantic_views = discover_semantic_views(self._backend, "ibis") + self._semantic_views = discover_semantic_views(self._backend) else: self._semantic_views = [] diff --git a/pkg-py/src/querychat/_snowflake.py b/pkg-py/src/querychat/_snowflake.py index 00bb5704..c91df2ee 100644 --- a/pkg-py/src/querychat/_snowflake.py +++ b/pkg-py/src/querychat/_snowflake.py @@ -2,7 +2,7 @@ Snowflake-specific utilities for semantic view discovery. This module provides functions for discovering Snowflake Semantic Views, -supporting both SQLAlchemy engines and Ibis backends through a type parameter. +supporting both SQLAlchemy engines and Ibis backends via isinstance() checks. 
""" from __future__ import annotations @@ -10,16 +10,15 @@ import logging import os from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, Literal +from typing import TYPE_CHECKING, Any + +import sqlalchemy if TYPE_CHECKING: from ibis.backends.sql import SQLBackend - from sqlalchemy import Engine logger = logging.getLogger(__name__) -BackendType = Literal["sqlalchemy", "ibis"] - @dataclass class SemanticViewInfo: @@ -34,8 +33,7 @@ class SemanticViewInfo: def execute_raw_sql( query: str, - backend: Engine | SQLBackend, - backend_type: BackendType, + backend: sqlalchemy.Engine | SQLBackend, ) -> list[dict[str, Any]]: """ Execute raw SQL and return results as list of row dicts. @@ -46,8 +44,6 @@ def execute_raw_sql( SQL query to execute backend SQLAlchemy Engine or Ibis SQLBackend - backend_type - Type of backend: "sqlalchemy" or "ibis" Returns ------- @@ -55,23 +51,20 @@ def execute_raw_sql( Query results as list of row dictionaries """ - if backend_type == "sqlalchemy": - from sqlalchemy import text - - with backend.connect() as conn: # type: ignore[union-attr] - result = conn.execute(text(query)) + if isinstance(backend, sqlalchemy.Engine): + with backend.connect() as conn: + result = conn.execute(sqlalchemy.text(query)) keys = list(result.keys()) return [dict(zip(keys, row, strict=False)) for row in result.fetchall()] else: # Ibis backend - result_table = backend.sql(query) # type: ignore[union-attr] + result_table = backend.sql(query) df = result_table.execute() return df.to_dict(orient="records") # type: ignore[return-value] def discover_semantic_views( - backend: Engine | SQLBackend, - backend_type: BackendType, + backend: sqlalchemy.Engine | SQLBackend, ) -> list[SemanticViewInfo]: """ Discover semantic views in the current schema. 
@@ -80,8 +73,6 @@ def discover_semantic_views( ---------- backend SQLAlchemy Engine or Ibis SQLBackend - backend_type - Type of backend: "sqlalchemy" or "ibis" Returns ------- @@ -93,7 +84,7 @@ def discover_semantic_views( if os.environ.get("QUERYCHAT_DISABLE_SEMANTIC_VIEWS"): return [] - rows = execute_raw_sql("SHOW SEMANTIC VIEWS", backend, backend_type) + rows = execute_raw_sql("SHOW SEMANTIC VIEWS", backend) if not rows: logger.debug("No semantic views found in current schema") @@ -109,7 +100,7 @@ def discover_semantic_views( continue fq_name = f"{db}.{schema}.{name}" - ddl = get_semantic_view_ddl(backend, backend_type, fq_name) + ddl = get_semantic_view_ddl(backend, fq_name) if ddl: views.append(SemanticViewInfo(name=fq_name, ddl=ddl)) @@ -117,8 +108,7 @@ def discover_semantic_views( def get_semantic_view_ddl( - backend: Engine | SQLBackend, - backend_type: BackendType, + backend: sqlalchemy.Engine | SQLBackend, fq_name: str, ) -> str | None: """ @@ -128,8 +118,6 @@ def get_semantic_view_ddl( ---------- backend SQLAlchemy Engine or Ibis SQLBackend - backend_type - Type of backend: "sqlalchemy" or "ibis" fq_name Fully qualified name (database.schema.view_name) @@ -141,9 +129,7 @@ def get_semantic_view_ddl( """ # Escape single quotes to prevent SQL injection safe_name = fq_name.replace("'", "''") - rows = execute_raw_sql( - f"SELECT GET_DDL('SEMANTIC_VIEW', '{safe_name}')", backend, backend_type - ) + rows = execute_raw_sql(f"SELECT GET_DDL('SEMANTIC_VIEW', '{safe_name}')", backend) if rows: return str(next(iter(rows[0].values()))) return None diff --git a/pkg-py/tests/test_snowflake_source.py b/pkg-py/tests/test_snowflake_source.py index 2e34344c..424b93a1 100644 --- a/pkg-py/tests/test_snowflake_source.py +++ b/pkg-py/tests/test_snowflake_source.py @@ -13,6 +13,12 @@ ) +# Decorator to make MagicMock pass isinstance(mock, sqlalchemy.Engine) +def patch_sqlalchemy_engine(func): + """Patch sqlalchemy.Engine so MagicMock instances pass isinstance checks.""" + 
return patch("querychat._snowflake.sqlalchemy.Engine", MagicMock)(func) + + class TestSemanticViewInfo: """Tests for SemanticViewInfo dataclass.""" @@ -62,6 +68,7 @@ def test_format_multiple_views(self): class TestSQLEscaping: """Tests for SQL injection prevention in get_semantic_view_ddl.""" + @patch_sqlalchemy_engine def test_single_quote_escaped(self): """Verify that names with single quotes are properly escaped.""" mock_engine = MagicMock() @@ -74,13 +81,14 @@ def test_single_quote_escaped(self): mock_engine.connect.return_value.__exit__ = MagicMock(return_value=False) mock_conn.execute.return_value = mock_result - get_semantic_view_ddl(mock_engine, "sqlalchemy", "db.schema.test'view") + get_semantic_view_ddl(mock_engine, "db.schema.test'view") # Verify the executed query has escaped quotes call_args = mock_conn.execute.call_args query_str = str(call_args[0][0]) assert "test''view" in query_str + @patch_sqlalchemy_engine def test_normal_name_unchanged(self): """Verify that normal names without special chars work correctly.""" mock_engine = MagicMock() @@ -93,7 +101,7 @@ def test_normal_name_unchanged(self): mock_engine.connect.return_value.__exit__ = MagicMock(return_value=False) mock_conn.execute.return_value = mock_result - get_semantic_view_ddl(mock_engine, "sqlalchemy", "db.schema.normal_view") + get_semantic_view_ddl(mock_engine, "db.schema.normal_view") call_args = mock_conn.execute.call_args query_str = str(call_args[0][0]) @@ -103,6 +111,7 @@ def test_normal_name_unchanged(self): class TestExecuteRawSQL: """Tests for execute_raw_sql function.""" + @patch_sqlalchemy_engine def test_sqlalchemy_backend(self): """Test execute_raw_sql with SQLAlchemy backend.""" mock_engine = MagicMock() @@ -115,7 +124,7 @@ def test_sqlalchemy_backend(self): mock_engine.connect.return_value.__exit__ = MagicMock(return_value=False) mock_conn.execute.return_value = mock_result - result = execute_raw_sql("SELECT 1", mock_engine, "sqlalchemy") + result = execute_raw_sql("SELECT 
1", mock_engine) assert result == [{"col1": "a", "col2": "b"}, {"col1": "c", "col2": "d"}] @@ -129,7 +138,7 @@ def test_ibis_backend(self): mock_backend.sql.return_value = mock_table mock_table.execute.return_value = mock_df - result = execute_raw_sql("SELECT 1", mock_backend, "ibis") + result = execute_raw_sql("SELECT 1", mock_backend) assert result == [{"col1": "a"}, {"col1": "b"}] mock_backend.sql.assert_called_once_with("SELECT 1") @@ -139,6 +148,7 @@ def test_ibis_backend(self): class TestDiscoverSemanticViews: """Tests for the discover_semantic_views function.""" + @patch_sqlalchemy_engine def test_discover_returns_views(self): """Test successful discovery of semantic views.""" mock_engine = MagicMock() @@ -158,7 +168,7 @@ def test_discover_returns_views(self): ] call_count = [0] - def mock_execute(*args, **kwargs): + def mock_execute(_query): result = MagicMock() current_result = results[call_count[0]] call_count[0] += 1 @@ -178,7 +188,7 @@ def mock_execute(*args, **kwargs): mock_engine.connect.return_value.__exit__ = MagicMock(return_value=False) mock_conn.execute.side_effect = mock_execute - views = discover_semantic_views(mock_engine, "sqlalchemy") + views = discover_semantic_views(mock_engine) assert len(views) == 2 assert views[0].name == "DB.SCH.VIEW1" @@ -186,6 +196,7 @@ def mock_execute(*args, **kwargs): assert views[1].name == "DB.SCH.VIEW2" assert views[1].ddl == "DDL2" + @patch_sqlalchemy_engine def test_discover_no_views(self, caplog): """Test discovery when no views exist.""" mock_engine = MagicMock() @@ -199,7 +210,7 @@ def test_discover_no_views(self, caplog): mock_conn.execute.return_value = mock_result with caplog.at_level(logging.DEBUG, logger="querychat._snowflake"): - views = discover_semantic_views(mock_engine, "sqlalchemy") + views = discover_semantic_views(mock_engine) assert views == [] assert "No semantic views found" in caplog.text @@ -209,12 +220,13 @@ def test_discover_disabled_via_env_var(self): mock_engine = MagicMock() with 
patch.dict(os.environ, {"QUERYCHAT_DISABLE_SEMANTIC_VIEWS": "1"}): - views = discover_semantic_views(mock_engine, "sqlalchemy") + views = discover_semantic_views(mock_engine) assert views == [] # Engine should not be accessed mock_engine.connect.assert_not_called() + @patch_sqlalchemy_engine def test_discover_skips_null_names(self): """Test that rows with null names are skipped.""" mock_engine = MagicMock() @@ -231,7 +243,7 @@ def test_discover_skips_null_names(self): ] call_count = [0] - def mock_execute(*args, **kwargs): + def mock_execute(_query): result = MagicMock() current_result = results[call_count[0]] call_count[0] += 1 @@ -251,7 +263,7 @@ def mock_execute(*args, **kwargs): mock_engine.connect.return_value.__exit__ = MagicMock(return_value=False) mock_conn.execute.side_effect = mock_execute - views = discover_semantic_views(mock_engine, "sqlalchemy") + views = discover_semantic_views(mock_engine) assert len(views) == 1 assert views[0].name == "DB.SCH.VIEW1" @@ -285,7 +297,7 @@ def test_discovery_for_snowflake_backend(self): with patch.object(source, "_add_column_stats"): source.get_schema(categorical_threshold=20) - mock_discover.assert_called_once_with(mock_engine, "sqlalchemy") + mock_discover.assert_called_once_with(mock_engine) def test_discovery_skipped_for_non_snowflake(self): """Test that discovery is skipped for non-Snowflake backends.""" @@ -421,7 +433,7 @@ def test_discovery_for_snowflake_backend(self): with patch.object(IbisSource, "_add_column_stats"): source.get_schema(categorical_threshold=20) - mock_discover.assert_called_once_with(mock_backend, "ibis") + mock_discover.assert_called_once_with(mock_backend) def test_discovery_skipped_for_non_snowflake(self): """Test that discovery is skipped for non-Snowflake backends.""" From 1bade8e2e8de15ae3cfd3a6ab465b651a78f49a4 Mon Sep 17 00:00:00 2001 From: Carson Date: Mon, 26 Jan 2026 15:28:31 -0600 Subject: [PATCH 14/45] chore: Remove _semantic_views class attribute declarations Co-Authored-By: 
Claude Opus 4.5 --- pkg-py/src/querychat/_datasource.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pkg-py/src/querychat/_datasource.py b/pkg-py/src/querychat/_datasource.py index 0b99ceba..81b419ae 100644 --- a/pkg-py/src/querychat/_datasource.py +++ b/pkg-py/src/querychat/_datasource.py @@ -12,7 +12,6 @@ from ._df_compat import read_sql from ._snowflake import ( - SemanticViewInfo, discover_semantic_views, format_semantic_views_section, ) @@ -434,8 +433,6 @@ class SQLAlchemySource(DataSource[nw.DataFrame]): and Databricks. """ - _semantic_views: list[SemanticViewInfo] | None - def __init__( self, engine: Engine, @@ -953,7 +950,6 @@ class IbisSource(DataSource["ibis.Table"]): _table: ibis.Table _backend: SQLBackend - _semantic_views: list[SemanticViewInfo] | None table_name: str def __init__(self, table: ibis.Table, table_name: str): From 400cc221cd7c023d4b1028b0b84e9cfc053b10fe Mon Sep 17 00:00:00 2001 From: Carson Date: Mon, 26 Jan 2026 15:32:28 -0600 Subject: [PATCH 15/45] refactor: Merge SnowflakeSource semantic view logic into DBISource Move semantic view discovery from separate SnowflakeSource class into DBISource to match Python implementation: - Add semantic_views field and has_semantic_views() method to DBISource - Move is_snowflake_connection(), discover_semantic_views_impl(), get_semantic_view_ddl(), format_semantic_views_section() to DBISource.R - Add QUERYCHAT_DISABLE_SEMANTIC_VIEWS env var support for R - Delete SnowflakeSource.R - Update normalize_data_source() to use DBISource for all DBI connections - Update QueryChatSystemPrompt to check DBISource for semantic views Co-Authored-By: Claude Opus 4.5 --- pkg-r/R/DBISource.R | 200 +++++++++++++++++-- pkg-r/R/QueryChat.R | 35 ++-- pkg-r/R/QueryChatSystemPrompt.R | 4 +- pkg-r/R/SnowflakeSource.R | 203 -------------------- pkg-r/tests/testthat/test-SnowflakeSource.R | 55 ++++-- 5 files changed, 240 insertions(+), 257 deletions(-) delete mode 100644 pkg-r/R/SnowflakeSource.R diff --git 
a/pkg-r/R/DBISource.R b/pkg-r/R/DBISource.R index f27f6ebe..ecd5a369 100644 --- a/pkg-r/R/DBISource.R +++ b/pkg-r/R/DBISource.R @@ -27,7 +27,8 @@ DBISource <- R6::R6Class( "DBISource", inherit = DataSource, private = list( - conn = NULL + conn = NULL, + semantic_views = NULL ), public = list( #' @description @@ -58,23 +59,27 @@ DBISource <- R6::R6Class( # Check if table exists if (!DBI::dbExistsTable(conn, table_name)) { - cli::cli_abort(c( - "Table {.val {DBI::dbQuoteIdentifier(conn, table_name)}} not found in database", - "i" = "If you're using a table in a catalog or schema, pass a {.fn DBI::Id} object to {.arg table_name}" - )) + cli::cli_abort( + c( + "Table {.val {DBI::dbQuoteIdentifier(conn, table_name)}} not found in database", + "i" = "If you're using a table in a catalog or schema, pass a {.fn DBI::Id} object to {.arg table_name}" + ) + ) } private$conn <- conn self$table_name <- table_name # Store original column names for validation - private$colnames <- colnames(DBI::dbGetQuery( - conn, - sprintf( - "SELECT * FROM %s LIMIT 0", - DBI::dbQuoteIdentifier(conn, table_name) + private$colnames <- colnames( + DBI::dbGetQuery( + conn, + sprintf( + "SELECT * FROM %s LIMIT 0", + DBI::dbQuoteIdentifier(conn, table_name) + ) ) - )) + ) }, #' @description Get the database type @@ -104,7 +109,36 @@ DBISource <- R6::R6Class( #' @return A string describing the schema get_schema = function(categorical_threshold = 20) { check_number_whole(categorical_threshold, min = 1) - get_schema_impl(private$conn, self$table_name, categorical_threshold) + schema <- get_schema_impl( + private$conn, + self$table_name, + categorical_threshold + ) + + # Discover Snowflake semantic views lazily (only on first call) + if (is.null(private$semantic_views)) { + if (is_snowflake_connection(private$conn)) { + private$semantic_views <- discover_semantic_views_impl(private$conn) + } else { + private$semantic_views <- list() + } + } + + if (length(private$semantic_views) > 0) { + semantic_section 
<- format_semantic_views_section( + private$semantic_views + ) + schema <- paste(schema, semantic_section, sep = "\n\n") + } + + schema + }, + + #' @description + #' Check if semantic views are available + #' @return TRUE if semantic views were discovered + has_semantic_views = function() { + length(private$semantic_views %||% list()) > 0 }, #' @description @@ -181,7 +215,6 @@ DBISource <- R6::R6Class( ) ) - get_schema_impl <- function( conn, table_name, @@ -361,7 +394,6 @@ get_schema_impl <- function( paste(schema_lines, collapse = "\n") } - # nocov start # Map R classes to SQL types r_class_to_sql_type <- function(r_class) { @@ -380,3 +412,143 @@ r_class_to_sql_type <- function(r_class) { ) } # nocov end + +# Snowflake Semantic Views Support ---- + +#' Check if a connection is a Snowflake connection +#' +#' @param conn A DBI connection object +#' @return TRUE if the connection is to Snowflake +#' @noRd +is_snowflake_connection <- function(conn) { + if (!inherits(conn, "DBIConnection")) { + return(FALSE) + } + + # Check for known Snowflake connection classes + if (inherits(conn, "Snowflake")) { + return(TRUE) + } + + # Check dbms.name from connection info + tryCatch( + { + conn_info <- DBI::dbGetInfo(conn) + dbms_name <- tolower(conn_info[["dbms.name"]] %||% "") + grepl("snowflake", dbms_name, ignore.case = TRUE) + }, + error = function(e) FALSE + ) +} + +#' Discover Semantic Views in Snowflake +#' +#' @param conn A DBI connection to Snowflake +#' @return A list of semantic views with name and ddl +#' @noRd +discover_semantic_views_impl <- function(conn) { + # Check env var for early exit + if (nzchar(Sys.getenv("QUERYCHAT_DISABLE_SEMANTIC_VIEWS", ""))) { + return(list()) + } + + semantic_views <- list() + + # Check for semantic views in the current schema + result <- DBI::dbGetQuery(conn, "SHOW SEMANTIC VIEWS") + + if (nrow(result) == 0) { + cli::cli_inform( + c("i" = "No semantic views found in current schema"), + .frequency = "once", + .frequency_id = 
"querychat_no_semantic_views" + ) + return(list()) + } + + for (i in seq_len(nrow(result))) { + row <- result[i, ] + view_name <- row[["name"]] + database_name <- row[["database_name"]] + schema_name <- row[["schema_name"]] + + if (is.null(view_name) || is.na(view_name)) { + next + } + + # Build fully qualified name + fq_name <- paste(database_name, schema_name, view_name, sep = ".") + + # Get the DDL for this semantic view + ddl <- get_semantic_view_ddl(conn, fq_name) + if (!is.null(ddl)) { + semantic_views <- c( + semantic_views, + list( + list( + name = fq_name, + ddl = ddl + ) + ) + ) + } + } + + semantic_views +} + +#' Get the DDL for a Semantic View +#' +#' @param conn A DBI connection to Snowflake +#' @param fq_name Fully qualified name (database.schema.view_name) +#' @return The DDL text, or NULL if retrieval failed +#' @noRd +get_semantic_view_ddl <- function(conn, fq_name) { + # Escape single quotes to prevent SQL injection + safe_name <- gsub("'", "''", fq_name, fixed = TRUE) + query <- sprintf("SELECT GET_DDL('SEMANTIC_VIEW', '%s')", safe_name) + result <- DBI::dbGetQuery(conn, query) + if (nrow(result) > 0 && ncol(result) > 0) { + as.character(result[[1, 1]]) + } else { + NULL + } +} + +#' Format Semantic Views Section for Schema Output +#' +#' @param semantic_views A list of semantic view info (name and ddl) +#' @return A formatted string describing the semantic views +#' @noRd +format_semantic_views_section <- function(semantic_views) { + lines <- c( + "## Snowflake Semantic Views", + "", + paste0( + "This database has Semantic Views available. Semantic Views provide a ", + "curated layer over raw data with pre-defined metrics, dimensions, and ", + "relationships. They encode business logic and calculation rules that ", + "ensure consistent, accurate results." + ), + "", + paste0( + "**IMPORTANT**: When a Semantic View covers the data you need, prefer ", + "it over raw table queries to benefit from certified metric definitions." 
+ ), + "" + ) + + for (sv in semantic_views) { + lines <- c( + lines, + sprintf("### Semantic View: `%s`", sv$name), + "", + "```sql", + sv$ddl, + "```", + "" + ) + } + + paste(lines, collapse = "\n") +} diff --git a/pkg-r/R/QueryChat.R b/pkg-r/R/QueryChat.R index 2bc89e3d..7be4d9c8 100644 --- a/pkg-r/R/QueryChat.R +++ b/pkg-r/R/QueryChat.R @@ -285,8 +285,10 @@ QueryChat <- R6::R6Class( #' `reset_dashboard` tool is called. client = function( tools = NA, - update_dashboard = function(query, title) {}, - reset_dashboard = function() {} + update_dashboard = function(query, title) { + }, + reset_dashboard = function() { + } ) { private$require_data_source("$client") @@ -409,10 +411,12 @@ QueryChat <- R6::R6Class( ui <- function(req) { bslib::page_sidebar( - title = shiny::HTML(sprintf( - "querychat with %s", - table_name - )), + title = shiny::HTML( + sprintf( + "querychat with %s", + table_name + ) + ), class = "bslib-page-dashboard", sidebar = self$sidebar(), shiny::useBusyIndicators(pulse = TRUE, spinners = FALSE), @@ -508,12 +512,14 @@ QueryChat <- R6::R6Class( }) shiny::observeEvent(input$close_btn, label = "on_close_btn", { - shiny::stopApp(list( - df = qc_vals$df(), - sql = qc_vals$sql(), - title = qc_vals$title(), - client = qc_vals$client - )) + shiny::stopApp( + list( + df = qc_vals$df(), + sql = qc_vals$sql(), + title = qc_vals$title(), + client = qc_vals$client + ) + ) }) } @@ -935,10 +941,6 @@ normalize_data_source <- function(data_source, table_name) { } if (inherits(data_source, "DBIConnection")) { - # Use SnowflakeSource for Snowflake connections to get semantic view support - if (is_snowflake_connection(data_source)) { - return(SnowflakeSource$new(data_source, table_name)) - } return(DBISource$new(data_source, table_name)) } @@ -947,7 +949,6 @@ normalize_data_source <- function(data_source, table_name) { ) } - namespaced_id <- function(id, session = shiny::getDefaultReactiveDomain()) { if (is.null(session)) { id diff --git 
a/pkg-r/R/QueryChatSystemPrompt.R b/pkg-r/R/QueryChatSystemPrompt.R index 51feca86..4b0a8b42 100644 --- a/pkg-r/R/QueryChatSystemPrompt.R +++ b/pkg-r/R/QueryChatSystemPrompt.R @@ -82,10 +82,10 @@ QueryChatSystemPrompt <- R6::R6Class( db_type <- self$data_source$get_db_type() is_duck_db <- tolower(db_type) == "duckdb" - # Check for semantic views (only available with SnowflakeSource) + # Check for semantic views (available with DBISource for Snowflake connections) has_semantic_views <- FALSE if ( - inherits(self$data_source, "SnowflakeSource") && + inherits(self$data_source, "DBISource") && self$data_source$has_semantic_views() ) { has_semantic_views <- TRUE diff --git a/pkg-r/R/SnowflakeSource.R b/pkg-r/R/SnowflakeSource.R deleted file mode 100644 index 58080fb2..00000000 --- a/pkg-r/R/SnowflakeSource.R +++ /dev/null @@ -1,203 +0,0 @@ -#' Snowflake Source -#' -#' A DataSource implementation for Snowflake database connections with -#' Semantic View support. This class extends DBISource to automatically detect -#' and provide context about Snowflake Semantic Views when available. -#' -#' @noRd -SnowflakeSource <- R6::R6Class( - "SnowflakeSource", - inherit = DBISource, - private = list( - semantic_views = NULL - ), - public = list( - #' @description - #' Create a new SnowflakeSource - #' - #' @param conn A DBI connection object to Snowflake - #' @param table_name Name of the table in the database - #' @param discover_semantic_views If TRUE (default), automatically discover - #' semantic views at initialization. Set to FALSE to skip discovery. 
- #' - #' @return A new SnowflakeSource object - initialize = function(conn, table_name, discover_semantic_views = TRUE) { - super$initialize(conn, table_name) - - if (discover_semantic_views) { - private$semantic_views <- discover_semantic_views_impl(conn) - } else { - private$semantic_views <- list() - } - }, - - #' @description - #' Check if semantic views are available - #' @return TRUE if semantic views were discovered - has_semantic_views = function() { - length(private$semantic_views) > 0 - }, - - #' @description - #' Get the list of discovered semantic views - #' @return A list of semantic view info (name and ddl) - get_semantic_views = function() { - private$semantic_views - }, - - #' @description - #' Get schema information for the database table, including semantic views - #' - #' @param categorical_threshold Maximum number of unique values for a text - #' column to be considered categorical (default: 20) - #' @return A string describing the schema - get_schema = function(categorical_threshold = 20) { - # Get base schema from parent - schema <- super$get_schema(categorical_threshold = categorical_threshold) - - # If no semantic views, return base schema - if (!self$has_semantic_views()) { - return(schema) - } - - # Add semantic view information - semantic_section <- format_semantic_views_section(private$semantic_views) - paste(schema, semantic_section, sep = "\n\n") - } - ) -) - - -#' Discover Semantic Views in Snowflake -#' -#' @param conn A DBI connection to Snowflake -#' @return A list of semantic views with name and ddl -#' @noRd -discover_semantic_views_impl <- function(conn) { - semantic_views <- list() - - # Check for semantic views in the current schema - result <- DBI::dbGetQuery(conn, "SHOW SEMANTIC VIEWS") - - if (nrow(result) == 0) { - cli::cli_inform( - c("i" = "No semantic views found in current schema"), - .frequency = "once", - .frequency_id = "querychat_no_semantic_views" - ) - return(list()) - } - - for (i in seq_len(nrow(result))) { - 
row <- result[i, ] - view_name <- row[["name"]] - database_name <- row[["database_name"]] - schema_name <- row[["schema_name"]] - - if (is.null(view_name) || is.na(view_name)) { - next - } - - # Build fully qualified name - fq_name <- paste(database_name, schema_name, view_name, sep = ".") - - # Get the DDL for this semantic view - ddl <- get_semantic_view_ddl(conn, fq_name) - if (!is.null(ddl)) { - semantic_views <- c( - semantic_views, - list(list( - name = fq_name, - ddl = ddl - )) - ) - } - } - - semantic_views -} - - -#' Get the DDL for a Semantic View -#' -#' @param conn A DBI connection to Snowflake -#' @param fq_name Fully qualified name (database.schema.view_name) -#' @return The DDL text, or NULL if retrieval failed -#' @noRd -get_semantic_view_ddl <- function(conn, fq_name) { - # Escape single quotes to prevent SQL injection - safe_name <- gsub("'", "''", fq_name, fixed = TRUE) - query <- sprintf("SELECT GET_DDL('SEMANTIC_VIEW', '%s')", safe_name) - result <- DBI::dbGetQuery(conn, query) - if (nrow(result) > 0 && ncol(result) > 0) { - as.character(result[[1, 1]]) - } else { - NULL - } -} - - -#' Format Semantic Views Section for Schema Output -#' -#' @param semantic_views A list of semantic view info (name and ddl) -#' @return A formatted string describing the semantic views -#' @noRd -format_semantic_views_section <- function(semantic_views) { - lines <- c( - "## Snowflake Semantic Views", - "", - paste0( - "This database has Semantic Views available. Semantic Views provide a ", - "curated layer over raw data with pre-defined metrics, dimensions, and ", - "relationships. They encode business logic and calculation rules that ", - "ensure consistent, accurate results." - ), - "", - paste0( - "**IMPORTANT**: When a Semantic View covers the data you need, prefer ", - "it over raw table queries to benefit from certified metric definitions." 
- ), - "" - ) - - for (sv in semantic_views) { - lines <- c( - lines, - sprintf("### Semantic View: `%s`", sv$name), - "", - "```sql", - sv$ddl, - "```", - "" - ) - } - - paste(lines, collapse = "\n") -} - - -#' Check if a connection is a Snowflake connection -#' -#' @param conn A DBI connection object -#' @return TRUE if the connection is to Snowflake -#' @noRd -is_snowflake_connection <- function(conn) { - if (!inherits(conn, "DBIConnection")) { - return(FALSE) - } - - # Check for known Snowflake connection classes - if (inherits(conn, "Snowflake")) { - return(TRUE) - } - - # Check dbms.name from connection info - tryCatch( - { - conn_info <- DBI::dbGetInfo(conn) - dbms_name <- tolower(conn_info[["dbms.name"]] %||% "") - grepl("snowflake", dbms_name, ignore.case = TRUE) - }, - error = function(e) FALSE - ) -} diff --git a/pkg-r/tests/testthat/test-SnowflakeSource.R b/pkg-r/tests/testthat/test-SnowflakeSource.R index fad663f6..71be9c93 100644 --- a/pkg-r/tests/testthat/test-SnowflakeSource.R +++ b/pkg-r/tests/testthat/test-SnowflakeSource.R @@ -1,4 +1,4 @@ -# Tests for SnowflakeSource and semantic view functionality +# Tests for Snowflake semantic view functionality in DBISource describe("format_semantic_views_section()", { it("formats single semantic view correctly", { @@ -71,28 +71,28 @@ describe("is_snowflake_connection()", { }) }) -describe("SnowflakeSource initialization", { - # Note: We cannot fully test SnowflakeSource without a real Snowflake - # connection, but we can test the parameter validation and discovery - # option through integration with DBISource - - it("can disable semantic view discovery", { - # This is a mock test - in reality you'd need a Snowflake connection - # The actual behavior is tested through the discover_semantic_views param - # which skips the discovery when FALSE - - # The parameter exists and should be accepted by the class - expect_true( - "discover_semantic_views" %in% - formalArgs( - SnowflakeSource$public_methods$initialize - 
) - ) +describe("DBISource semantic views", { + it("has_semantic_views() returns FALSE before get_schema() is called", { + skip_if_not_installed("RSQLite") + + conn <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") + withr::defer(DBI::dbDisconnect(conn)) + DBI::dbWriteTable(conn, "test_table", data.frame(x = 1:3)) + + source <- DBISource$new(conn, "test_table") + expect_false(source$has_semantic_views()) }) - it("inherits from DBISource", { - # Check that SnowflakeSource inherits from DBISource - expect_identical(SnowflakeSource$get_inherit(), DBISource) + it("has_semantic_views() returns FALSE for non-Snowflake after get_schema()", { + skip_if_not_installed("RSQLite") + + conn <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") + withr::defer(DBI::dbDisconnect(conn)) + DBI::dbWriteTable(conn, "test_table", data.frame(x = 1:3)) + + source <- DBISource$new(conn, "test_table") + source$get_schema() + expect_false(source$has_semantic_views()) }) }) @@ -111,4 +111,17 @@ describe("discover_semantic_views_impl()", { "SHOW" ) }) + + it("respects QUERYCHAT_DISABLE_SEMANTIC_VIEWS env var", { + skip_if_not_installed("RSQLite") + + conn <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") + withr::defer(DBI::dbDisconnect(conn)) + + withr::with_envvar(c("QUERYCHAT_DISABLE_SEMANTIC_VIEWS" = "1"), { + # Should return empty list without querying (no error from SQLite) + result <- discover_semantic_views_impl(conn) + expect_equal(result, list()) + }) + }) }) From 7a029aada43292b12a56d6e4e7d1935436006785 Mon Sep 17 00:00:00 2001 From: Carson Date: Mon, 26 Jan 2026 15:32:36 -0600 Subject: [PATCH 16/45] (local) --- pkg-r/R/DataFrameSource.R | 14 +++++++------ pkg-r/R/querychat_module.R | 12 ++++++----- pkg-r/R/querychat_tools.R | 14 +++++++------ pkg-r/R/utils-check.R | 2 -- pkg-r/R/utils-ellmer.R | 1 - pkg-r/tests/testthat/test-DBISource.R | 8 ++++--- pkg-r/tests/testthat/test-DataSource.R | 2 -- pkg-r/tests/testthat/test-QueryChat.R | 23 ++++++++++++++------- 
pkg-r/tests/testthat/test-querychat_tools.R | 6 ++++-- 9 files changed, 48 insertions(+), 34 deletions(-) diff --git a/pkg-r/R/DataFrameSource.R b/pkg-r/R/DataFrameSource.R index 6349c0f1..cb723700 100644 --- a/pkg-r/R/DataFrameSource.R +++ b/pkg-r/R/DataFrameSource.R @@ -115,10 +115,12 @@ get_default_dataframe_engine <- function() { if (is_installed("RSQLite")) { return("sqlite") } - cli::cli_abort(c( - "No compatible database engine installed for DataFrameSource", - "i" = "Install either {.pkg duckdb} or {.pkg RSQLite}:", - " " = "{.run install.packages(\"duckdb\")}", - " " = "{.run install.packages(\"RSQLite\")}" - )) + cli::cli_abort( + c( + "No compatible database engine installed for DataFrameSource", + "i" = "Install either {.pkg duckdb} or {.pkg RSQLite}:", + " " = "{.run install.packages(\"duckdb\")}", + " " = "{.run install.packages(\"RSQLite\")}" + ) + ) } diff --git a/pkg-r/R/querychat_module.R b/pkg-r/R/querychat_module.R index 3d977d1d..2d4e597a 100644 --- a/pkg-r/R/querychat_module.R +++ b/pkg-r/R/querychat_module.R @@ -89,11 +89,13 @@ mod_server <- function( greeting_content <- if (!is.null(greeting) && any(nzchar(greeting))) { greeting } else { - cli::cli_warn(c( - "No {.arg greeting} provided to {.fn QueryChat}. Using the LLM {.arg client} to generate one now.", - "i" = "For faster startup, lower cost, and determinism, consider providing a {.arg greeting} to {.fn QueryChat}.", - "i" = "You can use your {.help querychat::QueryChat} object's {.fn $generate_greeting} method to generate a greeting." - )) + cli::cli_warn( + c( + "No {.arg greeting} provided to {.fn QueryChat}. Using the LLM {.arg client} to generate one now.", + "i" = "For faster startup, lower cost, and determinism, consider providing a {.arg greeting} to {.fn QueryChat}.", + "i" = "You can use your {.help querychat::QueryChat} object's {.fn $generate_greeting} method to generate a greeting." 
+ ) + ) chat$stream_async(GREETING_PROMPT) } diff --git a/pkg-r/R/querychat_tools.R b/pkg-r/R/querychat_tools.R index 60e99af9..49c33c0f 100644 --- a/pkg-r/R/querychat_tools.R +++ b/pkg-r/R/querychat_tools.R @@ -5,7 +5,8 @@ # summarizing the intent of the SQL query. tool_update_dashboard <- function( data_source, - update_fn = function(query, title) {} + update_fn = function(query, title) { + } ) { check_data_source(data_source) @@ -67,7 +68,6 @@ tool_update_dashboard_impl <- function(data_source, update_fn) { } } - tool_reset_dashboard <- function(reset_fn = identity) { check_function(reset_fn) @@ -132,10 +132,12 @@ querychat_tool_details_option <- function() { valid_settings <- c("expanded", "collapsed", "default") if (!setting %in% valid_settings) { - cli::cli_warn(c( - "Invalid value for {.code querychat.tool_details} option or {.envvar QUERYCHAT_TOOL_DETAILS} environment variable: {.val {setting}}", - "i" = "Must be one of: {.or {.val {valid_settings}}}" - )) + cli::cli_warn( + c( + "Invalid value for {.code querychat.tool_details} option or {.envvar QUERYCHAT_TOOL_DETAILS} environment variable: {.val {setting}}", + "i" = "Must be one of: {.or {.val {valid_settings}}}" + ) + ) return(NULL) } diff --git a/pkg-r/R/utils-check.R b/pkg-r/R/utils-check.R index b9242bb0..9b557028 100644 --- a/pkg-r/R/utils-check.R +++ b/pkg-r/R/utils-check.R @@ -12,7 +12,6 @@ check_data_source <- function( } } - # SQL table name validation ---------------------------------------------- #' Check SQL table name validity @@ -64,7 +63,6 @@ is_valid_sql_table_name <- function(x) { grepl("^[a-zA-Z][a-zA-Z0-9_]*$", x) } - # SQL query validation -------------------------------------------------------- #' Check SQL query for disallowed operations diff --git a/pkg-r/R/utils-ellmer.R b/pkg-r/R/utils-ellmer.R index aadbe895..045e97af 100644 --- a/pkg-r/R/utils-ellmer.R +++ b/pkg-r/R/utils-ellmer.R @@ -14,7 +14,6 @@ interpolate_package <- function(path, ..., .envir = parent.frame()) { 
ellmer::interpolate_file(path, ..., .envir = .envir) } - as_querychat_client <- function(client = NULL) { if (is.null(client)) { client <- querychat_client_option() diff --git a/pkg-r/tests/testthat/test-DBISource.R b/pkg-r/tests/testthat/test-DBISource.R index 69ff7be2..487052ff 100644 --- a/pkg-r/tests/testthat/test-DBISource.R +++ b/pkg-r/tests/testthat/test-DBISource.R @@ -82,9 +82,11 @@ describe("DBISource$test_query()", { expect_error(dbi_source$test_query("SELECT * FROM non_existent_table")) - expect_error(dbi_source$test_query( - "SELECT non_existent_column FROM test_table" - )) + expect_error( + dbi_source$test_query( + "SELECT non_existent_column FROM test_table" + ) + ) }) it("works with different data types", { diff --git a/pkg-r/tests/testthat/test-DataSource.R b/pkg-r/tests/testthat/test-DataSource.R index cf2fc046..7defe8c2 100644 --- a/pkg-r/tests/testthat/test-DataSource.R +++ b/pkg-r/tests/testthat/test-DataSource.R @@ -29,7 +29,6 @@ describe("DataSource base class", { }) }) - describe("DataSource$get_schema()", { it("returns proper schema for DataFrameSource", { skip_if_no_dataframe_engine() @@ -311,7 +310,6 @@ describe("DataSource$execute_query()", { }) }) - describe("test_query() column validation", { skip_if_no_dataframe_engine() diff --git a/pkg-r/tests/testthat/test-QueryChat.R b/pkg-r/tests/testthat/test-QueryChat.R index b01a66c5..a295c56d 100644 --- a/pkg-r/tests/testthat/test-QueryChat.R +++ b/pkg-r/tests/testthat/test-QueryChat.R @@ -364,9 +364,13 @@ describe("QueryChat$client()", { # Find and call the update tool tools <- client$get_tools() - update_tool <- tools[[which(sapply(tools, function(t) { - t@name == "querychat_update_dashboard" - }))]] + update_tool <- tools[[ + which( + sapply(tools, function(t) { + t@name == "querychat_update_dashboard" + }) + ) + ]] # Call the tool - it should execute the query and call the callback result <- update_tool( @@ -396,9 +400,13 @@ describe("QueryChat$client()", { # Find and call the reset tool 
tools <- client$get_tools() - reset_tool <- tools[[which(sapply(tools, function(t) { - t@name == "querychat_reset_dashboard" - }))]] + reset_tool <- tools[[ + which( + sapply(tools, function(t) { + t@name == "querychat_reset_dashboard" + }) + ) + ]] # Call the tool reset_tool() @@ -657,7 +665,8 @@ test_that("querychat_app() only cleans up data frame sources on exit", { # have to use an option because the code is evaluated in a far-away env options(.test_cleanup = cleanup) }, - app = function(...) {} + app = function(...) { + } ) ) withr::local_options(rlang_interactive = TRUE) diff --git a/pkg-r/tests/testthat/test-querychat_tools.R b/pkg-r/tests/testthat/test-querychat_tools.R index 99170514..ec37d2e7 100644 --- a/pkg-r/tests/testthat/test-querychat_tools.R +++ b/pkg-r/tests/testthat/test-querychat_tools.R @@ -6,8 +6,10 @@ test_that("tool_update_dashboard() checks inputs", { df_source <- local_data_frame_source(new_test_df()) expect_snapshot(error = TRUE, { tool_update_dashboard(df_source, update_fn = NULL) - tool_update_dashboard(df_source, update_fn = function(query) {}) - tool_update_dashboard(df_source, update_fn = function(title, extra) {}) + tool_update_dashboard(df_source, update_fn = function(query) { + }) + tool_update_dashboard(df_source, update_fn = function(title, extra) { + }) }) }) From 42f6d4cecd0622760d5dd3877e901643a42ecb2c Mon Sep 17 00:00:00 2001 From: Carson Date: Mon, 26 Jan 2026 15:34:15 -0600 Subject: [PATCH 17/45] chore: Minimize docstrings and remove redundant comments - Use single-line docstrings for internal functions - Remove redundant inline comments Co-Authored-By: Claude Opus 4.5 --- pkg-py/src/querychat/_snowflake.py | 67 ++---------------------------- 1 file changed, 4 insertions(+), 63 deletions(-) diff --git a/pkg-py/src/querychat/_snowflake.py b/pkg-py/src/querychat/_snowflake.py index c91df2ee..1331937a 100644 --- a/pkg-py/src/querychat/_snowflake.py +++ b/pkg-py/src/querychat/_snowflake.py @@ -35,29 +35,13 @@ def 
execute_raw_sql( query: str, backend: sqlalchemy.Engine | SQLBackend, ) -> list[dict[str, Any]]: - """ - Execute raw SQL and return results as list of row dicts. - - Parameters - ---------- - query - SQL query to execute - backend - SQLAlchemy Engine or Ibis SQLBackend - - Returns - ------- - list[dict[str, Any]] - Query results as list of row dictionaries - - """ + """Execute raw SQL and return results as list of row dicts.""" if isinstance(backend, sqlalchemy.Engine): with backend.connect() as conn: result = conn.execute(sqlalchemy.text(query)) keys = list(result.keys()) return [dict(zip(keys, row, strict=False)) for row in result.fetchall()] else: - # Ibis backend result_table = backend.sql(query) df = result_table.execute() return df.to_dict(orient="records") # type: ignore[return-value] @@ -66,21 +50,7 @@ def execute_raw_sql( def discover_semantic_views( backend: sqlalchemy.Engine | SQLBackend, ) -> list[SemanticViewInfo]: - """ - Discover semantic views in the current schema. - - Parameters - ---------- - backend - SQLAlchemy Engine or Ibis SQLBackend - - Returns - ------- - list[SemanticViewInfo] - List of semantic views with their DDL definitions - - """ - # Check env var for early exit + """Discover semantic views in the current schema.""" if os.environ.get("QUERYCHAT_DISABLE_SEMANTIC_VIEWS"): return [] @@ -111,23 +81,7 @@ def get_semantic_view_ddl( backend: sqlalchemy.Engine | SQLBackend, fq_name: str, ) -> str | None: - """ - Get DDL for a semantic view. 
- - Parameters - ---------- - backend - SQLAlchemy Engine or Ibis SQLBackend - fq_name - Fully qualified name (database.schema.view_name) - - Returns - ------- - str | None - The DDL text, or None if retrieval failed - - """ - # Escape single quotes to prevent SQL injection + """Get DDL for a semantic view by fully qualified name.""" safe_name = fq_name.replace("'", "''") rows = execute_raw_sql(f"SELECT GET_DDL('SEMANTIC_VIEW', '{safe_name}')", backend) if rows: @@ -136,20 +90,7 @@ def get_semantic_view_ddl( def format_semantic_views_section(semantic_views: list[SemanticViewInfo]) -> str: - """ - Format the semantic views section for schema output. - - Parameters - ---------- - semantic_views - List of semantic view metadata - - Returns - ------- - str - Formatted markdown section describing the semantic views - - """ + """Format the semantic views section for schema output.""" lines = [ "## Snowflake Semantic Views", "", From 1c19adb697eba9009c7bfa4c3f598ea1bde15828 Mon Sep 17 00:00:00 2001 From: cpsievert Date: Mon, 26 Jan 2026 21:40:59 +0000 Subject: [PATCH 18/45] `air format` (GitHub Actions) --- pkg-r/R/QueryChat.R | 6 ++---- pkg-r/R/querychat_tools.R | 3 +-- pkg-r/tests/testthat/test-QueryChat.R | 3 +-- pkg-r/tests/testthat/test-querychat_tools.R | 6 ++---- 4 files changed, 6 insertions(+), 12 deletions(-) diff --git a/pkg-r/R/QueryChat.R b/pkg-r/R/QueryChat.R index 7be4d9c8..f109a4ca 100644 --- a/pkg-r/R/QueryChat.R +++ b/pkg-r/R/QueryChat.R @@ -285,10 +285,8 @@ QueryChat <- R6::R6Class( #' `reset_dashboard` tool is called. 
client = function( tools = NA, - update_dashboard = function(query, title) { - }, - reset_dashboard = function() { - } + update_dashboard = function(query, title) {}, + reset_dashboard = function() {} ) { private$require_data_source("$client") diff --git a/pkg-r/R/querychat_tools.R b/pkg-r/R/querychat_tools.R index 49c33c0f..be29fb01 100644 --- a/pkg-r/R/querychat_tools.R +++ b/pkg-r/R/querychat_tools.R @@ -5,8 +5,7 @@ # summarizing the intent of the SQL query. tool_update_dashboard <- function( data_source, - update_fn = function(query, title) { - } + update_fn = function(query, title) {} ) { check_data_source(data_source) diff --git a/pkg-r/tests/testthat/test-QueryChat.R b/pkg-r/tests/testthat/test-QueryChat.R index a295c56d..15fa793d 100644 --- a/pkg-r/tests/testthat/test-QueryChat.R +++ b/pkg-r/tests/testthat/test-QueryChat.R @@ -665,8 +665,7 @@ test_that("querychat_app() only cleans up data frame sources on exit", { # have to use an option because the code is evaluated in a far-away env options(.test_cleanup = cleanup) }, - app = function(...) { - } + app = function(...) 
{} ) ) withr::local_options(rlang_interactive = TRUE) diff --git a/pkg-r/tests/testthat/test-querychat_tools.R b/pkg-r/tests/testthat/test-querychat_tools.R index ec37d2e7..99170514 100644 --- a/pkg-r/tests/testthat/test-querychat_tools.R +++ b/pkg-r/tests/testthat/test-querychat_tools.R @@ -6,10 +6,8 @@ test_that("tool_update_dashboard() checks inputs", { df_source <- local_data_frame_source(new_test_df()) expect_snapshot(error = TRUE, { tool_update_dashboard(df_source, update_fn = NULL) - tool_update_dashboard(df_source, update_fn = function(query) { - }) - tool_update_dashboard(df_source, update_fn = function(title, extra) { - }) + tool_update_dashboard(df_source, update_fn = function(query) {}) + tool_update_dashboard(df_source, update_fn = function(title, extra) {}) }) }) From 4c88c24419af8d90d3e44bd8fd7e822da2303767 Mon Sep 17 00:00:00 2001 From: cpsievert Date: Mon, 26 Jan 2026 21:41:03 +0000 Subject: [PATCH 19/45] `devtools::document()` (GitHub Actions) --- pkg-r/man/DBISource.Rd | 14 ++++++++++++++ pkg-r/man/DataFrameSource.Rd | 1 + pkg-r/man/TblSqlSource.Rd | 7 +++++++ 3 files changed, 22 insertions(+) diff --git a/pkg-r/man/DBISource.Rd b/pkg-r/man/DBISource.Rd index dd832062..aaa54fec 100644 --- a/pkg-r/man/DBISource.Rd +++ b/pkg-r/man/DBISource.Rd @@ -42,6 +42,7 @@ db_source$cleanup() \item \href{#method-DBISource-new}{\code{DBISource$new()}} \item \href{#method-DBISource-get_db_type}{\code{DBISource$get_db_type()}} \item \href{#method-DBISource-get_schema}{\code{DBISource$get_schema()}} +\item \href{#method-DBISource-has_semantic_views}{\code{DBISource$has_semantic_views()}} \item \href{#method-DBISource-execute_query}{\code{DBISource$execute_query()}} \item \href{#method-DBISource-test_query}{\code{DBISource$test_query()}} \item \href{#method-DBISource-get_data}{\code{DBISource$get_data()}} @@ -107,6 +108,19 @@ A string describing the schema } } \if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-DBISource-has_semantic_views}{}}} +\subsection{Method \code{has_semantic_views()}}{ +Check if semantic views are available +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{DBISource$has_semantic_views()}\if{html}{\out{
}} +} + +\subsection{Returns}{ +TRUE if semantic views were discovered +} +} +\if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-DBISource-execute_query}{}}} \subsection{Method \code{execute_query()}}{ diff --git a/pkg-r/man/DataFrameSource.Rd b/pkg-r/man/DataFrameSource.Rd index c16cf245..f6f4d15c 100644 --- a/pkg-r/man/DataFrameSource.Rd +++ b/pkg-r/man/DataFrameSource.Rd @@ -56,6 +56,7 @@ df_sqlite$cleanup()
  • querychat::DBISource$get_data()
  • querychat::DBISource$get_db_type()
  • querychat::DBISource$get_schema()
  • +
  • querychat::DBISource$has_semantic_views()
  • querychat::DBISource$test_query()
  • diff --git a/pkg-r/man/TblSqlSource.Rd b/pkg-r/man/TblSqlSource.Rd index 71e98a6a..0a6ab672 100644 --- a/pkg-r/man/TblSqlSource.Rd +++ b/pkg-r/man/TblSqlSource.Rd @@ -54,6 +54,13 @@ mtcars_source$cleanup() \item \href{#method-TblSqlSource-clone}{\code{TblSqlSource$clone()}} } } +\if{html}{\out{ +
    Inherited methods + +
    +}} \if{html}{\out{
    }} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-TblSqlSource-new}{}}} From 20bb7ffb1eeb301cb1dba4bfe417552580c10828 Mon Sep 17 00:00:00 2001 From: Carson Date: Mon, 26 Jan 2026 15:41:03 -0600 Subject: [PATCH 20/45] refactor: Remove _semantic_views attribute and has_semantic_views property - Simplify get_schema() to discover semantic views inline without storing - Detect semantic views in system prompt by checking schema string - Remove unnecessary state management Co-Authored-By: Claude Opus 4.5 --- pkg-py/src/querychat/_datasource.py | 42 +++------------ pkg-py/src/querychat/_system_prompt.py | 5 +- pkg-py/tests/test_snowflake_source.py | 72 -------------------------- 3 files changed, 9 insertions(+), 110 deletions(-) diff --git a/pkg-py/src/querychat/_datasource.py b/pkg-py/src/querychat/_datasource.py index 81b419ae..258b3686 100644 --- a/pkg-py/src/querychat/_datasource.py +++ b/pkg-py/src/querychat/_datasource.py @@ -461,9 +461,6 @@ def __init__( self._columns_info = inspector.get_columns(table_name) self._colnames = [col["name"] for col in self._columns_info] - # Semantic views are discovered lazily in get_schema() - self._semantic_views = None - def get_db_type(self) -> str: """ Get the database type. 
@@ -496,23 +493,13 @@ def get_schema(self, *, categorical_threshold: int) -> str: self._add_column_stats(columns, categorical_threshold) schema = format_schema(self.table_name, columns) - # Discover Snowflake semantic views lazily (only on first call) - if self._semantic_views is None: - if self._engine.dialect.name.lower() == "snowflake": - self._semantic_views = discover_semantic_views(self._engine) - else: - self._semantic_views = [] - - if self._semantic_views: - schema = f"{schema}\n\n{format_semantic_views_section(self._semantic_views)}" + if self._engine.dialect.name.lower() == "snowflake": + semantic_views = discover_semantic_views(self._engine) + if semantic_views: + schema = f"{schema}\n\n{format_semantic_views_section(semantic_views)}" return schema - @property - def has_semantic_views(self) -> bool: - """Check if semantic views are available.""" - return bool(self._semantic_views) - @staticmethod def _make_column_meta(name: str, sa_type: sqltypes.TypeEngine) -> ColumnMeta: """Create ColumnMeta from SQLAlchemy type.""" @@ -974,9 +961,6 @@ def __init__(self, table: ibis.Table, table_name: str): ) self._colnames = list(colnames) - # Semantic views are discovered lazily in get_schema() - self._semantic_views = None - def get_db_type(self) -> str: return self._backend.name @@ -987,23 +971,13 @@ def get_schema(self, *, categorical_threshold: int) -> str: self._add_column_stats(columns, self._table, categorical_threshold) schema = format_schema(self.table_name, columns) - # Discover Snowflake semantic views lazily (only on first call) - if self._semantic_views is None: - if self._backend.name.lower() == "snowflake": - self._semantic_views = discover_semantic_views(self._backend) - else: - self._semantic_views = [] - - if self._semantic_views: - schema = f"{schema}\n\n{format_semantic_views_section(self._semantic_views)}" + if self._backend.name.lower() == "snowflake": + semantic_views = discover_semantic_views(self._backend) + if semantic_views: + schema = 
f"{schema}\n\n{format_semantic_views_section(semantic_views)}" return schema - @property - def has_semantic_views(self) -> bool: - """Check if semantic views are available.""" - return bool(self._semantic_views) - @staticmethod def _make_column_meta(name: str, dtype: IbisDataType) -> ColumnMeta: """Create ColumnMeta from an ibis dtype.""" diff --git a/pkg-py/src/querychat/_system_prompt.py b/pkg-py/src/querychat/_system_prompt.py index ebcc90f0..b3b4e593 100644 --- a/pkg-py/src/querychat/_system_prompt.py +++ b/pkg-py/src/querychat/_system_prompt.py @@ -75,10 +75,7 @@ def render(self, tools: tuple[TOOL_GROUPS, ...] | None) -> str: """ db_type = self.data_source.get_db_type() is_duck_db = db_type.lower() == "duckdb" - - # Check for semantic views (only available with SnowflakeSource) - # Use getattr to safely access the property that only exists on SnowflakeSource - has_semantic_views: bool = getattr(self.data_source, "has_semantic_views", False) + has_semantic_views = "## Snowflake Semantic Views" in self.schema context = { "db_type": db_type, diff --git a/pkg-py/tests/test_snowflake_source.py b/pkg-py/tests/test_snowflake_source.py index 424b93a1..292e76de 100644 --- a/pkg-py/tests/test_snowflake_source.py +++ b/pkg-py/tests/test_snowflake_source.py @@ -289,11 +289,8 @@ def test_discovery_for_snowflake_backend(self): ) as mock_discover, ): source = SQLAlchemySource(mock_engine, "test_table") - # Discovery should NOT happen in __init__ mock_discover.assert_not_called() - assert source._semantic_views is None - # Discovery happens in get_schema with patch.object(source, "_add_column_stats"): source.get_schema(categorical_threshold=20) @@ -321,7 +318,6 @@ def test_discovery_skipped_for_non_snowflake(self): source.get_schema(categorical_threshold=20) mock_discover.assert_not_called() - assert source._semantic_views == [] def test_get_schema_includes_semantic_views(self): """Test that get_schema includes semantic view section.""" @@ -370,36 +366,6 @@ def 
test_get_schema_without_semantic_views(self): assert "Table: test_table" in schema assert "## Snowflake Semantic Views" not in schema - def test_has_semantic_views_property(self): - """Test the has_semantic_views property.""" - from querychat._datasource import SQLAlchemySource - - views = [SemanticViewInfo(name="db.schema.metrics", ddl="CREATE SEMANTIC VIEW")] - - mock_engine = MagicMock() - mock_engine.dialect.name = "snowflake" - mock_inspector = MagicMock() - mock_inspector.has_table.return_value = True - mock_inspector.get_columns.return_value = [{"name": "id", "type": MagicMock()}] - - with ( - patch("querychat._datasource.inspect", return_value=mock_inspector), - patch( - "querychat._datasource.discover_semantic_views", - return_value=views, - ), - ): - source = SQLAlchemySource(mock_engine, "test_table") - - # Before get_schema, has_semantic_views is False (None evaluates to False) - assert source.has_semantic_views is False - - with patch.object(source, "_add_column_stats"): - source.get_schema(categorical_threshold=20) - - # After get_schema, has_semantic_views is True - assert source.has_semantic_views is True - class TestIbisSourceSemanticViews: """Tests for IbisSource semantic view discovery.""" @@ -425,11 +391,8 @@ def test_discovery_for_snowflake_backend(self): "querychat._datasource.discover_semantic_views", return_value=[] ) as mock_discover: source = IbisSource(mock_table, "test") - # Discovery should NOT happen in __init__ mock_discover.assert_not_called() - assert source._semantic_views is None - # Discovery happens in get_schema with patch.object(IbisSource, "_add_column_stats"): source.get_schema(categorical_threshold=20) @@ -461,7 +424,6 @@ def test_discovery_skipped_for_non_snowflake(self): source.get_schema(categorical_threshold=20) mock_discover.assert_not_called() - assert source._semantic_views == [] def test_get_schema_includes_semantic_views(self): """Test that get_schema includes semantic view section.""" @@ -494,37 +456,3 @@ def 
test_get_schema_includes_semantic_views(self): assert "Table: test_table" in schema assert "## Snowflake Semantic Views" in schema assert "db.schema.metrics" in schema - - def test_has_semantic_views_property(self): - """Test the has_semantic_views property.""" - from ibis.backends.sql import SQLBackend - from querychat._datasource import IbisSource - - views = [SemanticViewInfo(name="db.schema.metrics", ddl="CREATE SEMANTIC VIEW")] - - mock_table = MagicMock() - mock_backend = MagicMock(spec=SQLBackend) - mock_backend.name = "snowflake" - mock_table.get_backend.return_value = mock_backend - mock_schema = MagicMock() - mock_dtype = MagicMock() - mock_dtype.is_numeric.return_value = True - mock_dtype.is_integer.return_value = True - mock_schema.items.return_value = [("id", mock_dtype)] - mock_schema.names = ["id"] - mock_table.schema.return_value = mock_schema - - with patch( - "querychat._datasource.discover_semantic_views", - return_value=views, - ): - source = IbisSource(mock_table, "test_table") - - # Before get_schema, has_semantic_views is False - assert source.has_semantic_views is False - - with patch.object(IbisSource, "_add_column_stats"): - source.get_schema(categorical_threshold=20) - - # After get_schema, has_semantic_views is True - assert source.has_semantic_views is True From eda50f845ad954f3742637f32e7580413d52c354 Mon Sep 17 00:00:00 2001 From: Carson Date: Mon, 26 Jan 2026 16:16:38 -0600 Subject: [PATCH 21/45] refactor: Consolidate semantic view information into single prompt section Move semantic view DDLs out of into a dedicated section, placing all semantic view info in one cohesive location in the prompt template. 
Structure is now: - General explanation (why semantic views matter) - Query syntax documentation - A dedicated tag wrapping the table-specific DDL definitions Changes: - Remove semantic view info from get_schema() output - Add get_semantic_view_ddls() method to return just DDL content - Add semantic_view_ddls template variable to prompt context - Update prompt.md to include the semantic view DDLs (wrapped in a dedicated tag) after the syntax docs - Rename format_semantic_views_section to format_semantic_view_ddls Co-Authored-By: Claude Opus 4.5 --- pkg-py/src/querychat/_datasource.py | 72 +++++++++++++++------ pkg-py/src/querychat/_snowflake.py | 17 +---- pkg-py/src/querychat/_system_prompt.py | 14 +++- pkg-py/src/querychat/prompts/prompt.md | 4 ++ pkg-py/tests/test_snowflake_source.py | 24 +++---- pkg-r/R/DBISource.R | 51 ++++++--------- pkg-r/R/QueryChat.R | 6 +- pkg-r/R/QueryChatSystemPrompt.R | 3 + pkg-r/R/querychat_tools.R | 3 +- pkg-r/inst/prompts/prompt.md | 4 ++ pkg-r/tests/testthat/test-QueryChat.R | 3 +- pkg-r/tests/testthat/test-querychat_tools.R | 6 +- 12 files changed, 121 insertions(+), 86 deletions(-) diff --git a/pkg-py/src/querychat/_datasource.py b/pkg-py/src/querychat/_datasource.py index 258b3686..b4bd04bd 100644 --- a/pkg-py/src/querychat/_datasource.py +++ b/pkg-py/src/querychat/_datasource.py @@ -12,8 +12,9 @@ from ._df_compat import read_sql from ._snowflake import ( + SemanticViewInfo, discover_semantic_views, - format_semantic_views_section, + format_semantic_view_ddls, ) from ._utils import as_narwhals, check_query @@ -60,7 +61,11 @@ def format_schema(table_name: str, columns: list[ColumnMeta]) -> str: for col in columns: lines.append(f"- {col.name} ({col.sql_type})") - if col.kind in ("numeric", "date") and col.min_val is not None and col.max_val is not None: + if ( + col.kind in ("numeric", "date") + and col.min_val is not None + and col.max_val is not None + ): lines.append(f" Range: {col.min_val} to {col.max_val}") elif col.categories: cats = ", ".join(f"'{v}'" for v in col.categories) @@ -451,6 +456,7 @@ 
def __init__( """ self._engine = engine self.table_name = table_name + self._semantic_views: list[SemanticViewInfo] | None = None # Validate table exists inspector = inspect(self._engine) @@ -491,21 +497,34 @@ def get_schema(self, *, categorical_threshold: int) -> str: for col in self._columns_info ] self._add_column_stats(columns, categorical_threshold) - schema = format_schema(self.table_name, columns) - if self._engine.dialect.name.lower() == "snowflake": - semantic_views = discover_semantic_views(self._engine) - if semantic_views: - schema = f"{schema}\n\n{format_semantic_views_section(semantic_views)}" + # Discover semantic views lazily (only on first call) + if self._semantic_views is None: + if self._engine.dialect.name.lower() == "snowflake": + self._semantic_views = discover_semantic_views(self._engine) + else: + self._semantic_views = [] - return schema + return format_schema(self.table_name, columns) + + def has_semantic_views(self) -> bool: + """Check if semantic views are available.""" + return bool(self._semantic_views) + + def get_semantic_view_ddls(self) -> str: + """Get formatted DDL content for semantic views.""" + if not self.has_semantic_views(): + return "" + return format_semantic_view_ddls(self._semantic_views) # type: ignore[arg-type] @staticmethod def _make_column_meta(name: str, sa_type: sqltypes.TypeEngine) -> ColumnMeta: """Create ColumnMeta from SQLAlchemy type.""" kind: Literal["numeric", "text", "date", "other"] - if isinstance(sa_type, (sqltypes.Integer, sqltypes.BigInteger, sqltypes.SmallInteger)): + if isinstance( + sa_type, (sqltypes.Integer, sqltypes.BigInteger, sqltypes.SmallInteger) + ): kind = "numeric" sql_type = "INTEGER" elif isinstance(sa_type, sqltypes.Float): @@ -548,7 +567,9 @@ def _add_column_stats( select_parts.append(f"MIN({col.name}) as {col.name}__min") select_parts.append(f"MAX({col.name}) as {col.name}__max") elif col.kind == "text": - select_parts.append(f"COUNT(DISTINCT {col.name}) as {col.name}__nunique") + 
select_parts.append( + f"COUNT(DISTINCT {col.name}) as {col.name}__nunique" + ) if not select_parts: return @@ -556,7 +577,9 @@ def _add_column_stats( # Execute stats query stats = {} try: - stats_query = text(f"SELECT {', '.join(select_parts)} FROM {self.table_name}") + stats_query = text( + f"SELECT {', '.join(select_parts)} FROM {self.table_name}" + ) with self._get_connection() as conn: result = conn.execute(stats_query).fetchone() if result: @@ -572,7 +595,8 @@ def _add_column_stats( # Find text columns that qualify as categorical categorical_cols = [ - col for col in columns + col + for col in columns if col.kind == "text" and (nunique := stats.get(f"{col.name}__nunique")) and nunique <= categorical_threshold @@ -945,6 +969,7 @@ def __init__(self, table: ibis.Table, table_name: str): self._table = table self.table_name = table_name self._schema = table.schema() + self._semantic_views: list[SemanticViewInfo] | None = None backend = table.get_backend() if not isinstance(backend, SQLBackend): @@ -969,14 +994,25 @@ def get_schema(self, *, categorical_threshold: int) -> str: self._make_column_meta(name, dtype) for name, dtype in self._schema.items() ] self._add_column_stats(columns, self._table, categorical_threshold) - schema = format_schema(self.table_name, columns) - if self._backend.name.lower() == "snowflake": - semantic_views = discover_semantic_views(self._backend) - if semantic_views: - schema = f"{schema}\n\n{format_semantic_views_section(semantic_views)}" + # Discover semantic views lazily (only on first call) + if self._semantic_views is None: + if self._backend.name.lower() == "snowflake": + self._semantic_views = discover_semantic_views(self._backend) + else: + self._semantic_views = [] + + return format_schema(self.table_name, columns) + + def has_semantic_views(self) -> bool: + """Check if semantic views are available.""" + return bool(self._semantic_views) - return schema + def get_semantic_view_ddls(self) -> str: + """Get formatted DDL content for 
semantic views.""" + if not self.has_semantic_views(): + return "" + return format_semantic_view_ddls(self._semantic_views) # type: ignore[arg-type] @staticmethod def _make_column_meta(name: str, dtype: IbisDataType) -> ColumnMeta: diff --git a/pkg-py/src/querychat/_snowflake.py b/pkg-py/src/querychat/_snowflake.py index 1331937a..c61a5fe4 100644 --- a/pkg-py/src/querychat/_snowflake.py +++ b/pkg-py/src/querychat/_snowflake.py @@ -89,20 +89,9 @@ def get_semantic_view_ddl( return None -def format_semantic_views_section(semantic_views: list[SemanticViewInfo]) -> str: - """Format the semantic views section for schema output.""" - lines = [ - "## Snowflake Semantic Views", - "", - "This database has Semantic Views available. Semantic Views provide a curated ", - "layer over raw data with pre-defined metrics, dimensions, and relationships. ", - "They encode business logic and calculation rules that ensure consistent, ", - "accurate results.", - "", - "**IMPORTANT**: When a Semantic View covers the data you need, prefer it over ", - "raw table queries to benefit from certified metric definitions.", - "", - ] +def format_semantic_view_ddls(semantic_views: list[SemanticViewInfo]) -> str: + """Format just the DDL definitions for semantic views.""" + lines: list[str] = [] for sv in semantic_views: lines.append(f"### Semantic View: `{sv.name}`") diff --git a/pkg-py/src/querychat/_system_prompt.py b/pkg-py/src/querychat/_system_prompt.py index b3b4e593..38f6033c 100644 --- a/pkg-py/src/querychat/_system_prompt.py +++ b/pkg-py/src/querychat/_system_prompt.py @@ -75,13 +75,23 @@ def render(self, tools: tuple[TOOL_GROUPS, ...] 
| None) -> str: """ db_type = self.data_source.get_db_type() is_duck_db = db_type.lower() == "duckdb" - has_semantic_views = "## Snowflake Semantic Views" in self.schema + + # Check for semantic views (available with SQLAlchemySource/IbisSource for Snowflake) + has_semantic_views = ( + hasattr(self.data_source, "has_semantic_views") + and self.data_source.has_semantic_views() + ) context = { "db_type": db_type, "is_duck_db": is_duck_db, "has_semantic_views": has_semantic_views, - "semantic_view_syntax": get_semantic_view_syntax() if has_semantic_views else "", + "semantic_view_syntax": get_semantic_view_syntax() + if has_semantic_views + else "", + "semantic_view_ddls": ( + self.data_source.get_semantic_view_ddls() if has_semantic_views else "" + ), "schema": self.schema, "data_description": self.data_description, "extra_instructions": self.extra_instructions, diff --git a/pkg-py/src/querychat/prompts/prompt.md b/pkg-py/src/querychat/prompts/prompt.md index 8712467f..3e125d47 100644 --- a/pkg-py/src/querychat/prompts/prompt.md +++ b/pkg-py/src/querychat/prompts/prompt.md @@ -79,6 +79,10 @@ quantile_cont(salary, 0.5) Real-world example: Raw table queries for "external customer revenue" returned $184B while the semantic model's certified metric returned $84.5B (the correct answer). The raw query was 2x+ too high because it ignored discounts and included invalid transaction codes. 
{{{semantic_view_syntax}}} + + +{{{semantic_view_ddls}}} + {{/has_semantic_views}} ## Your Capabilities diff --git a/pkg-py/tests/test_snowflake_source.py b/pkg-py/tests/test_snowflake_source.py index 292e76de..56d840e3 100644 --- a/pkg-py/tests/test_snowflake_source.py +++ b/pkg-py/tests/test_snowflake_source.py @@ -8,7 +8,7 @@ SemanticViewInfo, discover_semantic_views, execute_raw_sql, - format_semantic_views_section, + format_semantic_view_ddls, get_semantic_view_ddl, ) @@ -37,19 +37,19 @@ def test_equality(self): assert info1 != info3 -class TestFormatSemanticViewsSection: - """Tests for semantic view formatting.""" +class TestFormatSemanticViewDdls: + """Tests for semantic view DDL formatting.""" def test_format_single_view(self): """Test that format produces expected markdown structure for single view.""" - views = [SemanticViewInfo(name="db.schema.view1", ddl="CREATE SEMANTIC VIEW v1")] - section = format_semantic_views_section(views) + views = [ + SemanticViewInfo(name="db.schema.view1", ddl="CREATE SEMANTIC VIEW v1") + ] + section = format_semantic_view_ddls(views) - assert "## Snowflake Semantic Views" in section assert "db.schema.view1" in section assert "CREATE SEMANTIC VIEW v1" in section assert "```sql" in section - assert "**IMPORTANT**" in section def test_format_multiple_views(self): """Test formatting with multiple views.""" @@ -57,7 +57,7 @@ def test_format_multiple_views(self): SemanticViewInfo(name="db.schema.view1", ddl="CREATE SEMANTIC VIEW v1"), SemanticViewInfo(name="db.schema.view2", ddl="CREATE SEMANTIC VIEW v2"), ] - section = format_semantic_views_section(views) + section = format_semantic_view_ddls(views) assert "db.schema.view1" in section assert "db.schema.view2" in section @@ -308,9 +308,7 @@ def test_discovery_skipped_for_non_snowflake(self): with ( patch("querychat._datasource.inspect", return_value=mock_inspector), - patch( - "querychat._datasource.discover_semantic_views" - ) as mock_discover, + 
patch("querychat._datasource.discover_semantic_views") as mock_discover, ): source = SQLAlchemySource(mock_engine, "test_table") @@ -415,9 +413,7 @@ def test_discovery_skipped_for_non_snowflake(self): mock_schema.names = ["id"] mock_table.schema.return_value = mock_schema - with patch( - "querychat._datasource.discover_semantic_views" - ) as mock_discover: + with patch("querychat._datasource.discover_semantic_views") as mock_discover: source = IbisSource(mock_table, "test") with patch.object(IbisSource, "_add_column_stats"): diff --git a/pkg-r/R/DBISource.R b/pkg-r/R/DBISource.R index ecd5a369..82a36e4e 100644 --- a/pkg-r/R/DBISource.R +++ b/pkg-r/R/DBISource.R @@ -109,11 +109,6 @@ DBISource <- R6::R6Class( #' @return A string describing the schema get_schema = function(categorical_threshold = 20) { check_number_whole(categorical_threshold, min = 1) - schema <- get_schema_impl( - private$conn, - self$table_name, - categorical_threshold - ) # Discover Snowflake semantic views lazily (only on first call) if (is.null(private$semantic_views)) { @@ -124,14 +119,11 @@ DBISource <- R6::R6Class( } } - if (length(private$semantic_views) > 0) { - semantic_section <- format_semantic_views_section( - private$semantic_views - ) - schema <- paste(schema, semantic_section, sep = "\n\n") - } - - schema + get_schema_impl( + private$conn, + self$table_name, + categorical_threshold + ) }, #' @description @@ -141,6 +133,16 @@ DBISource <- R6::R6Class( length(private$semantic_views %||% list()) > 0 }, + #' @description + #' Get formatted DDL content for semantic views + #' @return A string with DDL definitions, or empty string if none + get_semantic_view_ddls = function() { + if (!self$has_semantic_views()) { + return("") + } + format_semantic_view_ddls(private$semantic_views) + }, + #' @description #' Execute a SQL query #' @@ -515,28 +517,13 @@ get_semantic_view_ddl <- function(conn, fq_name) { } } -#' Format Semantic Views Section for Schema Output +#' Format Semantic View DDLs #' 
#' @param semantic_views A list of semantic view info (name and ddl) -#' @return A formatted string describing the semantic views +#' @return A formatted string with just the DDL definitions #' @noRd -format_semantic_views_section <- function(semantic_views) { - lines <- c( - "## Snowflake Semantic Views", - "", - paste0( - "This database has Semantic Views available. Semantic Views provide a ", - "curated layer over raw data with pre-defined metrics, dimensions, and ", - "relationships. They encode business logic and calculation rules that ", - "ensure consistent, accurate results." - ), - "", - paste0( - "**IMPORTANT**: When a Semantic View covers the data you need, prefer ", - "it over raw table queries to benefit from certified metric definitions." - ), - "" - ) +format_semantic_view_ddls <- function(semantic_views) { + lines <- character(0) for (sv in semantic_views) { lines <- c( diff --git a/pkg-r/R/QueryChat.R b/pkg-r/R/QueryChat.R index f109a4ca..7be4d9c8 100644 --- a/pkg-r/R/QueryChat.R +++ b/pkg-r/R/QueryChat.R @@ -285,8 +285,10 @@ QueryChat <- R6::R6Class( #' `reset_dashboard` tool is called. 
client = function( tools = NA, - update_dashboard = function(query, title) {}, - reset_dashboard = function() {} + update_dashboard = function(query, title) { + }, + reset_dashboard = function() { + } ) { private$require_data_source("$client") diff --git a/pkg-r/R/QueryChatSystemPrompt.R b/pkg-r/R/QueryChatSystemPrompt.R index 4b0a8b42..f8943373 100644 --- a/pkg-r/R/QueryChatSystemPrompt.R +++ b/pkg-r/R/QueryChatSystemPrompt.R @@ -98,6 +98,9 @@ QueryChatSystemPrompt <- R6::R6Class( semantic_view_syntax = if (has_semantic_views) { get_semantic_view_syntax() }, + semantic_view_ddls = if (has_semantic_views) { + self$data_source$get_semantic_view_ddls() + }, schema = self$schema, data_description = self$data_description, extra_instructions = self$extra_instructions, diff --git a/pkg-r/R/querychat_tools.R b/pkg-r/R/querychat_tools.R index be29fb01..49c33c0f 100644 --- a/pkg-r/R/querychat_tools.R +++ b/pkg-r/R/querychat_tools.R @@ -5,7 +5,8 @@ # summarizing the intent of the SQL query. tool_update_dashboard <- function( data_source, - update_fn = function(query, title) {} + update_fn = function(query, title) { + } ) { check_data_source(data_source) diff --git a/pkg-r/inst/prompts/prompt.md b/pkg-r/inst/prompts/prompt.md index 8712467f..3e125d47 100644 --- a/pkg-r/inst/prompts/prompt.md +++ b/pkg-r/inst/prompts/prompt.md @@ -79,6 +79,10 @@ quantile_cont(salary, 0.5) Real-world example: Raw table queries for "external customer revenue" returned $184B while the semantic model's certified metric returned $84.5B (the correct answer). The raw query was 2x+ too high because it ignored discounts and included invalid transaction codes. 
{{{semantic_view_syntax}}} + + +{{{semantic_view_ddls}}} + {{/has_semantic_views}} ## Your Capabilities diff --git a/pkg-r/tests/testthat/test-QueryChat.R b/pkg-r/tests/testthat/test-QueryChat.R index 15fa793d..a295c56d 100644 --- a/pkg-r/tests/testthat/test-QueryChat.R +++ b/pkg-r/tests/testthat/test-QueryChat.R @@ -665,7 +665,8 @@ test_that("querychat_app() only cleans up data frame sources on exit", { # have to use an option because the code is evaluated in a far-away env options(.test_cleanup = cleanup) }, - app = function(...) {} + app = function(...) { + } ) ) withr::local_options(rlang_interactive = TRUE) diff --git a/pkg-r/tests/testthat/test-querychat_tools.R b/pkg-r/tests/testthat/test-querychat_tools.R index 99170514..ec37d2e7 100644 --- a/pkg-r/tests/testthat/test-querychat_tools.R +++ b/pkg-r/tests/testthat/test-querychat_tools.R @@ -6,8 +6,10 @@ test_that("tool_update_dashboard() checks inputs", { df_source <- local_data_frame_source(new_test_df()) expect_snapshot(error = TRUE, { tool_update_dashboard(df_source, update_fn = NULL) - tool_update_dashboard(df_source, update_fn = function(query) {}) - tool_update_dashboard(df_source, update_fn = function(title, extra) {}) + tool_update_dashboard(df_source, update_fn = function(query) { + }) + tool_update_dashboard(df_source, update_fn = function(title, extra) { + }) }) }) From 6aa6a634c87c8223586680e6f75ef326d03816ef Mon Sep 17 00:00:00 2001 From: cpsievert Date: Mon, 26 Jan 2026 22:21:25 +0000 Subject: [PATCH 22/45] `air format` (GitHub Actions) --- pkg-r/R/QueryChat.R | 6 ++---- pkg-r/R/querychat_tools.R | 3 +-- pkg-r/tests/testthat/test-QueryChat.R | 3 +-- pkg-r/tests/testthat/test-querychat_tools.R | 6 ++---- 4 files changed, 6 insertions(+), 12 deletions(-) diff --git a/pkg-r/R/QueryChat.R b/pkg-r/R/QueryChat.R index 7be4d9c8..f109a4ca 100644 --- a/pkg-r/R/QueryChat.R +++ b/pkg-r/R/QueryChat.R @@ -285,10 +285,8 @@ QueryChat <- R6::R6Class( #' `reset_dashboard` tool is called. 
client = function( tools = NA, - update_dashboard = function(query, title) { - }, - reset_dashboard = function() { - } + update_dashboard = function(query, title) {}, + reset_dashboard = function() {} ) { private$require_data_source("$client") diff --git a/pkg-r/R/querychat_tools.R b/pkg-r/R/querychat_tools.R index 49c33c0f..be29fb01 100644 --- a/pkg-r/R/querychat_tools.R +++ b/pkg-r/R/querychat_tools.R @@ -5,8 +5,7 @@ # summarizing the intent of the SQL query. tool_update_dashboard <- function( data_source, - update_fn = function(query, title) { - } + update_fn = function(query, title) {} ) { check_data_source(data_source) diff --git a/pkg-r/tests/testthat/test-QueryChat.R b/pkg-r/tests/testthat/test-QueryChat.R index a295c56d..15fa793d 100644 --- a/pkg-r/tests/testthat/test-QueryChat.R +++ b/pkg-r/tests/testthat/test-QueryChat.R @@ -665,8 +665,7 @@ test_that("querychat_app() only cleans up data frame sources on exit", { # have to use an option because the code is evaluated in a far-away env options(.test_cleanup = cleanup) }, - app = function(...) { - } + app = function(...) 
{} ) ) withr::local_options(rlang_interactive = TRUE) diff --git a/pkg-r/tests/testthat/test-querychat_tools.R b/pkg-r/tests/testthat/test-querychat_tools.R index ec37d2e7..99170514 100644 --- a/pkg-r/tests/testthat/test-querychat_tools.R +++ b/pkg-r/tests/testthat/test-querychat_tools.R @@ -6,10 +6,8 @@ test_that("tool_update_dashboard() checks inputs", { df_source <- local_data_frame_source(new_test_df()) expect_snapshot(error = TRUE, { tool_update_dashboard(df_source, update_fn = NULL) - tool_update_dashboard(df_source, update_fn = function(query) { - }) - tool_update_dashboard(df_source, update_fn = function(title, extra) { - }) + tool_update_dashboard(df_source, update_fn = function(query) {}) + tool_update_dashboard(df_source, update_fn = function(title, extra) {}) }) }) From 1a77f673694c6644f2553426143fb935b468155a Mon Sep 17 00:00:00 2001 From: cpsievert Date: Mon, 26 Jan 2026 22:21:29 +0000 Subject: [PATCH 23/45] `devtools::document()` (GitHub Actions) --- pkg-r/man/DBISource.Rd | 14 ++++++++++++++ pkg-r/man/DataFrameSource.Rd | 1 + pkg-r/man/TblSqlSource.Rd | 1 + 3 files changed, 16 insertions(+) diff --git a/pkg-r/man/DBISource.Rd b/pkg-r/man/DBISource.Rd index aaa54fec..9793d220 100644 --- a/pkg-r/man/DBISource.Rd +++ b/pkg-r/man/DBISource.Rd @@ -43,6 +43,7 @@ db_source$cleanup() \item \href{#method-DBISource-get_db_type}{\code{DBISource$get_db_type()}} \item \href{#method-DBISource-get_schema}{\code{DBISource$get_schema()}} \item \href{#method-DBISource-has_semantic_views}{\code{DBISource$has_semantic_views()}} +\item \href{#method-DBISource-get_semantic_view_ddls}{\code{DBISource$get_semantic_view_ddls()}} \item \href{#method-DBISource-execute_query}{\code{DBISource$execute_query()}} \item \href{#method-DBISource-test_query}{\code{DBISource$test_query()}} \item \href{#method-DBISource-get_data}{\code{DBISource$get_data()}} @@ -121,6 +122,19 @@ TRUE if semantic views were discovered } } \if{html}{\out{
    }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-DBISource-get_semantic_view_ddls}{}}} +\subsection{Method \code{get_semantic_view_ddls()}}{ +Get formatted DDL content for semantic views +\subsection{Usage}{ +\if{html}{\out{
    }}\preformatted{DBISource$get_semantic_view_ddls()}\if{html}{\out{
    }} +} + +\subsection{Returns}{ +A string with DDL definitions, or empty string if none +} +} +\if{html}{\out{
    }} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-DBISource-execute_query}{}}} \subsection{Method \code{execute_query()}}{ diff --git a/pkg-r/man/DataFrameSource.Rd b/pkg-r/man/DataFrameSource.Rd index f6f4d15c..4cc648a9 100644 --- a/pkg-r/man/DataFrameSource.Rd +++ b/pkg-r/man/DataFrameSource.Rd @@ -56,6 +56,7 @@ df_sqlite$cleanup()
  • querychat::DBISource$get_data()
  • querychat::DBISource$get_db_type()
  • querychat::DBISource$get_schema()
  • +
  • querychat::DBISource$get_semantic_view_ddls()
  • querychat::DBISource$has_semantic_views()
  • querychat::DBISource$test_query()
  • diff --git a/pkg-r/man/TblSqlSource.Rd b/pkg-r/man/TblSqlSource.Rd index 0a6ab672..3e3fa060 100644 --- a/pkg-r/man/TblSqlSource.Rd +++ b/pkg-r/man/TblSqlSource.Rd @@ -57,6 +57,7 @@ mtcars_source$cleanup() \if{html}{\out{
    Inherited methods
    From 7cb9d36908d86089b82fe47f1506dae61c186474 Mon Sep 17 00:00:00 2001 From: Carson Date: Mon, 26 Jan 2026 16:25:47 -0600 Subject: [PATCH 24/45] refactor: Add semantic view methods to DataSource base class - Add has_semantic_views() and get_semantic_view_ddls() to DataSource with default implementations returning False and empty string - Remove hasattr check in _system_prompt.py since methods now exist on base class - Revert superfluous formatting changes from ruff Co-Authored-By: Claude Opus 4.5 --- pkg-py/src/querychat/_datasource.py | 29 ++++++++++++-------------- pkg-py/src/querychat/_system_prompt.py | 15 +++---------- pkg-py/tests/test_snowflake_source.py | 4 +--- 3 files changed, 17 insertions(+), 31 deletions(-) diff --git a/pkg-py/src/querychat/_datasource.py b/pkg-py/src/querychat/_datasource.py index b4bd04bd..7a6120de 100644 --- a/pkg-py/src/querychat/_datasource.py +++ b/pkg-py/src/querychat/_datasource.py @@ -61,11 +61,7 @@ def format_schema(table_name: str, columns: list[ColumnMeta]) -> str: for col in columns: lines.append(f"- {col.name} ({col.sql_type})") - if ( - col.kind in ("numeric", "date") - and col.min_val is not None - and col.max_val is not None - ): + if col.kind in ("numeric", "date") and col.min_val is not None and col.max_val is not None: lines.append(f" Range: {col.min_val} to {col.max_val}") elif col.categories: cats = ", ".join(f"'{v}'" for v in col.categories) @@ -188,6 +184,14 @@ def cleanup(self) -> None: """ + def has_semantic_views(self) -> bool: + """Check if semantic views are available.""" + return False + + def get_semantic_view_ddls(self) -> str: + """Get formatted DDL content for semantic views.""" + return "" + class DataFrameSource(DataSource[IntoDataFrameT]): """A DataSource implementation that wraps a DataFrame using DuckDB.""" @@ -522,9 +526,7 @@ def _make_column_meta(name: str, sa_type: sqltypes.TypeEngine) -> ColumnMeta: """Create ColumnMeta from SQLAlchemy type.""" kind: Literal["numeric", "text", "date", 
"other"] - if isinstance( - sa_type, (sqltypes.Integer, sqltypes.BigInteger, sqltypes.SmallInteger) - ): + if isinstance(sa_type, (sqltypes.Integer, sqltypes.BigInteger, sqltypes.SmallInteger)): kind = "numeric" sql_type = "INTEGER" elif isinstance(sa_type, sqltypes.Float): @@ -567,9 +569,7 @@ def _add_column_stats( select_parts.append(f"MIN({col.name}) as {col.name}__min") select_parts.append(f"MAX({col.name}) as {col.name}__max") elif col.kind == "text": - select_parts.append( - f"COUNT(DISTINCT {col.name}) as {col.name}__nunique" - ) + select_parts.append(f"COUNT(DISTINCT {col.name}) as {col.name}__nunique") if not select_parts: return @@ -577,9 +577,7 @@ def _add_column_stats( # Execute stats query stats = {} try: - stats_query = text( - f"SELECT {', '.join(select_parts)} FROM {self.table_name}" - ) + stats_query = text(f"SELECT {', '.join(select_parts)} FROM {self.table_name}") with self._get_connection() as conn: result = conn.execute(stats_query).fetchone() if result: @@ -595,8 +593,7 @@ def _add_column_stats( # Find text columns that qualify as categorical categorical_cols = [ - col - for col in columns + col for col in columns if col.kind == "text" and (nunique := stats.get(f"{col.name}__nunique")) and nunique <= categorical_threshold diff --git a/pkg-py/src/querychat/_system_prompt.py b/pkg-py/src/querychat/_system_prompt.py index 38f6033c..6f238e17 100644 --- a/pkg-py/src/querychat/_system_prompt.py +++ b/pkg-py/src/querychat/_system_prompt.py @@ -75,23 +75,14 @@ def render(self, tools: tuple[TOOL_GROUPS, ...] 
| None) -> str: """ db_type = self.data_source.get_db_type() is_duck_db = db_type.lower() == "duckdb" - - # Check for semantic views (available with SQLAlchemySource/IbisSource for Snowflake) - has_semantic_views = ( - hasattr(self.data_source, "has_semantic_views") - and self.data_source.has_semantic_views() - ) + has_semantic_views = self.data_source.has_semantic_views() context = { "db_type": db_type, "is_duck_db": is_duck_db, "has_semantic_views": has_semantic_views, - "semantic_view_syntax": get_semantic_view_syntax() - if has_semantic_views - else "", - "semantic_view_ddls": ( - self.data_source.get_semantic_view_ddls() if has_semantic_views else "" - ), + "semantic_view_syntax": get_semantic_view_syntax() if has_semantic_views else "", + "semantic_view_ddls": self.data_source.get_semantic_view_ddls() if has_semantic_views else "", "schema": self.schema, "data_description": self.data_description, "extra_instructions": self.extra_instructions, diff --git a/pkg-py/tests/test_snowflake_source.py b/pkg-py/tests/test_snowflake_source.py index 56d840e3..b48cc22f 100644 --- a/pkg-py/tests/test_snowflake_source.py +++ b/pkg-py/tests/test_snowflake_source.py @@ -42,9 +42,7 @@ class TestFormatSemanticViewDdls: def test_format_single_view(self): """Test that format produces expected markdown structure for single view.""" - views = [ - SemanticViewInfo(name="db.schema.view1", ddl="CREATE SEMANTIC VIEW v1") - ] + views = [SemanticViewInfo(name="db.schema.view1", ddl="CREATE SEMANTIC VIEW v1")] section = format_semantic_view_ddls(views) assert "db.schema.view1" in section From 65b1bdcc6ea863309b83e6f3140b034be94d6078 Mon Sep 17 00:00:00 2001 From: Carson Date: Mon, 26 Jan 2026 16:30:08 -0600 Subject: [PATCH 25/45] refactor: Move semantic view discovery into get_semantic_view_ddls Extract discovery logic into _ensure_semantic_views_discovered() method called from has_semantic_views() and get_semantic_view_ddls(). 
This: - Removes discovery from get_schema() as requested - Eliminates the type: ignore by ensuring _semantic_views is always a list after discovery Co-Authored-By: Claude Opus 4.5 --- pkg-py/src/querychat/_datasource.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/pkg-py/src/querychat/_datasource.py b/pkg-py/src/querychat/_datasource.py index 7a6120de..3a31ee10 100644 --- a/pkg-py/src/querychat/_datasource.py +++ b/pkg-py/src/querychat/_datasource.py @@ -501,25 +501,26 @@ def get_schema(self, *, categorical_threshold: int) -> str: for col in self._columns_info ] self._add_column_stats(columns, categorical_threshold) + return format_schema(self.table_name, columns) - # Discover semantic views lazily (only on first call) + def _ensure_semantic_views_discovered(self) -> None: if self._semantic_views is None: if self._engine.dialect.name.lower() == "snowflake": self._semantic_views = discover_semantic_views(self._engine) else: self._semantic_views = [] - return format_schema(self.table_name, columns) - def has_semantic_views(self) -> bool: """Check if semantic views are available.""" + self._ensure_semantic_views_discovered() return bool(self._semantic_views) def get_semantic_view_ddls(self) -> str: """Get formatted DDL content for semantic views.""" - if not self.has_semantic_views(): + self._ensure_semantic_views_discovered() + if not self._semantic_views: return "" - return format_semantic_view_ddls(self._semantic_views) # type: ignore[arg-type] + return format_semantic_view_ddls(self._semantic_views) @staticmethod def _make_column_meta(name: str, sa_type: sqltypes.TypeEngine) -> ColumnMeta: @@ -991,25 +992,26 @@ def get_schema(self, *, categorical_threshold: int) -> str: self._make_column_meta(name, dtype) for name, dtype in self._schema.items() ] self._add_column_stats(columns, self._table, categorical_threshold) + return format_schema(self.table_name, columns) - # Discover semantic views lazily (only on first call) + 
def _ensure_semantic_views_discovered(self) -> None: if self._semantic_views is None: if self._backend.name.lower() == "snowflake": self._semantic_views = discover_semantic_views(self._backend) else: self._semantic_views = [] - return format_schema(self.table_name, columns) - def has_semantic_views(self) -> bool: """Check if semantic views are available.""" + self._ensure_semantic_views_discovered() return bool(self._semantic_views) def get_semantic_view_ddls(self) -> str: """Get formatted DDL content for semantic views.""" - if not self.has_semantic_views(): + self._ensure_semantic_views_discovered() + if not self._semantic_views: return "" - return format_semantic_view_ddls(self._semantic_views) # type: ignore[arg-type] + return format_semantic_view_ddls(self._semantic_views) @staticmethod def _make_column_meta(name: str, dtype: IbisDataType) -> ColumnMeta: From b7d234709c5837ce264cd06d9506259bbf66fc32 Mon Sep 17 00:00:00 2001 From: Carson Date: Mon, 26 Jan 2026 16:39:49 -0600 Subject: [PATCH 26/45] refactor: Remove has_semantic_views(), improve prompt text - Remove has_semantic_views() method from DataSource classes - Use truthy check on get_semantic_view_ddls() instead - Update prompt text with improved explanation and real-world example that better illustrates why semantic views matter Co-Authored-By: Claude Opus 4.5 --- pkg-py/src/querychat/_datasource.py | 28 ++++---------------------- pkg-py/src/querychat/_system_prompt.py | 8 ++++---- pkg-py/src/querychat/prompts/prompt.md | 4 ++-- pkg-r/R/DBISource.R | 27 +++++++++---------------- pkg-r/R/QueryChatSystemPrompt.R | 14 +++++-------- pkg-r/inst/prompts/prompt.md | 4 ++-- 6 files changed, 26 insertions(+), 59 deletions(-) diff --git a/pkg-py/src/querychat/_datasource.py b/pkg-py/src/querychat/_datasource.py index 3a31ee10..9245ae6d 100644 --- a/pkg-py/src/querychat/_datasource.py +++ b/pkg-py/src/querychat/_datasource.py @@ -184,10 +184,6 @@ def cleanup(self) -> None: """ - def has_semantic_views(self) -> 
bool: - """Check if semantic views are available.""" - return False - def get_semantic_view_ddls(self) -> str: """Get formatted DDL content for semantic views.""" return "" @@ -503,21 +499,13 @@ def get_schema(self, *, categorical_threshold: int) -> str: self._add_column_stats(columns, categorical_threshold) return format_schema(self.table_name, columns) - def _ensure_semantic_views_discovered(self) -> None: + def get_semantic_view_ddls(self) -> str: + """Get formatted DDL content for semantic views.""" if self._semantic_views is None: if self._engine.dialect.name.lower() == "snowflake": self._semantic_views = discover_semantic_views(self._engine) else: self._semantic_views = [] - - def has_semantic_views(self) -> bool: - """Check if semantic views are available.""" - self._ensure_semantic_views_discovered() - return bool(self._semantic_views) - - def get_semantic_view_ddls(self) -> str: - """Get formatted DDL content for semantic views.""" - self._ensure_semantic_views_discovered() if not self._semantic_views: return "" return format_semantic_view_ddls(self._semantic_views) @@ -994,21 +982,13 @@ def get_schema(self, *, categorical_threshold: int) -> str: self._add_column_stats(columns, self._table, categorical_threshold) return format_schema(self.table_name, columns) - def _ensure_semantic_views_discovered(self) -> None: + def get_semantic_view_ddls(self) -> str: + """Get formatted DDL content for semantic views.""" if self._semantic_views is None: if self._backend.name.lower() == "snowflake": self._semantic_views = discover_semantic_views(self._backend) else: self._semantic_views = [] - - def has_semantic_views(self) -> bool: - """Check if semantic views are available.""" - self._ensure_semantic_views_discovered() - return bool(self._semantic_views) - - def get_semantic_view_ddls(self) -> str: - """Get formatted DDL content for semantic views.""" - self._ensure_semantic_views_discovered() if not self._semantic_views: return "" return 
format_semantic_view_ddls(self._semantic_views) diff --git a/pkg-py/src/querychat/_system_prompt.py b/pkg-py/src/querychat/_system_prompt.py index 6f238e17..5bf3b59b 100644 --- a/pkg-py/src/querychat/_system_prompt.py +++ b/pkg-py/src/querychat/_system_prompt.py @@ -75,14 +75,14 @@ def render(self, tools: tuple[TOOL_GROUPS, ...] | None) -> str: """ db_type = self.data_source.get_db_type() is_duck_db = db_type.lower() == "duckdb" - has_semantic_views = self.data_source.has_semantic_views() + semantic_view_ddls = self.data_source.get_semantic_view_ddls() context = { "db_type": db_type, "is_duck_db": is_duck_db, - "has_semantic_views": has_semantic_views, - "semantic_view_syntax": get_semantic_view_syntax() if has_semantic_views else "", - "semantic_view_ddls": self.data_source.get_semantic_view_ddls() if has_semantic_views else "", + "has_semantic_views": bool(semantic_view_ddls), + "semantic_view_syntax": get_semantic_view_syntax() if semantic_view_ddls else "", + "semantic_view_ddls": semantic_view_ddls, "schema": self.schema, "data_description": self.data_description, "extra_instructions": self.extra_instructions, diff --git a/pkg-py/src/querychat/prompts/prompt.md b/pkg-py/src/querychat/prompts/prompt.md index 3e125d47..b35dacae 100644 --- a/pkg-py/src/querychat/prompts/prompt.md +++ b/pkg-py/src/querychat/prompts/prompt.md @@ -74,9 +74,9 @@ quantile_cont(salary, 0.5) {{#has_semantic_views}} ### Semantic Views -**IMPORTANT**: This database has Semantic Views available. Semantic Views provide certified business metrics that encode correct calculation rules. When a Semantic View covers the data you need, **always prefer it over raw table queries**. +**IMPORTANT**: This database has Semantic Views available. Semantic Views provide a curated layer over raw data with pre-defined metrics, dimensions, and relationships. They encode business logic and calculation rules that ensure consistent, accurate results. 
When a Semantic View covers the data you need, prefer it over raw tables to benefit from these certified definitions. -Real-world example: Raw table queries for "external customer revenue" returned $184B while the semantic model's certified metric returned $84.5B (the correct answer). The raw query was 2x+ too high because it ignored discounts and included invalid transaction codes. +**Real-world example**: A legacy ERP database had a revenue column (`X_AMT`) with hidden business rules—only status code 90 transactions count as realized revenue, and a discount factor (`ADJ_FCTR`) must be applied. Querying raw tables for "external customer revenue" returned **$184B**. The same query using the semantic model's certified `NET_REVENUE` metric returned **$84.5B**—the correct answer. The raw query was **2x+ too high** because it ignored discounts and included invalid transaction codes. {{{semantic_view_syntax}}} diff --git a/pkg-r/R/DBISource.R b/pkg-r/R/DBISource.R index 82a36e4e..32b8522c 100644 --- a/pkg-r/R/DBISource.R +++ b/pkg-r/R/DBISource.R @@ -109,16 +109,6 @@ DBISource <- R6::R6Class( #' @return A string describing the schema get_schema = function(categorical_threshold = 20) { check_number_whole(categorical_threshold, min = 1) - - # Discover Snowflake semantic views lazily (only on first call) - if (is.null(private$semantic_views)) { - if (is_snowflake_connection(private$conn)) { - private$semantic_views <- discover_semantic_views_impl(private$conn) - } else { - private$semantic_views <- list() - } - } - get_schema_impl( private$conn, self$table_name, @@ -126,18 +116,19 @@ DBISource <- R6::R6Class( ) }, - #' @description - #' Check if semantic views are available - #' @return TRUE if semantic views were discovered - has_semantic_views = function() { - length(private$semantic_views %||% list()) > 0 - }, - #' @description #' Get formatted DDL content for semantic views #' @return A string with DDL definitions, or empty string if none get_semantic_view_ddls = 
function() { - if (!self$has_semantic_views()) { + # Discover Snowflake semantic views lazily (only on first call) + if (is.null(private$semantic_views)) { + if (is_snowflake_connection(private$conn)) { + private$semantic_views <- discover_semantic_views_impl(private$conn) + } else { + private$semantic_views <- list() + } + } + if (length(private$semantic_views) == 0) { return("") } format_semantic_view_ddls(private$semantic_views) diff --git a/pkg-r/R/QueryChatSystemPrompt.R b/pkg-r/R/QueryChatSystemPrompt.R index f8943373..c44a586c 100644 --- a/pkg-r/R/QueryChatSystemPrompt.R +++ b/pkg-r/R/QueryChatSystemPrompt.R @@ -83,13 +83,11 @@ QueryChatSystemPrompt <- R6::R6Class( is_duck_db <- tolower(db_type) == "duckdb" # Check for semantic views (available with DBISource for Snowflake connections) - has_semantic_views <- FALSE - if ( - inherits(self$data_source, "DBISource") && - self$data_source$has_semantic_views() - ) { - has_semantic_views <- TRUE + semantic_view_ddls <- "" + if (inherits(self$data_source, "DBISource")) { + semantic_view_ddls <- self$data_source$get_semantic_view_ddls() } + has_semantic_views <- nzchar(semantic_view_ddls) context <- list( db_type = db_type, @@ -98,9 +96,7 @@ QueryChatSystemPrompt <- R6::R6Class( semantic_view_syntax = if (has_semantic_views) { get_semantic_view_syntax() }, - semantic_view_ddls = if (has_semantic_views) { - self$data_source$get_semantic_view_ddls() - }, + semantic_view_ddls = semantic_view_ddls, schema = self$schema, data_description = self$data_description, extra_instructions = self$extra_instructions, diff --git a/pkg-r/inst/prompts/prompt.md b/pkg-r/inst/prompts/prompt.md index 3e125d47..b35dacae 100644 --- a/pkg-r/inst/prompts/prompt.md +++ b/pkg-r/inst/prompts/prompt.md @@ -74,9 +74,9 @@ quantile_cont(salary, 0.5) {{#has_semantic_views}} ### Semantic Views -**IMPORTANT**: This database has Semantic Views available. Semantic Views provide certified business metrics that encode correct calculation rules. 
When a Semantic View covers the data you need, **always prefer it over raw table queries**. +**IMPORTANT**: This database has Semantic Views available. Semantic Views provide a curated layer over raw data with pre-defined metrics, dimensions, and relationships. They encode business logic and calculation rules that ensure consistent, accurate results. When a Semantic View covers the data you need, prefer it over raw tables to benefit from these certified definitions. -Real-world example: Raw table queries for "external customer revenue" returned $184B while the semantic model's certified metric returned $84.5B (the correct answer). The raw query was 2x+ too high because it ignored discounts and included invalid transaction codes. +**Real-world example**: A legacy ERP database had a revenue column (`X_AMT`) with hidden business rules—only status code 90 transactions count as realized revenue, and a discount factor (`ADJ_FCTR`) must be applied. Querying raw tables for "external customer revenue" returned **$184B**. The same query using the semantic model's certified `NET_REVENUE` metric returned **$84.5B**—the correct answer. The raw query was **2x+ too high** because it ignored discounts and included invalid transaction codes. 
{{{semantic_view_syntax}}} From 4fda131b344e4a904cb58f78c1db91efc5be5857 Mon Sep 17 00:00:00 2001 From: cpsievert Date: Mon, 26 Jan 2026 22:44:28 +0000 Subject: [PATCH 27/45] `devtools::document()` (GitHub Actions) --- pkg-r/man/DBISource.Rd | 14 -------------- pkg-r/man/DataFrameSource.Rd | 1 - pkg-r/man/TblSqlSource.Rd | 1 - 3 files changed, 16 deletions(-) diff --git a/pkg-r/man/DBISource.Rd b/pkg-r/man/DBISource.Rd index 9793d220..01771cbc 100644 --- a/pkg-r/man/DBISource.Rd +++ b/pkg-r/man/DBISource.Rd @@ -42,7 +42,6 @@ db_source$cleanup() \item \href{#method-DBISource-new}{\code{DBISource$new()}} \item \href{#method-DBISource-get_db_type}{\code{DBISource$get_db_type()}} \item \href{#method-DBISource-get_schema}{\code{DBISource$get_schema()}} -\item \href{#method-DBISource-has_semantic_views}{\code{DBISource$has_semantic_views()}} \item \href{#method-DBISource-get_semantic_view_ddls}{\code{DBISource$get_semantic_view_ddls()}} \item \href{#method-DBISource-execute_query}{\code{DBISource$execute_query()}} \item \href{#method-DBISource-test_query}{\code{DBISource$test_query()}} @@ -109,19 +108,6 @@ A string describing the schema } } \if{html}{\out{
    }} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-DBISource-has_semantic_views}{}}} -\subsection{Method \code{has_semantic_views()}}{ -Check if semantic views are available -\subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{DBISource$has_semantic_views()}\if{html}{\out{
    }} -} - -\subsection{Returns}{ -TRUE if semantic views were discovered -} -} -\if{html}{\out{
    }} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-DBISource-get_semantic_view_ddls}{}}} \subsection{Method \code{get_semantic_view_ddls()}}{ diff --git a/pkg-r/man/DataFrameSource.Rd b/pkg-r/man/DataFrameSource.Rd index 4cc648a9..9deaa6ae 100644 --- a/pkg-r/man/DataFrameSource.Rd +++ b/pkg-r/man/DataFrameSource.Rd @@ -57,7 +57,6 @@ df_sqlite$cleanup()
  • querychat::DBISource$get_db_type()
  • querychat::DBISource$get_schema()
  • querychat::DBISource$get_semantic_view_ddls()
  • -
  • querychat::DBISource$has_semantic_views()
  • querychat::DBISource$test_query()
  • diff --git a/pkg-r/man/TblSqlSource.Rd b/pkg-r/man/TblSqlSource.Rd index 3e3fa060..2e3386e6 100644 --- a/pkg-r/man/TblSqlSource.Rd +++ b/pkg-r/man/TblSqlSource.Rd @@ -58,7 +58,6 @@ mtcars_source$cleanup()
    Inherited methods
    }} From 4f70663be5818eb5d4837b9279dbadae351330fa Mon Sep 17 00:00:00 2001 From: Carson Date: Mon, 26 Jan 2026 16:50:24 -0600 Subject: [PATCH 28/45] refactor: Simplify semantic views, adjust prompt structure - Change "### Semantic Views" to "## Semantic Views" (top-level section) - Move "use SEMANTIC_VIEW() instead of raw SQL" into prompt.md - Adjust header levels in semantic-view-syntax.md accordingly - Remove _semantic_views attribute from datasource classes - compute directly in get_semantic_view_ddls() without caching Co-Authored-By: Claude Opus 4.5 --- pkg-py/src/querychat/_datasource.py | 27 +++++++------------ pkg-py/src/querychat/prompts/prompt.md | 4 ++- .../querychat/prompts/semantic-view-syntax.md | 14 +++++----- pkg-r/R/DBISource.R | 17 +++++------- pkg-r/inst/prompts/prompt.md | 4 ++- pkg-r/inst/prompts/semantic-view-syntax.md | 14 +++++----- 6 files changed, 34 insertions(+), 46 deletions(-) diff --git a/pkg-py/src/querychat/_datasource.py b/pkg-py/src/querychat/_datasource.py index 9245ae6d..758101b0 100644 --- a/pkg-py/src/querychat/_datasource.py +++ b/pkg-py/src/querychat/_datasource.py @@ -12,7 +12,6 @@ from ._df_compat import read_sql from ._snowflake import ( - SemanticViewInfo, discover_semantic_views, format_semantic_view_ddls, ) @@ -456,7 +455,6 @@ def __init__( """ self._engine = engine self.table_name = table_name - self._semantic_views: list[SemanticViewInfo] | None = None # Validate table exists inspector = inspect(self._engine) @@ -501,14 +499,12 @@ def get_schema(self, *, categorical_threshold: int) -> str: def get_semantic_view_ddls(self) -> str: """Get formatted DDL content for semantic views.""" - if self._semantic_views is None: - if self._engine.dialect.name.lower() == "snowflake": - self._semantic_views = discover_semantic_views(self._engine) - else: - self._semantic_views = [] - if not self._semantic_views: + if self._engine.dialect.name.lower() != "snowflake": return "" - return 
format_semantic_view_ddls(self._semantic_views) + views = discover_semantic_views(self._engine) + if not views: + return "" + return format_semantic_view_ddls(views) @staticmethod def _make_column_meta(name: str, sa_type: sqltypes.TypeEngine) -> ColumnMeta: @@ -955,7 +951,6 @@ def __init__(self, table: ibis.Table, table_name: str): self._table = table self.table_name = table_name self._schema = table.schema() - self._semantic_views: list[SemanticViewInfo] | None = None backend = table.get_backend() if not isinstance(backend, SQLBackend): @@ -984,14 +979,12 @@ def get_schema(self, *, categorical_threshold: int) -> str: def get_semantic_view_ddls(self) -> str: """Get formatted DDL content for semantic views.""" - if self._semantic_views is None: - if self._backend.name.lower() == "snowflake": - self._semantic_views = discover_semantic_views(self._backend) - else: - self._semantic_views = [] - if not self._semantic_views: + if self._backend.name.lower() != "snowflake": + return "" + views = discover_semantic_views(self._backend) + if not views: return "" - return format_semantic_view_ddls(self._semantic_views) + return format_semantic_view_ddls(views) @staticmethod def _make_column_meta(name: str, dtype: IbisDataType) -> ColumnMeta: diff --git a/pkg-py/src/querychat/prompts/prompt.md b/pkg-py/src/querychat/prompts/prompt.md index b35dacae..5972e075 100644 --- a/pkg-py/src/querychat/prompts/prompt.md +++ b/pkg-py/src/querychat/prompts/prompt.md @@ -72,12 +72,14 @@ quantile_cont(salary, 0.5) {{/is_duck_db}} {{#has_semantic_views}} -### Semantic Views +## Semantic Views **IMPORTANT**: This database has Semantic Views available. Semantic Views provide a curated layer over raw data with pre-defined metrics, dimensions, and relationships. They encode business logic and calculation rules that ensure consistent, accurate results. When a Semantic View covers the data you need, prefer it over raw tables to benefit from these certified definitions. 
**Real-world example**: A legacy ERP database had a revenue column (`X_AMT`) with hidden business rules—only status code 90 transactions count as realized revenue, and a discount factor (`ADJ_FCTR`) must be applied. Querying raw tables for "external customer revenue" returned **$184B**. The same query using the semantic model's certified `NET_REVENUE` metric returned **$84.5B**—the correct answer. The raw query was **2x+ too high** because it ignored discounts and included invalid transaction codes. +When Semantic Views are available, use the `SEMANTIC_VIEW()` table function instead of raw SQL. + {{{semantic_view_syntax}}} diff --git a/pkg-py/src/querychat/prompts/semantic-view-syntax.md b/pkg-py/src/querychat/prompts/semantic-view-syntax.md index 062f5aa8..32988f38 100644 --- a/pkg-py/src/querychat/prompts/semantic-view-syntax.md +++ b/pkg-py/src/querychat/prompts/semantic-view-syntax.md @@ -1,8 +1,6 @@ -## SEMANTIC_VIEW() Query Syntax +### SEMANTIC_VIEW() Query Syntax -When Semantic Views are available, use the `SEMANTIC_VIEW()` table function instead of raw SQL. - -### Basic Syntax +#### Basic Syntax ```sql SELECT * FROM SEMANTIC_VIEW( @@ -14,7 +12,7 @@ SELECT * FROM SEMANTIC_VIEW( [WHERE {column} = 'value'] -- Optional: post-aggregation filter ``` -### Key Rules +#### Key Rules 1. **Use `SEMANTIC_VIEW()` function** - Not direct SELECT FROM the view 2. **No GROUP BY needed** - Semantic layer handles aggregation via DIMENSIONS @@ -22,7 +20,7 @@ SELECT * FROM SEMANTIC_VIEW( 4. **No aggregate functions needed** - Metrics are pre-aggregated 5. 
**Use DDL-defined names** - Metrics and dimensions must match the DDL exactly -### WHERE Clause: Inside vs Outside +#### WHERE Clause: Inside vs Outside - **Inside** (pre-aggregation): Filters base data BEFORE metrics are computed - **Outside** (post-aggregation): Filters results AFTER metrics are computed @@ -45,7 +43,7 @@ SELECT * FROM SEMANTIC_VIEW( WHERE NET_REVENUE > 1000000 ``` -### Common Patterns +#### Common Patterns **Single metric (total):** ```sql @@ -92,7 +90,7 @@ FROM SEMANTIC_VIEW( JOIN category_lookup AS lookup ON sv.ACC_TYPE_CD = lookup.code ``` -### Troubleshooting +#### Troubleshooting - **"Invalid identifier"**: Verify metric/dimension names match exactly what's in the DDL - **Syntax error**: Use SEMANTIC_VIEW() function, GROUP BY isn't needed diff --git a/pkg-r/R/DBISource.R b/pkg-r/R/DBISource.R index 32b8522c..74ff970c 100644 --- a/pkg-r/R/DBISource.R +++ b/pkg-r/R/DBISource.R @@ -27,8 +27,7 @@ DBISource <- R6::R6Class( "DBISource", inherit = DataSource, private = list( - conn = NULL, - semantic_views = NULL + conn = NULL ), public = list( #' @description @@ -120,18 +119,14 @@ DBISource <- R6::R6Class( #' Get formatted DDL content for semantic views #' @return A string with DDL definitions, or empty string if none get_semantic_view_ddls = function() { - # Discover Snowflake semantic views lazily (only on first call) - if (is.null(private$semantic_views)) { - if (is_snowflake_connection(private$conn)) { - private$semantic_views <- discover_semantic_views_impl(private$conn) - } else { - private$semantic_views <- list() - } + if (!is_snowflake_connection(private$conn)) { + return("") } - if (length(private$semantic_views) == 0) { + views <- discover_semantic_views_impl(private$conn) + if (length(views) == 0) { return("") } - format_semantic_view_ddls(private$semantic_views) + format_semantic_view_ddls(views) }, #' @description diff --git a/pkg-r/inst/prompts/prompt.md b/pkg-r/inst/prompts/prompt.md index b35dacae..5972e075 100644 --- 
a/pkg-r/inst/prompts/prompt.md +++ b/pkg-r/inst/prompts/prompt.md @@ -72,12 +72,14 @@ quantile_cont(salary, 0.5) {{/is_duck_db}} {{#has_semantic_views}} -### Semantic Views +## Semantic Views **IMPORTANT**: This database has Semantic Views available. Semantic Views provide a curated layer over raw data with pre-defined metrics, dimensions, and relationships. They encode business logic and calculation rules that ensure consistent, accurate results. When a Semantic View covers the data you need, prefer it over raw tables to benefit from these certified definitions. **Real-world example**: A legacy ERP database had a revenue column (`X_AMT`) with hidden business rules—only status code 90 transactions count as realized revenue, and a discount factor (`ADJ_FCTR`) must be applied. Querying raw tables for "external customer revenue" returned **$184B**. The same query using the semantic model's certified `NET_REVENUE` metric returned **$84.5B**—the correct answer. The raw query was **2x+ too high** because it ignored discounts and included invalid transaction codes. +When Semantic Views are available, use the `SEMANTIC_VIEW()` table function instead of raw SQL. + {{{semantic_view_syntax}}} diff --git a/pkg-r/inst/prompts/semantic-view-syntax.md b/pkg-r/inst/prompts/semantic-view-syntax.md index 062f5aa8..32988f38 100644 --- a/pkg-r/inst/prompts/semantic-view-syntax.md +++ b/pkg-r/inst/prompts/semantic-view-syntax.md @@ -1,8 +1,6 @@ -## SEMANTIC_VIEW() Query Syntax +### SEMANTIC_VIEW() Query Syntax -When Semantic Views are available, use the `SEMANTIC_VIEW()` table function instead of raw SQL. - -### Basic Syntax +#### Basic Syntax ```sql SELECT * FROM SEMANTIC_VIEW( @@ -14,7 +12,7 @@ SELECT * FROM SEMANTIC_VIEW( [WHERE {column} = 'value'] -- Optional: post-aggregation filter ``` -### Key Rules +#### Key Rules 1. **Use `SEMANTIC_VIEW()` function** - Not direct SELECT FROM the view 2. 
**No GROUP BY needed** - Semantic layer handles aggregation via DIMENSIONS @@ -22,7 +20,7 @@ SELECT * FROM SEMANTIC_VIEW( 4. **No aggregate functions needed** - Metrics are pre-aggregated 5. **Use DDL-defined names** - Metrics and dimensions must match the DDL exactly -### WHERE Clause: Inside vs Outside +#### WHERE Clause: Inside vs Outside - **Inside** (pre-aggregation): Filters base data BEFORE metrics are computed - **Outside** (post-aggregation): Filters results AFTER metrics are computed @@ -45,7 +43,7 @@ SELECT * FROM SEMANTIC_VIEW( WHERE NET_REVENUE > 1000000 ``` -### Common Patterns +#### Common Patterns **Single metric (total):** ```sql @@ -92,7 +90,7 @@ FROM SEMANTIC_VIEW( JOIN category_lookup AS lookup ON sv.ACC_TYPE_CD = lookup.code ``` -### Troubleshooting +#### Troubleshooting - **"Invalid identifier"**: Verify metric/dimension names match exactly what's in the DDL - **Syntax error**: Use SEMANTIC_VIEW() function, GROUP BY isn't needed From 69863dac3ff7970dedcee3a8608545e6cc9bcf1e Mon Sep 17 00:00:00 2001 From: Carson Sievert Date: Mon, 26 Jan 2026 17:01:11 -0600 Subject: [PATCH 29/45] Apply suggestions from code review --- pkg-py/src/querychat/prompts/prompt.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pkg-py/src/querychat/prompts/prompt.md b/pkg-py/src/querychat/prompts/prompt.md index 5972e075..6b0080e7 100644 --- a/pkg-py/src/querychat/prompts/prompt.md +++ b/pkg-py/src/querychat/prompts/prompt.md @@ -74,12 +74,10 @@ quantile_cont(salary, 0.5) {{#has_semantic_views}} ## Semantic Views -**IMPORTANT**: This database has Semantic Views available. Semantic Views provide a curated layer over raw data with pre-defined metrics, dimensions, and relationships. They encode business logic and calculation rules that ensure consistent, accurate results. When a Semantic View covers the data you need, prefer it over raw tables to benefit from these certified definitions. +**IMPORTANT**: This database has Semantic Views available. 
Semantic Views provide a curated layer over raw data with pre-defined metrics, dimensions, and relationships. They encode business logic and calculation rules that ensure consistent, accurate results. When a Semantic View covers the data you need, prefer it over raw tables to benefit from these certified definitions (that is, use the `SEMANTIC_VIEW()` table function where appropriate when generating SQL). **Real-world example**: A legacy ERP database had a revenue column (`X_AMT`) with hidden business rules—only status code 90 transactions count as realized revenue, and a discount factor (`ADJ_FCTR`) must be applied. Querying raw tables for "external customer revenue" returned **$184B**. The same query using the semantic model's certified `NET_REVENUE` metric returned **$84.5B**—the correct answer. The raw query was **2x+ too high** because it ignored discounts and included invalid transaction codes. -When Semantic Views are available, use the `SEMANTIC_VIEW()` table function instead of raw SQL. 
- {{{semantic_view_syntax}}} From 3e7bd9c7937df0ebd6a32aab43e077d2fce8aac8 Mon Sep 17 00:00:00 2001 From: Carson Date: Mon, 26 Jan 2026 17:21:13 -0600 Subject: [PATCH 30/45] refactor: Restructure semantic views into dedicated directory - Move semantic view prompts to prompts/semantic-views/ directory - prompt.md: Contains IMPORTANT notice and real-world example - syntax.md: Contains SEMANTIC_VIEW() query syntax reference - Rename get_semantic_view_ddls() to get_semantic_views_section() which now returns the complete assembled section - Simplify main prompt.md to use single {{{semantic_views}}} placeholder - Remove type ignore in _snowflake.py by using itertuples instead of to_dict(orient="records") - Update IMPORTANT paragraph to include SEMANTIC_VIEW() instruction inline per PR feedback - Update tests to reflect new method names and structure Co-Authored-By: Claude Opus 4.5 --- pkg-py/src/querychat/_datasource.py | 22 +++--- pkg-py/src/querychat/_snowflake.py | 25 ++++++- pkg-py/src/querychat/_system_prompt.py | 10 +-- pkg-py/src/querychat/prompts/prompt.md | 14 +--- .../prompts/semantic-views/prompt.md | 5 ++ .../syntax.md} | 0 pkg-py/tests/test_snowflake_source.py | 68 +++++++++---------- pkg-r/R/DBISource.R | 35 ++++++++-- pkg-r/R/QueryChat.R | 6 +- pkg-r/R/QueryChatSystemPrompt.R | 23 ++----- pkg-r/R/querychat_tools.R | 3 +- pkg-r/inst/prompts/prompt.md | 16 +---- pkg-r/inst/prompts/semantic-views/prompt.md | 5 ++ .../syntax.md} | 0 pkg-r/tests/testthat/test-QueryChat.R | 3 +- pkg-r/tests/testthat/test-SnowflakeSource.R | 40 +++++------ pkg-r/tests/testthat/test-querychat_tools.R | 6 +- 17 files changed, 145 insertions(+), 136 deletions(-) create mode 100644 pkg-py/src/querychat/prompts/semantic-views/prompt.md rename pkg-py/src/querychat/prompts/{semantic-view-syntax.md => semantic-views/syntax.md} (100%) create mode 100644 pkg-r/inst/prompts/semantic-views/prompt.md rename pkg-r/inst/prompts/{semantic-view-syntax.md => semantic-views/syntax.md} (100%) diff 
--git a/pkg-py/src/querychat/_datasource.py b/pkg-py/src/querychat/_datasource.py index 758101b0..64a5dff9 100644 --- a/pkg-py/src/querychat/_datasource.py +++ b/pkg-py/src/querychat/_datasource.py @@ -13,7 +13,7 @@ from ._df_compat import read_sql from ._snowflake import ( discover_semantic_views, - format_semantic_view_ddls, + get_semantic_views_section, ) from ._utils import as_narwhals, check_query @@ -183,8 +183,8 @@ def cleanup(self) -> None: """ - def get_semantic_view_ddls(self) -> str: - """Get formatted DDL content for semantic views.""" + def get_semantic_views_section(self) -> str: + """Get the complete semantic views section for the prompt.""" return "" @@ -497,14 +497,12 @@ def get_schema(self, *, categorical_threshold: int) -> str: self._add_column_stats(columns, categorical_threshold) return format_schema(self.table_name, columns) - def get_semantic_view_ddls(self) -> str: - """Get formatted DDL content for semantic views.""" + def get_semantic_views_section(self) -> str: + """Get the complete semantic views section for the prompt.""" if self._engine.dialect.name.lower() != "snowflake": return "" views = discover_semantic_views(self._engine) - if not views: - return "" - return format_semantic_view_ddls(views) + return get_semantic_views_section(views) @staticmethod def _make_column_meta(name: str, sa_type: sqltypes.TypeEngine) -> ColumnMeta: @@ -977,14 +975,12 @@ def get_schema(self, *, categorical_threshold: int) -> str: self._add_column_stats(columns, self._table, categorical_threshold) return format_schema(self.table_name, columns) - def get_semantic_view_ddls(self) -> str: - """Get formatted DDL content for semantic views.""" + def get_semantic_views_section(self) -> str: + """Get the complete semantic views section for the prompt.""" if self._backend.name.lower() != "snowflake": return "" views = discover_semantic_views(self._backend) - if not views: - return "" - return format_semantic_view_ddls(views) + return 
get_semantic_views_section(views) @staticmethod def _make_column_meta(name: str, dtype: IbisDataType) -> ColumnMeta: diff --git a/pkg-py/src/querychat/_snowflake.py b/pkg-py/src/querychat/_snowflake.py index c61a5fe4..87b58633 100644 --- a/pkg-py/src/querychat/_snowflake.py +++ b/pkg-py/src/querychat/_snowflake.py @@ -44,7 +44,8 @@ def execute_raw_sql( else: result_table = backend.sql(query) df = result_table.execute() - return df.to_dict(orient="records") # type: ignore[return-value] + columns = list(df.columns) + return [dict(zip(columns, row, strict=False)) for row in df.itertuples(index=False)] def discover_semantic_views( @@ -102,3 +103,25 @@ def format_semantic_view_ddls(semantic_views: list[SemanticViewInfo]) -> str: lines.append("") return "\n".join(lines) + + +def get_semantic_views_section(semantic_views: list[SemanticViewInfo]) -> str: + """Build the complete semantic views section for the prompt.""" + if not semantic_views: + return "" + + from importlib.resources import files + + prompts = files("querychat.prompts.semantic-views") + prompt_text = (prompts / "prompt.md").read_text() + syntax_text = (prompts / "syntax.md").read_text() + ddls_text = format_semantic_view_ddls(semantic_views) + + return f"""{prompt_text} + +{syntax_text} + + +{ddls_text} + +""" diff --git a/pkg-py/src/querychat/_system_prompt.py b/pkg-py/src/querychat/_system_prompt.py index 5bf3b59b..caa39f6b 100644 --- a/pkg-py/src/querychat/_system_prompt.py +++ b/pkg-py/src/querychat/_system_prompt.py @@ -13,11 +13,6 @@ PROMPTS_DIR = Path(__file__).parent / "prompts" -def get_semantic_view_syntax() -> str: - """Load SEMANTIC_VIEW_SYNTAX from shared prompt file.""" - return (PROMPTS_DIR / "semantic-view-syntax.md").read_text() - - class QueryChatSystemPrompt: """Manages system prompt template and component assembly.""" @@ -75,14 +70,11 @@ def render(self, tools: tuple[TOOL_GROUPS, ...] 
| None) -> str: """ db_type = self.data_source.get_db_type() is_duck_db = db_type.lower() == "duckdb" - semantic_view_ddls = self.data_source.get_semantic_view_ddls() context = { "db_type": db_type, "is_duck_db": is_duck_db, - "has_semantic_views": bool(semantic_view_ddls), - "semantic_view_syntax": get_semantic_view_syntax() if semantic_view_ddls else "", - "semantic_view_ddls": semantic_view_ddls, + "semantic_views": self.data_source.get_semantic_views_section(), "schema": self.schema, "data_description": self.data_description, "extra_instructions": self.extra_instructions, diff --git a/pkg-py/src/querychat/prompts/prompt.md b/pkg-py/src/querychat/prompts/prompt.md index 6b0080e7..8c6ff97b 100644 --- a/pkg-py/src/querychat/prompts/prompt.md +++ b/pkg-py/src/querychat/prompts/prompt.md @@ -71,19 +71,7 @@ quantile_cont(salary, 0.5) ``` {{/is_duck_db}} -{{#has_semantic_views}} -## Semantic Views - -**IMPORTANT**: This database has Semantic Views available. Semantic Views provide a curated layer over raw data with pre-defined metrics, dimensions, and relationships. They encode business logic and calculation rules that ensure consistent, accurate results. When a Semantic View covers the data you need, prefer it over raw tables to benefit from these certified definitions (that is, use the `SEMANTIC_VIEW()` table function where appropriate when generating SQL). - -**Real-world example**: A legacy ERP database had a revenue column (`X_AMT`) with hidden business rules—only status code 90 transactions count as realized revenue, and a discount factor (`ADJ_FCTR`) must be applied. Querying raw tables for "external customer revenue" returned **$184B**. The same query using the semantic model's certified `NET_REVENUE` metric returned **$84.5B**—the correct answer. The raw query was **2x+ too high** because it ignored discounts and included invalid transaction codes. 
- -{{{semantic_view_syntax}}} - - -{{{semantic_view_ddls}}} - -{{/has_semantic_views}} +{{{semantic_views}}} ## Your Capabilities You can handle these types of requests: diff --git a/pkg-py/src/querychat/prompts/semantic-views/prompt.md b/pkg-py/src/querychat/prompts/semantic-views/prompt.md new file mode 100644 index 00000000..32f448ee --- /dev/null +++ b/pkg-py/src/querychat/prompts/semantic-views/prompt.md @@ -0,0 +1,5 @@ +## Semantic Views + +**IMPORTANT**: This database has Semantic Views available. Semantic Views provide a curated layer over raw data with pre-defined metrics, dimensions, and relationships. They encode business logic and calculation rules that ensure consistent, accurate results. When a Semantic View covers the data you need, prefer it over raw tables to benefit from these certified definitions (that is, use the `SEMANTIC_VIEW()` table function where appropriate when generating SQL). + +**Real-world example**: A legacy ERP database had a revenue column (`X_AMT`) with hidden business rules—only status code 90 transactions count as realized revenue, and a discount factor (`ADJ_FCTR`) must be applied. Querying raw tables for "external customer revenue" returned **$184B**. The same query using the semantic model's certified `NET_REVENUE` metric returned **$84.5B**—the correct answer. The raw query was **2x+ too high** because it ignored discounts and included invalid transaction codes. 
diff --git a/pkg-py/src/querychat/prompts/semantic-view-syntax.md b/pkg-py/src/querychat/prompts/semantic-views/syntax.md similarity index 100% rename from pkg-py/src/querychat/prompts/semantic-view-syntax.md rename to pkg-py/src/querychat/prompts/semantic-views/syntax.md diff --git a/pkg-py/tests/test_snowflake_source.py b/pkg-py/tests/test_snowflake_source.py index b48cc22f..847040c1 100644 --- a/pkg-py/tests/test_snowflake_source.py +++ b/pkg-py/tests/test_snowflake_source.py @@ -128,19 +128,20 @@ def test_sqlalchemy_backend(self): def test_ibis_backend(self): """Test execute_raw_sql with Ibis backend.""" + import pandas as pd + mock_backend = MagicMock() mock_table = MagicMock() - mock_df = MagicMock() - mock_df.to_dict.return_value = [{"col1": "a"}, {"col1": "b"}] + # Use a real pandas DataFrame for itertuples to work correctly + df = pd.DataFrame({"col1": ["a", "b"]}) mock_backend.sql.return_value = mock_table - mock_table.execute.return_value = mock_df + mock_table.execute.return_value = df result = execute_raw_sql("SELECT 1", mock_backend) assert result == [{"col1": "a"}, {"col1": "b"}] mock_backend.sql.assert_called_once_with("SELECT 1") - mock_df.to_dict.assert_called_once_with(orient="records") class TestDiscoverSemanticViews: @@ -271,7 +272,7 @@ class TestSQLAlchemySourceSemanticViews: """Tests for SQLAlchemySource semantic view discovery.""" def test_discovery_for_snowflake_backend(self): - """Test that discovery is called for Snowflake backends in get_schema.""" + """Test that discovery is called for Snowflake backends.""" from querychat._datasource import SQLAlchemySource mock_engine = MagicMock() @@ -289,8 +290,8 @@ def test_discovery_for_snowflake_backend(self): source = SQLAlchemySource(mock_engine, "test_table") mock_discover.assert_not_called() - with patch.object(source, "_add_column_stats"): - source.get_schema(categorical_threshold=20) + # Discovery happens when calling get_semantic_views_section + source.get_semantic_views_section() 
mock_discover.assert_called_once_with(mock_engine) @@ -310,13 +311,13 @@ def test_discovery_skipped_for_non_snowflake(self): ): source = SQLAlchemySource(mock_engine, "test_table") - with patch.object(source, "_add_column_stats"): - source.get_schema(categorical_threshold=20) + # For non-Snowflake, discovery is not called + source.get_semantic_views_section() mock_discover.assert_not_called() - def test_get_schema_includes_semantic_views(self): - """Test that get_schema includes semantic view section.""" + def test_get_semantic_views_section_includes_views(self): + """Test that get_semantic_views_section includes semantic view content.""" from querychat._datasource import SQLAlchemySource views = [SemanticViewInfo(name="db.schema.metrics", ddl="CREATE SEMANTIC VIEW")] @@ -335,16 +336,14 @@ def test_get_schema_includes_semantic_views(self): ), ): source = SQLAlchemySource(mock_engine, "test_table") + section = source.get_semantic_views_section() - with patch.object(source, "_add_column_stats"): - schema = source.get_schema(categorical_threshold=20) - - assert "Table: test_table" in schema - assert "## Snowflake Semantic Views" in schema - assert "db.schema.metrics" in schema + assert "## Semantic Views" in section + assert "db.schema.metrics" in section + assert "CREATE SEMANTIC VIEW" in section - def test_get_schema_without_semantic_views(self): - """Test that get_schema works without semantic views.""" + def test_get_semantic_views_section_empty_for_non_snowflake(self): + """Test that get_semantic_views_section returns empty for non-Snowflake.""" from querychat._datasource import SQLAlchemySource mock_engine = MagicMock() @@ -355,19 +354,16 @@ def test_get_schema_without_semantic_views(self): with patch("querychat._datasource.inspect", return_value=mock_inspector): source = SQLAlchemySource(mock_engine, "test_table") + section = source.get_semantic_views_section() - with patch.object(source, "_add_column_stats"): - schema = 
source.get_schema(categorical_threshold=20) - - assert "Table: test_table" in schema - assert "## Snowflake Semantic Views" not in schema + assert section == "" class TestIbisSourceSemanticViews: """Tests for IbisSource semantic view discovery.""" def test_discovery_for_snowflake_backend(self): - """Test that discovery runs for Snowflake backends in get_schema.""" + """Test that discovery runs for Snowflake backends.""" from ibis.backends.sql import SQLBackend from querychat._datasource import IbisSource @@ -389,8 +385,8 @@ def test_discovery_for_snowflake_backend(self): source = IbisSource(mock_table, "test") mock_discover.assert_not_called() - with patch.object(IbisSource, "_add_column_stats"): - source.get_schema(categorical_threshold=20) + # Discovery happens when calling get_semantic_views_section + source.get_semantic_views_section() mock_discover.assert_called_once_with(mock_backend) @@ -414,13 +410,13 @@ def test_discovery_skipped_for_non_snowflake(self): with patch("querychat._datasource.discover_semantic_views") as mock_discover: source = IbisSource(mock_table, "test") - with patch.object(IbisSource, "_add_column_stats"): - source.get_schema(categorical_threshold=20) + # For non-Snowflake, discovery is not called + source.get_semantic_views_section() mock_discover.assert_not_called() - def test_get_schema_includes_semantic_views(self): - """Test that get_schema includes semantic view section.""" + def test_get_semantic_views_section_includes_views(self): + """Test that get_semantic_views_section includes semantic view content.""" from ibis.backends.sql import SQLBackend from querychat._datasource import IbisSource @@ -443,10 +439,8 @@ def test_get_schema_includes_semantic_views(self): return_value=views, ): source = IbisSource(mock_table, "test_table") + section = source.get_semantic_views_section() - with patch.object(IbisSource, "_add_column_stats"): - schema = source.get_schema(categorical_threshold=20) - - assert "Table: test_table" in schema - assert 
"## Snowflake Semantic Views" in schema - assert "db.schema.metrics" in schema + assert "## Semantic Views" in section + assert "db.schema.metrics" in section + assert "CREATE SEMANTIC VIEW" in section diff --git a/pkg-r/R/DBISource.R b/pkg-r/R/DBISource.R index 74ff970c..f3c092bf 100644 --- a/pkg-r/R/DBISource.R +++ b/pkg-r/R/DBISource.R @@ -116,9 +116,9 @@ DBISource <- R6::R6Class( }, #' @description - #' Get formatted DDL content for semantic views - #' @return A string with DDL definitions, or empty string if none - get_semantic_view_ddls = function() { + #' Get the complete semantic views section for the prompt + #' @return A string with the full semantic views section, or empty string if none + get_semantic_views_section = function() { if (!is_snowflake_connection(private$conn)) { return("") } @@ -126,7 +126,7 @@ DBISource <- R6::R6Class( if (length(views) == 0) { return("") } - format_semantic_view_ddls(views) + get_semantic_views_section_impl(views) }, #' @description @@ -525,3 +525,30 @@ format_semantic_view_ddls <- function(semantic_views) { paste(lines, collapse = "\n") } + +#' Build the complete semantic views section for the prompt +#' +#' @param semantic_views A list of semantic view info (name and ddl) +#' @return A formatted string with the full semantic views section +#' @noRd +get_semantic_views_section_impl <- function(semantic_views) { + if (length(semantic_views) == 0) { + return("") + } + + prompts_dir <- system.file("prompts", "semantic-views", package = "querychat") + prompt_text <- readLines(file.path(prompts_dir, "prompt.md"), warn = FALSE) + syntax_text <- readLines(file.path(prompts_dir, "syntax.md"), warn = FALSE) + ddls_text <- format_semantic_view_ddls(semantic_views) + + paste( + paste(prompt_text, collapse = "\n"), + "", + paste(syntax_text, collapse = "\n"), + "", + "", + ddls_text, + "", + sep = "\n" + ) +} diff --git a/pkg-r/R/QueryChat.R b/pkg-r/R/QueryChat.R index f109a4ca..7be4d9c8 100644 --- a/pkg-r/R/QueryChat.R +++ 
b/pkg-r/R/QueryChat.R @@ -285,8 +285,10 @@ QueryChat <- R6::R6Class( #' `reset_dashboard` tool is called. client = function( tools = NA, - update_dashboard = function(query, title) {}, - reset_dashboard = function() {} + update_dashboard = function(query, title) { + }, + reset_dashboard = function() { + } ) { private$require_data_source("$client") diff --git a/pkg-r/R/QueryChatSystemPrompt.R b/pkg-r/R/QueryChatSystemPrompt.R index c44a586c..448ad124 100644 --- a/pkg-r/R/QueryChatSystemPrompt.R +++ b/pkg-r/R/QueryChatSystemPrompt.R @@ -82,21 +82,16 @@ QueryChatSystemPrompt <- R6::R6Class( db_type <- self$data_source$get_db_type() is_duck_db <- tolower(db_type) == "duckdb" - # Check for semantic views (available with DBISource for Snowflake connections) - semantic_view_ddls <- "" + # Get semantic views section (available with DBISource for Snowflake) + semantic_views <- "" if (inherits(self$data_source, "DBISource")) { - semantic_view_ddls <- self$data_source$get_semantic_view_ddls() + semantic_views <- self$data_source$get_semantic_views_section() } - has_semantic_views <- nzchar(semantic_view_ddls) context <- list( db_type = db_type, is_duck_db = is_duck_db, - has_semantic_views = if (has_semantic_views) "true", - semantic_view_syntax = if (has_semantic_views) { - get_semantic_view_syntax() - }, - semantic_view_ddls = semantic_view_ddls, + semantic_views = semantic_views, schema = self$schema, data_description = self$data_description, extra_instructions = self$extra_instructions, @@ -110,16 +105,6 @@ QueryChatSystemPrompt <- R6::R6Class( ) ) -# Load SEMANTIC_VIEW_SYNTAX from shared prompt file -get_semantic_view_syntax <- function() { - path <- system.file( - "prompts", - "semantic-view-syntax.md", - package = "querychat" - ) - read_utf8(path) -} - # Utility function for loading file or string content read_text <- function(x) { if (file.exists(x)) { diff --git a/pkg-r/R/querychat_tools.R b/pkg-r/R/querychat_tools.R index be29fb01..49c33c0f 100644 --- 
a/pkg-r/R/querychat_tools.R +++ b/pkg-r/R/querychat_tools.R @@ -5,7 +5,8 @@ # summarizing the intent of the SQL query. tool_update_dashboard <- function( data_source, - update_fn = function(query, title) {} + update_fn = function(query, title) { + } ) { check_data_source(data_source) diff --git a/pkg-r/inst/prompts/prompt.md b/pkg-r/inst/prompts/prompt.md index 5972e075..8c6ff97b 100644 --- a/pkg-r/inst/prompts/prompt.md +++ b/pkg-r/inst/prompts/prompt.md @@ -71,21 +71,7 @@ quantile_cont(salary, 0.5) ``` {{/is_duck_db}} -{{#has_semantic_views}} -## Semantic Views - -**IMPORTANT**: This database has Semantic Views available. Semantic Views provide a curated layer over raw data with pre-defined metrics, dimensions, and relationships. They encode business logic and calculation rules that ensure consistent, accurate results. When a Semantic View covers the data you need, prefer it over raw tables to benefit from these certified definitions. - -**Real-world example**: A legacy ERP database had a revenue column (`X_AMT`) with hidden business rules—only status code 90 transactions count as realized revenue, and a discount factor (`ADJ_FCTR`) must be applied. Querying raw tables for "external customer revenue" returned **$184B**. The same query using the semantic model's certified `NET_REVENUE` metric returned **$84.5B**—the correct answer. The raw query was **2x+ too high** because it ignored discounts and included invalid transaction codes. - -When Semantic Views are available, use the `SEMANTIC_VIEW()` table function instead of raw SQL. 
- -{{{semantic_view_syntax}}} - - -{{{semantic_view_ddls}}} - -{{/has_semantic_views}} +{{{semantic_views}}} ## Your Capabilities You can handle these types of requests: diff --git a/pkg-r/inst/prompts/semantic-views/prompt.md b/pkg-r/inst/prompts/semantic-views/prompt.md new file mode 100644 index 00000000..32f448ee --- /dev/null +++ b/pkg-r/inst/prompts/semantic-views/prompt.md @@ -0,0 +1,5 @@ +## Semantic Views + +**IMPORTANT**: This database has Semantic Views available. Semantic Views provide a curated layer over raw data with pre-defined metrics, dimensions, and relationships. They encode business logic and calculation rules that ensure consistent, accurate results. When a Semantic View covers the data you need, prefer it over raw tables to benefit from these certified definitions (that is, use the `SEMANTIC_VIEW()` table function where appropriate when generating SQL). + +**Real-world example**: A legacy ERP database had a revenue column (`X_AMT`) with hidden business rules—only status code 90 transactions count as realized revenue, and a discount factor (`ADJ_FCTR`) must be applied. Querying raw tables for "external customer revenue" returned **$184B**. The same query using the semantic model's certified `NET_REVENUE` metric returned **$84.5B**—the correct answer. The raw query was **2x+ too high** because it ignored discounts and included invalid transaction codes. 
diff --git a/pkg-r/inst/prompts/semantic-view-syntax.md b/pkg-r/inst/prompts/semantic-views/syntax.md similarity index 100% rename from pkg-r/inst/prompts/semantic-view-syntax.md rename to pkg-r/inst/prompts/semantic-views/syntax.md diff --git a/pkg-r/tests/testthat/test-QueryChat.R b/pkg-r/tests/testthat/test-QueryChat.R index 15fa793d..a295c56d 100644 --- a/pkg-r/tests/testthat/test-QueryChat.R +++ b/pkg-r/tests/testthat/test-QueryChat.R @@ -665,7 +665,8 @@ test_that("querychat_app() only cleans up data frame sources on exit", { # have to use an option because the code is evaluated in a far-away env options(.test_cleanup = cleanup) }, - app = function(...) {} + app = function(...) { + } ) ) withr::local_options(rlang_interactive = TRUE) diff --git a/pkg-r/tests/testthat/test-SnowflakeSource.R b/pkg-r/tests/testthat/test-SnowflakeSource.R index 71be9c93..1b6d54b2 100644 --- a/pkg-r/tests/testthat/test-SnowflakeSource.R +++ b/pkg-r/tests/testthat/test-SnowflakeSource.R @@ -1,13 +1,12 @@ # Tests for Snowflake semantic view functionality in DBISource -describe("format_semantic_views_section()", { +describe("format_semantic_view_ddls()", { it("formats single semantic view correctly", { views <- list( list(name = "db.schema.view", ddl = "CREATE SEMANTIC VIEW test_view") ) - result <- format_semantic_views_section(views) + result <- format_semantic_view_ddls(views) - expect_match(result, "## Snowflake Semantic Views") expect_match(result, "db.schema.view") expect_match(result, "CREATE SEMANTIC VIEW test_view") expect_match(result, "```sql") @@ -18,21 +17,36 @@ describe("format_semantic_views_section()", { list(name = "db.schema.view1", ddl = "CREATE SEMANTIC VIEW v1"), list(name = "db.schema.view2", ddl = "CREATE SEMANTIC VIEW v2") ) - result <- format_semantic_views_section(views) + result <- format_semantic_view_ddls(views) expect_match(result, "db.schema.view1") expect_match(result, "db.schema.view2") expect_match(result, "CREATE SEMANTIC VIEW v1") 
expect_match(result, "CREATE SEMANTIC VIEW v2") }) +}) +describe("get_semantic_views_section_impl()", { it("includes IMPORTANT notice", { views <- list( list(name = "test", ddl = "DDL") ) - result <- format_semantic_views_section(views) + result <- get_semantic_views_section_impl(views) expect_match(result, "\\*\\*IMPORTANT\\*\\*") }) + + it("includes section header", { + views <- list( + list(name = "test", ddl = "DDL") + ) + result <- get_semantic_views_section_impl(views) + expect_match(result, "## Semantic Views") + }) + + it("returns empty string for empty views list", { + result <- get_semantic_views_section_impl(list()) + expect_equal(result, "") + }) }) describe("SQL escaping in get_semantic_view_ddl()", { @@ -72,18 +86,7 @@ describe("is_snowflake_connection()", { }) describe("DBISource semantic views", { - it("has_semantic_views() returns FALSE before get_schema() is called", { - skip_if_not_installed("RSQLite") - - conn <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") - withr::defer(DBI::dbDisconnect(conn)) - DBI::dbWriteTable(conn, "test_table", data.frame(x = 1:3)) - - source <- DBISource$new(conn, "test_table") - expect_false(source$has_semantic_views()) - }) - - it("has_semantic_views() returns FALSE for non-Snowflake after get_schema()", { + it("get_semantic_views_section() returns empty for non-Snowflake", { skip_if_not_installed("RSQLite") conn <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") @@ -91,8 +94,7 @@ describe("DBISource semantic views", { DBI::dbWriteTable(conn, "test_table", data.frame(x = 1:3)) source <- DBISource$new(conn, "test_table") - source$get_schema() - expect_false(source$has_semantic_views()) + expect_equal(source$get_semantic_views_section(), "") }) }) diff --git a/pkg-r/tests/testthat/test-querychat_tools.R b/pkg-r/tests/testthat/test-querychat_tools.R index 99170514..ec37d2e7 100644 --- a/pkg-r/tests/testthat/test-querychat_tools.R +++ b/pkg-r/tests/testthat/test-querychat_tools.R @@ -6,8 +6,10 @@ 
test_that("tool_update_dashboard() checks inputs", { df_source <- local_data_frame_source(new_test_df()) expect_snapshot(error = TRUE, { tool_update_dashboard(df_source, update_fn = NULL) - tool_update_dashboard(df_source, update_fn = function(query) {}) - tool_update_dashboard(df_source, update_fn = function(title, extra) {}) + tool_update_dashboard(df_source, update_fn = function(query) { + }) + tool_update_dashboard(df_source, update_fn = function(title, extra) { + }) }) }) From b2d6bf348fc2d7ffd96a536fde222bcacff1f3b2 Mon Sep 17 00:00:00 2001 From: cpsievert Date: Mon, 26 Jan 2026 23:23:55 +0000 Subject: [PATCH 31/45] `air format` (GitHub Actions) --- pkg-r/R/QueryChat.R | 6 ++---- pkg-r/R/querychat_tools.R | 3 +-- pkg-r/tests/testthat/test-QueryChat.R | 3 +-- pkg-r/tests/testthat/test-querychat_tools.R | 6 ++---- 4 files changed, 6 insertions(+), 12 deletions(-) diff --git a/pkg-r/R/QueryChat.R b/pkg-r/R/QueryChat.R index 7be4d9c8..f109a4ca 100644 --- a/pkg-r/R/QueryChat.R +++ b/pkg-r/R/QueryChat.R @@ -285,10 +285,8 @@ QueryChat <- R6::R6Class( #' `reset_dashboard` tool is called. client = function( tools = NA, - update_dashboard = function(query, title) { - }, - reset_dashboard = function() { - } + update_dashboard = function(query, title) {}, + reset_dashboard = function() {} ) { private$require_data_source("$client") diff --git a/pkg-r/R/querychat_tools.R b/pkg-r/R/querychat_tools.R index 49c33c0f..be29fb01 100644 --- a/pkg-r/R/querychat_tools.R +++ b/pkg-r/R/querychat_tools.R @@ -5,8 +5,7 @@ # summarizing the intent of the SQL query. 
tool_update_dashboard <- function( data_source, - update_fn = function(query, title) { - } + update_fn = function(query, title) {} ) { check_data_source(data_source) diff --git a/pkg-r/tests/testthat/test-QueryChat.R b/pkg-r/tests/testthat/test-QueryChat.R index a295c56d..15fa793d 100644 --- a/pkg-r/tests/testthat/test-QueryChat.R +++ b/pkg-r/tests/testthat/test-QueryChat.R @@ -665,8 +665,7 @@ test_that("querychat_app() only cleans up data frame sources on exit", { # have to use an option because the code is evaluated in a far-away env options(.test_cleanup = cleanup) }, - app = function(...) { - } + app = function(...) {} ) ) withr::local_options(rlang_interactive = TRUE) diff --git a/pkg-r/tests/testthat/test-querychat_tools.R b/pkg-r/tests/testthat/test-querychat_tools.R index ec37d2e7..99170514 100644 --- a/pkg-r/tests/testthat/test-querychat_tools.R +++ b/pkg-r/tests/testthat/test-querychat_tools.R @@ -6,10 +6,8 @@ test_that("tool_update_dashboard() checks inputs", { df_source <- local_data_frame_source(new_test_df()) expect_snapshot(error = TRUE, { tool_update_dashboard(df_source, update_fn = NULL) - tool_update_dashboard(df_source, update_fn = function(query) { - }) - tool_update_dashboard(df_source, update_fn = function(title, extra) { - }) + tool_update_dashboard(df_source, update_fn = function(query) {}) + tool_update_dashboard(df_source, update_fn = function(title, extra) {}) }) }) From cfbb9cb7fcd2ee41729952d6e67ed02be61ee2de Mon Sep 17 00:00:00 2001 From: cpsievert Date: Mon, 26 Jan 2026 23:23:58 +0000 Subject: [PATCH 32/45] `devtools::document()` (GitHub Actions) --- pkg-r/man/DBISource.Rd | 14 +++++++------- pkg-r/man/DataFrameSource.Rd | 2 +- pkg-r/man/TblSqlSource.Rd | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pkg-r/man/DBISource.Rd b/pkg-r/man/DBISource.Rd index 01771cbc..5a9d79a9 100644 --- a/pkg-r/man/DBISource.Rd +++ b/pkg-r/man/DBISource.Rd @@ -42,7 +42,7 @@ db_source$cleanup() \item 
\href{#method-DBISource-new}{\code{DBISource$new()}} \item \href{#method-DBISource-get_db_type}{\code{DBISource$get_db_type()}} \item \href{#method-DBISource-get_schema}{\code{DBISource$get_schema()}} -\item \href{#method-DBISource-get_semantic_view_ddls}{\code{DBISource$get_semantic_view_ddls()}} +\item \href{#method-DBISource-get_semantic_views_section}{\code{DBISource$get_semantic_views_section()}} \item \href{#method-DBISource-execute_query}{\code{DBISource$execute_query()}} \item \href{#method-DBISource-test_query}{\code{DBISource$test_query()}} \item \href{#method-DBISource-get_data}{\code{DBISource$get_data()}} @@ -108,16 +108,16 @@ A string describing the schema } } \if{html}{\out{
    }} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-DBISource-get_semantic_view_ddls}{}}} -\subsection{Method \code{get_semantic_view_ddls()}}{ -Get formatted DDL content for semantic views +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-DBISource-get_semantic_views_section}{}}} +\subsection{Method \code{get_semantic_views_section()}}{ +Get the complete semantic views section for the prompt \subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{DBISource$get_semantic_view_ddls()}\if{html}{\out{
    }} +\if{html}{\out{
    }}\preformatted{DBISource$get_semantic_views_section()}\if{html}{\out{
    }} } \subsection{Returns}{ -A string with DDL definitions, or empty string if none +A string with the full semantic views section, or empty string if none } } \if{html}{\out{
    }} diff --git a/pkg-r/man/DataFrameSource.Rd b/pkg-r/man/DataFrameSource.Rd index 9deaa6ae..9b768eee 100644 --- a/pkg-r/man/DataFrameSource.Rd +++ b/pkg-r/man/DataFrameSource.Rd @@ -56,7 +56,7 @@ df_sqlite$cleanup()
  • querychat::DBISource$get_data()
  • querychat::DBISource$get_db_type()
  • querychat::DBISource$get_schema()
  • -
  • querychat::DBISource$get_semantic_view_ddls()
  • +
  • querychat::DBISource$get_semantic_views_section()
  • querychat::DBISource$test_query()
  • diff --git a/pkg-r/man/TblSqlSource.Rd b/pkg-r/man/TblSqlSource.Rd index 2e3386e6..d0bf80b7 100644 --- a/pkg-r/man/TblSqlSource.Rd +++ b/pkg-r/man/TblSqlSource.Rd @@ -57,7 +57,7 @@ mtcars_source$cleanup() \if{html}{\out{
    Inherited methods
    }} From 3c73e5df3360dbc16e3ffdfc929794fd7ee8c72b Mon Sep 17 00:00:00 2001 From: Carson Date: Mon, 26 Jan 2026 17:32:37 -0600 Subject: [PATCH 33/45] style: Revert formatting changes in DBISource.R Minimize diff by reverting air format changes that were not part of the semantic views feature. Co-Authored-By: Claude Opus 4.5 --- pkg-r/R/DBISource.R | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/pkg-r/R/DBISource.R b/pkg-r/R/DBISource.R index f3c092bf..b8d6a6e8 100644 --- a/pkg-r/R/DBISource.R +++ b/pkg-r/R/DBISource.R @@ -58,27 +58,23 @@ DBISource <- R6::R6Class( # Check if table exists if (!DBI::dbExistsTable(conn, table_name)) { - cli::cli_abort( - c( - "Table {.val {DBI::dbQuoteIdentifier(conn, table_name)}} not found in database", - "i" = "If you're using a table in a catalog or schema, pass a {.fn DBI::Id} object to {.arg table_name}" - ) - ) + cli::cli_abort(c( + "Table {.val {DBI::dbQuoteIdentifier(conn, table_name)}} not found in database", + "i" = "If you're using a table in a catalog or schema, pass a {.fn DBI::Id} object to {.arg table_name}" + )) } private$conn <- conn self$table_name <- table_name # Store original column names for validation - private$colnames <- colnames( - DBI::dbGetQuery( - conn, - sprintf( - "SELECT * FROM %s LIMIT 0", - DBI::dbQuoteIdentifier(conn, table_name) - ) + private$colnames <- colnames(DBI::dbGetQuery( + conn, + sprintf( + "SELECT * FROM %s LIMIT 0", + DBI::dbQuoteIdentifier(conn, table_name) ) - ) + )) }, #' @description Get the database type @@ -108,11 +104,7 @@ DBISource <- R6::R6Class( #' @return A string describing the schema get_schema = function(categorical_threshold = 20) { check_number_whole(categorical_threshold, min = 1) - get_schema_impl( - private$conn, - self$table_name, - categorical_threshold - ) + get_schema_impl(private$conn, self$table_name, categorical_threshold) }, #' @description @@ -203,6 +195,7 @@ DBISource <- R6::R6Class( ) ) + get_schema_impl 
<- function( conn, table_name, @@ -382,6 +375,7 @@ get_schema_impl <- function( paste(schema_lines, collapse = "\n") } + # nocov start # Map R classes to SQL types r_class_to_sql_type <- function(r_class) { From 27867cc17cd531534a4acd9ba5000655749142ba Mon Sep 17 00:00:00 2001 From: Carson Date: Tue, 27 Jan 2026 10:45:27 -0600 Subject: [PATCH 34/45] fix: Use raw_sql() for Ibis backends to support SHOW commands The `backend.sql()` method in Ibis parses queries with sqlglot, which doesn't support Snowflake commands like `SHOW SEMANTIC VIEWS`. Switch to using `backend.raw_sql()` which executes queries without parsing. Co-Authored-By: Claude Opus 4.5 --- pkg-py/src/querychat/_snowflake.py | 9 +++++---- pkg-py/tests/test_snowflake_source.py | 17 ++++++++--------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/pkg-py/src/querychat/_snowflake.py b/pkg-py/src/querychat/_snowflake.py index 87b58633..7ac9a4b0 100644 --- a/pkg-py/src/querychat/_snowflake.py +++ b/pkg-py/src/querychat/_snowflake.py @@ -42,10 +42,11 @@ def execute_raw_sql( keys = list(result.keys()) return [dict(zip(keys, row, strict=False)) for row in result.fetchall()] else: - result_table = backend.sql(query) - df = result_table.execute() - columns = list(df.columns) - return [dict(zip(columns, row, strict=False)) for row in df.itertuples(index=False)] + # Use raw_sql() for Ibis backends to avoid SQL parsing issues + # with Snowflake commands like SHOW SEMANTIC VIEWS + with backend.raw_sql(query) as cursor: + columns = [desc[0] for desc in cursor.description] + return [dict(zip(columns, row, strict=False)) for row in cursor.fetchall()] def discover_semantic_views( diff --git a/pkg-py/tests/test_snowflake_source.py b/pkg-py/tests/test_snowflake_source.py index 847040c1..db717084 100644 --- a/pkg-py/tests/test_snowflake_source.py +++ b/pkg-py/tests/test_snowflake_source.py @@ -128,20 +128,19 @@ def test_sqlalchemy_backend(self): def test_ibis_backend(self): """Test execute_raw_sql with Ibis 
backend.""" - import pandas as pd - mock_backend = MagicMock() - mock_table = MagicMock() - # Use a real pandas DataFrame for itertuples to work correctly - df = pd.DataFrame({"col1": ["a", "b"]}) + mock_cursor = MagicMock() + mock_cursor.description = [("col1",), ("col2",)] + mock_cursor.fetchall.return_value = [("a", "b"), ("c", "d")] - mock_backend.sql.return_value = mock_table - mock_table.execute.return_value = df + # raw_sql returns a context manager + mock_backend.raw_sql.return_value.__enter__ = MagicMock(return_value=mock_cursor) + mock_backend.raw_sql.return_value.__exit__ = MagicMock(return_value=False) result = execute_raw_sql("SELECT 1", mock_backend) - assert result == [{"col1": "a"}, {"col1": "b"}] - mock_backend.sql.assert_called_once_with("SELECT 1") + assert result == [{"col1": "a", "col2": "b"}, {"col1": "c", "col2": "d"}] + mock_backend.raw_sql.assert_called_once_with("SELECT 1") class TestDiscoverSemanticViews: From eec140a46fa20a7698c95906c5e061463fa631c6 Mon Sep 17 00:00:00 2001 From: Carson Date: Tue, 27 Jan 2026 12:58:26 -0600 Subject: [PATCH 35/45] fix: Address PR feedback and fix pyright error - Remove comment about raw_sql() per PR feedback - Add type ignore for raw_sql() to fix pyright error (method exists on concrete backends but not typed on SQLBackend base class) - Remove unused PROMPTS_DIR constant per PR feedback Co-Authored-By: Claude Opus 4.5 --- pkg-py/src/querychat/_snowflake.py | 4 +--- pkg-py/src/querychat/_system_prompt.py | 3 --- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/pkg-py/src/querychat/_snowflake.py b/pkg-py/src/querychat/_snowflake.py index 7ac9a4b0..46a65b85 100644 --- a/pkg-py/src/querychat/_snowflake.py +++ b/pkg-py/src/querychat/_snowflake.py @@ -42,9 +42,7 @@ def execute_raw_sql( keys = list(result.keys()) return [dict(zip(keys, row, strict=False)) for row in result.fetchall()] else: - # Use raw_sql() for Ibis backends to avoid SQL parsing issues - # with Snowflake commands like SHOW SEMANTIC 
VIEWS - with backend.raw_sql(query) as cursor: + with backend.raw_sql(query) as cursor: # type: ignore[union-attr] columns = [desc[0] for desc in cursor.description] return [dict(zip(columns, row, strict=False)) for row in cursor.fetchall()] diff --git a/pkg-py/src/querychat/_system_prompt.py b/pkg-py/src/querychat/_system_prompt.py index caa39f6b..519a56f9 100644 --- a/pkg-py/src/querychat/_system_prompt.py +++ b/pkg-py/src/querychat/_system_prompt.py @@ -10,9 +10,6 @@ from ._querychat_base import TOOL_GROUPS -PROMPTS_DIR = Path(__file__).parent / "prompts" - - class QueryChatSystemPrompt: """Manages system prompt template and component assembly.""" From 8814ab36f448f98b8bb62ebb11464055d99631d9 Mon Sep 17 00:00:00 2001 From: Carson Date: Tue, 27 Jan 2026 13:07:09 -0600 Subject: [PATCH 36/45] Add brand.yml website dependency --- .github/workflows/docs-r-pkgdown.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs-r-pkgdown.yml b/.github/workflows/docs-r-pkgdown.yml index 68313a93..baeec2ad 100644 --- a/.github/workflows/docs-r-pkgdown.yml +++ b/.github/workflows/docs-r-pkgdown.yml @@ -48,7 +48,7 @@ jobs: - uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: any::pkgdown, local::. + extra-packages: any::pkgdown, any::brand.yml, local::. 
needs: website working-directory: pkg-r From 334635b336856f99305f8076e09730b45f9502fa Mon Sep 17 00:00:00 2001 From: Carson Date: Tue, 27 Jan 2026 13:13:43 -0600 Subject: [PATCH 37/45] refactor: Rename semantic views methods for clarity - Rename get_semantic_views_section() to get_semantic_views_description() on DataSource classes (clearer intent) - Rename get_semantic_views_section() to format_semantic_views() in _snowflake.py / DBISource.R (matches other format_* functions) - Update tests to use new method names Co-Authored-By: Claude Opus 4.5 --- pkg-py/src/querychat/_datasource.py | 33 ++++++++++++------ pkg-py/src/querychat/_shiny.py | 4 ++- pkg-py/src/querychat/_snowflake.py | 2 +- pkg-py/src/querychat/_system_prompt.py | 2 +- pkg-py/tests/test_snowflake_source.py | 38 ++++++++++++--------- pkg-r/R/DBISource.R | 8 ++--- pkg-r/R/QueryChatSystemPrompt.R | 4 +-- pkg-r/tests/testthat/test-SnowflakeSource.R | 12 +++---- 8 files changed, 60 insertions(+), 43 deletions(-) diff --git a/pkg-py/src/querychat/_datasource.py b/pkg-py/src/querychat/_datasource.py index 64a5dff9..a7a5aec5 100644 --- a/pkg-py/src/querychat/_datasource.py +++ b/pkg-py/src/querychat/_datasource.py @@ -13,7 +13,7 @@ from ._df_compat import read_sql from ._snowflake import ( discover_semantic_views, - get_semantic_views_section, + format_semantic_views, ) from ._utils import as_narwhals, check_query @@ -60,7 +60,11 @@ def format_schema(table_name: str, columns: list[ColumnMeta]) -> str: for col in columns: lines.append(f"- {col.name} ({col.sql_type})") - if col.kind in ("numeric", "date") and col.min_val is not None and col.max_val is not None: + if ( + col.kind in ("numeric", "date") + and col.min_val is not None + and col.max_val is not None + ): lines.append(f" Range: {col.min_val} to {col.max_val}") elif col.categories: cats = ", ".join(f"'{v}'" for v in col.categories) @@ -183,7 +187,7 @@ def cleanup(self) -> None: """ - def get_semantic_views_section(self) -> str: + def 
get_semantic_views_description(self) -> str: """Get the complete semantic views section for the prompt.""" return "" @@ -497,19 +501,21 @@ def get_schema(self, *, categorical_threshold: int) -> str: self._add_column_stats(columns, categorical_threshold) return format_schema(self.table_name, columns) - def get_semantic_views_section(self) -> str: + def get_semantic_views_description(self) -> str: """Get the complete semantic views section for the prompt.""" if self._engine.dialect.name.lower() != "snowflake": return "" views = discover_semantic_views(self._engine) - return get_semantic_views_section(views) + return format_semantic_views(views) @staticmethod def _make_column_meta(name: str, sa_type: sqltypes.TypeEngine) -> ColumnMeta: """Create ColumnMeta from SQLAlchemy type.""" kind: Literal["numeric", "text", "date", "other"] - if isinstance(sa_type, (sqltypes.Integer, sqltypes.BigInteger, sqltypes.SmallInteger)): + if isinstance( + sa_type, (sqltypes.Integer, sqltypes.BigInteger, sqltypes.SmallInteger) + ): kind = "numeric" sql_type = "INTEGER" elif isinstance(sa_type, sqltypes.Float): @@ -552,7 +558,9 @@ def _add_column_stats( select_parts.append(f"MIN({col.name}) as {col.name}__min") select_parts.append(f"MAX({col.name}) as {col.name}__max") elif col.kind == "text": - select_parts.append(f"COUNT(DISTINCT {col.name}) as {col.name}__nunique") + select_parts.append( + f"COUNT(DISTINCT {col.name}) as {col.name}__nunique" + ) if not select_parts: return @@ -560,7 +568,9 @@ def _add_column_stats( # Execute stats query stats = {} try: - stats_query = text(f"SELECT {', '.join(select_parts)} FROM {self.table_name}") + stats_query = text( + f"SELECT {', '.join(select_parts)} FROM {self.table_name}" + ) with self._get_connection() as conn: result = conn.execute(stats_query).fetchone() if result: @@ -576,7 +586,8 @@ def _add_column_stats( # Find text columns that qualify as categorical categorical_cols = [ - col for col in columns + col + for col in columns if col.kind == 
"text" and (nunique := stats.get(f"{col.name}__nunique")) and nunique <= categorical_threshold @@ -975,12 +986,12 @@ def get_schema(self, *, categorical_threshold: int) -> str: self._add_column_stats(columns, self._table, categorical_threshold) return format_schema(self.table_name, columns) - def get_semantic_views_section(self) -> str: + def get_semantic_views_description(self) -> str: """Get the complete semantic views section for the prompt.""" if self._backend.name.lower() != "snowflake": return "" views = discover_semantic_views(self._backend) - return get_semantic_views_section(views) + return format_semantic_views(views) @staticmethod def _make_column_meta(name: str, dtype: IbisDataType) -> ColumnMeta: diff --git a/pkg-py/src/querychat/_shiny.py b/pkg-py/src/querychat/_shiny.py index c1dcc9a1..b65e9f2c 100644 --- a/pkg-py/src/querychat/_shiny.py +++ b/pkg-py/src/querychat/_shiny.py @@ -325,7 +325,9 @@ def _(): @render.data_frame def dt(): # Collect lazy sources (LazyFrame, Ibis Table) to eager DataFrame - return as_narwhals(vals.df()) + res = as_narwhals(vals.df()) + # TODO: Allow for specifying max rows? 
+ return res.head(100) @render.ui def sql_output(): diff --git a/pkg-py/src/querychat/_snowflake.py b/pkg-py/src/querychat/_snowflake.py index 46a65b85..381c9cd7 100644 --- a/pkg-py/src/querychat/_snowflake.py +++ b/pkg-py/src/querychat/_snowflake.py @@ -104,7 +104,7 @@ def format_semantic_view_ddls(semantic_views: list[SemanticViewInfo]) -> str: return "\n".join(lines) -def get_semantic_views_section(semantic_views: list[SemanticViewInfo]) -> str: +def format_semantic_views(semantic_views: list[SemanticViewInfo]) -> str: """Build the complete semantic views section for the prompt.""" if not semantic_views: return "" diff --git a/pkg-py/src/querychat/_system_prompt.py b/pkg-py/src/querychat/_system_prompt.py index 519a56f9..7b4f737a 100644 --- a/pkg-py/src/querychat/_system_prompt.py +++ b/pkg-py/src/querychat/_system_prompt.py @@ -71,7 +71,7 @@ def render(self, tools: tuple[TOOL_GROUPS, ...] | None) -> str: context = { "db_type": db_type, "is_duck_db": is_duck_db, - "semantic_views": self.data_source.get_semantic_views_section(), + "semantic_views": self.data_source.get_semantic_views_description(), "schema": self.schema, "data_description": self.data_description, "extra_instructions": self.extra_instructions, diff --git a/pkg-py/tests/test_snowflake_source.py b/pkg-py/tests/test_snowflake_source.py index db717084..d0a2abd4 100644 --- a/pkg-py/tests/test_snowflake_source.py +++ b/pkg-py/tests/test_snowflake_source.py @@ -42,7 +42,9 @@ class TestFormatSemanticViewDdls: def test_format_single_view(self): """Test that format produces expected markdown structure for single view.""" - views = [SemanticViewInfo(name="db.schema.view1", ddl="CREATE SEMANTIC VIEW v1")] + views = [ + SemanticViewInfo(name="db.schema.view1", ddl="CREATE SEMANTIC VIEW v1") + ] section = format_semantic_view_ddls(views) assert "db.schema.view1" in section @@ -134,7 +136,9 @@ def test_ibis_backend(self): mock_cursor.fetchall.return_value = [("a", "b"), ("c", "d")] # raw_sql returns a context 
manager - mock_backend.raw_sql.return_value.__enter__ = MagicMock(return_value=mock_cursor) + mock_backend.raw_sql.return_value.__enter__ = MagicMock( + return_value=mock_cursor + ) mock_backend.raw_sql.return_value.__exit__ = MagicMock(return_value=False) result = execute_raw_sql("SELECT 1", mock_backend) @@ -289,8 +293,8 @@ def test_discovery_for_snowflake_backend(self): source = SQLAlchemySource(mock_engine, "test_table") mock_discover.assert_not_called() - # Discovery happens when calling get_semantic_views_section - source.get_semantic_views_section() + # Discovery happens when calling get_semantic_views_description + source.get_semantic_views_description() mock_discover.assert_called_once_with(mock_engine) @@ -311,12 +315,12 @@ def test_discovery_skipped_for_non_snowflake(self): source = SQLAlchemySource(mock_engine, "test_table") # For non-Snowflake, discovery is not called - source.get_semantic_views_section() + source.get_semantic_views_description() mock_discover.assert_not_called() - def test_get_semantic_views_section_includes_views(self): - """Test that get_semantic_views_section includes semantic view content.""" + def test_get_semantic_views_description_includes_views(self): + """Test that get_semantic_views_description includes semantic view content.""" from querychat._datasource import SQLAlchemySource views = [SemanticViewInfo(name="db.schema.metrics", ddl="CREATE SEMANTIC VIEW")] @@ -335,14 +339,14 @@ def test_get_semantic_views_section_includes_views(self): ), ): source = SQLAlchemySource(mock_engine, "test_table") - section = source.get_semantic_views_section() + section = source.get_semantic_views_description() assert "## Semantic Views" in section assert "db.schema.metrics" in section assert "CREATE SEMANTIC VIEW" in section - def test_get_semantic_views_section_empty_for_non_snowflake(self): - """Test that get_semantic_views_section returns empty for non-Snowflake.""" + def test_get_semantic_views_description_empty_for_non_snowflake(self): + 
"""Test that get_semantic_views_description returns empty for non-Snowflake.""" from querychat._datasource import SQLAlchemySource mock_engine = MagicMock() @@ -353,7 +357,7 @@ def test_get_semantic_views_section_empty_for_non_snowflake(self): with patch("querychat._datasource.inspect", return_value=mock_inspector): source = SQLAlchemySource(mock_engine, "test_table") - section = source.get_semantic_views_section() + section = source.get_semantic_views_description() assert section == "" @@ -384,8 +388,8 @@ def test_discovery_for_snowflake_backend(self): source = IbisSource(mock_table, "test") mock_discover.assert_not_called() - # Discovery happens when calling get_semantic_views_section - source.get_semantic_views_section() + # Discovery happens when calling get_semantic_views_description + source.get_semantic_views_description() mock_discover.assert_called_once_with(mock_backend) @@ -410,12 +414,12 @@ def test_discovery_skipped_for_non_snowflake(self): source = IbisSource(mock_table, "test") # For non-Snowflake, discovery is not called - source.get_semantic_views_section() + source.get_semantic_views_description() mock_discover.assert_not_called() - def test_get_semantic_views_section_includes_views(self): - """Test that get_semantic_views_section includes semantic view content.""" + def test_get_semantic_views_description_includes_views(self): + """Test that get_semantic_views_description includes semantic view content.""" from ibis.backends.sql import SQLBackend from querychat._datasource import IbisSource @@ -438,7 +442,7 @@ def test_get_semantic_views_section_includes_views(self): return_value=views, ): source = IbisSource(mock_table, "test_table") - section = source.get_semantic_views_section() + section = source.get_semantic_views_description() assert "## Semantic Views" in section assert "db.schema.metrics" in section diff --git a/pkg-r/R/DBISource.R b/pkg-r/R/DBISource.R index b8d6a6e8..7cab4244 100644 --- a/pkg-r/R/DBISource.R +++ b/pkg-r/R/DBISource.R @@ 
-108,9 +108,9 @@ DBISource <- R6::R6Class( }, #' @description - #' Get the complete semantic views section for the prompt + #' Get the complete semantic views description for the prompt #' @return A string with the full semantic views section, or empty string if none - get_semantic_views_section = function() { + get_semantic_views_description = function() { if (!is_snowflake_connection(private$conn)) { return("") } @@ -118,7 +118,7 @@ DBISource <- R6::R6Class( if (length(views) == 0) { return("") } - get_semantic_views_section_impl(views) + format_semantic_views(views) }, #' @description @@ -525,7 +525,7 @@ format_semantic_view_ddls <- function(semantic_views) { #' @param semantic_views A list of semantic view info (name and ddl) #' @return A formatted string with the full semantic views section #' @noRd -get_semantic_views_section_impl <- function(semantic_views) { +format_semantic_views <- function(semantic_views) { if (length(semantic_views) == 0) { return("") } diff --git a/pkg-r/R/QueryChatSystemPrompt.R b/pkg-r/R/QueryChatSystemPrompt.R index 448ad124..db98e952 100644 --- a/pkg-r/R/QueryChatSystemPrompt.R +++ b/pkg-r/R/QueryChatSystemPrompt.R @@ -82,10 +82,10 @@ QueryChatSystemPrompt <- R6::R6Class( db_type <- self$data_source$get_db_type() is_duck_db <- tolower(db_type) == "duckdb" - # Get semantic views section (available with DBISource for Snowflake) + # Get semantic views description (available with DBISource for Snowflake) semantic_views <- "" if (inherits(self$data_source, "DBISource")) { - semantic_views <- self$data_source$get_semantic_views_section() + semantic_views <- self$data_source$get_semantic_views_description() } context <- list( diff --git a/pkg-r/tests/testthat/test-SnowflakeSource.R b/pkg-r/tests/testthat/test-SnowflakeSource.R index 1b6d54b2..32217d90 100644 --- a/pkg-r/tests/testthat/test-SnowflakeSource.R +++ b/pkg-r/tests/testthat/test-SnowflakeSource.R @@ -26,12 +26,12 @@ describe("format_semantic_view_ddls()", { }) }) 
-describe("get_semantic_views_section_impl()", { +describe("format_semantic_views()", { it("includes IMPORTANT notice", { views <- list( list(name = "test", ddl = "DDL") ) - result <- get_semantic_views_section_impl(views) + result <- format_semantic_views(views) expect_match(result, "\\*\\*IMPORTANT\\*\\*") }) @@ -39,12 +39,12 @@ describe("get_semantic_views_section_impl()", { views <- list( list(name = "test", ddl = "DDL") ) - result <- get_semantic_views_section_impl(views) + result <- format_semantic_views(views) expect_match(result, "## Semantic Views") }) it("returns empty string for empty views list", { - result <- get_semantic_views_section_impl(list()) + result <- format_semantic_views(list()) expect_equal(result, "") }) }) @@ -86,7 +86,7 @@ describe("is_snowflake_connection()", { }) describe("DBISource semantic views", { - it("get_semantic_views_section() returns empty for non-Snowflake", { + it("get_semantic_views_description() returns empty for non-Snowflake", { skip_if_not_installed("RSQLite") conn <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") @@ -94,7 +94,7 @@ describe("DBISource semantic views", { DBI::dbWriteTable(conn, "test_table", data.frame(x = 1:3)) source <- DBISource$new(conn, "test_table") - expect_equal(source$get_semantic_views_section(), "") + expect_equal(source$get_semantic_views_description(), "") }) }) From e76c80c51c2d4a18ceda9cbf7aa3b84c2d4607b1 Mon Sep 17 00:00:00 2001 From: cpsievert Date: Tue, 27 Jan 2026 19:19:47 +0000 Subject: [PATCH 38/45] `devtools::document()` (GitHub Actions) --- pkg-r/man/DBISource.Rd | 12 ++++++------ pkg-r/man/DataFrameSource.Rd | 2 +- pkg-r/man/TblSqlSource.Rd | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pkg-r/man/DBISource.Rd b/pkg-r/man/DBISource.Rd index 5a9d79a9..867a2e07 100644 --- a/pkg-r/man/DBISource.Rd +++ b/pkg-r/man/DBISource.Rd @@ -42,7 +42,7 @@ db_source$cleanup() \item \href{#method-DBISource-new}{\code{DBISource$new()}} \item 
\href{#method-DBISource-get_db_type}{\code{DBISource$get_db_type()}} \item \href{#method-DBISource-get_schema}{\code{DBISource$get_schema()}} -\item \href{#method-DBISource-get_semantic_views_section}{\code{DBISource$get_semantic_views_section()}} +\item \href{#method-DBISource-get_semantic_views_description}{\code{DBISource$get_semantic_views_description()}} \item \href{#method-DBISource-execute_query}{\code{DBISource$execute_query()}} \item \href{#method-DBISource-test_query}{\code{DBISource$test_query()}} \item \href{#method-DBISource-get_data}{\code{DBISource$get_data()}} @@ -108,12 +108,12 @@ A string describing the schema } } \if{html}{\out{
    }} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-DBISource-get_semantic_views_section}{}}} -\subsection{Method \code{get_semantic_views_section()}}{ -Get the complete semantic views section for the prompt +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-DBISource-get_semantic_views_description}{}}} +\subsection{Method \code{get_semantic_views_description()}}{ +Get the complete semantic views description for the prompt \subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{DBISource$get_semantic_views_section()}\if{html}{\out{
    }} +\if{html}{\out{
    }}\preformatted{DBISource$get_semantic_views_description()}\if{html}{\out{
    }} } \subsection{Returns}{ diff --git a/pkg-r/man/DataFrameSource.Rd b/pkg-r/man/DataFrameSource.Rd index 9b768eee..c1d18815 100644 --- a/pkg-r/man/DataFrameSource.Rd +++ b/pkg-r/man/DataFrameSource.Rd @@ -56,7 +56,7 @@ df_sqlite$cleanup()
  • querychat::DBISource$get_data()
  • querychat::DBISource$get_db_type()
  • querychat::DBISource$get_schema()
  • -
  • querychat::DBISource$get_semantic_views_section()
  • +
  • querychat::DBISource$get_semantic_views_description()
  • querychat::DBISource$test_query()
  • diff --git a/pkg-r/man/TblSqlSource.Rd b/pkg-r/man/TblSqlSource.Rd index d0bf80b7..4446cc77 100644 --- a/pkg-r/man/TblSqlSource.Rd +++ b/pkg-r/man/TblSqlSource.Rd @@ -57,7 +57,7 @@ mtcars_source$cleanup() \if{html}{\out{
    Inherited methods
    }} From 4d88a77555b37b191ed91d700364bba3f248e176 Mon Sep 17 00:00:00 2001 From: Carson Date: Tue, 27 Jan 2026 13:23:05 -0600 Subject: [PATCH 39/45] style: Revert formatting changes and _shiny.py modification Minimize diff by reverting auto-formatter changes that were not part of the semantic views feature. Co-Authored-By: Claude Opus 4.5 --- pkg-py/src/querychat/_datasource.py | 27 +++++++-------------------- pkg-py/src/querychat/_shiny.py | 4 +--- pkg-py/tests/test_snowflake_source.py | 8 ++------ 3 files changed, 10 insertions(+), 29 deletions(-) diff --git a/pkg-py/src/querychat/_datasource.py b/pkg-py/src/querychat/_datasource.py index a7a5aec5..d40243f1 100644 --- a/pkg-py/src/querychat/_datasource.py +++ b/pkg-py/src/querychat/_datasource.py @@ -60,11 +60,7 @@ def format_schema(table_name: str, columns: list[ColumnMeta]) -> str: for col in columns: lines.append(f"- {col.name} ({col.sql_type})") - if ( - col.kind in ("numeric", "date") - and col.min_val is not None - and col.max_val is not None - ): + if col.kind in ("numeric", "date") and col.min_val is not None and col.max_val is not None: lines.append(f" Range: {col.min_val} to {col.max_val}") elif col.categories: cats = ", ".join(f"'{v}'" for v in col.categories) @@ -513,9 +509,7 @@ def _make_column_meta(name: str, sa_type: sqltypes.TypeEngine) -> ColumnMeta: """Create ColumnMeta from SQLAlchemy type.""" kind: Literal["numeric", "text", "date", "other"] - if isinstance( - sa_type, (sqltypes.Integer, sqltypes.BigInteger, sqltypes.SmallInteger) - ): + if isinstance(sa_type, (sqltypes.Integer, sqltypes.BigInteger, sqltypes.SmallInteger)): kind = "numeric" sql_type = "INTEGER" elif isinstance(sa_type, sqltypes.Float): @@ -558,9 +552,7 @@ def _add_column_stats( select_parts.append(f"MIN({col.name}) as {col.name}__min") select_parts.append(f"MAX({col.name}) as {col.name}__max") elif col.kind == "text": - select_parts.append( - f"COUNT(DISTINCT {col.name}) as {col.name}__nunique" - ) + 
select_parts.append(f"COUNT(DISTINCT {col.name}) as {col.name}__nunique") if not select_parts: return @@ -568,9 +560,7 @@ def _add_column_stats( # Execute stats query stats = {} try: - stats_query = text( - f"SELECT {', '.join(select_parts)} FROM {self.table_name}" - ) + stats_query = text(f"SELECT {', '.join(select_parts)} FROM {self.table_name}") with self._get_connection() as conn: result = conn.execute(stats_query).fetchone() if result: @@ -586,8 +576,7 @@ def _add_column_stats( # Find text columns that qualify as categorical categorical_cols = [ - col - for col in columns + col for col in columns if col.kind == "text" and (nunique := stats.get(f"{col.name}__nunique")) and nunique <= categorical_threshold @@ -921,8 +910,7 @@ def _add_column_stats( # Find text columns that qualify as categorical categorical_cols = [ - col - for col in columns + col for col in columns if col.kind == "text" and (nunique := stats.get(f"{col.name}__nunique")) and nunique <= categorical_threshold @@ -1051,8 +1039,7 @@ def _add_column_stats( col.max_val = stats.get(f"{col.name}__max") categorical_cols = [ - col - for col in columns + col for col in columns if col.kind == "text" and (nunique := stats.get(f"{col.name}__nunique")) and nunique <= categorical_threshold diff --git a/pkg-py/src/querychat/_shiny.py b/pkg-py/src/querychat/_shiny.py index b65e9f2c..c1dcc9a1 100644 --- a/pkg-py/src/querychat/_shiny.py +++ b/pkg-py/src/querychat/_shiny.py @@ -325,9 +325,7 @@ def _(): @render.data_frame def dt(): # Collect lazy sources (LazyFrame, Ibis Table) to eager DataFrame - res = as_narwhals(vals.df()) - # TODO: Allow for specifying max rows? 
- return res.head(100) + return as_narwhals(vals.df()) @render.ui def sql_output(): diff --git a/pkg-py/tests/test_snowflake_source.py b/pkg-py/tests/test_snowflake_source.py index d0a2abd4..168401ad 100644 --- a/pkg-py/tests/test_snowflake_source.py +++ b/pkg-py/tests/test_snowflake_source.py @@ -42,9 +42,7 @@ class TestFormatSemanticViewDdls: def test_format_single_view(self): """Test that format produces expected markdown structure for single view.""" - views = [ - SemanticViewInfo(name="db.schema.view1", ddl="CREATE SEMANTIC VIEW v1") - ] + views = [SemanticViewInfo(name="db.schema.view1", ddl="CREATE SEMANTIC VIEW v1")] section = format_semantic_view_ddls(views) assert "db.schema.view1" in section @@ -136,9 +134,7 @@ def test_ibis_backend(self): mock_cursor.fetchall.return_value = [("a", "b"), ("c", "d")] # raw_sql returns a context manager - mock_backend.raw_sql.return_value.__enter__ = MagicMock( - return_value=mock_cursor - ) + mock_backend.raw_sql.return_value.__enter__ = MagicMock(return_value=mock_cursor) mock_backend.raw_sql.return_value.__exit__ = MagicMock(return_value=False) result = execute_raw_sql("SELECT 1", mock_backend) From 4aee0c7c26cc18dca7f36a8df2536032cde91ebb Mon Sep 17 00:00:00 2001 From: Carson Date: Tue, 27 Jan 2026 15:11:36 -0600 Subject: [PATCH 40/45] docs: Add Snowflake Semantic Views to CHANGELOG and NEWS Co-Authored-By: Claude Opus 4.5 --- pkg-py/CHANGELOG.md | 6 ++++++ pkg-r/NEWS.md | 2 ++ 2 files changed, 8 insertions(+) diff --git a/pkg-py/CHANGELOG.md b/pkg-py/CHANGELOG.md index efda1d24..ae25050b 100644 --- a/pkg-py/CHANGELOG.md +++ b/pkg-py/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### New features + +* Added support for Snowflake Semantic Views. 
When connected to Snowflake (via SQLAlchemy or Ibis), querychat automatically discovers available Semantic Views and includes their definitions in the system prompt. This helps the LLM generate correct queries using the `SEMANTIC_VIEW()` table function with certified business metrics and dimensions. (#200) + ## [0.5.1] - 2026-01-23 ### New features diff --git a/pkg-r/NEWS.md b/pkg-r/NEWS.md index 807ce92f..7594c765 100644 --- a/pkg-r/NEWS.md +++ b/pkg-r/NEWS.md @@ -1,5 +1,7 @@ # querychat (development version) +* Added support for Snowflake Semantic Views. When connected to Snowflake via DBI, querychat automatically discovers available Semantic Views and includes their definitions in the system prompt. This helps the LLM generate correct queries using the `SEMANTIC_VIEW()` table function with certified business metrics and dimensions. (#200) + * `QueryChat$new()` now supports deferred data source. Pass `data_source = NULL` at initialization time, then provide the actual data source via the `data_source` parameter of `$server()` or by setting the `$data_source` property. This enables use cases where the data source depends on session-specific authentication or per-user database connections. 
(#202) # querychat 0.2.0 From f6bd8200e2e06f705d181014745e1572e093bd4a Mon Sep 17 00:00:00 2001 From: Carson Date: Tue, 27 Jan 2026 15:15:06 -0600 Subject: [PATCH 41/45] refactor: Update docstrings and revert formatting changes - Update get_semantic_views_description() docstrings to clarify purpose - Revert list comprehension formatting to match original style Co-Authored-By: Claude Opus 4.5 --- pkg-py/src/querychat/_datasource.py | 15 +++++++++------ pkg-r/R/DBISource.R | 4 ++-- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/pkg-py/src/querychat/_datasource.py b/pkg-py/src/querychat/_datasource.py index d40243f1..e9330295 100644 --- a/pkg-py/src/querychat/_datasource.py +++ b/pkg-py/src/querychat/_datasource.py @@ -184,7 +184,7 @@ def cleanup(self) -> None: """ def get_semantic_views_description(self) -> str: - """Get the complete semantic views section for the prompt.""" + """Get information about semantic views (if any) for the system prompt.""" return "" @@ -498,7 +498,7 @@ def get_schema(self, *, categorical_threshold: int) -> str: return format_schema(self.table_name, columns) def get_semantic_views_description(self) -> str: - """Get the complete semantic views section for the prompt.""" + """Get information about semantic views (if any) for the system prompt.""" if self._engine.dialect.name.lower() != "snowflake": return "" views = discover_semantic_views(self._engine) @@ -576,7 +576,8 @@ def _add_column_stats( # Find text columns that qualify as categorical categorical_cols = [ - col for col in columns + col + for col in columns if col.kind == "text" and (nunique := stats.get(f"{col.name}__nunique")) and nunique <= categorical_threshold @@ -910,7 +911,8 @@ def _add_column_stats( # Find text columns that qualify as categorical categorical_cols = [ - col for col in columns + col + for col in columns if col.kind == "text" and (nunique := stats.get(f"{col.name}__nunique")) and nunique <= categorical_threshold @@ -975,7 +977,7 @@ def 
get_schema(self, *, categorical_threshold: int) -> str: return format_schema(self.table_name, columns) def get_semantic_views_description(self) -> str: - """Get the complete semantic views section for the prompt.""" + """Get information about semantic views (if any) for the system prompt.""" if self._backend.name.lower() != "snowflake": return "" views = discover_semantic_views(self._backend) @@ -1039,7 +1041,8 @@ def _add_column_stats( col.max_val = stats.get(f"{col.name}__max") categorical_cols = [ - col for col in columns + col + for col in columns if col.kind == "text" and (nunique := stats.get(f"{col.name}__nunique")) and nunique <= categorical_threshold diff --git a/pkg-r/R/DBISource.R b/pkg-r/R/DBISource.R index 7cab4244..389592d6 100644 --- a/pkg-r/R/DBISource.R +++ b/pkg-r/R/DBISource.R @@ -108,8 +108,8 @@ DBISource <- R6::R6Class( }, #' @description - #' Get the complete semantic views description for the prompt - #' @return A string with the full semantic views section, or empty string if none + #' Get information about semantic views (if any) for the system prompt. 
+ #' @return A string with semantic view information, or empty string if none get_semantic_views_description = function() { if (!is_snowflake_connection(private$conn)) { return("") From 17ce54da1e66f5c643c6cdaa7987d38275b4b2cb Mon Sep 17 00:00:00 2001 From: cpsievert Date: Tue, 27 Jan 2026 21:21:07 +0000 Subject: [PATCH 42/45] `devtools::document()` (GitHub Actions) --- pkg-r/man/DBISource.Rd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg-r/man/DBISource.Rd b/pkg-r/man/DBISource.Rd index 867a2e07..75e61096 100644 --- a/pkg-r/man/DBISource.Rd +++ b/pkg-r/man/DBISource.Rd @@ -111,13 +111,13 @@ A string describing the schema \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-DBISource-get_semantic_views_description}{}}} \subsection{Method \code{get_semantic_views_description()}}{ -Get the complete semantic views description for the prompt +Get information about semantic views (if any) for the system prompt. \subsection{Usage}{ \if{html}{\out{
    }}\preformatted{DBISource$get_semantic_views_description()}\if{html}{\out{
    }} } \subsection{Returns}{ -A string with the full semantic views section, or empty string if none +A string with semantic view information, or empty string if none } } \if{html}{\out{
    }} From f357d39ed5b1c2f386995098c6fb6bfd8c72a999 Mon Sep 17 00:00:00 2001 From: Carson Date: Tue, 27 Jan 2026 15:28:57 -0600 Subject: [PATCH 43/45] style: Revert list comprehension formatting to original style Co-Authored-By: Claude Opus 4.5 --- pkg-py/src/querychat/_datasource.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pkg-py/src/querychat/_datasource.py b/pkg-py/src/querychat/_datasource.py index e9330295..5cac5f08 100644 --- a/pkg-py/src/querychat/_datasource.py +++ b/pkg-py/src/querychat/_datasource.py @@ -576,8 +576,7 @@ def _add_column_stats( # Find text columns that qualify as categorical categorical_cols = [ - col - for col in columns + col for col in columns if col.kind == "text" and (nunique := stats.get(f"{col.name}__nunique")) and nunique <= categorical_threshold @@ -911,8 +910,7 @@ def _add_column_stats( # Find text columns that qualify as categorical categorical_cols = [ - col - for col in columns + col for col in columns if col.kind == "text" and (nunique := stats.get(f"{col.name}__nunique")) and nunique <= categorical_threshold @@ -1041,8 +1039,7 @@ def _add_column_stats( col.max_val = stats.get(f"{col.name}__max") categorical_cols = [ - col - for col in columns + col for col in columns if col.kind == "text" and (nunique := stats.get(f"{col.name}__nunique")) and nunique <= categorical_threshold From cbe2d2f0e12fa795d68ecafad183eb49dee85e12 Mon Sep 17 00:00:00 2001 From: Carson Date: Tue, 27 Jan 2026 15:32:01 -0600 Subject: [PATCH 44/45] style: Revert unnecessary R formatting changes Minimize diff by reverting air format changes that were not part of the semantic views feature. 
Co-Authored-By: Claude Opus 4.5 --- pkg-r/R/DataFrameSource.R | 14 ++- pkg-r/R/QueryChat.R | 25 +++-- pkg-r/R/querychat_module.R | 12 +-- pkg-r/R/querychat_tools.R | 11 +-- pkg-r/R/utils-check.R | 2 + pkg-r/R/utils-ellmer.R | 1 + .../_problems/test-SnowflakeSource-21.R | 26 +++++ .../_problems/test-SnowflakeSource-33.R | 33 +++++++ .../_problems/test-SnowflakeSource-8.R | 14 +++ .../_problems/test-SnowflakeSource-83.R | 13 +++ .../_problems/test-SnowflakeSource-95.R | 24 +++++ pkg-r/tests/testthat/test-DBISource.R | 8 +- pkg-r/tests/testthat/test-DataSource.R | 2 + pkg-r/tests/testthat/test-QueryChat.R | 20 ++-- sandbox/app-chaos.R | 47 +++++++++ sandbox/app-chaos.py | 25 +++++ sandbox/chaos.py | 98 +++++++++++++++++++ sandbox/greeting.md | 1 + 18 files changed, 322 insertions(+), 54 deletions(-) create mode 100644 pkg-r/tests/testthat/_problems/test-SnowflakeSource-21.R create mode 100644 pkg-r/tests/testthat/_problems/test-SnowflakeSource-33.R create mode 100644 pkg-r/tests/testthat/_problems/test-SnowflakeSource-8.R create mode 100644 pkg-r/tests/testthat/_problems/test-SnowflakeSource-83.R create mode 100644 pkg-r/tests/testthat/_problems/test-SnowflakeSource-95.R create mode 100644 sandbox/app-chaos.R create mode 100644 sandbox/app-chaos.py create mode 100644 sandbox/chaos.py create mode 100644 sandbox/greeting.md diff --git a/pkg-r/R/DataFrameSource.R b/pkg-r/R/DataFrameSource.R index cb723700..6349c0f1 100644 --- a/pkg-r/R/DataFrameSource.R +++ b/pkg-r/R/DataFrameSource.R @@ -115,12 +115,10 @@ get_default_dataframe_engine <- function() { if (is_installed("RSQLite")) { return("sqlite") } - cli::cli_abort( - c( - "No compatible database engine installed for DataFrameSource", - "i" = "Install either {.pkg duckdb} or {.pkg RSQLite}:", - " " = "{.run install.packages(\"duckdb\")}", - " " = "{.run install.packages(\"RSQLite\")}" - ) - ) + cli::cli_abort(c( + "No compatible database engine installed for DataFrameSource", + "i" = "Install either {.pkg duckdb} or 
{.pkg RSQLite}:", + " " = "{.run install.packages(\"duckdb\")}", + " " = "{.run install.packages(\"RSQLite\")}" + )) } diff --git a/pkg-r/R/QueryChat.R b/pkg-r/R/QueryChat.R index f109a4ca..20574442 100644 --- a/pkg-r/R/QueryChat.R +++ b/pkg-r/R/QueryChat.R @@ -409,12 +409,10 @@ QueryChat <- R6::R6Class( ui <- function(req) { bslib::page_sidebar( - title = shiny::HTML( - sprintf( - "querychat with %s", - table_name - ) - ), + title = shiny::HTML(sprintf( + "querychat with %s", + table_name + )), class = "bslib-page-dashboard", sidebar = self$sidebar(), shiny::useBusyIndicators(pulse = TRUE, spinners = FALSE), @@ -510,14 +508,12 @@ QueryChat <- R6::R6Class( }) shiny::observeEvent(input$close_btn, label = "on_close_btn", { - shiny::stopApp( - list( - df = qc_vals$df(), - sql = qc_vals$sql(), - title = qc_vals$title(), - client = qc_vals$client - ) - ) + shiny::stopApp(list( + df = qc_vals$df(), + sql = qc_vals$sql(), + title = qc_vals$title(), + client = qc_vals$client + )) }) } @@ -947,6 +943,7 @@ normalize_data_source <- function(data_source, table_name) { ) } + namespaced_id <- function(id, session = shiny::getDefaultReactiveDomain()) { if (is.null(session)) { id diff --git a/pkg-r/R/querychat_module.R b/pkg-r/R/querychat_module.R index 2d4e597a..3d977d1d 100644 --- a/pkg-r/R/querychat_module.R +++ b/pkg-r/R/querychat_module.R @@ -89,13 +89,11 @@ mod_server <- function( greeting_content <- if (!is.null(greeting) && any(nzchar(greeting))) { greeting } else { - cli::cli_warn( - c( - "No {.arg greeting} provided to {.fn QueryChat}. Using the LLM {.arg client} to generate one now.", - "i" = "For faster startup, lower cost, and determinism, consider providing a {.arg greeting} to {.fn QueryChat}.", - "i" = "You can use your {.help querychat::QueryChat} object's {.fn $generate_greeting} method to generate a greeting." - ) - ) + cli::cli_warn(c( + "No {.arg greeting} provided to {.fn QueryChat}. 
Using the LLM {.arg client} to generate one now.", + "i" = "For faster startup, lower cost, and determinism, consider providing a {.arg greeting} to {.fn QueryChat}.", + "i" = "You can use your {.help querychat::QueryChat} object's {.fn $generate_greeting} method to generate a greeting." + )) chat$stream_async(GREETING_PROMPT) } diff --git a/pkg-r/R/querychat_tools.R b/pkg-r/R/querychat_tools.R index be29fb01..60e99af9 100644 --- a/pkg-r/R/querychat_tools.R +++ b/pkg-r/R/querychat_tools.R @@ -67,6 +67,7 @@ tool_update_dashboard_impl <- function(data_source, update_fn) { } } + tool_reset_dashboard <- function(reset_fn = identity) { check_function(reset_fn) @@ -131,12 +132,10 @@ querychat_tool_details_option <- function() { valid_settings <- c("expanded", "collapsed", "default") if (!setting %in% valid_settings) { - cli::cli_warn( - c( - "Invalid value for {.code querychat.tool_details} option or {.envvar QUERYCHAT_TOOL_DETAILS} environment variable: {.val {setting}}", - "i" = "Must be one of: {.or {.val {valid_settings}}}" - ) - ) + cli::cli_warn(c( + "Invalid value for {.code querychat.tool_details} option or {.envvar QUERYCHAT_TOOL_DETAILS} environment variable: {.val {setting}}", + "i" = "Must be one of: {.or {.val {valid_settings}}}" + )) return(NULL) } diff --git a/pkg-r/R/utils-check.R b/pkg-r/R/utils-check.R index 9b557028..b9242bb0 100644 --- a/pkg-r/R/utils-check.R +++ b/pkg-r/R/utils-check.R @@ -12,6 +12,7 @@ check_data_source <- function( } } + # SQL table name validation ---------------------------------------------- #' Check SQL table name validity @@ -63,6 +64,7 @@ is_valid_sql_table_name <- function(x) { grepl("^[a-zA-Z][a-zA-Z0-9_]*$", x) } + # SQL query validation -------------------------------------------------------- #' Check SQL query for disallowed operations diff --git a/pkg-r/R/utils-ellmer.R b/pkg-r/R/utils-ellmer.R index 045e97af..aadbe895 100644 --- a/pkg-r/R/utils-ellmer.R +++ b/pkg-r/R/utils-ellmer.R @@ -14,6 +14,7 @@ interpolate_package 
<- function(path, ..., .envir = parent.frame()) { ellmer::interpolate_file(path, ..., .envir = .envir) } + as_querychat_client <- function(client = NULL) { if (is.null(client)) { client <- querychat_client_option() diff --git a/pkg-r/tests/testthat/_problems/test-SnowflakeSource-21.R b/pkg-r/tests/testthat/_problems/test-SnowflakeSource-21.R new file mode 100644 index 00000000..f681fbec --- /dev/null +++ b/pkg-r/tests/testthat/_problems/test-SnowflakeSource-21.R @@ -0,0 +1,26 @@ +# Extracted from test-SnowflakeSource.R:21 + +# test ------------------------------------------------------------------------- +it("formats single semantic view correctly", { + views <- list( + list(name = "db.schema.view", ddl = "CREATE SEMANTIC VIEW test_view") + ) + result <- format_semantic_views_section(views) + + expect_match(result, "## Snowflake Semantic Views") + expect_match(result, "db.schema.view") + expect_match(result, "CREATE SEMANTIC VIEW test_view") + expect_match(result, "```sql") +}) +it("formats multiple views", { + views <- list( + list(name = "db.schema.view1", ddl = "CREATE SEMANTIC VIEW v1"), + list(name = "db.schema.view2", ddl = "CREATE SEMANTIC VIEW v2") + ) + result <- format_semantic_views_section(views) + + expect_match(result, "db.schema.view1") + expect_match(result, "db.schema.view2") + expect_match(result, "CREATE SEMANTIC VIEW v1") + expect_match(result, "CREATE SEMANTIC VIEW v2") +}) diff --git a/pkg-r/tests/testthat/_problems/test-SnowflakeSource-33.R b/pkg-r/tests/testthat/_problems/test-SnowflakeSource-33.R new file mode 100644 index 00000000..854eded0 --- /dev/null +++ b/pkg-r/tests/testthat/_problems/test-SnowflakeSource-33.R @@ -0,0 +1,33 @@ +# Extracted from test-SnowflakeSource.R:33 + +# test ------------------------------------------------------------------------- +it("formats single semantic view correctly", { + views <- list( + list(name = "db.schema.view", ddl = "CREATE SEMANTIC VIEW test_view") + ) + result <- 
format_semantic_views_section(views) + + expect_match(result, "## Snowflake Semantic Views") + expect_match(result, "db.schema.view") + expect_match(result, "CREATE SEMANTIC VIEW test_view") + expect_match(result, "```sql") +}) +it("formats multiple views", { + views <- list( + list(name = "db.schema.view1", ddl = "CREATE SEMANTIC VIEW v1"), + list(name = "db.schema.view2", ddl = "CREATE SEMANTIC VIEW v2") + ) + result <- format_semantic_views_section(views) + + expect_match(result, "db.schema.view1") + expect_match(result, "db.schema.view2") + expect_match(result, "CREATE SEMANTIC VIEW v1") + expect_match(result, "CREATE SEMANTIC VIEW v2") +}) +it("includes IMPORTANT notice", { + views <- list( + list(name = "test", ddl = "DDL") + ) + result <- format_semantic_views_section(views) + expect_match(result, "\\*\\*IMPORTANT\\*\\*") +}) diff --git a/pkg-r/tests/testthat/_problems/test-SnowflakeSource-8.R b/pkg-r/tests/testthat/_problems/test-SnowflakeSource-8.R new file mode 100644 index 00000000..b7dedd72 --- /dev/null +++ b/pkg-r/tests/testthat/_problems/test-SnowflakeSource-8.R @@ -0,0 +1,14 @@ +# Extracted from test-SnowflakeSource.R:8 + +# test ------------------------------------------------------------------------- +it("formats single semantic view correctly", { + views <- list( + list(name = "db.schema.view", ddl = "CREATE SEMANTIC VIEW test_view") + ) + result <- format_semantic_views_section(views) + + expect_match(result, "## Snowflake Semantic Views") + expect_match(result, "db.schema.view") + expect_match(result, "CREATE SEMANTIC VIEW test_view") + expect_match(result, "```sql") +}) diff --git a/pkg-r/tests/testthat/_problems/test-SnowflakeSource-83.R b/pkg-r/tests/testthat/_problems/test-SnowflakeSource-83.R new file mode 100644 index 00000000..6df7963e --- /dev/null +++ b/pkg-r/tests/testthat/_problems/test-SnowflakeSource-83.R @@ -0,0 +1,13 @@ +# Extracted from test-SnowflakeSource.R:83 + +# test 
------------------------------------------------------------------------- +it("has_semantic_views() returns FALSE before get_schema() is called", { + skip_if_not_installed("RSQLite") + + conn <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") + withr::defer(DBI::dbDisconnect(conn)) + DBI::dbWriteTable(conn, "test_table", data.frame(x = 1:3)) + + source <- DBISource$new(conn, "test_table") + expect_false(source$has_semantic_views()) +}) diff --git a/pkg-r/tests/testthat/_problems/test-SnowflakeSource-95.R b/pkg-r/tests/testthat/_problems/test-SnowflakeSource-95.R new file mode 100644 index 00000000..27cfec85 --- /dev/null +++ b/pkg-r/tests/testthat/_problems/test-SnowflakeSource-95.R @@ -0,0 +1,24 @@ +# Extracted from test-SnowflakeSource.R:95 + +# test ------------------------------------------------------------------------- +it("has_semantic_views() returns FALSE before get_schema() is called", { + skip_if_not_installed("RSQLite") + + conn <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") + withr::defer(DBI::dbDisconnect(conn)) + DBI::dbWriteTable(conn, "test_table", data.frame(x = 1:3)) + + source <- DBISource$new(conn, "test_table") + expect_false(source$has_semantic_views()) +}) +it("has_semantic_views() returns FALSE for non-Snowflake after get_schema()", { + skip_if_not_installed("RSQLite") + + conn <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") + withr::defer(DBI::dbDisconnect(conn)) + DBI::dbWriteTable(conn, "test_table", data.frame(x = 1:3)) + + source <- DBISource$new(conn, "test_table") + source$get_schema() + expect_false(source$has_semantic_views()) +}) diff --git a/pkg-r/tests/testthat/test-DBISource.R b/pkg-r/tests/testthat/test-DBISource.R index 487052ff..69ff7be2 100644 --- a/pkg-r/tests/testthat/test-DBISource.R +++ b/pkg-r/tests/testthat/test-DBISource.R @@ -82,11 +82,9 @@ describe("DBISource$test_query()", { expect_error(dbi_source$test_query("SELECT * FROM non_existent_table")) - expect_error( - dbi_source$test_query( - "SELECT 
non_existent_column FROM test_table" - ) - ) + expect_error(dbi_source$test_query( + "SELECT non_existent_column FROM test_table" + )) }) it("works with different data types", { diff --git a/pkg-r/tests/testthat/test-DataSource.R b/pkg-r/tests/testthat/test-DataSource.R index 7defe8c2..cf2fc046 100644 --- a/pkg-r/tests/testthat/test-DataSource.R +++ b/pkg-r/tests/testthat/test-DataSource.R @@ -29,6 +29,7 @@ describe("DataSource base class", { }) }) + describe("DataSource$get_schema()", { it("returns proper schema for DataFrameSource", { skip_if_no_dataframe_engine() @@ -310,6 +311,7 @@ describe("DataSource$execute_query()", { }) }) + describe("test_query() column validation", { skip_if_no_dataframe_engine() diff --git a/pkg-r/tests/testthat/test-QueryChat.R b/pkg-r/tests/testthat/test-QueryChat.R index 15fa793d..b01a66c5 100644 --- a/pkg-r/tests/testthat/test-QueryChat.R +++ b/pkg-r/tests/testthat/test-QueryChat.R @@ -364,13 +364,9 @@ describe("QueryChat$client()", { # Find and call the update tool tools <- client$get_tools() - update_tool <- tools[[ - which( - sapply(tools, function(t) { - t@name == "querychat_update_dashboard" - }) - ) - ]] + update_tool <- tools[[which(sapply(tools, function(t) { + t@name == "querychat_update_dashboard" + }))]] # Call the tool - it should execute the query and call the callback result <- update_tool( @@ -400,13 +396,9 @@ describe("QueryChat$client()", { # Find and call the reset tool tools <- client$get_tools() - reset_tool <- tools[[ - which( - sapply(tools, function(t) { - t@name == "querychat_reset_dashboard" - }) - ) - ]] + reset_tool <- tools[[which(sapply(tools, function(t) { + t@name == "querychat_reset_dashboard" + }))]] # Call the tool reset_tool() diff --git a/sandbox/app-chaos.R b/sandbox/app-chaos.R new file mode 100644 index 00000000..c0f41c84 --- /dev/null +++ b/sandbox/app-chaos.R @@ -0,0 +1,47 @@ +library(shiny) +library(DBI) +library(querychat) + +# Database/table parameters +WAREHOUSE <- "DEFAULT_WH" +DATABASE 
<- "DEMO_CHAOS_DB" +SCHEMA <- "ERP_DUMP" +TABLE_NAME <- "T_DATA_LOG" + +# Snowflake account +ACCOUNT <- "duloftf-posit-software-pbc-dev" + +# Get greeting from file in same directory +script_dir <- tryCatch( + dirname(sys.frame(1)$ofile), + error = function(e) "." +) +if (is.null(script_dir) || script_dir == "") { + script_dir <- "sandbox" +} +greeting <- paste(readLines(file.path(script_dir, "greeting.md")), collapse = "\n") + + + +conn <- DBI::dbConnect( + odbc::snowflake(), + #driver = odbc:::snowflake_default_driver(), + authenticator = "externalbrowser", + account = ACCOUNT, + #warehouse = WAREHOUSE, + #database = DATABASE, + #schema = SCHEMA +) + +# Print first few rows to verify connection +print(DBI::dbGetQuery(conn, sprintf("SELECT * FROM %s LIMIT 5", TABLE_NAME))) + +# Create QueryChat +qc <- QueryChat$new( + conn, + table_name = TABLE_NAME, + greeting = greeting +) + +# Run the app +qc$app() diff --git a/sandbox/app-chaos.py b/sandbox/app-chaos.py new file mode 100644 index 00000000..d55d18b1 --- /dev/null +++ b/sandbox/app-chaos.py @@ -0,0 +1,25 @@ +import ibis +from pathlib import Path +import querychat + +import chaos + +with open(Path(__file__).parent / "greeting.md", "r") as f: + greeting = f.read() + +# Establish Ibis connection to Snowflake +conn = ibis.snowflake.from_connection( + chaos.get_connection(), + create_object_udfs=False, +) + +tbl = conn.table(chaos.TABLE_NAME) +print(tbl.head(5)) + +qc = querychat.QueryChat( + tbl, + table_name=chaos.TABLE_NAME, + greeting=greeting, +) + +app = qc.app() diff --git a/sandbox/chaos.py b/sandbox/chaos.py new file mode 100644 index 00000000..cc917714 --- /dev/null +++ b/sandbox/chaos.py @@ -0,0 +1,98 @@ +import os +from pathlib import Path + +import chatlas +import ibis +import snowflake.connector + +from shiny import session +from posit.connect.external.snowflake import PositAuthenticator + +# A connection name within ~/.snowflake/connections.toml +# TODO: Set to workbench by default? 
+CONNECTION_NAME = "posit" + +# Default Snowflake account +ACCOUNT = "duloftf-posit-software-pbc-dev" + +# Database/table parameters +WAREHOUSE = "DEFAULT_WH" +DATABASE = "DEMO_CHAOS_DB" +SCHEMA = "ERP_DUMP" +TABLE_NAME = "T_DATA_LOG" + +# A model name supported by Snowflake +MODEL = "claude-3-7-sonnet" + + +def chat_client(): + kwargs = {} + + if is_connect(): + auth = get_connect_auth() + kwargs["authenticator"] = auth.authenticator + kwargs["token"] = auth.token + else: + if not has_local_config(): + raise ValueError( + "No Snowflake configuration found. Please set up " + "~/.snowflake/connections.toml with the connection details." + ) + kwargs["connection_name"] = CONNECTION_NAME + + return chatlas.ChatSnowflake(model=MODEL, account=ACCOUNT, kwargs=kwargs) + + +def get_connection(): + """Get a Snowflake connection based on the environment.""" + if is_connect(): + auth = get_connect_auth() + return snowflake.connector.connect( + account=ACCOUNT, + warehouse=WAREHOUSE, + database=DATABASE, + schema=SCHEMA, + authenticator=auth.authenticator, + token=auth.token, + ) + + if not has_local_config(): + raise ValueError( + "No Snowflake configuration found. Please set up " + "~/.snowflake/connections.toml with the connection details." 
+ ) + + return snowflake.connector.connect( + connection_name=CONNECTION_NAME, + warehouse=WAREHOUSE, + database=DATABASE, + schema=SCHEMA, + ) + + +def get_connect_auth(): + """Get Posit Connect Snowflake authenticator.""" + sess = session.get_current_session() + if sess is None: + raise RuntimeError("get_connect_auth() must be called within a Shiny session") + + # No-op for (1st run of) Express sessions + if sess.is_stub_session(): + return None + + user_session_token = sess.http_conn.headers.get("Posit-Connect-User-Session-Token") + return PositAuthenticator( + local_authenticator="EXTERNALBROWSER", + user_session_token=user_session_token, + ) + + +def is_connect(): + """Check if the app is running on Posit Connect.""" + return os.getenv("RSTUDIO_PRODUCT") == "CONNECT" + + +def has_local_config(): + home = Path(os.getenv("SNOWFLAKE_HOME", "~/.snowflake")).expanduser() + config_path = home / "connections.toml" + return config_path.exists() diff --git a/sandbox/greeting.md b/sandbox/greeting.md new file mode 100644 index 00000000..19b39d72 --- /dev/null +++ b/sandbox/greeting.md @@ -0,0 +1 @@ +Hello! 
Ask a question about your data \ No newline at end of file From 9b995fc49c7e56e887d7f187d893eac860b25e9f Mon Sep 17 00:00:00 2001 From: Carson Date: Tue, 27 Jan 2026 15:32:13 -0600 Subject: [PATCH 45/45] chore: Remove accidentally committed files --- .../_problems/test-SnowflakeSource-21.R | 26 ----- .../_problems/test-SnowflakeSource-33.R | 33 ------- .../_problems/test-SnowflakeSource-8.R | 14 --- .../_problems/test-SnowflakeSource-83.R | 13 --- .../_problems/test-SnowflakeSource-95.R | 24 ----- sandbox/app-chaos.R | 47 --------- sandbox/app-chaos.py | 25 ----- sandbox/chaos.py | 98 ------------------- sandbox/greeting.md | 1 - 9 files changed, 281 deletions(-) delete mode 100644 pkg-r/tests/testthat/_problems/test-SnowflakeSource-21.R delete mode 100644 pkg-r/tests/testthat/_problems/test-SnowflakeSource-33.R delete mode 100644 pkg-r/tests/testthat/_problems/test-SnowflakeSource-8.R delete mode 100644 pkg-r/tests/testthat/_problems/test-SnowflakeSource-83.R delete mode 100644 pkg-r/tests/testthat/_problems/test-SnowflakeSource-95.R delete mode 100644 sandbox/app-chaos.R delete mode 100644 sandbox/app-chaos.py delete mode 100644 sandbox/chaos.py delete mode 100644 sandbox/greeting.md diff --git a/pkg-r/tests/testthat/_problems/test-SnowflakeSource-21.R b/pkg-r/tests/testthat/_problems/test-SnowflakeSource-21.R deleted file mode 100644 index f681fbec..00000000 --- a/pkg-r/tests/testthat/_problems/test-SnowflakeSource-21.R +++ /dev/null @@ -1,26 +0,0 @@ -# Extracted from test-SnowflakeSource.R:21 - -# test ------------------------------------------------------------------------- -it("formats single semantic view correctly", { - views <- list( - list(name = "db.schema.view", ddl = "CREATE SEMANTIC VIEW test_view") - ) - result <- format_semantic_views_section(views) - - expect_match(result, "## Snowflake Semantic Views") - expect_match(result, "db.schema.view") - expect_match(result, "CREATE SEMANTIC VIEW test_view") - expect_match(result, "```sql") -}) -it("formats 
multiple views", { - views <- list( - list(name = "db.schema.view1", ddl = "CREATE SEMANTIC VIEW v1"), - list(name = "db.schema.view2", ddl = "CREATE SEMANTIC VIEW v2") - ) - result <- format_semantic_views_section(views) - - expect_match(result, "db.schema.view1") - expect_match(result, "db.schema.view2") - expect_match(result, "CREATE SEMANTIC VIEW v1") - expect_match(result, "CREATE SEMANTIC VIEW v2") -}) diff --git a/pkg-r/tests/testthat/_problems/test-SnowflakeSource-33.R b/pkg-r/tests/testthat/_problems/test-SnowflakeSource-33.R deleted file mode 100644 index 854eded0..00000000 --- a/pkg-r/tests/testthat/_problems/test-SnowflakeSource-33.R +++ /dev/null @@ -1,33 +0,0 @@ -# Extracted from test-SnowflakeSource.R:33 - -# test ------------------------------------------------------------------------- -it("formats single semantic view correctly", { - views <- list( - list(name = "db.schema.view", ddl = "CREATE SEMANTIC VIEW test_view") - ) - result <- format_semantic_views_section(views) - - expect_match(result, "## Snowflake Semantic Views") - expect_match(result, "db.schema.view") - expect_match(result, "CREATE SEMANTIC VIEW test_view") - expect_match(result, "```sql") -}) -it("formats multiple views", { - views <- list( - list(name = "db.schema.view1", ddl = "CREATE SEMANTIC VIEW v1"), - list(name = "db.schema.view2", ddl = "CREATE SEMANTIC VIEW v2") - ) - result <- format_semantic_views_section(views) - - expect_match(result, "db.schema.view1") - expect_match(result, "db.schema.view2") - expect_match(result, "CREATE SEMANTIC VIEW v1") - expect_match(result, "CREATE SEMANTIC VIEW v2") -}) -it("includes IMPORTANT notice", { - views <- list( - list(name = "test", ddl = "DDL") - ) - result <- format_semantic_views_section(views) - expect_match(result, "\\*\\*IMPORTANT\\*\\*") -}) diff --git a/pkg-r/tests/testthat/_problems/test-SnowflakeSource-8.R b/pkg-r/tests/testthat/_problems/test-SnowflakeSource-8.R deleted file mode 100644 index b7dedd72..00000000 --- 
a/pkg-r/tests/testthat/_problems/test-SnowflakeSource-8.R +++ /dev/null @@ -1,14 +0,0 @@ -# Extracted from test-SnowflakeSource.R:8 - -# test ------------------------------------------------------------------------- -it("formats single semantic view correctly", { - views <- list( - list(name = "db.schema.view", ddl = "CREATE SEMANTIC VIEW test_view") - ) - result <- format_semantic_views_section(views) - - expect_match(result, "## Snowflake Semantic Views") - expect_match(result, "db.schema.view") - expect_match(result, "CREATE SEMANTIC VIEW test_view") - expect_match(result, "```sql") -}) diff --git a/pkg-r/tests/testthat/_problems/test-SnowflakeSource-83.R b/pkg-r/tests/testthat/_problems/test-SnowflakeSource-83.R deleted file mode 100644 index 6df7963e..00000000 --- a/pkg-r/tests/testthat/_problems/test-SnowflakeSource-83.R +++ /dev/null @@ -1,13 +0,0 @@ -# Extracted from test-SnowflakeSource.R:83 - -# test ------------------------------------------------------------------------- -it("has_semantic_views() returns FALSE before get_schema() is called", { - skip_if_not_installed("RSQLite") - - conn <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") - withr::defer(DBI::dbDisconnect(conn)) - DBI::dbWriteTable(conn, "test_table", data.frame(x = 1:3)) - - source <- DBISource$new(conn, "test_table") - expect_false(source$has_semantic_views()) -}) diff --git a/pkg-r/tests/testthat/_problems/test-SnowflakeSource-95.R b/pkg-r/tests/testthat/_problems/test-SnowflakeSource-95.R deleted file mode 100644 index 27cfec85..00000000 --- a/pkg-r/tests/testthat/_problems/test-SnowflakeSource-95.R +++ /dev/null @@ -1,24 +0,0 @@ -# Extracted from test-SnowflakeSource.R:95 - -# test ------------------------------------------------------------------------- -it("has_semantic_views() returns FALSE before get_schema() is called", { - skip_if_not_installed("RSQLite") - - conn <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") - withr::defer(DBI::dbDisconnect(conn)) - DBI::dbWriteTable(conn, 
"test_table", data.frame(x = 1:3)) - - source <- DBISource$new(conn, "test_table") - expect_false(source$has_semantic_views()) -}) -it("has_semantic_views() returns FALSE for non-Snowflake after get_schema()", { - skip_if_not_installed("RSQLite") - - conn <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") - withr::defer(DBI::dbDisconnect(conn)) - DBI::dbWriteTable(conn, "test_table", data.frame(x = 1:3)) - - source <- DBISource$new(conn, "test_table") - source$get_schema() - expect_false(source$has_semantic_views()) -}) diff --git a/sandbox/app-chaos.R b/sandbox/app-chaos.R deleted file mode 100644 index c0f41c84..00000000 --- a/sandbox/app-chaos.R +++ /dev/null @@ -1,47 +0,0 @@ -library(shiny) -library(DBI) -library(querychat) - -# Database/table parameters -WAREHOUSE <- "DEFAULT_WH" -DATABASE <- "DEMO_CHAOS_DB" -SCHEMA <- "ERP_DUMP" -TABLE_NAME <- "T_DATA_LOG" - -# Snowflake account -ACCOUNT <- "duloftf-posit-software-pbc-dev" - -# Get greeting from file in same directory -script_dir <- tryCatch( - dirname(sys.frame(1)$ofile), - error = function(e) "." 
-) -if (is.null(script_dir) || script_dir == "") { - script_dir <- "sandbox" -} -greeting <- paste(readLines(file.path(script_dir, "greeting.md")), collapse = "\n") - - - -conn <- DBI::dbConnect( - odbc::snowflake(), - #driver = odbc:::snowflake_default_driver(), - authenticator = "externalbrowser", - account = ACCOUNT, - #warehouse = WAREHOUSE, - #database = DATABASE, - #schema = SCHEMA -) - -# Print first few rows to verify connection -print(DBI::dbGetQuery(conn, sprintf("SELECT * FROM %s LIMIT 5", TABLE_NAME))) - -# Create QueryChat -qc <- QueryChat$new( - conn, - table_name = TABLE_NAME, - greeting = greeting -) - -# Run the app -qc$app() diff --git a/sandbox/app-chaos.py b/sandbox/app-chaos.py deleted file mode 100644 index d55d18b1..00000000 --- a/sandbox/app-chaos.py +++ /dev/null @@ -1,25 +0,0 @@ -import ibis -from pathlib import Path -import querychat - -import chaos - -with open(Path(__file__).parent / "greeting.md", "r") as f: - greeting = f.read() - -# Establish Ibis connection to Snowflake -conn = ibis.snowflake.from_connection( - chaos.get_connection(), - create_object_udfs=False, -) - -tbl = conn.table(chaos.TABLE_NAME) -print(tbl.head(5)) - -qc = querychat.QueryChat( - tbl, - table_name=chaos.TABLE_NAME, - greeting=greeting, -) - -app = qc.app() diff --git a/sandbox/chaos.py b/sandbox/chaos.py deleted file mode 100644 index cc917714..00000000 --- a/sandbox/chaos.py +++ /dev/null @@ -1,98 +0,0 @@ -import os -from pathlib import Path - -import chatlas -import ibis -import snowflake.connector - -from shiny import session -from posit.connect.external.snowflake import PositAuthenticator - -# A connection name within ~/.snowflake/connections.toml -# TODO: Set to workbench by default? 
-CONNECTION_NAME = "posit" - -# Default Snowflake account -ACCOUNT = "duloftf-posit-software-pbc-dev" - -# Database/table parameters -WAREHOUSE = "DEFAULT_WH" -DATABASE = "DEMO_CHAOS_DB" -SCHEMA = "ERP_DUMP" -TABLE_NAME = "T_DATA_LOG" - -# A model name supported by Snowflake -MODEL = "claude-3-7-sonnet" - - -def chat_client(): - kwargs = {} - - if is_connect(): - auth = get_connect_auth() - kwargs["authenticator"] = auth.authenticator - kwargs["token"] = auth.token - else: - if not has_local_config(): - raise ValueError( - "No Snowflake configuration found. Please set up " - "~/.snowflake/connections.toml with the connection details." - ) - kwargs["connection_name"] = CONNECTION_NAME - - return chatlas.ChatSnowflake(model=MODEL, account=ACCOUNT, kwargs=kwargs) - - -def get_connection(): - """Get a Snowflake connection based on the environment.""" - if is_connect(): - auth = get_connect_auth() - return snowflake.connector.connect( - account=ACCOUNT, - warehouse=WAREHOUSE, - database=DATABASE, - schema=SCHEMA, - authenticator=auth.authenticator, - token=auth.token, - ) - - if not has_local_config(): - raise ValueError( - "No Snowflake configuration found. Please set up " - "~/.snowflake/connections.toml with the connection details." 
- ) - - return snowflake.connector.connect( - connection_name=CONNECTION_NAME, - warehouse=WAREHOUSE, - database=DATABASE, - schema=SCHEMA, - ) - - -def get_connect_auth(): - """Get Posit Connect Snowflake authenticator.""" - sess = session.get_current_session() - if sess is None: - raise RuntimeError("get_connect_auth() must be called within a Shiny session") - - # No-op for (1st run of) Express sessions - if sess.is_stub_session(): - return None - - user_session_token = sess.http_conn.headers.get("Posit-Connect-User-Session-Token") - return PositAuthenticator( - local_authenticator="EXTERNALBROWSER", - user_session_token=user_session_token, - ) - - -def is_connect(): - """Check if the app is running on Posit Connect.""" - return os.getenv("RSTUDIO_PRODUCT") == "CONNECT" - - -def has_local_config(): - home = Path(os.getenv("SNOWFLAKE_HOME", "~/.snowflake")).expanduser() - config_path = home / "connections.toml" - return config_path.exists() diff --git a/sandbox/greeting.md b/sandbox/greeting.md deleted file mode 100644 index 19b39d72..00000000 --- a/sandbox/greeting.md +++ /dev/null @@ -1 +0,0 @@ -Hello! Ask a question about your data \ No newline at end of file