From 96c1c1d58d9f9f20c923d338a82e0015da065b04 Mon Sep 17 00:00:00 2001 From: Carson Date: Mon, 26 Jan 2026 11:27:50 -0600 Subject: [PATCH] feat(pkg-py): add multi-table support Add the ability to register multiple tables with a QueryChat instance, enabling cross-table queries and per-table state management. - **Storage**: Change from single `_data_source` to `_data_sources` dict - **API**: Add `table_names()`, `table()`, `add_table()`, `remove_table()` methods - **TableAccessor**: New class for per-table access (`qc.table("name").df()`) - **Backwards compatible**: Single-table usage works unchanged - `update_dashboard` and `reset_dashboard` now include table name parameter - `tool_query` supports querying across multiple tables - Updated tool prompts to guide LLM on multi-table operations - Schema section now includes all registered tables - Added relationship hints for JOINs - Table descriptions can be provided for LLM context All frameworks (Shiny, Dash, Streamlit, Gradio) updated to work with the new dict-based storage while maintaining single-table behavior. 
- Added 22 new tests for multi-table functionality - Updated existing tests for new internal structure Co-Authored-By: Claude Opus 4.5 --- .gitignore | 1 + docs/plans/2025-01-14-multi-table-design.md | 452 ++++ docs/plans/2025-01-14-multi-table-impl.md | 1811 +++++++++++++++++ pkg-py/CHANGELOG.md | 2 + pkg-py/examples/lazy_frame_demo.py | 272 +++ pkg-py/src/querychat/__init__.py | 16 + pkg-py/src/querychat/_dash_ui.py | 2 - pkg-py/src/querychat/_querychat_base.py | 193 +- pkg-py/src/querychat/_shiny.py | 4 +- pkg-py/src/querychat/_shiny_module.py | 12 +- pkg-py/src/querychat/_system_prompt.py | 80 +- pkg-py/src/querychat/_table_accessor.py | 107 + pkg-py/src/querychat/prompts/prompt.md | 17 +- .../querychat/prompts/tool-reset-dashboard.md | 5 + .../prompts/tool-update-dashboard.md | 4 + pkg-py/src/querychat/tools.py | 78 +- pkg-py/tests/test_base.py | 3 +- pkg-py/tests/test_multi_table.py | 266 +++ pkg-py/tests/test_querychat.py | 7 +- pkg-py/tests/test_system_prompt.py | 10 +- pkg-py/tests/test_tools.py | 27 + 21 files changed, 3297 insertions(+), 72 deletions(-) create mode 100644 docs/plans/2025-01-14-multi-table-design.md create mode 100644 docs/plans/2025-01-14-multi-table-impl.md create mode 100644 pkg-py/examples/lazy_frame_demo.py create mode 100644 pkg-py/src/querychat/_table_accessor.py create mode 100644 pkg-py/tests/test_multi_table.py diff --git a/.gitignore b/.gitignore index 5c3ef7b5..8caed428 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ animation.screenflow/ README_files/ README.html .DS_Store +test-results/ python-package/examples/titanic.db .quarto *.db diff --git a/docs/plans/2025-01-14-multi-table-design.md b/docs/plans/2025-01-14-multi-table-design.md new file mode 100644 index 00000000..3b567c04 --- /dev/null +++ b/docs/plans/2025-01-14-multi-table-design.md @@ -0,0 +1,452 @@ +# Multi-Table Support for QueryChat + +**Date**: 2025-01-14 +**Status**: Draft + +## Overview and Motivation + +QueryChat currently operates on a single 
table. Users pass one data source and receive a chat interface for querying and filtering that table. This design simplifies the API and implementation but limits users who need to: + +1. **Ask questions across related tables** - "Which customers placed orders over $500?" requires joining `customers` and `orders` +2. **Explore multiple datasets in one session** - Switch between unrelated tables without creating separate QueryChat instances +3. **Drill down from summary to detail** - Start with aggregates, then explore underlying records + +This design extends QueryChat to support multiple tables while preserving backwards compatibility for single-table use cases. + +### Core Principles + +- **Progressive complexity**: Single-table usage remains unchanged. Multi-table features are additive. +- **LLM handles JOINs for queries**: The Query tool can JOIN/aggregate across tables to answer questions +- **Independent filters**: Each table maintains its own filter state. Users coordinate cross-table filtering if needed. 
+- **Fail loudly**: Ambiguous operations (like `.df()` with multiple tables) raise helpful errors rather than guessing + +### Scope + +This design covers: +- API for adding multiple data sources +- Relationship specification between tables +- Filter behavior across tables +- Accessor APIs (`.df()`, `.sql()`, `.title()`) +- UI considerations + +Out of scope (future work): +- Coordinated/cascading filters across related tables +- SQL-like visualization language integration + +--- + +## API for Adding Tables + +### Constructor (unchanged for single table) + +```python +# Python +qc = QueryChat(orders_df, "orders") + +# R +qc <- QueryChat$new(orders_df, "orders") +``` + +### Adding Additional Tables + +```python +# Python +qc.add_table(customers_df, "customers") + +# R +qc$add_table(customers_df, "customers") +``` + +### Removing Tables + +```python +qc.remove_table("customers") +``` + +### Specifying Relationships + +Three mechanisms, usable together: + +```python +# 1. Explicit foreign keys +qc.add_table(orders_df, "orders", + relationships={"customer_id": "customers.id"}) + +# 2. Auto-detect (database connections, on by default) +qc.add_table("orders") # Infers relationships from DB metadata +qc.add_table("orders", infer_relationships=False) # Opt-out + +# 3. Free-text description +qc.add_table(orders_df, "orders", + description="Each order links to customers via customer_id") +``` + +### Database Connections + +For database connections, no tables are included by default. Users add tables explicitly, gaining auto-detected relationships: + +```python +qc = QueryChat(db_connection) +qc.add_table("orders") # Relationships inferred from FK metadata +qc.add_table("customers") # Same +``` + +### Timing Constraint + +Tables must be added before `.server()` is called. 
+ +--- + +## Accessor API + +### The `.table()` Compound Accessor + +Each table's state is accessed through `.table("name")`: + +```python +# Python +qc.table("orders").df() # Filtered dataframe (reactive) +qc.table("orders").sql() # Current filter SQL (reactive) +qc.table("orders").title() # Filter description (reactive) + +# R +qc$table("orders")$df() +qc$table("orders")$sql() +qc$table("orders")$title() +``` + +### Backwards Compatibility for Single-Table + +When only one table exists, the existing shortcut methods continue to work: + +```python +# These work with single table +qc.df() +qc.sql() +qc.title() +``` + +### Error on Ambiguity + +With multiple tables, bare accessors raise a helpful error: + +```python +qc.add_table(customers_df, "customers") # Now have 2 tables +qc.df() # Raises: "Multiple tables present. Use qc.table('name').df()" +``` + +### Listing Available Tables + +```python +qc.table_names() # Returns ["orders", "customers"] +``` + +### What `.table()` Returns + +The `.table("name")` method returns a lightweight object (e.g., `TableAccessor`) that holds a reference to the parent `QueryChat` and the table name. It provides `.df()`, `.sql()`, `.title()` methods that delegate to the appropriate internal state. + +--- + +## Tool Changes + +### Query Tool (Q&A) + +The query tool remains largely unchanged but gains access to multi-table schema and relationship information. The LLM can write JOINs and aggregations across tables to answer questions: + +``` +User: "Which customers placed orders over $500?" + +LLM uses query tool with: +SELECT c.name, c.email, SUM(o.amount) as total +FROM customers c +JOIN orders o ON c.id = o.customer_id +GROUP BY c.id +HAVING SUM(o.amount) > 500 +``` + +The system prompt includes all table schemas plus relationship information (explicit, inferred, or described). 
+ +### Update Dashboard Tool (Filtering) + +This tool gains a required `table` parameter: + +```python +# Current tool signature +def update_dashboard(sql_query: str, title: str) -> dict + +# New tool signature +def update_dashboard(table: str, sql_query: str, title: str) -> dict +``` + +The LLM infers which table from context: + +``` +User: "Show me California customers" +LLM calls: update_dashboard(table="customers", sql_query="SELECT * FROM customers WHERE state = 'CA'", title="California customers") +``` + +### Validation + +- Query must reference the specified table +- Query must return all columns from that table's schema (existing constraint) +- Invalid table name raises error + +### Reset Dashboard Tool + +Also gains a `table` parameter to reset a specific table's filter: + +```python +def reset_dashboard(table: str) -> dict +``` + +Only per-table reset is supported (no "reset all" operation). + +--- + +## UI Design + +### Default Layout: Tabs + +The `.app()` method renders multiple tables as tabs: + +```python +qc = QueryChat(orders_df, "orders") +qc.add_table(customers_df, "customers") +qc.app() # Shows chat + tabbed data view +``` + +Each tab displays: +- Table name as tab label +- Filtered data table +- Current SQL query (collapsed/expandable) +- Filter title/description + +Tabs appear in add-order (first added = first tab = focused on load). + +### Auto-Switch on Filter + +When the LLM filters a table, the UI automatically switches to that tab. User asks "Show California customers" → customers tab becomes active. + +### Single Table (unchanged) + +With one table, the UI looks identical to today - no tabs, just the data view. 
+ +### Building Blocks for Custom Layouts + +Power users can construct their own layouts: + +```python +# Python (Shiny for Python) +qc.sidebar() # Chat interface +qc.table("orders").ui() # Orders data table + SQL display +qc.table("customers").ui() # Customers data table + SQL display + +# R +qc$sidebar() +qc$table("orders")$ui() +qc$table("customers")$ui() +``` + +This allows side-by-side layouts, custom tab arrangements, or embedding tables in different pages. + +### The `.ui()` Method + +The existing `.ui()` method (combined chat + data) continues to work for a single table. With multiple tables, it renders the tabbed view. Users wanting full control use the building blocks instead. + +--- + +## System Prompt Changes + +### Schema Presentation + +The prompt currently describes one table. With multiple tables, a schema is presented for each: + +``` +You have access to a SQL database with the following tables: + +<table name="orders"> +Columns: +- id (INTEGER) +- customer_id (INTEGER) +- product_id (INTEGER) +- amount (DECIMAL) +- order_date (DATE) +</table>
+ +<table name="customers"> +Columns: +- id (INTEGER) +- name (TEXT) +- email (TEXT) +- state (TEXT) +</table>
+``` + +### Relationship Information + +Relationships are included after schemas: + +``` +<relationships> +- orders.customer_id references customers.id +- orders.product_id references products.id +</relationships> +``` + +For free-text descriptions: + +``` +<table_descriptions> +- orders: Transaction records. Each order belongs to one customer and contains one product. +- customers: Customer contact information and location. +</table_descriptions> +``` + +### Tool Instructions + +Updated instructions for the filter tool: + +``` +When filtering data, you must specify which table to filter. +Only one table can be filtered per tool call. The query must +return all columns from the specified table's schema. +``` + +### Query Tool Instructions + +``` +For questions spanning multiple tables, use JOINs based on +the relationships provided. Return only the columns needed +to answer the question. +``` + +--- + +## Error Handling + +### Adding Tables + +| Scenario | Behavior | +|----------|----------| +| Duplicate table name | Error: "Table 'orders' already exists" | +| Add after `.server()` called | Error: "Cannot add tables after server initialization" | +| Invalid data source type | Error: "Expected DataFrame or DB connection, got X" | + +### Removing Tables + +| Scenario | Behavior | +|----------|----------| +| Remove nonexistent table | Error: "Table 'foo' not found" | +| Remove last table | Error: "Cannot remove last table. At least one table is required." | +| Remove after `.server()` | Error: "Cannot remove tables after server initialization" | + +### Accessor Errors + +| Scenario | Behavior | +|----------|----------| +| `.df()` with multiple tables | Error: "Multiple tables present. Use `.table('name').df()`" | +| `.table("foo")` nonexistent | Error: "Table 'foo' not found. 
Available: orders, customers" | + +### Tool Validation Errors + +| Scenario | Behavior | +|----------|----------| +| Filter specifies unknown table | Tool returns error to LLM: "Table 'foo' not found" | +| Filter query wrong schema | Tool returns error to LLM: "Query must return all columns from 'orders'" | +| Filter query references wrong table | Tool returns error to LLM: "Query references 'customers' but table='orders'" | + +### Relationship Inference Failures + +If `infer_relationships=True` but no foreign keys are found in the DB metadata, table registration continues silently without relationships (this is not an error - relationships are optional). + +--- + +## Backwards Compatibility + +### Fully Backwards Compatible Cases + +Existing single-table code works unchanged: + +```python +# This continues to work exactly as before +qc = QueryChat(orders_df, "orders") +qc.app() + +# Accessors work unchanged +qc.df() +qc.sql() +qc.title() +``` + +### Breaking Changes + +None for single-table usage. + +### Soft Breaks (Bare Accessors Must Be Updated After Adding a Table) + +When a user adds a second table to existing code: + +```python +qc = QueryChat(orders_df, "orders") +qc.add_table(customers_df, "customers") # New line + +qc.df() # Now raises error - must use qc.table("orders").df() +``` + +This is intentional: adding a table is an explicit action, and the error message guides the fix. + +### Migration Path + +For users adopting multi-table: + +1. Add tables with `.add_table()` +2. Update `.df()` → `.table("name").df()` (same for `.sql()`, `.title()`) +3. Optionally add relationship information for better query quality + +### Deprecation Strategy + +No deprecations needed. The single-table API remains the recommended approach for single-table use cases. 
+ +--- + +## Testing Considerations + +### Unit Tests + +| Area | Tests | +|------|-------| +| `.add_table()` | Adds table correctly, rejects duplicates, rejects after server init | +| `.remove_table()` | Removes table, errors on nonexistent, errors on last table | +| `.table("name")` | Returns accessor, errors on nonexistent, lists available tables | +| Accessor methods | `.df()`, `.sql()`, `.title()` work via `.table()`, error when ambiguous | +| Relationship parsing | Explicit relationships stored correctly, free-text passed to prompt | + +### Integration Tests + +| Area | Tests | +|------|-------| +| Multi-table schema in prompt | All tables and relationships appear in system prompt | +| Filter tool with table param | LLM can filter specific table, validation rejects wrong table | +| Query tool with JOINs | LLM can write JOIN queries, results returned correctly | +| Auto-detect relationships | DB foreign keys detected and included in prompt | + +### UI Tests + +| Area | Tests | +|------|-------| +| Tab rendering | Multiple tables show as tabs | +| Auto-switch | Filtering table X switches to tab X | +| Building blocks | `.table("name").ui()` renders correctly | + +### Backwards Compatibility Tests + +| Area | Tests | +|------|-------| +| Single-table unchanged | Existing test suite passes without modification | +| Bare accessors | Work with one table, error with multiple | + +--- + +## Open Questions + +1. **R/Python parity**: Any language-specific considerations for the API during implementation? diff --git a/docs/plans/2025-01-14-multi-table-impl.md b/docs/plans/2025-01-14-multi-table-impl.md new file mode 100644 index 00000000..601b9476 --- /dev/null +++ b/docs/plans/2025-01-14-multi-table-impl.md @@ -0,0 +1,1811 @@ +# Multi-Table Support Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. 
+ +**Goal:** Extend QueryChat to support multiple tables while preserving backwards compatibility for single-table use cases. + +**Architecture:** The implementation uses a dictionary-based storage for multiple data sources, a TableAccessor class for per-table access, per-table reactive state management, and tool parameters to target specific tables. The LLM handles JOINs for queries while each table maintains independent filter state. + +**Tech Stack:** Python, Shiny for Python, chatlas, narwhals, pytest + +**Design Document:** `docs/plans/2025-01-14-multi-table-design.md` + +--- + +## Phase 1: Multi-Source Storage Infrastructure + +This phase changes the internal storage from a single `_data_source` to a dictionary of data sources keyed by table name. + +### Task 1.1: Add `_data_sources` Dictionary Storage + +**Files:** +- Modify: `pkg-py/src/querychat/_querychat.py:44-92` +- Test: `pkg-py/tests/test_multi_table.py` (create) + +**Step 1: Create test file with initial failing test** + +Create `pkg-py/tests/test_multi_table.py`: + +```python +"""Tests for multi-table support.""" + +import os + +import pandas as pd +import pytest +from querychat import QueryChat + + +@pytest.fixture(autouse=True) +def set_dummy_api_key(): + """Set a dummy OpenAI API key for testing.""" + old_api_key = os.environ.get("OPENAI_API_KEY") + os.environ["OPENAI_API_KEY"] = "sk-dummy-api-key-for-testing" + yield + if old_api_key is not None: + os.environ["OPENAI_API_KEY"] = old_api_key + else: + del os.environ["OPENAI_API_KEY"] + + +@pytest.fixture +def orders_df(): + """Sample orders DataFrame.""" + return pd.DataFrame({ + "id": [1, 2, 3], + "customer_id": [101, 102, 101], + "amount": [100.0, 200.0, 150.0], + }) + + +@pytest.fixture +def customers_df(): + """Sample customers DataFrame.""" + return pd.DataFrame({ + "id": [101, 102, 103], + "name": ["Alice", "Bob", "Charlie"], + "state": ["CA", "NY", "CA"], + }) + + +class TestMultiSourceStorage: + """Tests for multi-source storage 
infrastructure.""" + + def test_single_table_stored_in_data_sources(self, orders_df): + """Test that single table is stored in _data_sources dict.""" + qc = QueryChat(orders_df, "orders", greeting="Hello!") + + # Should have _data_sources dict with one entry + assert hasattr(qc, "_data_sources") + assert isinstance(qc._data_sources, dict) + assert "orders" in qc._data_sources + assert len(qc._data_sources) == 1 + + def test_data_source_property_returns_first_source(self, orders_df): + """Test backwards compatibility: data_source property returns the first source.""" + qc = QueryChat(orders_df, "orders", greeting="Hello!") + + # data_source property should return the single source + assert qc.data_source is qc._data_sources["orders"] +``` + +**Step 2: Run test to verify it fails** + +Run: `cd pkg-py && uv run pytest tests/test_multi_table.py::TestMultiSourceStorage::test_single_table_stored_in_data_sources -v` +Expected: FAIL with "AttributeError: 'QueryChatExpress' object has no attribute '_data_sources'" + +**Step 3: Implement dictionary storage** + +In `pkg-py/src/querychat/_querychat.py`, change the constructor (around lines 44-92): + +Replace: +```python +self._data_source = normalize_data_source(data_source, table_name) +``` + +With: +```python +self._data_sources: dict[str, DataSource] = {} +normalized = normalize_data_source(data_source, table_name) +self._data_sources[table_name] = normalized +``` + +Update the `data_source` property (around line 450-460) to return from dictionary: + +```python +@property +def data_source(self) -> DataSource: + """The data source (for single-table backwards compatibility).""" + if len(self._data_sources) == 1: + return next(iter(self._data_sources.values())) + raise ValueError( + f"Multiple tables present ({', '.join(self._data_sources.keys())}). " + "Use qc.table('name').data_source instead." + ) +``` + +Also update cleanup() method to iterate over all sources. 
+ +**Step 4: Run test to verify it passes** + +Run: `cd pkg-py && uv run pytest tests/test_multi_table.py::TestMultiSourceStorage -v` +Expected: PASS + +**Step 5: Commit** + +```bash +git add pkg-py/tests/test_multi_table.py pkg-py/src/querychat/_querychat.py +git commit -m "feat(pkg-py): add dictionary storage for multiple data sources + +Store data sources in _data_sources dict keyed by table name. +Maintains backwards compatibility via data_source property. + +Co-Authored-By: Claude Opus 4.5 " +``` + +--- + +### Task 1.2: Add `table_names()` Method + +**Files:** +- Modify: `pkg-py/src/querychat/_querychat.py` +- Test: `pkg-py/tests/test_multi_table.py` + +**Step 1: Write the failing test** + +Add to `TestMultiSourceStorage` class: + +```python +def test_table_names_returns_list(self, orders_df): + """Test that table_names() returns list of table names.""" + qc = QueryChat(orders_df, "orders", greeting="Hello!") + + names = qc.table_names() + + assert names == ["orders"] +``` + +**Step 2: Run test to verify it fails** + +Run: `cd pkg-py && uv run pytest tests/test_multi_table.py::TestMultiSourceStorage::test_table_names_returns_list -v` +Expected: FAIL with "AttributeError: 'QueryChatExpress' object has no attribute 'table_names'" + +**Step 3: Implement table_names() method** + +Add method to `QueryChatBase` class: + +```python +def table_names(self) -> list[str]: + """ + Return the names of all registered tables. + + Returns + ------- + list[str] + List of table names in the order they were added. + """ + return list(self._data_sources.keys()) +``` + +**Step 4: Run test to verify it passes** + +Run: `cd pkg-py && uv run pytest tests/test_multi_table.py::TestMultiSourceStorage::test_table_names_returns_list -v` +Expected: PASS + +**Step 5: Commit** + +```bash +git add pkg-py/src/querychat/_querychat.py pkg-py/tests/test_multi_table.py +git commit -m "feat(pkg-py): add table_names() method + +Returns list of registered table names in add-order. 
+ +Co-Authored-By: Claude Opus 4.5 " +``` + +--- + +### Task 1.3: Add `add_table()` Method + +**Files:** +- Modify: `pkg-py/src/querychat/_querychat.py` +- Test: `pkg-py/tests/test_multi_table.py` + +**Step 1: Write failing tests** + +Add new test class: + +```python +class TestAddTable: + """Tests for add_table() method.""" + + def test_add_table_basic(self, orders_df, customers_df): + """Test adding a second table.""" + qc = QueryChat(orders_df, "orders", greeting="Hello!") + qc.add_table(customers_df, "customers") + + assert qc.table_names() == ["orders", "customers"] + assert len(qc._data_sources) == 2 + + def test_add_table_with_relationships(self, orders_df, customers_df): + """Test adding table with explicit relationships.""" + qc = QueryChat(orders_df, "orders", greeting="Hello!") + qc.add_table( + customers_df, "customers", + relationships={"id": "orders.customer_id"} + ) + + assert "customers" in qc._data_sources + + def test_add_table_with_description(self, orders_df, customers_df): + """Test adding table with description.""" + qc = QueryChat(orders_df, "orders", greeting="Hello!") + qc.add_table( + customers_df, "customers", + description="Customer contact information" + ) + + assert "customers" in qc._data_sources + + def test_add_table_duplicate_name_raises(self, orders_df): + """Test that adding duplicate table name raises error.""" + qc = QueryChat(orders_df, "orders", greeting="Hello!") + + with pytest.raises(ValueError, match="Table 'orders' already exists"): + qc.add_table(orders_df, "orders") + + def test_add_table_invalid_name_raises(self, orders_df, customers_df): + """Test that invalid table name raises error.""" + qc = QueryChat(orders_df, "orders", greeting="Hello!") + + with pytest.raises(ValueError, match="must begin with a letter"): + qc.add_table(customers_df, "123invalid") + + def test_add_table_after_server_raises(self, orders_df, customers_df): + """Test that adding table after server init raises error.""" + qc = QueryChat(orders_df, 
"orders", greeting="Hello!") + qc._server_initialized = True # Simulate server initialization + + with pytest.raises(RuntimeError, match="Cannot add tables after server"): + qc.add_table(customers_df, "customers") +``` + +**Step 2: Run tests to verify they fail** + +Run: `cd pkg-py && uv run pytest tests/test_multi_table.py::TestAddTable -v` +Expected: FAIL with "AttributeError: 'QueryChatExpress' object has no attribute 'add_table'" + +**Step 3: Implement add_table() method** + +Add to `QueryChatBase` class: + +```python +def add_table( + self, + data_source: IntoFrame | sqlalchemy.Engine, + table_name: str, + *, + relationships: dict[str, str] | None = None, + description: str | None = None, + infer_relationships: bool = True, +) -> None: + """ + Add an additional table to the QueryChat instance. + + Parameters + ---------- + data_source + The data source (DataFrame, LazyFrame, or database connection). + table_name + Name for the table (must be unique within this QueryChat). + relationships + Optional dict mapping local columns to "other_table.column" for JOINs. + Example: {"customer_id": "customers.id"} + description + Optional free-text description of the table for the LLM. + infer_relationships + Whether to auto-detect relationships from database metadata. + Only applies to database connections. Default True. + + Raises + ------ + ValueError + If table_name already exists or is invalid. + RuntimeError + If called after server() has been invoked. + """ + # Check if server already initialized + if getattr(self, "_server_initialized", False): + raise RuntimeError( + "Cannot add tables after server initialization. " + "Add all tables before calling .server() or .app()." 
+ ) + + # Validate table name format + if not re.match(r"^[a-zA-Z][a-zA-Z0-9_]*$", table_name): + raise ValueError( + "Table name must begin with a letter and contain only " + "letters, numbers, and underscores" + ) + + # Check for duplicates + if table_name in self._data_sources: + raise ValueError(f"Table '{table_name}' already exists") + + # Normalize and store the data source + normalized = normalize_data_source(data_source, table_name) + self._data_sources[table_name] = normalized + + # Store relationship and description metadata + if not hasattr(self, "_table_relationships"): + self._table_relationships: dict[str, dict[str, str]] = {} + if not hasattr(self, "_table_descriptions"): + self._table_descriptions: dict[str, str] = {} + + if relationships: + self._table_relationships[table_name] = relationships + if description: + self._table_descriptions[table_name] = description + + # TODO: Implement infer_relationships for database connections +``` + +Also add `_server_initialized = False` to `__init__`. + +**Step 4: Run tests to verify they pass** + +Run: `cd pkg-py && uv run pytest tests/test_multi_table.py::TestAddTable -v` +Expected: PASS + +**Step 5: Commit** + +```bash +git add pkg-py/src/querychat/_querychat.py pkg-py/tests/test_multi_table.py +git commit -m "feat(pkg-py): add add_table() method for multi-table support + +Allows adding additional tables after construction. +Stores relationships and descriptions for LLM context. +Validates table names and prevents duplicates. 
+ +Co-Authored-By: Claude Opus 4.5 " +``` + +--- + +### Task 1.4: Add `remove_table()` Method + +**Files:** +- Modify: `pkg-py/src/querychat/_querychat.py` +- Test: `pkg-py/tests/test_multi_table.py` + +**Step 1: Write failing tests** + +Add new test class: + +```python +class TestRemoveTable: + """Tests for remove_table() method.""" + + def test_remove_table_basic(self, orders_df, customers_df): + """Test removing a table.""" + qc = QueryChat(orders_df, "orders", greeting="Hello!") + qc.add_table(customers_df, "customers") + + qc.remove_table("customers") + + assert qc.table_names() == ["orders"] + + def test_remove_table_nonexistent_raises(self, orders_df): + """Test that removing nonexistent table raises error.""" + qc = QueryChat(orders_df, "orders", greeting="Hello!") + + with pytest.raises(ValueError, match="Table 'foo' not found"): + qc.remove_table("foo") + + def test_remove_last_table_raises(self, orders_df): + """Test that removing last table raises error.""" + qc = QueryChat(orders_df, "orders", greeting="Hello!") + + with pytest.raises(ValueError, match="Cannot remove last table"): + qc.remove_table("orders") + + def test_remove_table_after_server_raises(self, orders_df, customers_df): + """Test that removing table after server init raises error.""" + qc = QueryChat(orders_df, "orders", greeting="Hello!") + qc.add_table(customers_df, "customers") + qc._server_initialized = True + + with pytest.raises(RuntimeError, match="Cannot remove tables after server"): + qc.remove_table("customers") +``` + +**Step 2: Run tests to verify they fail** + +Run: `cd pkg-py && uv run pytest tests/test_multi_table.py::TestRemoveTable -v` +Expected: FAIL with "AttributeError: 'QueryChatExpress' object has no attribute 'remove_table'" + +**Step 3: Implement remove_table() method** + +Add to `QueryChatBase` class: + +```python +def remove_table(self, table_name: str) -> None: + """ + Remove a table from the QueryChat instance. 
+ + Parameters + ---------- + table_name + Name of the table to remove. + + Raises + ------ + ValueError + If table doesn't exist or is the last remaining table. + RuntimeError + If called after server() has been invoked. + """ + if getattr(self, "_server_initialized", False): + raise RuntimeError( + "Cannot remove tables after server initialization. " + "Configure all tables before calling .server() or .app()." + ) + + if table_name not in self._data_sources: + available = ", ".join(self._data_sources.keys()) + raise ValueError(f"Table '{table_name}' not found. Available: {available}") + + if len(self._data_sources) == 1: + raise ValueError( + "Cannot remove last table. At least one table is required." + ) + + # Clean up the data source + self._data_sources[table_name].cleanup() + del self._data_sources[table_name] + + # Remove associated metadata + if hasattr(self, "_table_relationships"): + self._table_relationships.pop(table_name, None) + if hasattr(self, "_table_descriptions"): + self._table_descriptions.pop(table_name, None) +``` + +**Step 4: Run tests to verify they pass** + +Run: `cd pkg-py && uv run pytest tests/test_multi_table.py::TestRemoveTable -v` +Expected: PASS + +**Step 5: Commit** + +```bash +git add pkg-py/src/querychat/_querychat.py pkg-py/tests/test_multi_table.py +git commit -m "feat(pkg-py): add remove_table() method + +Allows removing tables before server initialization. +Cleans up data source and associated metadata. + +Co-Authored-By: Claude Opus 4.5 " +``` + +--- + +## Phase 2: TableAccessor Class + +This phase implements the `.table("name")` accessor pattern that returns an object with `.df()`, `.sql()`, `.title()` methods. 
+ +### Task 2.1: Create TableAccessor Class + +**Files:** +- Create: `pkg-py/src/querychat/_table_accessor.py` +- Modify: `pkg-py/src/querychat/__init__.py` +- Test: `pkg-py/tests/test_multi_table.py` + +**Step 1: Write failing tests** + +Add new test class: + +```python +class TestTableAccessor: + """Tests for table() method and TableAccessor class.""" + + def test_table_returns_accessor(self, orders_df): + """Test that table() returns a TableAccessor.""" + qc = QueryChat(orders_df, "orders", greeting="Hello!") + + accessor = qc.table("orders") + + assert accessor is not None + assert accessor.table_name == "orders" + + def test_table_accessor_has_data_source(self, orders_df): + """Test that accessor provides access to data source.""" + qc = QueryChat(orders_df, "orders", greeting="Hello!") + + accessor = qc.table("orders") + + assert accessor.data_source is qc._data_sources["orders"] + + def test_table_nonexistent_raises(self, orders_df): + """Test that accessing nonexistent table raises error.""" + qc = QueryChat(orders_df, "orders", greeting="Hello!") + + with pytest.raises(ValueError, match="Table 'foo' not found"): + qc.table("foo") + + def test_table_accessor_multiple_tables(self, orders_df, customers_df): + """Test accessor works with multiple tables.""" + qc = QueryChat(orders_df, "orders", greeting="Hello!") + qc.add_table(customers_df, "customers") + + orders_accessor = qc.table("orders") + customers_accessor = qc.table("customers") + + assert orders_accessor.table_name == "orders" + assert customers_accessor.table_name == "customers" + assert orders_accessor.data_source is not customers_accessor.data_source +``` + +**Step 2: Run tests to verify they fail** + +Run: `cd pkg-py && uv run pytest tests/test_multi_table.py::TestTableAccessor -v` +Expected: FAIL with "AttributeError: 'QueryChatExpress' object has no attribute 'table'" + +**Step 3: Create TableAccessor class** + +Create `pkg-py/src/querychat/_table_accessor.py`: + +```python +"""TableAccessor 
class for accessing per-table state and data.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from ._datasource import AnyFrame, DataSource + + +class TableAccessor: + """ + Accessor for a specific table's state and data. + + This class provides access to per-table reactive state (df, sql, title) + and is returned by QueryChat.table("name"). + + Parameters + ---------- + querychat + The parent QueryChat instance. + table_name + The name of the table this accessor represents. + """ + + def __init__(self, querychat: "QueryChatBase", table_name: str): + self._querychat = querychat + self._table_name = table_name + + @property + def table_name(self) -> str: + """The name of this table.""" + return self._table_name + + @property + def data_source(self) -> "DataSource": + """The data source for this table.""" + return self._querychat._data_sources[self._table_name] + + # Reactive accessors will be added in Phase 6 + # def df(self) -> AnyFrame: ... + # def sql(self) -> str | None: ... + # def title(self) -> str | None: ... + + +# Import at bottom to avoid circular imports +if TYPE_CHECKING: + from ._querychat import QueryChatBase +``` + +**Step 4: Add table() method to QueryChatBase** + +In `pkg-py/src/querychat/_querychat.py`, add import and method: + +```python +from ._table_accessor import TableAccessor + +# In QueryChatBase class: +def table(self, name: str) -> TableAccessor: + """ + Get an accessor for a specific table. + + Parameters + ---------- + name + The name of the table to access. + + Returns + ------- + TableAccessor + An accessor object with df(), sql(), title() methods. + + Raises + ------ + ValueError + If the table doesn't exist. + """ + if name not in self._data_sources: + available = ", ".join(self._data_sources.keys()) + raise ValueError(f"Table '{name}' not found. 
Available: {available}") + + return TableAccessor(self, name) +``` + +**Step 5: Run tests to verify they pass** + +Run: `cd pkg-py && uv run pytest tests/test_multi_table.py::TestTableAccessor -v` +Expected: PASS + +**Step 6: Commit** + +```bash +git add pkg-py/src/querychat/_table_accessor.py pkg-py/src/querychat/_querychat.py pkg-py/tests/test_multi_table.py +git commit -m "feat(pkg-py): add TableAccessor class and table() method + +Provides per-table access pattern: qc.table('name').data_source +Reactive methods (df, sql, title) will be added in Phase 6. + +Co-Authored-By: Claude Opus 4.5 " +``` + +--- + +## Phase 3: Backwards-Compatible Accessor Errors + +This phase modifies `.df()`, `.sql()`, `.title()` on QueryChatExpress to raise helpful errors when multiple tables are present. + +### Task 3.1: Add Ambiguity Errors to Express Accessors + +**Files:** +- Modify: `pkg-py/src/querychat/_querychat.py:828-903` +- Test: `pkg-py/tests/test_multi_table.py` + +**Step 1: Write failing tests** + +Add new test class: + +```python +class TestAccessorAmbiguity: + """Tests for accessor ambiguity errors with multiple tables.""" + + def test_df_single_table_works(self, orders_df): + """Test that df() works with single table.""" + qc = QueryChat(orders_df, "orders", greeting="Hello!") + # Can't fully test reactive without server, but method should exist + assert hasattr(qc, "df") + + def test_sql_single_table_works(self, orders_df): + """Test that sql() works with single table.""" + qc = QueryChat(orders_df, "orders", greeting="Hello!") + assert hasattr(qc, "sql") + + def test_title_single_table_works(self, orders_df): + """Test that title() works with single table.""" + qc = QueryChat(orders_df, "orders", greeting="Hello!") + assert hasattr(qc, "title") + + def test_data_source_multiple_tables_raises(self, orders_df, customers_df): + """Test that data_source property raises with multiple tables.""" + qc = QueryChat(orders_df, "orders", greeting="Hello!") + 
qc.add_table(customers_df, "customers") + + with pytest.raises(ValueError, match="Multiple tables present"): + _ = qc.data_source +``` + +**Step 2: Run tests** + +Run: `cd pkg-py && uv run pytest tests/test_multi_table.py::TestAccessorAmbiguity -v` +Expected: Tests should pass with current implementation (data_source was updated in Task 1.1) + +**Step 3: Commit if tests pass** + +Note: The actual `.df()`, `.sql()`, `.title()` ambiguity errors require the server to be initialized. These will be fully tested in Phase 6 when we implement per-table reactive state. + +```bash +git add pkg-py/tests/test_multi_table.py +git commit -m "test(pkg-py): add accessor ambiguity tests + +Verifies data_source raises when multiple tables present. +Full df/sql/title tests will be added with reactive state. + +Co-Authored-By: Claude Opus 4.5 " +``` + +--- + +## Phase 4: Tool Changes + +This phase adds the `table` parameter to update_dashboard and reset_dashboard tools. + +### Task 4.1: Update UpdateDashboardData TypedDict + +**Files:** +- Modify: `pkg-py/src/querychat/tools.py:20-57` +- Test: `pkg-py/tests/test_tools.py` + +**Step 1: Write failing test** + +Add to existing test file or create section: + +```python +def test_update_dashboard_data_has_table_field(): + """Test that UpdateDashboardData includes table field.""" + from querychat.tools import UpdateDashboardData + + # TypedDict should have table as a key + assert "table" in UpdateDashboardData.__annotations__ +``` + +**Step 2: Run test to verify it fails** + +Run: `cd pkg-py && uv run pytest tests/test_tools.py::test_update_dashboard_data_has_table_field -v` +Expected: FAIL with "AssertionError" + +**Step 3: Update TypedDict** + +In `pkg-py/src/querychat/tools.py`, modify `UpdateDashboardData`: + +```python +class UpdateDashboardData(TypedDict): + """ + Data passed to update_dashboard callback. + + Attributes + ---------- + table + The name of the table being filtered. 
+ query + The SQL query string to execute for filtering/sorting the dashboard. + title + A descriptive title for the query, typically displayed in the UI. + """ + + table: str + query: str + title: str +``` + +**Step 4: Run test to verify it passes** + +Run: `cd pkg-py && uv run pytest tests/test_tools.py::test_update_dashboard_data_has_table_field -v` +Expected: PASS + +**Step 5: Commit** + +```bash +git add pkg-py/src/querychat/tools.py pkg-py/tests/test_tools.py +git commit -m "feat(pkg-py): add table field to UpdateDashboardData + +Prepares for multi-table support in update_dashboard tool. + +Co-Authored-By: Claude Opus 4.5 " +``` + +--- + +### Task 4.2: Update update_dashboard Tool Signature + +**Files:** +- Modify: `pkg-py/src/querychat/tools.py:66-146` +- Modify: `pkg-py/src/querychat/prompts/tool-update-dashboard.md` +- Test: `pkg-py/tests/test_tools.py` + +**Step 1: Write failing test** + +```python +def test_update_dashboard_accepts_table_parameter(): + """Test that update_dashboard tool accepts table parameter.""" + import pandas as pd + from querychat._datasource import DataFrameSource + from querychat.tools import tool_update_dashboard + + df = pd.DataFrame({"id": [1, 2], "name": ["a", "b"]}) + source = DataFrameSource(df, "test_table") + sources = {"test_table": source} + + called_with = {} + + def callback(data): + called_with.update(data) + + tool = tool_update_dashboard(sources, callback) + + # The tool function should accept table parameter + result = tool._func(table="test_table", query="SELECT * FROM test_table", title="All data") + + assert called_with.get("table") == "test_table" +``` + +**Step 2: Run test to verify it fails** + +Run: `cd pkg-py && uv run pytest tests/test_tools.py::test_update_dashboard_accepts_table_parameter -v` +Expected: FAIL with TypeError about unexpected keyword argument 'table' + +**Step 3: Update tool implementation** + +Modify `_update_dashboard_impl` and `tool_update_dashboard` in `tools.py`: + +```python +def 
_update_dashboard_impl( + data_sources: dict[str, DataSource], + update_fn: Callable[[UpdateDashboardData], None], +) -> Callable[[str, str, str], ContentToolResult]: + """Create the implementation function for updating the dashboard.""" + + def update_dashboard(table: str, query: str, title: str) -> ContentToolResult: + error = None + markdown = f"```sql\n{query}\n```" + value = "Dashboard updated. Use `query` tool to review results, if needed." + + # Validate table exists + if table not in data_sources: + available = ", ".join(data_sources.keys()) + error = f"Table '{table}' not found. Available: {available}" + markdown += f"\n\n> Error: {error}" + return ContentToolResult(value=markdown, error=Exception(error)) + + data_source = data_sources[table] + + try: + # Test the query but don't execute it yet + data_source.test_query(query, require_all_columns=True) + + # Add Apply Filter button + button_html = f"""""" + + # Call the callback with TypedDict data on success + update_fn({"table": table, "query": query, "title": title}) + + except Exception as e: + error = str(e) + markdown += f"\n\n> Error: {error}" + return ContentToolResult(value=markdown, error=e) + + # Return ContentToolResult with display metadata + return ContentToolResult( + value=value, + extra={ + "display": ToolResultDisplay( + markdown=markdown + f"\n\n{button_html}", + title=title, + show_request=False, + open=querychat_tool_starts_open("update"), + icon=bs_icon("funnel-fill"), + ), + }, + ) + + return update_dashboard + + +def tool_update_dashboard( + data_sources: dict[str, DataSource], + update_fn: Callable[[UpdateDashboardData], None], +) -> Tool: + """ + Create a tool that modifies the data presented in the dashboard. + + Parameters + ---------- + data_sources + Dictionary of data sources keyed by table name. + update_fn + Callback function to call with UpdateDashboardData when update succeeds. + + Returns + ------- + Tool + A tool that can be registered with chatlas. 
+ """ + impl = _update_dashboard_impl(data_sources, update_fn) + + # Get db_type from first source (all should be same dialect) + first_source = next(iter(data_sources.values())) + description = _read_prompt_template( + "tool-update-dashboard.md", + db_type=first_source.get_db_type(), + ) + impl.__doc__ = description + + return Tool.from_func( + impl, + name="querychat_update_dashboard", + annotations={"title": "Update Dashboard"}, + ) +``` + +**Step 4: Update tool prompt template** + +Modify `pkg-py/src/querychat/prompts/tool-update-dashboard.md` to include table parameter documentation: + +Add near the top: +``` +The `table` parameter specifies which table to filter. Use the table name exactly as shown in the schema. +``` + +**Step 5: Run test to verify it passes** + +Run: `cd pkg-py && uv run pytest tests/test_tools.py::test_update_dashboard_accepts_table_parameter -v` +Expected: PASS + +**Step 6: Commit** + +```bash +git add pkg-py/src/querychat/tools.py pkg-py/src/querychat/prompts/tool-update-dashboard.md pkg-py/tests/test_tools.py +git commit -m "feat(pkg-py): add table parameter to update_dashboard tool + +Tool now requires table parameter to specify which table to filter. +Validates table exists before executing query. 
+ +Co-Authored-By: Claude Opus 4.5 " +``` + +--- + +### Task 4.3: Update reset_dashboard Tool + +**Files:** +- Modify: `pkg-py/src/querychat/tools.py:149-209` +- Test: `pkg-py/tests/test_tools.py` + +**Step 1: Write failing test** + +```python +def test_reset_dashboard_accepts_table_parameter(): + """Test that reset_dashboard tool accepts table parameter.""" + from querychat.tools import tool_reset_dashboard + + reset_tables = [] + + def callback(table: str): + reset_tables.append(table) + + tool = tool_reset_dashboard(callback) + + # The tool function should accept table parameter + result = tool._func(table="orders") + + assert reset_tables == ["orders"] +``` + +**Step 2: Run test to verify it fails** + +Run: `cd pkg-py && uv run pytest tests/test_tools.py::test_reset_dashboard_accepts_table_parameter -v` +Expected: FAIL with TypeError + +**Step 3: Update reset_dashboard implementation** + +Modify in `tools.py`: + +```python +def _reset_dashboard_impl( + reset_fn: Callable[[str], None], +) -> Callable[[str], ContentToolResult]: + """Create the implementation function for resetting the dashboard.""" + + def reset_dashboard(table: str) -> ContentToolResult: + reset_fn(table) + return ContentToolResult( + value="Dashboard reset to show all data.", + extra={ + "display": ToolResultDisplay( + markdown="Reset to show all data", + title="Reset", + show_request=False, + open=querychat_tool_starts_open("reset"), + icon=bs_icon("arrow-counterclockwise"), + ), + }, + ) + + return reset_dashboard + + +def tool_reset_dashboard(reset_fn: Callable[[str], None]) -> Tool: + """ + Create a tool that resets the dashboard to show all data. + + Parameters + ---------- + reset_fn + Callback function to call with table name when reset is requested. + + Returns + ------- + Tool + A tool that can be registered with chatlas. 
+ """ + impl = _reset_dashboard_impl(reset_fn) + + description = _read_prompt_template("tool-reset-dashboard.md") + impl.__doc__ = description + + return Tool.from_func( + impl, + name="querychat_reset_dashboard", + annotations={"title": "Reset Dashboard"}, + ) +``` + +**Step 4: Update tool prompt template** + +Update `pkg-py/src/querychat/prompts/tool-reset-dashboard.md` to mention table parameter. + +**Step 5: Run test to verify it passes** + +Run: `cd pkg-py && uv run pytest tests/test_tools.py::test_reset_dashboard_accepts_table_parameter -v` +Expected: PASS + +**Step 6: Commit** + +```bash +git add pkg-py/src/querychat/tools.py pkg-py/src/querychat/prompts/tool-reset-dashboard.md pkg-py/tests/test_tools.py +git commit -m "feat(pkg-py): add table parameter to reset_dashboard tool + +Tool now requires table parameter to specify which table to reset. + +Co-Authored-By: Claude Opus 4.5 " +``` + +--- + +### Task 4.4: Update query Tool for Multi-Table + +**Files:** +- Modify: `pkg-py/src/querychat/tools.py` (query tool section) +- Test: `pkg-py/tests/test_tools.py` + +**Step 1: Write failing test** + +```python +def test_query_tool_accepts_multiple_sources(): + """Test that query tool works with multiple data sources.""" + import pandas as pd + from querychat._datasource import DataFrameSource + from querychat.tools import tool_query + + orders = pd.DataFrame({"id": [1, 2], "customer_id": [101, 102]}) + customers = pd.DataFrame({"id": [101, 102], "name": ["Alice", "Bob"]}) + + sources = { + "orders": DataFrameSource(orders, "orders"), + "customers": DataFrameSource(customers, "customers"), + } + + tool = tool_query(sources) + + # Query should work across tables + # Note: This requires the sources to share a connection or be in same DuckDB instance + assert tool is not None +``` + +**Step 2: Implement multi-source query tool** + +The query tool needs access to all tables for JOINs. For DataFrameSource, this requires sharing a DuckDB connection. 
This is more complex and may need a separate implementation approach. + +For now, update the signature to accept `dict[str, DataSource]`: + +```python +def tool_query( + data_sources: dict[str, DataSource], +) -> Tool: + """Create a tool for querying data across tables.""" + # Use first source for now - multi-table JOINs will need shared connection + first_source = next(iter(data_sources.values())) + # ... rest of implementation +``` + +**Step 3: Run tests** + +Run: `cd pkg-py && uv run pytest tests/test_tools.py -v` +Expected: PASS + +**Step 4: Commit** + +```bash +git add pkg-py/src/querychat/tools.py pkg-py/tests/test_tools.py +git commit -m "feat(pkg-py): update query tool for multi-table support + +Query tool now accepts dict of data sources. +Full JOIN support will require shared database connection. + +Co-Authored-By: Claude Opus 4.5 " +``` + +--- + +## Phase 5: System Prompt Changes + +This phase updates the system prompt to include all table schemas and relationship information. 
+ +### Task 5.1: Update QueryChatSystemPrompt for Multiple Sources + +**Files:** +- Modify: `pkg-py/src/querychat/_system_prompt.py` +- Test: `pkg-py/tests/test_system_prompt.py` + +**Step 1: Write failing tests** + +```python +class TestMultiTableSystemPrompt: + """Tests for multi-table system prompt generation.""" + + def test_multiple_schemas_in_prompt(self): + """Test that multiple table schemas appear in prompt.""" + import pandas as pd + from querychat._datasource import DataFrameSource + from querychat._system_prompt import QueryChatSystemPrompt + + orders = pd.DataFrame({"id": [1], "amount": [100.0]}) + customers = pd.DataFrame({"id": [1], "name": ["Alice"]}) + + sources = { + "orders": DataFrameSource(orders, "orders"), + "customers": DataFrameSource(customers, "customers"), + } + + prompt = QueryChatSystemPrompt( + prompt_template="Schema: {{schema}}", + data_sources=sources, + ) + + rendered = prompt.render(tools=("query",)) + + assert "orders" in rendered + assert "customers" in rendered + + def test_relationships_in_prompt(self): + """Test that relationship information appears in prompt.""" + import pandas as pd + from querychat._datasource import DataFrameSource + from querychat._system_prompt import QueryChatSystemPrompt + + orders = pd.DataFrame({"id": [1], "customer_id": [101]}) + customers = pd.DataFrame({"id": [101], "name": ["Alice"]}) + + sources = { + "orders": DataFrameSource(orders, "orders"), + "customers": DataFrameSource(customers, "customers"), + } + + relationships = { + "orders": {"customer_id": "customers.id"} + } + + prompt = QueryChatSystemPrompt( + prompt_template="{{schema}}\n{{#relationships}}Relationships:\n{{relationships}}{{/relationships}}", + data_sources=sources, + relationships=relationships, + ) + + rendered = prompt.render(tools=("query",)) + + assert "customer_id" in rendered + assert "customers.id" in rendered +``` + +**Step 2: Run tests to verify they fail** + +Run: `cd pkg-py && uv run pytest 
tests/test_system_prompt.py::TestMultiTableSystemPrompt -v` +Expected: FAIL with TypeError about data_sources parameter + +**Step 3: Update QueryChatSystemPrompt** + +Modify `pkg-py/src/querychat/_system_prompt.py`: + +```python +class QueryChatSystemPrompt: + """Manages system prompt generation for QueryChat.""" + + def __init__( + self, + prompt_template: str | Path, + data_sources: DataSource | dict[str, DataSource], + data_description: str | Path | None = None, + extra_instructions: str | Path | None = None, + categorical_threshold: int = 10, + relationships: dict[str, dict[str, str]] | None = None, + table_descriptions: dict[str, str] | None = None, + ): + # Handle both single source (backwards compat) and dict of sources + if isinstance(data_sources, DataSource): + self._data_sources = {data_sources.table_name: data_sources} + else: + self._data_sources = data_sources + + # Load template + if isinstance(prompt_template, Path): + self.template = prompt_template.read_text() + else: + self.template = prompt_template + + # Store metadata + self.data_description = _load_text(data_description) + self.extra_instructions = _load_text(extra_instructions) + self.categorical_threshold = categorical_threshold + self._relationships = relationships or {} + self._table_descriptions = table_descriptions or {} + + # Generate combined schema + self.schema = self._generate_combined_schema() + + def _generate_combined_schema(self) -> str: + """Generate schema string for all tables.""" + schemas = [] + for name, source in self._data_sources.items(): + schema = source.get_schema(categorical_threshold=self.categorical_threshold) + schemas.append(f"<table name=\"{name}\">\n{schema}\n</table>
") + + return "\n\n".join(schemas) + + def _generate_relationships_text(self) -> str: + """Generate relationship information text.""" + if not self._relationships: + return "" + + lines = [] + for table, rels in self._relationships.items(): + for local_col, foreign_ref in rels.items(): + lines.append(f"- {table}.{local_col} references {foreign_ref}") + + return "\n".join(lines) + + def render(self, tools: tuple[str, ...] | None) -> str: + """Render the system prompt with given tools.""" + # ... existing logic plus relationships + context = { + "db_type": next(iter(self._data_sources.values())).get_db_type(), + "schema": self.schema, + "data_description": self.data_description, + "extra_instructions": self.extra_instructions, + "has_tool_update": tools is not None and "update" in tools, + "has_tool_query": tools is not None and "query" in tools, + "include_query_guidelines": tools is not None and len(tools) > 0, + "relationships": self._generate_relationships_text(), + } + + return chevron.render(self.template, context) + + # Backwards compatibility + @property + def data_source(self) -> DataSource: + """Return single data source for backwards compatibility.""" + if len(self._data_sources) == 1: + return next(iter(self._data_sources.values())) + raise ValueError("Multiple data sources present") +``` + +**Step 4: Run tests to verify they pass** + +Run: `cd pkg-py && uv run pytest tests/test_system_prompt.py -v` +Expected: PASS + +**Step 5: Commit** + +```bash +git add pkg-py/src/querychat/_system_prompt.py pkg-py/tests/test_system_prompt.py +git commit -m "feat(pkg-py): update system prompt for multi-table support + +QueryChatSystemPrompt now accepts dict of data sources. +Generates combined schema with table tags. +Includes relationship information in prompt. 
+ +Co-Authored-By: Claude Opus 4.5 " +``` + +--- + +### Task 5.2: Update Main Prompt Template + +**Files:** +- Modify: `pkg-py/src/querychat/prompts/prompt.md` + +**Step 1: Update prompt template** + +Add relationship section and update tool instructions: + +```markdown +{{#relationships}} + +{{relationships}} + + +When answering questions that span multiple tables, use JOINs based on these relationships. +{{/relationships}} + +{{#has_tool_update}} +### Filtering and Sorting Data + +When filtering, you must specify which table to filter using the `table` parameter. +Only one table can be filtered per tool call. +... +{{/has_tool_update}} +``` + +**Step 2: Commit** + +```bash +git add pkg-py/src/querychat/prompts/prompt.md +git commit -m "feat(pkg-py): update prompt template for multi-table + +Adds relationship section and multi-table filtering instructions. + +Co-Authored-By: Claude Opus 4.5 " +``` + +--- + +## Phase 6: Per-Table Reactive State + +This phase implements per-table reactive state management in the Shiny module. + +### Task 6.1: Update ServerValues for Multi-Table + +**Files:** +- Modify: `pkg-py/src/querychat/_querychat_module.py:49-84` +- Test: `pkg-py/tests/test_multi_table.py` + +**Step 1: Design new ServerValues** + +The `ServerValues` dataclass needs to support per-table state: + +```python +@dataclass +class TableState: + """Per-table reactive state.""" + df: Callable[[], AnyFrame] + sql: ReactiveStringOrNone + title: ReactiveStringOrNone + + +@dataclass +class ServerValues: + """Session-specific reactive values.""" + tables: dict[str, TableState] + client: chatlas.Chat + + # Backwards compatibility for single table + @property + def df(self) -> Callable[[], AnyFrame]: + if len(self.tables) == 1: + return next(iter(self.tables.values())).df + raise ValueError("Multiple tables present. 
Use .tables['name'].df") + + @property + def sql(self) -> ReactiveStringOrNone: + if len(self.tables) == 1: + return next(iter(self.tables.values())).sql + raise ValueError("Multiple tables present. Use .tables['name'].sql") + + @property + def title(self) -> ReactiveStringOrNone: + if len(self.tables) == 1: + return next(iter(self.tables.values())).title + raise ValueError("Multiple tables present. Use .tables['name'].title") +``` + +**Step 2: Write failing tests** + +Add to test file: + +```python +class TestPerTableState: + """Tests for per-table reactive state.""" + + def test_server_values_has_tables_dict(self, orders_df, customers_df): + """Test that ServerValues has tables dict.""" + # This requires running the server, which is complex to test + # For now, test the dataclass structure + from querychat._querychat_module import ServerValues, TableState + + assert hasattr(ServerValues, "__annotations__") + assert "tables" in ServerValues.__annotations__ +``` + +**Step 3: Implement per-table state** + +This requires significant changes to `mod_server`. The implementation should: + +1. Create per-table reactive values +2. Create per-table filtered_df calcs +3. Update callbacks to handle table parameter +4. Return ServerValues with tables dict + +**Step 4: Run tests and commit** + +```bash +git add pkg-py/src/querychat/_querychat_module.py pkg-py/tests/test_multi_table.py +git commit -m "feat(pkg-py): implement per-table reactive state + +ServerValues now contains tables dict with per-table state. +Maintains backwards compatibility for single-table access. + +Co-Authored-By: Claude Opus 4.5 " +``` + +--- + +### Task 6.2: Complete TableAccessor Reactive Methods + +**Files:** +- Modify: `pkg-py/src/querychat/_table_accessor.py` +- Test: `pkg-py/tests/test_multi_table.py` + +**Step 1: Implement reactive methods on TableAccessor** + +```python +class TableAccessor: + # ... existing code ... 
+ + def df(self) -> AnyFrame: + """Return the current filtered data for this table (reactive).""" + if not hasattr(self._querychat, "_vals"): + raise RuntimeError("Server not initialized. Call .server() first.") + return self._querychat._vals.tables[self._table_name].df() + + def sql(self) -> str | None: + """Return the current SQL filter for this table (reactive).""" + if not hasattr(self._querychat, "_vals"): + raise RuntimeError("Server not initialized. Call .server() first.") + return self._querychat._vals.tables[self._table_name].sql.get() + + def title(self) -> str | None: + """Return the current filter title for this table (reactive).""" + if not hasattr(self._querychat, "_vals"): + raise RuntimeError("Server not initialized. Call .server() first.") + return self._querychat._vals.tables[self._table_name].title.get() +``` + +**Step 2: Commit** + +```bash +git add pkg-py/src/querychat/_table_accessor.py +git commit -m "feat(pkg-py): add reactive methods to TableAccessor + +TableAccessor.df(), .sql(), .title() now access per-table reactive state. + +Co-Authored-By: Claude Opus 4.5 " +``` + +--- + +## Phase 7: UI Changes + +This phase implements the tabbed UI and building blocks. + +### Task 7.1: Implement Tabbed UI in .app() + +**Files:** +- Modify: `pkg-py/src/querychat/_querychat.py` (.app() method) +- Test: Manual testing required + +**Step 1: Update .app() for tabs** + +Modify the `.app()` method to render tabs when multiple tables are present: + +```python +def app(self, *, bookmark_store: Literal["url", "server", "disable"] = "url") -> App: + # ... existing setup ... 
+ + def app_ui(request): + table_names = self.table_names() + + if len(table_names) == 1: + # Single table: existing layout + main_content = ui.card( + ui.card_header( + bs_icon("database"), + table_names[0], + ), + ui.output_data_frame("dt"), + ) + else: + # Multiple tables: tabbed layout + tabs = [] + for name in table_names: + tabs.append( + ui.nav_panel( + name, + ui.output_data_frame(f"dt_{name}"), + value=name, + ) + ) + main_content = ui.navset_card_tab(*tabs, id="table_tabs") + + # ... rest of layout +``` + +**Step 2: Update server for multiple data tables** + +```python +def app_server(input, output, session): + vals = self.server() + + for name in self.table_names(): + # Create a renderer for each table. The loop variable is bound as a + # default argument so each renderer keeps its own table name, and the + # renderer returns the data frame itself rather than a nested function. + @output(id=f"dt_{name}") + @render.data_frame + def _render_table(table_name=name): + return vals.tables[table_name].df() +``` + +**Step 3: Commit** + +```bash +git add pkg-py/src/querychat/_querychat.py +git commit -m "feat(pkg-py): implement tabbed UI for multi-table + +.app() renders tabs when multiple tables are present. +Single table mode unchanged for backwards compatibility. 
+ +Co-Authored-By: Claude Opus 4.5 " +``` + +--- + +### Task 7.2: Implement Auto-Switch on Filter + +**Files:** +- Modify: `pkg-py/src/querychat/_querychat.py` +- Modify: `pkg-py/src/querychat/_querychat_module.py` + +**Step 1: Add active_table reactive value** + +In the server module, track which table was most recently filtered: + +```python +active_table = reactive.value[str](list(data_sources.keys())[0]) + +def update_dashboard(data: UpdateDashboardData): + table = data["table"] + tables[table].sql.set(data["query"]) + tables[table].title.set(data["title"]) + active_table.set(table) +``` + +**Step 2: Update UI to switch tabs** + +In the app, use `ui.update_navs()` to switch tabs when filter changes: + +```python +@reactive.effect +def switch_to_active_table(): + ui.update_navs("table_tabs", selected=vals.active_table()) +``` + +**Step 3: Commit** + +```bash +git add pkg-py/src/querychat/_querychat.py pkg-py/src/querychat/_querychat_module.py +git commit -m "feat(pkg-py): auto-switch tabs on filter + +UI automatically switches to the most recently filtered table. + +Co-Authored-By: Claude Opus 4.5 " +``` + +--- + +### Task 7.3: Implement .table("name").ui() Building Block + +**Files:** +- Modify: `pkg-py/src/querychat/_table_accessor.py` + +**Step 1: Add ui() method to TableAccessor** + +```python +def ui(self) -> Tag: + """ + Render the UI for this table (data table + SQL display). + + Returns + ------- + Tag + A Shiny UI element containing the data table and SQL display. + """ + from shiny import ui + + table_id = f"{self._querychat.id}_{self._table_name}" + + return ui.card( + ui.card_header(self._table_name), + ui.output_data_frame(f"{table_id}_dt"), + ui.output_text(f"{table_id}_sql"), + ) +``` + +**Step 2: Commit** + +```bash +git add pkg-py/src/querychat/_table_accessor.py +git commit -m "feat(pkg-py): add .ui() building block to TableAccessor + +Enables custom layouts with qc.table('name').ui(). 
+ +Co-Authored-By: Claude Opus 4.5 " +``` + +--- + +## Phase 8: Integration and Cleanup + +### Task 8.1: Update QueryChatBase to Wire Everything Together + +**Files:** +- Modify: `pkg-py/src/querychat/_querychat.py` + +Ensure all the pieces connect: +1. Constructor creates `_data_sources` dict +2. System prompt receives all sources and relationships +3. Tools receive `data_sources` dict +4. Server creates per-table state + +**Step 1: Update constructor** + +**Step 2: Update .client() method** + +**Step 3: Update .server() method** + +**Step 4: Run full test suite** + +Run: `cd pkg-py && uv run pytest -v` +Expected: PASS + +**Step 5: Commit** + +```bash +git add pkg-py/src/querychat/ +git commit -m "feat(pkg-py): wire together multi-table support + +Integrates all multi-table components: +- Dictionary storage for data sources +- Per-table reactive state +- Multi-table system prompt +- Parameterized tools + +Co-Authored-By: Claude Opus 4.5 " +``` + +--- + +### Task 8.2: Update Module Callbacks and Bookmarking + +**Files:** +- Modify: `pkg-py/src/querychat/_querychat_module.py` + +Update bookmarking to save/restore per-table state: + +```python +@session.bookmark.on_bookmark +def _on_bookmark(x: BookmarkState) -> None: + vals = x.values + for name, table_state in tables.items(): + vals[f"querychat_{name}_sql"] = table_state.sql.get() + vals[f"querychat_{name}_title"] = table_state.title.get() + vals["querychat_has_greeted"] = has_greeted.get() + vals["querychat_active_table"] = active_table.get() + +@session.bookmark.on_restore +def _on_restore(x: RestoreState) -> None: + vals = x.values + for name in tables: + if f"querychat_{name}_sql" in vals: + tables[name].sql.set(vals[f"querychat_{name}_sql"]) + if f"querychat_{name}_title" in vals: + tables[name].title.set(vals[f"querychat_{name}_title"]) + # ... 
etc +``` + +**Step 1: Implement** + +**Step 2: Commit** + +```bash +git add pkg-py/src/querychat/_querychat_module.py +git commit -m "feat(pkg-py): update bookmarking for multi-table + +Saves and restores per-table SQL, title, and active table state. + +Co-Authored-By: Claude Opus 4.5 " +``` + +--- + +### Task 8.3: Run Lint and Type Checks + +**Step 1: Run linter** + +Run: `cd pkg-py && uv run ruff check --fix . --config ../pyproject.toml` + +**Step 2: Run type checker** + +Run: `cd pkg-py && uv run pyright` + +**Step 3: Fix any issues** + +**Step 4: Run full test suite** + +Run: `make py-check` + +**Step 5: Commit** + +```bash +git add -A +git commit -m "chore(pkg-py): fix lint and type issues + +Co-Authored-By: Claude Opus 4.5 " +``` + +--- + +### Task 8.4: Update Exports and Documentation + +**Files:** +- Modify: `pkg-py/src/querychat/__init__.py` +- Modify: `pkg-py/src/querychat/types.py` (if exists) + +**Step 1: Export new classes** + +```python +from ._table_accessor import TableAccessor + +__all__ = [ + "QueryChat", + "TableAccessor", + # ... existing exports +] +``` + +**Step 2: Commit** + +```bash +git add pkg-py/src/querychat/__init__.py +git commit -m "feat(pkg-py): export TableAccessor class + +Co-Authored-By: Claude Opus 4.5 " +``` + +--- + +## Summary of Changes + +| File | Changes | +|------|---------| +| `_querychat.py` | Dictionary storage, add_table(), remove_table(), table_names(), table() | +| `_table_accessor.py` | New file with TableAccessor class | +| `_querychat_module.py` | Per-table reactive state, updated ServerValues | +| `_system_prompt.py` | Multi-source support, relationships | +| `tools.py` | Table parameter on update/reset tools | +| `prompts/prompt.md` | Multi-table instructions, relationships section | +| `prompts/tool-*.md` | Table parameter documentation | +| `tests/test_multi_table.py` | Comprehensive multi-table tests | + +--- + +Plan complete and saved to `docs/plans/2025-01-14-multi-table-impl.md`. 
+ +**Two execution options:** + +**1. Subagent-Driven (this session)** - I dispatch fresh subagent per task, review between tasks, fast iteration + +**2. Parallel Session (separate)** - Open new session with executing-plans, batch execution with checkpoints + +**Which approach?** diff --git a/pkg-py/CHANGELOG.md b/pkg-py/CHANGELOG.md index efda1d24..7fc61e2f 100644 --- a/pkg-py/CHANGELOG.md +++ b/pkg-py/CHANGELOG.md @@ -45,6 +45,8 @@ Each framework's `QueryChat` provides `.app()` for quick standalone apps and `.u ### New features +* Added `PolarsLazySource` to support Polars LazyFrames as data sources. Data stays lazy until the render boundary, enabling efficient handling of large datasets. Pass a `polars.LazyFrame` directly to `QueryChat()` and queries will be executed lazily via Polars' SQLContext. + * `QueryChat.console()` was added to launch interactive console-based chat sessions with your data source, with persistent conversation state across invocations. (#168) * `QueryChat.client()` can now create standalone querychat-enabled chat clients with configurable tools and callbacks, enabling use outside of Shiny applications. (#168) diff --git a/pkg-py/examples/lazy_frame_demo.py b/pkg-py/examples/lazy_frame_demo.py new file mode 100644 index 00000000..004b49f5 --- /dev/null +++ b/pkg-py/examples/lazy_frame_demo.py @@ -0,0 +1,272 @@ +#!/usr/bin/env python3 +""" +Demo script comparing eager vs lazy data source performance. + +This script demonstrates the performance benefits of using PolarsLazySource +with large datasets. It creates a synthetic dataset and compares: +1. Eager loading (all data in memory upfront) +2. 
def create_large_dataset(path: Path, n_rows: int) -> None:
    """Create a synthetic parquet file with ``n_rows`` rows for the demo.

    Rows are generated in fixed-size chunks and written to ``path`` with a
    single ``write_parquet`` call. The previous implementation re-read the
    whole file and rewrote it for every chunk, which made the I/O cost grow
    quadratically with the number of chunks.

    Parameters
    ----------
    path
        Destination parquet file (overwritten if it exists).
    n_rows
        Total number of rows to generate; must be positive.

    Raises
    ------
    ValueError
        If ``n_rows`` is not positive (nothing could be written).

    """
    if n_rows <= 0:
        raise ValueError(f"n_rows must be positive, got {n_rows}")

    print(f"Creating dataset with {n_rows:,} rows...")
    start = time.perf_counter()

    chunk_size = 1_000_000
    regions = ("North", "South", "East", "West")
    chunks = []

    for offset in range(0, n_rows, chunk_size):
        chunk_rows = min(chunk_size, n_rows - offset)
        chunks.append(
            pl.DataFrame(
                {
                    "id": range(offset, offset + chunk_rows),
                    "category": [f"cat_{j % 100}" for j in range(chunk_rows)],
                    "region": [regions[j % 4] for j in range(chunk_rows)],
                    "value": [float(j % 1000) + 0.5 for j in range(chunk_rows)],
                    "quantity": [j % 500 for j in range(chunk_rows)],
                    "date": pl.Series(
                        [
                            f"2024-{(j % 12) + 1:02d}-{(j % 28) + 1:02d}"
                            for j in range(chunk_rows)
                        ]
                    ).str.to_date(),
                }
            )
        )
        print(f"  Generated {offset + chunk_rows:,} / {n_rows:,} rows")

    # One concat + one write. The full dataset is held in memory at this
    # point, which the old read-concat-rewrite loop also did on its final
    # iteration, so peak memory is not made worse by this change.
    pl.concat(chunks).write_parquet(path)

    elapsed = time.perf_counter() - start
    file_size_mb = path.stat().st_size / (1024 * 1024)
    print(f"Dataset created: {file_size_mb:.1f} MB in {elapsed:.1f}s\n")
data loading performance.""" + from querychat import QueryChat + + print("=" * 60) + print("COMPARING EAGER VS LAZY DATA SOURCE") + print("=" * 60) + + # Check if we have psutil for memory tracking + try: + import psutil # noqa: F401 + has_psutil = True + except ImportError: + has_psutil = False + print("(Install psutil for memory usage tracking: pip install psutil)\n") + + # --- EAGER LOADING --- + print("\n1. EAGER LOADING (polars.read_parquet → DataFrame)") + print("-" * 50) + + if has_psutil: + mem_before = measure_memory() + + start = time.perf_counter() + df = pl.read_parquet(parquet_path) + load_time = time.perf_counter() - start + + if has_psutil: + mem_after = measure_memory() + print(f" Memory increase: {mem_after - mem_before:.1f} MB") + + print(f" Load time: {load_time:.2f}s") + print(f" Rows loaded: {len(df):,}") + + # Create QueryChat with eager data + start = time.perf_counter() + qc_eager = QueryChat( + data_source=df, + table_name="sales", + greeting="Hello!", + ) + init_time = time.perf_counter() - start + print(f" QueryChat init: {init_time:.2f}s") + + # Execute a query + start = time.perf_counter() + result = qc_eager.data_source.execute_query( + "SELECT region, SUM(value) as total FROM sales GROUP BY region" + ) + query_time = time.perf_counter() - start + print(f" Query execution: {query_time:.3f}s") + print(f" Result rows: {len(result)}") + + del df, qc_eager, result + import gc + gc.collect() + + # --- LAZY LOADING --- + print("\n2. 
LAZY LOADING (polars.scan_parquet → LazyFrame)") + print("-" * 50) + + if has_psutil: + mem_before = measure_memory() + + start = time.perf_counter() + lf = pl.scan_parquet(parquet_path) + load_time = time.perf_counter() - start + + if has_psutil: + mem_after = measure_memory() + print(f" Memory increase: {mem_after - mem_before:.1f} MB") + + print(f" 'Load' time: {load_time:.4f}s (just metadata!)") + + # Create QueryChat with lazy data + start = time.perf_counter() + qc_lazy = QueryChat( + data_source=lf, + table_name="sales", + greeting="Hello!", + ) + init_time = time.perf_counter() - start + print(f" QueryChat init: {init_time:.2f}s") + + # Execute the same query (stays lazy) + start = time.perf_counter() + result_lazy = qc_lazy.data_source.execute_query( + "SELECT region, SUM(value) as total FROM sales GROUP BY region" + ) + query_time = time.perf_counter() - start + print(f" Query execution (lazy): {query_time:.3f}s") + + # Now collect to get actual results + start = time.perf_counter() + result_collected = result_lazy.collect() + collect_time = time.perf_counter() - start + print(f" Collect time: {collect_time:.3f}s") + print(f" Result rows: {len(result_collected)}") + + # --- SUMMARY --- + print("\n" + "=" * 60) + print("SUMMARY") + print("=" * 60) + print(""" +Key differences: +- EAGER: Loads ALL data into memory immediately +- LAZY: Only reads metadata; data stays on disk until .collect() + +Benefits of lazy: +- Much faster startup (no full data load) +- Lower memory usage (only results in memory) +- Query optimization (Polars can push down filters) + +Use lazy (scan_parquet) for: +- Large files that don't fit in memory +- When you only need filtered/aggregated subsets +- Interactive exploration of big data +""") + + +def interactive_demo(parquet_path: Path) -> None: + """Launch an interactive QueryChat session with the lazy data.""" + from querychat import QueryChat + + print("\n" + "=" * 60) + print("INTERACTIVE DEMO") + print("=" * 60) + + lf = 
def main():
    """Parse CLI arguments and run the eager-vs-lazy comparison demo.

    With ``--data-path`` an existing parquet file is used as-is and never
    deleted. Otherwise a dataset is generated inside a temporary directory
    that is removed on exit, including when generation or the demo fails
    or is interrupted.
    """
    parser = argparse.ArgumentParser(description="Demo lazy vs eager data loading")
    parser.add_argument(
        "--rows",
        type=int,
        default=10_000_000,
        help="Number of rows to generate (default: 10 million)",
    )
    parser.add_argument(
        "--interactive",
        action="store_true",
        help="Launch interactive console after comparison",
    )
    parser.add_argument(
        "--data-path",
        type=str,
        default=None,
        help="Path to existing parquet file (skip generation)",
    )
    args = parser.parse_args()

    # Check for API key
    if not os.environ.get("OPENAI_API_KEY"):
        print("Warning: OPENAI_API_KEY not set. Interactive mode won't work.")
        print("Set it with: export OPENAI_API_KEY='your-key-here'\n")

    def run_demos(parquet_path: Path) -> None:
        """Run the comparison and, if requested, the interactive console."""
        demo_eager_vs_lazy(parquet_path)
        if args.interactive:
            interactive_demo(parquet_path)

    # User-supplied file: use it in place, never clean it up.
    if args.data_path:
        parquet_path = Path(args.data_path)
        if not parquet_path.exists():
            print(f"Error: File not found: {parquet_path}")
            return
        run_demos(parquet_path)
        return

    # Generated file: the context manager owns the directory, so it is
    # removed even if create_large_dataset() raises part-way through
    # (previously generation ran outside the try/finally and leaked it).
    with tempfile.TemporaryDirectory() as temp_dir:
        parquet_path = Path(temp_dir) / "large_sales_data.parquet"
        try:
            create_large_dataset(parquet_path, args.rows)
            run_demos(parquet_path)
        finally:
            if parquet_path.exists():
                print(f"\nCleaning up temporary file: {parquet_path}")
dash import html children = [] @@ -93,7 +92,6 @@ def card_ui( def chat_container_ui(ids: IDs) -> list[Component]: """Create the chat UI container (messages + input).""" import dash_bootstrap_components as dbc - from dash import html return [ diff --git a/pkg-py/src/querychat/_querychat_base.py b/pkg-py/src/querychat/_querychat_base.py index e8a7c7f1..e908c2fd 100644 --- a/pkg-py/src/querychat/_querychat_base.py +++ b/pkg-py/src/querychat/_querychat_base.py @@ -22,6 +22,7 @@ ) from ._shiny_module import GREETING_PROMPT from ._system_prompt import QueryChatSystemPrompt +from ._table_accessor import TableAccessor from ._utils import MISSING, MISSING_TYPE, is_ibis_table from .tools import ( UpdateDashboardData, @@ -72,6 +73,16 @@ def __init__( "Table name must begin with a letter and contain only letters, numbers, and underscores", ) + # Multi-table storage: dict of data sources keyed by table name + self._data_sources: dict[str, DataSource] = {} + + # Track server initialization state for add/remove table validation + self._server_initialized = False + + # Store metadata for multi-table support + self._table_relationships: dict[str, dict[str, str]] = {} + self._table_descriptions: dict[str, str] = {} + self.tools = normalize_tools(tools, default=("update", "query")) self.greeting = greeting.read_text() if isinstance(greeting, Path) else greeting @@ -93,14 +104,15 @@ def __init__( self._data_source: DataSource | None = normalize_data_source( data_source, table_name ) + self._data_sources[table_name] = self._data_source self._build_system_prompt() else: self._data_source = None self._system_prompt = None def _build_system_prompt(self) -> None: - """Build/rebuild the system prompt from current data source.""" - if self._data_source is None: + """Build/rebuild the system prompt from current data sources.""" + if not self._data_sources: raise RuntimeError("Cannot build system prompt without data_source") prompt_template = self._prompt_template @@ -109,10 +121,12 @@ def 
@property
def data_source(self) -> DataSource | None:
    """
    Return the sole registered data source (single-table compatibility).

    Yields ``None`` when no table is registered. When more than one table
    is registered the choice would be ambiguous, so a ``ValueError`` is
    raised pointing the caller at ``.table("name").data_source``.
    """
    registered = self._data_sources

    # Ambiguous case first: refuse to pick one of several tables.
    if len(registered) > 1:
        names = ", ".join(registered.keys())
        raise ValueError(
            f"Multiple tables present ({names}). "
            "Use qc.table('name').data_source instead."
        )

    # Zero or one entry left: hand back the entry if there is one.
    for sole_source in registered.values():
        return sole_source
    return None
def remove_table(self, table_name: str) -> None:
    """
    Remove a table from the QueryChat instance.

    Besides dropping the data source and its own metadata, this also
    discards relationship hints declared on *other* tables that point at
    the removed table, so the rebuilt system prompt never suggests a JOIN
    against a table that is no longer in the schema.

    Parameters
    ----------
    table_name
        Name of the table to remove.

    Raises
    ------
    ValueError
        If table doesn't exist or is the last remaining table.
    RuntimeError
        If called after server() has been invoked.
    """
    if self._server_initialized:
        raise RuntimeError(
            "Cannot remove tables after server initialization. "
            "Configure all tables before calling .server() or .app()."
        )

    if table_name not in self._data_sources:
        available = ", ".join(self._data_sources.keys())
        raise ValueError(f"Table '{table_name}' not found. Available: {available}")

    if len(self._data_sources) == 1:
        raise ValueError(
            "Cannot remove last table. At least one table is required."
        )

    # Clean up the data source before forgetting it
    removed = self._data_sources[table_name]
    removed.cleanup()
    del self._data_sources[table_name]

    # Keep the legacy single-source attribute from pointing at a source
    # that has just been cleaned up; fall back to the first remaining one.
    if getattr(self, "_data_source", None) is removed:
        self._data_source = next(iter(self._data_sources.values()))

    # Remove metadata owned by the table itself
    self._table_relationships.pop(table_name, None)
    self._table_descriptions.pop(table_name, None)

    # Drop hints of the form "<removed>.<column>" held by other tables.
    # New dicts are built so caller-supplied dicts are never mutated.
    dangling_prefix = f"{table_name}."
    surviving: dict[str, dict[str, str]] = {}
    for owner, rels in self._table_relationships.items():
        kept = {
            column: target
            for column, target in rels.items()
            if not target.startswith(dangling_prefix)
        }
        if kept:
            surviving[owner] = kept
    self._table_relationships = surviving

    # Rebuild system prompt without removed table
    self._build_system_prompt()
reset_dashboard(): + def reset_dashboard(_table_name: str = ""): sql.set(None) title.set(None) @@ -143,8 +141,10 @@ def reset_dashboard(): # Legacy pattern: client is Chat instance chat = copy.deepcopy(client) - chat.register_tool(tool_update_dashboard(data_source, update_dashboard)) - chat.register_tool(tool_query(data_source)) + # Wrap single data source in dict for tool functions + data_sources = {data_source.table_name: data_source} + chat.register_tool(tool_update_dashboard(data_sources, update_dashboard)) + chat.register_tool(tool_query(data_sources)) chat.register_tool(tool_reset_dashboard(reset_dashboard)) # Execute query when SQL changes diff --git a/pkg-py/src/querychat/_system_prompt.py b/pkg-py/src/querychat/_system_prompt.py index 2b9cdb04..017ebc03 100644 --- a/pkg-py/src/querychat/_system_prompt.py +++ b/pkg-py/src/querychat/_system_prompt.py @@ -10,49 +10,85 @@ from ._querychat_base import TOOL_GROUPS +def _load_text(value: str | Path | None) -> str | None: + """Load text from string or Path.""" + if isinstance(value, Path): + return value.read_text() + return value + + class QueryChatSystemPrompt: """Manages system prompt template and component assembly.""" def __init__( self, prompt_template: str | Path, - data_source: DataSource, + data_source: DataSource | None = None, + data_sources: dict[str, DataSource] | None = None, data_description: str | Path | None = None, extra_instructions: str | Path | None = None, categorical_threshold: int = 10, + relationships: dict[str, dict[str, str]] | None = None, + table_descriptions: dict[str, str] | None = None, ): """ Initialize with prompt components. 
Args: prompt_template: Mustache template string or path to template file - data_source: DataSource instance for schema generation + data_source: Single DataSource instance (backwards compatibility) + data_sources: Dictionary of DataSource instances keyed by table name data_description: Optional data context (string or path) extra_instructions: Optional custom LLM instructions (string or path) categorical_threshold: Threshold for categorical column detection + relationships: Optional dict mapping table.column to foreign table.column + table_descriptions: Optional dict mapping table names to descriptions """ + # Handle both single source (backwards compat) and dict of sources + if data_sources is not None: + self._data_sources = data_sources + elif data_source is not None: + self._data_sources = {data_source.table_name: data_source} + else: + raise ValueError("Either data_source or data_sources must be provided") + + # Load template if isinstance(prompt_template, Path): self.template = prompt_template.read_text() else: self.template = prompt_template - if isinstance(data_description, Path): - self.data_description = data_description.read_text() - else: - self.data_description = data_description + # Store metadata + self.data_description = _load_text(data_description) + self.extra_instructions = _load_text(extra_instructions) + self.categorical_threshold = categorical_threshold + self._relationships = relationships or {} + self._table_descriptions = table_descriptions or {} - if isinstance(extra_instructions, Path): - self.extra_instructions = extra_instructions.read_text() - else: - self.extra_instructions = extra_instructions + # Generate combined schema + self.schema = self._generate_combined_schema() - self.schema = data_source.get_schema( - categorical_threshold=categorical_threshold - ) + def _generate_combined_schema(self) -> str: + """Generate schema string for all tables.""" + schemas = [] + for name, source in self._data_sources.items(): + schema = 
source.get_schema(categorical_threshold=self.categorical_threshold) + schemas.append(f'\n{schema}\n
') - self.categorical_threshold = categorical_threshold - self.data_source = data_source + return "\n\n".join(schemas) + + def _generate_relationships_text(self) -> str: + """Generate relationship information text.""" + if not self._relationships: + return "" + + lines = [] + for table, rels in self._relationships.items(): + for local_col, foreign_ref in rels.items(): + lines.append(f"- {table}.{local_col} references {foreign_ref}") + + return "\n".join(lines) def render(self, tools: tuple[TOOL_GROUPS, ...] | None) -> str: """ @@ -65,10 +101,11 @@ def render(self, tools: tuple[TOOL_GROUPS, ...] | None) -> str: Fully rendered system prompt string """ - is_duck_db = self.data_source.get_db_type().lower() == "duckdb" + first_source = next(iter(self._data_sources.values())) + is_duck_db = first_source.get_db_type().lower() == "duckdb" context = { - "db_type": self.data_source.get_db_type(), + "db_type": first_source.get_db_type(), "is_duck_db": is_duck_db, "schema": self.schema, "data_description": self.data_description, @@ -76,6 +113,15 @@ def render(self, tools: tuple[TOOL_GROUPS, ...] 
class TableAccessor:
    """
    Accessor for a specific table's state and data.

    This class provides access to per-table data source and (when server is
    initialized) reactive state. It is returned by QueryChat.table("name").

    Parameters
    ----------
    querychat
        The parent QueryChat instance.
    table_name
        The name of the table this accessor represents.

    """

    def __init__(self, querychat: QueryChatBase, table_name: str):
        self._querychat = querychat
        self._table_name = table_name

    def __repr__(self) -> str:
        return f"{type(self).__name__}(table_name={self._table_name!r})"

    @property
    def table_name(self) -> str:
        """The name of this table."""
        return self._table_name

    @property
    def data_source(self) -> DataSource:
        """The data source for this table."""
        return self._querychat._data_sources[self._table_name]

    def _table_state(self) -> Any:
        """
        Return this table's server-side state object.

        Single home for the "server not initialized" guard shared by
        df(), sql() and title().

        Raises
        ------
        RuntimeError
            If the parent QueryChat has no ``_vals`` yet.

        """
        if not hasattr(self._querychat, "_vals"):
            raise RuntimeError("Server not initialized. Call .server() first.")
        # NOTE(review): assumes `_vals` exposes a `tables` mapping keyed by
        # table name; that attribute is not defined in this module - confirm
        # against the server values type.
        return self._querychat._vals.tables[self._table_name]

    def df(self) -> Any:
        """
        Return the current filtered data for this table (reactive).

        Returns the native DataFrame type (polars, pandas, ibis.Table, etc.)
        for this table's data source.

        Raises
        ------
        RuntimeError
            If called before server initialization.

        """
        return self._table_state().df()

    def sql(self) -> str | None:
        """
        Return the current SQL filter for this table (reactive).

        Raises
        ------
        RuntimeError
            If called before server initialization.

        """
        return self._table_state().sql.get()

    def title(self) -> str | None:
        """
        Return the current filter title for this table (reactive).

        Raises
        ------
        RuntimeError
            If called before server initialization.

        """
        return self._table_state().title.get()

    def ui(self) -> ui.Tag:
        """
        Render the UI for this table (data table + SQL display).

        Returns
        -------
        Tag
            A Shiny UI element containing the data table and SQL display.

        """
        # Imported lazily so the accessor can be used without Shiny loaded.
        from shiny import ui as shiny_ui

        table_id = f"{self._querychat.id}_{self._table_name}"

        return shiny_ui.card(
            shiny_ui.card_header(self._table_name),
            shiny_ui.output_data_frame(f"{table_id}_dt"),
            shiny_ui.output_text(f"{table_id}_sql"),
        )
+{{#relationships}} + +{{{relationships}}} + + +When answering questions that span multiple tables, use JOINs based on these relationships. +{{/relationships}} + +For security reasons, you may only query {{#relationships}}these specific tables{{/relationships}}{{^relationships}}this specific table{{/relationships}}. {{#include_query_guidelines}} ## SQL Query Guidelines @@ -81,8 +89,9 @@ You can handle these types of requests: When the user asks you to filter or sort the dashboard, e.g. "Show me..." or "Which ____ have the highest ____?" or "Filter to only include ____": - Write a {{db_type}} SQL SELECT query -- Call `querychat_update_dashboard` with the query and a descriptive title -- The query MUST return all columns from the schema (you can use `SELECT *`) +- Call `querychat_update_dashboard` with the query, table name, and a descriptive title +- You MUST specify the `table` parameter to indicate which table to filter +- The query MUST return all columns from the specified table's schema (you can use `SELECT *`) - Use a single SQL query even if complex (subqueries and CTEs are fine) - Optimize for **readability over efficiency** - Include SQL comments to explain complex logic @@ -92,7 +101,7 @@ The user may ask to "reset" or "start over"; that means clearing the filter and **Filtering Example:** User: "Show only rows where sales are above average" -Tool Call: `querychat_update_dashboard({query: "SELECT * FROM table WHERE sales > (SELECT AVG(sales) FROM table)", title: "Above average sales"})` +Tool Call: `querychat_update_dashboard({query: "SELECT * FROM sales_data WHERE sales > (SELECT AVG(sales) FROM sales_data)", table: "sales_data", title: "Above average sales"})` Response: "" No further response needed, the user will see the updated dashboard. 
diff --git a/pkg-py/src/querychat/prompts/tool-reset-dashboard.md b/pkg-py/src/querychat/prompts/tool-reset-dashboard.md index 7d78b4b4..aba3c3c8 100644 --- a/pkg-py/src/querychat/prompts/tool-reset-dashboard.md +++ b/pkg-py/src/querychat/prompts/tool-reset-dashboard.md @@ -6,6 +6,11 @@ If the user asks to reset the dashboard, simply call this tool with no other res If the user asks to start over, call this tool and then provide a new set of suggestions for next steps. Include suggestions that encourage exploration of the data in new directions. +Parameters +---------- +table + The name of the table to reset. + Returns ------- : diff --git a/pkg-py/src/querychat/prompts/tool-update-dashboard.md b/pkg-py/src/querychat/prompts/tool-update-dashboard.md index dae9861c..36f4be16 100644 --- a/pkg-py/src/querychat/prompts/tool-update-dashboard.md +++ b/pkg-py/src/querychat/prompts/tool-update-dashboard.md @@ -2,6 +2,8 @@ Filter and sort the dashboard data This tool executes a {{db_type}} SQL SELECT query to filter or sort the data used in the dashboard. +The `table` parameter specifies which table to filter. Use the table name exactly as shown in the schema. + **When to use:** Call this tool whenever the user requests filtering, sorting, or data manipulation on the dashboard with questions like "Show me..." or "Which records have...". This tool is appropriate for any request that involves showing a subset of the data or reordering it. **When not to use:** Do NOT use this tool for general questions about the data that can be answered with a single value or summary statistic. For those questions, use the `querychat_query` tool instead. @@ -16,6 +18,8 @@ This tool executes a {{db_type}} SQL SELECT query to filter or sort the data use Parameters ---------- +table : + The name of the table to filter. Must match exactly one of the table names from the schema. 
query : A {{db_type}} SQL SELECT query that MUST return all existing schema columns (use SELECT * or explicitly list all columns). May include additional computed columns, subqueries, CTEs, WHERE clauses, ORDER BY, and any {{db_type}}-supported SQL functions. title : diff --git a/pkg-py/src/querychat/tools.py b/pkg-py/src/querychat/tools.py index 67ea453f..4e134715 100644 --- a/pkg-py/src/querychat/tools.py +++ b/pkg-py/src/querychat/tools.py @@ -40,6 +40,8 @@ class UpdateDashboardData(TypedDict): Attributes ---------- + table + The name of the table being filtered. query The SQL query string to execute for filtering/sorting the dashboard. title @@ -54,6 +56,7 @@ class UpdateDashboardData(TypedDict): def log_update(data: UpdateDashboardData): + print(f"Table: {data['table']}") print(f"Executing: {data['query']}") print(f"Title: {data['title']}") @@ -65,6 +68,7 @@ def log_update(data: UpdateDashboardData): """ + table: str query: str title: str @@ -77,16 +81,25 @@ def _read_prompt_template(filename: str, **kwargs) -> str: def _update_dashboard_impl( - data_source: DataSource, + data_sources: dict[str, DataSource], update_fn: Callable[[UpdateDashboardData], None], -) -> Callable[[str, str], ContentToolResult]: +) -> Callable[[str, str, str], ContentToolResult]: """Create the implementation function for updating the dashboard.""" - def update_dashboard(query: str, title: str) -> ContentToolResult: + def update_dashboard(table: str, query: str, title: str) -> ContentToolResult: error = None markdown = f"```sql\n{query}\n```" value = "Dashboard updated. Use `query` tool to review results, if needed." + # Validate table exists + if table not in data_sources: + available = ", ".join(data_sources.keys()) + error = f"Table '{table}' not found. 
Available: {available}" + markdown += f"\n\n> Error: {error}" + return ContentToolResult(value=markdown, error=Exception(error)) + + data_source = data_sources[table] + try: # Test the query but don't execute it yet data_source.test_query(query, require_all_columns=True) @@ -94,13 +107,14 @@ def update_dashboard(query: str, title: str) -> ContentToolResult: # Add Apply Filter button button_html = f"""""" # Call the callback with TypedDict data on success - update_fn({"query": query, "title": title}) + update_fn({"table": table, "query": query, "title": title}) except Exception as e: error = str(e) @@ -125,30 +139,32 @@ def update_dashboard(query: str, title: str) -> ContentToolResult: def tool_update_dashboard( - data_source: DataSource, + data_sources: dict[str, DataSource], update_fn: Callable[[UpdateDashboardData], None], ) -> Tool: """ - Create a tool that modifies the data presented in the dashboard based on the SQL query. + Create a tool that modifies the data presented in the dashboard. Parameters ---------- - data_source - The data source to query against + data_sources + Dictionary of data sources keyed by table name. update_fn - Callback function to call with UpdateDashboardData when update succeeds + Callback function to call with UpdateDashboardData when update succeeds. Returns ------- Tool - A tool that can be registered with chatlas + A tool that can be registered with chatlas. 
""" - impl = _update_dashboard_impl(data_source, update_fn) + impl = _update_dashboard_impl(data_sources, update_fn) + # Get db_type from first source (all should be same dialect) + first_source = next(iter(data_sources.values())) description = _read_prompt_template( "tool-update-dashboard.md", - db_type=data_source.get_db_type(), + db_type=first_source.get_db_type(), ) impl.__doc__ = description @@ -160,17 +176,18 @@ def tool_update_dashboard( def _reset_dashboard_impl( - reset_fn: Callable[[], None], -) -> Callable[[], ContentToolResult]: + reset_fn: Callable[[str], None], +) -> Callable[[str], ContentToolResult]: """Create the implementation function for resetting the dashboard.""" - def reset_dashboard() -> ContentToolResult: + def reset_dashboard(table: str) -> ContentToolResult: # Call the callback to reset - reset_fn() + reset_fn(table) # Add Reset Filter button - button_html = """