From e785ec66c34ba9f30ff74c879d2e39412cdd92bb Mon Sep 17 00:00:00 2001 From: boringdata Date: Mon, 9 Mar 2026 13:32:20 +0000 Subject: [PATCH 1/2] test: add regression test for nullif on aggregations (#169) The underlying bug was already fixed by the serialization and AggregationExpr refactors. This adds a targeted regression test. Closes #169 Co-Authored-By: Claude Opus 4.6 --- .../tests/test_deferred_api.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/boring_semantic_layer/tests/test_deferred_api.py b/src/boring_semantic_layer/tests/test_deferred_api.py index 9f7f929..1a828e6 100644 --- a/src/boring_semantic_layer/tests/test_deferred_api.py +++ b/src/boring_semantic_layer/tests/test_deferred_api.py @@ -388,6 +388,26 @@ def test_aggregation_expr_method_chaining(): assert df[df.session_id == 2]["duration_seconds"].values[0] == 0 +def test_nullif_on_aggregation_regression(): + """Regression test for #169: nullif() on aggregations caused TypeError.""" + tbl = ibis.memtable({"a": [1, 2, 3], "b": [10, 20, 30]}) + + model = ( + to_semantic_table(tbl, name="test", description="test") + .with_dimensions(a={"expr": lambda t: t.a, "description": "dim"}) + .with_measures( + ratio={ + "expr": lambda t: t.a.sum() / t.b.sum().nullif(0), + "description": "ratio", + } + ) + ) + + df = model.query(dimensions=("a",), measures=("ratio",)).execute() + assert len(df) == 3 + assert "ratio" in df.columns + + # --------------------------------------------------------------------------- # Tests for Deferred support in group_by() and aggregate() positional args # --------------------------------------------------------------------------- From c06cc98e668080cc976ed9047e7846393b970b65 Mon Sep 17 00:00:00 2001 From: boringdata Date: Mon, 9 Mar 2026 20:21:15 +0000 Subject: [PATCH 2/2] fix: repair dead links and wrong import paths across documentation Fix 15 broken links across 7 doc files: wrong example filenames, dead file paths, non-existent routes, broken anchors, and incorrect Python import paths. Regenerate doc site JSON data. Co-Authored-By: Claude Opus 4.6 --- docs/md/doc/builder-agent.md | 4 +- docs/md/doc/getting-started.md | 14 +- docs/md/doc/mcp.md | 7 +- docs/md/doc/query-agent-mcp.md | 2 +- docs/md/doc/reference.md | 2 +- docs/md/doc/semantic-table.md | 6 +- docs/web/public/bsl-data/bucketing.json | 429 +---- docs/web/public/bsl-data/builder-agent.json | 2 +- docs/web/public/bsl-data/charting.json | 661 +------- docs/web/public/bsl-data/compose.json | 99 +- docs/web/public/bsl-data/example.json | 208 +-- docs/web/public/bsl-data/getting-started.json | 196 +-- docs/web/public/bsl-data/indexing.json | 823 ++-------- docs/web/public/bsl-data/mcp.json | 2 +- .../web/public/bsl-data/nested-subtotals.json | 880 ++-------- .../web/public/bsl-data/percentage-total.json | 184 +-- docs/web/public/bsl-data/query-agent-mcp.json | 2 +- docs/web/public/bsl-data/query-methods.json | 1461 ++--------------- docs/web/public/bsl-data/reference.json | 2 +- docs/web/public/bsl-data/semantic-table.json | 524 +----- docs/web/public/bsl-data/sessionized.json | 472 +----- docs/web/public/bsl-data/windowing.json | 1187 ++----------- docs/web/public/bsl-data/yaml-config.json | 87 +- examples/README.md | 13 +- 24 files changed, 577 insertions(+), 6690 deletions(-) diff --git a/docs/md/doc/builder-agent.md b/docs/md/doc/builder-agent.md index 5ced254..039133a 100644 --- a/docs/md/doc/builder-agent.md +++ b/docs/md/doc/builder-agent.md @@ -4,7 +4,7 @@ The Builder Agent is focused on authoring and evolving semantic tables: defining ## Claude Code Skill -- File: [`src/boring_semantic_layer/agents/claude-code/bsl-model-building/SKILL.md`](../../src/boring_semantic_layer/agents/claude-code/bsl-model-building/SKILL.md) +- File: [`docs/md/skills/claude-code/bsl-model-builder/SKILL.md`](../skills/claude-code/bsl-model-builder/SKILL.md) - Use it when you want Claude Desktop to help write new semantic tables, add time dimensions, or compose models. - The skill includes: - Python DSL examples showing `SemanticTable(...)`, `.with_dimensions`, `.with_measures`, `.with_calculated_measures`, and `.join()` patterns. @@ -17,7 +17,7 @@ The Builder Agent is focused on authoring and evolving semantic tables: defining Running inside the Codex CLI (the environment this assistant uses) already gives you repo access. Pair that with the Builder skill to automate scaffolding: -1. Open `docs/content/semantic-table.md` or the relevant source file in your editor for context. +1. Open `docs/md/doc/semantic-table.md` or the relevant source file in your editor for context. 2. Ask Codex to "apply the builder skill" when drafting new semantic tables. It will reference `bsl-model-building/SKILL.md` to keep the API usage correct. 3. Use the CLI's `apply_patch` output directly to drop in the generated models or YAML definitions. diff --git a/docs/md/doc/getting-started.md b/docs/md/doc/getting-started.md index 1a010b6..c073db3 100644 --- a/docs/md/doc/getting-started.md +++ b/docs/md/doc/getting-started.md @@ -151,13 +151,13 @@ Then run: bsl chat --sm my_model.yaml ``` -See [Query Agent Chat](/examples/query-agent-chat) for full documentation on YAML models with joins and advanced features. +See [Query Agent Chat](/agents/chat) for full documentation on YAML models with joins and advanced features. ## Next Steps -- [Chat with your data](/examples/query-agent-chat) using natural language -- Define models in [YAML configuration](/examples/yaml-config) -- Configure database connections with [Profiles](/examples/profile) -- Learn how to [Build Semantic Tables](/examples/semantic-table) with dimensions, measures, and joins -- Explore [Query Methods](/examples/query-methods) for retrieving data -- Discover how to [Compose Models](/examples/compose) together +- [Chat with your data](/agents/chat) using natural language +- Define models in [YAML configuration](/building/yaml) +- Configure database connections with [Profiles](/building/profile) +- Learn how to [Build Semantic Tables](/building/semantic-tables) with dimensions, measures, and joins +- Explore [Query Methods](/querying/methods) for retrieving data +- Discover how to [Compose Models](/building/compose) together diff --git a/docs/md/doc/mcp.md b/docs/md/doc/mcp.md index 6b4c970..fedbcc2 100644 --- a/docs/md/doc/mcp.md +++ b/docs/md/doc/mcp.md @@ -3,7 +3,7 @@ BSL includes built-in support for the [Model Context Protocol (MCP)](https://github.com/modelcontextprotocol/python-sdk), allowing you to expose your semantic models to Large Language Models like Claude. -**Pro tip:** Use [descriptions in dimensions and measures](/building/semantic-tables#adding-descriptions) to make your models more AI-friendly. Descriptions help provide context to LLMs, enabling them to understand what each field represents and when to use them. +**Pro tip:** Use [descriptions in dimensions and measures](/building/semantic-tables#with_dimensions) to make your models more AI-friendly. Descriptions help provide context to LLMs, enabling them to understand what each field represents and when to use them. ## Installation @@ -20,8 +20,7 @@ Create an MCP server script that exposes your semantic models: ```python import ibis -from boring_semantic_layer.semantic_api import to_semantic_table -from boring_semantic_layer.api.mcp import MCPSemanticModel +from boring_semantic_layer import to_semantic_table, MCPSemanticModel # Create synthetic flights data flights_data = ibis.memtable({ @@ -257,7 +256,7 @@ When exposing models through MCP, you need to explicitly define time dimensions To define a time dimension, set `is_time_dimension=True` and specify the `smallest_time_grain`: ```python -from boring_semantic_layer.semantic_api import to_semantic_table +from boring_semantic_layer import to_semantic_table flights = ( to_semantic_table(flights_data, name="flights") diff --git a/docs/md/doc/query-agent-mcp.md b/docs/md/doc/query-agent-mcp.md index f48f108..9cb3fcc 100644 --- a/docs/md/doc/query-agent-mcp.md +++ b/docs/md/doc/query-agent-mcp.md @@ -3,7 +3,7 @@ BSL includes built-in support for the [Model Context Protocol (MCP)](https://github.com/modelcontextprotocol/python-sdk), allowing you to expose your semantic models to Large Language Models like Claude. -**Pro tip:** Use [descriptions in dimensions and measures](/building/semantic-tables#adding-descriptions) to make your models more AI-friendly. Descriptions help provide context to LLMs, enabling them to understand what each field represents and when to use them. +**Pro tip:** Use [descriptions in dimensions and measures](/building/semantic-tables#with_dimensions) to make your models more AI-friendly. Descriptions help provide context to LLMs, enabling them to understand what each field represents and when to use them. ## Installation diff --git a/docs/md/doc/reference.md b/docs/md/doc/reference.md index b8e2089..d0c1428 100644 --- a/docs/md/doc/reference.md +++ b/docs/md/doc/reference.md @@ -89,7 +89,7 @@ Load semantic models from a YAML configuration file. Returns a dictionary of sem **Example:** ```python -from boring_semantic_layer.yaml import from_yaml +from boring_semantic_layer import from_yaml models = from_yaml("models.yaml") flights_st = models["flights"] diff --git a/docs/md/doc/semantic-table.md b/docs/md/doc/semantic-table.md index 9f122fd..897a07b 100644 --- a/docs/md/doc/semantic-table.md +++ b/docs/md/doc/semantic-table.md @@ -361,6 +361,6 @@ date_range_join = flights_st.join( ## Next Steps -- Learn about [Composing Models](/examples/compose) -- Explore [YAML Configuration](/examples/yaml-config) -- Start [Querying Semantic Tables](/examples/query-methods) +- Learn about [Composing Models](/building/compose) +- Explore [YAML Configuration](/building/yaml) +- Start [Querying Semantic Tables](/querying/methods) diff --git a/docs/web/public/bsl-data/bucketing.json b/docs/web/public/bsl-data/bucketing.json index 14214f0..8eba392 100644 --- a/docs/web/public/bsl-data/bucketing.json +++ b/docs/web/public/bsl-data/bucketing.json @@ -138,7 +138,7 @@ }, "semantic_table_def": { "code": "from boring_semantic_layer import to_semantic_table\n\ncustomer_st = (\n to_semantic_table(customer_data, name=\"customers\")\n .with_dimensions(\n customer_id=lambda t: t.customer_id,\n age=lambda t: t.age,\n product_category=lambda t: t.product_category\n )\n .with_measures(\n customer_count=lambda t: t.count(),\n total_revenue=lambda t: t.purchase_amount.sum(),\n avg_purchase=lambda t: t.purchase_amount.mean().round(2)\n )\n)", - "sql": "SELECT\n *\nFROM \"ibis_pandas_memtable_v562rv5tjbedjfzl3nwiinqt4m\"", + "sql": "SELECT\n *\nFROM \"ibis_pandas_memtable_5ucshn4zbrczvjw54qyamz32fm\"", "plan": "SemanticTable: customers\n customer_id [dim]\n age [dim]\n product_category [dim]\n customer_count [measure]\n total_revenue [measure]\n avg_purchase [measure]", "table": { "columns": [ @@ -273,7 +273,7 @@ }, "query_top_categories": { "code": "from ibis import _\n\n# Two-stage pipeline: rank then consolidate\nresult = (\n customer_st\n .group_by(\"product_category\")\n .aggregate(\"total_revenue\", \"customer_count\")\n .mutate(\n # Rank categories by revenue\n rank=lambda t: xo.row_number().over(\n xo.window(order_by=xo.desc(t.total_revenue))\n )\n )\n .mutate(\n # Replace non-top categories with \"Other\"\n category_display=lambda t: xo.case()\n .when(t.rank <= 2, t.product_category)\n .else_(\"Other\")\n .end(),\n # Keep original revenue for sorting (only for top categories)\n sort_value=lambda t: xo.case()\n .when(t.rank <= 2, t.total_revenue)\n .else_(0)\n .end()\n )\n .group_by(\"category_display\")\n .aggregate(\n revenue=lambda t: t.total_revenue.sum(),\n customers=lambda t: t.customer_count.sum(),\n sort_helper=lambda t: t.sort_value.max()\n )\n .mutate(\n avg_per_customer=lambda t: (t.revenue / t.customers).round(2)\n )\n .order_by(_.sort_helper.desc())\n)", - "sql": "SELECT\n \"t6\".\"category_display\",\n \"t6\".\"revenue\",\n \"t6\".\"customers\",\n \"t6\".\"sort_helper\",\n ROUND(CAST(\"t6\".\"revenue\" AS DOUBLE PRECISION) / \"t6\".\"customers\", 2) AS \"avg_per_customer\"\nFROM (\n SELECT\n \"t5\".\"category_display\",\n SUM(\"t5\".\"total_revenue\") AS \"revenue\",\n SUM(\"t5\".\"customer_count\") AS \"customers\",\n MAX(\"t5\".\"sort_value\") AS \"sort_helper\"\n FROM (\n SELECT\n \"t5\".\"product_category\",\n \"t5\".\"total_revenue\",\n \"t5\".\"customer_count\",\n \"t5\".\"rank\",\n \"t5\".\"sort_value\",\n \"t5\".\"category_display\"\n FROM (\n SELECT\n \"t4\".\"product_category\",\n \"t4\".\"total_revenue\",\n \"t4\".\"customer_count\",\n \"t4\".\"rank\",\n CASE WHEN \"t4\".\"rank\" <= 2 THEN \"t4\".\"product_category\" ELSE 'Other' END AS \"category_display\",\n CASE WHEN \"t4\".\"rank\" <= 2 THEN \"t4\".\"total_revenue\" ELSE 0 END AS \"sort_value\"\n FROM (\n SELECT\n \"t3\".\"product_category\",\n \"t3\".\"total_revenue\",\n \"t3\".\"customer_count\",\n ROW_NUMBER() OVER (ORDER BY \"t3\".\"total_revenue\" DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS \"rank\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t1\".\"product_category\",\n SUM(\"t1\".\"purchase_amount\") AS \"total_revenue\",\n COUNT(*) AS \"customer_count\"\n FROM (\n SELECT\n \"t1\".\"customer_id\",\n \"t1\".\"age\",\n \"t1\".\"purchase_amount\",\n \"t1\".\"product_category\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_v562rv5tjbedjfzl3nwiinqt4m\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"product_category\"\n ) AS \"t2\"\n ) AS \"t3\"\n ) AS \"t4\"\n ) AS \"t5\"\n ) AS t5\n GROUP BY\n \"t5\".\"category_display\"\n) AS \"t6\"\nORDER BY\n \"t6\".\"sort_helper\" DESC NULLS LAST", + "sql": "SELECT\n \"t6\".\"category_display\",\n \"t6\".\"revenue\",\n \"t6\".\"customers\",\n \"t6\".\"sort_helper\",\n ROUND(CAST(\"t6\".\"revenue\" AS DOUBLE PRECISION) / \"t6\".\"customers\", 2) AS \"avg_per_customer\"\nFROM (\n SELECT\n \"t5\".\"category_display\",\n SUM(\"t5\".\"total_revenue\") AS \"revenue\",\n SUM(\"t5\".\"customer_count\") AS \"customers\",\n MAX(\"t5\".\"sort_value\") AS \"sort_helper\"\n FROM (\n SELECT\n \"t5\".\"product_category\",\n \"t5\".\"total_revenue\",\n \"t5\".\"customer_count\",\n \"t5\".\"rank\",\n \"t5\".\"sort_value\",\n \"t5\".\"category_display\"\n FROM (\n SELECT\n \"t4\".\"product_category\",\n \"t4\".\"total_revenue\",\n \"t4\".\"customer_count\",\n \"t4\".\"rank\",\n CASE WHEN \"t4\".\"rank\" <= 2 THEN \"t4\".\"product_category\" ELSE 'Other' END AS \"category_display\",\n CASE WHEN \"t4\".\"rank\" <= 2 THEN \"t4\".\"total_revenue\" ELSE 0 END AS \"sort_value\"\n FROM (\n SELECT\n \"t3\".\"product_category\",\n \"t3\".\"total_revenue\",\n \"t3\".\"customer_count\",\n ROW_NUMBER() OVER (\n ORDER BY \"t3\".\"total_revenue\" DESC NULLS LAST\n ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING\n ) - 1 AS \"rank\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t1\".\"product_category\",\n SUM(\"t1\".\"purchase_amount\") AS \"total_revenue\",\n COUNT(*) AS \"customer_count\"\n FROM (\n SELECT\n \"t1\".\"customer_id\",\n \"t1\".\"age\",\n \"t1\".\"purchase_amount\",\n \"t1\".\"product_category\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_5ucshn4zbrczvjw54qyamz32fm\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"product_category\"\n ) AS \"t2\"\n ) AS \"t3\"\n ) AS \"t4\"\n ) AS \"t5\"\n ) AS t5\n GROUP BY\n \"t5\".\"category_display\"\n) AS \"t6\"\nORDER BY\n \"t6\".\"sort_helper\" DESC NULLS LAST", "plan": "SemanticTable: customers\n customer_id [dim]\n age [dim]\n product_category [dim]\n customer_count [measure]\n total_revenue [measure]\n avg_purchase [measure]\n-> GroupBy(product_category)\n-> Aggregate(total_revenue, customer_count)\n-> Mutate(rank)\n-> Mutate(category_display, sort_value)\n-> GroupBy(category_display)\n-> Aggregate(revenue, customers, sort_helper)\n-> Mutate(avg_per_customer)\n-> OrderBy(_CallableWrapper(_fn=_.sort_helper.desc()))", "table": { "columns": [ @@ -306,102 +306,11 @@ 89.0 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-ab49cf3817a433557e4ded931ac10a32" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "category_display", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "category_display", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "revenue", - "customers", - "sort_helper", - "avg_per_customer" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-ab49cf3817a433557e4ded931ac10a32": [ - { - "category_display": "Electronics", - "revenue": 3290, - "customers": 8, - "sort_helper": 3290, - "avg_per_customer": 411.0 - }, - { - "category_display": "Home", - "revenue": 1410, - "customers": 5, - "sort_helper": 1410, - "avg_per_customer": 282.0 - }, - { - "category_display": "Clothing", - "revenue": 626, - "customers": 7, - "sort_helper": 626, - "avg_per_customer": 89.0 - } - ] - } - } } }, "query_age_buckets": { "code": "from ibis import _\nresult = (\n customer_st\n .group_by(\"customer_id\", \"age\", \"product_category\")\n .aggregate(\"total_revenue\")\n .mutate(\n age_group=lambda t: xo.case()\n .when(t.age < 25, \"18-24\")\n .when(t.age < 35, \"25-34\")\n .when(t.age < 45, \"35-44\")\n .when(t.age < 55, \"45-54\")\n .else_(\"55+\")\n .end()\n )\n .group_by(\"age_group\")\n .aggregate(\n customers=lambda t: t.count(),\n revenue=lambda t: t.total_revenue.sum()\n )\n .order_by(_.age_group)\n)", - "sql": "SELECT\n *\nFROM (\n SELECT\n \"t3\".\"age_group\",\n COUNT(*) AS \"customers\",\n SUM(\"t3\".\"total_revenue\") AS \"revenue\"\n FROM (\n SELECT\n \"t3\".\"customer_id\",\n \"t3\".\"age\",\n \"t3\".\"product_category\",\n \"t3\".\"total_revenue\",\n \"t3\".\"age_group\"\n FROM (\n SELECT\n \"t2\".\"customer_id\",\n \"t2\".\"age\",\n \"t2\".\"product_category\",\n \"t2\".\"total_revenue\",\n CASE\n WHEN \"t2\".\"age\" < 25\n THEN '18-24'\n WHEN \"t2\".\"age\" < 35\n THEN '25-34'\n WHEN \"t2\".\"age\" < 45\n THEN '35-44'\n WHEN \"t2\".\"age\" < 55\n THEN '45-54'\n ELSE '55+'\n END AS \"age_group\"\n FROM (\n SELECT\n \"t1\".\"customer_id\",\n \"t1\".\"age\",\n \"t1\".\"product_category\",\n SUM(\"t1\".\"purchase_amount\") AS \"total_revenue\"\n FROM (\n SELECT\n \"t1\".\"purchase_amount\",\n \"t1\".\"customer_id\",\n \"t1\".\"age\",\n \"t1\".\"product_category\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_v562rv5tjbedjfzl3nwiinqt4m\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"customer_id\",\n \"t1\".\"age\",\n \"t1\".\"product_category\"\n ) AS \"t2\"\n ) AS \"t3\"\n ) AS t3\n GROUP BY\n \"t3\".\"age_group\"\n) AS \"t4\"\nORDER BY\n \"t4\".\"age_group\" ASC", + "sql": "SELECT\n *\nFROM (\n SELECT\n \"t3\".\"age_group\",\n COUNT(*) AS \"customers\",\n SUM(\"t3\".\"total_revenue\") AS \"revenue\"\n FROM (\n SELECT\n \"t3\".\"customer_id\",\n \"t3\".\"age\",\n \"t3\".\"product_category\",\n \"t3\".\"total_revenue\",\n \"t3\".\"age_group\"\n FROM (\n SELECT\n \"t2\".\"customer_id\",\n \"t2\".\"age\",\n \"t2\".\"product_category\",\n \"t2\".\"total_revenue\",\n CASE\n WHEN \"t2\".\"age\" < 25\n THEN '18-24'\n WHEN \"t2\".\"age\" < 35\n THEN '25-34'\n WHEN \"t2\".\"age\" < 45\n THEN '35-44'\n WHEN \"t2\".\"age\" < 55\n THEN '45-54'\n ELSE '55+'\n END AS \"age_group\"\n FROM (\n SELECT\n \"t1\".\"customer_id\",\n \"t1\".\"age\",\n \"t1\".\"product_category\",\n SUM(\"t1\".\"purchase_amount\") AS \"total_revenue\"\n FROM (\n SELECT\n \"t1\".\"purchase_amount\",\n \"t1\".\"customer_id\",\n \"t1\".\"age\",\n \"t1\".\"product_category\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_5ucshn4zbrczvjw54qyamz32fm\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"customer_id\",\n \"t1\".\"age\",\n \"t1\".\"product_category\"\n ) AS \"t2\"\n ) AS \"t3\"\n ) AS t3\n GROUP BY\n \"t3\".\"age_group\"\n) AS \"t4\"\nORDER BY\n \"t4\".\"age_group\" ASC", "plan": "SemanticTable: customers\n customer_id [dim]\n age [dim]\n product_category [dim]\n customer_count [measure]\n total_revenue [measure]\n avg_purchase [measure]\n-> GroupBy(customer_id, age, product_category)\n-> Aggregate(total_revenue)\n-> Mutate(age_group)\n-> GroupBy(age_group)\n-> Aggregate(customers, revenue)\n-> OrderBy(_CallableWrapper(_fn=_.age_group))", "table": { "columns": [ @@ -436,104 +345,11 @@ 2640 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-9ea38b7e3ee45e5a0a5c2b768afb1e75" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "age_group", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "age_group", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "customers", - "revenue" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-9ea38b7e3ee45e5a0a5c2b768afb1e75": [ - { - "age_group": "18-24", - "customers": 3, - "revenue": 145 - }, - { - "age_group": "25-34", - "customers": 5, - "revenue": 526 - }, - { - "age_group": "35-44", - "customers": 5, - "revenue": 1130 - }, - { - "age_group": "45-54", - "customers": 3, - "revenue": 885 - }, - { - "age_group": "55+", - "customers": 4, - "revenue": 2640 - } - ] - } - } } }, "query_purchase_tiers": { "code": "from ibis import _\nresult = (\n customer_st\n .group_by(\"customer_id\")\n .aggregate(\"total_revenue\")\n .mutate(\n tier=lambda t: xo.case()\n .when(t.total_revenue < 100, \"Small ($0-99)\")\n .when(t.total_revenue < 250, \"Medium ($100-249)\")\n .when(t.total_revenue < 500, \"Large ($250-499)\")\n .else_(\"Premium ($500+)\")\n .end()\n )\n .group_by(\"tier\")\n .aggregate(\n customer_count=lambda t: t.count(),\n total_value=lambda t: t.total_revenue.sum(),\n avg_value=lambda t: t.total_revenue.mean().round(2)\n )\n .order_by(_.total_value.desc())\n)", - "sql": "SELECT\n *\nFROM (\n SELECT\n \"t3\".\"tier\",\n COUNT(*) AS \"customer_count\",\n SUM(\"t3\".\"total_revenue\") AS \"total_value\",\n ROUND(AVG(\"t3\".\"total_revenue\"), 2) AS \"avg_value\"\n FROM (\n SELECT\n \"t3\".\"customer_id\",\n \"t3\".\"total_revenue\",\n \"t3\".\"tier\"\n FROM (\n SELECT\n \"t2\".\"customer_id\",\n \"t2\".\"total_revenue\",\n CASE\n WHEN \"t2\".\"total_revenue\" < 100\n THEN 'Small ($0-99)'\n WHEN \"t2\".\"total_revenue\" < 250\n THEN 'Medium ($100-249)'\n WHEN \"t2\".\"total_revenue\" < 500\n THEN 'Large ($250-499)'\n ELSE 'Premium ($500+)'\n END AS \"tier\"\n FROM (\n SELECT\n \"t1\".\"customer_id\",\n SUM(\"t1\".\"purchase_amount\") AS \"total_revenue\"\n FROM (\n SELECT\n \"t1\".\"age\",\n \"t1\".\"purchase_amount\",\n \"t1\".\"product_category\",\n \"t1\".\"customer_id\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_v562rv5tjbedjfzl3nwiinqt4m\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"customer_id\"\n ) AS \"t2\"\n ) AS \"t3\"\n ) AS t3\n GROUP BY\n \"t3\".\"tier\"\n) AS \"t4\"\nORDER BY\n \"t4\".\"total_value\" DESC NULLS LAST", + "sql": "SELECT\n *\nFROM (\n SELECT\n \"t3\".\"tier\",\n COUNT(*) AS \"customer_count\",\n SUM(\"t3\".\"total_revenue\") AS \"total_value\",\n ROUND(CAST(AVG(\"t3\".\"total_revenue\") AS DECIMAL), 2) AS \"avg_value\"\n FROM (\n SELECT\n \"t3\".\"customer_id\",\n \"t3\".\"total_revenue\",\n \"t3\".\"tier\"\n FROM (\n SELECT\n \"t2\".\"customer_id\",\n \"t2\".\"total_revenue\",\n CASE\n WHEN \"t2\".\"total_revenue\" < 100\n THEN 'Small ($0-99)'\n WHEN \"t2\".\"total_revenue\" < 250\n THEN 'Medium ($100-249)'\n WHEN \"t2\".\"total_revenue\" < 500\n THEN 'Large ($250-499)'\n ELSE 'Premium ($500+)'\n END AS \"tier\"\n FROM (\n SELECT\n \"t1\".\"customer_id\",\n SUM(\"t1\".\"purchase_amount\") AS \"total_revenue\"\n FROM (\n SELECT\n \"t1\".\"age\",\n \"t1\".\"purchase_amount\",\n \"t1\".\"product_category\",\n \"t1\".\"customer_id\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_5ucshn4zbrczvjw54qyamz32fm\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"customer_id\"\n ) AS \"t2\"\n ) AS \"t3\"\n ) AS t3\n GROUP BY\n \"t3\".\"tier\"\n) AS \"t4\"\nORDER BY\n \"t4\".\"total_value\" DESC NULLS LAST", "plan": "SemanticTable: customers\n customer_id [dim]\n age [dim]\n product_category [dim]\n customer_count [measure]\n total_revenue [measure]\n avg_purchase [measure]\n-> GroupBy(customer_id)\n-> Aggregate(total_revenue)\n-> Mutate(tier)\n-> GroupBy(tier)\n-> Aggregate(customer_count, total_value, avg_value)\n-> OrderBy(_CallableWrapper(_fn=_.total_value.desc()))", "table": { "columns": [ @@ -568,104 +384,11 @@ 67.67 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-5b19223277d622765747f08ab007e836" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "tier", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "tier", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "customer_count", - "total_value", - "avg_value" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-5b19223277d622765747f08ab007e836": [ - { - "tier": "Premium ($500+)", - "customer_count": 4, - "total_value": 2640, - "avg_value": 660.0 - }, - { - "tier": "Large ($250-499)", - "customer_count": 4, - "total_value": 1255, - "avg_value": 313.75 - }, - { - "tier": "Medium ($100-249)", - "customer_count": 6, - "total_value": 1025, - "avg_value": 170.83 - }, - { - "tier": "Small ($0-99)", - "customer_count": 6, - "total_value": 406, - "avg_value": 67.67 - } - ] - } - } } }, "query_with_other": { "code": "from ibis import _\n\nresult = (\n customer_st\n .group_by(\"product_category\")\n .aggregate(\"total_revenue\", \"customer_count\")\n .mutate(\n # Mark categories with less than 5 customers as \"Other\"\n category_grouped=lambda t: xo.case()\n .when(t.customer_count >= 5, t.product_category)\n .else_(\"Other\")\n .end()\n )\n .group_by(\"category_grouped\")\n .aggregate(\n customers=lambda t: t.customer_count.sum(),\n revenue=lambda t: t.total_revenue.sum()\n )\n .mutate(\n avg_per_customer=lambda t: (t.revenue / t.customers).round(2)\n )\n .order_by(_.revenue.desc())\n)", - "sql": "SELECT\n \"t4\".\"category_grouped\",\n \"t4\".\"customers\",\n \"t4\".\"revenue\",\n ROUND(CAST(\"t4\".\"revenue\" AS DOUBLE PRECISION) / \"t4\".\"customers\", 2) AS \"avg_per_customer\"\nFROM (\n SELECT\n \"t3\".\"category_grouped\",\n SUM(\"t3\".\"customer_count\") AS \"customers\",\n SUM(\"t3\".\"total_revenue\") AS \"revenue\"\n FROM (\n SELECT\n \"t3\".\"product_category\",\n \"t3\".\"total_revenue\",\n \"t3\".\"customer_count\",\n \"t3\".\"category_grouped\"\n FROM (\n SELECT\n \"t2\".\"product_category\",\n \"t2\".\"total_revenue\",\n \"t2\".\"customer_count\",\n CASE WHEN \"t2\".\"customer_count\" >= 5 THEN \"t2\".\"product_category\" ELSE 'Other' END AS \"category_grouped\"\n FROM (\n SELECT\n \"t1\".\"product_category\",\n SUM(\"t1\".\"purchase_amount\") AS \"total_revenue\",\n COUNT(*) AS \"customer_count\"\n FROM (\n SELECT\n \"t1\".\"customer_id\",\n \"t1\".\"age\",\n \"t1\".\"purchase_amount\",\n \"t1\".\"product_category\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_v562rv5tjbedjfzl3nwiinqt4m\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"product_category\"\n ) AS \"t2\"\n ) AS \"t3\"\n ) AS t3\n GROUP BY\n \"t3\".\"category_grouped\"\n) AS \"t4\"\nORDER BY\n \"t4\".\"revenue\" DESC NULLS LAST", + "sql": "SELECT\n \"t4\".\"category_grouped\",\n \"t4\".\"customers\",\n \"t4\".\"revenue\",\n ROUND(CAST(\"t4\".\"revenue\" AS DOUBLE PRECISION) / \"t4\".\"customers\", 2) AS \"avg_per_customer\"\nFROM (\n SELECT\n \"t3\".\"category_grouped\",\n SUM(\"t3\".\"customer_count\") AS \"customers\",\n SUM(\"t3\".\"total_revenue\") AS \"revenue\"\n FROM (\n SELECT\n \"t3\".\"product_category\",\n \"t3\".\"total_revenue\",\n \"t3\".\"customer_count\",\n \"t3\".\"category_grouped\"\n FROM (\n SELECT\n \"t2\".\"product_category\",\n \"t2\".\"total_revenue\",\n \"t2\".\"customer_count\",\n CASE WHEN \"t2\".\"customer_count\" >= 5 THEN \"t2\".\"product_category\" ELSE 'Other' END AS \"category_grouped\"\n FROM (\n SELECT\n \"t1\".\"product_category\",\n SUM(\"t1\".\"purchase_amount\") AS \"total_revenue\",\n COUNT(*) AS \"customer_count\"\n FROM (\n SELECT\n \"t1\".\"customer_id\",\n \"t1\".\"age\",\n \"t1\".\"purchase_amount\",\n \"t1\".\"product_category\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_5ucshn4zbrczvjw54qyamz32fm\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"product_category\"\n ) AS \"t2\"\n ) AS \"t3\"\n ) AS t3\n GROUP BY\n \"t3\".\"category_grouped\"\n) AS \"t4\"\nORDER BY\n \"t4\".\"revenue\" DESC NULLS LAST", "plan": "SemanticTable: customers\n customer_id [dim]\n age [dim]\n product_category [dim]\n customer_count [measure]\n total_revenue [measure]\n avg_purchase [measure]\n-> GroupBy(product_category)\n-> Aggregate(total_revenue, customer_count)\n-> Mutate(category_grouped)\n-> GroupBy(category_grouped)\n-> Aggregate(customers, revenue)\n-> Mutate(avg_per_customer)\n-> OrderBy(_CallableWrapper(_fn=_.revenue.desc()))", "table": { "columns": [ @@ -694,98 +417,11 @@ 89.0 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-12a0f133ab3f8d0586edcf3e313569a8" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "category_grouped", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "category_grouped", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "customers", - "revenue", - "avg_per_customer" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-12a0f133ab3f8d0586edcf3e313569a8": [ - { - "category_grouped": "Electronics", - "customers": 8, - "revenue": 3290, - "avg_per_customer": 411.0 - }, - { - "category_grouped": "Home", - "customers": 5, - "revenue": 1410, - "avg_per_customer": 282.0 - }, - { - "category_grouped": "Clothing", - "customers": 7, - "revenue": 626, - "avg_per_customer": 89.0 - } - ] - } - } } }, "query_combined_buckets": { "code": "from ibis import _\nresult = (\n customer_st\n .group_by(\"customer_id\", \"age\")\n .aggregate(\"total_revenue\")\n .mutate(\n age_group=lambda t: xo.case()\n .when(t.age < 30, \"Young (18-29)\")\n .when(t.age < 50, \"Middle (30-49)\")\n .else_(\"Senior (50+)\")\n .end(),\n value_tier=lambda t: xo.case()\n .when(t.total_revenue < 150, \"Low Value\")\n .when(t.total_revenue < 350, \"Mid Value\")\n .else_(\"High Value\")\n .end()\n )\n .group_by(\"age_group\", \"value_tier\")\n .aggregate(\n customers=lambda t: t.count(),\n revenue=lambda t: t.total_revenue.sum()\n )\n .order_by(_.age_group, _.revenue.desc())\n)", - "sql": "SELECT\n *\nFROM (\n SELECT\n \"t3\".\"age_group\",\n \"t3\".\"value_tier\",\n COUNT(*) AS \"customers\",\n SUM(\"t3\".\"total_revenue\") AS \"revenue\"\n FROM (\n SELECT\n \"t3\".\"customer_id\",\n \"t3\".\"age\",\n \"t3\".\"total_revenue\",\n \"t3\".\"age_group\",\n \"t3\".\"value_tier\"\n FROM (\n SELECT\n \"t2\".\"customer_id\",\n \"t2\".\"age\",\n \"t2\".\"total_revenue\",\n CASE\n WHEN \"t2\".\"age\" < 30\n THEN 'Young (18-29)'\n WHEN \"t2\".\"age\" < 50\n THEN 'Middle (30-49)'\n ELSE 'Senior (50+)'\n END AS \"age_group\",\n CASE\n WHEN \"t2\".\"total_revenue\" < 150\n THEN 'Low Value'\n WHEN \"t2\".\"total_revenue\" < 350\n THEN 'Mid Value'\n ELSE 'High Value'\n END AS \"value_tier\"\n FROM (\n SELECT\n \"t1\".\"customer_id\",\n \"t1\".\"age\",\n SUM(\"t1\".\"purchase_amount\") AS \"total_revenue\"\n FROM (\n SELECT\n \"t1\".\"purchase_amount\",\n \"t1\".\"product_category\",\n \"t1\".\"customer_id\",\n \"t1\".\"age\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_v562rv5tjbedjfzl3nwiinqt4m\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"customer_id\",\n \"t1\".\"age\"\n ) AS \"t2\"\n ) AS \"t3\"\n ) AS t3\n GROUP BY\n \"t3\".\"age_group\",\n \"t3\".\"value_tier\"\n) AS \"t4\"\nORDER BY\n \"t4\".\"age_group\" ASC,\n \"t4\".\"revenue\" DESC NULLS LAST", + "sql": "SELECT\n *\nFROM (\n SELECT\n \"t3\".\"age_group\",\n \"t3\".\"value_tier\",\n COUNT(*) AS \"customers\",\n SUM(\"t3\".\"total_revenue\") AS \"revenue\"\n FROM (\n SELECT\n \"t3\".\"customer_id\",\n \"t3\".\"age\",\n \"t3\".\"total_revenue\",\n \"t3\".\"age_group\",\n \"t3\".\"value_tier\"\n FROM (\n SELECT\n \"t2\".\"customer_id\",\n \"t2\".\"age\",\n \"t2\".\"total_revenue\",\n CASE\n WHEN \"t2\".\"age\" < 30\n THEN 'Young (18-29)'\n WHEN \"t2\".\"age\" < 50\n THEN 'Middle (30-49)'\n ELSE 'Senior (50+)'\n END AS \"age_group\",\n CASE\n WHEN \"t2\".\"total_revenue\" < 150\n THEN 'Low Value'\n WHEN \"t2\".\"total_revenue\" < 350\n THEN 'Mid Value'\n ELSE 'High Value'\n END AS \"value_tier\"\n FROM (\n SELECT\n \"t1\".\"customer_id\",\n \"t1\".\"age\",\n SUM(\"t1\".\"purchase_amount\") AS \"total_revenue\"\n FROM (\n SELECT\n \"t1\".\"purchase_amount\",\n \"t1\".\"product_category\",\n \"t1\".\"customer_id\",\n \"t1\".\"age\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_5ucshn4zbrczvjw54qyamz32fm\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"customer_id\",\n \"t1\".\"age\"\n ) AS \"t2\"\n ) AS \"t3\"\n ) AS t3\n GROUP BY\n \"t3\".\"age_group\",\n \"t3\".\"value_tier\"\n) AS \"t4\"\nORDER BY\n \"t4\".\"age_group\" ASC,\n \"t4\".\"revenue\" DESC NULLS LAST", "plan": "SemanticTable: customers\n customer_id [dim]\n age [dim]\n product_category [dim]\n customer_count [measure]\n total_revenue [measure]\n avg_purchase [measure]\n-> GroupBy(customer_id, age)\n-> Aggregate(total_revenue)\n-> Mutate(age_group, value_tier)\n-> GroupBy(age_group, value_tier)\n-> Aggregate(customers, revenue)\n-> OrderBy(_CallableWrapper(_fn=_.age_group), _CallableWrapper(_fn=_.revenue.desc()))", "table": { "columns": [ @@ -820,59 +456,6 @@ 431 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-e53063265ad8373a4153f3a953d1706f" - }, - "mark": { - "type": "text" - }, - "encoding": { - "text": { - "value": "Complex query - consider custom visualization" - } - }, - "height": 400, - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-e53063265ad8373a4153f3a953d1706f": [ - { - "age_group": "Middle (30-49)", - "value_tier": "Mid Value", - "customers": 7, - "revenue": 1595 - }, - { - "age_group": "Middle (30-49)", - "value_tier": "Low Value", - "customers": 2, - "revenue": 240 - }, - { - "age_group": "Senior (50+)", - "value_tier": "High Value", - "customers": 5, - "revenue": 3060 - }, - { - "age_group": "Young (18-29)", - "value_tier": "Low Value", - "customers": 6, - "revenue": 431 - } - ] - } - } } } }, diff --git a/docs/web/public/bsl-data/builder-agent.json b/docs/web/public/bsl-data/builder-agent.json index 0bb8632..a7cc421 100644 --- a/docs/web/public/bsl-data/builder-agent.json +++ b/docs/web/public/bsl-data/builder-agent.json @@ -1,5 +1,5 @@ { - "markdown": "# Builder Agent\n\nThe Builder Agent is focused on authoring and evolving semantic tables: defining dimensions, measures, joins, calculated measures, YAML config, and validation patterns. It uses a different Claude skill than the Query Agent because it needs to reason about modeling APIs rather than issuing queries.\n\n## Claude Code Skill\n\n- File: [`src/boring_semantic_layer/agents/claude-code/bsl-model-building/SKILL.md`](../../src/boring_semantic_layer/agents/claude-code/bsl-model-building/SKILL.md)\n- Use it when you want Claude Desktop to help write new semantic tables, add time dimensions, or compose models.\n- The skill includes:\n - Python DSL examples showing `SemanticTable(...)`, `.with_dimensions`, `.with_measures`, `.with_calculated_measures`, and `.join()` patterns.\n - YAML equivalents so you can copy the same logic into declarative configs.\n - Gotchas such as \"measures must aggregate\" and \"join keys must be defined dimensions\".\n\n**Workflow:** Load the skill in Claude Desktop, paste the schema or YAML snippet you are editing, and ask \"Generate a semantic table for flights with avg delay and join to airports\". Claude will respond with both Python and YAML patterns that mirror the documentation.\n\n## Codex Skill\n\nRunning inside the Codex CLI (the environment this assistant uses) already gives you repo access. Pair that with the Builder skill to automate scaffolding:\n\n1. Open `docs/content/semantic-table.md` or the relevant source file in your editor for context.\n2. Ask Codex to \"apply the builder skill\" when drafting new semantic tables. It will reference `bsl-model-building/SKILL.md` to keep the API usage correct.\n3. Use the CLI's `apply_patch` output directly to drop in the generated models or YAML definitions.\n\nThis approach keeps all modeling work version-controlled while still benefiting from the same guard rails the Claude skill enforces.\n\n## Cursor (or other AI IDEs)\n\nIf you prefer Cursor, VS Code Copilot Chat, or another AI-assisted IDE:\n\n1. Store the builder skill text in a snippet (Cursor: *Settings -> Custom Instructions*).\n2. Add quick prompts like \"Use the BSL builder skill\" so the IDE pastes the instructions before generating code.\n3. Point the IDE at your actual data context (DuckDB schema, YAML file) so it can thread the builder guard rails through your request.\n\nRegardless of the host, the Builder Agent should always cite the same modeling patterns. That keeps upstream MCP/Query agents consistent because every semantic table passes through the same validation philosophy.\n", + "markdown": "# Builder Agent\n\nThe Builder Agent is focused on authoring and evolving semantic tables: defining dimensions, measures, joins, calculated measures, YAML config, and validation patterns. It uses a different Claude skill than the Query Agent because it needs to reason about modeling APIs rather than issuing queries.\n\n## Claude Code Skill\n\n- File: [`docs/md/skills/claude-code/bsl-model-builder/SKILL.md`](../skills/claude-code/bsl-model-builder/SKILL.md)\n- Use it when you want Claude Desktop to help write new semantic tables, add time dimensions, or compose models.\n- The skill includes:\n - Python DSL examples showing `SemanticTable(...)`, `.with_dimensions`, `.with_measures`, `.with_calculated_measures`, and `.join()` patterns.\n - YAML equivalents so you can copy the same logic into declarative configs.\n - Gotchas such as \"measures must aggregate\" and \"join keys must be defined dimensions\".\n\n**Workflow:** Load the skill in Claude Desktop, paste the schema or YAML snippet you are editing, and ask \"Generate a semantic table for flights with avg delay and join to airports\". Claude will respond with both Python and YAML patterns that mirror the documentation.\n\n## Codex Skill\n\nRunning inside the Codex CLI (the environment this assistant uses) already gives you repo access. Pair that with the Builder skill to automate scaffolding:\n\n1. Open `docs/md/doc/semantic-table.md` or the relevant source file in your editor for context.\n2. Ask Codex to \"apply the builder skill\" when drafting new semantic tables. It will reference `bsl-model-building/SKILL.md` to keep the API usage correct.\n3. Use the CLI's `apply_patch` output directly to drop in the generated models or YAML definitions.\n\nThis approach keeps all modeling work version-controlled while still benefiting from the same guard rails the Claude skill enforces.\n\n## Cursor (or other AI IDEs)\n\nIf you prefer Cursor, VS Code Copilot Chat, or another AI-assisted IDE:\n\n1. Store the builder skill text in a snippet (Cursor: *Settings -> Custom Instructions*).\n2. Add quick prompts like \"Use the BSL builder skill\" so the IDE pastes the instructions before generating code.\n3. Point the IDE at your actual data context (DuckDB schema, YAML file) so it can thread the builder guard rails through your request.\n\nRegardless of the host, the Builder Agent should always cite the same modeling patterns. That keeps upstream MCP/Query agents consistent because every semantic table passes through the same validation philosophy.\n", "queries": {}, "files": {} } diff --git a/docs/web/public/bsl-data/charting.json b/docs/web/public/bsl-data/charting.json index 87344fb..7b50d0f 100644 --- a/docs/web/public/bsl-data/charting.json +++ b/docs/web/public/bsl-data/charting.json @@ -81,70 +81,6 @@ 98 ] ] - }, - "chart": { - "chart_spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-65b589ba3da7959d7f1e1d2c1bdd1c23" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "tooltip": [ - { - "field": "origin", - "type": "nominal" - }, - { - "field": "flight_count", - "type": "quantitative" - } - ], - "x": { - "field": "origin", - "sort": null, - "type": "ordinal" - }, - "y": { - "field": "flight_count", - "type": "quantitative" - } - }, - "height": 400, - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-65b589ba3da7959d7f1e1d2c1bdd1c23": [ - { - "origin": "JFK", - "flight_count": 150 - }, - { - "origin": "ATL", - "flight_count": 145 - }, - { - "origin": "LAX", - "flight_count": 135 - }, - { - "origin": "ORD", - "flight_count": 112 - }, - { - "origin": "DFW", - "flight_count": 98 - } - ] - } - } } }, "query_bar_chart": { @@ -186,78 +122,6 @@ 78 ] ] - }, - "chart": { - "chart_spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-614905082e7810dae76519e0046336d6" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "tooltip": [ - { - "field": "origin", - "type": "nominal" - }, - { - "field": "flight_count", - "type": "quantitative" - } - ], - "x": { - "field": "origin", - "sort": null, - "type": "ordinal" - }, - "y": { - "field": "flight_count", - "type": "quantitative" - } - }, - "height": 400, - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-614905082e7810dae76519e0046336d6": [ - { - "origin": "JFK", - "flight_count": 150 - }, - { - "origin": "ATL", - "flight_count": 145 - }, - { - "origin": "LAX", - "flight_count": 135 - }, - { - "origin": "ORD", - "flight_count": 112 - }, - { - "origin": "DFW", - "flight_count": 98 - }, - { - "origin": "SFO", - "flight_count": 89 - }, - { - "origin": "DEN", - "flight_count": 78 - } - ] - } - } } }, "setup_timeseries": { @@ -299,79 +163,6 @@ 78 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-614905082e7810dae76519e0046336d6" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "tooltip": [ - { - "field": "origin", - "type": "nominal" - }, - { - "field": "flight_count", - "type": "quantitative" - } - ], - "x": { - "field": "origin", - "sort": null, - "type": "ordinal" - }, - "y": { - "field": "flight_count", - "type": "quantitative" - } - }, - "height": 400, - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-614905082e7810dae76519e0046336d6": [ - { - "origin": "JFK", - "flight_count": 150 - }, - { - "origin": "ATL", - "flight_count": 145 - }, - { - "origin": "LAX", - "flight_count": 135 - }, - { - "origin": "ORD", - "flight_count": 112 - }, - { - "origin": "DFW", - "flight_count": 98 - }, - { - "origin": "SFO", - "flight_count": 89 - }, - { - "origin": "DEN", - "flight_count": 78 - } - ] - } - } } }, "query_timeseries": { @@ -384,10 +175,6 @@ "flight_count" ], "data": [ - [ - "2024-01-06", - 161 - ], [ "2024-01-01", 145 @@ -408,86 +195,15 @@ "2024-01-04", 139 ], + [ + "2024-01-06", + 161 + ], [ "2024-01-07", 143 ] ] - }, - "chart": { - "chart_spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-be00614a4e01f643be45b19aa6eb0e0d" - }, - "mark": { - "type": "line" - }, - "encoding": { - "tooltip": [ - { - "field": "date", - "format": "%Y-%m-%d", - "type": "temporal" - }, - { - "field": "flight_count", - "type": "quantitative" - } - ], - "x": { - "axis": { - "labelAngle": -45 - }, - "field": "date", - "type": "temporal" - }, - "y": { - "field": "flight_count", - "type": "quantitative" - } - }, - "height": 400, - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-be00614a4e01f643be45b19aa6eb0e0d": [ - { - "date": "2024-01-07", - "flight_count": 143 - }, - { - "date": "2024-01-06", - "flight_count": 161 - }, - { - "date": "2024-01-01", - "flight_count": 145 - }, - { - "date": "2024-01-02", - "flight_count": 152 - }, - { - "date": "2024-01-03", - "flight_count": 148 - }, - { - "date": "2024-01-05", - "flight_count": 156 - }, - { - "date": "2024-01-04", - "flight_count": 139 - } - ] - } - } } }, "setup_heatmap": { @@ -500,18 +216,10 @@ "flight_count" ], "data": [ - [ - "2024-01-07", - 143 - ], [ "2024-01-01", 145 ], - [ - "2024-01-06", - 161 - ], [ "2024-01-02", 152 @@ -524,87 +232,19 @@ "2024-01-05", 156 ], + [ + "2024-01-07", + 143 + ], [ "2024-01-04", 139 + ], + [ + "2024-01-06", + 161 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-85bcae625279cb8285f8e212cce2f36f" - }, - "mark": { - "type": "line" - }, - "encoding": { - "tooltip": [ - { - "field": "date", - "format": "%Y-%m-%d", - "type": "temporal" - }, - { - "field": "flight_count", - "type": "quantitative" - } - ], - "x": { - "axis": { - "labelAngle": -45 - }, - "field": "date", - "type": "temporal" - }, - "y": { - "field": "flight_count", - "type": "quantitative" - } - }, - "height": 400, - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-85bcae625279cb8285f8e212cce2f36f": [ - { - "date": "2024-01-06", - "flight_count": 161 - }, - { - "date": "2024-01-07", - "flight_count": 143 - }, - { - "date": "2024-01-01", - "flight_count": 145 - }, - { - "date": "2024-01-04", - "flight_count": 139 - }, - { - "date": "2024-01-02", - "flight_count": 152 - }, - { - "date": "2024-01-03", - "flight_count": 148 - }, - { - "date": "2024-01-05", - "flight_count": 156 - } - ] - } - } } }, "query_heatmap": { @@ -624,114 +264,31 @@ 31 ], [ - "JFK", "LAX", - 45 - ], - [ "SFO", - "LAX", - 27 + 28 ], [ "JFK", "SFO", 32 ], + [ + "JFK", + "LAX", + 45 + ], [ "LAX", "JFK", 43 ], [ - "LAX", "SFO", - 28 + "LAX", + 27 ] ] - }, - "chart": { - "chart_spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-21389972f53abe08ece770cdfd6d09cb" - }, - "mark": { - "type": "rect" - }, - "encoding": { - "color": { - "field": "flight_count", - "type": "quantitative" - }, - "tooltip": [ - { - "field": "origin", - "type": "nominal" - }, - { - "field": "dest", - "type": "nominal" - }, - { - "field": "flight_count", - "type": "quantitative" - } - ], - "x": { - "field": "origin", - "sort": null, - "type": "ordinal" - }, - "y": { - "field": "dest", - "sort": null, - "type": "ordinal" - } - }, - "height": 400, - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-21389972f53abe08ece770cdfd6d09cb": [ - { - "origin": "SFO", - "dest": "JFK", - "flight_count": 31 - }, - { - "origin": "JFK", - "dest": "SFO", - "flight_count": 32 - }, - { - "origin": "JFK", - "dest": "LAX", - "flight_count": 45 - }, - { - "origin": "SFO", - "dest": "LAX", - "flight_count": 27 - }, - { - "origin": "LAX", - "dest": "JFK", - "flight_count": 43 - }, - { - "origin": "LAX", - "dest": "SFO", - "flight_count": 28 - } - ] - } - } } }, "query_multi_measure": { @@ -745,20 +302,15 @@ "avg_distance" ], "data": [ - [ - "SFO", - 89, - 1200.0 - ], [ "ATL", 145, 1650.0 ], [ - "JFK", - 150, - 2475.0 + "LAX", + 135, + 1850.0 ], [ "ORD", @@ -766,103 +318,16 @@ 950.0 ], [ - "LAX", - 135, - 1850.0 + "DEN", + 78, + 900.0 + ], + [ + "SFO", + 89, + 1200.0 ] ] - }, - "chart": { - "chart_spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-82cb19bd1764a0185ecb24320d21f265" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "origin", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "origin", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "flight_count", - "avg_distance" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-82cb19bd1764a0185ecb24320d21f265": [ - { - "origin": "ATL", - "flight_count": 145, - "avg_distance": 1650.0 - }, - { - "origin": "SFO", - "flight_count": 89, - "avg_distance": 1200.0 - }, - { - "origin": "JFK", - "flight_count": 150, - "avg_distance": 2475.0 - }, - { - "origin": "ORD", - "flight_count": 112, - "avg_distance": 950.0 - }, - { - "origin": "DFW", - "flight_count": 98, - "avg_distance": 1100.0 - } - ] - } - } } }, "query_custom_mark": { @@ -896,70 +361,6 @@ 98 ] ] - }, - "chart": { - "chart_spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-65b589ba3da7959d7f1e1d2c1bdd1c23" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "tooltip": [ - { - "field": "origin", - "type": "nominal" - }, - { - "field": "flight_count", - "type": "quantitative" - } - ], - "x": { - "field": "origin", - "sort": null, - "type": "ordinal" - }, - "y": { - "field": "flight_count", - "type": "quantitative" - } - }, - "height": 400, - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-65b589ba3da7959d7f1e1d2c1bdd1c23": [ - { - "origin": "JFK", - "flight_count": 150 - }, - { - "origin": "ATL", - "flight_count": 145 - }, - { - "origin": "LAX", - "flight_count": 135 - }, - { - "origin": "ORD", - "flight_count": 112 - }, - { - "origin": "DFW", - "flight_count": 98 - } - ] - } - } } } }, diff --git a/docs/web/public/bsl-data/compose.json b/docs/web/public/bsl-data/compose.json index 9e6e747..7ee6c8b 100644 --- a/docs/web/public/bsl-data/compose.json +++ b/docs/web/public/bsl-data/compose.json @@ -211,10 +211,10 @@ ], "data": [ [ - "Airbus A320", + "Boeing 737", 1, - 180, - 200 + 150, + 180 ], [ "Boeing 777", @@ -223,99 +223,12 @@ 350 ], [ - "Boeing 737", + "Airbus A320", 1, - 150, - 180 + 180, + 200 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-9e6a135759fa882049de0f8d362d1862" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "aircraft_model", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "aircraft_model", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "flights_flight_count", - "flights_total_passengers", - "aircraft_total_capacity" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-9e6a135759fa882049de0f8d362d1862": [ - { - "aircraft_model": "Boeing 777", - "flights_flight_count": 1, - "flights_total_passengers": 120, - "aircraft_total_capacity": 350 - }, - { - "aircraft_model": "Airbus A320", - "flights_flight_count": 1, - "flights_total_passengers": 180, - "aircraft_total_capacity": 200 - }, - { - "aircraft_model": "Boeing 737", - "flights_flight_count": 1, - "flights_total_passengers": 150, - "aircraft_total_capacity": 180 - } - ] - } - } } } }, diff --git a/docs/web/public/bsl-data/example.json b/docs/web/public/bsl-data/example.json index 65b840f..61b1ecb 100644 --- a/docs/web/public/bsl-data/example.json +++ b/docs/web/public/bsl-data/example.json @@ -3,7 +3,7 @@ "queries": { "revenue_by_customer": { "code": "result = orders_st.group_by(\"customer\").aggregate(\n \"total_orders\",\n \"total_revenue\",\n \"avg_order_value\"\n)", - "sql": "SELECT\n *\nFROM (\n SELECT\n \"t1\".\"customer\",\n COUNT(*) AS \"total_orders\",\n SUM(\"t1\".\"amount\") AS \"total_revenue\",\n AVG(\"t1\".\"amount\") AS \"avg_order_value\"\n FROM (\n SELECT\n \"t1\".\"order_id\",\n \"t1\".\"product\",\n \"t1\".\"amount\",\n \"t1\".\"quantity\",\n \"t1\".\"customer\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_vw4gnti255f2jlrgpawvduu5he\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"customer\"\n) AS \"t2\"", + "sql": "SELECT\n *\nFROM (\n SELECT\n \"t1\".\"customer\",\n COUNT(*) AS \"total_orders\",\n SUM(\"t1\".\"amount\") AS \"total_revenue\",\n AVG(\"t1\".\"amount\") AS \"avg_order_value\"\n FROM (\n SELECT\n \"t1\".\"order_id\",\n \"t1\".\"product\",\n \"t1\".\"amount\",\n \"t1\".\"quantity\",\n \"t1\".\"customer\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_kod4h4nvwbbpfag4osr7gtlqpm\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"customer\"\n) AS \"t2\"", "plan": "SemanticTable: orders\n customer [dim]\n product [dim]\n total_orders [measure]\n total_revenue [measure]\n total_quantity [measure]\n avg_order_value [measure]\n-> GroupBy(customer)\n-> Aggregate(total_orders, total_revenue, avg_order_value)", "table": { "columns": [ @@ -13,6 +13,12 @@ "avg_order_value" ], "data": [ + [ + "Alice", + 3, + 350, + 116.66666666666667 + ], [ "Bob", 2, @@ -30,112 +36,13 @@ 1, 100, 100.0 - ], - [ - "Alice", - 3, - 350, - 116.66666666666667 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-546d79ef4037296695139b770369fa2e" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "customer", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "customer", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "total_orders", - "total_revenue", - "avg_order_value" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-546d79ef4037296695139b770369fa2e": [ - { - "customer": "Bob", - "total_orders": 2, - "total_revenue": 250, - "avg_order_value": 125.0 - }, - { - "customer": "Charlie", - "total_orders": 2, - "total_revenue": 225, - "avg_order_value": 112.5 - }, - { - "customer": "David", - "total_orders": 1, - "total_revenue": 100, - "avg_order_value": 100.0 - }, - { - "customer": "Alice", - "total_orders": 3, - "total_revenue": 350, - "avg_order_value": 116.66666666666667 - } - ] - } - } } }, "product_performance": { "code": "result = orders_st.group_by(\"product\").aggregate(\n \"total_orders\",\n \"total_quantity\",\n \"total_revenue\"\n)", - "sql": "SELECT\n *\nFROM (\n SELECT\n \"t1\".\"product\",\n COUNT(*) AS \"total_orders\",\n SUM(\"t1\".\"quantity\") AS \"total_quantity\",\n SUM(\"t1\".\"amount\") AS \"total_revenue\"\n FROM (\n SELECT\n \"t1\".\"order_id\",\n \"t1\".\"customer\",\n \"t1\".\"amount\",\n \"t1\".\"quantity\",\n \"t1\".\"product\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_vw4gnti255f2jlrgpawvduu5he\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"product\"\n) AS \"t2\"", + "sql": "SELECT\n *\nFROM (\n SELECT\n \"t1\".\"product\",\n COUNT(*) AS \"total_orders\",\n SUM(\"t1\".\"quantity\") AS \"total_quantity\",\n SUM(\"t1\".\"amount\") AS \"total_revenue\"\n FROM (\n SELECT\n \"t1\".\"order_id\",\n \"t1\".\"customer\",\n \"t1\".\"amount\",\n \"t1\".\"quantity\",\n \"t1\".\"product\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_kod4h4nvwbbpfag4osr7gtlqpm\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"product\"\n) AS \"t2\"", "plan": "SemanticTable: orders\n customer [dim]\n product [dim]\n total_orders [measure]\n total_revenue [measure]\n total_quantity [measure]\n avg_order_value [measure]\n-> GroupBy(product)\n-> Aggregate(total_orders, total_quantity, total_revenue)", "table": { "columns": [ @@ -151,106 +58,19 @@ 4, 400 ], - [ - "Gadget", - 3, - 6, - 450 - ], [ "Doohickey", 1, 3, 75 + ], + [ + "Gadget", + 3, + 6, + 450 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-65396652800538ed7d6b8820e15429b6" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "product", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "product", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "total_orders", - "total_quantity", - "total_revenue" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-65396652800538ed7d6b8820e15429b6": [ - { - "product": "Widget", - "total_orders": 4, - "total_quantity": 4, - "total_revenue": 400 - }, - { - "product": "Gadget", - "total_orders": 3, - "total_quantity": 6, - "total_revenue": 450 - }, - { - "product": "Doohickey", - "total_orders": 1, - "total_quantity": 3, - "total_revenue": 75 - } - ] - } - } } } }, diff --git a/docs/web/public/bsl-data/getting-started.json b/docs/web/public/bsl-data/getting-started.json index 06a4c06..f4ada0f 100644 --- a/docs/web/public/bsl-data/getting-started.json +++ b/docs/web/public/bsl-data/getting-started.json @@ -1,9 +1,9 @@ { - "markdown": "# Getting Started with BSL\n\nBSL (Boring Semantic Layer) is a lightweight semantic layer built on top of Ibis. It allows you to define your data models once and query them anywhere.\n\n## Installation\n\n```bash\npip install boring-semantic-layer\n```\n\n## Quick Start\n\nLet's create your first Semantic Table using synthetic data in Ibis.\n\n```setup_flights\nimport ibis\nfrom boring_semantic_layer import to_semantic_table\n\n# Create sample flight data\nflights_tbl = ibis.memtable({\n \"origin\": [\"NYC\", \"LAX\", \"NYC\", \"SFO\", \"LAX\", \"NYC\", \"SFO\", \"LAX\"],\n \"destination\": [\"LAX\", \"NYC\", \"SFO\", \"NYC\", \"SFO\", \"LAX\", \"LAX\", \"SFO\"],\n \"distance\": [2789, 2789, 2902, 2902, 347, 2789, 347, 347],\n \"duration\": [330, 330, 360, 360, 65, 330, 65, 65],\n})\n```\n\nYou can then convert these tables in Semantic Tables that contains dimensios and measures definitions:\n\n```define_semantic_table\n# Define semantic table with dimensions and measures\nflights_st = (\n to_semantic_table(flights_tbl, name=\"flights\")\n .with_dimensions(\n origin=lambda t: t.origin,\n destination=lambda t: t.destination,\n )\n .with_measures(\n flight_count=lambda t: t.count(),\n total_distance=lambda t: t.distance.sum(),\n avg_duration=lambda t: t.duration.mean(),\n )\n)\n```\n\n## Query Your Data\n\nNow let's query the semantic table by grouping flights by origin:\n\n```query_by_origin\n# Group flights by origin airport\nresult = flights_st.group_by(\"origin\").aggregate(\n \"flight_count\",\n \"total_distance\",\n \"avg_duration\"\n)\n```\n\n\n\nYou can also group by destination:\n\n```query_by_destination\n# Group flights by destination airport\nresult = flights_st.group_by(\"destination\").aggregate(\n \"flight_count\",\n \"total_distance\"\n)\n```\n\n\n\n## Chat with Your Data\n\nBSL includes a built-in chat interface to query your semantic models using natural language.\n\n### 1. Install the agent extra\n\n```bash\npip install 'boring-semantic-layer[agent]'\n\n# Install your LLM provider\npip install langchain-anthropic # or langchain-openai, langchain-google-genai\n```\n\n### 2. Set your API key\n\nCreate a `.env` file:\n\n```bash\nANTHROPIC_API_KEY=sk-ant-... # or OPENAI_API_KEY, GOOGLE_API_KEY\n```\n\n### 3. Start chatting\n\nTry the built-in flights demo model (loads remote data automatically):\n\n```bash\n# Interactive mode\nbsl chat --sm https://raw.githubusercontent.com/boringdata/boring-semantic-layer/main/examples/flights.yml\n\n# Or pass a question directly\nbsl chat --sm https://raw.githubusercontent.com/boringdata/boring-semantic-layer/main/examples/flights.yml \\\n \"What are the top 5 origins by flight count?\"\n\n```\n\n### Create your own YAML model\n\nHere's a minimal example showing how to define your own semantic model:\n\n```yaml\n# my_model.yaml - Minimal BSL semantic model\n\n# Database profile - loads remote parquet into in-memory DuckDB\nprofile:\n type: duckdb\n database: \":memory:\"\n tables:\n orders_tbl: \"path/to/orders.parquet\"\n\n# Semantic model definition\norders:\n table: orders_tbl\n description: \"Order data with categories and metrics\"\n\n dimensions:\n category:\n expr: _.category\n description: \"Product category\"\n region:\n expr: _.region\n description: \"Sales region\"\n status: _.status\n\n measures:\n order_count:\n expr: _.count()\n description: \"Total number of orders\"\n total_sales:\n expr: _.amount.sum()\n description: \"Total sales amount\"\n avg_order_value:\n expr: _.amount.mean()\n description: \"Average order value\"\n```\n\nThen run:\n\n```bash\nbsl chat --sm my_model.yaml\n```\n\nSee [Query Agent Chat](/examples/query-agent-chat) for full documentation on YAML models with joins and advanced features.\n\n## Next Steps\n\n- [Chat with your data](/examples/query-agent-chat) using natural language\n- Define models in [YAML configuration](/examples/yaml-config)\n- Configure database connections with [Profiles](/examples/profile)\n- Learn how to [Build Semantic Tables](/examples/semantic-table) with dimensions, measures, and joins\n- Explore [Query Methods](/examples/query-methods) for retrieving data\n- Discover how to [Compose Models](/examples/compose) together\n", + "markdown": "# Getting Started with BSL\n\nBSL (Boring Semantic Layer) is a lightweight semantic layer built on top of Ibis. It allows you to define your data models once and query them anywhere.\n\n## Installation\n\n```bash\npip install boring-semantic-layer\n```\n\n## Quick Start\n\nLet's create your first Semantic Table using synthetic data in Ibis.\n\n```setup_flights\nimport ibis\nfrom boring_semantic_layer import to_semantic_table\n\n# Create sample flight data\nflights_tbl = ibis.memtable({\n \"origin\": [\"NYC\", \"LAX\", \"NYC\", \"SFO\", \"LAX\", \"NYC\", \"SFO\", \"LAX\"],\n \"destination\": [\"LAX\", \"NYC\", \"SFO\", \"NYC\", \"SFO\", \"LAX\", \"LAX\", \"SFO\"],\n \"distance\": [2789, 2789, 2902, 2902, 347, 2789, 347, 347],\n \"duration\": [330, 330, 360, 360, 65, 330, 65, 65],\n})\n```\n\nYou can then convert these tables in Semantic Tables that contains dimensios and measures definitions:\n\n```define_semantic_table\n# Define semantic table with dimensions and measures\nflights_st = (\n to_semantic_table(flights_tbl, name=\"flights\")\n .with_dimensions(\n origin=lambda t: t.origin,\n destination=lambda t: t.destination,\n )\n .with_measures(\n flight_count=lambda t: t.count(),\n total_distance=lambda t: t.distance.sum(),\n avg_duration=lambda t: t.duration.mean(),\n )\n)\n```\n\n## Query Your Data\n\nNow let's query the semantic table by grouping flights by origin:\n\n```query_by_origin\n# Group flights by origin airport\nresult = flights_st.group_by(\"origin\").aggregate(\n \"flight_count\",\n \"total_distance\",\n \"avg_duration\"\n)\n```\n\n\n\nYou can also group by destination:\n\n```query_by_destination\n# Group flights by destination airport\nresult = flights_st.group_by(\"destination\").aggregate(\n \"flight_count\",\n \"total_distance\"\n)\n```\n\n\n\n## Chat with Your Data\n\nBSL includes a built-in chat interface to query your semantic models using natural language.\n\n### 1. Install the agent extra\n\n```bash\npip install 'boring-semantic-layer[agent]'\n\n# Install your LLM provider\npip install langchain-anthropic # or langchain-openai, langchain-google-genai\n```\n\n### 2. Set your API key\n\nCreate a `.env` file:\n\n```bash\nANTHROPIC_API_KEY=sk-ant-... # or OPENAI_API_KEY, GOOGLE_API_KEY\n```\n\n### 3. Start chatting\n\nTry the built-in flights demo model (loads remote data automatically):\n\n```bash\n# Interactive mode\nbsl chat --sm https://raw.githubusercontent.com/boringdata/boring-semantic-layer/main/examples/flights.yml\n\n# Or pass a question directly\nbsl chat --sm https://raw.githubusercontent.com/boringdata/boring-semantic-layer/main/examples/flights.yml \\\n \"What are the top 5 origins by flight count?\"\n\n```\n\n### Create your own YAML model\n\nHere's a minimal example showing how to define your own semantic model:\n\n```yaml\n# my_model.yaml - Minimal BSL semantic model\n\n# Database profile - loads remote parquet into in-memory DuckDB\nprofile:\n type: duckdb\n database: \":memory:\"\n tables:\n orders_tbl: \"path/to/orders.parquet\"\n\n# Semantic model definition\norders:\n table: orders_tbl\n description: \"Order data with categories and metrics\"\n\n dimensions:\n category:\n expr: _.category\n description: \"Product category\"\n region:\n expr: _.region\n description: \"Sales region\"\n status: _.status\n\n measures:\n order_count:\n expr: _.count()\n description: \"Total number of orders\"\n total_sales:\n expr: _.amount.sum()\n description: \"Total sales amount\"\n avg_order_value:\n expr: _.amount.mean()\n description: \"Average order value\"\n```\n\nThen run:\n\n```bash\nbsl chat --sm my_model.yaml\n```\n\nSee [Query Agent Chat](/agents/chat) for full documentation on YAML models with joins and advanced features.\n\n## Next Steps\n\n- [Chat with your data](/agents/chat) using natural language\n- Define models in [YAML configuration](/building/yaml)\n- Configure database connections with [Profiles](/building/profile)\n- Learn how to [Build Semantic Tables](/building/semantic-tables) with dimensions, measures, and joins\n- Explore [Query Methods](/querying/methods) for retrieving data\n- Discover how to [Compose Models](/building/compose) together\n", "queries": { "query_by_origin": { "code": "# Group flights by origin airport\nresult = flights_st.group_by(\"origin\").aggregate(\n \"flight_count\",\n \"total_distance\",\n \"avg_duration\"\n)", - "sql": "SELECT\n *\nFROM (\n SELECT\n \"t1\".\"origin\",\n COUNT(*) AS \"flight_count\",\n SUM(\"t1\".\"distance\") AS \"total_distance\",\n AVG(\"t1\".\"duration\") AS \"avg_duration\"\n FROM (\n SELECT\n \"t1\".\"destination\",\n \"t1\".\"distance\",\n \"t1\".\"duration\",\n \"t1\".\"origin\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_u3fmkw7qdjf5phwurpgg5cf4ra\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"origin\"\n) AS \"t2\"", + "sql": "SELECT\n *\nFROM (\n SELECT\n \"t1\".\"origin\",\n COUNT(*) AS \"flight_count\",\n SUM(\"t1\".\"distance\") AS \"total_distance\",\n AVG(\"t1\".\"duration\") AS \"avg_duration\"\n FROM (\n SELECT\n \"t1\".\"destination\",\n \"t1\".\"distance\",\n \"t1\".\"duration\",\n \"t1\".\"origin\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_ezhx46n3pnbczjgagwnn24amcu\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"origin\"\n) AS \"t2\"", "plan": "SemanticTable: flights\n origin [dim]\n destination [dim]\n flight_count [measure]\n total_distance [measure]\n avg_duration [measure]\n-> GroupBy(origin)\n-> Aggregate(flight_count, total_distance, avg_duration)", "table": { "columns": [ @@ -13,18 +13,18 @@ "avg_duration" ], "data": [ - [ - "SFO", - 2, - 3249, - 212.5 - ], [ "NYC", 3, 8480, 340.0 ], + [ + "SFO", + 2, + 3249, + 212.5 + ], [ "LAX", 3, @@ -32,98 +32,11 @@ 153.33333333333334 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-0137bc47e458ebaf211d83de5a1f535d" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "origin", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "origin", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "flight_count", - "total_distance", - "avg_duration" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-0137bc47e458ebaf211d83de5a1f535d": [ - { - "origin": "NYC", - "flight_count": 3, - "total_distance": 8480, - "avg_duration": 340.0 - }, - { - "origin": "LAX", - "flight_count": 3, - "total_distance": 3483, - "avg_duration": 153.33333333333334 - }, - { - "origin": "SFO", - "flight_count": 2, - "total_distance": 3249, - "avg_duration": 212.5 - } - ] - } - } } }, "query_by_destination": { "code": "# Group flights by destination airport\nresult = flights_st.group_by(\"destination\").aggregate(\n \"flight_count\",\n \"total_distance\"\n)", - "sql": "SELECT\n *\nFROM (\n SELECT\n \"t1\".\"destination\",\n COUNT(*) AS \"flight_count\",\n SUM(\"t1\".\"distance\") AS \"total_distance\"\n FROM (\n SELECT\n \"t1\".\"origin\",\n \"t1\".\"distance\",\n \"t1\".\"duration\",\n \"t1\".\"destination\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_u3fmkw7qdjf5phwurpgg5cf4ra\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"destination\"\n) AS \"t2\"", + "sql": "SELECT\n *\nFROM (\n SELECT\n \"t1\".\"destination\",\n COUNT(*) AS \"flight_count\",\n SUM(\"t1\".\"distance\") AS \"total_distance\"\n FROM (\n SELECT\n \"t1\".\"origin\",\n \"t1\".\"distance\",\n \"t1\".\"duration\",\n \"t1\".\"destination\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_ezhx46n3pnbczjgagwnn24amcu\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"destination\"\n) AS \"t2\"", "plan": "SemanticTable: flights\n origin [dim]\n destination [dim]\n flight_count [measure]\n total_distance [measure]\n avg_duration [measure]\n-> GroupBy(destination)\n-> Aggregate(flight_count, total_distance)", "table": { "columns": [ @@ -133,9 +46,9 @@ ], "data": [ [ - "LAX", + "SFO", 3, - 5925 + 3596 ], [ "NYC", @@ -143,94 +56,11 @@ 5691 ], [ - "SFO", + "LAX", 3, - 3596 + 5925 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-5b11c0130dbedb35bf74107f4b9dbe42" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "destination", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "destination", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "flight_count", - "total_distance" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-5b11c0130dbedb35bf74107f4b9dbe42": [ - { - "destination": "SFO", - "flight_count": 3, - "total_distance": 3596 - }, - { - "destination": "LAX", - "flight_count": 3, - "total_distance": 5925 - }, - { - "destination": "NYC", - "flight_count": 2, - "total_distance": 5691 - } - ] - } - } } } }, diff --git a/docs/web/public/bsl-data/indexing.json b/docs/web/public/bsl-data/indexing.json index 0e64f92..856af25 100644 --- a/docs/web/public/bsl-data/indexing.json +++ b/docs/web/public/bsl-data/indexing.json @@ -3,7 +3,7 @@ "queries": { "setup_airports": { "code": "import ibis\nfrom boring_semantic_layer import to_semantic_table\n\n# Create synthetic airports data\nairports_data = ibis.memtable({\n \"code\": [\"JFK\", \"LAX\", \"ORD\", \"ATL\", \"DFW\", \"DEN\", \"SFO\", \"LAS\", \"SEA\", \"PHX\",\n \"IAH\", \"MCO\", \"EWR\", \"BOS\", \"MIA\", \"SAN\", \"LGA\", \"PHL\", \"DTW\", \"MSP\"],\n \"city\": [\"NEW YORK\", \"LOS ANGELES\", \"CHICAGO\", \"ATLANTA\", \"DALLAS\", \"DENVER\",\n \"SAN FRANCISCO\", \"LAS VEGAS\", \"SEATTLE\", \"PHOENIX\", \"HOUSTON\", \"ORLANDO\",\n \"NEWARK\", \"BOSTON\", \"MIAMI\", \"SAN DIEGO\", \"NEW YORK\", \"PHILADELPHIA\",\n \"DETROIT\", \"MINNEAPOLIS\"],\n \"state\": [\"NY\", \"CA\", \"IL\", \"GA\", \"TX\", \"CO\", \"CA\", \"NV\", \"WA\", \"AZ\",\n \"TX\", \"FL\", \"NJ\", \"MA\", \"FL\", \"CA\", \"NY\", \"PA\", \"MI\", \"MN\"],\n \"fac_type\": [\"AIRPORT\", \"AIRPORT\", \"AIRPORT\", \"AIRPORT\", \"AIRPORT\", \"AIRPORT\",\n \"AIRPORT\", \"AIRPORT\", \"AIRPORT\", \"AIRPORT\", \"AIRPORT\", \"AIRPORT\",\n \"AIRPORT\", \"AIRPORT\", \"AIRPORT\", \"AIRPORT\", \"AIRPORT\", \"AIRPORT\",\n \"AIRPORT\", \"AIRPORT\"],\n \"elevation\": [13, 128, 672, 1026, 607, 5433, 13, 2181, 433, 1135,\n 97, 96, 18, 19, 8, 17, 21, 36, 645, 841]\n})\n\n# Define semantic table\nairports = (\n to_semantic_table(airports_data, name=\"airports\")\n .with_dimensions(\n code=lambda t: t.code,\n city=lambda t: t.city,\n state=lambda t: t.state,\n fac_type=lambda t: t.fac_type,\n elevation=lambda t: t.elevation,\n )\n .with_measures(\n airport_count=lambda t: t.count(),\n avg_elevation=lambda t: t.elevation.mean(),\n )\n)", - "sql": "SELECT\n *\nFROM \"ibis_pandas_memtable_yufwhhsetbekvfprmntxu33e5i\"", + "sql": "SELECT\n *\nFROM \"ibis_pandas_memtable_wttenv6q55fldbw4bymbvoi4t4\"", "plan": "SemanticTable: airports\n code [dim]\n city [dim]\n state [dim]\n fac_type [dim]\n elevation [dim]\n airport_count [measure]\n avg_elevation [measure]", "table": { "columns": [ @@ -159,7 +159,7 @@ }, "query_index_all": { "code": "# Index all dimensions (None means all)\nresult = airports.index(None).limit(10)", - "sql": "SELECT\n *\nFROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n 'code' AS \"fieldName\",\n 'code' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t3\".\"value\" AS \"fieldValue\",\n \"t3\".\"weight\"\n FROM (\n SELECT\n \"t0\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"code\",\n \"t0\".\"city\",\n \"t0\".\"state\",\n \"t0\".\"fac_type\",\n \"t0\".\"elevation\",\n \"t0\".\"code\" AS \"value\"\n FROM \"ibis_pandas_memtable_yufwhhsetbekvfprmntxu33e5i\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"value\"\n ) AS \"t3\"\n ) AS \"t8\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'city' AS \"fieldName\",\n 'city' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t4\".\"value\" AS \"fieldValue\",\n \"t4\".\"weight\"\n FROM (\n SELECT\n \"t0\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"code\",\n \"t0\".\"city\",\n \"t0\".\"state\",\n \"t0\".\"fac_type\",\n \"t0\".\"elevation\",\n \"t0\".\"city\" AS \"value\"\n FROM \"ibis_pandas_memtable_yufwhhsetbekvfprmntxu33e5i\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"value\"\n ) AS \"t4\"\n ) AS \"t9\"\n ) AS \"t10\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'state' AS \"fieldName\",\n 'state' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t2\".\"value\" AS \"fieldValue\",\n \"t2\".\"weight\"\n FROM (\n SELECT\n \"t0\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"code\",\n \"t0\".\"city\",\n \"t0\".\"state\",\n \"t0\".\"fac_type\",\n \"t0\".\"elevation\",\n \"t0\".\"state\" AS \"value\"\n FROM \"ibis_pandas_memtable_yufwhhsetbekvfprmntxu33e5i\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"value\"\n ) AS \"t2\"\n ) AS \"t7\"\n ) AS \"t12\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'fac_type' AS \"fieldName\",\n 'fac_type' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t1\".\"value\" AS \"fieldValue\",\n \"t1\".\"weight\"\n FROM (\n SELECT\n \"t0\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"code\",\n \"t0\".\"city\",\n \"t0\".\"state\",\n \"t0\".\"fac_type\",\n \"t0\".\"elevation\",\n \"t0\".\"fac_type\" AS \"value\"\n FROM \"ibis_pandas_memtable_yufwhhsetbekvfprmntxu33e5i\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"value\"\n ) AS \"t1\"\n ) AS \"t6\"\n) AS \"t13\"\nUNION ALL\nSELECT\n *\nFROM (\n SELECT\n 'elevation' AS \"fieldName\",\n 'elevation' AS \"fieldPath\",\n 'number' AS \"fieldType\",\n CASE\n WHEN CASE\n WHEN CAST(\"t11\".\"min_val\" AS VARCHAR) IS NULL OR ' to ' IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CONCAT(CAST(\"t11\".\"min_val\" AS VARCHAR), ' to ')\n END IS NULL\n OR CAST(\"t11\".\"max_val\" AS VARCHAR) IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CONCAT(\n CASE\n WHEN CAST(\"t11\".\"min_val\" AS VARCHAR) IS NULL OR ' to ' IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CONCAT(CAST(\"t11\".\"min_val\" AS VARCHAR), ' to ')\n END,\n CAST(\"t11\".\"max_val\" AS VARCHAR)\n )\n END AS \"fieldValue\",\n \"t11\".\"weight\"\n FROM (\n SELECT\n MIN(\"t5\".\"value\") AS \"min_val\",\n MAX(\"t5\".\"value\") AS \"max_val\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"elevation\" AS \"value\"\n FROM \"ibis_pandas_memtable_yufwhhsetbekvfprmntxu33e5i\" AS \"t0\"\n WHERE\n \"t0\".\"elevation\" IS NOT NULL\n ) AS \"t5\"\n ) AS \"t11\"\n) AS \"t14\"\nLIMIT 10", + "sql": "SELECT\n *\nFROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n 'code' AS \"fieldName\",\n 'code' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t3\".\"value\" AS \"fieldValue\",\n \"t3\".\"weight\"\n FROM (\n SELECT\n \"t0\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"code\",\n \"t0\".\"city\",\n \"t0\".\"state\",\n \"t0\".\"fac_type\",\n \"t0\".\"elevation\",\n \"t0\".\"code\" AS \"value\"\n FROM \"ibis_pandas_memtable_wttenv6q55fldbw4bymbvoi4t4\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"value\"\n ) AS \"t3\"\n ) AS \"t8\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'city' AS \"fieldName\",\n 'city' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t4\".\"value\" AS \"fieldValue\",\n \"t4\".\"weight\"\n FROM (\n SELECT\n \"t0\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"code\",\n \"t0\".\"city\",\n \"t0\".\"state\",\n \"t0\".\"fac_type\",\n \"t0\".\"elevation\",\n \"t0\".\"city\" AS \"value\"\n FROM \"ibis_pandas_memtable_wttenv6q55fldbw4bymbvoi4t4\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"value\"\n ) AS \"t4\"\n ) AS \"t9\"\n ) AS \"t10\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'state' AS \"fieldName\",\n 'state' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t2\".\"value\" AS \"fieldValue\",\n \"t2\".\"weight\"\n FROM (\n SELECT\n \"t0\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"code\",\n \"t0\".\"city\",\n \"t0\".\"state\",\n \"t0\".\"fac_type\",\n \"t0\".\"elevation\",\n \"t0\".\"state\" AS \"value\"\n FROM \"ibis_pandas_memtable_wttenv6q55fldbw4bymbvoi4t4\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"value\"\n ) AS \"t2\"\n ) AS \"t7\"\n ) AS \"t12\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'fac_type' AS \"fieldName\",\n 'fac_type' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t1\".\"value\" AS \"fieldValue\",\n \"t1\".\"weight\"\n FROM (\n SELECT\n \"t0\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"code\",\n \"t0\".\"city\",\n \"t0\".\"state\",\n \"t0\".\"fac_type\",\n \"t0\".\"elevation\",\n \"t0\".\"fac_type\" AS \"value\"\n FROM \"ibis_pandas_memtable_wttenv6q55fldbw4bymbvoi4t4\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"value\"\n ) AS \"t1\"\n ) AS \"t6\"\n) AS \"t13\"\nUNION ALL\nSELECT\n *\nFROM (\n SELECT\n 'elevation' AS \"fieldName\",\n 'elevation' AS \"fieldPath\",\n 'number' AS \"fieldType\",\n CASE\n WHEN CASE\n WHEN CAST(\"t11\".\"min_val\" AS VARCHAR) IS NULL OR ' to ' IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CAST(\"t11\".\"min_val\" AS VARCHAR) || ' to '\n END IS NULL\n OR CAST(\"t11\".\"max_val\" AS VARCHAR) IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CASE\n WHEN CAST(\"t11\".\"min_val\" AS VARCHAR) IS NULL OR ' to ' IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CAST(\"t11\".\"min_val\" AS VARCHAR) || ' to '\n END || CAST(\"t11\".\"max_val\" AS VARCHAR)\n END AS \"fieldValue\",\n \"t11\".\"weight\"\n FROM (\n SELECT\n MIN(\"t5\".\"value\") AS \"min_val\",\n MAX(\"t5\".\"value\") AS \"max_val\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"elevation\" AS \"value\"\n FROM \"ibis_pandas_memtable_wttenv6q55fldbw4bymbvoi4t4\" AS \"t0\"\n WHERE\n \"t0\".\"elevation\" IS NOT NULL\n ) AS \"t5\"\n ) AS \"t11\"\n) AS \"t14\"\nLIMIT 10", "plan": "SemanticTable: airports\n code [dim]\n city [dim]\n state [dim]\n fac_type [dim]\n elevation [dim]\n airport_count [measure]\n avg_elevation [measure]\n-> Index()\n-> Limit(10)", "table": { "columns": [ @@ -171,32 +171,32 @@ ], "data": [ [ - "state", - "state", - "string", - "GA", - 1 + "elevation", + "elevation", + "number", + "8 to 5433", + 20 ], [ "state", "state", "string", - "TX", - 2 + "CA", + 3 ], [ "state", "state", "string", - "MA", + "MN", 1 ], [ "state", "state", "string", - "FL", - 2 + "WA", + 1 ], [ "fac_type", @@ -209,143 +209,44 @@ "state", "state", "string", - "IL", + "PA", 1 ], [ "state", "state", "string", - "NY", - 2 + "NV", + 1 ], [ "state", "state", "string", - "PA", + "AZ", 1 ], [ - "city", - "city", + "state", + "state", "string", - "SEATTLE", - 1 + "FL", + 2 ], [ - "city", - "city", + "state", + "state", "string", - "DETROIT", + "IL", 1 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-0b567f5dc7bc3b0c82412f7b14ac48cb" - }, - "mark": { - "type": "text" - }, - "encoding": { - "text": { - "value": "Complex query - consider custom visualization" - } - }, - "height": 400, - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-0b567f5dc7bc3b0c82412f7b14ac48cb": [ - { - "fieldName": "elevation", - "fieldPath": "elevation", - "fieldType": "number", - "fieldValue": "8 to 5433", - "weight": 20 - }, - { - "fieldName": "fac_type", - "fieldPath": "fac_type", - "fieldType": "string", - "fieldValue": "AIRPORT", - "weight": 20 - }, - { - "fieldName": "state", - "fieldPath": "state", - "fieldType": "string", - "fieldValue": "GA", - "weight": 1 - }, - { - "fieldName": "state", - "fieldPath": "state", - "fieldType": "string", - "fieldValue": "TX", - "weight": 2 - }, - { - "fieldName": "state", - "fieldPath": "state", - "fieldType": "string", - "fieldValue": "MA", - "weight": 1 - }, - { - "fieldName": "code", - "fieldPath": "code", - "fieldType": "string", - "fieldValue": "ORD", - "weight": 1 - }, - { - "fieldName": "code", - "fieldPath": "code", - "fieldType": "string", - "fieldValue": "DEN", - "weight": 1 - }, - { - "fieldName": "code", - "fieldPath": "code", - "fieldType": "string", - "fieldValue": "MSP", - "weight": 1 - }, - { - "fieldName": "city", - "fieldPath": "city", - "fieldType": "string", - "fieldValue": "LAS VEGAS", - "weight": 1 - }, - { - "fieldName": "city", - "fieldPath": "city", - "fieldType": "string", - "fieldValue": "BOSTON", - "weight": 1 - } - ] - } - } } }, "query_index_specific": { "code": "# Index only state and city\nresult = (\n airports.index(lambda t: [t.state, t.city])\n .order_by(lambda t: t.weight.desc())\n .limit(10)\n)", - "sql": "SELECT\n *\nFROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n 'code' AS \"fieldName\",\n 'code' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t3\".\"value\" AS \"fieldValue\",\n \"t3\".\"weight\"\n FROM (\n SELECT\n \"t0\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"code\",\n \"t0\".\"city\",\n \"t0\".\"state\",\n \"t0\".\"fac_type\",\n \"t0\".\"elevation\",\n \"t0\".\"code\" AS \"value\"\n FROM \"ibis_pandas_memtable_yufwhhsetbekvfprmntxu33e5i\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"value\"\n ) AS \"t3\"\n ) AS \"t8\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'city' AS \"fieldName\",\n 'city' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t4\".\"value\" AS \"fieldValue\",\n \"t4\".\"weight\"\n FROM (\n SELECT\n \"t0\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"code\",\n \"t0\".\"city\",\n \"t0\".\"state\",\n \"t0\".\"fac_type\",\n \"t0\".\"elevation\",\n \"t0\".\"city\" AS \"value\"\n FROM \"ibis_pandas_memtable_yufwhhsetbekvfprmntxu33e5i\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"value\"\n ) AS \"t4\"\n ) AS \"t9\"\n ) AS \"t10\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'state' AS \"fieldName\",\n 'state' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t2\".\"value\" AS \"fieldValue\",\n \"t2\".\"weight\"\n FROM (\n SELECT\n \"t0\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"code\",\n \"t0\".\"city\",\n \"t0\".\"state\",\n \"t0\".\"fac_type\",\n \"t0\".\"elevation\",\n \"t0\".\"state\" AS \"value\"\n FROM \"ibis_pandas_memtable_yufwhhsetbekvfprmntxu33e5i\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"value\"\n ) AS \"t2\"\n ) AS \"t7\"\n ) AS \"t12\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'fac_type' AS \"fieldName\",\n 'fac_type' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t1\".\"value\" AS \"fieldValue\",\n \"t1\".\"weight\"\n FROM (\n SELECT\n \"t0\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"code\",\n \"t0\".\"city\",\n \"t0\".\"state\",\n \"t0\".\"fac_type\",\n \"t0\".\"elevation\",\n \"t0\".\"fac_type\" AS \"value\"\n FROM \"ibis_pandas_memtable_yufwhhsetbekvfprmntxu33e5i\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"value\"\n ) AS \"t1\"\n ) AS \"t6\"\n ) AS \"t13\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'elevation' AS \"fieldName\",\n 'elevation' AS \"fieldPath\",\n 'number' AS \"fieldType\",\n CASE\n WHEN CASE\n WHEN CAST(\"t11\".\"min_val\" AS VARCHAR) IS NULL OR ' to ' IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CONCAT(CAST(\"t11\".\"min_val\" AS VARCHAR), ' to ')\n END IS NULL\n OR CAST(\"t11\".\"max_val\" AS VARCHAR) IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CONCAT(\n CASE\n WHEN CAST(\"t11\".\"min_val\" AS VARCHAR) IS NULL OR ' to ' IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CONCAT(CAST(\"t11\".\"min_val\" AS VARCHAR), ' to ')\n END,\n CAST(\"t11\".\"max_val\" AS VARCHAR)\n )\n END AS \"fieldValue\",\n \"t11\".\"weight\"\n FROM (\n SELECT\n MIN(\"t5\".\"value\") AS \"min_val\",\n MAX(\"t5\".\"value\") AS \"max_val\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"elevation\" AS \"value\"\n FROM \"ibis_pandas_memtable_yufwhhsetbekvfprmntxu33e5i\" AS \"t0\"\n WHERE\n \"t0\".\"elevation\" IS NOT NULL\n ) AS \"t5\"\n ) AS \"t11\"\n ) AS \"t14\"\n) AS \"t15\"\nORDER BY\n \"t15\".\"weight\" DESC NULLS LAST\nLIMIT 10", - "plan": "SemanticTable: airports\n code [dim]\n city [dim]\n state [dim]\n fac_type [dim]\n elevation [dim]\n airport_count [measure]\n avg_elevation [measure]\n-> Index( at 0x1460877e0>)\n-> OrderBy(_CallableWrapper(_fn= at 0x146087b00>))\n-> Limit(10)", + "sql": "SELECT\n *\nFROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n 'code' AS \"fieldName\",\n 'code' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t3\".\"value\" AS \"fieldValue\",\n \"t3\".\"weight\"\n FROM (\n SELECT\n \"t0\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"code\",\n \"t0\".\"city\",\n \"t0\".\"state\",\n \"t0\".\"fac_type\",\n \"t0\".\"elevation\",\n \"t0\".\"code\" AS \"value\"\n FROM \"ibis_pandas_memtable_wttenv6q55fldbw4bymbvoi4t4\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"value\"\n ) AS \"t3\"\n ) AS \"t8\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'city' AS \"fieldName\",\n 'city' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t4\".\"value\" AS \"fieldValue\",\n \"t4\".\"weight\"\n FROM (\n SELECT\n \"t0\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"code\",\n \"t0\".\"city\",\n \"t0\".\"state\",\n \"t0\".\"fac_type\",\n \"t0\".\"elevation\",\n \"t0\".\"city\" AS \"value\"\n FROM \"ibis_pandas_memtable_wttenv6q55fldbw4bymbvoi4t4\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"value\"\n ) AS \"t4\"\n ) AS \"t9\"\n ) AS \"t10\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'state' AS \"fieldName\",\n 'state' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t2\".\"value\" AS \"fieldValue\",\n \"t2\".\"weight\"\n FROM (\n SELECT\n \"t0\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"code\",\n \"t0\".\"city\",\n \"t0\".\"state\",\n \"t0\".\"fac_type\",\n \"t0\".\"elevation\",\n \"t0\".\"state\" AS \"value\"\n FROM \"ibis_pandas_memtable_wttenv6q55fldbw4bymbvoi4t4\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"value\"\n ) AS \"t2\"\n ) AS \"t7\"\n ) AS \"t12\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'fac_type' AS \"fieldName\",\n 'fac_type' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t1\".\"value\" AS \"fieldValue\",\n \"t1\".\"weight\"\n FROM (\n SELECT\n \"t0\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"code\",\n \"t0\".\"city\",\n \"t0\".\"state\",\n \"t0\".\"fac_type\",\n \"t0\".\"elevation\",\n \"t0\".\"fac_type\" AS \"value\"\n FROM \"ibis_pandas_memtable_wttenv6q55fldbw4bymbvoi4t4\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"value\"\n ) AS \"t1\"\n ) AS \"t6\"\n ) AS \"t13\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'elevation' AS \"fieldName\",\n 'elevation' AS \"fieldPath\",\n 'number' AS \"fieldType\",\n CASE\n WHEN CASE\n WHEN CAST(\"t11\".\"min_val\" AS VARCHAR) IS NULL OR ' to ' IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CAST(\"t11\".\"min_val\" AS VARCHAR) || ' to '\n END IS NULL\n OR CAST(\"t11\".\"max_val\" AS VARCHAR) IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CASE\n WHEN CAST(\"t11\".\"min_val\" AS VARCHAR) IS NULL OR ' to ' IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CAST(\"t11\".\"min_val\" AS VARCHAR) || ' to '\n END || CAST(\"t11\".\"max_val\" AS VARCHAR)\n END AS \"fieldValue\",\n \"t11\".\"weight\"\n FROM (\n SELECT\n MIN(\"t5\".\"value\") AS \"min_val\",\n MAX(\"t5\".\"value\") AS \"max_val\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"elevation\" AS \"value\"\n FROM \"ibis_pandas_memtable_wttenv6q55fldbw4bymbvoi4t4\" AS \"t0\"\n WHERE\n \"t0\".\"elevation\" IS NOT NULL\n ) AS \"t5\"\n ) AS \"t11\"\n ) AS \"t14\"\n) AS \"t15\"\nORDER BY\n \"t15\".\"weight\" DESC NULLS LAST\nLIMIT 10", + "plan": "SemanticTable: airports\n code [dim]\n city [dim]\n state [dim]\n fac_type [dim]\n elevation [dim]\n airport_count [measure]\n avg_elevation [measure]\n-> Index( at 0x77f239220cc0>)\n-> OrderBy(_CallableWrapper(_fn= at 0x77f239283d80>))\n-> Limit(10)", "table": { "columns": [ "fieldName", @@ -377,160 +278,61 @@ 3 ], [ - "state", - "state", + "city", + "city", "string", - "NY", + "NEW YORK", 2 ], [ "state", "state", "string", - "FL", + "TX", 2 ], [ - "city", - "city", + "state", + "state", "string", - "NEW YORK", + "NY", 2 ], [ "state", "state", "string", - "TX", + "FL", 2 ], [ "code", "code", "string", - "DFW", + "BOS", 1 ], [ "city", "city", "string", - "BOSTON", + "ORLANDO", 1 ], [ - "code", - "code", + "city", + "city", "string", - "LAS", + "SAN DIEGO", 1 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-f8727b2fea4618121f42813a0cb87a8c" - }, - "mark": { - "type": "text" - }, - "encoding": { - "text": { - "value": "Complex query - consider custom visualization" - } - }, - "height": 400, - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-f8727b2fea4618121f42813a0cb87a8c": [ - { - "fieldName": "fac_type", - "fieldPath": "fac_type", - "fieldType": "string", - "fieldValue": "AIRPORT", - "weight": 20 - }, - { - "fieldName": "elevation", - "fieldPath": "elevation", - "fieldType": "number", - "fieldValue": "8 to 5433", - "weight": 20 - }, - { - "fieldName": "state", - "fieldPath": "state", - "fieldType": "string", - "fieldValue": "CA", - "weight": 3 - }, - { - "fieldName": "state", - "fieldPath": "state", - "fieldType": "string", - "fieldValue": "NY", - "weight": 2 - }, - { - "fieldName": "state", - "fieldPath": "state", - "fieldType": "string", - "fieldValue": "FL", - "weight": 2 - }, - { - "fieldName": "city", - "fieldPath": "city", - "fieldType": "string", - "fieldValue": "NEW YORK", - "weight": 2 - }, - { - "fieldName": "state", - "fieldPath": "state", - "fieldType": "string", - "fieldValue": "TX", - "weight": 2 - }, - { - "fieldName": "code", - "fieldPath": "code", - "fieldType": "string", - "fieldValue": "LAS", - "weight": 1 - }, - { - "fieldName": "city", - "fieldPath": "city", - "fieldType": "string", - "fieldValue": "LAS VEGAS", - "weight": 1 - }, - { - "fieldName": "code", - "fieldPath": "code", - "fieldType": "string", - "fieldValue": "BOS", - "weight": 1 - } - ] - } - } } }, "query_autocomplete": { "code": "# Get city suggestions starting with \"SAN\"\nresult = (\n airports.index(lambda t: t.city)\n .filter(lambda t: t.fieldValue.like(\"SAN%\"))\n .order_by(lambda t: t.weight.desc())\n .limit(10)\n)", - "sql": "SELECT\n 'city' AS \"fieldName\",\n 'city' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t1\".\"value\" AS \"fieldValue\",\n \"t1\".\"weight\"\nFROM (\n SELECT\n \"t0\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"code\",\n \"t0\".\"city\",\n \"t0\".\"state\",\n \"t0\".\"fac_type\",\n \"t0\".\"elevation\",\n \"t0\".\"city\" AS \"value\"\n FROM \"ibis_pandas_memtable_yufwhhsetbekvfprmntxu33e5i\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"value\"\n) AS \"t1\"\nWHERE\n \"t1\".\"value\" LIKE 'SAN%'\nORDER BY\n \"t1\".\"weight\" DESC NULLS LAST\nLIMIT 10", - "plan": "SemanticTable: airports\n code [dim]\n city [dim]\n state [dim]\n fac_type [dim]\n elevation [dim]\n airport_count [measure]\n avg_elevation [measure]\n-> Index( at 0x146087c40>)\n-> Filter(\u03bb )\n-> OrderBy(_CallableWrapper(_fn= at 0x1460851c0>))\n-> Limit(10)", + "sql": "SELECT\n 'city' AS \"fieldName\",\n 'city' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t1\".\"value\" AS \"fieldValue\",\n \"t1\".\"weight\"\nFROM (\n SELECT\n \"t0\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"code\",\n \"t0\".\"city\",\n \"t0\".\"state\",\n \"t0\".\"fac_type\",\n \"t0\".\"elevation\",\n \"t0\".\"city\" AS \"value\"\n FROM \"ibis_pandas_memtable_wttenv6q55fldbw4bymbvoi4t4\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"value\"\n) AS \"t1\"\nWHERE\n \"t1\".\"value\" LIKE 'SAN%'\nORDER BY\n \"t1\".\"weight\" DESC NULLS LAST\nLIMIT 10", + "plan": "SemanticTable: airports\n code [dim]\n city [dim]\n state [dim]\n fac_type [dim]\n elevation [dim]\n airport_count [measure]\n avg_elevation [measure]\n-> Index( at 0x77f2392207c0>)\n-> Filter(\u03bb )\n-> OrderBy(_CallableWrapper(_fn= at 0x77f239283420>))\n-> Limit(10)", "table": { "columns": [ "fieldName", @@ -555,55 +357,12 @@ 1 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-74617ccf07655ebdcf6519b63e86e8a1" - }, - "mark": { - "type": "text" - }, - "encoding": { - "text": { - "value": "Complex query - consider custom visualization" - } - }, - "height": 400, - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-74617ccf07655ebdcf6519b63e86e8a1": [ - { - "fieldName": "city", - "fieldPath": "city", - "fieldType": "string", - "fieldValue": "SAN DIEGO", - "weight": 1 - }, - { - "fieldName": "city", - "fieldPath": "city", - "fieldType": "string", - "fieldValue": "SAN FRANCISCO", - "weight": 1 - } - ] - } - } } }, "query_by_type": { "code": "# Get only string field values\nresult = (\n airports.index(None)\n .filter(lambda t: t.fieldType == \"string\")\n .order_by(lambda t: t.weight.desc())\n .limit(10)\n)", - "sql": "SELECT\n *\nFROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n 'code' AS \"fieldName\",\n 'code' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t3\".\"value\" AS \"fieldValue\",\n \"t3\".\"weight\"\n FROM (\n SELECT\n \"t0\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"code\",\n \"t0\".\"city\",\n \"t0\".\"state\",\n \"t0\".\"fac_type\",\n \"t0\".\"elevation\",\n \"t0\".\"code\" AS \"value\"\n FROM \"ibis_pandas_memtable_yufwhhsetbekvfprmntxu33e5i\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"value\"\n ) AS \"t3\"\n ) AS \"t8\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'city' AS \"fieldName\",\n 'city' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t4\".\"value\" AS \"fieldValue\",\n \"t4\".\"weight\"\n FROM (\n SELECT\n \"t0\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"code\",\n \"t0\".\"city\",\n \"t0\".\"state\",\n \"t0\".\"fac_type\",\n \"t0\".\"elevation\",\n \"t0\".\"city\" AS \"value\"\n FROM \"ibis_pandas_memtable_yufwhhsetbekvfprmntxu33e5i\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"value\"\n ) AS \"t4\"\n ) AS \"t9\"\n ) AS \"t10\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'state' AS \"fieldName\",\n 'state' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t2\".\"value\" AS \"fieldValue\",\n \"t2\".\"weight\"\n FROM (\n SELECT\n \"t0\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"code\",\n \"t0\".\"city\",\n \"t0\".\"state\",\n \"t0\".\"fac_type\",\n \"t0\".\"elevation\",\n \"t0\".\"state\" AS \"value\"\n FROM \"ibis_pandas_memtable_yufwhhsetbekvfprmntxu33e5i\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"value\"\n ) AS \"t2\"\n ) AS \"t7\"\n ) AS \"t12\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'fac_type' AS \"fieldName\",\n 'fac_type' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t1\".\"value\" AS \"fieldValue\",\n \"t1\".\"weight\"\n FROM (\n SELECT\n \"t0\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"code\",\n \"t0\".\"city\",\n \"t0\".\"state\",\n \"t0\".\"fac_type\",\n \"t0\".\"elevation\",\n \"t0\".\"fac_type\" AS \"value\"\n FROM \"ibis_pandas_memtable_yufwhhsetbekvfprmntxu33e5i\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"value\"\n ) AS \"t1\"\n ) AS \"t6\"\n ) AS \"t13\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'elevation' AS \"fieldName\",\n 'elevation' AS \"fieldPath\",\n 'number' AS \"fieldType\",\n CASE\n WHEN CASE\n WHEN CAST(\"t11\".\"min_val\" AS VARCHAR) IS NULL OR ' to ' IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CONCAT(CAST(\"t11\".\"min_val\" AS VARCHAR), ' to ')\n END IS NULL\n OR CAST(\"t11\".\"max_val\" AS VARCHAR) IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CONCAT(\n CASE\n WHEN CAST(\"t11\".\"min_val\" AS VARCHAR) IS NULL OR ' to ' IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CONCAT(CAST(\"t11\".\"min_val\" AS VARCHAR), ' to ')\n END,\n CAST(\"t11\".\"max_val\" AS VARCHAR)\n )\n END AS \"fieldValue\",\n \"t11\".\"weight\"\n FROM (\n SELECT\n MIN(\"t5\".\"value\") AS \"min_val\",\n MAX(\"t5\".\"value\") AS \"max_val\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"elevation\" AS \"value\"\n FROM \"ibis_pandas_memtable_yufwhhsetbekvfprmntxu33e5i\" AS \"t0\"\n WHERE\n \"t0\".\"elevation\" IS NOT NULL\n ) AS \"t5\"\n ) AS \"t11\"\n ) AS \"t14\"\n) AS \"t15\"\nWHERE\n \"t15\".\"fieldType\" = 'string'\nORDER BY\n \"t15\".\"weight\" DESC NULLS LAST\nLIMIT 10", - "plan": "SemanticTable: airports\n code [dim]\n city [dim]\n state [dim]\n fac_type [dim]\n elevation [dim]\n airport_count [measure]\n avg_elevation [measure]\n-> Index()\n-> Filter(\u03bb )\n-> OrderBy(_CallableWrapper(_fn= at 0x146087ba0>))\n-> Limit(10)", + "sql": "SELECT\n *\nFROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n 'code' AS \"fieldName\",\n 'code' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t3\".\"value\" AS \"fieldValue\",\n \"t3\".\"weight\"\n FROM (\n SELECT\n \"t0\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"code\",\n \"t0\".\"city\",\n \"t0\".\"state\",\n \"t0\".\"fac_type\",\n \"t0\".\"elevation\",\n \"t0\".\"code\" AS \"value\"\n FROM \"ibis_pandas_memtable_wttenv6q55fldbw4bymbvoi4t4\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"value\"\n ) AS \"t3\"\n ) AS \"t8\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'city' AS \"fieldName\",\n 'city' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t4\".\"value\" AS \"fieldValue\",\n \"t4\".\"weight\"\n FROM (\n SELECT\n \"t0\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"code\",\n \"t0\".\"city\",\n \"t0\".\"state\",\n \"t0\".\"fac_type\",\n \"t0\".\"elevation\",\n \"t0\".\"city\" AS \"value\"\n FROM \"ibis_pandas_memtable_wttenv6q55fldbw4bymbvoi4t4\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"value\"\n ) AS \"t4\"\n ) AS \"t9\"\n ) AS \"t10\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'state' AS \"fieldName\",\n 'state' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t2\".\"value\" AS \"fieldValue\",\n \"t2\".\"weight\"\n FROM (\n SELECT\n \"t0\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"code\",\n \"t0\".\"city\",\n \"t0\".\"state\",\n \"t0\".\"fac_type\",\n \"t0\".\"elevation\",\n \"t0\".\"state\" AS \"value\"\n FROM \"ibis_pandas_memtable_wttenv6q55fldbw4bymbvoi4t4\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"value\"\n ) AS \"t2\"\n ) AS \"t7\"\n ) AS \"t12\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'fac_type' AS \"fieldName\",\n 'fac_type' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t1\".\"value\" AS \"fieldValue\",\n \"t1\".\"weight\"\n FROM (\n SELECT\n \"t0\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"code\",\n \"t0\".\"city\",\n \"t0\".\"state\",\n \"t0\".\"fac_type\",\n \"t0\".\"elevation\",\n \"t0\".\"fac_type\" AS \"value\"\n FROM \"ibis_pandas_memtable_wttenv6q55fldbw4bymbvoi4t4\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"value\"\n ) AS \"t1\"\n ) AS \"t6\"\n ) AS \"t13\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'elevation' AS \"fieldName\",\n 'elevation' AS \"fieldPath\",\n 'number' AS \"fieldType\",\n CASE\n WHEN CASE\n WHEN CAST(\"t11\".\"min_val\" AS VARCHAR) IS NULL OR ' to ' IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CAST(\"t11\".\"min_val\" AS VARCHAR) || ' to '\n END IS NULL\n OR CAST(\"t11\".\"max_val\" AS VARCHAR) IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CASE\n WHEN CAST(\"t11\".\"min_val\" AS VARCHAR) IS NULL OR ' to ' IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CAST(\"t11\".\"min_val\" AS VARCHAR) || ' to '\n END || CAST(\"t11\".\"max_val\" AS VARCHAR)\n END AS \"fieldValue\",\n \"t11\".\"weight\"\n FROM (\n SELECT\n MIN(\"t5\".\"value\") AS \"min_val\",\n MAX(\"t5\".\"value\") AS \"max_val\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"elevation\" AS \"value\"\n FROM \"ibis_pandas_memtable_wttenv6q55fldbw4bymbvoi4t4\" AS \"t0\"\n WHERE\n \"t0\".\"elevation\" IS NOT NULL\n ) AS \"t5\"\n ) AS \"t11\"\n ) AS \"t14\"\n) AS \"t15\"\nWHERE\n \"t15\".\"fieldType\" = 'string'\nORDER BY\n \"t15\".\"weight\" DESC NULLS LAST\nLIMIT 10", + "plan": "SemanticTable: airports\n code [dim]\n city [dim]\n state [dim]\n fac_type [dim]\n elevation [dim]\n airport_count [measure]\n avg_elevation [measure]\n-> Index()\n-> Filter(\u03bb )\n-> OrderBy(_CallableWrapper(_fn= at 0x77f2392820c0>))\n-> Limit(10)", "table": { "columns": [ "fieldName", @@ -628,167 +387,68 @@ 3 ], [ - "state", - "state", + "city", + "city", "string", - "NY", + "NEW YORK", 2 ], [ "state", "state", "string", - "FL", + "TX", 2 ], [ - "city", - "city", + "state", + "state", "string", - "NEW YORK", + "NY", 2 ], [ "state", "state", "string", - "TX", + "FL", 2 ], [ "code", "code", "string", - "LAS", + "BOS", 1 ], [ "city", "city", "string", - "LAS VEGAS", + "ORLANDO", 1 ], [ - "code", - "code", + "city", + "city", "string", - "BOS", + "SAN DIEGO", 1 ], [ - "city", - "city", + "state", + "state", "string", - "BOSTON", + "NJ", 1 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-d31501cbd3112219fceb56cc53f417ea" - }, - "mark": { - "type": "text" - }, - "encoding": { - "text": { - "value": "Complex query - consider custom visualization" - } - }, - "height": 400, - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-d31501cbd3112219fceb56cc53f417ea": [ - { - "fieldName": "fac_type", - "fieldPath": "fac_type", - "fieldType": "string", - "fieldValue": "AIRPORT", - "weight": 20 - }, - { - "fieldName": "state", - "fieldPath": "state", - "fieldType": "string", - "fieldValue": "CA", - "weight": 3 - }, - { - "fieldName": "state", - "fieldPath": "state", - "fieldType": "string", - "fieldValue": "NY", - "weight": 2 - }, - { - "fieldName": "state", - "fieldPath": "state", - "fieldType": "string", - "fieldValue": "FL", - "weight": 2 - }, - { - "fieldName": "city", - "fieldPath": "city", - "fieldType": "string", - "fieldValue": "NEW YORK", - "weight": 2 - }, - { - "fieldName": "state", - "fieldPath": "state", - "fieldType": "string", - "fieldValue": "TX", - "weight": 2 - }, - { - "fieldName": "code", - "fieldPath": "code", - "fieldType": "string", - "fieldValue": "LAS", - "weight": 1 - }, - { - "fieldName": "city", - "fieldPath": "city", - "fieldType": "string", - "fieldValue": "LAS VEGAS", - "weight": 1 - }, - { - "fieldName": "code", - "fieldPath": "code", - "fieldType": "string", - "fieldValue": "BOS", - "weight": 1 - }, - { - "fieldName": "city", - "fieldPath": "city", - "fieldType": "string", - "fieldValue": "BOSTON", - "weight": 1 - } - ] - } - } } }, "query_custom_weight": { "code": "# Find states with most airports\nresult = (\n airports.index(lambda t: t.state, by=\"airport_count\")\n .order_by(lambda t: t.weight.desc())\n .limit(10)\n)", - "sql": "SELECT\n 'state' AS \"fieldName\",\n 'state' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t1\".\"value\" AS \"fieldValue\",\n \"t1\".\"weight\"\nFROM (\n SELECT\n \"t0\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"code\",\n \"t0\".\"city\",\n \"t0\".\"state\",\n \"t0\".\"fac_type\",\n \"t0\".\"elevation\",\n \"t0\".\"state\" AS \"value\"\n FROM \"ibis_pandas_memtable_yufwhhsetbekvfprmntxu33e5i\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"value\"\n) AS \"t1\"\nORDER BY\n \"t1\".\"weight\" DESC NULLS LAST\nLIMIT 10", - "plan": "SemanticTable: airports\n code [dim]\n city [dim]\n state [dim]\n fac_type [dim]\n elevation [dim]\n airport_count [measure]\n avg_elevation [measure]\n-> Index( at 0x146084f40>, by=airport_count)\n-> OrderBy(_CallableWrapper(_fn= at 0x146085620>))\n-> Limit(10)", + "sql": "SELECT\n 'state' AS \"fieldName\",\n 'state' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t1\".\"value\" AS \"fieldValue\",\n \"t1\".\"weight\"\nFROM (\n SELECT\n \"t0\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t0\".\"code\",\n \"t0\".\"city\",\n \"t0\".\"state\",\n \"t0\".\"fac_type\",\n \"t0\".\"elevation\",\n \"t0\".\"state\" AS \"value\"\n FROM \"ibis_pandas_memtable_wttenv6q55fldbw4bymbvoi4t4\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"value\"\n) AS \"t1\"\nORDER BY\n \"t1\".\"weight\" DESC NULLS LAST\nLIMIT 10", + "plan": "SemanticTable: airports\n code [dim]\n city [dim]\n state [dim]\n fac_type [dim]\n elevation [dim]\n airport_count [measure]\n avg_elevation [measure]\n-> Index( at 0x77f239281260>, by=airport_count)\n-> OrderBy(_CallableWrapper(_fn= at 0x77f239280e00>))\n-> Limit(10)", "table": { "columns": [ "fieldName", @@ -809,35 +469,35 @@ "state", "state", "string", - "NY", + "TX", 2 ], [ "state", "state", "string", - "FL", + "NY", 2 ], [ "state", "state", "string", - "TX", + "FL", 2 ], [ "state", "state", "string", - "PA", + "NJ", 1 ], [ "state", "state", "string", - "NV", + "MN", 1 ], [ @@ -858,7 +518,7 @@ "state", "state", "string", - "MN", + "GA", 1 ], [ @@ -869,111 +529,12 @@ 1 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-27f9782d9a18017476bfc3856a947adf" - }, - "mark": { - "type": "text" - }, - "encoding": { - "text": { - "value": "Complex query - consider custom visualization" - } - }, - "height": 400, - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-27f9782d9a18017476bfc3856a947adf": [ - { - "fieldName": "state", - "fieldPath": "state", - "fieldType": "string", - "fieldValue": "CA", - "weight": 3 - }, - { - "fieldName": "state", - "fieldPath": "state", - "fieldType": "string", - "fieldValue": "NY", - "weight": 2 - }, - { - "fieldName": "state", - "fieldPath": "state", - "fieldType": "string", - "fieldValue": "FL", - "weight": 2 - }, - { - "fieldName": "state", - "fieldPath": "state", - "fieldType": "string", - "fieldValue": "TX", - "weight": 2 - }, - { - "fieldName": "state", - "fieldPath": "state", - "fieldType": "string", - "fieldValue": "PA", - "weight": 1 - }, - { - "fieldName": "state", - "fieldPath": "state", - "fieldType": "string", - "fieldValue": "NV", - "weight": 1 - }, - { - "fieldName": "state", - "fieldPath": "state", - "fieldType": "string", - "fieldValue": "CO", - "weight": 1 - }, - { - "fieldName": "state", - "fieldPath": "state", - "fieldType": "string", - "fieldValue": "WA", - "weight": 1 - }, - { - "fieldName": "state", - "fieldPath": "state", - "fieldType": "string", - "fieldValue": "MN", - "weight": 1 - }, - { - "fieldName": "state", - "fieldPath": "state", - "fieldType": "string", - "fieldValue": "AZ", - "weight": 1 - } - ] - } - } } }, "query_sampled": { "code": "# Sample 100 rows before indexing\nresult = (\n airports.index(None, sample=100)\n .filter(lambda t: t.fieldType == \"string\")\n .order_by(lambda t: t.weight.desc())\n .limit(10)\n)", - "sql": "WITH \"t1\" AS (\n SELECT\n *\n FROM \"ibis_pandas_memtable_yufwhhsetbekvfprmntxu33e5i\" AS \"t0\"\n LIMIT 100\n)\nSELECT\n *\nFROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n 'code' AS \"fieldName\",\n 'code' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t5\".\"value\" AS \"fieldValue\",\n \"t5\".\"weight\"\n FROM (\n SELECT\n \"t2\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t2\".\"code\",\n \"t2\".\"city\",\n \"t2\".\"state\",\n \"t2\".\"fac_type\",\n \"t2\".\"elevation\",\n \"t2\".\"code\" AS \"value\"\n FROM \"t1\" AS \"t2\"\n ) AS t2\n GROUP BY\n \"t2\".\"value\"\n ) AS \"t5\"\n ) AS \"t10\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'city' AS \"fieldName\",\n 'city' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t6\".\"value\" AS \"fieldValue\",\n \"t6\".\"weight\"\n FROM (\n SELECT\n \"t2\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t2\".\"code\",\n \"t2\".\"city\",\n \"t2\".\"state\",\n \"t2\".\"fac_type\",\n \"t2\".\"elevation\",\n \"t2\".\"city\" AS \"value\"\n FROM \"t1\" AS \"t2\"\n ) AS t2\n GROUP BY\n \"t2\".\"value\"\n ) AS \"t6\"\n ) AS \"t11\"\n ) AS \"t12\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'state' AS \"fieldName\",\n 'state' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t4\".\"value\" AS \"fieldValue\",\n \"t4\".\"weight\"\n FROM (\n SELECT\n \"t2\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t2\".\"code\",\n \"t2\".\"city\",\n \"t2\".\"state\",\n \"t2\".\"fac_type\",\n \"t2\".\"elevation\",\n \"t2\".\"state\" AS \"value\"\n FROM \"t1\" AS \"t2\"\n ) AS t2\n GROUP BY\n \"t2\".\"value\"\n ) AS \"t4\"\n ) AS \"t9\"\n ) AS \"t14\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'fac_type' AS \"fieldName\",\n 'fac_type' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t3\".\"value\" AS \"fieldValue\",\n \"t3\".\"weight\"\n FROM (\n SELECT\n \"t2\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t2\".\"code\",\n \"t2\".\"city\",\n \"t2\".\"state\",\n \"t2\".\"fac_type\",\n \"t2\".\"elevation\",\n \"t2\".\"fac_type\" AS \"value\"\n FROM \"t1\" AS \"t2\"\n ) AS t2\n GROUP BY\n \"t2\".\"value\"\n ) AS \"t3\"\n ) AS \"t8\"\n ) AS \"t15\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'elevation' AS \"fieldName\",\n 'elevation' AS \"fieldPath\",\n 'number' AS \"fieldType\",\n CASE\n WHEN CASE\n WHEN CAST(\"t13\".\"min_val\" AS VARCHAR) IS NULL OR ' to ' IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CONCAT(CAST(\"t13\".\"min_val\" AS VARCHAR), ' to ')\n END IS NULL\n OR CAST(\"t13\".\"max_val\" AS VARCHAR) IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CONCAT(\n CASE\n WHEN CAST(\"t13\".\"min_val\" AS VARCHAR) IS NULL OR ' to ' IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CONCAT(CAST(\"t13\".\"min_val\" AS VARCHAR), ' to ')\n END,\n CAST(\"t13\".\"max_val\" AS VARCHAR)\n )\n END AS \"fieldValue\",\n \"t13\".\"weight\"\n FROM (\n SELECT\n MIN(\"t7\".\"value\") AS \"min_val\",\n MAX(\"t7\".\"value\") AS \"max_val\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t2\".\"elevation\" AS \"value\"\n FROM \"t1\" AS \"t2\"\n WHERE\n \"t2\".\"elevation\" IS NOT NULL\n ) AS \"t7\"\n ) AS \"t13\"\n ) AS \"t16\"\n) AS \"t17\"\nWHERE\n \"t17\".\"fieldType\" = 'string'\nORDER BY\n \"t17\".\"weight\" DESC NULLS LAST\nLIMIT 10", - "plan": "SemanticTable: airports\n code [dim]\n city [dim]\n state [dim]\n fac_type [dim]\n elevation [dim]\n airport_count [measure]\n avg_elevation [measure]\n-> Index(sample=100)\n-> Filter(\u03bb )\n-> OrderBy(_CallableWrapper(_fn= at 0x146084cc0>))\n-> Limit(10)", + "sql": "WITH \"t1\" AS (\n SELECT\n *\n FROM \"ibis_pandas_memtable_wttenv6q55fldbw4bymbvoi4t4\" AS \"t0\"\n LIMIT 100\n)\nSELECT\n *\nFROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n 'code' AS \"fieldName\",\n 'code' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t5\".\"value\" AS \"fieldValue\",\n \"t5\".\"weight\"\n FROM (\n SELECT\n \"t2\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t2\".\"code\",\n \"t2\".\"city\",\n \"t2\".\"state\",\n \"t2\".\"fac_type\",\n \"t2\".\"elevation\",\n \"t2\".\"code\" AS \"value\"\n FROM \"t1\" AS \"t2\"\n ) AS t2\n GROUP BY\n \"t2\".\"value\"\n ) AS \"t5\"\n ) AS \"t10\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'city' AS \"fieldName\",\n 'city' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t6\".\"value\" AS \"fieldValue\",\n \"t6\".\"weight\"\n FROM (\n SELECT\n \"t2\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t2\".\"code\",\n \"t2\".\"city\",\n \"t2\".\"state\",\n \"t2\".\"fac_type\",\n \"t2\".\"elevation\",\n \"t2\".\"city\" AS \"value\"\n FROM \"t1\" AS \"t2\"\n ) AS t2\n GROUP BY\n \"t2\".\"value\"\n ) AS \"t6\"\n ) AS \"t11\"\n ) AS \"t12\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'state' AS \"fieldName\",\n 'state' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t4\".\"value\" AS \"fieldValue\",\n \"t4\".\"weight\"\n FROM (\n SELECT\n \"t2\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t2\".\"code\",\n \"t2\".\"city\",\n \"t2\".\"state\",\n \"t2\".\"fac_type\",\n \"t2\".\"elevation\",\n \"t2\".\"state\" AS \"value\"\n FROM \"t1\" AS \"t2\"\n ) AS t2\n GROUP BY\n \"t2\".\"value\"\n ) AS \"t4\"\n ) AS \"t9\"\n ) AS \"t14\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'fac_type' AS \"fieldName\",\n 'fac_type' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t3\".\"value\" AS \"fieldValue\",\n \"t3\".\"weight\"\n FROM (\n SELECT\n \"t2\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t2\".\"code\",\n \"t2\".\"city\",\n \"t2\".\"state\",\n \"t2\".\"fac_type\",\n \"t2\".\"elevation\",\n \"t2\".\"fac_type\" AS \"value\"\n FROM \"t1\" AS \"t2\"\n ) AS t2\n GROUP BY\n \"t2\".\"value\"\n ) AS \"t3\"\n ) AS \"t8\"\n ) AS \"t15\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'elevation' AS \"fieldName\",\n 'elevation' AS \"fieldPath\",\n 'number' AS \"fieldType\",\n CASE\n WHEN CASE\n WHEN CAST(\"t13\".\"min_val\" AS VARCHAR) IS NULL OR ' to ' IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CAST(\"t13\".\"min_val\" AS VARCHAR) || ' to '\n END IS NULL\n OR CAST(\"t13\".\"max_val\" AS VARCHAR) IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CASE\n WHEN CAST(\"t13\".\"min_val\" AS VARCHAR) IS NULL OR ' to ' IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CAST(\"t13\".\"min_val\" AS VARCHAR) || ' to '\n END || CAST(\"t13\".\"max_val\" AS VARCHAR)\n END AS \"fieldValue\",\n \"t13\".\"weight\"\n FROM (\n SELECT\n MIN(\"t7\".\"value\") AS \"min_val\",\n MAX(\"t7\".\"value\") AS \"max_val\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t2\".\"elevation\" AS \"value\"\n FROM \"t1\" AS \"t2\"\n WHERE\n \"t2\".\"elevation\" IS NOT NULL\n ) AS \"t7\"\n ) AS \"t13\"\n ) AS \"t16\"\n) AS \"t17\"\nWHERE\n \"t17\".\"fieldType\" = 'string'\nORDER BY\n \"t17\".\"weight\" DESC NULLS LAST\nLIMIT 10", + "plan": "SemanticTable: airports\n code [dim]\n city [dim]\n state [dim]\n fac_type [dim]\n elevation [dim]\n airport_count [measure]\n avg_elevation [measure]\n-> Index(sample=100)\n-> Filter(\u03bb )\n-> OrderBy(_CallableWrapper(_fn= at 0x77f2392207c0>))\n-> Limit(10)", "table": { "columns": [ "fieldName", @@ -998,45 +559,45 @@ 3 ], [ - "state", - "state", + "city", + "city", "string", - "NY", + "NEW YORK", 2 ], [ "state", "state", "string", - "FL", + "TX", 2 ], [ - "city", - "city", + "state", + "state", "string", - "NEW YORK", + "NY", 2 ], [ "state", "state", "string", - "TX", + "FL", 2 ], [ - "code", - "code", + "city", + "city", "string", - "LAS", + "SAN DIEGO", 1 ], [ "city", "city", "string", - "LAS VEGAS", + "ORLANDO", 1 ], [ @@ -1047,118 +608,19 @@ 1 ], [ - "city", - "city", + "state", + "state", "string", - "BOSTON", + "NJ", 1 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-d529b7ce7be9fb07af30ecf8a0443a3b" - }, - "mark": { - "type": "text" - }, - "encoding": { - "text": { - "value": "Complex query - consider custom visualization" - } - }, - "height": 400, - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-d529b7ce7be9fb07af30ecf8a0443a3b": [ - { - "fieldName": "fac_type", - "fieldPath": "fac_type", - "fieldType": "string", - "fieldValue": "AIRPORT", - "weight": 20 - }, - { - "fieldName": "state", - "fieldPath": "state", - "fieldType": "string", - "fieldValue": "CA", - "weight": 3 - }, - { - "fieldName": "state", - "fieldPath": "state", - "fieldType": "string", - "fieldValue": "NY", - "weight": 2 - }, - { - "fieldName": "state", - "fieldPath": "state", - "fieldType": "string", - "fieldValue": "FL", - "weight": 2 - }, - { - "fieldName": "city", - "fieldPath": "city", - "fieldType": "string", - "fieldValue": "NEW YORK", - "weight": 2 - }, - { - "fieldName": "state", - "fieldPath": "state", - "fieldType": "string", - "fieldValue": "TX", - "weight": 2 - }, - { - "fieldName": "code", - "fieldPath": "code", - "fieldType": "string", - "fieldValue": "LAS", - "weight": 1 - }, - { - "fieldName": "city", - "fieldPath": "city", - "fieldType": "string", - "fieldValue": "BOSTON", - "weight": 1 - }, - { - "fieldName": "code", - "fieldPath": "code", - "fieldType": "string", - "fieldValue": "BOS", - "weight": 1 - }, - { - "fieldName": "city", - "fieldPath": "city", - "fieldType": "string", - "fieldValue": "MIAMI", - "weight": 1 - } - ] - } - } } }, "query_index_joins": { "code": "# Create synthetic flights data\nflights_data = ibis.memtable({\n \"flight_id\": list(range(1, 31)),\n \"carrier\": [\"AA\", \"UA\", \"DL\", \"WN\", \"B6\", \"AA\", \"UA\", \"DL\", \"WN\", \"B6\"] * 3,\n \"origin\": [\"JFK\", \"LAX\", \"ORD\", \"ATL\", \"DFW\", \"SFO\", \"SEA\", \"DEN\", \"PHX\", \"BOS\"] * 3,\n})\n\nflights = (\n to_semantic_table(flights_data, name=\"flights\")\n .with_dimensions(\n carrier=lambda t: t.carrier,\n origin=lambda t: t.origin,\n )\n .with_measures(\n flight_count=lambda t: t.count(),\n )\n)\n\n# Join flights with airports\nflights_with_origin = flights.join_one(airports, lambda f, a: f.origin == a.code)\n\n# Index across the join\nresult = (\n flights_with_origin.index([\"flights.carrier\", \"airports.state\"])\n .order_by(lambda t: t.weight.desc())\n .limit(10)\n)", - "sql": "WITH \"t5\" AS (\n SELECT\n \"t2\".\"flight_id\",\n \"t2\".\"carrier\",\n \"t2\".\"origin\",\n \"t3\".\"code\",\n \"t3\".\"city\",\n \"t3\".\"state\",\n \"t3\".\"fac_type\",\n \"t3\".\"elevation\"\n FROM \"ibis_pandas_memtable_sapivfwrcnfyfgpusg6dorqdri\" AS \"t2\"\n LEFT OUTER JOIN \"ibis_pandas_memtable_yufwhhsetbekvfprmntxu33e5i\" AS \"t3\"\n ON \"t2\".\"origin\" = \"t3\".\"code\"\n), \"t9\" AS (\n SELECT\n \"t7\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t7\".\"flight_id\",\n \"t7\".\"carrier\",\n \"t7\".\"origin\",\n \"t7\".\"code\",\n \"t7\".\"city\",\n \"t7\".\"state\",\n \"t7\".\"fac_type\",\n \"t7\".\"elevation\",\n \"t7\".\"fac_type\" AS \"value\"\n FROM \"t5\" AS \"t7\"\n ) AS t7\n GROUP BY\n \"t7\".\"value\"\n), \"t10\" AS (\n SELECT\n \"t7\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t7\".\"flight_id\",\n \"t7\".\"carrier\",\n \"t7\".\"origin\",\n \"t7\".\"code\",\n \"t7\".\"city\",\n \"t7\".\"state\",\n \"t7\".\"fac_type\",\n \"t7\".\"elevation\",\n \"t7\".\"state\" AS \"value\"\n FROM \"t5\" AS \"t7\"\n ) AS t7\n GROUP BY\n \"t7\".\"value\"\n), \"t11\" AS (\n SELECT\n \"t7\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t7\".\"flight_id\",\n \"t7\".\"carrier\",\n \"t7\".\"origin\",\n \"t7\".\"code\",\n \"t7\".\"city\",\n \"t7\".\"state\",\n \"t7\".\"fac_type\",\n \"t7\".\"elevation\",\n \"t7\".\"city\" AS \"value\"\n FROM \"t5\" AS \"t7\"\n ) AS t7\n GROUP BY\n \"t7\".\"value\"\n), \"t12\" AS (\n SELECT\n \"t7\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t7\".\"flight_id\",\n \"t7\".\"carrier\",\n \"t7\".\"origin\",\n \"t7\".\"code\",\n \"t7\".\"city\",\n \"t7\".\"state\",\n \"t7\".\"fac_type\",\n \"t7\".\"elevation\",\n \"t7\".\"code\" AS \"value\"\n FROM \"t5\" AS \"t7\"\n ) AS t7\n GROUP BY\n \"t7\".\"value\"\n), \"t13\" AS (\n SELECT\n \"t7\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t7\".\"flight_id\",\n \"t7\".\"carrier\",\n \"t7\".\"origin\",\n \"t7\".\"code\",\n \"t7\".\"city\",\n \"t7\".\"state\",\n \"t7\".\"fac_type\",\n \"t7\".\"elevation\",\n \"t7\".\"origin\" AS \"value\"\n FROM \"t5\" AS \"t7\"\n ) AS t7\n GROUP BY\n \"t7\".\"value\"\n), \"t14\" AS (\n SELECT\n \"t7\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t7\".\"flight_id\",\n \"t7\".\"carrier\",\n \"t7\".\"origin\",\n \"t7\".\"code\",\n \"t7\".\"city\",\n \"t7\".\"state\",\n \"t7\".\"fac_type\",\n \"t7\".\"elevation\",\n \"t7\".\"carrier\" AS \"value\"\n FROM \"t5\" AS \"t7\"\n ) AS t7\n GROUP BY\n \"t7\".\"value\"\n), \"t22\" AS (\n SELECT\n MIN(\"t8\".\"value\") AS \"min_val\",\n MAX(\"t8\".\"value\") AS \"max_val\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t3\".\"elevation\" AS \"value\"\n FROM \"ibis_pandas_memtable_sapivfwrcnfyfgpusg6dorqdri\" AS \"t2\"\n LEFT OUTER JOIN \"ibis_pandas_memtable_yufwhhsetbekvfprmntxu33e5i\" AS \"t3\"\n ON \"t2\".\"origin\" = \"t3\".\"code\"\n ) AS \"t4\"\n WHERE\n \"t4\".\"value\" IS NOT NULL\n ) AS \"t8\"\n)\nSELECT\n *\nFROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n 'carrier' AS \"fieldName\",\n 'flights.carrier' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t21\".\"value\" AS \"fieldValue\",\n \"t21\".\"weight\"\n FROM \"t14\" AS \"t21\"\n ) AS \"t34\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'origin' AS \"fieldName\",\n 'flights.origin' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t20\".\"value\" AS \"fieldValue\",\n \"t20\".\"weight\"\n FROM \"t13\" AS \"t20\"\n ) AS \"t32\"\n ) AS \"t37\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'code' AS \"fieldName\",\n 'airports.code' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t19\".\"value\" AS \"fieldValue\",\n \"t19\".\"weight\"\n FROM \"t12\" AS \"t19\"\n ) AS \"t30\"\n ) AS \"t38\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'city' AS \"fieldName\",\n 'airports.city' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t18\".\"value\" AS \"fieldValue\",\n \"t18\".\"weight\"\n FROM \"t11\" AS \"t18\"\n ) AS \"t28\"\n ) AS \"t39\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'state' AS \"fieldName\",\n 'airports.state' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t17\".\"value\" AS \"fieldValue\",\n \"t17\".\"weight\"\n FROM \"t10\" AS \"t17\"\n ) AS \"t26\"\n ) AS \"t40\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'fac_type' AS \"fieldName\",\n 'airports.fac_type' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t16\".\"value\" AS \"fieldValue\",\n \"t16\".\"weight\"\n FROM \"t9\" AS \"t16\"\n ) AS \"t24\"\n ) AS \"t41\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'elevation' AS \"fieldName\",\n 'airports.elevation' AS \"fieldPath\",\n 'number' AS \"fieldType\",\n CASE\n WHEN CASE\n WHEN CAST(\"t36\".\"min_val\" AS VARCHAR) IS NULL OR ' to ' IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CONCAT(CAST(\"t36\".\"min_val\" AS VARCHAR), ' to ')\n END IS NULL\n OR CAST(\"t36\".\"max_val\" AS VARCHAR) IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CONCAT(\n CASE\n WHEN CAST(\"t36\".\"min_val\" AS VARCHAR) IS NULL OR ' to ' IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CONCAT(CAST(\"t36\".\"min_val\" AS VARCHAR), ' to ')\n END,\n CAST(\"t36\".\"max_val\" AS VARCHAR)\n )\n END AS \"fieldValue\",\n \"t36\".\"weight\"\n FROM \"t22\" AS \"t36\"\n ) AS \"t44\"\n ) AS \"t45\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'flight_id' AS \"fieldName\",\n 'flight_id' AS \"fieldPath\",\n 'number' AS \"fieldType\",\n CASE\n WHEN CASE\n WHEN CAST(\"t35\".\"min_val\" AS VARCHAR) IS NULL OR ' to ' IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CONCAT(CAST(\"t35\".\"min_val\" AS VARCHAR), ' to ')\n END IS NULL\n OR CAST(\"t35\".\"max_val\" AS VARCHAR) IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CONCAT(\n CASE\n WHEN CAST(\"t35\".\"min_val\" AS VARCHAR) IS NULL OR ' to ' IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CONCAT(CAST(\"t35\".\"min_val\" AS VARCHAR), ' to ')\n END,\n CAST(\"t35\".\"max_val\" AS VARCHAR)\n )\n END AS \"fieldValue\",\n \"t35\".\"weight\"\n FROM (\n SELECT\n MIN(\"t15\".\"value\") AS \"min_val\",\n MAX(\"t15\".\"value\") AS \"max_val\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t2\".\"flight_id\" AS \"value\"\n FROM \"ibis_pandas_memtable_sapivfwrcnfyfgpusg6dorqdri\" AS \"t2\"\n LEFT OUTER JOIN \"ibis_pandas_memtable_yufwhhsetbekvfprmntxu33e5i\" AS \"t3\"\n ON \"t2\".\"origin\" = \"t3\".\"code\"\n ) AS \"t6\"\n WHERE\n \"t6\".\"value\" IS NOT NULL\n ) AS \"t15\"\n ) AS \"t35\"\n ) AS \"t42\"\n ) AS \"t46\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'carrier' AS \"fieldName\",\n 'carrier' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t21\".\"value\" AS \"fieldValue\",\n \"t21\".\"weight\"\n FROM \"t14\" AS \"t21\"\n ) AS \"t33\"\n ) AS \"t47\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'origin' AS \"fieldName\",\n 'origin' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t20\".\"value\" AS \"fieldValue\",\n \"t20\".\"weight\"\n FROM \"t13\" AS \"t20\"\n ) AS \"t31\"\n ) AS \"t48\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'code' AS \"fieldName\",\n 'code' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t19\".\"value\" AS \"fieldValue\",\n \"t19\".\"weight\"\n FROM \"t12\" AS \"t19\"\n ) AS \"t29\"\n ) AS \"t49\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'city' AS \"fieldName\",\n 'city' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t18\".\"value\" AS \"fieldValue\",\n \"t18\".\"weight\"\n FROM \"t11\" AS \"t18\"\n ) AS \"t27\"\n ) AS \"t50\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'state' AS \"fieldName\",\n 'state' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t17\".\"value\" AS \"fieldValue\",\n \"t17\".\"weight\"\n FROM \"t10\" AS \"t17\"\n ) AS \"t25\"\n ) AS \"t51\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'fac_type' AS \"fieldName\",\n 'fac_type' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t16\".\"value\" AS \"fieldValue\",\n \"t16\".\"weight\"\n FROM \"t9\" AS \"t16\"\n ) AS \"t23\"\n ) AS \"t52\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'elevation' AS \"fieldName\",\n 'elevation' AS \"fieldPath\",\n 'number' AS \"fieldType\",\n CASE\n WHEN CASE\n WHEN CAST(\"t36\".\"min_val\" AS VARCHAR) IS NULL OR ' to ' IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CONCAT(CAST(\"t36\".\"min_val\" AS VARCHAR), ' to ')\n END IS NULL\n OR CAST(\"t36\".\"max_val\" AS VARCHAR) IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CONCAT(\n CASE\n WHEN CAST(\"t36\".\"min_val\" AS VARCHAR) IS NULL OR ' to ' IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CONCAT(CAST(\"t36\".\"min_val\" AS VARCHAR), ' to ')\n END,\n CAST(\"t36\".\"max_val\" AS VARCHAR)\n )\n END AS \"fieldValue\",\n \"t36\".\"weight\"\n FROM \"t22\" AS \"t36\"\n ) AS \"t43\"\n) AS \"t53\"\nORDER BY\n \"t53\".\"weight\" DESC NULLS LAST\nLIMIT 10", - "plan": "SemanticTable: flights\n carrier [dim]\n origin [dim]\n flight_count [measure]\n-> Join(left, right=airports)\n-> Index(flights.carrier, airports.state)\n-> OrderBy(_CallableWrapper(_fn= at 0x146087a60>))\n-> Limit(10)", + "sql": "WITH \"t5\" AS (\n SELECT\n \"t2\".\"flight_id\",\n \"t2\".\"carrier\",\n \"t2\".\"origin\",\n \"t3\".\"code\",\n \"t3\".\"city\",\n \"t3\".\"state\",\n \"t3\".\"fac_type\",\n \"t3\".\"elevation\"\n FROM \"ibis_pandas_memtable_shw7goquencdbjo7t5v4gd64gq\" AS \"t2\"\n LEFT OUTER JOIN \"ibis_pandas_memtable_wttenv6q55fldbw4bymbvoi4t4\" AS \"t3\"\n ON \"t2\".\"origin\" = \"t3\".\"code\"\n), \"t9\" AS (\n SELECT\n \"t7\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t7\".\"flight_id\",\n \"t7\".\"carrier\",\n \"t7\".\"origin\",\n \"t7\".\"code\",\n \"t7\".\"city\",\n \"t7\".\"state\",\n \"t7\".\"fac_type\",\n \"t7\".\"elevation\",\n \"t7\".\"fac_type\" AS \"value\"\n FROM \"t5\" AS \"t7\"\n ) AS t7\n GROUP BY\n \"t7\".\"value\"\n), \"t10\" AS (\n SELECT\n \"t7\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t7\".\"flight_id\",\n \"t7\".\"carrier\",\n \"t7\".\"origin\",\n \"t7\".\"code\",\n \"t7\".\"city\",\n \"t7\".\"state\",\n \"t7\".\"fac_type\",\n \"t7\".\"elevation\",\n \"t7\".\"state\" AS \"value\"\n FROM \"t5\" AS \"t7\"\n ) AS t7\n GROUP BY\n \"t7\".\"value\"\n), \"t11\" AS (\n SELECT\n \"t7\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t7\".\"flight_id\",\n \"t7\".\"carrier\",\n \"t7\".\"origin\",\n \"t7\".\"code\",\n \"t7\".\"city\",\n \"t7\".\"state\",\n \"t7\".\"fac_type\",\n \"t7\".\"elevation\",\n \"t7\".\"city\" AS \"value\"\n FROM \"t5\" AS \"t7\"\n ) AS t7\n GROUP BY\n \"t7\".\"value\"\n), \"t12\" AS (\n SELECT\n \"t7\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t7\".\"flight_id\",\n \"t7\".\"carrier\",\n \"t7\".\"origin\",\n \"t7\".\"code\",\n \"t7\".\"city\",\n \"t7\".\"state\",\n \"t7\".\"fac_type\",\n \"t7\".\"elevation\",\n \"t7\".\"code\" AS \"value\"\n FROM \"t5\" AS \"t7\"\n ) AS t7\n GROUP BY\n \"t7\".\"value\"\n), \"t13\" AS (\n SELECT\n \"t7\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t7\".\"flight_id\",\n \"t7\".\"carrier\",\n \"t7\".\"origin\",\n \"t7\".\"code\",\n \"t7\".\"city\",\n \"t7\".\"state\",\n \"t7\".\"fac_type\",\n \"t7\".\"elevation\",\n \"t7\".\"origin\" AS \"value\"\n FROM \"t5\" AS \"t7\"\n ) AS t7\n GROUP BY\n \"t7\".\"value\"\n), \"t14\" AS (\n SELECT\n \"t7\".\"value\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n \"t7\".\"flight_id\",\n \"t7\".\"carrier\",\n \"t7\".\"origin\",\n \"t7\".\"code\",\n \"t7\".\"city\",\n \"t7\".\"state\",\n \"t7\".\"fac_type\",\n \"t7\".\"elevation\",\n \"t7\".\"carrier\" AS \"value\"\n FROM \"t5\" AS \"t7\"\n ) AS t7\n GROUP BY\n \"t7\".\"value\"\n), \"t22\" AS (\n SELECT\n MIN(\"t8\".\"value\") AS \"min_val\",\n MAX(\"t8\".\"value\") AS \"max_val\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t3\".\"elevation\" AS \"value\"\n FROM \"ibis_pandas_memtable_shw7goquencdbjo7t5v4gd64gq\" AS \"t2\"\n LEFT OUTER JOIN \"ibis_pandas_memtable_wttenv6q55fldbw4bymbvoi4t4\" AS \"t3\"\n ON \"t2\".\"origin\" = \"t3\".\"code\"\n ) AS \"t4\"\n WHERE\n \"t4\".\"value\" IS NOT NULL\n ) AS \"t8\"\n)\nSELECT\n *\nFROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n *\n FROM (\n SELECT\n 'carrier' AS \"fieldName\",\n 'flights.carrier' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t21\".\"value\" AS \"fieldValue\",\n \"t21\".\"weight\"\n FROM \"t14\" AS \"t21\"\n ) AS \"t34\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'origin' AS \"fieldName\",\n 'flights.origin' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t20\".\"value\" AS \"fieldValue\",\n \"t20\".\"weight\"\n FROM \"t13\" AS \"t20\"\n ) AS \"t32\"\n ) AS \"t37\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'code' AS \"fieldName\",\n 'airports.code' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t19\".\"value\" AS \"fieldValue\",\n \"t19\".\"weight\"\n FROM \"t12\" AS \"t19\"\n ) AS \"t30\"\n ) AS \"t38\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'city' AS \"fieldName\",\n 'airports.city' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t18\".\"value\" AS \"fieldValue\",\n \"t18\".\"weight\"\n FROM \"t11\" AS \"t18\"\n ) AS \"t28\"\n ) AS \"t39\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'state' AS \"fieldName\",\n 'airports.state' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t17\".\"value\" AS \"fieldValue\",\n \"t17\".\"weight\"\n FROM \"t10\" AS \"t17\"\n ) AS \"t26\"\n ) AS \"t40\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'fac_type' AS \"fieldName\",\n 'airports.fac_type' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t16\".\"value\" AS \"fieldValue\",\n \"t16\".\"weight\"\n FROM \"t9\" AS \"t16\"\n ) AS \"t24\"\n ) AS \"t41\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'elevation' AS \"fieldName\",\n 'airports.elevation' AS \"fieldPath\",\n 'number' AS \"fieldType\",\n CASE\n WHEN CASE\n WHEN CAST(\"t36\".\"min_val\" AS VARCHAR) IS NULL OR ' to ' IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CAST(\"t36\".\"min_val\" AS VARCHAR) || ' to '\n END IS NULL\n OR CAST(\"t36\".\"max_val\" AS VARCHAR) IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CASE\n WHEN CAST(\"t36\".\"min_val\" AS VARCHAR) IS NULL OR ' to ' IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CAST(\"t36\".\"min_val\" AS VARCHAR) || ' to '\n END || CAST(\"t36\".\"max_val\" AS VARCHAR)\n END AS \"fieldValue\",\n \"t36\".\"weight\"\n FROM \"t22\" AS \"t36\"\n ) AS \"t44\"\n ) AS \"t45\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'flight_id' AS \"fieldName\",\n 'flight_id' AS \"fieldPath\",\n 'number' AS \"fieldType\",\n CASE\n WHEN CASE\n WHEN CAST(\"t35\".\"min_val\" AS VARCHAR) IS NULL OR ' to ' IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CAST(\"t35\".\"min_val\" AS VARCHAR) || ' to '\n END IS NULL\n OR CAST(\"t35\".\"max_val\" AS VARCHAR) IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CASE\n WHEN CAST(\"t35\".\"min_val\" AS VARCHAR) IS NULL OR ' to ' IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CAST(\"t35\".\"min_val\" AS VARCHAR) || ' to '\n END || CAST(\"t35\".\"max_val\" AS VARCHAR)\n END AS \"fieldValue\",\n \"t35\".\"weight\"\n FROM (\n SELECT\n MIN(\"t15\".\"value\") AS \"min_val\",\n MAX(\"t15\".\"value\") AS \"max_val\",\n COUNT(*) AS \"weight\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t2\".\"flight_id\" AS \"value\"\n FROM \"ibis_pandas_memtable_shw7goquencdbjo7t5v4gd64gq\" AS \"t2\"\n LEFT OUTER JOIN \"ibis_pandas_memtable_wttenv6q55fldbw4bymbvoi4t4\" AS \"t3\"\n ON \"t2\".\"origin\" = \"t3\".\"code\"\n ) AS \"t6\"\n WHERE\n \"t6\".\"value\" IS NOT NULL\n ) AS \"t15\"\n ) AS \"t35\"\n ) AS \"t42\"\n ) AS \"t46\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'carrier' AS \"fieldName\",\n 'carrier' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t21\".\"value\" AS \"fieldValue\",\n \"t21\".\"weight\"\n FROM \"t14\" AS \"t21\"\n ) AS \"t33\"\n ) AS \"t47\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'origin' AS \"fieldName\",\n 'origin' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t20\".\"value\" AS \"fieldValue\",\n \"t20\".\"weight\"\n FROM \"t13\" AS \"t20\"\n ) AS \"t31\"\n ) AS \"t48\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'code' AS \"fieldName\",\n 'code' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t19\".\"value\" AS \"fieldValue\",\n \"t19\".\"weight\"\n FROM \"t12\" AS \"t19\"\n ) AS \"t29\"\n ) AS \"t49\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'city' AS \"fieldName\",\n 'city' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t18\".\"value\" AS \"fieldValue\",\n \"t18\".\"weight\"\n FROM \"t11\" AS \"t18\"\n ) AS \"t27\"\n ) AS \"t50\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'state' AS \"fieldName\",\n 'state' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t17\".\"value\" AS \"fieldValue\",\n \"t17\".\"weight\"\n FROM \"t10\" AS \"t17\"\n ) AS \"t25\"\n ) AS \"t51\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'fac_type' AS \"fieldName\",\n 'fac_type' AS \"fieldPath\",\n 'string' AS \"fieldType\",\n \"t16\".\"value\" AS \"fieldValue\",\n \"t16\".\"weight\"\n FROM \"t9\" AS \"t16\"\n ) AS \"t23\"\n ) AS \"t52\"\n UNION ALL\n SELECT\n *\n FROM (\n SELECT\n 'elevation' AS \"fieldName\",\n 'elevation' AS \"fieldPath\",\n 'number' AS \"fieldType\",\n CASE\n WHEN CASE\n WHEN CAST(\"t36\".\"min_val\" AS VARCHAR) IS NULL OR ' to ' IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CAST(\"t36\".\"min_val\" AS VARCHAR) || ' to '\n END IS NULL\n OR CAST(\"t36\".\"max_val\" AS VARCHAR) IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CASE\n WHEN CAST(\"t36\".\"min_val\" AS VARCHAR) IS NULL OR ' to ' IS NULL\n THEN CAST(NULL AS VARCHAR)\n ELSE CAST(\"t36\".\"min_val\" AS VARCHAR) || ' to '\n END || CAST(\"t36\".\"max_val\" AS VARCHAR)\n END AS \"fieldValue\",\n \"t36\".\"weight\"\n FROM \"t22\" AS \"t36\"\n ) AS \"t43\"\n) AS \"t53\"\nORDER BY\n \"t53\".\"weight\" DESC NULLS LAST\nLIMIT 10", + "plan": "SemanticTable: flights\n carrier [dim]\n origin [dim]\n flight_count [measure]\n-> Join(left, right=airports)\n-> Index(flights.carrier, airports.state)\n-> OrderBy(_CallableWrapper(_fn= at 0x77f239283420>))\n-> Limit(10)", "table": { "columns": [ "fieldName", @@ -1210,13 +672,6 @@ "WN", 6 ], - [ - "state", - "airports.state", - "string", - "CA", - 6 - ], [ "carrier", "flights.carrier", @@ -1228,7 +683,7 @@ "carrier", "flights.carrier", "string", - "B6", + "UA", 6 ], [ @@ -1237,107 +692,15 @@ "string", "WN", 6 + ], + [ + "carrier", + "flights.carrier", + "string", + "B6", + 6 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-db3590b2f816b41a610864ebda0098e5" - }, - "mark": { - "type": "text" - }, - "encoding": { - "text": { - "value": "Complex query - consider custom visualization" - } - }, - "height": 400, - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-db3590b2f816b41a610864ebda0098e5": [ - { - "fieldName": "fac_type", - "fieldPath": "airports.fac_type", - "fieldType": "string", - "fieldValue": "AIRPORT", - "weight": 30 - }, - { - "fieldName": "elevation", - "fieldPath": "airports.elevation", - "fieldType": "number", - "fieldValue": "13 to 5433", - "weight": 30 - }, - { - "fieldName": "flight_id", - "fieldPath": "flight_id", - "fieldType": "number", - "fieldValue": "1 to 30", - "weight": 30 - }, - { - "fieldName": "fac_type", - "fieldPath": "fac_type", - "fieldType": "string", - "fieldValue": "AIRPORT", - "weight": 30 - }, - { - "fieldName": "elevation", - "fieldPath": "elevation", - "fieldType": "number", - "fieldValue": "13 to 5433", - "weight": 30 - }, - { - "fieldName": "carrier", - "fieldPath": "flights.carrier", - "fieldType": "string", - "fieldValue": "WN", - "weight": 6 - }, - { - "fieldName": "state", - "fieldPath": "airports.state", - "fieldType": "string", - "fieldValue": "CA", - "weight": 6 - }, - { - "fieldName": "carrier", - "fieldPath": "flights.carrier", - "fieldType": "string", - "fieldValue": "AA", - "weight": 6 - }, - { - "fieldName": "carrier", - "fieldPath": "flights.carrier", - "fieldType": "string", - "fieldValue": "UA", - "weight": 6 - }, - { - "fieldName": "carrier", - "fieldPath": "carrier", - "fieldType": "string", - "fieldValue": "WN", - "weight": 6 - } - ] - } - } } } }, diff --git a/docs/web/public/bsl-data/mcp.json b/docs/web/public/bsl-data/mcp.json index 6eafd22..848bc12 100644 --- a/docs/web/public/bsl-data/mcp.json +++ b/docs/web/public/bsl-data/mcp.json @@ -1,5 +1,5 @@ { - "markdown": "# Model Context Protocol (MCP) Integration\n\nBSL includes built-in support for the [Model Context Protocol (MCP)](https://github.com/modelcontextprotocol/python-sdk), allowing you to expose your semantic models to Large Language Models like Claude.\n\n\n**Pro tip:** Use [descriptions in dimensions and measures](/building/semantic-tables#adding-descriptions) to make your models more AI-friendly. Descriptions help provide context to LLMs, enabling them to understand what each field represents and when to use them.\n\n\n## Installation\n\nTo use MCP functionality, install BSL with the `fastmcp` extra:\n\n```bash\npip install 'boring-semantic-layer[fastmcp]'\n```\n\n## Setting up an MCP Server\n\nCreate an MCP server script that exposes your semantic models:\n\n```python\nimport ibis\nfrom boring_semantic_layer.semantic_api import to_semantic_table\nfrom boring_semantic_layer.api.mcp import MCPSemanticModel\n\n# Create synthetic flights data\nflights_data = ibis.memtable({\n \"flight_id\": list(range(1, 101)),\n \"origin\": [\"JFK\", \"LAX\", \"ORD\", \"ATL\", \"DFW\"] * 20,\n \"dest\": [\"LAX\", \"JFK\", \"DFW\", \"ORD\", \"ATL\"] * 20,\n \"carrier\": [\"AA\", \"UA\", \"DL\", \"WN\", \"B6\"] * 20,\n \"distance\": [2475, 2475, 801, 606, 732] * 20,\n})\n\n# Define your semantic table with descriptions\nflights = (\n to_semantic_table(flights_data, name=\"flights\")\n .with_dimensions(\n origin={\n \"expr\": lambda t: t.origin,\n \"description\": \"Origin airport code where the flight departed from\"\n },\n destination={\n \"expr\": lambda t: t.dest,\n \"description\": \"Destination airport code where the flight arrived\"\n },\n carrier={\n \"expr\": lambda t: t.carrier,\n \"description\": \"Airline carrier code (e.g., AA, UA, DL)\"\n },\n )\n .with_measures(\n total_flights={\n \"expr\": lambda t: t.count(),\n \"description\": \"Total number of flights\"\n },\n avg_distance={\n \"expr\": lambda t: t.distance.mean(),\n \"description\": \"Average flight distance in miles\"\n },\n )\n)\n\n# Create the MCP server\nmcp_server = MCPSemanticModel(\n models={\"flights\": flights},\n name=\"Flight Data Server\"\n)\n\nif __name__ == \"__main__\":\n mcp_server.run(transport=\"stdio\")\n```\n\nSave this as `example_mcp.py` in your project directory.\n\n## Configuring Claude Desktop\n\nTo use your MCP server with Claude Desktop, add it to your configuration file.\n\n**Configuration file location:**\n- **macOS:** `~/Library/Application Support/Claude/claude_desktop_config.json`\n- **Windows:** `%APPDATA%\\Claude\\claude_desktop_config.json`\n\n**Example configuration:**\n\n```json\n{\n \"mcpServers\": {\n \"flight_sm\": {\n \"command\": \"uv\",\n \"args\": [\n \"--directory\",\n \"/path/to/your/project/\",\n \"run\",\n \"example_mcp.py\"\n ]\n }\n }\n}\n```\n\nReplace `/path/to/your/project/` with the actual path to your project directory.\n\n\nThis example uses [uv](https://docs.astral.sh/uv/) to run the MCP server. You can also use `python` directly if you have BSL installed in your environment:\n\n```json\n{\n \"mcpServers\": {\n \"flight_sm\": {\n \"command\": \"python\",\n \"args\": [\"/path/to/your/project/example_mcp.py\"]\n }\n }\n}\n```\n\n\nAfter updating the configuration:\n1. Restart Claude Desktop\n2. Look for the MCP server indicator in the Claude Desktop interface\n3. You should see \"flight_sm\" listed as an available server\n\n## Available MCP Tools\n\nOnce configured, Claude will have access to these tools for interacting with your semantic models:\n\n### list_models\n\nList all available semantic model names in the MCP server.\n\n**Example usage in Claude:**\n> \"What models are available?\"\n\n**Returns:** Array of model names (e.g., `[\"flights\", \"carriers\"]`)\n\n### get_model\n\nGet detailed information about a specific model including its dimensions, measures, and descriptions.\n\n**Parameters:**\n- `model_name` (str): Name of the model to inspect\n\n**Example usage in Claude:**\n> \"Show me the details of the flights model\"\n\n**Returns:** Model schema including:\n- Model name and description\n- List of dimensions with their descriptions\n- List of measures with their descriptions\n- Available joins (if any)\n\n### get_time_range\n\nGet the available time range for time-series data in a model.\n\n**Parameters:**\n- `model_name` (str): Name of the model\n- `time_dimension` (str): Name of the time dimension\n\n**Example usage in Claude:**\n> \"What's the time range available in the flights model?\"\n\n**Returns:** Dictionary with `min_time` and `max_time` values\n\n### query_model\n\nExecute queries against a semantic model with dimensions, measures, filters, and optional chart specifications.\n\n**Parameters:**\n- `model_name` (str): Name of the model to query\n- `dimensions` (list[str]): List of dimension names to group by\n- `measures` (list[str]): List of measure names to aggregate\n- `filters` (list[str], optional): List of filter expressions (e.g., `[\"origin == 'JFK'\"]`)\n- `limit` (int, optional): Maximum number of rows to return\n- `order_by` (list[str], optional): List of columns to sort by\n- `chart_spec` (dict, optional): Vega-Lite chart specification\n\n**Example usage in Claude:**\n> \"Show me the top 10 origins by flight count\"\n> \"Create a bar chart of average distance by carrier\"\n\n**Returns:**\n- When `chart_spec` is provided: `{\"records\": [...], \"chart\": {...}}`\n- When `chart_spec` is not provided: `{\"records\": [...]}`\n\n### Example Interactions\n\nHere are some example questions you can ask Claude when the MCP server is configured:\n\n**Data Exploration:**\n- \"What models are available in the flight data server?\"\n- \"Show me all dimensions and measures in the flights model\"\n- \"What is the time range covered by the flights data?\"\n\n**Basic Queries:**\n- \"How many flights departed from JFK?\"\n- \"Show me the top 5 destinations by flight count\"\n- \"What's the average flight distance for each carrier?\"\n\n**Filtered Queries:**\n- \"Show me flights from California airports (starting with 'S')\"\n- \"What carriers have an average distance over 1000 miles?\"\n- \"List the top 10 busiest routes\"\n\n**Visualizations:**\n- \"Create a bar chart showing flights by origin airport\"\n- \"Make a line chart of flights over time\"\n- \"Show me a heatmap of routes between origins and destinations\"\n\n## Best Practices\n\n### 1. Add Descriptions to All Fields\n\nDescriptions are crucial for LLMs to understand your data model:\n\n```python\nflights = (\n to_semantic_table(flights_tbl, name=\"flights\")\n .with_dimensions(\n origin={\n \"expr\": lambda t: t.origin,\n \"description\": \"Origin airport code (3-letter IATA code)\"\n }\n )\n .with_measures(\n total_flights={\n \"expr\": lambda t: t.count(),\n \"description\": \"Total number of flights in the dataset\"\n }\n )\n)\n```\n\n### 2. Use Descriptive Model Names\n\nChoose clear, descriptive names for your models:\n\n```python\n# Good\nmcp_server = MCPSemanticModel(\n models={\"flights\": flights, \"carriers\": carriers},\n name=\"Aviation Analytics Server\"\n)\n\n# Less clear\nmcp_server = MCPSemanticModel(\n models={\"f\": flights, \"c\": carriers},\n name=\"Server\"\n)\n```\n\n### 3. Define Time Dimensions for MCP Time-Series Queries\n\nWhen exposing models through MCP, you need to explicitly define time dimensions to enable LLMs to query time ranges and perform time-based aggregations. This is specific to MCP\u2014when using BSL's fluent API directly, you can simply use Ibis functions like `.year()` and `.month()`.\n\nTo define a time dimension, set `is_time_dimension=True` and specify the `smallest_time_grain`:\n\n```python\nfrom boring_semantic_layer.semantic_api import to_semantic_table\n\nflights = (\n to_semantic_table(flights_data, name=\"flights\")\n .with_dimensions(\n arr_time={\n \"expr\": lambda t: t.arr_time,\n \"description\": \"Arrival time of the flight\",\n \"is_time_dimension\": True,\n \"smallest_time_grain\": \"TIME_GRAIN_SECOND\",\n },\n origin={\n \"expr\": lambda t: t.origin,\n \"description\": \"Origin airport code\"\n },\n )\n .with_measures(\n flight_count={\n \"expr\": lambda t: t.count(),\n \"description\": \"Total number of flights\"\n }\n )\n)\n```\n\n**Available time grains:**\n- `TIME_GRAIN_SECOND` - For second-level precision\n- `TIME_GRAIN_MINUTE` - For minute-level precision\n- `TIME_GRAIN_HOUR` - For hourly data\n- `TIME_GRAIN_DAY` - For daily data\n- `TIME_GRAIN_WEEK` - For weekly data\n- `TIME_GRAIN_MONTH` - For monthly data\n- `TIME_GRAIN_QUARTER` - For quarterly data\n- `TIME_GRAIN_YEAR` - For yearly data\n\n\nIf you define multiple time dimensions in your model, the `.query()` method and MCP tools will use the first time dimension that appears in your query's dimensions list.\n\n\n**Example time-based queries:**\n\nWith time dimensions defined, you can use the `.query()` method with time ranges and grains:\n\n```python\n# Query with a specific time range\nresult = flights.query(\n dimensions=[\"origin\"],\n measures=[\"flight_count\"],\n time_range={\"start\": \"2024-01-01\", \"end\": \"2024-12-31\"}\n)\n\n# Query with time grain aggregation\nresult = flights.query(\n dimensions=[\"arr_time\"],\n measures=[\"flight_count\"],\n time_grain=\"TIME_GRAIN_MONTH\"\n)\n```\n\nLLMs can then perform similar queries through MCP:\n```\n> \"What's the time range available in the flights data?\"\n> \"Show me flights from January 2024\"\n> \"Give me monthly flight counts for the last year\"\n```\n\n### 4. Structure Your Data Logically\n\nOrganize related dimensions and measures together, and use joins to connect related models:\n\n```python\n# Flights model focuses on flight operations\nflights = (\n to_semantic_table(flights_tbl, name=\"flights\")\n .with_dimensions(origin=..., destination=..., date=...)\n .with_measures(flight_count=..., avg_delay=...)\n)\n\n# Carriers model focuses on airline information\ncarriers = (\n to_semantic_table(carriers_tbl, name=\"carriers\")\n .with_dimensions(code=..., name=..., country=...)\n .with_measures(carrier_count=...)\n)\n\n# Connect them with joins\nflights_with_carriers = flights.join_one(\n carriers,\n lambda f, c: f.carrier == c.code\n)\n```\n\n## Troubleshooting\n\n### Server Not Appearing in Claude Desktop\n\n1. Check the configuration file path is correct\n2. Verify JSON syntax in `claude_desktop_config.json`\n3. Ensure BSL is installed with MCP support: `pip install 'boring-semantic-layer[fastmcp]'`\n4. Restart Claude Desktop completely\n5. Check Claude Desktop logs for error messages\n\n### Import Errors\n\nIf you see import errors when the server starts:\n\n```bash\n# Ensure all dependencies are installed\npip install 'boring-semantic-layer[fastmcp]'\n\n# Or install specific dependencies\npip install fastmcp ibis-framework\n```\n\n### Path Issues\n\nMake sure file paths in your configuration are absolute paths, not relative:\n\n```json\n{\n \"mcpServers\": {\n \"flight_sm\": {\n \"command\": \"python\",\n \"args\": [\"/Users/username/projects/my-project/example_mcp.py\"]\n }\n }\n}\n```\n\n## Next Steps\n\n- Learn about [YAML Configuration](/building/yaml) for managing multiple models\n- Explore [Query Methods](/querying/methods) to understand what queries LLMs can perform\n- See [Charting](/querying/charting) for visualization capabilities\n- Review the [full API Reference](/reference) for advanced features\n", + "markdown": "# Model Context Protocol (MCP) Integration\n\nBSL includes built-in support for the [Model Context Protocol (MCP)](https://github.com/modelcontextprotocol/python-sdk), allowing you to expose your semantic models to Large Language Models like Claude.\n\n\n**Pro tip:** Use [descriptions in dimensions and measures](/building/semantic-tables#with_dimensions) to make your models more AI-friendly. Descriptions help provide context to LLMs, enabling them to understand what each field represents and when to use them.\n\n\n## Installation\n\nTo use MCP functionality, install BSL with the `fastmcp` extra:\n\n```bash\npip install 'boring-semantic-layer[fastmcp]'\n```\n\n## Setting up an MCP Server\n\nCreate an MCP server script that exposes your semantic models:\n\n```python\nimport ibis\nfrom boring_semantic_layer import to_semantic_table, MCPSemanticModel\n\n# Create synthetic flights data\nflights_data = ibis.memtable({\n \"flight_id\": list(range(1, 101)),\n \"origin\": [\"JFK\", \"LAX\", \"ORD\", \"ATL\", \"DFW\"] * 20,\n \"dest\": [\"LAX\", \"JFK\", \"DFW\", \"ORD\", \"ATL\"] * 20,\n \"carrier\": [\"AA\", \"UA\", \"DL\", \"WN\", \"B6\"] * 20,\n \"distance\": [2475, 2475, 801, 606, 732] * 20,\n})\n\n# Define your semantic table with descriptions\nflights = (\n to_semantic_table(flights_data, name=\"flights\")\n .with_dimensions(\n origin={\n \"expr\": lambda t: t.origin,\n \"description\": \"Origin airport code where the flight departed from\"\n },\n destination={\n \"expr\": lambda t: t.dest,\n \"description\": \"Destination airport code where the flight arrived\"\n },\n carrier={\n \"expr\": lambda t: t.carrier,\n \"description\": \"Airline carrier code (e.g., AA, UA, DL)\"\n },\n )\n .with_measures(\n total_flights={\n \"expr\": lambda t: t.count(),\n \"description\": \"Total number of flights\"\n },\n avg_distance={\n \"expr\": lambda t: t.distance.mean(),\n \"description\": \"Average flight distance in miles\"\n },\n )\n)\n\n# Create the MCP server\nmcp_server = MCPSemanticModel(\n models={\"flights\": flights},\n name=\"Flight Data Server\"\n)\n\nif __name__ == \"__main__\":\n mcp_server.run(transport=\"stdio\")\n```\n\nSave this as `example_mcp.py` in your project directory.\n\n## Configuring Claude Desktop\n\nTo use your MCP server with Claude Desktop, add it to your configuration file.\n\n**Configuration file location:**\n- **macOS:** `~/Library/Application Support/Claude/claude_desktop_config.json`\n- **Windows:** `%APPDATA%\\Claude\\claude_desktop_config.json`\n\n**Example configuration:**\n\n```json\n{\n \"mcpServers\": {\n \"flight_sm\": {\n \"command\": \"uv\",\n \"args\": [\n \"--directory\",\n \"/path/to/your/project/\",\n \"run\",\n \"example_mcp.py\"\n ]\n }\n }\n}\n```\n\nReplace `/path/to/your/project/` with the actual path to your project directory.\n\n\nThis example uses [uv](https://docs.astral.sh/uv/) to run the MCP server. You can also use `python` directly if you have BSL installed in your environment:\n\n```json\n{\n \"mcpServers\": {\n \"flight_sm\": {\n \"command\": \"python\",\n \"args\": [\"/path/to/your/project/example_mcp.py\"]\n }\n }\n}\n```\n\n\nAfter updating the configuration:\n1. Restart Claude Desktop\n2. Look for the MCP server indicator in the Claude Desktop interface\n3. You should see \"flight_sm\" listed as an available server\n\n## Available MCP Tools\n\nOnce configured, Claude will have access to these tools for interacting with your semantic models:\n\n### list_models\n\nList all available semantic model names in the MCP server.\n\n**Example usage in Claude:**\n> \"What models are available?\"\n\n**Returns:** Array of model names (e.g., `[\"flights\", \"carriers\"]`)\n\n### get_model\n\nGet detailed information about a specific model including its dimensions, measures, and descriptions.\n\n**Parameters:**\n- `model_name` (str): Name of the model to inspect\n\n**Example usage in Claude:**\n> \"Show me the details of the flights model\"\n\n**Returns:** Model schema including:\n- Model name and description\n- List of dimensions with their descriptions\n- List of measures with their descriptions\n- Available joins (if any)\n\n### get_time_range\n\nGet the available time range for time-series data in a model.\n\n**Parameters:**\n- `model_name` (str): Name of the model\n- `time_dimension` (str): Name of the time dimension\n\n**Example usage in Claude:**\n> \"What's the time range available in the flights model?\"\n\n**Returns:** Dictionary with `min_time` and `max_time` values\n\n### query_model\n\nExecute queries against a semantic model with dimensions, measures, filters, and optional chart specifications.\n\n**Parameters:**\n- `model_name` (str): Name of the model to query\n- `dimensions` (list[str]): List of dimension names to group by\n- `measures` (list[str]): List of measure names to aggregate\n- `filters` (list[str], optional): List of filter expressions (e.g., `[\"origin == 'JFK'\"]`)\n- `limit` (int, optional): Maximum number of rows to return\n- `order_by` (list[str], optional): List of columns to sort by\n- `chart_spec` (dict, optional): Vega-Lite chart specification\n\n**Example usage in Claude:**\n> \"Show me the top 10 origins by flight count\"\n> \"Create a bar chart of average distance by carrier\"\n\n**Returns:**\n- When `chart_spec` is provided: `{\"records\": [...], \"chart\": {...}}`\n- When `chart_spec` is not provided: `{\"records\": [...]}`\n\n### Example Interactions\n\nHere are some example questions you can ask Claude when the MCP server is configured:\n\n**Data Exploration:**\n- \"What models are available in the flight data server?\"\n- \"Show me all dimensions and measures in the flights model\"\n- \"What is the time range covered by the flights data?\"\n\n**Basic Queries:**\n- \"How many flights departed from JFK?\"\n- \"Show me the top 5 destinations by flight count\"\n- \"What's the average flight distance for each carrier?\"\n\n**Filtered Queries:**\n- \"Show me flights from California airports (starting with 'S')\"\n- \"What carriers have an average distance over 1000 miles?\"\n- \"List the top 10 busiest routes\"\n\n**Visualizations:**\n- \"Create a bar chart showing flights by origin airport\"\n- \"Make a line chart of flights over time\"\n- \"Show me a heatmap of routes between origins and destinations\"\n\n## Best Practices\n\n### 1. Add Descriptions to All Fields\n\nDescriptions are crucial for LLMs to understand your data model:\n\n```python\nflights = (\n to_semantic_table(flights_tbl, name=\"flights\")\n .with_dimensions(\n origin={\n \"expr\": lambda t: t.origin,\n \"description\": \"Origin airport code (3-letter IATA code)\"\n }\n )\n .with_measures(\n total_flights={\n \"expr\": lambda t: t.count(),\n \"description\": \"Total number of flights in the dataset\"\n }\n )\n)\n```\n\n### 2. Use Descriptive Model Names\n\nChoose clear, descriptive names for your models:\n\n```python\n# Good\nmcp_server = MCPSemanticModel(\n models={\"flights\": flights, \"carriers\": carriers},\n name=\"Aviation Analytics Server\"\n)\n\n# Less clear\nmcp_server = MCPSemanticModel(\n models={\"f\": flights, \"c\": carriers},\n name=\"Server\"\n)\n```\n\n### 3. Define Time Dimensions for MCP Time-Series Queries\n\nWhen exposing models through MCP, you need to explicitly define time dimensions to enable LLMs to query time ranges and perform time-based aggregations. This is specific to MCP\u2014when using BSL's fluent API directly, you can simply use Ibis functions like `.year()` and `.month()`.\n\nTo define a time dimension, set `is_time_dimension=True` and specify the `smallest_time_grain`:\n\n```python\nfrom boring_semantic_layer import to_semantic_table\n\nflights = (\n to_semantic_table(flights_data, name=\"flights\")\n .with_dimensions(\n arr_time={\n \"expr\": lambda t: t.arr_time,\n \"description\": \"Arrival time of the flight\",\n \"is_time_dimension\": True,\n \"smallest_time_grain\": \"TIME_GRAIN_SECOND\",\n },\n origin={\n \"expr\": lambda t: t.origin,\n \"description\": \"Origin airport code\"\n },\n )\n .with_measures(\n flight_count={\n \"expr\": lambda t: t.count(),\n \"description\": \"Total number of flights\"\n }\n )\n)\n```\n\n**Available time grains:**\n- `TIME_GRAIN_SECOND` - For second-level precision\n- `TIME_GRAIN_MINUTE` - For minute-level precision\n- `TIME_GRAIN_HOUR` - For hourly data\n- `TIME_GRAIN_DAY` - For daily data\n- `TIME_GRAIN_WEEK` - For weekly data\n- `TIME_GRAIN_MONTH` - For monthly data\n- `TIME_GRAIN_QUARTER` - For quarterly data\n- `TIME_GRAIN_YEAR` - For yearly data\n\n\nIf you define multiple time dimensions in your model, the `.query()` method and MCP tools will use the first time dimension that appears in your query's dimensions list.\n\n\n**Example time-based queries:**\n\nWith time dimensions defined, you can use the `.query()` method with time ranges and grains:\n\n```python\n# Query with a specific time range\nresult = flights.query(\n dimensions=[\"origin\"],\n measures=[\"flight_count\"],\n time_range={\"start\": \"2024-01-01\", \"end\": \"2024-12-31\"}\n)\n\n# Query with time grain aggregation\nresult = flights.query(\n dimensions=[\"arr_time\"],\n measures=[\"flight_count\"],\n time_grain=\"TIME_GRAIN_MONTH\"\n)\n```\n\nLLMs can then perform similar queries through MCP:\n```\n> \"What's the time range available in the flights data?\"\n> \"Show me flights from January 2024\"\n> \"Give me monthly flight counts for the last year\"\n```\n\n### 4. Structure Your Data Logically\n\nOrganize related dimensions and measures together, and use joins to connect related models:\n\n```python\n# Flights model focuses on flight operations\nflights = (\n to_semantic_table(flights_tbl, name=\"flights\")\n .with_dimensions(origin=..., destination=..., date=...)\n .with_measures(flight_count=..., avg_delay=...)\n)\n\n# Carriers model focuses on airline information\ncarriers = (\n to_semantic_table(carriers_tbl, name=\"carriers\")\n .with_dimensions(code=..., name=..., country=...)\n .with_measures(carrier_count=...)\n)\n\n# Connect them with joins\nflights_with_carriers = flights.join_one(\n carriers,\n lambda f, c: f.carrier == c.code\n)\n```\n\n## Troubleshooting\n\n### Server Not Appearing in Claude Desktop\n\n1. Check the configuration file path is correct\n2. Verify JSON syntax in `claude_desktop_config.json`\n3. Ensure BSL is installed with MCP support: `pip install 'boring-semantic-layer[fastmcp]'`\n4. Restart Claude Desktop completely\n5. Check Claude Desktop logs for error messages\n\n### Import Errors\n\nIf you see import errors when the server starts:\n\n```bash\n# Ensure all dependencies are installed\npip install 'boring-semantic-layer[fastmcp]'\n\n# Or install specific dependencies\npip install fastmcp ibis-framework\n```\n\n### Path Issues\n\nMake sure file paths in your configuration are absolute paths, not relative:\n\n```json\n{\n \"mcpServers\": {\n \"flight_sm\": {\n \"command\": \"python\",\n \"args\": [\"/Users/username/projects/my-project/example_mcp.py\"]\n }\n }\n}\n```\n\n## Next Steps\n\n- Learn about [YAML Configuration](/building/yaml) for managing multiple models\n- Explore [Query Methods](/querying/methods) to understand what queries LLMs can perform\n- See [Charting](/querying/charting) for visualization capabilities\n- Review the [full API Reference](/reference) for advanced features\n", "queries": {}, "files": {} } diff --git a/docs/web/public/bsl-data/nested-subtotals.json b/docs/web/public/bsl-data/nested-subtotals.json index 412f970..c223f32 100644 --- a/docs/web/public/bsl-data/nested-subtotals.json +++ b/docs/web/public/bsl-data/nested-subtotals.json @@ -3,7 +3,7 @@ "queries": { "setup_data": { "code": "import ibis\nfrom ibis import _\nfrom boring_semantic_layer import to_semantic_table\n\n# Create synthetic order items data\norder_items_data = ibis.memtable({\n \"order_id\": [1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010,\n 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020,\n 1021, 1022, 1023, 1024, 1025, 1026, 1027, 1028, 1029, 1030],\n \"sale_price\": [45.99, 89.50, 120.00, 34.99, 67.80, 99.99, 54.50, 78.99, 150.00, 42.00,\n 55.99, 72.50, 88.80, 110.00, 39.99, 95.00, 62.50, 81.99, 125.00, 48.50,\n 66.99, 92.00, 105.50, 73.99, 58.80, 118.00, 84.50, 69.99, 135.00, 51.50],\n \"status\": [\"shipped\", \"delivered\", \"shipped\", \"processing\", \"delivered\",\n \"shipped\", \"cancelled\", \"delivered\", \"shipped\", \"processing\",\n \"delivered\", \"shipped\", \"delivered\", \"processing\", \"shipped\",\n \"cancelled\", \"delivered\", \"shipped\", \"delivered\", \"processing\",\n \"shipped\", \"delivered\", \"shipped\", \"processing\", \"delivered\",\n \"shipped\", \"cancelled\", \"delivered\", \"shipped\", \"processing\"],\n \"created_year\": [2022, 2022, 2022, 2022, 2022, 2022, 2022, 2022, 2022, 2022,\n 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023,\n 2024, 2024, 2024, 2024, 2024, 2024, 2024, 2024, 2024, 2024],\n \"created_month\": [1, 1, 2, 2, 3, 3, 4, 4, 5, 5,\n 1, 1, 2, 2, 3, 3, 4, 4, 5, 5,\n 1, 1, 2, 2, 3, 3, 4, 4, 5, 5]\n})\n\n# Create semantic table with measures\norder_items = to_semantic_table(\n order_items_data,\n name=\"order_items\",\n).with_measures(\n order_count=lambda t: t.count(),\n total_sales=lambda t: t.sale_price.sum(),\n avg_price=lambda t: t.sale_price.mean(),\n)", - "sql": "SELECT\n *\nFROM \"ibis_pandas_memtable_qshowqo55jbq3jvudcyrnwannq\"", + "sql": "SELECT\n *\nFROM \"ibis_pandas_memtable_pqzxfzd72fhkrizksnexkuj3gm\"", "plan": "SemanticTable: order_items\n order_count [measure]\n total_sales [measure]\n avg_price [measure]", "table": { "columns": [ @@ -229,7 +229,7 @@ }, "query_year_with_months": { "code": "from ibis import _\n\n# First aggregate by year and month to get monthly subtotals\nmonthly_data = (\n order_items\n .group_by(\"created_year\", \"created_month\")\n .aggregate(\"order_count\", \"total_sales\")\n)\n\n# Then nest months within years\nresult = (\n monthly_data\n .group_by(\"created_year\")\n .aggregate(\n year_order_count=lambda t: t.order_count.sum(),\n year_total_sales=lambda t: t.total_sales.sum(),\n nest={\"by_month\": lambda t: t.group_by([\"created_month\", \"order_count\", \"total_sales\"]).order_by(\"created_month\")}\n )\n .order_by(\"created_year\")\n)", - "sql": "SELECT\n *\nFROM (\n SELECT\n \"t2\".\"created_year\",\n SUM(\"t2\".\"order_count\") AS \"year_order_count\",\n SUM(\"t2\".\"total_sales\") AS \"year_total_sales\",\n ARRAY_AGG(\n NAMED_STRUCT(\n 'created_month',\n \"t2\".\"created_month\",\n 'order_count',\n \"t2\".\"order_count\",\n 'total_sales',\n \"t2\".\"total_sales\"\n )\n ) FILTER(WHERE\n NAMED_STRUCT(\n 'created_month',\n \"t2\".\"created_month\",\n 'order_count',\n \"t2\".\"order_count\",\n 'total_sales',\n \"t2\".\"total_sales\"\n ) IS NOT NULL) AS \"by_month\"\n FROM (\n SELECT\n \"t2\".\"created_month\",\n \"t2\".\"order_count\",\n \"t2\".\"total_sales\",\n \"t2\".\"created_year\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t0\".\"created_year\",\n \"t0\".\"created_month\",\n COUNT(*) AS \"order_count\",\n SUM(\"t0\".\"sale_price\") AS \"total_sales\"\n FROM (\n SELECT\n \"t0\".\"order_id\",\n \"t0\".\"sale_price\",\n \"t0\".\"status\",\n \"t0\".\"created_year\",\n \"t0\".\"created_month\"\n FROM \"ibis_pandas_memtable_qshowqo55jbq3jvudcyrnwannq\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"created_year\",\n \"t0\".\"created_month\"\n ) AS \"t1\"\n ) AS \"t2\"\n ) AS t2\n GROUP BY\n \"t2\".\"created_year\"\n) AS \"t3\"\nORDER BY\n \"t3\".\"created_year\" ASC", + "sql": "SELECT\n *\nFROM (\n SELECT\n \"t2\".\"created_year\",\n SUM(\"t2\".\"order_count\") AS \"year_order_count\",\n SUM(\"t2\".\"total_sales\") AS \"year_total_sales\",\n ARRAY_AGG(\n NAMED_STRUCT(\n 'created_month',\n \"t2\".\"created_month\",\n 'order_count',\n \"t2\".\"order_count\",\n 'total_sales',\n \"t2\".\"total_sales\"\n )\n ) FILTER(WHERE\n NAMED_STRUCT(\n 'created_month',\n \"t2\".\"created_month\",\n 'order_count',\n \"t2\".\"order_count\",\n 'total_sales',\n \"t2\".\"total_sales\"\n ) IS NOT NULL) AS \"by_month\"\n FROM (\n SELECT\n \"t2\".\"created_month\",\n \"t2\".\"order_count\",\n \"t2\".\"total_sales\",\n \"t2\".\"created_year\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t0\".\"created_year\",\n \"t0\".\"created_month\",\n COUNT(*) AS \"order_count\",\n SUM(\"t0\".\"sale_price\") AS \"total_sales\"\n FROM (\n SELECT\n \"t0\".\"order_id\",\n \"t0\".\"sale_price\",\n \"t0\".\"status\",\n \"t0\".\"created_year\",\n \"t0\".\"created_month\"\n FROM \"ibis_pandas_memtable_pqzxfzd72fhkrizksnexkuj3gm\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"created_year\",\n \"t0\".\"created_month\"\n ) AS \"t1\"\n ) AS \"t2\"\n ) AS t2\n GROUP BY\n \"t2\".\"created_year\"\n) AS \"t3\"\nORDER BY\n \"t3\".\"created_year\" ASC", "plan": "SemanticTable: order_items\n order_count [measure]\n total_sales [measure]\n avg_price [measure]\n-> GroupBy(created_year, created_month)\n-> Aggregate(order_count, total_sales)\n-> GroupBy(created_year)\n-> Aggregate(year_order_count, year_total_sales, by_month)\n-> OrderBy(created_year)", "table": { "columns": [ @@ -245,15 +245,20 @@ 783.76, [ { - "created_month": 2, + "created_month": 3, "order_count": 2, - "total_sales": 154.99 + "total_sales": 167.79 }, { "created_month": 4, "order_count": 2, "total_sales": 133.49 }, + { + "created_month": 2, + "order_count": 2, + "total_sales": 154.99 + }, { "created_month": 5, "order_count": 2, @@ -263,11 +268,6 @@ "created_month": 1, "order_count": 2, "total_sales": 135.49 - }, - { - "created_month": 3, - "order_count": 2, - "total_sales": 167.79 } ] ], @@ -277,29 +277,29 @@ 780.27, [ { - "created_month": 3, + "created_month": 2, "order_count": 2, - "total_sales": 134.99 + "total_sales": 198.8 }, { - "created_month": 5, + "created_month": 3, "order_count": 2, - "total_sales": 173.5 + "total_sales": 134.99 }, { - "created_month": 1, + "created_month": 4, "order_count": 2, - "total_sales": 128.49 + "total_sales": 144.49 }, { - "created_month": 2, + "created_month": 5, "order_count": 2, - "total_sales": 198.8 + "total_sales": 173.5 }, { - "created_month": 4, + "created_month": 1, "order_count": 2, - "total_sales": 144.49 + "total_sales": 128.49 } ] ], @@ -309,14 +309,14 @@ 856.27, [ { - "created_month": 2, + "created_month": 4, "order_count": 2, - "total_sales": 179.49 + "total_sales": 154.49 }, { - "created_month": 4, + "created_month": 1, "order_count": 2, - "total_sales": 154.49 + "total_sales": 158.99 }, { "created_month": 3, @@ -324,9 +324,9 @@ "total_sales": 176.8 }, { - "created_month": 1, + "created_month": 2, "order_count": 2, - "total_sales": 158.99 + "total_sales": 179.49 }, { "created_month": 5, @@ -336,176 +336,11 @@ ] ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-bfc9dd5a3a31528b3de95ab119e59933" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "created_year", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "created_year", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "year_order_count", - "year_total_sales", - "by_month" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-bfc9dd5a3a31528b3de95ab119e59933": [ - { - "created_year": 2022, - "year_order_count": 10, - "year_total_sales": 783.76, - "by_month": [ - { - "created_month": 3, - "order_count": 2, - "total_sales": 167.79 - }, - { - "created_month": 2, - "order_count": 2, - "total_sales": 154.99 - }, - { - "created_month": 4, - "order_count": 2, - "total_sales": 133.49 - }, - { - "created_month": 5, - "order_count": 2, - "total_sales": 192.0 - }, - { - "created_month": 1, - "order_count": 2, - "total_sales": 135.49 - } - ] - }, - { - "created_year": 2023, - "year_order_count": 10, - "year_total_sales": 780.27, - "by_month": [ - { - "created_month": 2, - "order_count": 2, - "total_sales": 198.8 - }, - { - "created_month": 4, - "order_count": 2, - "total_sales": 144.49 - }, - { - "created_month": 5, - "order_count": 2, - "total_sales": 173.5 - }, - { - "created_month": 1, - "order_count": 2, - "total_sales": 128.49 - }, - { - "created_month": 3, - "order_count": 2, - "total_sales": 134.99 - } - ] - }, - { - "created_year": 2024, - "year_order_count": 10, - "year_total_sales": 856.27, - "by_month": [ - { - "created_month": 2, - "order_count": 2, - "total_sales": 179.49 - }, - { - "created_month": 4, - "order_count": 2, - "total_sales": 154.49 - }, - { - "created_month": 5, - "order_count": 2, - "total_sales": 186.5 - }, - { - "created_month": 1, - "order_count": 2, - "total_sales": 158.99 - }, - { - "created_month": 3, - "order_count": 2, - "total_sales": 176.8 - } - ] - } - ] - } - } } }, "query_year_with_status": { "code": "from ibis import _\n\n# First aggregate by year and status\nstatus_data = (\n order_items\n .group_by(\"created_year\", \"status\")\n .aggregate(\"order_count\", \"total_sales\", \"avg_price\")\n)\n\n# Then nest status within years\nresult = (\n status_data\n .group_by(\"created_year\")\n .aggregate(\n year_order_count=lambda t: t.order_count.sum(),\n year_total_sales=lambda t: t.total_sales.sum(),\n nest={\"by_status\": lambda t: t.group_by([\"status\", \"order_count\", \"total_sales\", \"avg_price\"]).order_by(xo.desc(\"total_sales\"))}\n )\n .order_by(\"created_year\")\n)", - "sql": "SELECT\n *\nFROM (\n SELECT\n \"t2\".\"created_year\",\n SUM(\"t2\".\"order_count\") AS \"year_order_count\",\n SUM(\"t2\".\"total_sales\") AS \"year_total_sales\",\n ARRAY_AGG(\n NAMED_STRUCT(\n 'status',\n \"t2\".\"status\",\n 'order_count',\n \"t2\".\"order_count\",\n 'total_sales',\n \"t2\".\"total_sales\",\n 'avg_price',\n \"t2\".\"avg_price\"\n )\n ) FILTER(WHERE\n NAMED_STRUCT(\n 'status',\n \"t2\".\"status\",\n 'order_count',\n \"t2\".\"order_count\",\n 'total_sales',\n \"t2\".\"total_sales\",\n 'avg_price',\n \"t2\".\"avg_price\"\n ) IS NOT NULL) AS \"by_status\"\n FROM (\n SELECT\n \"t2\".\"status\",\n \"t2\".\"order_count\",\n \"t2\".\"total_sales\",\n \"t2\".\"avg_price\",\n \"t2\".\"created_year\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t0\".\"created_year\",\n \"t0\".\"status\",\n COUNT(*) AS \"order_count\",\n SUM(\"t0\".\"sale_price\") AS \"total_sales\",\n AVG(\"t0\".\"sale_price\") AS \"avg_price\"\n FROM (\n SELECT\n \"t0\".\"order_id\",\n \"t0\".\"sale_price\",\n \"t0\".\"created_month\",\n \"t0\".\"created_year\",\n \"t0\".\"status\"\n FROM \"ibis_pandas_memtable_qshowqo55jbq3jvudcyrnwannq\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"created_year\",\n \"t0\".\"status\"\n ) AS \"t1\"\n ) AS \"t2\"\n ) AS t2\n GROUP BY\n \"t2\".\"created_year\"\n) AS \"t3\"\nORDER BY\n \"t3\".\"created_year\" ASC", + "sql": "SELECT\n *\nFROM (\n SELECT\n \"t2\".\"created_year\",\n SUM(\"t2\".\"order_count\") AS \"year_order_count\",\n SUM(\"t2\".\"total_sales\") AS \"year_total_sales\",\n ARRAY_AGG(\n NAMED_STRUCT(\n 'status',\n \"t2\".\"status\",\n 'order_count',\n \"t2\".\"order_count\",\n 'total_sales',\n \"t2\".\"total_sales\",\n 'avg_price',\n \"t2\".\"avg_price\"\n )\n ) FILTER(WHERE\n NAMED_STRUCT(\n 'status',\n \"t2\".\"status\",\n 'order_count',\n \"t2\".\"order_count\",\n 'total_sales',\n \"t2\".\"total_sales\",\n 'avg_price',\n \"t2\".\"avg_price\"\n ) IS NOT NULL) AS \"by_status\"\n FROM (\n SELECT\n \"t2\".\"status\",\n \"t2\".\"order_count\",\n \"t2\".\"total_sales\",\n \"t2\".\"avg_price\",\n \"t2\".\"created_year\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t0\".\"created_year\",\n \"t0\".\"status\",\n COUNT(*) AS \"order_count\",\n SUM(\"t0\".\"sale_price\") AS \"total_sales\",\n AVG(\"t0\".\"sale_price\") AS \"avg_price\"\n FROM (\n SELECT\n \"t0\".\"order_id\",\n \"t0\".\"sale_price\",\n \"t0\".\"created_month\",\n \"t0\".\"created_year\",\n \"t0\".\"status\"\n FROM \"ibis_pandas_memtable_pqzxfzd72fhkrizksnexkuj3gm\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"created_year\",\n \"t0\".\"status\"\n ) AS \"t1\"\n ) AS \"t2\"\n ) AS t2\n GROUP BY\n \"t2\".\"created_year\"\n) AS \"t3\"\nORDER BY\n \"t3\".\"created_year\" ASC", "plan": "SemanticTable: order_items\n order_count [measure]\n total_sales [measure]\n avg_price [measure]\n-> GroupBy(created_year, status)\n-> Aggregate(order_count, total_sales, avg_price)\n-> GroupBy(created_year)\n-> Aggregate(year_order_count, year_total_sales, by_status)\n-> OrderBy(created_year)", "table": { "columns": [ @@ -521,10 +356,10 @@ 783.76, [ { - "status": "processing", - "order_count": 2, - "total_sales": 76.99000000000001, - "avg_price": 38.495000000000005 + "status": "shipped", + "order_count": 4, + "total_sales": 415.98, + "avg_price": 103.995 }, { "status": "cancelled", @@ -532,17 +367,17 @@ "total_sales": 54.5, "avg_price": 54.5 }, + { + "status": "processing", + "order_count": 2, + "total_sales": 76.99000000000001, + "avg_price": 38.495000000000005 + }, { "status": "delivered", "order_count": 3, "total_sales": 236.29000000000002, "avg_price": 78.76333333333334 - }, - { - "status": "shipped", - "order_count": 4, - "total_sales": 415.98, - "avg_price": 103.995 } ] ], @@ -551,6 +386,12 @@ 10, 780.27, [ + { + "status": "processing", + "order_count": 2, + "total_sales": 158.5, + "avg_price": 79.25 + }, { "status": "shipped", "order_count": 3, @@ -568,12 +409,6 @@ "order_count": 1, "total_sales": 95.0, "avg_price": 95.0 - }, - { - "status": "processing", - "order_count": 2, - "total_sales": 158.5, - "avg_price": 79.25 } ] ], @@ -582,200 +417,38 @@ 10, 856.27, [ - { - "status": "delivered", - "order_count": 3, - "total_sales": 220.79000000000002, - "avg_price": 73.59666666666668 - }, - { - "status": "cancelled", - "order_count": 1, - "total_sales": 84.5, - "avg_price": 84.5 - }, { "status": "shipped", "order_count": 4, "total_sales": 425.49, "avg_price": 106.3725 }, + { + "status": "delivered", + "order_count": 3, + "total_sales": 220.79000000000002, + "avg_price": 73.59666666666668 + }, { "status": "processing", "order_count": 2, "total_sales": 125.49, "avg_price": 62.745 - } - ] - ] - ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-8eaa6607900ae49689a26f7fc89874c4" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "created_year", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "created_year", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "year_order_count", - "year_total_sales", - "by_status" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-8eaa6607900ae49689a26f7fc89874c4": [ - { - "created_year": 2022, - "year_order_count": 10, - "year_total_sales": 783.76, - "by_status": [ - { - "status": "cancelled", - "order_count": 1, - "total_sales": 54.5, - "avg_price": 54.5 - }, - { - "status": "processing", - "order_count": 2, - "total_sales": 76.99000000000001, - "avg_price": 38.495000000000005 - }, - { - "status": "shipped", - "order_count": 4, - "total_sales": 415.98, - "avg_price": 103.995 - }, - { - "status": "delivered", - "order_count": 3, - "total_sales": 236.29000000000002, - "avg_price": 78.76333333333334 - } - ] }, { - "created_year": 2023, - "year_order_count": 10, - "year_total_sales": 780.27, - "by_status": [ - { - "status": "shipped", - "order_count": 3, - "total_sales": 194.48000000000002, - "avg_price": 64.82666666666667 - }, - { - "status": "processing", - "order_count": 2, - "total_sales": 158.5, - "avg_price": 79.25 - }, - { - "status": "cancelled", - "order_count": 1, - "total_sales": 95.0, - "avg_price": 95.0 - }, - { - "status": "delivered", - "order_count": 4, - "total_sales": 332.28999999999996, - "avg_price": 83.07249999999999 - } - ] - }, - { - "created_year": 2024, - "year_order_count": 10, - "year_total_sales": 856.27, - "by_status": [ - { - "status": "shipped", - "order_count": 4, - "total_sales": 425.49, - "avg_price": 106.3725 - }, - { - "status": "cancelled", - "order_count": 1, - "total_sales": 84.5, - "avg_price": 84.5 - }, - { - "status": "delivered", - "order_count": 3, - "total_sales": 220.79000000000002, - "avg_price": 73.59666666666668 - }, - { - "status": "processing", - "order_count": 2, - "total_sales": 125.49, - "avg_price": 62.745 - } - ] + "status": "cancelled", + "order_count": 1, + "total_sales": 84.5, + "avg_price": 84.5 } ] - } - } + ] + ] } }, "query_multi_level": { "code": "from ibis import _\n\n# First aggregate at the finest level: year, month, status\ndetailed_data = (\n order_items\n .group_by(\"created_year\", \"created_month\", \"status\")\n .aggregate(\"order_count\", \"total_sales\")\n)\n\n# Second level: nest status within month\nmonthly_with_status = (\n detailed_data\n .group_by(\"created_year\", \"created_month\")\n .aggregate(\n month_order_count=lambda t: t.order_count.sum(),\n month_total_sales=lambda t: t.total_sales.sum(),\n nest={\"by_status\": lambda t: t.group_by([\"status\", \"order_count\", \"total_sales\"])}\n )\n)\n\n# Top level: nest months within year\nresult = (\n monthly_with_status\n .group_by(\"created_year\")\n .aggregate(\n year_order_count=lambda t: t.month_order_count.sum(),\n year_total_sales=lambda t: t.month_total_sales.sum(),\n nest={\"by_month\": lambda t: t.group_by([\"created_month\", \"month_order_count\", \"month_total_sales\", \"by_status\"]).order_by(\"created_month\")}\n )\n .order_by(\"created_year\")\n .limit(3)\n)", - "sql": "SELECT\n *\nFROM (\n SELECT\n \"t4\".\"created_year\",\n SUM(\"t4\".\"month_order_count\") AS \"year_order_count\",\n SUM(\"t4\".\"month_total_sales\") AS \"year_total_sales\",\n ARRAY_AGG(\n NAMED_STRUCT(\n 'created_month',\n \"t4\".\"created_month\",\n 'month_order_count',\n \"t4\".\"month_order_count\",\n 'month_total_sales',\n \"t4\".\"month_total_sales\",\n 'by_status',\n \"t4\".\"by_status\"\n )\n ) FILTER(WHERE\n NAMED_STRUCT(\n 'created_month',\n \"t4\".\"created_month\",\n 'month_order_count',\n \"t4\".\"month_order_count\",\n 'month_total_sales',\n \"t4\".\"month_total_sales\",\n 'by_status',\n \"t4\".\"by_status\"\n ) IS NOT NULL) AS \"by_month\"\n FROM (\n SELECT\n \"t4\".\"created_month\",\n \"t4\".\"month_order_count\",\n \"t4\".\"month_total_sales\",\n \"t4\".\"by_status\",\n \"t4\".\"created_year\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t2\".\"created_year\",\n \"t2\".\"created_month\",\n SUM(\"t2\".\"order_count\") AS \"month_order_count\",\n SUM(\"t2\".\"total_sales\") AS \"month_total_sales\",\n ARRAY_AGG(\n NAMED_STRUCT(\n 'status',\n \"t2\".\"status\",\n 'order_count',\n \"t2\".\"order_count\",\n 'total_sales',\n \"t2\".\"total_sales\"\n )\n ) FILTER(WHERE\n NAMED_STRUCT(\n 'status',\n \"t2\".\"status\",\n 'order_count',\n \"t2\".\"order_count\",\n 'total_sales',\n \"t2\".\"total_sales\"\n ) IS NOT NULL) AS \"by_status\"\n FROM (\n SELECT\n \"t2\".\"status\",\n \"t2\".\"order_count\",\n \"t2\".\"total_sales\",\n \"t2\".\"created_year\",\n \"t2\".\"created_month\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t0\".\"created_year\",\n \"t0\".\"created_month\",\n \"t0\".\"status\",\n COUNT(*) AS \"order_count\",\n SUM(\"t0\".\"sale_price\") AS \"total_sales\"\n FROM (\n SELECT\n \"t0\".\"order_id\",\n \"t0\".\"sale_price\",\n \"t0\".\"created_year\",\n \"t0\".\"created_month\",\n \"t0\".\"status\"\n FROM \"ibis_pandas_memtable_qshowqo55jbq3jvudcyrnwannq\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"created_year\",\n \"t0\".\"created_month\",\n \"t0\".\"status\"\n ) AS \"t1\"\n ) AS \"t2\"\n ) AS t2\n GROUP BY\n \"t2\".\"created_year\",\n \"t2\".\"created_month\"\n ) AS \"t3\"\n ) AS \"t4\"\n ) AS t4\n GROUP BY\n \"t4\".\"created_year\"\n) AS \"t5\"\nORDER BY\n \"t5\".\"created_year\" ASC\nLIMIT 3", + "sql": "SELECT\n *\nFROM (\n SELECT\n \"t4\".\"created_year\",\n SUM(\"t4\".\"month_order_count\") AS \"year_order_count\",\n SUM(\"t4\".\"month_total_sales\") AS \"year_total_sales\",\n ARRAY_AGG(\n NAMED_STRUCT(\n 'created_month',\n \"t4\".\"created_month\",\n 'month_order_count',\n \"t4\".\"month_order_count\",\n 'month_total_sales',\n \"t4\".\"month_total_sales\",\n 'by_status',\n \"t4\".\"by_status\"\n )\n ) FILTER(WHERE\n NAMED_STRUCT(\n 'created_month',\n \"t4\".\"created_month\",\n 'month_order_count',\n \"t4\".\"month_order_count\",\n 'month_total_sales',\n \"t4\".\"month_total_sales\",\n 'by_status',\n \"t4\".\"by_status\"\n ) IS NOT NULL) AS \"by_month\"\n FROM (\n SELECT\n \"t4\".\"created_month\",\n \"t4\".\"month_order_count\",\n \"t4\".\"month_total_sales\",\n \"t4\".\"by_status\",\n \"t4\".\"created_year\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t2\".\"created_year\",\n \"t2\".\"created_month\",\n SUM(\"t2\".\"order_count\") AS \"month_order_count\",\n SUM(\"t2\".\"total_sales\") AS \"month_total_sales\",\n ARRAY_AGG(\n NAMED_STRUCT(\n 'status',\n \"t2\".\"status\",\n 'order_count',\n \"t2\".\"order_count\",\n 'total_sales',\n \"t2\".\"total_sales\"\n )\n ) FILTER(WHERE\n NAMED_STRUCT(\n 'status',\n \"t2\".\"status\",\n 'order_count',\n \"t2\".\"order_count\",\n 'total_sales',\n \"t2\".\"total_sales\"\n ) IS NOT NULL) AS \"by_status\"\n FROM (\n SELECT\n \"t2\".\"status\",\n \"t2\".\"order_count\",\n \"t2\".\"total_sales\",\n \"t2\".\"created_year\",\n \"t2\".\"created_month\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t0\".\"created_year\",\n \"t0\".\"created_month\",\n \"t0\".\"status\",\n COUNT(*) AS \"order_count\",\n SUM(\"t0\".\"sale_price\") AS \"total_sales\"\n FROM (\n SELECT\n \"t0\".\"order_id\",\n \"t0\".\"sale_price\",\n \"t0\".\"created_year\",\n \"t0\".\"created_month\",\n \"t0\".\"status\"\n FROM \"ibis_pandas_memtable_pqzxfzd72fhkrizksnexkuj3gm\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"created_year\",\n \"t0\".\"created_month\",\n \"t0\".\"status\"\n ) AS \"t1\"\n ) AS \"t2\"\n ) AS t2\n GROUP BY\n \"t2\".\"created_year\",\n \"t2\".\"created_month\"\n ) AS \"t3\"\n ) AS \"t4\"\n ) AS t4\n GROUP BY\n \"t4\".\"created_year\"\n) AS \"t5\"\nORDER BY\n \"t5\".\"created_year\" ASC\nLIMIT 3", "plan": "SemanticTable: order_items\n order_count [measure]\n total_sales [measure]\n avg_price [measure]\n-> GroupBy(created_year, created_month, status)\n-> Aggregate(order_count, total_sales)\n-> GroupBy(created_year, created_month)\n-> Aggregate(month_order_count, month_total_sales, by_status)\n-> GroupBy(created_year)\n-> Aggregate(year_order_count, year_total_sales, by_month)\n-> OrderBy(created_year)\n-> Limit(3)", "table": { "columns": [ @@ -791,53 +464,53 @@ 783.76, [ { - "created_month": 1, + "created_month": 3, "month_order_count": 2, - "month_total_sales": 135.49, + "month_total_sales": 167.79, "by_status": [ { "status": "shipped", "order_count": 1, - "total_sales": 45.99 + "total_sales": 99.99 }, { "status": "delivered", "order_count": 1, - "total_sales": 89.5 + "total_sales": 67.8 } ] }, { - "created_month": 3, + "created_month": 2, "month_order_count": 2, - "month_total_sales": 167.79, + "month_total_sales": 154.99, "by_status": [ { - "status": "delivered", + "status": "shipped", "order_count": 1, - "total_sales": 67.8 + "total_sales": 120.0 }, { - "status": "shipped", + "status": "processing", "order_count": 1, - "total_sales": 99.99 + "total_sales": 34.99 } ] }, { - "created_month": 2, + "created_month": 1, "month_order_count": 2, - "month_total_sales": 154.99, + "month_total_sales": 135.49, "by_status": [ { - "status": "processing", + "status": "shipped", "order_count": 1, - "total_sales": 34.99 + "total_sales": 45.99 }, { - "status": "shipped", + "status": "delivered", "order_count": 1, - "total_sales": 120.0 + "total_sales": 89.5 } ] }, @@ -847,14 +520,14 @@ "month_total_sales": 133.49, "by_status": [ { - "status": "cancelled", + "status": "delivered", "order_count": 1, - "total_sales": 54.5 + "total_sales": 78.99 }, { - "status": "delivered", + "status": "cancelled", "order_count": 1, - "total_sales": 78.99 + "total_sales": 54.5 } ] }, @@ -917,53 +590,53 @@ ] }, { - "created_month": 4, + "created_month": 5, "month_order_count": 2, - "month_total_sales": 144.49, + "month_total_sales": 173.5, "by_status": [ { - "status": "delivered", + "status": "processing", "order_count": 1, - "total_sales": 62.5 + "total_sales": 48.5 }, { - "status": "shipped", + "status": "delivered", "order_count": 1, - "total_sales": 81.99 + "total_sales": 125.0 } ] }, { - "created_month": 1, + "created_month": 4, "month_order_count": 2, - "month_total_sales": 128.49, + "month_total_sales": 144.49, "by_status": [ { - "status": "delivered", + "status": "shipped", "order_count": 1, - "total_sales": 55.99 + "total_sales": 81.99 }, { - "status": "shipped", + "status": "delivered", "order_count": 1, - "total_sales": 72.5 + "total_sales": 62.5 } ] }, { - "created_month": 5, + "created_month": 1, "month_order_count": 2, - "month_total_sales": 173.5, + "month_total_sales": 128.49, "by_status": [ { "status": "delivered", "order_count": 1, - "total_sales": 125.0 + "total_sales": 55.99 }, { - "status": "processing", + "status": "shipped", "order_count": 1, - "total_sales": 48.5 + "total_sales": 72.5 } ] } @@ -992,36 +665,36 @@ ] }, { - "created_month": 3, + "created_month": 5, "month_order_count": 2, - "month_total_sales": 176.8, + "month_total_sales": 186.5, "by_status": [ { - "status": "shipped", + "status": "processing", "order_count": 1, - "total_sales": 118.0 + "total_sales": 51.5 }, { - "status": "delivered", + "status": "shipped", "order_count": 1, - "total_sales": 58.8 + "total_sales": 135.0 } ] }, { - "created_month": 5, + "created_month": 3, "month_order_count": 2, - "month_total_sales": 186.5, + "month_total_sales": 176.8, "by_status": [ { - "status": "shipped", + "status": "delivered", "order_count": 1, - "total_sales": 135.0 + "total_sales": 58.8 }, { - "status": "processing", + "status": "shipped", "order_count": 1, - "total_sales": 51.5 + "total_sales": 118.0 } ] }, @@ -1062,351 +735,6 @@ ] ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-386fdaffdbf5261d6374115962b48c11" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "created_year", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "created_year", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "year_order_count", - "year_total_sales", - "by_month" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-386fdaffdbf5261d6374115962b48c11": [ - { - "created_year": 2022, - "year_order_count": 10, - "year_total_sales": 783.76, - "by_month": [ - { - "created_month": 1, - "month_order_count": 2, - "month_total_sales": 135.49, - "by_status": [ - { - "status": "shipped", - "order_count": 1, - "total_sales": 45.99 - }, - { - "status": "delivered", - "order_count": 1, - "total_sales": 89.5 - } - ] - }, - { - "created_month": 4, - "month_order_count": 2, - "month_total_sales": 133.49, - "by_status": [ - { - "status": "cancelled", - "order_count": 1, - "total_sales": 54.5 - }, - { - "status": "delivered", - "order_count": 1, - "total_sales": 78.99 - } - ] - }, - { - "created_month": 2, - "month_order_count": 2, - "month_total_sales": 154.99, - "by_status": [ - { - "status": "processing", - "order_count": 1, - "total_sales": 34.99 - }, - { - "status": "shipped", - "order_count": 1, - "total_sales": 120.0 - } - ] - }, - { - "created_month": 3, - "month_order_count": 2, - "month_total_sales": 167.79, - "by_status": [ - { - "status": "delivered", - "order_count": 1, - "total_sales": 67.8 - }, - { - "status": "shipped", - "order_count": 1, - "total_sales": 99.99 - } - ] - }, - { - "created_month": 5, - "month_order_count": 2, - "month_total_sales": 192.0, - "by_status": [ - { - "status": "processing", - "order_count": 1, - "total_sales": 42.0 - }, - { - "status": "shipped", - "order_count": 1, - "total_sales": 150.0 - } - ] - } - ] - }, - { - "created_year": 2023, - "year_order_count": 10, - "year_total_sales": 780.27, - "by_month": [ - { - "created_month": 2, - "month_order_count": 2, - "month_total_sales": 198.8, - "by_status": [ - { - "status": "delivered", - "order_count": 1, - "total_sales": 88.8 - }, - { - "status": "processing", - "order_count": 1, - "total_sales": 110.0 - } - ] - }, - { - "created_month": 4, - "month_order_count": 2, - "month_total_sales": 144.49, - "by_status": [ - { - "status": "delivered", - "order_count": 1, - "total_sales": 62.5 - }, - { - "status": "shipped", - "order_count": 1, - "total_sales": 81.99 - } - ] - }, - { - "created_month": 5, - "month_order_count": 2, - "month_total_sales": 173.5, - "by_status": [ - { - "status": "delivered", - "order_count": 1, - "total_sales": 125.0 - }, - { - "status": "processing", - "order_count": 1, - "total_sales": 48.5 - } - ] - }, - { - "created_month": 1, - "month_order_count": 2, - "month_total_sales": 128.49, - "by_status": [ - { - "status": "delivered", - "order_count": 1, - "total_sales": 55.99 - }, - { - "status": "shipped", - "order_count": 1, - "total_sales": 72.5 - } - ] - }, - { - "created_month": 3, - "month_order_count": 2, - "month_total_sales": 134.99, - "by_status": [ - { - "status": "cancelled", - "order_count": 1, - "total_sales": 95.0 - }, - { - "status": "shipped", - "order_count": 1, - "total_sales": 39.99 - } - ] - } - ] - }, - { - "created_year": 2024, - "year_order_count": 10, - "year_total_sales": 856.27, - "by_month": [ - { - "created_month": 1, - "month_order_count": 2, - "month_total_sales": 158.99, - "by_status": [ - { - "status": "shipped", - "order_count": 1, - "total_sales": 66.99 - }, - { - "status": "delivered", - "order_count": 1, - "total_sales": 92.0 - } - ] - }, - { - "created_month": 3, - "month_order_count": 2, - "month_total_sales": 176.8, - "by_status": [ - { - "status": "shipped", - "order_count": 1, - "total_sales": 118.0 - }, - { - "status": "delivered", - "order_count": 1, - "total_sales": 58.8 - } - ] - }, - { - "created_month": 5, - "month_order_count": 2, - "month_total_sales": 186.5, - "by_status": [ - { - "status": "shipped", - "order_count": 1, - "total_sales": 135.0 - }, - { - "status": "processing", - "order_count": 1, - "total_sales": 51.5 - } - ] - }, - { - "created_month": 4, - "month_order_count": 2, - "month_total_sales": 154.49, - "by_status": [ - { - "status": "cancelled", - "order_count": 1, - "total_sales": 84.5 - }, - { - "status": "delivered", - "order_count": 1, - "total_sales": 69.99 - } - ] - }, - { - "created_month": 2, - "month_order_count": 2, - "month_total_sales": 179.49, - "by_status": [ - { - "status": "processing", - "order_count": 1, - "total_sales": 73.99 - }, - { - "status": "shipped", - "order_count": 1, - "total_sales": 105.5 - } - ] - } - ] - } - ] - } - } } } }, diff --git a/docs/web/public/bsl-data/percentage-total.json b/docs/web/public/bsl-data/percentage-total.json index cfbcd29..784aa53 100644 --- a/docs/web/public/bsl-data/percentage-total.json +++ b/docs/web/public/bsl-data/percentage-total.json @@ -3,7 +3,7 @@ "queries": { "setup_data": { "code": "import ibis\nfrom ibis import _\nfrom boring_semantic_layer import to_semantic_table\n\n# Create synthetic flights data with carrier information\nflights_data = ibis.memtable({\n \"flight_id\": list(range(1, 51)),\n \"carrier\": [\"AA\", \"UA\", \"DL\", \"WN\", \"B6\"] * 10,\n \"nickname\": [\"American Airlines\", \"United Airlines\", \"Delta Air Lines\",\n \"Southwest Airlines\", \"JetBlue Airways\"] * 10,\n \"origin\": [\"JFK\", \"LAX\", \"ORD\", \"ATL\", \"DFW\"] * 10,\n \"distance\": [2475, 1745, 733, 946, 1383, 2475, 1745, 733, 946, 1383,\n 2475, 1745, 733, 946, 1383, 2475, 1745, 733, 946, 1383,\n 2475, 1745, 733, 946, 1383, 2475, 1745, 733, 946, 1383,\n 2475, 1745, 733, 946, 1383, 2475, 1745, 733, 946, 1383,\n 2475, 1745, 733, 946, 1383, 2475, 1745, 733, 946, 1383]\n})\n\n# Create semantic table with measures including percentage calculations\nflights = (\n to_semantic_table(flights_data, name=\"flights\")\n .with_measures(\n flight_count=lambda t: t.count(),\n total_distance=lambda t: t.distance.sum(),\n )\n .with_measures(\n market_share=lambda t: t.flight_count / t.all(t.flight_count) * 100,\n distance_share=lambda t: t.total_distance / t.all(t.total_distance) * 100,\n )\n)", - "sql": "SELECT\n *\nFROM \"ibis_pandas_memtable_a3plt5y4evbnnmg4r5owcqizmu\"", + "sql": "SELECT\n *\nFROM \"ibis_pandas_memtable_jhatbbyjgngh5ou53h2apqqnaa\"", "plan": "SemanticTable: flights\n flight_count [measure]\n total_distance [measure]\n market_share [calc]\n distance_share [calc]", "table": { "columns": [ @@ -369,7 +369,7 @@ }, "query_market_share": { "code": "from ibis import _\n\nresult = (\n flights.group_by(\"nickname\")\n .aggregate(\"flight_count\", \"market_share\")\n .order_by(_.market_share.desc())\n .limit(10)\n)", - "sql": "SELECT\n *\nFROM (\n SELECT\n \"t5\".\"nickname\",\n \"t5\".\"flight_count\",\n (\n CAST(\"t5\".\"flight_count\" AS DOUBLE PRECISION) / CAST(\"t5\".\"flight_count_right\" AS DOUBLE PRECISION)\n ) * 100 AS \"market_share\"\n FROM (\n SELECT\n \"t3\".\"nickname\",\n \"t3\".\"flight_count\",\n \"t4\".\"flight_count\" AS \"flight_count_right\"\n FROM (\n SELECT\n \"t0\".\"nickname\",\n COUNT(*) AS \"flight_count\"\n FROM (\n SELECT\n \"t0\".\"flight_id\",\n \"t0\".\"carrier\",\n \"t0\".\"origin\",\n \"t0\".\"distance\",\n \"t0\".\"nickname\"\n FROM \"ibis_pandas_memtable_a3plt5y4evbnnmg4r5owcqizmu\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"nickname\"\n ) AS \"t3\"\n CROSS JOIN (\n SELECT\n COUNT(*) AS \"flight_count\"\n FROM \"ibis_pandas_memtable_a3plt5y4evbnnmg4r5owcqizmu\" AS \"t0\"\n ) AS \"t4\"\n ) AS \"t5\"\n) AS \"t6\"\nORDER BY\n \"t6\".\"market_share\" DESC NULLS LAST\nLIMIT 10", + "sql": "SELECT\n *\nFROM (\n SELECT\n \"t5\".\"nickname\",\n \"t5\".\"flight_count\",\n (\n CAST(\"t5\".\"flight_count\" AS DOUBLE PRECISION) / CAST(\"t5\".\"flight_count_right\" AS DOUBLE PRECISION)\n ) * 100 AS \"market_share\"\n FROM (\n SELECT\n \"t3\".\"nickname\",\n \"t3\".\"flight_count\",\n \"t4\".\"flight_count\" AS \"flight_count_right\"\n FROM (\n SELECT\n \"t0\".\"nickname\",\n COUNT(*) AS \"flight_count\"\n FROM (\n SELECT\n \"t0\".\"flight_id\",\n \"t0\".\"carrier\",\n \"t0\".\"origin\",\n \"t0\".\"distance\",\n \"t0\".\"nickname\"\n FROM \"ibis_pandas_memtable_jhatbbyjgngh5ou53h2apqqnaa\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"nickname\"\n ) AS \"t3\"\n CROSS JOIN (\n SELECT\n COUNT(*) AS \"flight_count\"\n FROM \"ibis_pandas_memtable_jhatbbyjgngh5ou53h2apqqnaa\" AS \"t0\"\n ) AS \"t4\"\n ) AS \"t5\"\n) AS \"t6\"\nORDER BY\n \"t6\".\"market_share\" DESC NULLS LAST\nLIMIT 10", "plan": "SemanticTable: flights\n flight_count [measure]\n total_distance [measure]\n market_share [calc]\n distance_share [calc]\n-> GroupBy(nickname)\n-> Aggregate(flight_count, market_share)\n-> OrderBy(_CallableWrapper(_fn=_.market_share.desc()))\n-> Limit(10)", "table": { "columns": [ @@ -384,12 +384,12 @@ 20.0 ], [ - "American Airlines", + "Delta Air Lines", 10, 20.0 ], [ - "Delta Air Lines", + "JetBlue Airways", 10, 20.0 ], @@ -399,109 +399,16 @@ 20.0 ], [ - "JetBlue Airways", + "American Airlines", 10, 20.0 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-595d70bb1f5af496ada304778efbfd7f" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "nickname", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "nickname", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "flight_count", - "market_share" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-595d70bb1f5af496ada304778efbfd7f": [ - { - "nickname": "Southwest Airlines", - "flight_count": 10, - "market_share": 20.0 - }, - { - "nickname": "American Airlines", - "flight_count": 10, - "market_share": 20.0 - }, - { - "nickname": "Delta Air Lines", - "flight_count": 10, - "market_share": 20.0 - }, - { - "nickname": "United Airlines", - "flight_count": 10, - "market_share": 20.0 - }, - { - "nickname": "JetBlue Airways", - "flight_count": 10, - "market_share": 20.0 - } - ] - } - } } }, "query_market_share_by_origin": { "code": "from ibis import _\n\nresult = (\n flights.group_by(\"origin\", \"nickname\")\n .aggregate(\"flight_count\", \"market_share\")\n .order_by(_.market_share.desc())\n .limit(15)\n)", - "sql": "SELECT\n *\nFROM (\n SELECT\n \"t5\".\"origin\",\n \"t5\".\"nickname\",\n \"t5\".\"flight_count\",\n (\n CAST(\"t5\".\"flight_count\" AS DOUBLE PRECISION) / CAST(\"t5\".\"flight_count_right\" AS DOUBLE PRECISION)\n ) * 100 AS \"market_share\"\n FROM (\n SELECT\n \"t3\".\"origin\",\n \"t3\".\"nickname\",\n \"t3\".\"flight_count\",\n \"t4\".\"flight_count\" AS \"flight_count_right\"\n FROM (\n SELECT\n \"t0\".\"origin\",\n \"t0\".\"nickname\",\n COUNT(*) AS \"flight_count\"\n FROM (\n SELECT\n \"t0\".\"flight_id\",\n \"t0\".\"carrier\",\n \"t0\".\"distance\",\n \"t0\".\"origin\",\n \"t0\".\"nickname\"\n FROM \"ibis_pandas_memtable_a3plt5y4evbnnmg4r5owcqizmu\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"origin\",\n \"t0\".\"nickname\"\n ) AS \"t3\"\n CROSS JOIN (\n SELECT\n COUNT(*) AS \"flight_count\"\n FROM \"ibis_pandas_memtable_a3plt5y4evbnnmg4r5owcqizmu\" AS \"t0\"\n ) AS \"t4\"\n ) AS \"t5\"\n) AS \"t6\"\nORDER BY\n \"t6\".\"market_share\" DESC NULLS LAST\nLIMIT 15", + "sql": "SELECT\n *\nFROM (\n SELECT\n \"t5\".\"origin\",\n \"t5\".\"nickname\",\n \"t5\".\"flight_count\",\n (\n CAST(\"t5\".\"flight_count\" AS DOUBLE PRECISION) / CAST(\"t5\".\"flight_count_right\" AS DOUBLE PRECISION)\n ) * 100 AS \"market_share\"\n FROM (\n SELECT\n \"t3\".\"origin\",\n \"t3\".\"nickname\",\n \"t3\".\"flight_count\",\n \"t4\".\"flight_count\" AS \"flight_count_right\"\n FROM (\n SELECT\n \"t0\".\"origin\",\n \"t0\".\"nickname\",\n COUNT(*) AS \"flight_count\"\n FROM (\n SELECT\n \"t0\".\"flight_id\",\n \"t0\".\"carrier\",\n \"t0\".\"distance\",\n \"t0\".\"origin\",\n \"t0\".\"nickname\"\n FROM \"ibis_pandas_memtable_jhatbbyjgngh5ou53h2apqqnaa\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"origin\",\n \"t0\".\"nickname\"\n ) AS \"t3\"\n CROSS JOIN (\n SELECT\n COUNT(*) AS \"flight_count\"\n FROM \"ibis_pandas_memtable_jhatbbyjgngh5ou53h2apqqnaa\" AS \"t0\"\n ) AS \"t4\"\n ) AS \"t5\"\n) AS \"t6\"\nORDER BY\n \"t6\".\"market_share\" DESC NULLS LAST\nLIMIT 15", "plan": "SemanticTable: flights\n flight_count [measure]\n total_distance [measure]\n market_share [calc]\n distance_share [calc]\n-> GroupBy(origin, nickname)\n-> Aggregate(flight_count, market_share)\n-> OrderBy(_CallableWrapper(_fn=_.market_share.desc()))\n-> Limit(15)", "table": { "columns": [ @@ -512,95 +419,36 @@ ], "data": [ [ - "ATL", - "Southwest Airlines", + "DFW", + "JetBlue Airways", 10, 20.0 ], [ - "LAX", - "United Airlines", + "JFK", + "American Airlines", 10, 20.0 ], [ - "ORD", - "Delta Air Lines", + "ATL", + "Southwest Airlines", 10, 20.0 ], [ - "JFK", - "American Airlines", + "LAX", + "United Airlines", 10, 20.0 ], [ - "DFW", - "JetBlue Airways", + "ORD", + "Delta Air Lines", 10, 20.0 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-f32ac0ea05b18a550cab71a81306f74f" - }, - "mark": { - "type": "text" - }, - "encoding": { - "text": { - "value": "Complex query - consider custom visualization" - } - }, - "height": 400, - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-f32ac0ea05b18a550cab71a81306f74f": [ - { - "origin": "ATL", - "nickname": "Southwest Airlines", - "flight_count": 10, - "market_share": 20.0 - }, - { - "origin": "LAX", - "nickname": "United Airlines", - "flight_count": 10, - "market_share": 20.0 - }, - { - "origin": "ORD", - "nickname": "Delta Air Lines", - "flight_count": 10, - "market_share": 20.0 - }, - { - "origin": "JFK", - "nickname": "American Airlines", - "flight_count": 10, - "market_share": 20.0 - }, - { - "origin": "DFW", - "nickname": "JetBlue Airways", - "flight_count": 10, - "market_share": 20.0 - } - ] - } - } } } }, diff --git a/docs/web/public/bsl-data/query-agent-mcp.json b/docs/web/public/bsl-data/query-agent-mcp.json index afa2b66..b7680f2 100644 --- a/docs/web/public/bsl-data/query-agent-mcp.json +++ b/docs/web/public/bsl-data/query-agent-mcp.json @@ -1,5 +1,5 @@ { - "markdown": "# Query Agent: MCP Server\n\nBSL includes built-in support for the [Model Context Protocol (MCP)](https://github.com/modelcontextprotocol/python-sdk), allowing you to expose your semantic models to Large Language Models like Claude.\n\n\n**Pro tip:** Use [descriptions in dimensions and measures](/building/semantic-tables#adding-descriptions) to make your models more AI-friendly. Descriptions help provide context to LLMs, enabling them to understand what each field represents and when to use them.\n\n\n## Installation\n\nTo use MCP functionality, install BSL with the `fastmcp` extra:\n\n```bash\npip install 'boring-semantic-layer[fastmcp]'\n```\n\n## Setting up an MCP Server\n\nCreate an MCP server script that exposes your semantic models:\n\n```python\nimport ibis\nfrom boring_semantic_layer import to_semantic_table, MCPSemanticModel\n\n# Create synthetic flights data\nflights_data = ibis.memtable({\n \"flight_id\": list(range(1, 101)),\n \"origin\": [\"JFK\", \"LAX\", \"ORD\", \"ATL\", \"DFW\"] * 20,\n \"dest\": [\"LAX\", \"JFK\", \"DFW\", \"ORD\", \"ATL\"] * 20,\n \"carrier\": [\"AA\", \"UA\", \"DL\", \"WN\", \"B6\"] * 20,\n \"distance\": [2475, 2475, 801, 606, 732] * 20,\n})\n\n# Define your semantic table with descriptions\nflights = (\n to_semantic_table(flights_data, name=\"flights\")\n .with_dimensions(\n origin={\n \"expr\": lambda t: t.origin,\n \"description\": \"Origin airport code where the flight departed from\"\n },\n destination={\n \"expr\": lambda t: t.dest,\n \"description\": \"Destination airport code where the flight arrived\"\n },\n carrier={\n \"expr\": lambda t: t.carrier,\n \"description\": \"Airline carrier code (e.g., AA, UA, DL)\"\n },\n )\n .with_measures(\n total_flights={\n \"expr\": lambda t: t.count(),\n \"description\": \"Total number of flights\"\n },\n avg_distance={\n \"expr\": lambda t: t.distance.mean(),\n \"description\": \"Average flight distance in miles\"\n },\n )\n)\n\n# Create the MCP server\nmcp_server = MCPSemanticModel(\n models={\"flights\": flights},\n name=\"Flight Data Server\"\n)\n\nif __name__ == \"__main__\":\n mcp_server.run(transport=\"stdio\")\n```\n\nSave this as `example_mcp.py` in your project directory.\n\n## Configuring Claude Desktop\n\nTo use your MCP server with Claude Desktop, add it to your configuration file.\n\n**Configuration file location:**\n- **macOS:** `~/Library/Application Support/Claude/claude_desktop_config.json`\n- **Windows:** `%APPDATA%\\Claude\\claude_desktop_config.json`\n\n**Example configuration:**\n\n```json\n{\n \"mcpServers\": {\n \"flight_sm\": {\n \"command\": \"uv\",\n \"args\": [\n \"--directory\",\n \"/path/to/your/project/\",\n \"run\",\n \"example_mcp.py\"\n ]\n }\n }\n}\n```\n\nReplace `/path/to/your/project/` with the actual path to your project directory.\n\n\nThis example uses [uv](https://docs.astral.sh/uv/) to run the MCP server. You can also use `python` directly if you have BSL installed in your environment:\n\n```json\n{\n \"mcpServers\": {\n \"flight_sm\": {\n \"command\": \"python\",\n \"args\": [\"/path/to/your/project/example_mcp.py\"]\n }\n }\n}\n```\n\n\nAfter updating the configuration:\n1. Restart Claude Desktop\n2. Look for the MCP server indicator in the Claude Desktop interface\n3. You should see \"flight_sm\" listed as an available server\n\n## Available MCP Tools\n\nOnce configured, Claude will have access to these tools for interacting with your semantic models:\n\n### list_models\n\nList all available semantic model names in the MCP server.\n\n**Example usage in Claude:**\n> \"What models are available?\"\n\n**Returns:** Array of model names (e.g., `[\"flights\", \"carriers\"]`)\n\n### get_model\n\nGet detailed information about a specific model including its dimensions, measures, and descriptions.\n\n**Parameters:**\n- `model_name` (str): Name of the model to inspect\n\n**Example usage in Claude:**\n> \"Show me the details of the flights model\"\n\n**Returns:** Model schema including:\n- Model name and description\n- List of dimensions with their descriptions\n- List of measures with their descriptions\n- Available joins (if any)\n\n### get_time_range\n\nGet the available time range for time-series data in a model.\n\n**Parameters:**\n- `model_name` (str): Name of the model\n- `time_dimension` (str): Name of the time dimension\n\n**Example usage in Claude:**\n> \"What's the time range available in the flights model?\"\n\n**Returns:** Dictionary with `min_time` and `max_time` values\n\n### query_model\n\nExecute queries against a semantic model with dimensions, measures, filters, and optional chart specifications.\n\n**Parameters:**\n- `model_name` (str): Name of the model to query\n- `dimensions` (list[str]): List of dimension names to group by\n- `measures` (list[str]): List of measure names to aggregate\n- `filters` (list[str], optional): List of filter expressions (e.g., `[\"origin == 'JFK'\"]`)\n- `limit` (int, optional): Maximum number of rows to return\n- `order_by` (list[str], optional): List of columns to sort by\n- `chart_spec` (dict, optional): Vega-Lite chart specification\n\n**Example usage in Claude:**\n> \"Show me the top 10 origins by flight count\"\n> \"Create a bar chart of average distance by carrier\"\n\n**Returns:**\n- When `chart_spec` is provided: `{\"records\": [...], \"chart\": {...}}`\n- When `chart_spec` is not provided: `{\"records\": [...]}`\n\n### Example Interactions\n\nHere are some example questions you can ask Claude when the MCP server is configured:\n\n**Data Exploration:**\n- \"What models are available in the flight data server?\"\n- \"Show me all dimensions and measures in the flights model\"\n- \"What is the time range covered by the flights data?\"\n\n**Basic Queries:**\n- \"How many flights departed from JFK?\"\n- \"Show me the top 5 destinations by flight count\"\n- \"What's the average flight distance for each carrier?\"\n\n**Filtered Queries:**\n- \"Show me flights from California airports (starting with 'S')\"\n- \"What carriers have an average distance over 1000 miles?\"\n- \"List the top 10 busiest routes\"\n\n**Visualizations:**\n- \"Create a bar chart showing flights by origin airport\"\n- \"Make a line chart of flights over time\"\n- \"Show me a heatmap of routes between origins and destinations\"\n\n## Best Practices\n\n### 1. Add Descriptions to All Fields\n\nDescriptions are crucial for LLMs to understand your data model:\n\n```python\nflights = (\n to_semantic_table(flights_tbl, name=\"flights\")\n .with_dimensions(\n origin={\n \"expr\": lambda t: t.origin,\n \"description\": \"Origin airport code (3-letter IATA code)\"\n }\n )\n .with_measures(\n total_flights={\n \"expr\": lambda t: t.count(),\n \"description\": \"Total number of flights in the dataset\"\n }\n )\n)\n```\n\n### 2. Use Descriptive Model Names\n\nChoose clear, descriptive names for your models:\n\n```python\n# Good\nmcp_server = MCPSemanticModel(\n models={\"flights\": flights, \"carriers\": carriers},\n name=\"Aviation Analytics Server\"\n)\n\n# Less clear\nmcp_server = MCPSemanticModel(\n models={\"f\": flights, \"c\": carriers},\n name=\"Server\"\n)\n```\n\n### 3. Define Time Dimensions for Time-Series Queries\n\nWhen exposing models through MCP, you need to explicitly define time dimensions to enable LLMs to query time ranges and perform time-based aggregations. This is specific to MCP\u2014when using BSL's fluent API directly, you can simply use Ibis functions like `.year()` and `.month()`.\n\nTo define a time dimension, set `is_time_dimension=True` and specify the `smallest_time_grain`:\n\n```python\nfrom boring_semantic_layer import to_semantic_table\n\nflights = (\n to_semantic_table(flights_data, name=\"flights\")\n .with_dimensions(\n arr_time={\n \"expr\": lambda t: t.arr_time,\n \"description\": \"Arrival time of the flight\",\n \"is_time_dimension\": True,\n \"smallest_time_grain\": \"TIME_GRAIN_SECOND\",\n },\n origin={\n \"expr\": lambda t: t.origin,\n \"description\": \"Origin airport code\"\n },\n )\n .with_measures(\n flight_count={\n \"expr\": lambda t: t.count(),\n \"description\": \"Total number of flights\"\n }\n )\n)\n```\n\n**Available time grains:**\n- `TIME_GRAIN_SECOND` - For second-level precision\n- `TIME_GRAIN_MINUTE` - For minute-level precision\n- `TIME_GRAIN_HOUR` - For hourly data\n- `TIME_GRAIN_DAY` - For daily data\n- `TIME_GRAIN_WEEK` - For weekly data\n- `TIME_GRAIN_MONTH` - For monthly data\n- `TIME_GRAIN_QUARTER` - For quarterly data\n- `TIME_GRAIN_YEAR` - For yearly data\n\n\nIf you define multiple time dimensions in your model, the `.query()` method and MCP tools will use the first time dimension that appears in your query's dimensions list.\n\n\n**Example time-based queries:**\n\nWith time dimensions defined, you can use the `.query()` method with time ranges and grains:\n\n```python\n# Query with a specific time range\nresult = flights.query(\n dimensions=[\"origin\"],\n measures=[\"flight_count\"],\n time_range={\"start\": \"2024-01-01\", \"end\": \"2024-12-31\"}\n)\n\n# Query with time grain aggregation\nresult = flights.query(\n dimensions=[\"arr_time\"],\n measures=[\"flight_count\"],\n time_grain=\"TIME_GRAIN_MONTH\"\n)\n```\n\nLLMs can then perform similar queries through MCP:\n```\n> \"What's the time range available in the flights data?\"\n> \"Show me flights from January 2024\"\n> \"Give me monthly flight counts for the last year\"\n```\n\n### 4. Structure Your Data Logically\n\nOrganize related dimensions and measures together, and use joins to connect related models:\n\n```python\n# Flights model focuses on flight operations\nflights = (\n to_semantic_table(flights_tbl, name=\"flights\")\n .with_dimensions(origin=..., destination=..., date=...)\n .with_measures(flight_count=..., avg_delay=...)\n)\n\n# Carriers model focuses on airline information\ncarriers = (\n to_semantic_table(carriers_tbl, name=\"carriers\")\n .with_dimensions(code=..., name=..., country=...)\n .with_measures(carrier_count=...)\n)\n\n# Connect them with joins\nflights_with_carriers = flights.join_one(\n carriers,\n lambda f, c: f.carrier == c.code\n)\n```\n\n## Troubleshooting\n\n### Server Not Appearing in Claude Desktop\n\n1. Check the configuration file path is correct\n2. Verify JSON syntax in `claude_desktop_config.json`\n3. Ensure BSL is installed with MCP support: `pip install 'boring-semantic-layer[fastmcp]'`\n4. Restart Claude Desktop completely\n5. Check Claude Desktop logs for error messages\n\n### Import Errors\n\nIf you see import errors when the server starts:\n\n```bash\n# Ensure all dependencies are installed\npip install 'boring-semantic-layer[fastmcp]'\n\n# Or install specific dependencies\npip install fastmcp ibis-framework\n```\n\n### Path Issues\n\nMake sure file paths in your configuration are absolute paths, not relative:\n\n```json\n{\n \"mcpServers\": {\n \"flight_sm\": {\n \"command\": \"python\",\n \"args\": [\"/Users/username/projects/my-project/example_mcp.py\"]\n }\n }\n}\n```\n", + "markdown": "# Query Agent: MCP Server\n\nBSL includes built-in support for the [Model Context Protocol (MCP)](https://github.com/modelcontextprotocol/python-sdk), allowing you to expose your semantic models to Large Language Models like Claude.\n\n\n**Pro tip:** Use [descriptions in dimensions and measures](/building/semantic-tables#with_dimensions) to make your models more AI-friendly. Descriptions help provide context to LLMs, enabling them to understand what each field represents and when to use them.\n\n\n## Installation\n\nTo use MCP functionality, install BSL with the `fastmcp` extra:\n\n```bash\npip install 'boring-semantic-layer[fastmcp]'\n```\n\n## Setting up an MCP Server\n\nCreate an MCP server script that exposes your semantic models:\n\n```python\nimport ibis\nfrom boring_semantic_layer import to_semantic_table, MCPSemanticModel\n\n# Create synthetic flights data\nflights_data = ibis.memtable({\n \"flight_id\": list(range(1, 101)),\n \"origin\": [\"JFK\", \"LAX\", \"ORD\", \"ATL\", \"DFW\"] * 20,\n \"dest\": [\"LAX\", \"JFK\", \"DFW\", \"ORD\", \"ATL\"] * 20,\n \"carrier\": [\"AA\", \"UA\", \"DL\", \"WN\", \"B6\"] * 20,\n \"distance\": [2475, 2475, 801, 606, 732] * 20,\n})\n\n# Define your semantic table with descriptions\nflights = (\n to_semantic_table(flights_data, name=\"flights\")\n .with_dimensions(\n origin={\n \"expr\": lambda t: t.origin,\n \"description\": \"Origin airport code where the flight departed from\"\n },\n destination={\n \"expr\": lambda t: t.dest,\n \"description\": \"Destination airport code where the flight arrived\"\n },\n carrier={\n \"expr\": lambda t: t.carrier,\n \"description\": \"Airline carrier code (e.g., AA, UA, DL)\"\n },\n )\n .with_measures(\n total_flights={\n \"expr\": lambda t: t.count(),\n \"description\": \"Total number of flights\"\n },\n avg_distance={\n \"expr\": lambda t: t.distance.mean(),\n \"description\": \"Average flight distance in miles\"\n },\n )\n)\n\n# Create the MCP server\nmcp_server = MCPSemanticModel(\n models={\"flights\": flights},\n name=\"Flight Data Server\"\n)\n\nif __name__ == \"__main__\":\n mcp_server.run(transport=\"stdio\")\n```\n\nSave this as `example_mcp.py` in your project directory.\n\n## Configuring Claude Desktop\n\nTo use your MCP server with Claude Desktop, add it to your configuration file.\n\n**Configuration file location:**\n- **macOS:** `~/Library/Application Support/Claude/claude_desktop_config.json`\n- **Windows:** `%APPDATA%\\Claude\\claude_desktop_config.json`\n\n**Example configuration:**\n\n```json\n{\n \"mcpServers\": {\n \"flight_sm\": {\n \"command\": \"uv\",\n \"args\": [\n \"--directory\",\n \"/path/to/your/project/\",\n \"run\",\n \"example_mcp.py\"\n ]\n }\n }\n}\n```\n\nReplace `/path/to/your/project/` with the actual path to your project directory.\n\n\nThis example uses [uv](https://docs.astral.sh/uv/) to run the MCP server. You can also use `python` directly if you have BSL installed in your environment:\n\n```json\n{\n \"mcpServers\": {\n \"flight_sm\": {\n \"command\": \"python\",\n \"args\": [\"/path/to/your/project/example_mcp.py\"]\n }\n }\n}\n```\n\n\nAfter updating the configuration:\n1. Restart Claude Desktop\n2. Look for the MCP server indicator in the Claude Desktop interface\n3. You should see \"flight_sm\" listed as an available server\n\n## Available MCP Tools\n\nOnce configured, Claude will have access to these tools for interacting with your semantic models:\n\n### list_models\n\nList all available semantic model names in the MCP server.\n\n**Example usage in Claude:**\n> \"What models are available?\"\n\n**Returns:** Array of model names (e.g., `[\"flights\", \"carriers\"]`)\n\n### get_model\n\nGet detailed information about a specific model including its dimensions, measures, and descriptions.\n\n**Parameters:**\n- `model_name` (str): Name of the model to inspect\n\n**Example usage in Claude:**\n> \"Show me the details of the flights model\"\n\n**Returns:** Model schema including:\n- Model name and description\n- List of dimensions with their descriptions\n- List of measures with their descriptions\n- Available joins (if any)\n\n### get_time_range\n\nGet the available time range for time-series data in a model.\n\n**Parameters:**\n- `model_name` (str): Name of the model\n- `time_dimension` (str): Name of the time dimension\n\n**Example usage in Claude:**\n> \"What's the time range available in the flights model?\"\n\n**Returns:** Dictionary with `min_time` and `max_time` values\n\n### query_model\n\nExecute queries against a semantic model with dimensions, measures, filters, and optional chart specifications.\n\n**Parameters:**\n- `model_name` (str): Name of the model to query\n- `dimensions` (list[str]): List of dimension names to group by\n- `measures` (list[str]): List of measure names to aggregate\n- `filters` (list[str], optional): List of filter expressions (e.g., `[\"origin == 'JFK'\"]`)\n- `limit` (int, optional): Maximum number of rows to return\n- `order_by` (list[str], optional): List of columns to sort by\n- `chart_spec` (dict, optional): Vega-Lite chart specification\n\n**Example usage in Claude:**\n> \"Show me the top 10 origins by flight count\"\n> \"Create a bar chart of average distance by carrier\"\n\n**Returns:**\n- When `chart_spec` is provided: `{\"records\": [...], \"chart\": {...}}`\n- When `chart_spec` is not provided: `{\"records\": [...]}`\n\n### Example Interactions\n\nHere are some example questions you can ask Claude when the MCP server is configured:\n\n**Data Exploration:**\n- \"What models are available in the flight data server?\"\n- \"Show me all dimensions and measures in the flights model\"\n- \"What is the time range covered by the flights data?\"\n\n**Basic Queries:**\n- \"How many flights departed from JFK?\"\n- \"Show me the top 5 destinations by flight count\"\n- \"What's the average flight distance for each carrier?\"\n\n**Filtered Queries:**\n- \"Show me flights from California airports (starting with 'S')\"\n- \"What carriers have an average distance over 1000 miles?\"\n- \"List the top 10 busiest routes\"\n\n**Visualizations:**\n- \"Create a bar chart showing flights by origin airport\"\n- \"Make a line chart of flights over time\"\n- \"Show me a heatmap of routes between origins and destinations\"\n\n## Best Practices\n\n### 1. Add Descriptions to All Fields\n\nDescriptions are crucial for LLMs to understand your data model:\n\n```python\nflights = (\n to_semantic_table(flights_tbl, name=\"flights\")\n .with_dimensions(\n origin={\n \"expr\": lambda t: t.origin,\n \"description\": \"Origin airport code (3-letter IATA code)\"\n }\n )\n .with_measures(\n total_flights={\n \"expr\": lambda t: t.count(),\n \"description\": \"Total number of flights in the dataset\"\n }\n )\n)\n```\n\n### 2. Use Descriptive Model Names\n\nChoose clear, descriptive names for your models:\n\n```python\n# Good\nmcp_server = MCPSemanticModel(\n models={\"flights\": flights, \"carriers\": carriers},\n name=\"Aviation Analytics Server\"\n)\n\n# Less clear\nmcp_server = MCPSemanticModel(\n models={\"f\": flights, \"c\": carriers},\n name=\"Server\"\n)\n```\n\n### 3. Define Time Dimensions for Time-Series Queries\n\nWhen exposing models through MCP, you need to explicitly define time dimensions to enable LLMs to query time ranges and perform time-based aggregations. This is specific to MCP\u2014when using BSL's fluent API directly, you can simply use Ibis functions like `.year()` and `.month()`.\n\nTo define a time dimension, set `is_time_dimension=True` and specify the `smallest_time_grain`:\n\n```python\nfrom boring_semantic_layer import to_semantic_table\n\nflights = (\n to_semantic_table(flights_data, name=\"flights\")\n .with_dimensions(\n arr_time={\n \"expr\": lambda t: t.arr_time,\n \"description\": \"Arrival time of the flight\",\n \"is_time_dimension\": True,\n \"smallest_time_grain\": \"TIME_GRAIN_SECOND\",\n },\n origin={\n \"expr\": lambda t: t.origin,\n \"description\": \"Origin airport code\"\n },\n )\n .with_measures(\n flight_count={\n \"expr\": lambda t: t.count(),\n \"description\": \"Total number of flights\"\n }\n )\n)\n```\n\n**Available time grains:**\n- `TIME_GRAIN_SECOND` - For second-level precision\n- `TIME_GRAIN_MINUTE` - For minute-level precision\n- `TIME_GRAIN_HOUR` - For hourly data\n- `TIME_GRAIN_DAY` - For daily data\n- `TIME_GRAIN_WEEK` - For weekly data\n- `TIME_GRAIN_MONTH` - For monthly data\n- `TIME_GRAIN_QUARTER` - For quarterly data\n- `TIME_GRAIN_YEAR` - For yearly data\n\n\nIf you define multiple time dimensions in your model, the `.query()` method and MCP tools will use the first time dimension that appears in your query's dimensions list.\n\n\n**Example time-based queries:**\n\nWith time dimensions defined, you can use the `.query()` method with time ranges and grains:\n\n```python\n# Query with a specific time range\nresult = flights.query(\n dimensions=[\"origin\"],\n measures=[\"flight_count\"],\n time_range={\"start\": \"2024-01-01\", \"end\": \"2024-12-31\"}\n)\n\n# Query with time grain aggregation\nresult = flights.query(\n dimensions=[\"arr_time\"],\n measures=[\"flight_count\"],\n time_grain=\"TIME_GRAIN_MONTH\"\n)\n```\n\nLLMs can then perform similar queries through MCP:\n```\n> \"What's the time range available in the flights data?\"\n> \"Show me flights from January 2024\"\n> \"Give me monthly flight counts for the last year\"\n```\n\n### 4. Structure Your Data Logically\n\nOrganize related dimensions and measures together, and use joins to connect related models:\n\n```python\n# Flights model focuses on flight operations\nflights = (\n to_semantic_table(flights_tbl, name=\"flights\")\n .with_dimensions(origin=..., destination=..., date=...)\n .with_measures(flight_count=..., avg_delay=...)\n)\n\n# Carriers model focuses on airline information\ncarriers = (\n to_semantic_table(carriers_tbl, name=\"carriers\")\n .with_dimensions(code=..., name=..., country=...)\n .with_measures(carrier_count=...)\n)\n\n# Connect them with joins\nflights_with_carriers = flights.join_one(\n carriers,\n lambda f, c: f.carrier == c.code\n)\n```\n\n## Troubleshooting\n\n### Server Not Appearing in Claude Desktop\n\n1. Check the configuration file path is correct\n2. Verify JSON syntax in `claude_desktop_config.json`\n3. Ensure BSL is installed with MCP support: `pip install 'boring-semantic-layer[fastmcp]'`\n4. Restart Claude Desktop completely\n5. Check Claude Desktop logs for error messages\n\n### Import Errors\n\nIf you see import errors when the server starts:\n\n```bash\n# Ensure all dependencies are installed\npip install 'boring-semantic-layer[fastmcp]'\n\n# Or install specific dependencies\npip install fastmcp ibis-framework\n```\n\n### Path Issues\n\nMake sure file paths in your configuration are absolute paths, not relative:\n\n```json\n{\n \"mcpServers\": {\n \"flight_sm\": {\n \"command\": \"python\",\n \"args\": [\"/Users/username/projects/my-project/example_mcp.py\"]\n }\n }\n}\n```\n", "queries": {}, "files": {} } diff --git a/docs/web/public/bsl-data/query-methods.json b/docs/web/public/bsl-data/query-methods.json index 4d9ee8b..cafefaf 100644 --- a/docs/web/public/bsl-data/query-methods.json +++ b/docs/web/public/bsl-data/query-methods.json @@ -3,7 +3,7 @@ "queries": { "setup_table": { "code": "import ibis\nfrom ibis import _\nfrom boring_semantic_layer import to_semantic_table\n\n# Create Ibis table\nflights_tbl = ibis.memtable({\n \"origin\": [\"NYC\", \"LAX\", \"NYC\", \"SFO\", \"LAX\", \"NYC\", \"SFO\", \"LAX\", \"NYC\"],\n \"carrier\": [\"AA\", \"UA\", \"AA\", \"UA\", \"AA\", \"UA\", \"AA\", \"UA\", \"AA\"],\n \"distance\": [2789, 2789, 2902, 2902, 347, 2789, 347, 347, 2789],\n \"duration\": [330, 330, 360, 360, 65, 330, 65, 65, 330],\n})\n\n# Create semantic table\nflights_st = (\n to_semantic_table(flights_tbl, name=\"flights\")\n .with_dimensions(\n origin=lambda t: t.origin,\n carrier=lambda t: t.carrier,\n )\n .with_measures(\n flight_count=lambda t: t.count(),\n total_distance=lambda t: t.distance.sum(),\n avg_duration=lambda t: t.duration.mean(),\n )\n)", - "sql": "SELECT\n *\nFROM \"ibis_pandas_memtable_qnlt5c3ycbdyrjsyv5ftqgmuym\"", + "sql": "SELECT\n *\nFROM \"ibis_pandas_memtable_onqfscphkbhtzli4oya7wmodva\"", "plan": "SemanticTable: flights\n origin [dim]\n carrier [dim]\n flight_count [measure]\n total_distance [measure]\n avg_duration [measure]", "table": { "columns": [ @@ -72,7 +72,7 @@ }, "simple_demo": { "code": "# Build a query\nresult = flights_st.group_by(\"origin\").aggregate(\"flight_count\")\n\n# Option 1: Execute and get data as pandas DataFrame\ndf = result.execute()\n\n# Option 2: View the generated SQL\nprint(result.sql())\n\n# Option 3: Generate a visualization (when applicable)\nchart = result.chart()\n\n# Option 4: See the semantic query plan\nprint(result)\n\nresult", - "sql": "SELECT\n *\nFROM (\n SELECT\n \"t1\".\"origin\",\n COUNT(*) AS \"flight_count\"\n FROM (\n SELECT\n \"t1\".\"carrier\",\n \"t1\".\"distance\",\n \"t1\".\"duration\",\n \"t1\".\"origin\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_qnlt5c3ycbdyrjsyv5ftqgmuym\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"origin\"\n) AS \"t2\"", + "sql": "SELECT\n *\nFROM (\n SELECT\n \"t1\".\"origin\",\n COUNT(*) AS \"flight_count\"\n FROM (\n SELECT\n \"t1\".\"carrier\",\n \"t1\".\"distance\",\n \"t1\".\"duration\",\n \"t1\".\"origin\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_onqfscphkbhtzli4oya7wmodva\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"origin\"\n) AS \"t2\"", "plan": "SemanticTable: flights\n origin [dim]\n carrier [dim]\n flight_count [measure]\n total_distance [measure]\n avg_duration [measure]\n-> GroupBy(origin)\n-> Aggregate(flight_count)", "table": { "columns": [ @@ -84,77 +84,20 @@ "NYC", 4 ], - [ - "LAX", - 3 - ], [ "SFO", 2 + ], + [ + "LAX", + 3 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-23a07ac3675351225fc742d6f2d8816b" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "tooltip": [ - { - "field": "origin", - "type": "nominal" - }, - { - "field": "flight_count", - "type": "quantitative" - } - ], - "x": { - "field": "origin", - "sort": null, - "type": "ordinal" - }, - "y": { - "field": "flight_count", - "type": "quantitative" - } - }, - "height": 400, - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-23a07ac3675351225fc742d6f2d8816b": [ - { - "origin": "SFO", - "flight_count": 2 - }, - { - "origin": "NYC", - "flight_count": 4 - }, - { - "origin": "LAX", - "flight_count": 3 - } - ] - } - } } }, "query_single_dimension": { "code": "# Group by one dimension\nresult = flights_st.group_by(\"origin\").aggregate(\"flight_count\")", - "sql": "SELECT\n *\nFROM (\n SELECT\n \"t1\".\"origin\",\n COUNT(*) AS \"flight_count\"\n FROM (\n SELECT\n \"t1\".\"carrier\",\n \"t1\".\"distance\",\n \"t1\".\"duration\",\n \"t1\".\"origin\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_qnlt5c3ycbdyrjsyv5ftqgmuym\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"origin\"\n) AS \"t2\"", + "sql": "SELECT\n *\nFROM (\n SELECT\n \"t1\".\"origin\",\n COUNT(*) AS \"flight_count\"\n FROM (\n SELECT\n \"t1\".\"carrier\",\n \"t1\".\"distance\",\n \"t1\".\"duration\",\n \"t1\".\"origin\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_onqfscphkbhtzli4oya7wmodva\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"origin\"\n) AS \"t2\"", "plan": "SemanticTable: flights\n origin [dim]\n carrier [dim]\n flight_count [measure]\n total_distance [measure]\n avg_duration [measure]\n-> GroupBy(origin)\n-> Aggregate(flight_count)", "table": { "columns": [ @@ -166,77 +109,20 @@ "SFO", 2 ], - [ - "NYC", - 4 - ], [ "LAX", 3 + ], + [ + "NYC", + 4 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-23a07ac3675351225fc742d6f2d8816b" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "tooltip": [ - { - "field": "origin", - "type": "nominal" - }, - { - "field": "flight_count", - "type": "quantitative" - } - ], - "x": { - "field": "origin", - "sort": null, - "type": "ordinal" - }, - "y": { - "field": "flight_count", - "type": "quantitative" - } - }, - "height": 400, - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-23a07ac3675351225fc742d6f2d8816b": [ - { - "origin": "SFO", - "flight_count": 2 - }, - { - "origin": "NYC", - "flight_count": 4 - }, - { - "origin": "LAX", - "flight_count": 3 - } - ] - } - } } }, "query_multiple_dimensions": { "code": "# Group by multiple dimensions\nresult = flights_st.group_by(\"origin\", \"carrier\").aggregate(\"flight_count\")", - "sql": "SELECT\n *\nFROM (\n SELECT\n \"t1\".\"origin\",\n \"t1\".\"carrier\",\n COUNT(*) AS \"flight_count\"\n FROM (\n SELECT\n \"t1\".\"distance\",\n \"t1\".\"duration\",\n \"t1\".\"origin\",\n \"t1\".\"carrier\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_qnlt5c3ycbdyrjsyv5ftqgmuym\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"origin\",\n \"t1\".\"carrier\"\n) AS \"t2\"", + "sql": "SELECT\n *\nFROM (\n SELECT\n \"t1\".\"origin\",\n \"t1\".\"carrier\",\n COUNT(*) AS \"flight_count\"\n FROM (\n SELECT\n \"t1\".\"distance\",\n \"t1\".\"duration\",\n \"t1\".\"origin\",\n \"t1\".\"carrier\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_onqfscphkbhtzli4oya7wmodva\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"origin\",\n \"t1\".\"carrier\"\n) AS \"t2\"", "plan": "SemanticTable: flights\n origin [dim]\n carrier [dim]\n flight_count [measure]\n total_distance [measure]\n avg_duration [measure]\n-> GroupBy(origin, carrier)\n-> Aggregate(flight_count)", "table": { "columns": [ @@ -245,24 +131,24 @@ "flight_count" ], "data": [ + [ + "NYC", + "AA", + 3 + ], [ "LAX", - "UA", - 2 + "AA", + 1 ], [ - "NYC", + "SFO", "UA", 1 ], [ "NYC", - "AA", - 3 - ], - [ - "LAX", - "AA", + "UA", 1 ], [ @@ -271,100 +157,16 @@ 1 ], [ - "SFO", + "LAX", "UA", - 1 + 2 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-3b9ef625220c2c3b55dd97c1e5d48174" - }, - "mark": { - "type": "rect" - }, - "encoding": { - "color": { - "field": "flight_count", - "type": "quantitative" - }, - "tooltip": [ - { - "field": "origin", - "type": "nominal" - }, - { - "field": "carrier", - "type": "nominal" - }, - { - "field": "flight_count", - "type": "quantitative" - } - ], - "x": { - "field": "origin", - "sort": null, - "type": "ordinal" - }, - "y": { - "field": "carrier", - "sort": null, - "type": "ordinal" - } - }, - "height": 400, - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-3b9ef625220c2c3b55dd97c1e5d48174": [ - { - "origin": "SFO", - "carrier": "AA", - "flight_count": 1 - }, - { - "origin": "SFO", - "carrier": "UA", - "flight_count": 1 - }, - { - "origin": "NYC", - "carrier": "AA", - "flight_count": 3 - }, - { - "origin": "LAX", - "carrier": "AA", - "flight_count": 1 - }, - { - "origin": "LAX", - "carrier": "UA", - "flight_count": 2 - }, - { - "origin": "NYC", - "carrier": "UA", - "flight_count": 1 - } - ] - } - } } }, "query_no_grouping": { "code": "# Aggregate entire dataset without grouping\nresult = flights_st.group_by().aggregate(\"flight_count\", \"total_distance\", \"avg_duration\")", - "sql": "SELECT\n COUNT(*) AS \"flight_count\",\n SUM(\"t0\".\"distance\") AS \"total_distance\",\n AVG(\"t0\".\"duration\") AS \"avg_duration\"\nFROM \"ibis_pandas_memtable_qnlt5c3ycbdyrjsyv5ftqgmuym\" AS \"t0\"", + "sql": "SELECT\n COUNT(*) AS \"flight_count\",\n SUM(\"t0\".\"distance\") AS \"total_distance\",\n AVG(\"t0\".\"duration\") AS \"avg_duration\"\nFROM \"ibis_pandas_memtable_onqfscphkbhtzli4oya7wmodva\" AS \"t0\"", "plan": "SemanticTable: flights\n origin [dim]\n carrier [dim]\n flight_count [measure]\n total_distance [measure]\n avg_duration [measure]\n-> GroupBy()\n-> Aggregate(flight_count, total_distance, avg_duration)", "table": { "columns": [ @@ -379,45 +181,11 @@ 248.33333333333334 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-8a83363d11adffb1b866841c56e09dfc" - }, - "mark": { - "type": "text" - }, - "encoding": { - "text": { - "value": "Complex query - consider custom visualization" - } - }, - "height": 400, - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-8a83363d11adffb1b866841c56e09dfc": [ - { - "flight_count": 9, - "total_distance": 18001, - "avg_duration": 248.33333333333334 - } - ] - } - } } }, "query_predefined_measures": { "code": "# Use measures defined in with_measures()\nresult = flights_st.group_by(\"origin\").aggregate(\"flight_count\", \"avg_duration\")", - "sql": "SELECT\n *\nFROM (\n SELECT\n \"t1\".\"origin\",\n COUNT(*) AS \"flight_count\",\n AVG(\"t1\".\"duration\") AS \"avg_duration\"\n FROM (\n SELECT\n \"t1\".\"carrier\",\n \"t1\".\"distance\",\n \"t1\".\"duration\",\n \"t1\".\"origin\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_qnlt5c3ycbdyrjsyv5ftqgmuym\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"origin\"\n) AS \"t2\"", + "sql": "SELECT\n *\nFROM (\n SELECT\n \"t1\".\"origin\",\n COUNT(*) AS \"flight_count\",\n AVG(\"t1\".\"duration\") AS \"avg_duration\"\n FROM (\n SELECT\n \"t1\".\"carrier\",\n \"t1\".\"distance\",\n \"t1\".\"duration\",\n \"t1\".\"origin\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_onqfscphkbhtzli4oya7wmodva\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"origin\"\n) AS \"t2\"", "plan": "SemanticTable: flights\n origin [dim]\n carrier [dim]\n flight_count [measure]\n total_distance [measure]\n avg_duration [measure]\n-> GroupBy(origin)\n-> Aggregate(flight_count, avg_duration)", "table": { "columns": [ @@ -427,9 +195,9 @@ ], "data": [ [ - "SFO", - 2, - 212.5 + "LAX", + 3, + 153.33333333333334 ], [ "NYC", @@ -437,99 +205,16 @@ 337.5 ], [ - "LAX", - 3, - 153.33333333333334 + "SFO", + 2, + 212.5 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-8f98fea9cf7ed44934d74a32b8b82c2e" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "origin", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "origin", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "flight_count", - "avg_duration" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-8f98fea9cf7ed44934d74a32b8b82c2e": [ - { - "origin": "SFO", - "flight_count": 2, - "avg_duration": 212.5 - }, - { - "origin": "NYC", - "flight_count": 4, - "avg_duration": 337.5 - }, - { - "origin": "LAX", - "flight_count": 3, - "avg_duration": 153.33333333333334 - } - ] - } - } } }, "query_onthefly_measures": { "code": "# Mix predefined and computed measures\nresult = (\n flights_st\n .group_by(\"origin\")\n .aggregate(\n \"flight_count\", # Pre-defined measure\n \"avg_duration\", # Pre-defined measure\n total_miles=lambda t: t.distance.sum(), # Computed on-the-fly\n max_distance=lambda t: t.flight_count + 2 # You can reference other measures as well\n )\n)", - "sql": "SELECT\n \"t2\".\"origin\",\n \"t2\".\"flight_count\",\n \"t2\".\"avg_duration\",\n \"t2\".\"total_miles\",\n \"t2\".\"flight_count\" + 2 AS \"max_distance\"\nFROM (\n SELECT\n \"t1\".\"origin\",\n COUNT(*) AS \"flight_count\",\n AVG(\"t1\".\"duration\") AS \"avg_duration\",\n SUM(\"t1\".\"distance\") AS \"total_miles\"\n FROM (\n SELECT\n \"t1\".\"carrier\",\n \"t1\".\"distance\",\n \"t1\".\"duration\",\n \"t1\".\"origin\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_qnlt5c3ycbdyrjsyv5ftqgmuym\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"origin\"\n) AS \"t2\"", + "sql": "SELECT\n \"t2\".\"origin\",\n \"t2\".\"flight_count\",\n \"t2\".\"avg_duration\",\n \"t2\".\"total_miles\",\n \"t2\".\"flight_count\" + 2 AS \"max_distance\"\nFROM (\n SELECT\n \"t1\".\"origin\",\n COUNT(*) AS \"flight_count\",\n AVG(\"t1\".\"duration\") AS \"avg_duration\",\n SUM(\"t1\".\"distance\") AS \"total_miles\"\n FROM (\n SELECT\n \"t1\".\"carrier\",\n \"t1\".\"distance\",\n \"t1\".\"duration\",\n \"t1\".\"origin\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_onqfscphkbhtzli4oya7wmodva\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"origin\"\n) AS \"t2\"", "plan": "SemanticTable: flights\n origin [dim]\n carrier [dim]\n flight_count [measure]\n total_distance [measure]\n avg_duration [measure]\n-> GroupBy(origin)\n-> Aggregate(flight_count, avg_duration, total_miles, max_distance)", "table": { "columns": [ @@ -547,117 +232,26 @@ 3249, 4 ], - [ - "NYC", - 4, - 337.5, - 11269, - 6 - ], [ "LAX", 3, 153.33333333333334, 3483, 5 + ], + [ + "NYC", + 4, + 337.5, + 11269, + 6 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-22866678e8acf83c7eb8555595e76d9b" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "origin", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "origin", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "flight_count", - "avg_duration", - "total_miles", - "max_distance" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-22866678e8acf83c7eb8555595e76d9b": [ - { - "origin": "NYC", - "flight_count": 4, - "avg_duration": 337.5, - "total_miles": 11269, - "max_distance": 6 - }, - { - "origin": "LAX", - "flight_count": 3, - "avg_duration": 153.33333333333334, - "total_miles": 3483, - "max_distance": 5 - }, - { - "origin": "SFO", - "flight_count": 2, - "avg_duration": 212.5, - "total_miles": 3249, - "max_distance": 4 - } - ] - } - } } }, "query_table_columns": { "code": "# Reference table columns directly in aggregate()\nresult = (\n flights_st\n .group_by(\"origin\")\n .aggregate(\n \"flight_count\", # Pre-defined measure\n total_distance=lambda t: t.distance.sum(), # Table column 'distance'\n avg_duration=lambda t: t.duration.mean(), # Table column 'duration'\n distance_in_km=lambda t: (t.distance * 1.60934).sum() # Transform then aggregate\n )\n)", - "sql": "SELECT\n *\nFROM (\n SELECT\n \"t1\".\"origin\",\n COUNT(*) AS \"flight_count\",\n SUM(\"t1\".\"distance\") AS \"total_distance\",\n AVG(\"t1\".\"duration\") AS \"avg_duration\",\n SUM(\"t1\".\"distance\" * 1.60934) AS \"distance_in_km\"\n FROM (\n SELECT\n \"t1\".\"carrier\",\n \"t1\".\"distance\",\n \"t1\".\"duration\",\n \"t1\".\"origin\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_qnlt5c3ycbdyrjsyv5ftqgmuym\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"origin\"\n) AS \"t2\"", + "sql": "SELECT\n *\nFROM (\n SELECT\n \"t1\".\"origin\",\n COUNT(*) AS \"flight_count\",\n SUM(\"t1\".\"distance\") AS \"total_distance\",\n AVG(\"t1\".\"duration\") AS \"avg_duration\",\n SUM(\"t1\".\"distance\" * 1.60934) AS \"distance_in_km\"\n FROM (\n SELECT\n \"t1\".\"carrier\",\n \"t1\".\"distance\",\n \"t1\".\"duration\",\n \"t1\".\"origin\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_onqfscphkbhtzli4oya7wmodva\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"origin\"\n) AS \"t2\"", "plan": "SemanticTable: flights\n origin [dim]\n carrier [dim]\n flight_count [measure]\n total_distance [measure]\n avg_duration [measure]\n-> GroupBy(origin)\n-> Aggregate(flight_count, total_distance, avg_duration, distance_in_km)", "table": { "columns": [ @@ -669,11 +263,11 @@ ], "data": [ [ - "NYC", - 4, - 11269, - 337.5, - 18135.65246 + "SFO", + 2, + 3249, + 212.5, + 5228.7456600000005 ], [ "LAX", @@ -683,109 +277,18 @@ 5605.331220000001 ], [ - "SFO", - 2, - 3249, - 212.5, - 5228.7456600000005 + "NYC", + 4, + 11269, + 337.5, + 18135.65246 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-27f0328d190a33105ca81108d7ff41f3" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "origin", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "origin", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "flight_count", - "total_distance", - "avg_duration", - "distance_in_km" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-27f0328d190a33105ca81108d7ff41f3": [ - { - "origin": "NYC", - "flight_count": 4, - "total_distance": 11269, - "avg_duration": 337.5, - "distance_in_km": 18135.65246 - }, - { - "origin": "LAX", - "flight_count": 3, - "total_distance": 3483, - "avg_duration": 153.33333333333334, - "distance_in_km": 5605.331220000001 - }, - { - "origin": "SFO", - "flight_count": 2, - "total_distance": 3249, - "avg_duration": 212.5, - "distance_in_km": 5228.7456600000005 - } - ] - } - } } }, "query_filter_order_limit": { "code": "from ibis import _\n\n# Filter data, sort, and limit results\nresult = (\n flights_st\n .filter(lambda t: t.origin.isin([\"NYC\", \"LAX\"])) # Filter origins\n .filter(_.distance > 500) # Filter distance using _ syntax\n .group_by(\"origin\")\n .aggregate(\"flight_count\", \"avg_duration\") # Aggregate both measures\n .order_by(ibis.desc(\"flight_count\")) # Sort by flight_count descending\n .limit(5) # Top 5 results\n)", - "sql": "SELECT\n *\nFROM (\n SELECT\n \"t1\".\"origin\",\n COUNT(*) AS \"flight_count\",\n AVG(\"t1\".\"duration\") AS \"avg_duration\"\n FROM (\n SELECT\n \"t1\".\"carrier\",\n \"t1\".\"distance\",\n \"t1\".\"duration\",\n \"t1\".\"origin\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_qnlt5c3ycbdyrjsyv5ftqgmuym\" AS \"t0\"\n WHERE\n \"t0\".\"origin\" IN ('NYC', 'LAX') AND \"t0\".\"distance\" > 500\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"origin\"\n) AS \"t2\"\nORDER BY\n \"t2\".\"flight_count\" DESC NULLS LAST\nLIMIT 5", + "sql": "SELECT\n *\nFROM (\n SELECT\n \"t1\".\"origin\",\n COUNT(*) AS \"flight_count\",\n AVG(\"t1\".\"duration\") AS \"avg_duration\"\n FROM (\n SELECT\n \"t1\".\"carrier\",\n \"t1\".\"distance\",\n \"t1\".\"duration\",\n \"t1\".\"origin\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_onqfscphkbhtzli4oya7wmodva\" AS \"t0\"\n WHERE\n \"t0\".\"origin\" IN ('NYC', 'LAX') AND \"t0\".\"distance\" > 500\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"origin\"\n) AS \"t2\"\nORDER BY\n \"t2\".\"flight_count\" DESC NULLS LAST\nLIMIT 5", "plan": "SemanticTable: flights\n origin [dim]\n carrier [dim]\n flight_count [measure]\n total_distance [measure]\n avg_duration [measure]\n-> Filter(\u03bb )\n-> Filter(\u03bb )\n-> GroupBy(origin)\n-> Aggregate(flight_count, avg_duration)\n-> OrderBy(_CallableWrapper(_fn=_['flight_count'].desc(nulls_first=False)))\n-> Limit(5)", "table": { "columns": [ @@ -805,89 +308,11 @@ 330.0 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-d715b987fbd69b5c84ba5625639181af" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "origin", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "origin", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "flight_count", - "avg_duration" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-d715b987fbd69b5c84ba5625639181af": [ - { - "origin": "NYC", - "flight_count": 4, - "avg_duration": 337.5 - }, - { - "origin": "LAX", - "flight_count": 1, - "avg_duration": 330.0 - } - ] - } - } } }, "query_basic_nest": { "code": "from ibis import _\n\n# Nest flight details within each origin\nresult = (\n flights_st\n .group_by(\"origin\")\n .aggregate(\n \"flight_count\",\n \"total_distance\",\n # Create nested array of flight details\n nest={\"flights\": lambda t: t.group_by([\"carrier\", \"distance\"])}\n )\n)", - "sql": "SELECT\n *\nFROM (\n SELECT\n \"t1\".\"origin\",\n COUNT(*) AS \"flight_count\",\n SUM(\"t1\".\"distance\") AS \"total_distance\",\n ARRAY_AGG(NAMED_STRUCT('carrier', \"t1\".\"carrier\", 'distance', \"t1\".\"distance\")) FILTER(WHERE\n NAMED_STRUCT('carrier', \"t1\".\"carrier\", 'distance', \"t1\".\"distance\") IS NOT NULL) AS \"flights\"\n FROM (\n SELECT\n \"t1\".\"carrier\",\n \"t1\".\"distance\",\n \"t1\".\"duration\",\n \"t1\".\"origin\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_qnlt5c3ycbdyrjsyv5ftqgmuym\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"origin\"\n) AS \"t2\"", + "sql": "SELECT\n *\nFROM (\n SELECT\n \"t1\".\"origin\",\n COUNT(*) AS \"flight_count\",\n SUM(\"t1\".\"distance\") AS \"total_distance\",\n ARRAY_AGG(NAMED_STRUCT('carrier', \"t1\".\"carrier\", 'distance', \"t1\".\"distance\")) FILTER(WHERE\n NAMED_STRUCT('carrier', \"t1\".\"carrier\", 'distance', \"t1\".\"distance\") IS NOT NULL) AS \"flights\"\n FROM (\n SELECT\n \"t1\".\"carrier\",\n \"t1\".\"distance\",\n \"t1\".\"duration\",\n \"t1\".\"origin\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_onqfscphkbhtzli4oya7wmodva\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"origin\"\n) AS \"t2\"", "plan": "SemanticTable: flights\n origin [dim]\n carrier [dim]\n flight_count [measure]\n total_distance [measure]\n avg_duration [measure]\n-> GroupBy(origin)\n-> Aggregate(flight_count, total_distance, flights)", "table": { "columns": [ @@ -897,29 +322,6 @@ "flights" ], "data": [ - [ - "NYC", - 4, - 11269, - [ - { - "carrier": "AA", - "distance": 2789 - }, - { - "carrier": "AA", - "distance": 2902 - }, - { - "carrier": "UA", - "distance": 2789 - }, - { - "carrier": "AA", - "distance": 2789 - } - ] - ], [ "LAX", 3, @@ -953,139 +355,36 @@ "distance": 347 } ] - ] - ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-84104b475a7c24825f52e7213f8ab180" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "origin", - "type": "nominal" - }, + ], + [ + "NYC", + 4, + 11269, + [ { - "field": "measure", - "type": "nominal" + "carrier": "AA", + "distance": 2789 }, { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "origin", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "flight_count", - "total_distance", - "flights" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-84104b475a7c24825f52e7213f8ab180": [ - { - "origin": "SFO", - "flight_count": 2, - "total_distance": 3249, - "flights": [ - { - "carrier": "UA", - "distance": 2902 - }, - { - "carrier": "AA", - "distance": 347 - } - ] + "carrier": "AA", + "distance": 2902 }, { - "origin": "NYC", - "flight_count": 4, - "total_distance": 11269, - "flights": [ - { - "carrier": "AA", - "distance": 2789 - }, - { - "carrier": "AA", - "distance": 2902 - }, - { - "carrier": "UA", - "distance": 2789 - }, - { - "carrier": "AA", - "distance": 2789 - } - ] + "carrier": "UA", + "distance": 2789 }, { - "origin": "LAX", - "flight_count": 3, - "total_distance": 3483, - "flights": [ - { - "carrier": "UA", - "distance": 2789 - }, - { - "carrier": "AA", - "distance": 347 - }, - { - "carrier": "UA", - "distance": 347 - } - ] + "carrier": "AA", + "distance": 2789 } ] - } - } + ] + ] } }, "query_nest_select": { "code": "# Nest specific columns\nresult = (\n flights_st\n .group_by(\"carrier\")\n .aggregate(\n \"flight_count\",\n nest={\"routes\": lambda t: t.select(\"origin\", \"distance\", \"duration\")}\n )\n)", - "sql": "SELECT\n *\nFROM (\n SELECT\n \"t1\".\"carrier\",\n COUNT(*) AS \"flight_count\",\n ARRAY_AGG(\n NAMED_STRUCT('origin', \"t1\".\"origin\", 'distance', \"t1\".\"distance\", 'duration', \"t1\".\"duration\")\n ) FILTER(WHERE\n NAMED_STRUCT('origin', \"t1\".\"origin\", 'distance', \"t1\".\"distance\", 'duration', \"t1\".\"duration\") IS NOT NULL) AS \"routes\"\n FROM (\n SELECT\n \"t1\".\"origin\",\n \"t1\".\"distance\",\n \"t1\".\"duration\",\n \"t1\".\"carrier\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_qnlt5c3ycbdyrjsyv5ftqgmuym\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"carrier\"\n) AS \"t2\"", + "sql": "SELECT\n *\nFROM (\n SELECT\n \"t1\".\"carrier\",\n COUNT(*) AS \"flight_count\",\n ARRAY_AGG(\n NAMED_STRUCT('origin', \"t1\".\"origin\", 'distance', \"t1\".\"distance\", 'duration', \"t1\".\"duration\")\n ) FILTER(WHERE\n NAMED_STRUCT('origin', \"t1\".\"origin\", 'distance', \"t1\".\"distance\", 'duration', \"t1\".\"duration\") IS NOT NULL) AS \"routes\"\n FROM (\n SELECT\n \"t1\".\"origin\",\n \"t1\".\"distance\",\n \"t1\".\"duration\",\n \"t1\".\"carrier\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_onqfscphkbhtzli4oya7wmodva\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"carrier\"\n) AS \"t2\"", "plan": "SemanticTable: flights\n origin [dim]\n carrier [dim]\n flight_count [measure]\n total_distance [measure]\n avg_duration [measure]\n-> GroupBy(carrier)\n-> Aggregate(flight_count, routes)", "table": { "columns": [ @@ -1094,32 +393,6 @@ "routes" ], "data": [ - [ - "UA", - 4, - [ - { - "origin": "LAX", - "distance": 2789, - "duration": 330 - }, - { - "origin": "SFO", - "distance": 2902, - "duration": 360 - }, - { - "origin": "NYC", - "distance": 2789, - "duration": 330 - }, - { - "origin": "LAX", - "distance": 347, - "duration": 65 - } - ] - ], [ "AA", 5, @@ -1150,138 +423,39 @@ "duration": 330 } ] - ] - ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-82d74a794a4a364fb1e9e683490ca229" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ + ], + [ + "UA", + 4, + [ { - "field": "carrier", - "type": "nominal" + "origin": "LAX", + "distance": 2789, + "duration": 330 }, { - "field": "measure", - "type": "nominal" + "origin": "SFO", + "distance": 2902, + "duration": 360 }, { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "carrier", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "flight_count", - "routes" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-82d74a794a4a364fb1e9e683490ca229": [ - { - "carrier": "UA", - "flight_count": 4, - "routes": [ - { - "origin": "LAX", - "distance": 2789, - "duration": 330 - }, - { - "origin": "SFO", - "distance": 2902, - "duration": 360 - }, - { - "origin": "NYC", - "distance": 2789, - "duration": 330 - }, - { - "origin": "LAX", - "distance": 347, - "duration": 65 - } - ] + "origin": "NYC", + "distance": 2789, + "duration": 330 }, { - "carrier": "AA", - "flight_count": 5, - "routes": [ - { - "origin": "NYC", - "distance": 2789, - "duration": 330 - }, - { - "origin": "NYC", - "distance": 2902, - "duration": 360 - }, - { - "origin": "LAX", - "distance": 347, - "duration": 65 - }, - { - "origin": "SFO", - "distance": 347, - "duration": 65 - }, - { - "origin": "NYC", - "distance": 2789, - "duration": 330 - } - ] + "origin": "LAX", + "distance": 347, + "duration": 65 } ] - } - } + ] + ] } }, "query_nest_step1": { "code": "from ibis import _\n\n# Create nested data structure\nresult = (\n flights_st\n .group_by(\"origin\")\n .aggregate(\n \"flight_count\",\n nest={\"flights\": lambda t: t.group_by([\"carrier\", \"distance\"])}\n )\n)", - "sql": "SELECT\n *\nFROM (\n SELECT\n \"t1\".\"origin\",\n COUNT(*) AS \"flight_count\",\n ARRAY_AGG(NAMED_STRUCT('carrier', \"t1\".\"carrier\", 'distance', \"t1\".\"distance\")) FILTER(WHERE\n NAMED_STRUCT('carrier', \"t1\".\"carrier\", 'distance', \"t1\".\"distance\") IS NOT NULL) AS \"flights\"\n FROM (\n SELECT\n \"t1\".\"carrier\",\n \"t1\".\"distance\",\n \"t1\".\"duration\",\n \"t1\".\"origin\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_qnlt5c3ycbdyrjsyv5ftqgmuym\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"origin\"\n) AS \"t2\"", + "sql": "SELECT\n *\nFROM (\n SELECT\n \"t1\".\"origin\",\n COUNT(*) AS \"flight_count\",\n ARRAY_AGG(NAMED_STRUCT('carrier', \"t1\".\"carrier\", 'distance', \"t1\".\"distance\")) FILTER(WHERE\n NAMED_STRUCT('carrier', \"t1\".\"carrier\", 'distance', \"t1\".\"distance\") IS NOT NULL) AS \"flights\"\n FROM (\n SELECT\n \"t1\".\"carrier\",\n \"t1\".\"distance\",\n \"t1\".\"duration\",\n \"t1\".\"origin\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_onqfscphkbhtzli4oya7wmodva\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"origin\"\n) AS \"t2\"", "plan": "SemanticTable: flights\n origin [dim]\n carrier [dim]\n flight_count [measure]\n total_distance [measure]\n avg_duration [measure]\n-> GroupBy(origin)\n-> Aggregate(flight_count, flights)", "table": { "columns": [ @@ -1305,173 +479,51 @@ ] ], [ - "NYC", - 4, + "LAX", + 3, [ { - "carrier": "AA", + "carrier": "UA", "distance": 2789 }, { "carrier": "AA", - "distance": 2902 + "distance": 347 }, { "carrier": "UA", - "distance": 2789 - }, - { - "carrier": "AA", - "distance": 2789 + "distance": 347 } ] ], [ - "LAX", - 3, + "NYC", + 4, [ { - "carrier": "UA", + "carrier": "AA", "distance": 2789 }, { "carrier": "AA", - "distance": 347 + "distance": 2902 }, { "carrier": "UA", - "distance": 347 - } - ] - ] - ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-22fff5453736929a356005cc8fc72142" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "origin", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "origin", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "flight_count", - "flights" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-22fff5453736929a356005cc8fc72142": [ - { - "origin": "SFO", - "flight_count": 2, - "flights": [ - { - "carrier": "UA", - "distance": 2902 - }, - { - "carrier": "AA", - "distance": 347 - } - ] - }, - { - "origin": "NYC", - "flight_count": 4, - "flights": [ - { - "carrier": "AA", - "distance": 2789 - }, - { - "carrier": "AA", - "distance": 2902 - }, - { - "carrier": "UA", - "distance": 2789 - }, - { - "carrier": "AA", - "distance": 2789 - } - ] + "distance": 2789 }, { - "origin": "LAX", - "flight_count": 3, - "flights": [ - { - "carrier": "UA", - "distance": 2789 - }, - { - "carrier": "AA", - "distance": 347 - }, - { - "carrier": "UA", - "distance": 347 - } - ] + "carrier": "AA", + "distance": 2789 } ] - } - } + ] + ] } }, "query_nest_step2": { "code": "from ibis import _\n\n# Re-grouping automatically unnests the 'flights' array\nresult = (\n result\n .group_by(\"origin\")\n .aggregate(\n total_flights=lambda t: t.flight_count.sum(),\n # Access unnested fields from the flights array\n unique_carriers=lambda t: t.flights.carrier.nunique(),\n avg_distance=lambda t: t.flights.distance.mean()\n )\n)", - "sql": "WITH \"t3\" AS (\n SELECT\n *\n FROM (\n SELECT\n \"t1\".\"origin\",\n COUNT(*) AS \"flight_count\",\n ARRAY_AGG(NAMED_STRUCT('carrier', \"t1\".\"carrier\", 'distance', \"t1\".\"distance\")) FILTER(WHERE\n NAMED_STRUCT('carrier', \"t1\".\"carrier\", 'distance', \"t1\".\"distance\") IS NOT NULL) AS \"flights\"\n FROM (\n SELECT\n \"t1\".\"carrier\",\n \"t1\".\"distance\",\n \"t1\".\"duration\",\n \"t1\".\"origin\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_qnlt5c3ycbdyrjsyv5ftqgmuym\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"origin\"\n ) AS \"t2\"\n)\nSELECT\n *\nFROM (\n SELECT\n \"t7\".\"origin\",\n \"t7\".\"total_flights\",\n \"t9\".\"unique_carriers\",\n \"t9\".\"avg_distance\"\n FROM (\n SELECT\n \"t4\".\"origin\",\n SUM(\"t4\".\"flight_count\") AS \"total_flights\"\n FROM (\n SELECT\n \"t4\".\"flight_count\",\n \"t4\".\"flights\",\n \"t4\".\"origin\"\n FROM \"t3\" AS \"t4\"\n ) AS t4\n GROUP BY\n \"t4\".\"origin\"\n ) AS \"t7\"\n LEFT OUTER JOIN (\n SELECT\n \"t5\".\"origin\",\n COUNT(DISTINCT \"t5\".\"flights\"['carrier']) AS \"unique_carriers\",\n AVG(\"t5\".\"flights\"['distance']) AS \"avg_distance\"\n FROM (\n SELECT\n \"t5\".\"flight_count\",\n \"t5\".\"flights\",\n \"t5\".\"origin\"\n FROM (\n SELECT\n \"t4\".\"origin\",\n \"t4\".\"flight_count\",\n UNNEST(\"t4\".\"flights\") AS \"flights\"\n FROM \"t3\" AS \"t4\"\n ) AS \"t5\"\n ) AS t5\n GROUP BY\n \"t5\".\"origin\"\n ) AS \"t9\"\n ON \"t7\".\"origin\" = \"t9\".\"origin\"\n) AS \"t10\"", + "sql": "WITH \"t3\" AS (\n SELECT\n *\n FROM (\n SELECT\n \"t1\".\"origin\",\n COUNT(*) AS \"flight_count\",\n ARRAY_AGG(NAMED_STRUCT('carrier', \"t1\".\"carrier\", 'distance', \"t1\".\"distance\")) FILTER(WHERE\n NAMED_STRUCT('carrier', \"t1\".\"carrier\", 'distance', \"t1\".\"distance\") IS NOT NULL) AS \"flights\"\n FROM (\n SELECT\n \"t1\".\"carrier\",\n \"t1\".\"distance\",\n \"t1\".\"duration\",\n \"t1\".\"origin\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_onqfscphkbhtzli4oya7wmodva\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"origin\"\n ) AS \"t2\"\n)\nSELECT\n *\nFROM (\n SELECT\n \"t7\".\"origin\",\n \"t7\".\"total_flights\",\n \"t9\".\"unique_carriers\",\n \"t9\".\"avg_distance\"\n FROM (\n SELECT\n \"t4\".\"origin\",\n SUM(\"t4\".\"flight_count\") AS \"total_flights\"\n FROM (\n SELECT\n \"t4\".\"flight_count\",\n \"t4\".\"flights\",\n \"t4\".\"origin\"\n FROM \"t3\" AS \"t4\"\n ) AS t4\n GROUP BY\n \"t4\".\"origin\"\n ) AS \"t7\"\n LEFT OUTER JOIN (\n SELECT\n \"t5\".\"origin\",\n COUNT(DISTINCT \"t5\".\"flights\"['carrier']) AS \"unique_carriers\",\n AVG(\"t5\".\"flights\"['distance']) AS \"avg_distance\"\n FROM (\n SELECT\n \"t5\".\"flight_count\",\n \"t5\".\"flights\",\n \"t5\".\"origin\"\n FROM (\n SELECT\n \"t4\".\"origin\",\n \"t4\".\"flight_count\",\n UNNEST(\"t4\".\"flights\") AS \"flights\"\n FROM \"t3\" AS \"t4\"\n ) AS \"t5\"\n ) AS t5\n GROUP BY\n \"t5\".\"origin\"\n ) AS \"t9\"\n ON \"t7\".\"origin\" = \"t9\".\"origin\"\n) AS \"t10\"", "plan": "SemanticTable: flights\n origin [dim]\n carrier [dim]\n flight_count [measure]\n total_distance [measure]\n avg_duration [measure]\n-> GroupBy(origin)\n-> Aggregate(flight_count, flights)\n-> GroupBy(origin)\n-> Aggregate(total_flights, unique_carriers, avg_distance)", "table": { "columns": [ @@ -1481,18 +533,18 @@ "avg_distance" ], "data": [ - [ - "SFO", - 2, - 2, - 1624.5 - ], [ "NYC", 4, 2, 2817.25 ], + [ + "SFO", + 2, + 2, + 1624.5 + ], [ "LAX", 3, @@ -1500,98 +552,11 @@ 1161.0 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-225c9696e9dcb537678e7264fec96042" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "origin", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "origin", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "total_flights", - "unique_carriers", - "avg_distance" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-225c9696e9dcb537678e7264fec96042": [ - { - "origin": "SFO", - "total_flights": 2, - "unique_carriers": 2, - "avg_distance": 1624.5 - }, - { - "origin": "NYC", - "total_flights": 4, - "unique_carriers": 2, - "avg_distance": 2817.25 - }, - { - "origin": "LAX", - "total_flights": 3, - "unique_carriers": 2, - "avg_distance": 1161.0 - } - ] - } - } } }, "query_mutate": { "code": "from ibis import _\n\n# Add post-aggregation calculations\nresult = (\n flights_st\n .group_by(\"origin\")\n .aggregate(\"flight_count\", \"total_distance\")\n .mutate(\n avg_distance_per_flight=lambda t: t.total_distance / t.flight_count,\n flight_category=lambda t: xo.case()\n .when(t.flight_count >= 3, \"high\")\n .when(t.flight_count >= 2, \"medium\")\n .else_(\"low\")\n .end()\n )\n)", - "sql": "SELECT\n \"t2\".\"origin\",\n \"t2\".\"flight_count\",\n \"t2\".\"total_distance\",\n CAST(\"t2\".\"total_distance\" AS DOUBLE PRECISION) / \"t2\".\"flight_count\" AS \"avg_distance_per_flight\",\n CASE\n WHEN \"t2\".\"flight_count\" >= 3\n THEN 'high'\n WHEN \"t2\".\"flight_count\" >= 2\n THEN 'medium'\n ELSE 'low'\n END AS \"flight_category\"\nFROM (\n SELECT\n \"t1\".\"origin\",\n COUNT(*) AS \"flight_count\",\n SUM(\"t1\".\"distance\") AS \"total_distance\"\n FROM (\n SELECT\n \"t1\".\"carrier\",\n \"t1\".\"distance\",\n \"t1\".\"duration\",\n \"t1\".\"origin\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_qnlt5c3ycbdyrjsyv5ftqgmuym\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"origin\"\n) AS \"t2\"", + "sql": "SELECT\n \"t2\".\"origin\",\n \"t2\".\"flight_count\",\n \"t2\".\"total_distance\",\n CAST(\"t2\".\"total_distance\" AS DOUBLE PRECISION) / \"t2\".\"flight_count\" AS \"avg_distance_per_flight\",\n CASE\n WHEN \"t2\".\"flight_count\" >= 3\n THEN 'high'\n WHEN \"t2\".\"flight_count\" >= 2\n THEN 'medium'\n ELSE 'low'\n END AS \"flight_category\"\nFROM (\n SELECT\n \"t1\".\"origin\",\n COUNT(*) AS \"flight_count\",\n SUM(\"t1\".\"distance\") AS \"total_distance\"\n FROM (\n SELECT\n \"t1\".\"carrier\",\n \"t1\".\"distance\",\n \"t1\".\"duration\",\n \"t1\".\"origin\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_onqfscphkbhtzli4oya7wmodva\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"origin\"\n) AS \"t2\"", "plan": "SemanticTable: flights\n origin [dim]\n carrier [dim]\n flight_count [measure]\n total_distance [measure]\n avg_duration [measure]\n-> GroupBy(origin)\n-> Aggregate(flight_count, total_distance)\n-> Mutate(avg_distance_per_flight, flight_category)", "table": { "columns": [ @@ -1609,117 +574,26 @@ 1624.0, "medium" ], - [ - "NYC", - 4, - 11269, - 2817.0, - "high" - ], [ "LAX", 3, 3483, 1161.0, "high" + ], + [ + "NYC", + 4, + 11269, + 2817.0, + "high" ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-e59d3e7a9f48b3e4105cb878316f71c8" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "origin", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "origin", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "flight_count", - "total_distance", - "avg_distance_per_flight", - "flight_category" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-e59d3e7a9f48b3e4105cb878316f71c8": [ - { - "origin": "SFO", - "flight_count": 2, - "total_distance": 3249, - "avg_distance_per_flight": 1624.0, - "flight_category": "medium" - }, - { - "origin": "NYC", - "flight_count": 4, - "total_distance": 11269, - "avg_distance_per_flight": 2817.0, - "flight_category": "high" - }, - { - "origin": "LAX", - "flight_count": 3, - "total_distance": 3483, - "avg_distance_per_flight": 1161.0, - "flight_category": "high" - } - ] - } - } } }, "query_window_example": { "code": "from ibis import _\n\n# First aggregate to daily level\ndaily_flights = (\n flights_st\n .group_by(\"origin\")\n .aggregate(\"flight_count\", \"total_distance\")\n .order_by(\"origin\")\n)\n\n# Then apply window function for cumulative distance\nwindow_spec = xo.window(order_by=\"origin\")\n\nresult = daily_flights.mutate(\n cumulative_distance=_.total_distance.cumsum(),\n flight_rank=lambda t: xo.rank().over(xo.window(order_by=xo.desc(t.flight_count)))\n).limit(10)", - "sql": "SELECT\n \"t4\".\"origin\",\n \"t4\".\"flight_count\",\n \"t4\".\"total_distance\",\n \"t4\".\"cumulative_distance\",\n RANK() OVER (ORDER BY \"t4\".\"flight_count\" DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS \"flight_rank\"\nFROM (\n SELECT\n \"t3\".\"origin\",\n \"t3\".\"flight_count\",\n \"t3\".\"total_distance\",\n SUM(\"t3\".\"total_distance\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS \"cumulative_distance\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t1\".\"origin\",\n COUNT(*) AS \"flight_count\",\n SUM(\"t1\".\"distance\") AS \"total_distance\"\n FROM (\n SELECT\n \"t1\".\"carrier\",\n \"t1\".\"distance\",\n \"t1\".\"duration\",\n \"t1\".\"origin\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_qnlt5c3ycbdyrjsyv5ftqgmuym\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"origin\"\n ) AS \"t2\"\n ORDER BY\n \"t2\".\"origin\" ASC\n ) AS \"t3\"\n) AS \"t4\"\nLIMIT 10", + "sql": "SELECT\n \"t4\".\"origin\",\n \"t4\".\"flight_count\",\n \"t4\".\"total_distance\",\n \"t4\".\"cumulative_distance\",\n RANK() OVER (\n ORDER BY \"t4\".\"flight_count\" DESC NULLS LAST\n ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING\n ) - 1 AS \"flight_rank\"\nFROM (\n SELECT\n \"t3\".\"origin\",\n \"t3\".\"flight_count\",\n \"t3\".\"total_distance\",\n SUM(\"t3\".\"total_distance\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS \"cumulative_distance\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t1\".\"origin\",\n COUNT(*) AS \"flight_count\",\n SUM(\"t1\".\"distance\") AS \"total_distance\"\n FROM (\n SELECT\n \"t1\".\"carrier\",\n \"t1\".\"distance\",\n \"t1\".\"duration\",\n \"t1\".\"origin\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_onqfscphkbhtzli4oya7wmodva\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"origin\"\n ) AS \"t2\"\n ORDER BY\n \"t2\".\"origin\" ASC\n ) AS \"t3\"\n) AS \"t4\"\nLIMIT 10", "plan": "SemanticTable: flights\n origin [dim]\n carrier [dim]\n flight_count [measure]\n total_distance [measure]\n avg_duration [measure]\n-> GroupBy(origin)\n-> Aggregate(flight_count, total_distance)\n-> OrderBy(origin)\n-> Mutate(cumulative_distance, flight_rank)\n-> Limit(10)", "table": { "columns": [ @@ -1734,115 +608,24 @@ "NYC", 4, 11269, - 11269, + 18001, 0 ], [ "LAX", 3, 3483, - 14752, + 6732, 1 ], [ "SFO", 2, 3249, - 18001, + 3249, 2 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-d3b07097e2c848329adb9f88de6b41b6" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "origin", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "origin", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "flight_count", - "total_distance", - "cumulative_distance", - "flight_rank" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-d3b07097e2c848329adb9f88de6b41b6": [ - { - "origin": "NYC", - "flight_count": 4, - "total_distance": 11269, - "cumulative_distance": 14518, - "flight_rank": 0 - }, - { - "origin": "LAX", - "flight_count": 3, - "total_distance": 3483, - "cumulative_distance": 18001, - "flight_rank": 1 - }, - { - "origin": "SFO", - "flight_count": 2, - "total_distance": 3249, - "cumulative_distance": 3249, - "flight_rank": 2 - } - ] - } - } } }, "query_as_table_problem": { diff --git a/docs/web/public/bsl-data/reference.json b/docs/web/public/bsl-data/reference.json index f4c7b63..4bae8cc 100644 --- a/docs/web/public/bsl-data/reference.json +++ b/docs/web/public/bsl-data/reference.json @@ -1,5 +1,5 @@ { - "markdown": "# API Reference\n\nComplete API documentation for the Boring Semantic Layer.\n\n## Table Creation & Configuration\n\nMethods for creating and configuring semantic tables.\n\n### to_semantic_table()\n\n```python\nto_semantic_table(\n table: ibis.Table,\n name: str,\n description: str = None\n) -> SemanticTable\n```\n\nCreate a semantic table from an Ibis table. This is the primary entry point for building semantic models.\n\n| Parameter | Type | Description |\n|-----------|------|-------------|\n| `table` | `ibis.Table` | Ibis table to build the model from |\n| `name` | `str` | Unique identifier for the semantic table |\n| `description` | `str` | Optional description of the semantic table |\n\n**Example:**\n```python\nimport ibis\nfrom boring_semantic_layer import to_semantic_table\n\nflights = ibis.read_parquet(\"flights.parquet\")\nflights_st = to_semantic_table(flights, \"flights\")\n```\n\n### with_dimensions()\n\n```python\nwith_dimensions(\n **dimensions: Callable | Dimension\n) -> SemanticTable\n```\n\nDefine dimensions for grouping and analysis. Dimensions are attributes that categorize data.\n\n**Example:**\n```python\nflights_st = flights_st.with_dimensions(\n origin=lambda t: t.origin,\n dest=lambda t: t.dest,\n carrier=lambda t: t.carrier\n)\n```\n\n### with_measures()\n\n```python\nwith_measures(\n **measures: Callable | Measure\n) -> SemanticTable\n```\n\nDefine aggregations and calculations. Measures are numeric values that can be aggregated.\n\n**Example:**\n```python\nflights_st = flights_st.with_measures(\n flight_count=lambda t: t.count(),\n avg_delay=lambda t: t.arr_delay.mean(),\n total_distance=lambda t: t.distance.sum()\n)\n```\n\n### from_yaml()\n\n```python\nfrom_yaml(\n yaml_path: str,\n connection: ibis.Connection = None\n) -> dict[str, SemanticTable]\n```\n\nLoad semantic models from a YAML configuration file. Returns a dictionary of semantic tables.\n\n| Parameter | Type | Description |\n|-----------|------|-------------|\n| `yaml_path` | `str` | Path to YAML configuration file |\n| `connection` | `ibis.Connection` | Optional Ibis connection for database tables |\n\n**Example:**\n```python\nfrom boring_semantic_layer.yaml import from_yaml\n\nmodels = from_yaml(\"models.yaml\")\nflights_st = models[\"flights\"]\n```\n\n### Dimension Class\n\n```python\nDimension(\n expr: Callable,\n description: str = None\n)\n```\n\nSelf-documenting dimension with description. Use for better API documentation.\n\n**Example:**\n```python\nfrom boring_semantic_layer import Dimension\n\nflights_st = flights_st.with_dimensions(\n origin=Dimension(\n expr=lambda t: t.origin,\n description=\"Airport code where the flight departed from\"\n )\n)\n```\n\n### Measure Class\n\n```python\nMeasure(\n expr: Callable,\n description: str = None\n)\n```\n\nSelf-documenting measure with description. Use for better API documentation.\n\n**Example:**\n```python\nfrom boring_semantic_layer import Measure\n\nflights_st = flights_st.with_measures(\n avg_delay=Measure(\n expr=lambda t: t.arr_delay.mean(),\n description=\"Average arrival delay in minutes\"\n )\n)\n```\n\n### all()\n\n```python\nst.all()\n```\n\nReference the entire dataset within measure definitions. Primarily used for percentage-of-total calculations.\n\n**Example:**\n```python\nflights_st = to_semantic_table(data, \"flights\").with_measures(\n flight_count=lambda t: t.count(),\n pct_of_total=lambda t: (\n t.count() / t.all().count() * 100\n )\n)\n```\n\n## Join Methods\n\nMethods for composing semantic tables through joins.\n\n### join_many()\n\n```python\njoin_many(\n other: SemanticTable,\n on: Callable,\n name: str = None\n) -> SemanticTable\n```\n\nOne-to-many relationship join (LEFT JOIN). Use when the left table can match multiple rows in the right table.\n\n| Parameter | Type | Description |\n|-----------|------|-------------|\n| `other` | `SemanticTable` | The semantic table to join with |\n| `on` | `Callable` | Lambda function defining the join condition |\n| `name` | `str` | Optional name for the joined table reference |\n\n**Example:**\n```python\nflights_st = flights_st.join_many(\n carriers_st,\n on=lambda l, r: l.carrier == r.code,\n name=\"carrier_info\"\n)\n```\n\n### join_one()\n\n```python\njoin_one(\n other: SemanticTable,\n on: Callable,\n name: str = None\n) -> SemanticTable\n```\n\nOne-to-one relationship join (INNER JOIN). Use when each row in the left table matches exactly one row in the right table.\n\n**Example:**\n```python\nflights_st = flights_st.join_one(\n airports_st,\n on=lambda l, r: l.origin == r.code\n)\n```\n\n### join_cross()\n\n```python\njoin_cross(\n other: SemanticTable,\n name: str = None\n) -> SemanticTable\n```\n\nCross join (CARTESIAN PRODUCT). Creates all possible combinations of rows from both tables.\n\n### join()\n\n```python\njoin(\n other: SemanticTable,\n on: Callable,\n how: str = \"inner\",\n name: str = None\n) -> SemanticTable\n```\n\nCustom join with flexible join type. Supports 'inner', 'left', 'right', 'outer', and 'cross'.\n\n| Parameter | Type | Description |\n|-----------|------|-------------|\n| `other` | `SemanticTable` | The semantic table to join with |\n| `on` | `Callable` | Lambda function defining the join condition |\n| `how` | `str` | Join type: 'inner', 'left', 'right', 'outer', or 'cross' |\n| `name` | `str` | Optional name for the joined table reference |\n\n## Query Methods\n\nMethods for querying and transforming semantic tables.\n\n### group_by()\n\n```python\ngroup_by(\n *dimensions: str\n) -> QueryBuilder\n```\n\nGroup data by one or more dimension names. Returns a query builder for chaining with aggregate().\n\n**Example:**\n```python\nresult = flights_st.group_by(\"origin\", \"carrier\").aggregate(\"flight_count\")\n```\n\n### aggregate()\n\n```python\naggregate(\n *measures: str,\n **kwargs\n) -> ibis.Table\n```\n\nCalculate one or more measures. Can be used standalone or after group_by().\n\n**Examples:**\n```python\n# Without grouping\ntotal = flights_st.aggregate(\"flight_count\")\n\n# With grouping\nby_origin = flights_st.group_by(\"origin\").aggregate(\"flight_count\", \"avg_delay\")\n```\n\n### filter()\n\n```python\nfilter(\n condition: Callable\n) -> SemanticTable\n```\n\nApply conditions to filter data. Use lambda functions with Ibis expressions.\n\n**Example:**\n```python\ndelayed_flights = flights_st.filter(lambda t: t.arr_delay > 0)\n```\n\n### order_by()\n\n```python\norder_by(\n *columns: str | ibis.Expression\n) -> ibis.Table\n```\n\nSort query results. Use `ibis.desc()` for descending order.\n\n**Example:**\n```python\nresult = flights_st.group_by(\"origin\").aggregate(\"flight_count\")\nresult = result.order_by(ibis.desc(\"flight_count\"))\n```\n\n### limit()\n\n```python\nlimit(\n n: int\n) -> ibis.Table\n```\n\nRestrict the number of rows returned.\n\n**Example:**\n```python\ntop_10 = result.order_by(ibis.desc(\"flight_count\")).limit(10)\n```\n\n### mutate()\n\n```python\nmutate(\n **expressions: Callable | ibis.Expression\n) -> ibis.Table\n```\n\nAdd or transform columns in aggregated results. Useful for calculations after aggregation.\n\n**Example:**\n```python\nresult = flights_st.group_by(\"month\").aggregate(\"revenue\")\nresult = result.mutate(\n growth_rate=lambda t: (t.revenue - t.revenue.lag()) / t.revenue.lag() * 100\n)\n```\n\n### select()\n\n```python\nselect(\n *columns: str | ibis.Expression\n) -> ibis.Table\n```\n\nSelect specific columns from the result. Often used in nesting operations.\n\n**Example:**\n```python\nresult.select(\"origin\", \"flight_count\")\n```\n\n## Nesting\n\nCreate nested data structures within aggregations.\n\n### nest Parameter\n\n```python\naggregate(\n *measures,\n nest={\n \"nested_column\": lambda t: t.group_by([...]) | t.select(...)\n }\n)\n```\n\nCreate nested arrays of structs within aggregation results. Useful for hierarchical data or subtotals.\n\n**Example:**\n```python\nresult = flights_st.group_by(\"carrier\").aggregate(\n \"total_flights\",\n nest={\n \"by_month\": lambda t: t.group_by(\"month\").aggregate(\"monthly_flights\")\n }\n)\n```\n\n## Charting\n\nGenerate visualizations from query results.\n\n### chart()\n\n```python\nchart(\n result: ibis.Table,\n backend: str = \"altair\",\n spec: dict = None,\n format: str = \"interactive\"\n) -> Chart\n```\n\nCreate visualizations from query results. Supports Altair (default) and Plotly backends.\n\n| Parameter | Type | Description |\n|-----------|------|-------------|\n| `result` | `ibis.Table` | Query result table to visualize |\n| `backend` | `str` | \"altair\" or \"plotly\" |\n| `spec` | `dict` | Custom Vega-Lite specification (for Altair) |\n| `format` | `str` | \"interactive\", \"json\", \"png\", \"svg\" |\n\n**Auto-detection:**\nBSL automatically selects appropriate chart types:\n- Single dimension + measure \u2192 Bar chart\n- Time dimension + measure \u2192 Line chart\n- Two dimensions + measure \u2192 Heatmap\n\n**Example:**\n```python\nfrom boring_semantic_layer.chart import chart\n\nresult = flights_st.group_by(\"month\").aggregate(\"flight_count\")\nchart(result, backend=\"altair\")\n```\n\n## Dimensional Indexing\n\nCreate searchable catalogs of dimension values.\n\n### index()\n\n```python\nindex(\n dimensions: Callable | None = None,\n by: str = None,\n sample: int = None\n) -> ibis.Table\n```\n\nCreate a searchable catalog of unique dimension values with optional weighting and sampling.\n\n| Parameter | Type | Description |\n|-----------|------|-------------|\n| `dimensions` | `Callable` | None (all dimensions) or lambda returning list of fields |\n| `by` | `str` | Measure name for weighting results |\n| `sample` | `int` | Number of rows to sample (for large datasets) |\n\n**Examples:**\n```python\n# Index all dimensions\nflights_st.index()\n\n# Index specific dimensions\nflights_st.index(lambda t: [t.origin, t.dest])\n\n# Weight by measure\nflights_st.index(by=\"flight_count\")\n\n# Sample large dataset\nflights_st.index(sample=10000)\n```\n\n## Other\n\n### MCP Integration\n\n#### MCPSemanticModel()\n\n```python\nMCPSemanticModel(\n models: dict[str, SemanticTable] | str,\n description: str = None\n)\n```\n\nCreate an MCP server to expose semantic models to LLMs like Claude. Accepts either a dictionary of models or a path to a YAML configuration file.\n\n| Parameter | Type | Description |\n|-----------|------|-------------|\n| `models` | `dict` or `str` | Dictionary of SemanticTable objects or path to YAML config |\n| `description` | `str` | Optional description of the semantic model |\n\n**Available MCP Tools:**\n\n| Tool | Description |\n|------|-------------|\n| `list_models()` | List all available semantic model names |\n| `get_model()` | Get detailed model information (dimensions, measures, joins) |\n| `get_time_range()` | Get available time range for time-series data |\n| `query_model()` | Execute queries against semantic models |\n\n**Example:**\n```python\nfrom boring_semantic_layer import MCPSemanticModel\n\n# From dictionary\nserver = MCPSemanticModel(\n models={\"flights\": flights_st, \"airports\": airports_st},\n description=\"Flight data analysis\"\n)\n\n# From YAML\nserver = MCPSemanticModel(\"config.yaml\")\n```\n\n### YAML Configuration\n\n#### YAML Structure\n\n```yaml\nmodel_name:\n table: table_reference\n description: \"Optional description\"\n\n dimensions:\n dimension_name: expression\n # or with description\n dimension_name:\n expr: expression\n description: \"Dimension description\"\n\n measures:\n measure_name: expression\n # or with description\n measure_name:\n expr: expression\n description: \"Measure description\"\n\n joins:\n join_name:\n model: model_reference\n on: join_condition\n how: join_type # left, inner, right, outer, cross\n```\n\n#### Expression Syntax\n\n| Expression | Description |\n|------------|-------------|\n| `_` | Reference to the table |\n| `_.column` | Reference a column |\n| `_.count()` | Count aggregation |\n| `_.column.sum()` | Sum aggregation |\n| `_.column.mean()` | Average aggregation |\n| `_.column.min()` | Minimum value |\n| `_.column.max()` | Maximum value |\n\n**Example:**\n```yaml\nflights:\n table: flights_data\n description: \"Flight operations data\"\n\n dimensions:\n origin: _.origin\n dest: _.dest\n carrier:\n expr: _.carrier\n description: \"Airline carrier code\"\n\n measures:\n flight_count: _.count()\n avg_delay:\n expr: _.arr_delay.mean()\n description: \"Average arrival delay in minutes\"\n```\n\n## Next Steps\n\n- Learn about [Semantic Tables](/building/semantic-tables)\n- Explore [Query Methods](/querying/methods)\n- See [Advanced Patterns](/advanced/percentage-total)\n", + "markdown": "# API Reference\n\nComplete API documentation for the Boring Semantic Layer.\n\n## Table Creation & Configuration\n\nMethods for creating and configuring semantic tables.\n\n### to_semantic_table()\n\n```python\nto_semantic_table(\n table: ibis.Table,\n name: str,\n description: str = None\n) -> SemanticTable\n```\n\nCreate a semantic table from an Ibis table. This is the primary entry point for building semantic models.\n\n| Parameter | Type | Description |\n|-----------|------|-------------|\n| `table` | `ibis.Table` | Ibis table to build the model from |\n| `name` | `str` | Unique identifier for the semantic table |\n| `description` | `str` | Optional description of the semantic table |\n\n**Example:**\n```python\nimport ibis\nfrom boring_semantic_layer import to_semantic_table\n\nflights = ibis.read_parquet(\"flights.parquet\")\nflights_st = to_semantic_table(flights, \"flights\")\n```\n\n### with_dimensions()\n\n```python\nwith_dimensions(\n **dimensions: Callable | Dimension\n) -> SemanticTable\n```\n\nDefine dimensions for grouping and analysis. Dimensions are attributes that categorize data.\n\n**Example:**\n```python\nflights_st = flights_st.with_dimensions(\n origin=lambda t: t.origin,\n dest=lambda t: t.dest,\n carrier=lambda t: t.carrier\n)\n```\n\n### with_measures()\n\n```python\nwith_measures(\n **measures: Callable | Measure\n) -> SemanticTable\n```\n\nDefine aggregations and calculations. Measures are numeric values that can be aggregated.\n\n**Example:**\n```python\nflights_st = flights_st.with_measures(\n flight_count=lambda t: t.count(),\n avg_delay=lambda t: t.arr_delay.mean(),\n total_distance=lambda t: t.distance.sum()\n)\n```\n\n### from_yaml()\n\n```python\nfrom_yaml(\n yaml_path: str,\n connection: ibis.Connection = None\n) -> dict[str, SemanticTable]\n```\n\nLoad semantic models from a YAML configuration file. Returns a dictionary of semantic tables.\n\n| Parameter | Type | Description |\n|-----------|------|-------------|\n| `yaml_path` | `str` | Path to YAML configuration file |\n| `connection` | `ibis.Connection` | Optional Ibis connection for database tables |\n\n**Example:**\n```python\nfrom boring_semantic_layer import from_yaml\n\nmodels = from_yaml(\"models.yaml\")\nflights_st = models[\"flights\"]\n```\n\n### Dimension Class\n\n```python\nDimension(\n expr: Callable,\n description: str = None\n)\n```\n\nSelf-documenting dimension with description. Use for better API documentation.\n\n**Example:**\n```python\nfrom boring_semantic_layer import Dimension\n\nflights_st = flights_st.with_dimensions(\n origin=Dimension(\n expr=lambda t: t.origin,\n description=\"Airport code where the flight departed from\"\n )\n)\n```\n\n### Measure Class\n\n```python\nMeasure(\n expr: Callable,\n description: str = None\n)\n```\n\nSelf-documenting measure with description. Use for better API documentation.\n\n**Example:**\n```python\nfrom boring_semantic_layer import Measure\n\nflights_st = flights_st.with_measures(\n avg_delay=Measure(\n expr=lambda t: t.arr_delay.mean(),\n description=\"Average arrival delay in minutes\"\n )\n)\n```\n\n### all()\n\n```python\nst.all()\n```\n\nReference the entire dataset within measure definitions. Primarily used for percentage-of-total calculations.\n\n**Example:**\n```python\nflights_st = to_semantic_table(data, \"flights\").with_measures(\n flight_count=lambda t: t.count(),\n pct_of_total=lambda t: (\n t.count() / t.all().count() * 100\n )\n)\n```\n\n## Join Methods\n\nMethods for composing semantic tables through joins.\n\n### join_many()\n\n```python\njoin_many(\n other: SemanticTable,\n on: Callable,\n name: str = None\n) -> SemanticTable\n```\n\nOne-to-many relationship join (LEFT JOIN). Use when the left table can match multiple rows in the right table.\n\n| Parameter | Type | Description |\n|-----------|------|-------------|\n| `other` | `SemanticTable` | The semantic table to join with |\n| `on` | `Callable` | Lambda function defining the join condition |\n| `name` | `str` | Optional name for the joined table reference |\n\n**Example:**\n```python\nflights_st = flights_st.join_many(\n carriers_st,\n on=lambda l, r: l.carrier == r.code,\n name=\"carrier_info\"\n)\n```\n\n### join_one()\n\n```python\njoin_one(\n other: SemanticTable,\n on: Callable,\n name: str = None\n) -> SemanticTable\n```\n\nOne-to-one relationship join (INNER JOIN). Use when each row in the left table matches exactly one row in the right table.\n\n**Example:**\n```python\nflights_st = flights_st.join_one(\n airports_st,\n on=lambda l, r: l.origin == r.code\n)\n```\n\n### join_cross()\n\n```python\njoin_cross(\n other: SemanticTable,\n name: str = None\n) -> SemanticTable\n```\n\nCross join (CARTESIAN PRODUCT). Creates all possible combinations of rows from both tables.\n\n### join()\n\n```python\njoin(\n other: SemanticTable,\n on: Callable,\n how: str = \"inner\",\n name: str = None\n) -> SemanticTable\n```\n\nCustom join with flexible join type. Supports 'inner', 'left', 'right', 'outer', and 'cross'.\n\n| Parameter | Type | Description |\n|-----------|------|-------------|\n| `other` | `SemanticTable` | The semantic table to join with |\n| `on` | `Callable` | Lambda function defining the join condition |\n| `how` | `str` | Join type: 'inner', 'left', 'right', 'outer', or 'cross' |\n| `name` | `str` | Optional name for the joined table reference |\n\n## Query Methods\n\nMethods for querying and transforming semantic tables.\n\n### group_by()\n\n```python\ngroup_by(\n *dimensions: str\n) -> QueryBuilder\n```\n\nGroup data by one or more dimension names. Returns a query builder for chaining with aggregate().\n\n**Example:**\n```python\nresult = flights_st.group_by(\"origin\", \"carrier\").aggregate(\"flight_count\")\n```\n\n### aggregate()\n\n```python\naggregate(\n *measures: str,\n **kwargs\n) -> ibis.Table\n```\n\nCalculate one or more measures. Can be used standalone or after group_by().\n\n**Examples:**\n```python\n# Without grouping\ntotal = flights_st.aggregate(\"flight_count\")\n\n# With grouping\nby_origin = flights_st.group_by(\"origin\").aggregate(\"flight_count\", \"avg_delay\")\n```\n\n### filter()\n\n```python\nfilter(\n condition: Callable\n) -> SemanticTable\n```\n\nApply conditions to filter data. Use lambda functions with Ibis expressions.\n\n**Example:**\n```python\ndelayed_flights = flights_st.filter(lambda t: t.arr_delay > 0)\n```\n\n### order_by()\n\n```python\norder_by(\n *columns: str | ibis.Expression\n) -> ibis.Table\n```\n\nSort query results. Use `ibis.desc()` for descending order.\n\n**Example:**\n```python\nresult = flights_st.group_by(\"origin\").aggregate(\"flight_count\")\nresult = result.order_by(ibis.desc(\"flight_count\"))\n```\n\n### limit()\n\n```python\nlimit(\n n: int\n) -> ibis.Table\n```\n\nRestrict the number of rows returned.\n\n**Example:**\n```python\ntop_10 = result.order_by(ibis.desc(\"flight_count\")).limit(10)\n```\n\n### mutate()\n\n```python\nmutate(\n **expressions: Callable | ibis.Expression\n) -> ibis.Table\n```\n\nAdd or transform columns in aggregated results. Useful for calculations after aggregation.\n\n**Example:**\n```python\nresult = flights_st.group_by(\"month\").aggregate(\"revenue\")\nresult = result.mutate(\n growth_rate=lambda t: (t.revenue - t.revenue.lag()) / t.revenue.lag() * 100\n)\n```\n\n### select()\n\n```python\nselect(\n *columns: str | ibis.Expression\n) -> ibis.Table\n```\n\nSelect specific columns from the result. Often used in nesting operations.\n\n**Example:**\n```python\nresult.select(\"origin\", \"flight_count\")\n```\n\n## Nesting\n\nCreate nested data structures within aggregations.\n\n### nest Parameter\n\n```python\naggregate(\n *measures,\n nest={\n \"nested_column\": lambda t: t.group_by([...]) | t.select(...)\n }\n)\n```\n\nCreate nested arrays of structs within aggregation results. Useful for hierarchical data or subtotals.\n\n**Example:**\n```python\nresult = flights_st.group_by(\"carrier\").aggregate(\n \"total_flights\",\n nest={\n \"by_month\": lambda t: t.group_by(\"month\").aggregate(\"monthly_flights\")\n }\n)\n```\n\n## Charting\n\nGenerate visualizations from query results.\n\n### chart()\n\n```python\nchart(\n result: ibis.Table,\n backend: str = \"altair\",\n spec: dict = None,\n format: str = \"interactive\"\n) -> Chart\n```\n\nCreate visualizations from query results. Supports Altair (default) and Plotly backends.\n\n| Parameter | Type | Description |\n|-----------|------|-------------|\n| `result` | `ibis.Table` | Query result table to visualize |\n| `backend` | `str` | \"altair\" or \"plotly\" |\n| `spec` | `dict` | Custom Vega-Lite specification (for Altair) |\n| `format` | `str` | \"interactive\", \"json\", \"png\", \"svg\" |\n\n**Auto-detection:**\nBSL automatically selects appropriate chart types:\n- Single dimension + measure \u2192 Bar chart\n- Time dimension + measure \u2192 Line chart\n- Two dimensions + measure \u2192 Heatmap\n\n**Example:**\n```python\nfrom boring_semantic_layer.chart import chart\n\nresult = flights_st.group_by(\"month\").aggregate(\"flight_count\")\nchart(result, backend=\"altair\")\n```\n\n## Dimensional Indexing\n\nCreate searchable catalogs of dimension values.\n\n### index()\n\n```python\nindex(\n dimensions: Callable | None = None,\n by: str = None,\n sample: int = None\n) -> ibis.Table\n```\n\nCreate a searchable catalog of unique dimension values with optional weighting and sampling.\n\n| Parameter | Type | Description |\n|-----------|------|-------------|\n| `dimensions` | `Callable` | None (all dimensions) or lambda returning list of fields |\n| `by` | `str` | Measure name for weighting results |\n| `sample` | `int` | Number of rows to sample (for large datasets) |\n\n**Examples:**\n```python\n# Index all dimensions\nflights_st.index()\n\n# Index specific dimensions\nflights_st.index(lambda t: [t.origin, t.dest])\n\n# Weight by measure\nflights_st.index(by=\"flight_count\")\n\n# Sample large dataset\nflights_st.index(sample=10000)\n```\n\n## Other\n\n### MCP Integration\n\n#### MCPSemanticModel()\n\n```python\nMCPSemanticModel(\n models: dict[str, SemanticTable] | str,\n description: str = None\n)\n```\n\nCreate an MCP server to expose semantic models to LLMs like Claude. Accepts either a dictionary of models or a path to a YAML configuration file.\n\n| Parameter | Type | Description |\n|-----------|------|-------------|\n| `models` | `dict` or `str` | Dictionary of SemanticTable objects or path to YAML config |\n| `description` | `str` | Optional description of the semantic model |\n\n**Available MCP Tools:**\n\n| Tool | Description |\n|------|-------------|\n| `list_models()` | List all available semantic model names |\n| `get_model()` | Get detailed model information (dimensions, measures, joins) |\n| `get_time_range()` | Get available time range for time-series data |\n| `query_model()` | Execute queries against semantic models |\n\n**Example:**\n```python\nfrom boring_semantic_layer import MCPSemanticModel\n\n# From dictionary\nserver = MCPSemanticModel(\n models={\"flights\": flights_st, \"airports\": airports_st},\n description=\"Flight data analysis\"\n)\n\n# From YAML\nserver = MCPSemanticModel(\"config.yaml\")\n```\n\n### YAML Configuration\n\n#### YAML Structure\n\n```yaml\nmodel_name:\n table: table_reference\n description: \"Optional description\"\n\n dimensions:\n dimension_name: expression\n # or with description\n dimension_name:\n expr: expression\n description: \"Dimension description\"\n\n measures:\n measure_name: expression\n # or with description\n measure_name:\n expr: expression\n description: \"Measure description\"\n\n joins:\n join_name:\n model: model_reference\n on: join_condition\n how: join_type # left, inner, right, outer, cross\n```\n\n#### Expression Syntax\n\n| Expression | Description |\n|------------|-------------|\n| `_` | Reference to the table |\n| `_.column` | Reference a column |\n| `_.count()` | Count aggregation |\n| `_.column.sum()` | Sum aggregation |\n| `_.column.mean()` | Average aggregation |\n| `_.column.min()` | Minimum value |\n| `_.column.max()` | Maximum value |\n\n**Example:**\n```yaml\nflights:\n table: flights_data\n description: \"Flight operations data\"\n\n dimensions:\n origin: _.origin\n dest: _.dest\n carrier:\n expr: _.carrier\n description: \"Airline carrier code\"\n\n measures:\n flight_count: _.count()\n avg_delay:\n expr: _.arr_delay.mean()\n description: \"Average arrival delay in minutes\"\n```\n\n## Next Steps\n\n- Learn about [Semantic Tables](/building/semantic-tables)\n- Explore [Query Methods](/querying/methods)\n- See [Advanced Patterns](/advanced/percentage-total)\n", "queries": {}, "files": {} } diff --git a/docs/web/public/bsl-data/semantic-table.json b/docs/web/public/bsl-data/semantic-table.json index d4aec9c..1856bce 100644 --- a/docs/web/public/bsl-data/semantic-table.json +++ b/docs/web/public/bsl-data/semantic-table.json @@ -1,5 +1,5 @@ { - "markdown": "# Building a Semantic Table\n\nDefine your data model with dimensions and measures using Ibis expressions.\n\n## Overview\n\nA Semantic Table is the core building block of BSL. It transforms a raw Ibis table into a reusable, self-documenting data model by defining:\n- **Dimensions**: Attributes to group by (e.g., origin, carrier, year)\n- **Measures**: Aggregations and calculations (e.g., flight count, total distance)\n\n## to_semantic_table()\n\n```setup_flights\nimport ibis\nfrom boring_semantic_layer import to_semantic_table\n\n# 1. Start with an Ibis table\ncon = ibis.duckdb.connect(\":memory:\")\nflights_data = ibis.memtable({\n \"origin\": [\"JFK\", \"LAX\", \"SFO\"],\n \"dest\": [\"LAX\", \"SFO\", \"JFK\"],\n \"carrier\": [\"AA\", \"UA\", \"DL\"],\n \"year\": [2023, 2023, 2024],\n \"distance\": [2475, 337, 382],\n \"dep_delay\": [10, 5, 0]\n})\nflights_tbl = con.create_table(\"flights\", flights_data)\n\n# 2. Convert to a Semantic Table\nflights_st = to_semantic_table(flights_tbl, name=\"flights\")\n```\n\n## with_dimensions()\n\nDimensions define the attributes you can group by in your queries. They represent the categorical or descriptive aspects of your data that you want to analyze.\n\nYou can define dimensions using lambda expressions, unbound syntax (`_.`), or the `Dimension` class with descriptions:\n\n```dimensions_demo\nfrom ibis import _\nfrom boring_semantic_layer import Dimension\n\nflights_st = flights_st.with_dimensions(\n # Lambda expressions - simple and explicit\n origin=lambda t: t.origin,\n\n # Unbound syntax - cleaner and more concise\n destination=_.dest,\n year=_.year,\n\n # Dimension - self-documenting and AI-friendly\n carrier=Dimension(\n expr=lambda t: t.carrier,\n description=\"Airline carrier code\"\n )\n)\n\nflights_st.dimensions\n```\n\n\n## with_measures()\n\nMeasures define the aggregations and calculations you can query. They represent the quantitative aspects of your data that you want to analyze (counts, sums, averages, etc.).\n\nYou can define measures using lambda expressions, reference other measures for composition, or use the `Measure` class with descriptions:\n\n```measures_demo\nfrom boring_semantic_layer import Measure\n\nflights_st = flights_st.with_measures(\n # Lambda expressions - simple and concise\n total_flights=lambda t: t.count(),\n total_distance=lambda t: t.distance.sum(),\n max_delay=lambda t: t.dep_delay.max(),\n\n # Reference other measures for composition\n avg_distance_per_flight=lambda t: t.total_distance / t.total_flights,\n\n # Measure - self-documenting and AI-friendly\n avg_distance=Measure(\n expr=lambda t: t.distance.mean(),\n description=\"Average flight distance in miles\"\n )\n)\n\nflights_st.measures\n```\n\n\n\n### all()\n\nThe `all()` function references the entire dataset within measure definitions, enabling percent-of-total and comparison calculations.\n\n**Example:** Calculate market share as a percentage\n\n```measure_all_demo\nflights_with_pct = flights_st.with_measures(\n flight_count=lambda t: t.count(),\n market_share=lambda t: t.flight_count / t.all(t.flight_count) * 100 # Percent of total\n )\n\n# Query by carrier\nresult = (\n flights_with_pct\n .group_by(\"carrier\")\n .aggregate(\"flight_count\", \"market_share\")\n)\n```\n\n\n\n\n`t.all()` is a method available on the table parameter `t` in measure definitions. It references the entire dataset regardless of grouping, making it perfect for calculating percentages, or comparing groups to the total.\n\n\nFor more examples, see the [Percent of Total pattern](/advanced/percentage-total).\n\n## graph\n\nThe `graph` property provides a dependency graph showing how dimensions and measures relate to each other. This is useful for:\n- **Understanding dependencies**: See what columns or fields each dimension/measure depends on\n- **Impact analysis**: Find what breaks when changing a field\n- **Documentation**: Generate visual representations of your data model\n- **Validation**: Ensure your model doesn't have circular dependencies\n\n```graph_demo\n# Build a semantic table with dependencies\nflights_with_deps = flights_st.with_dimensions(\n origin=lambda t: t.origin,\n destination=lambda t: t.dest,\n).with_measures(\n flight_count=lambda t: t.count(),\n total_distance=lambda t: t.distance.sum(),\n avg_distance_per_flight=lambda t: t.total_distance / t.flight_count\n)\n\n# Access the dependency graph\ngraph = flights_with_deps.get_graph()\ngraph\n```\n\n\n### Understanding the Graph Structure\n\nThe graph is a dictionary where:\n- **Keys**: Dimension or measure names\n- **Values**: Metadata containing:\n - `deps`: Dependencies mapped to their types (`'column'`, `'dimension'`, or `'measure'`)\n - `type`: The field type (`'dimension'`, `'measure'`, or `'calc_measure'`)\n\n```graph_structure\n# Access the graph - it's a dict-like object\ngraph = flights_with_deps.get_graph()\ngraph\n```\n\n\n```python\n# Find what a specific field depends on\nflights_with_deps.get_graph()['avg_distance_per_flight']['deps']\n# Output: {'total_distance': 'measure', 'flight_count': 'measure'}\n```\n\n### Graph Traversal\n\nUse `graph_predecessors()` and `graph_successors()` to navigate dependencies:\n\n```graph_traversal\nfrom boring_semantic_layer import graph_predecessors, graph_successors\n\ngraph = flights_with_deps.get_graph()\n\n# What does this field depend on? (predecessors)\ngraph_predecessors(graph, 'avg_distance_per_flight')\n# {'total_distance', 'flight_count'}\n\n# What depends on this field? (successors)\ngraph_successors(graph, 'total_distance')\n# {'avg_distance_per_flight'}\n```\n\n\n### Working with the Dependency Graph\n\nThe dependency graph is a dict-like object where each key is a field name and the value is a dict with `\"type\"` (dimension/measure/calc_measure/column) and `\"deps\"` (dependencies with their types):\n\n```python\n# Access the graph directly as a dict\ngraph = flights_with_deps.get_graph()\n\n# Iterate over fields and their dependencies\nfor field, info in graph.items():\n print(f\"{field} ({info['type']}): depends on {info['deps']}\")\n```\n\n## join_one() / join_many() / join_cross()\n\nJoin semantic tables together to query across relationships. Joins allow you to combine data from multiple semantic tables and access dimensions and measures across all joined tables.\n\n**What Makes Semantic Joins Different?**\n\nSemantic joins explicitly capture the **relationship type** between tables, rather than just specifying SQL join mechanics:\n\n**SQL Joins:**\n```python\n# Specifies HOW to join (LEFT/INNER), but not the relationship\nflights.join(carriers, condition, how=\"left\")\n```\n\n**Semantic Joins:**\n```python\n# Specifies the relationship: one carrier has many flights\nflights.join_many(carriers, lambda f, c: f.carrier == c.code)\n```\n\n**What You Get:**\n- **Explicit relationships**: `join_many()` documents that this is a one-to-many relationship\n- **Table hierarchy information**: The method name describes how tables relate to each other\n- **Richer metadata**: Makes the data model structure explicit for documentation and tooling\n\n\nAfter joining, dimensions and measures are prefixed with table names (e.g., `flights.origin`, `carriers.name`) to avoid naming conflicts.\n\n\n\n**Joining the same table multiple times?** If you need to join to the same source table via different foreign keys (e.g., pickup and dropoff locations), you must use `.view()` to create distinct table references:\n\n```python\n# Create distinct references when joining same table twice\npickup_locs = to_semantic_table(locs_tbl.view(), \"pickup_locs\")\ndropoff_locs = to_semantic_table(locs_tbl.view(), \"dropoff_locs\")\n```\n\nWithout `.view()`, you'll encounter an `IbisInputError: Ambiguous field reference` error. \n\n\nLet's get some additional data:\n\n```setup_carriers\nimport ibis\nfrom boring_semantic_layer import to_semantic_table\n\ncon = ibis.duckdb.connect(\":memory:\")\n\n# Create carriers data\ncarriers_data = ibis.memtable({\n \"code\": [\"AA\", \"UA\", \"DL\"],\n \"name\": [\"American Airlines\", \"United Airlines\", \"Delta Air Lines\"]\n})\ncarriers_tbl = con.create_table(\"carriers\", carriers_data)\n```\n\n\nAnd create a carriers semantic table:\n\n```carriers_st\ncarriers = (\n to_semantic_table(carriers_tbl, name=\"carriers\")\n .with_dimensions(\n code=lambda t: t.code,\n name=lambda t: t.name\n )\n .with_measures(\n carrier_count=lambda t: t.count()\n )\n)\n```\n\n### join_many() - One-to-Many Relationships\n\nUse `join_many()` when one row in the left table can match multiple rows in the right table (LEFT JOIN).\n\n```join_demo\n# Join carriers to flights - one carrier has many flights\nflights_with_carriers = flights_st.join_many(\n carriers,\n lambda f, c: f.carrier == c.code\n)\n\n# Inspect available dimensions and measures\nflights_with_carriers.dimensions\n```\n\n\nAfter joining, all dimensions and measures from both tables are available. Each is prefixed with its table name to avoid conflicts:\n\n\n### join_one() - One-to-One Relationships\n\nUse `join_one()` when rows have a unique matching relationship (INNER JOIN).\n\n```python\n# Many flights \u2192 one carrier (each flight has exactly one carrier)\nflights_with_carrier = flights_st.join_one(\n carriers,\n lambda f, c: f.carrier == c.code\n)\n```\n\n\n**Important Limitation:** Currently, `left_on` and `right_on` must be **COLUMN names**, not dimension names.\n\nIf you have a dimension that maps to a different column name, you must use the underlying column name in the join.\n\n**Example:**\n```python\n# If users table has column 'id' but dimension 'customer_id':\nusers = to_semantic_table(users_tbl).with_dimensions(\n customer_id=lambda t: t.id # Dimension renamed\n)\n\n# \u274c This will fail with a helpful error:\norders.join_one(users, left_on=\"customer_id\", right_on=\"customer_id\")\n\n# \u2713 Use the actual column name:\norders.join_one(users, left_on=\"customer_id\", right_on=\"id\")\n```\n\nThis is a known limitation tracked in [issue #43](https://github.com/boringdata/boring-semantic-layer/issues/43). If you attempt to use a dimension name that doesn't match a column name, you'll get a helpful error message guiding you to use the correct column name.\n\n\n### join_cross() - Cross Join\n\nUse `join_cross()` to create every possible combination of rows from both tables (CARTESIAN PRODUCT).\n\n```python\n# Every flight \u00d7 every carrier combination\nall_combinations = flights_st.join_cross(carriers)\n```\n\n### join() - Custom Join Conditions\n\nUse `join()` for complex join conditions or specific SQL join types.\n\n```python\n# LEFT JOIN with custom condition\nflights_with_carriers = flights_st.join(\n carriers,\n lambda f, c: f.carrier == c.code,\n how=\"left\"\n)\n\n# INNER JOIN\nflights_matched = flights_st.join(\n carriers,\n lambda f, c: f.carrier == c.code,\n how=\"inner\"\n)\n\n# Complex conditions\ndate_range_join = flights_st.join(\n promotions,\n lambda f, p: (f.date >= p.start_date) & (f.date <= p.end_date),\n how=\"left\"\n)\n```\n\n**Supported join types:** `\"inner\"`, `\"left\"`, `\"right\"`, `\"outer\"`, `\"cross\"`\n\n## Next Steps\n\n- Learn about [Composing Models](/examples/compose)\n- Explore [YAML Configuration](/examples/yaml-config)\n- Start [Querying Semantic Tables](/examples/query-methods)\n", + "markdown": "# Building a Semantic Table\n\nDefine your data model with dimensions and measures using Ibis expressions.\n\n## Overview\n\nA Semantic Table is the core building block of BSL. It transforms a raw Ibis table into a reusable, self-documenting data model by defining:\n- **Dimensions**: Attributes to group by (e.g., origin, carrier, year)\n- **Measures**: Aggregations and calculations (e.g., flight count, total distance)\n\n## to_semantic_table()\n\n```setup_flights\nimport ibis\nfrom boring_semantic_layer import to_semantic_table\n\n# 1. Start with an Ibis table\ncon = ibis.duckdb.connect(\":memory:\")\nflights_data = ibis.memtable({\n \"origin\": [\"JFK\", \"LAX\", \"SFO\"],\n \"dest\": [\"LAX\", \"SFO\", \"JFK\"],\n \"carrier\": [\"AA\", \"UA\", \"DL\"],\n \"year\": [2023, 2023, 2024],\n \"distance\": [2475, 337, 382],\n \"dep_delay\": [10, 5, 0]\n})\nflights_tbl = con.create_table(\"flights\", flights_data)\n\n# 2. Convert to a Semantic Table\nflights_st = to_semantic_table(flights_tbl, name=\"flights\")\n```\n\n## with_dimensions()\n\nDimensions define the attributes you can group by in your queries. They represent the categorical or descriptive aspects of your data that you want to analyze.\n\nYou can define dimensions using lambda expressions, unbound syntax (`_.`), or the `Dimension` class with descriptions:\n\n```dimensions_demo\nfrom ibis import _\nfrom boring_semantic_layer import Dimension\n\nflights_st = flights_st.with_dimensions(\n # Lambda expressions - simple and explicit\n origin=lambda t: t.origin,\n\n # Unbound syntax - cleaner and more concise\n destination=_.dest,\n year=_.year,\n\n # Dimension - self-documenting and AI-friendly\n carrier=Dimension(\n expr=lambda t: t.carrier,\n description=\"Airline carrier code\"\n )\n)\n\nflights_st.dimensions\n```\n\n\n## with_measures()\n\nMeasures define the aggregations and calculations you can query. They represent the quantitative aspects of your data that you want to analyze (counts, sums, averages, etc.).\n\nYou can define measures using lambda expressions, reference other measures for composition, or use the `Measure` class with descriptions:\n\n```measures_demo\nfrom boring_semantic_layer import Measure\n\nflights_st = flights_st.with_measures(\n # Lambda expressions - simple and concise\n total_flights=lambda t: t.count(),\n total_distance=lambda t: t.distance.sum(),\n max_delay=lambda t: t.dep_delay.max(),\n\n # Reference other measures for composition\n avg_distance_per_flight=lambda t: t.total_distance / t.total_flights,\n\n # Measure - self-documenting and AI-friendly\n avg_distance=Measure(\n expr=lambda t: t.distance.mean(),\n description=\"Average flight distance in miles\"\n )\n)\n\nflights_st.measures\n```\n\n\n\n### all()\n\nThe `all()` function references the entire dataset within measure definitions, enabling percent-of-total and comparison calculations.\n\n**Example:** Calculate market share as a percentage\n\n```measure_all_demo\nflights_with_pct = flights_st.with_measures(\n flight_count=lambda t: t.count(),\n market_share=lambda t: t.flight_count / t.all(t.flight_count) * 100 # Percent of total\n )\n\n# Query by carrier\nresult = (\n flights_with_pct\n .group_by(\"carrier\")\n .aggregate(\"flight_count\", \"market_share\")\n)\n```\n\n\n\n\n`t.all()` is a method available on the table parameter `t` in measure definitions. It references the entire dataset regardless of grouping, making it perfect for calculating percentages, or comparing groups to the total.\n\n\nFor more examples, see the [Percent of Total pattern](/advanced/percentage-total).\n\n## graph\n\nThe `graph` property provides a dependency graph showing how dimensions and measures relate to each other. This is useful for:\n- **Understanding dependencies**: See what columns or fields each dimension/measure depends on\n- **Impact analysis**: Find what breaks when changing a field\n- **Documentation**: Generate visual representations of your data model\n- **Validation**: Ensure your model doesn't have circular dependencies\n\n```graph_demo\n# Build a semantic table with dependencies\nflights_with_deps = flights_st.with_dimensions(\n origin=lambda t: t.origin,\n destination=lambda t: t.dest,\n).with_measures(\n flight_count=lambda t: t.count(),\n total_distance=lambda t: t.distance.sum(),\n avg_distance_per_flight=lambda t: t.total_distance / t.flight_count\n)\n\n# Access the dependency graph\ngraph = flights_with_deps.get_graph()\ngraph\n```\n\n\n### Understanding the Graph Structure\n\nThe graph is a dictionary where:\n- **Keys**: Dimension or measure names\n- **Values**: Metadata containing:\n - `deps`: Dependencies mapped to their types (`'column'`, `'dimension'`, or `'measure'`)\n - `type`: The field type (`'dimension'`, `'measure'`, or `'calc_measure'`)\n\n```graph_structure\n# Access the graph - it's a dict-like object\ngraph = flights_with_deps.get_graph()\ngraph\n```\n\n\n```python\n# Find what a specific field depends on\nflights_with_deps.get_graph()['avg_distance_per_flight']['deps']\n# Output: {'total_distance': 'measure', 'flight_count': 'measure'}\n```\n\n### Graph Traversal\n\nUse `graph_predecessors()` and `graph_successors()` to navigate dependencies:\n\n```graph_traversal\nfrom boring_semantic_layer import graph_predecessors, graph_successors\n\ngraph = flights_with_deps.get_graph()\n\n# What does this field depend on? (predecessors)\ngraph_predecessors(graph, 'avg_distance_per_flight')\n# {'total_distance', 'flight_count'}\n\n# What depends on this field? (successors)\ngraph_successors(graph, 'total_distance')\n# {'avg_distance_per_flight'}\n```\n\n\n### Working with the Dependency Graph\n\nThe dependency graph is a dict-like object where each key is a field name and the value is a dict with `\"type\"` (dimension/measure/calc_measure/column) and `\"deps\"` (dependencies with their types):\n\n```python\n# Access the graph directly as a dict\ngraph = flights_with_deps.get_graph()\n\n# Iterate over fields and their dependencies\nfor field, info in graph.items():\n print(f\"{field} ({info['type']}): depends on {info['deps']}\")\n```\n\n## join_one() / join_many() / join_cross()\n\nJoin semantic tables together to query across relationships. Joins allow you to combine data from multiple semantic tables and access dimensions and measures across all joined tables.\n\n**What Makes Semantic Joins Different?**\n\nSemantic joins explicitly capture the **relationship type** between tables, rather than just specifying SQL join mechanics:\n\n**SQL Joins:**\n```python\n# Specifies HOW to join (LEFT/INNER), but not the relationship\nflights.join(carriers, condition, how=\"left\")\n```\n\n**Semantic Joins:**\n```python\n# Specifies the relationship: one carrier has many flights\nflights.join_many(carriers, lambda f, c: f.carrier == c.code)\n```\n\n**What You Get:**\n- **Explicit relationships**: `join_many()` documents that this is a one-to-many relationship\n- **Table hierarchy information**: The method name describes how tables relate to each other\n- **Richer metadata**: Makes the data model structure explicit for documentation and tooling\n\n\nAfter joining, dimensions and measures are prefixed with table names (e.g., `flights.origin`, `carriers.name`) to avoid naming conflicts.\n\n\n\n**Joining the same table multiple times?** If you need to join to the same source table via different foreign keys (e.g., pickup and dropoff locations), you must use `.view()` to create distinct table references:\n\n```python\n# Create distinct references when joining same table twice\npickup_locs = to_semantic_table(locs_tbl.view(), \"pickup_locs\")\ndropoff_locs = to_semantic_table(locs_tbl.view(), \"dropoff_locs\")\n```\n\nWithout `.view()`, you'll encounter an `IbisInputError: Ambiguous field reference` error. \n\n\nLet's get some additional data:\n\n```setup_carriers\nimport ibis\nfrom boring_semantic_layer import to_semantic_table\n\ncon = ibis.duckdb.connect(\":memory:\")\n\n# Create carriers data\ncarriers_data = ibis.memtable({\n \"code\": [\"AA\", \"UA\", \"DL\"],\n \"name\": [\"American Airlines\", \"United Airlines\", \"Delta Air Lines\"]\n})\ncarriers_tbl = con.create_table(\"carriers\", carriers_data)\n```\n\n\nAnd create a carriers semantic table:\n\n```carriers_st\ncarriers = (\n to_semantic_table(carriers_tbl, name=\"carriers\")\n .with_dimensions(\n code=lambda t: t.code,\n name=lambda t: t.name\n )\n .with_measures(\n carrier_count=lambda t: t.count()\n )\n)\n```\n\n### join_many() - One-to-Many Relationships\n\nUse `join_many()` when one row in the left table can match multiple rows in the right table (LEFT JOIN).\n\n```join_demo\n# Join carriers to flights - one carrier has many flights\nflights_with_carriers = flights_st.join_many(\n carriers,\n lambda f, c: f.carrier == c.code\n)\n\n# Inspect available dimensions and measures\nflights_with_carriers.dimensions\n```\n\n\nAfter joining, all dimensions and measures from both tables are available. Each is prefixed with its table name to avoid conflicts:\n\n\n### join_one() - One-to-One Relationships\n\nUse `join_one()` when rows have a unique matching relationship (INNER JOIN).\n\n```python\n# Many flights \u2192 one carrier (each flight has exactly one carrier)\nflights_with_carrier = flights_st.join_one(\n carriers,\n lambda f, c: f.carrier == c.code\n)\n```\n\n\n**Important Limitation:** Currently, `left_on` and `right_on` must be **COLUMN names**, not dimension names.\n\nIf you have a dimension that maps to a different column name, you must use the underlying column name in the join.\n\n**Example:**\n```python\n# If users table has column 'id' but dimension 'customer_id':\nusers = to_semantic_table(users_tbl).with_dimensions(\n customer_id=lambda t: t.id # Dimension renamed\n)\n\n# \u274c This will fail with a helpful error:\norders.join_one(users, left_on=\"customer_id\", right_on=\"customer_id\")\n\n# \u2713 Use the actual column name:\norders.join_one(users, left_on=\"customer_id\", right_on=\"id\")\n```\n\nThis is a known limitation tracked in [issue #43](https://github.com/boringdata/boring-semantic-layer/issues/43). If you attempt to use a dimension name that doesn't match a column name, you'll get a helpful error message guiding you to use the correct column name.\n\n\n### join_cross() - Cross Join\n\nUse `join_cross()` to create every possible combination of rows from both tables (CARTESIAN PRODUCT).\n\n```python\n# Every flight \u00d7 every carrier combination\nall_combinations = flights_st.join_cross(carriers)\n```\n\n### join() - Custom Join Conditions\n\nUse `join()` for complex join conditions or specific SQL join types.\n\n```python\n# LEFT JOIN with custom condition\nflights_with_carriers = flights_st.join(\n carriers,\n lambda f, c: f.carrier == c.code,\n how=\"left\"\n)\n\n# INNER JOIN\nflights_matched = flights_st.join(\n carriers,\n lambda f, c: f.carrier == c.code,\n how=\"inner\"\n)\n\n# Complex conditions\ndate_range_join = flights_st.join(\n promotions,\n lambda f, p: (f.date >= p.start_date) & (f.date <= p.end_date),\n how=\"left\"\n)\n```\n\n**Supported join types:** `\"inner\"`, `\"left\"`, `\"right\"`, `\"outer\"`, `\"cross\"`\n\n## Next Steps\n\n- Learn about [Composing Models](/building/compose)\n- Explore [YAML Configuration](/building/yaml)\n- Start [Querying Semantic Tables](/querying/methods)\n", "queries": { "dimensions_demo": { "output": "('origin', 'destination', 'year', 'carrier')" @@ -19,104 +19,21 @@ ], "data": [ [ - "DL", + "AA", 1, 33.33333333333333 ], [ - "AA", + "UA", 1, 33.33333333333333 ], [ - "UA", + "DL", 1, 33.33333333333333 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-02b3c9107cbad36a10c64f9097bccef8" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "carrier", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "carrier", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "flight_count", - "market_share" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-02b3c9107cbad36a10c64f9097bccef8": [ - { - "carrier": "UA", - "flight_count": 1, - "market_share": 33.33333333333333 - }, - { - "carrier": "DL", - "flight_count": 1, - "market_share": 33.33333333333333 - }, - { - "carrier": "AA", - "flight_count": 1, - "market_share": 33.33333333333333 - } - ] - } - } } }, "graph_demo": { @@ -131,104 +48,21 @@ ], "data": [ [ - "AA", + "UA", 1, 33.33333333333333 ], [ - "DL", + "AA", 1, 33.33333333333333 ], [ - "UA", + "DL", 1, 33.33333333333333 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-8463e963c810d6a835c03ff21b8c7ddf" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "carrier", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "carrier", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "flight_count", - "market_share" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-8463e963c810d6a835c03ff21b8c7ddf": [ - { - "carrier": "UA", - "flight_count": 1, - "market_share": 33.33333333333333 - }, - { - "carrier": "AA", - "flight_count": 1, - "market_share": 33.33333333333333 - }, - { - "carrier": "DL", - "flight_count": 1, - "market_share": 33.33333333333333 - } - ] - } - } } }, "graph_structure": { @@ -243,7 +77,7 @@ ], "data": [ [ - "UA", + "DL", 1, 33.33333333333333 ], @@ -253,94 +87,11 @@ 33.33333333333333 ], [ - "DL", + "UA", 1, 33.33333333333333 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-957c443b2b52e9d52db460acc818b08e" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "carrier", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "carrier", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "flight_count", - "market_share" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-957c443b2b52e9d52db460acc818b08e": [ - { - "carrier": "AA", - "flight_count": 1, - "market_share": 33.33333333333333 - }, - { - "carrier": "DL", - "flight_count": 1, - "market_share": 33.33333333333333 - }, - { - "carrier": "UA", - "flight_count": 1, - "market_share": 33.33333333333333 - } - ] - } - } } }, "graph_traversal": { @@ -355,7 +106,7 @@ ], "data": [ [ - "UA", + "AA", 1, 33.33333333333333 ], @@ -365,94 +116,11 @@ 33.33333333333333 ], [ - "AA", + "UA", 1, 33.33333333333333 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-444a8c980d5c591c007caa61dccef4bc" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "carrier", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "carrier", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "flight_count", - "market_share" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-444a8c980d5c591c007caa61dccef4bc": [ - { - "carrier": "AA", - "flight_count": 1, - "market_share": 33.33333333333333 - }, - { - "carrier": "UA", - "flight_count": 1, - "market_share": 33.33333333333333 - }, - { - "carrier": "DL", - "flight_count": 1, - "market_share": 33.33333333333333 - } - ] - } - } } }, "setup_carriers": { @@ -482,89 +150,6 @@ 33.33333333333333 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-8463e963c810d6a835c03ff21b8c7ddf" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "carrier", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "carrier", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "flight_count", - "market_share" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-8463e963c810d6a835c03ff21b8c7ddf": [ - { - "carrier": "UA", - "flight_count": 1, - "market_share": 33.33333333333333 - }, - { - "carrier": "AA", - "flight_count": 1, - "market_share": 33.33333333333333 - }, - { - "carrier": "DL", - "flight_count": 1, - "market_share": 33.33333333333333 - } - ] - } - } } }, "join_demo": { @@ -579,7 +164,7 @@ ], "data": [ [ - "UA", + "DL", 1, 33.33333333333333 ], @@ -589,94 +174,11 @@ 33.33333333333333 ], [ - "DL", + "UA", 1, 33.33333333333333 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-a098f4283cba9ac81fceb508a14e3c06" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "carrier", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "carrier", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "flight_count", - "market_share" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-a098f4283cba9ac81fceb508a14e3c06": [ - { - "carrier": "DL", - "flight_count": 1, - "market_share": 33.33333333333333 - }, - { - "carrier": "AA", - "flight_count": 1, - "market_share": 33.33333333333333 - }, - { - "carrier": "UA", - "flight_count": 1, - "market_share": 33.33333333333333 - } - ] - } - } } } }, diff --git a/docs/web/public/bsl-data/sessionized.json b/docs/web/public/bsl-data/sessionized.json index 13b58fa..3c3124d 100644 --- a/docs/web/public/bsl-data/sessionized.json +++ b/docs/web/public/bsl-data/sessionized.json @@ -90,7 +90,7 @@ }, "query_session_boundaries": { "code": "from ibis import _\n\nresult = (\n activity_st\n .group_by(\"user_id\", \"minute_offset\", \"page_url\", \"action\")\n .aggregate()\n .mutate(\n # Calculate time since previous event for same user\n prev_minute=lambda t: t.minute_offset.lag().over(\n group_by=\"user_id\",\n order_by=t.minute_offset\n ),\n # Calculate minutes since last event\n minutes_since_last=lambda t: t.minute_offset - t.prev_minute,\n # Mark session start (>30 min gap or first event)\n is_session_start=lambda t: (t.minutes_since_last > 30) | t.prev_minute.isnull()\n )\n .order_by(_.user_id, _.minute_offset)\n)", - "sql": "SELECT\n \"t4\".\"user_id\",\n \"t4\".\"minute_offset\",\n \"t4\".\"page_url\",\n \"t4\".\"action\",\n \"t4\".\"prev_minute\",\n \"t4\".\"minute_offset\" - \"t4\".\"prev_minute\" AS \"minutes_since_last\",\n (\n (\n \"t4\".\"minute_offset\" - \"t4\".\"prev_minute\"\n ) > 30\n )\n OR (\n \"t4\".\"prev_minute\" IS NULL\n ) AS \"is_session_start\"\nFROM (\n SELECT\n \"t3\".\"user_id\",\n \"t3\".\"minute_offset\",\n \"t3\".\"page_url\",\n \"t3\".\"action\",\n LAG(\"t3\".\"minute_offset\") OVER (PARTITION BY \"t3\".\"user_id\" ORDER BY \"t3\".\"minute_offset\" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS \"prev_minute\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t1\".\"user_id\",\n \"t1\".\"minute_offset\",\n \"t1\".\"page_url\",\n \"t1\".\"action\"\n FROM (\n SELECT\n \"t1\".\"user_id\",\n \"t1\".\"minute_offset\",\n \"t1\".\"page_url\",\n \"t1\".\"action\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_ttrclo4g4bgdjmygjwa35uobuy\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"user_id\",\n \"t1\".\"minute_offset\",\n \"t1\".\"page_url\",\n \"t1\".\"action\"\n ) AS \"t2\"\n ) AS \"t3\"\n) AS \"t4\"\nORDER BY\n \"t4\".\"user_id\" ASC,\n \"t4\".\"minute_offset\" ASC", + "sql": "SELECT\n \"t4\".\"user_id\",\n \"t4\".\"minute_offset\",\n \"t4\".\"page_url\",\n \"t4\".\"action\",\n \"t4\".\"prev_minute\",\n \"t4\".\"minute_offset\" - \"t4\".\"prev_minute\" AS \"minutes_since_last\",\n (\n (\n \"t4\".\"minute_offset\" - \"t4\".\"prev_minute\"\n ) > 30\n )\n OR (\n \"t4\".\"prev_minute\" IS NULL\n ) AS \"is_session_start\"\nFROM (\n SELECT\n \"t3\".\"user_id\",\n \"t3\".\"minute_offset\",\n \"t3\".\"page_url\",\n \"t3\".\"action\",\n LAG(\"t3\".\"minute_offset\") OVER (\n PARTITION BY \"t3\".\"user_id\"\n ORDER BY \"t3\".\"minute_offset\" ASC\n ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING\n ) AS \"prev_minute\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t1\".\"user_id\",\n \"t1\".\"minute_offset\",\n \"t1\".\"page_url\",\n \"t1\".\"action\"\n FROM (\n SELECT\n \"t1\".\"user_id\",\n \"t1\".\"minute_offset\",\n \"t1\".\"page_url\",\n \"t1\".\"action\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_wmd3uzq7rzcljgvxljaofcdisi\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"user_id\",\n \"t1\".\"minute_offset\",\n \"t1\".\"page_url\",\n \"t1\".\"action\"\n ) AS \"t2\"\n ) AS \"t3\"\n) AS \"t4\"\nORDER BY\n \"t4\".\"user_id\" ASC,\n \"t4\".\"minute_offset\" ASC", "plan": "SemanticTable: activity\n user_id [dim]\n minute_offset [dim]\n page_url [dim]\n action [dim]\n event_count [measure]\n unique_users [measure]\n-> GroupBy(user_id, minute_offset, page_url, action)\n-> Aggregate()\n-> Mutate(prev_minute, minutes_since_last, is_session_start)\n-> OrderBy(_CallableWrapper(_fn=_.user_id), _CallableWrapper(_fn=_.minute_offset))", "table": { "columns": [ @@ -212,148 +212,11 @@ false ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-0e1714e04bd51403c86fef9e1144b305" - }, - "mark": { - "type": "text" - }, - "encoding": { - "text": { - "value": "Complex query - consider custom visualization" - } - }, - "height": 400, - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-0e1714e04bd51403c86fef9e1144b305": [ - { - "user_id": "user1", - "minute_offset": 0, - "page_url": "/home", - "action": "view", - "prev_minute": null, - "minutes_since_last": null, - "is_session_start": true - }, - { - "user_id": "user1", - "minute_offset": 5, - "page_url": "/products", - "action": "view", - "prev_minute": 0.0, - "minutes_since_last": 5.0, - "is_session_start": false - }, - { - "user_id": "user1", - "minute_offset": 10, - "page_url": "/cart", - "action": "view", - "prev_minute": 5.0, - "minutes_since_last": 5.0, - "is_session_start": false - }, - { - "user_id": "user1", - "minute_offset": 45, - "page_url": "/checkout", - "action": "purchase", - "prev_minute": 10.0, - "minutes_since_last": 35.0, - "is_session_start": true - }, - { - "user_id": "user2", - "minute_offset": 2, - "page_url": "/home", - "action": "view", - "prev_minute": null, - "minutes_since_last": null, - "is_session_start": true - }, - { - "user_id": "user2", - "minute_offset": 40, - "page_url": "/products", - "action": "view", - "prev_minute": 2.0, - "minutes_since_last": 38.0, - "is_session_start": true - }, - { - "user_id": "user2", - "minute_offset": 42, - "page_url": "/cart", - "action": "view", - "prev_minute": 40.0, - "minutes_since_last": 2.0, - "is_session_start": false - }, - { - "user_id": "user3", - "minute_offset": 1, - "page_url": "/home", - "action": "view", - "prev_minute": null, - "minutes_since_last": null, - "is_session_start": true - }, - { - "user_id": "user3", - "minute_offset": 3, - "page_url": "/about", - "action": "view", - "prev_minute": 1.0, - "minutes_since_last": 2.0, - "is_session_start": false - }, - { - "user_id": "user3", - "minute_offset": 7, - "page_url": "/products", - "action": "view", - "prev_minute": 3.0, - "minutes_since_last": 4.0, - "is_session_start": false - }, - { - "user_id": "user3", - "minute_offset": 50, - "page_url": "/home", - "action": "view", - "prev_minute": 7.0, - "minutes_since_last": 43.0, - "is_session_start": true - }, - { - "user_id": "user3", - "minute_offset": 52, - "page_url": "/contact", - "action": "view", - "prev_minute": 50.0, - "minutes_since_last": 2.0, - "is_session_start": false - } - ] - } - } } }, "query_with_session_ids": { "code": "from ibis import _\n\nresult = (\n activity_st\n .group_by(\"user_id\", \"minute_offset\", \"page_url\", \"action\")\n .aggregate()\n .mutate(\n prev_minute=lambda t: t.minute_offset.lag().over(\n group_by=\"user_id\",\n order_by=t.minute_offset\n ),\n minutes_since_last=lambda t: t.minute_offset - t.prev_minute,\n is_session_start=lambda t: (t.minutes_since_last > 30) | t.prev_minute.isnull(),\n # Cumulative sum of session starts gives session ID\n session_id=lambda t: t.is_session_start.cast(\"int32\").sum().over(\n group_by=\"user_id\",\n order_by=t.minute_offset,\n rows=(None, 0) # Cumulative sum\n )\n )\n .order_by(_.user_id, _.minute_offset)\n)", - "sql": "SELECT\n *\nFROM (\n SELECT\n \"t5\".\"user_id\",\n \"t5\".\"minute_offset\",\n \"t5\".\"page_url\",\n \"t5\".\"action\",\n \"t5\".\"prev_minute\",\n \"t5\".\"minutes_since_last\",\n \"t5\".\"is_session_start\",\n SUM(CAST(\"t5\".\"is_session_start\" AS INT)) OVER (PARTITION BY \"t5\".\"user_id\" ORDER BY \"t5\".\"minute_offset\" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS \"session_id\"\n FROM (\n SELECT\n \"t4\".\"user_id\",\n \"t4\".\"minute_offset\",\n \"t4\".\"page_url\",\n \"t4\".\"action\",\n \"t4\".\"prev_minute\",\n \"t4\".\"minute_offset\" - \"t4\".\"prev_minute\" AS \"minutes_since_last\",\n (\n (\n \"t4\".\"minute_offset\" - \"t4\".\"prev_minute\"\n ) > 30\n )\n OR (\n \"t4\".\"prev_minute\" IS NULL\n ) AS \"is_session_start\"\n FROM (\n SELECT\n \"t3\".\"user_id\",\n \"t3\".\"minute_offset\",\n \"t3\".\"page_url\",\n \"t3\".\"action\",\n LAG(\"t3\".\"minute_offset\") OVER (PARTITION BY \"t3\".\"user_id\" ORDER BY \"t3\".\"minute_offset\" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS \"prev_minute\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t1\".\"user_id\",\n \"t1\".\"minute_offset\",\n \"t1\".\"page_url\",\n \"t1\".\"action\"\n FROM (\n SELECT\n \"t1\".\"user_id\",\n \"t1\".\"minute_offset\",\n \"t1\".\"page_url\",\n \"t1\".\"action\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_ttrclo4g4bgdjmygjwa35uobuy\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"user_id\",\n \"t1\".\"minute_offset\",\n \"t1\".\"page_url\",\n \"t1\".\"action\"\n ) AS \"t2\"\n ) AS \"t3\"\n ) AS \"t4\"\n ) AS \"t5\"\n) AS \"t6\"\nORDER BY\n \"t6\".\"user_id\" ASC,\n \"t6\".\"minute_offset\" ASC", + "sql": "SELECT\n *\nFROM (\n SELECT\n \"t5\".\"user_id\",\n \"t5\".\"minute_offset\",\n \"t5\".\"page_url\",\n \"t5\".\"action\",\n \"t5\".\"prev_minute\",\n \"t5\".\"minutes_since_last\",\n \"t5\".\"is_session_start\",\n SUM(CAST(\"t5\".\"is_session_start\" AS INT)) OVER (\n PARTITION BY \"t5\".\"user_id\"\n ORDER BY \"t5\".\"minute_offset\" ASC\n ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\n ) AS \"session_id\"\n FROM (\n SELECT\n \"t4\".\"user_id\",\n \"t4\".\"minute_offset\",\n \"t4\".\"page_url\",\n \"t4\".\"action\",\n \"t4\".\"prev_minute\",\n \"t4\".\"minute_offset\" - \"t4\".\"prev_minute\" AS \"minutes_since_last\",\n (\n (\n \"t4\".\"minute_offset\" - \"t4\".\"prev_minute\"\n ) > 30\n )\n OR (\n \"t4\".\"prev_minute\" IS NULL\n ) AS \"is_session_start\"\n FROM (\n SELECT\n \"t3\".\"user_id\",\n \"t3\".\"minute_offset\",\n \"t3\".\"page_url\",\n \"t3\".\"action\",\n LAG(\"t3\".\"minute_offset\") OVER (\n PARTITION BY \"t3\".\"user_id\"\n ORDER BY \"t3\".\"minute_offset\" ASC\n ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING\n ) AS \"prev_minute\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t1\".\"user_id\",\n \"t1\".\"minute_offset\",\n \"t1\".\"page_url\",\n \"t1\".\"action\"\n FROM (\n SELECT\n \"t1\".\"user_id\",\n \"t1\".\"minute_offset\",\n \"t1\".\"page_url\",\n \"t1\".\"action\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_wmd3uzq7rzcljgvxljaofcdisi\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"user_id\",\n \"t1\".\"minute_offset\",\n \"t1\".\"page_url\",\n \"t1\".\"action\"\n ) AS \"t2\"\n ) AS \"t3\"\n ) AS \"t4\"\n ) AS \"t5\"\n) AS \"t6\"\nORDER BY\n \"t6\".\"user_id\" ASC,\n \"t6\".\"minute_offset\" ASC", "plan": "SemanticTable: activity\n user_id [dim]\n minute_offset [dim]\n page_url [dim]\n action [dim]\n event_count [measure]\n unique_users [measure]\n-> GroupBy(user_id, minute_offset, page_url, action)\n-> Aggregate()\n-> Mutate(prev_minute, minutes_since_last, is_session_start, session_id)\n-> OrderBy(_CallableWrapper(_fn=_.user_id), _CallableWrapper(_fn=_.minute_offset))", "table": { "columns": [ @@ -488,160 +351,11 @@ 2 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-90b550172ff2549ec2dee72cb37f4b7c" - }, - "mark": { - "type": "text" - }, - "encoding": { - "text": { - "value": "Complex query - consider custom visualization" - } - }, - "height": 400, - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-90b550172ff2549ec2dee72cb37f4b7c": [ - { - "user_id": "user1", - "minute_offset": 0, - "page_url": "/home", - "action": "view", - "prev_minute": null, - "minutes_since_last": null, - "is_session_start": true, - "session_id": 1 - }, - { - "user_id": "user1", - "minute_offset": 5, - "page_url": "/products", - "action": "view", - "prev_minute": 0.0, - "minutes_since_last": 5.0, - "is_session_start": false, - "session_id": 1 - }, - { - "user_id": "user1", - "minute_offset": 10, - "page_url": "/cart", - "action": "view", - "prev_minute": 5.0, - "minutes_since_last": 5.0, - "is_session_start": false, - "session_id": 1 - }, - { - "user_id": "user1", - "minute_offset": 45, - "page_url": "/checkout", - "action": "purchase", - "prev_minute": 10.0, - "minutes_since_last": 35.0, - "is_session_start": true, - "session_id": 2 - }, - { - "user_id": "user2", - "minute_offset": 2, - "page_url": "/home", - "action": "view", - "prev_minute": null, - "minutes_since_last": null, - "is_session_start": true, - "session_id": 1 - }, - { - "user_id": "user2", - "minute_offset": 40, - "page_url": "/products", - "action": "view", - "prev_minute": 2.0, - "minutes_since_last": 38.0, - "is_session_start": true, - "session_id": 2 - }, - { - "user_id": "user2", - "minute_offset": 42, - "page_url": "/cart", - "action": "view", - "prev_minute": 40.0, - "minutes_since_last": 2.0, - "is_session_start": false, - "session_id": 2 - }, - { - "user_id": "user3", - "minute_offset": 1, - "page_url": "/home", - "action": "view", - "prev_minute": null, - "minutes_since_last": null, - "is_session_start": true, - "session_id": 1 - }, - { - "user_id": "user3", - "minute_offset": 3, - "page_url": "/about", - "action": "view", - "prev_minute": 1.0, - "minutes_since_last": 2.0, - "is_session_start": false, - "session_id": 1 - }, - { - "user_id": "user3", - "minute_offset": 7, - "page_url": "/products", - "action": "view", - "prev_minute": 3.0, - "minutes_since_last": 4.0, - "is_session_start": false, - "session_id": 1 - }, - { - "user_id": "user3", - "minute_offset": 50, - "page_url": "/home", - "action": "view", - "prev_minute": 7.0, - "minutes_since_last": 43.0, - "is_session_start": true, - "session_id": 2 - }, - { - "user_id": "user3", - "minute_offset": 52, - "page_url": "/contact", - "action": "view", - "prev_minute": 50.0, - "minutes_since_last": 2.0, - "is_session_start": false, - "session_id": 2 - } - ] - } - } } }, "query_session_metrics": { "code": "from ibis import _\n\nresult = (\n activity_st\n .group_by(\"user_id\", \"minute_offset\", \"action\")\n .aggregate()\n .mutate(\n prev_minute=lambda t: t.minute_offset.lag().over(\n group_by=\"user_id\",\n order_by=t.minute_offset\n ),\n minutes_since_last=lambda t: t.minute_offset - t.prev_minute,\n is_session_start=lambda t: (t.minutes_since_last > 30) | t.prev_minute.isnull(),\n session_id=lambda t: t.is_session_start.cast(\"int32\").sum().over(\n group_by=\"user_id\",\n order_by=t.minute_offset,\n rows=(None, 0)\n )\n )\n .group_by(\"user_id\", \"session_id\")\n .aggregate(\n events_in_session=lambda t: t.count(),\n session_start_min=lambda t: t.minute_offset.min(),\n session_end_min=lambda t: t.minute_offset.max(),\n has_purchase=lambda t: (t.action == \"purchase\").any()\n )\n .mutate(\n session_duration_min=lambda t: (t.session_end_min - t.session_start_min)\n )\n .order_by(_.user_id, _.session_id)\n)", - "sql": "SELECT\n \"t8\".\"user_id\",\n \"t8\".\"session_id\",\n \"t8\".\"events_in_session\",\n \"t8\".\"session_start_min\",\n \"t8\".\"session_end_min\",\n \"t8\".\"has_purchase\",\n \"t8\".\"session_end_min\" - \"t8\".\"session_start_min\" AS \"session_duration_min\"\nFROM (\n SELECT\n \"t7\".\"user_id\",\n \"t7\".\"session_id\",\n COUNT(*) AS \"events_in_session\",\n MIN(\"t7\".\"minute_offset\") AS \"session_start_min\",\n MAX(\"t7\".\"minute_offset\") AS \"session_end_min\",\n BOOL_OR(\"t7\".\"action\" = 'purchase') AS \"has_purchase\"\n FROM (\n SELECT\n \"t7\".\"minute_offset\",\n \"t7\".\"action\",\n \"t7\".\"prev_minute\",\n \"t7\".\"minutes_since_last\",\n \"t7\".\"is_session_start\",\n \"t7\".\"user_id\",\n \"t7\".\"session_id\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t5\".\"user_id\",\n \"t5\".\"minute_offset\",\n \"t5\".\"action\",\n \"t5\".\"prev_minute\",\n \"t5\".\"minutes_since_last\",\n \"t5\".\"is_session_start\",\n SUM(CAST(\"t5\".\"is_session_start\" AS INT)) OVER (PARTITION BY \"t5\".\"user_id\" ORDER BY \"t5\".\"minute_offset\" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS \"session_id\"\n FROM (\n SELECT\n \"t4\".\"user_id\",\n \"t4\".\"minute_offset\",\n \"t4\".\"action\",\n \"t4\".\"prev_minute\",\n \"t4\".\"minute_offset\" - \"t4\".\"prev_minute\" AS \"minutes_since_last\",\n (\n (\n \"t4\".\"minute_offset\" - \"t4\".\"prev_minute\"\n ) > 30\n )\n OR (\n \"t4\".\"prev_minute\" IS NULL\n ) AS \"is_session_start\"\n FROM (\n SELECT\n \"t3\".\"user_id\",\n \"t3\".\"minute_offset\",\n \"t3\".\"action\",\n LAG(\"t3\".\"minute_offset\") OVER (PARTITION BY \"t3\".\"user_id\" ORDER BY \"t3\".\"minute_offset\" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS \"prev_minute\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t1\".\"user_id\",\n \"t1\".\"minute_offset\",\n \"t1\".\"action\"\n FROM (\n SELECT\n \"t1\".\"page_url\",\n \"t1\".\"user_id\",\n \"t1\".\"minute_offset\",\n \"t1\".\"action\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_ttrclo4g4bgdjmygjwa35uobuy\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"user_id\",\n \"t1\".\"minute_offset\",\n \"t1\".\"action\"\n ) AS \"t2\"\n ) AS \"t3\"\n ) AS \"t4\"\n ) AS \"t5\"\n ) AS \"t6\"\n ) AS \"t7\"\n ) AS t7\n GROUP BY\n \"t7\".\"user_id\",\n \"t7\".\"session_id\"\n) AS \"t8\"\nORDER BY\n \"t8\".\"user_id\" ASC,\n \"t8\".\"session_id\" ASC", + "sql": "SELECT\n \"t8\".\"user_id\",\n \"t8\".\"session_id\",\n \"t8\".\"events_in_session\",\n \"t8\".\"session_start_min\",\n \"t8\".\"session_end_min\",\n \"t8\".\"has_purchase\",\n \"t8\".\"session_end_min\" - \"t8\".\"session_start_min\" AS \"session_duration_min\"\nFROM (\n SELECT\n \"t7\".\"user_id\",\n \"t7\".\"session_id\",\n COUNT(*) AS \"events_in_session\",\n MIN(\"t7\".\"minute_offset\") AS \"session_start_min\",\n MAX(\"t7\".\"minute_offset\") AS \"session_end_min\",\n BOOL_OR(\"t7\".\"action\" = 'purchase') AS \"has_purchase\"\n FROM (\n SELECT\n \"t7\".\"minute_offset\",\n \"t7\".\"action\",\n \"t7\".\"prev_minute\",\n \"t7\".\"minutes_since_last\",\n \"t7\".\"is_session_start\",\n \"t7\".\"user_id\",\n \"t7\".\"session_id\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t5\".\"user_id\",\n \"t5\".\"minute_offset\",\n \"t5\".\"action\",\n \"t5\".\"prev_minute\",\n \"t5\".\"minutes_since_last\",\n \"t5\".\"is_session_start\",\n SUM(CAST(\"t5\".\"is_session_start\" AS INT)) OVER (\n PARTITION BY \"t5\".\"user_id\"\n ORDER BY \"t5\".\"minute_offset\" ASC\n ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\n ) AS \"session_id\"\n FROM (\n SELECT\n \"t4\".\"user_id\",\n \"t4\".\"minute_offset\",\n \"t4\".\"action\",\n \"t4\".\"prev_minute\",\n \"t4\".\"minute_offset\" - \"t4\".\"prev_minute\" AS \"minutes_since_last\",\n (\n (\n \"t4\".\"minute_offset\" - \"t4\".\"prev_minute\"\n ) > 30\n )\n OR (\n \"t4\".\"prev_minute\" IS NULL\n ) AS \"is_session_start\"\n FROM (\n SELECT\n \"t3\".\"user_id\",\n \"t3\".\"minute_offset\",\n \"t3\".\"action\",\n LAG(\"t3\".\"minute_offset\") OVER (\n PARTITION BY \"t3\".\"user_id\"\n ORDER BY \"t3\".\"minute_offset\" ASC\n ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING\n ) AS \"prev_minute\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t1\".\"user_id\",\n \"t1\".\"minute_offset\",\n \"t1\".\"action\"\n FROM (\n SELECT\n \"t1\".\"page_url\",\n \"t1\".\"user_id\",\n \"t1\".\"minute_offset\",\n \"t1\".\"action\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_wmd3uzq7rzcljgvxljaofcdisi\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"user_id\",\n \"t1\".\"minute_offset\",\n \"t1\".\"action\"\n ) AS \"t2\"\n ) AS \"t3\"\n ) AS \"t4\"\n ) AS \"t5\"\n ) AS \"t6\"\n ) AS \"t7\"\n ) AS t7\n GROUP BY\n \"t7\".\"user_id\",\n \"t7\".\"session_id\"\n) AS \"t8\"\nORDER BY\n \"t8\".\"user_id\" ASC,\n \"t8\".\"session_id\" ASC", "plan": "SemanticTable: activity\n user_id [dim]\n minute_offset [dim]\n page_url [dim]\n action [dim]\n event_count [measure]\n unique_users [measure]\n-> GroupBy(user_id, minute_offset, action)\n-> Aggregate()\n-> Mutate(prev_minute, minutes_since_last, is_session_start, session_id)\n-> GroupBy(user_id, session_id)\n-> Aggregate(events_in_session, session_start_min, session_end_min, has_purchase)\n-> Mutate(session_duration_min)\n-> OrderBy(_CallableWrapper(_fn=_.user_id), _CallableWrapper(_fn=_.session_id))", "table": { "columns": [ @@ -709,94 +423,11 @@ 2 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-07d4f06d2c07efd1ca130dd2fb6400bd" - }, - "mark": { - "type": "text" - }, - "encoding": { - "text": { - "value": "Complex query - consider custom visualization" - } - }, - "height": 400, - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-07d4f06d2c07efd1ca130dd2fb6400bd": [ - { - "user_id": "user1", - "session_id": 1, - "events_in_session": 3, - "session_start_min": 0, - "session_end_min": 10, - "has_purchase": false, - "session_duration_min": 10 - }, - { - "user_id": "user1", - "session_id": 2, - "events_in_session": 1, - "session_start_min": 45, - "session_end_min": 45, - "has_purchase": true, - "session_duration_min": 0 - }, - { - "user_id": "user2", - "session_id": 1, - "events_in_session": 1, - "session_start_min": 2, - "session_end_min": 2, - "has_purchase": false, - "session_duration_min": 0 - }, - { - "user_id": "user2", - "session_id": 2, - "events_in_session": 2, - "session_start_min": 40, - "session_end_min": 42, - "has_purchase": false, - "session_duration_min": 2 - }, - { - "user_id": "user3", - "session_id": 1, - "events_in_session": 3, - "session_start_min": 1, - "session_end_min": 7, - "has_purchase": false, - "session_duration_min": 6 - }, - { - "user_id": "user3", - "session_id": 2, - "events_in_session": 2, - "session_start_min": 50, - "session_end_min": 52, - "has_purchase": false, - "session_duration_min": 2 - } - ] - } - } } }, "query_user_summary": { "code": "from ibis import _\n\nresult = (\n activity_st\n .group_by(\"user_id\", \"minute_offset\", \"action\")\n .aggregate()\n .mutate(\n prev_minute=lambda t: t.minute_offset.lag().over(\n group_by=\"user_id\",\n order_by=t.minute_offset\n ),\n minutes_since_last=lambda t: t.minute_offset - t.prev_minute,\n is_session_start=lambda t: (t.minutes_since_last > 30) | t.prev_minute.isnull(),\n session_id=lambda t: t.is_session_start.cast(\"int32\").sum().over(\n group_by=\"user_id\",\n order_by=t.minute_offset,\n rows=(None, 0)\n )\n )\n .group_by(\"user_id\", \"session_id\")\n .aggregate(\n events_in_session=lambda t: t.count(),\n has_purchase=lambda t: (t.action == \"purchase\").any()\n )\n .group_by(\"user_id\")\n .aggregate(\n total_sessions=lambda t: t.count(),\n total_events=lambda t: t.events_in_session.sum(),\n sessions_with_purchase=lambda t: t.has_purchase.cast(\"int32\").sum(),\n avg_events_per_session=lambda t: t.events_in_session.mean().round(2)\n )\n .mutate(\n conversion_rate=lambda t: (t.sessions_with_purchase / t.total_sessions * 100).round(2)\n )\n .order_by(_.total_events.desc())\n)", - "sql": "SELECT\n \"t10\".\"user_id\",\n \"t10\".\"total_sessions\",\n \"t10\".\"total_events\",\n \"t10\".\"sessions_with_purchase\",\n \"t10\".\"avg_events_per_session\",\n ROUND(\n (\n CAST(\"t10\".\"sessions_with_purchase\" AS DOUBLE PRECISION) / \"t10\".\"total_sessions\"\n ) * 100,\n 2\n ) AS \"conversion_rate\"\nFROM (\n SELECT\n \"t9\".\"user_id\",\n COUNT(*) AS \"total_sessions\",\n SUM(\"t9\".\"events_in_session\") AS \"total_events\",\n SUM(CAST(\"t9\".\"has_purchase\" AS INT)) AS \"sessions_with_purchase\",\n ROUND(AVG(\"t9\".\"events_in_session\"), 2) AS \"avg_events_per_session\"\n FROM (\n SELECT\n \"t9\".\"session_id\",\n \"t9\".\"events_in_session\",\n \"t9\".\"has_purchase\",\n \"t9\".\"user_id\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t7\".\"user_id\",\n \"t7\".\"session_id\",\n COUNT(*) AS \"events_in_session\",\n BOOL_OR(\"t7\".\"action\" = 'purchase') AS \"has_purchase\"\n FROM (\n SELECT\n \"t7\".\"minute_offset\",\n \"t7\".\"action\",\n \"t7\".\"prev_minute\",\n \"t7\".\"minutes_since_last\",\n \"t7\".\"is_session_start\",\n \"t7\".\"user_id\",\n \"t7\".\"session_id\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t5\".\"user_id\",\n \"t5\".\"minute_offset\",\n \"t5\".\"action\",\n \"t5\".\"prev_minute\",\n \"t5\".\"minutes_since_last\",\n \"t5\".\"is_session_start\",\n SUM(CAST(\"t5\".\"is_session_start\" AS INT)) OVER (PARTITION BY \"t5\".\"user_id\" ORDER BY \"t5\".\"minute_offset\" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS \"session_id\"\n FROM (\n SELECT\n \"t4\".\"user_id\",\n \"t4\".\"minute_offset\",\n \"t4\".\"action\",\n \"t4\".\"prev_minute\",\n \"t4\".\"minute_offset\" - \"t4\".\"prev_minute\" AS \"minutes_since_last\",\n (\n (\n \"t4\".\"minute_offset\" - \"t4\".\"prev_minute\"\n ) > 30\n )\n OR (\n \"t4\".\"prev_minute\" IS NULL\n ) AS \"is_session_start\"\n FROM (\n SELECT\n \"t3\".\"user_id\",\n \"t3\".\"minute_offset\",\n \"t3\".\"action\",\n LAG(\"t3\".\"minute_offset\") OVER (PARTITION BY \"t3\".\"user_id\" ORDER BY \"t3\".\"minute_offset\" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS \"prev_minute\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t1\".\"user_id\",\n \"t1\".\"minute_offset\",\n \"t1\".\"action\"\n FROM (\n SELECT\n \"t1\".\"page_url\",\n \"t1\".\"user_id\",\n \"t1\".\"minute_offset\",\n \"t1\".\"action\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_ttrclo4g4bgdjmygjwa35uobuy\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"user_id\",\n \"t1\".\"minute_offset\",\n \"t1\".\"action\"\n ) AS \"t2\"\n ) AS \"t3\"\n ) AS \"t4\"\n ) AS \"t5\"\n ) AS \"t6\"\n ) AS \"t7\"\n ) AS t7\n GROUP BY\n \"t7\".\"user_id\",\n \"t7\".\"session_id\"\n ) AS \"t8\"\n ) AS \"t9\"\n ) AS t9\n GROUP BY\n \"t9\".\"user_id\"\n) AS \"t10\"\nORDER BY\n \"t10\".\"total_events\" DESC NULLS LAST", + "sql": "SELECT\n \"t10\".\"user_id\",\n \"t10\".\"total_sessions\",\n \"t10\".\"total_events\",\n \"t10\".\"sessions_with_purchase\",\n \"t10\".\"avg_events_per_session\",\n ROUND(\n (\n CAST(\"t10\".\"sessions_with_purchase\" AS DOUBLE PRECISION) / \"t10\".\"total_sessions\"\n ) * 100,\n 2\n ) AS \"conversion_rate\"\nFROM (\n SELECT\n \"t9\".\"user_id\",\n COUNT(*) AS \"total_sessions\",\n SUM(\"t9\".\"events_in_session\") AS \"total_events\",\n SUM(CAST(\"t9\".\"has_purchase\" AS INT)) AS \"sessions_with_purchase\",\n ROUND(CAST(AVG(\"t9\".\"events_in_session\") AS DECIMAL), 2) AS \"avg_events_per_session\"\n FROM (\n SELECT\n \"t9\".\"session_id\",\n \"t9\".\"events_in_session\",\n \"t9\".\"has_purchase\",\n \"t9\".\"user_id\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t7\".\"user_id\",\n \"t7\".\"session_id\",\n COUNT(*) AS \"events_in_session\",\n BOOL_OR(\"t7\".\"action\" = 'purchase') AS \"has_purchase\"\n FROM (\n SELECT\n \"t7\".\"minute_offset\",\n \"t7\".\"action\",\n \"t7\".\"prev_minute\",\n \"t7\".\"minutes_since_last\",\n \"t7\".\"is_session_start\",\n \"t7\".\"user_id\",\n \"t7\".\"session_id\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t5\".\"user_id\",\n \"t5\".\"minute_offset\",\n \"t5\".\"action\",\n \"t5\".\"prev_minute\",\n \"t5\".\"minutes_since_last\",\n \"t5\".\"is_session_start\",\n SUM(CAST(\"t5\".\"is_session_start\" AS INT)) OVER (\n PARTITION BY \"t5\".\"user_id\"\n ORDER BY \"t5\".\"minute_offset\" ASC\n ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\n ) AS \"session_id\"\n FROM (\n SELECT\n \"t4\".\"user_id\",\n \"t4\".\"minute_offset\",\n \"t4\".\"action\",\n \"t4\".\"prev_minute\",\n \"t4\".\"minute_offset\" - \"t4\".\"prev_minute\" AS \"minutes_since_last\",\n (\n (\n \"t4\".\"minute_offset\" - \"t4\".\"prev_minute\"\n ) > 30\n )\n OR (\n \"t4\".\"prev_minute\" IS NULL\n ) AS \"is_session_start\"\n FROM (\n SELECT\n \"t3\".\"user_id\",\n \"t3\".\"minute_offset\",\n \"t3\".\"action\",\n LAG(\"t3\".\"minute_offset\") OVER (\n PARTITION BY \"t3\".\"user_id\"\n ORDER BY \"t3\".\"minute_offset\" ASC\n ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING\n ) AS \"prev_minute\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t1\".\"user_id\",\n \"t1\".\"minute_offset\",\n \"t1\".\"action\"\n FROM (\n SELECT\n \"t1\".\"page_url\",\n \"t1\".\"user_id\",\n \"t1\".\"minute_offset\",\n \"t1\".\"action\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_wmd3uzq7rzcljgvxljaofcdisi\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"user_id\",\n \"t1\".\"minute_offset\",\n \"t1\".\"action\"\n ) AS \"t2\"\n ) AS \"t3\"\n ) AS \"t4\"\n ) AS \"t5\"\n ) AS \"t6\"\n ) AS \"t7\"\n ) AS t7\n GROUP BY\n \"t7\".\"user_id\",\n \"t7\".\"session_id\"\n ) AS \"t8\"\n ) AS \"t9\"\n ) AS t9\n GROUP BY\n \"t9\".\"user_id\"\n) AS \"t10\"\nORDER BY\n \"t10\".\"total_events\" DESC NULLS LAST", "plan": "SemanticTable: activity\n user_id [dim]\n minute_offset [dim]\n page_url [dim]\n action [dim]\n event_count [measure]\n unique_users [measure]\n-> GroupBy(user_id, minute_offset, action)\n-> Aggregate()\n-> Mutate(prev_minute, minutes_since_last, is_session_start, session_id)\n-> GroupBy(user_id, session_id)\n-> Aggregate(events_in_session, has_purchase)\n-> GroupBy(user_id)\n-> Aggregate(total_sessions, total_events, sessions_with_purchase, avg_events_per_session)\n-> Mutate(conversion_rate)\n-> OrderBy(_CallableWrapper(_fn=_.total_events.desc()))", "table": { "columns": [ @@ -833,101 +464,6 @@ 0.0 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-e9f9ebd327ccbd11d15df0c271a567fd" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "user_id", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "user_id", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "total_sessions", - "total_events", - "sessions_with_purchase", - "avg_events_per_session", - "conversion_rate" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-e9f9ebd327ccbd11d15df0c271a567fd": [ - { - "user_id": "user3", - "total_sessions": 2, - "total_events": 5, - "sessions_with_purchase": 0, - "avg_events_per_session": 2.5, - "conversion_rate": 0.0 - }, - { - "user_id": "user1", - "total_sessions": 2, - "total_events": 4, - "sessions_with_purchase": 1, - "avg_events_per_session": 2.0, - "conversion_rate": 0.0 - }, - { - "user_id": "user2", - "total_sessions": 2, - "total_events": 3, - "sessions_with_purchase": 0, - "avg_events_per_session": 1.5, - "conversion_rate": 0.0 - } - ] - } - } } } }, diff --git a/docs/web/public/bsl-data/windowing.json b/docs/web/public/bsl-data/windowing.json index dcac784..cc56053 100644 --- a/docs/web/public/bsl-data/windowing.json +++ b/docs/web/public/bsl-data/windowing.json @@ -467,7 +467,7 @@ }, "setup_st": { "code": "from boring_semantic_layer import to_semantic_table\n\n# Create semantic table with measures\nsales_st = to_semantic_table(\n sales_data,\n name=\"daily_sales\"\n).with_measures(\n total_revenue=lambda t: t.revenue.sum(),\n avg_revenue=lambda t: t.revenue.mean(),\n sale_count=lambda t: t.count(),\n)", - "sql": "SELECT\n *\nFROM \"ibis_pandas_memtable_wf7k4mmyh5hnzewc4s4rec3kg4\"", + "sql": "SELECT\n *\nFROM \"ibis_pandas_memtable_fbuywylsizfijdmqwyxy7yjnfm\"", "plan": "SemanticTable: daily_sales\n total_revenue [measure]\n avg_revenue [measure]\n sale_count [measure]", "table": { "columns": [ @@ -931,7 +931,7 @@ }, "query_lag_lead": { "code": "from ibis import _\n\n# Aggregate daily revenue\ndaily_revenue = (\n sales_st\n .group_by(\"sale_date\")\n .aggregate(\"total_revenue\")\n .order_by(\"sale_date\")\n)\n\n# Add window functions for lag/lead\nresult = daily_revenue.mutate(\n prev_day_revenue=_.total_revenue.lag(),\n next_day_revenue=_.total_revenue.lead(),\n day_over_day_change=_.total_revenue - _.total_revenue.lag(),\n pct_change=((_.total_revenue - _.total_revenue.lag()) / _.total_revenue.lag() * 100).round(2)\n).limit(10)", - "sql": "SELECT\n \"t5\".\"sale_date\",\n \"t5\".\"total_revenue\",\n \"t5\".\"prev_day_revenue\",\n \"t5\".\"next_day_revenue\",\n \"t5\".\"day_over_day_change\",\n ROUND(\n (\n CAST((\n \"t5\".\"total_revenue\" - LAG(\"t5\".\"total_revenue\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)\n ) AS DOUBLE PRECISION) / LAG(\"t5\".\"total_revenue\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)\n ) * 100,\n 2\n ) AS \"pct_change\"\nFROM (\n SELECT\n \"t4\".\"sale_date\",\n \"t4\".\"total_revenue\",\n \"t4\".\"prev_day_revenue\",\n \"t4\".\"next_day_revenue\",\n \"t4\".\"total_revenue\" - LAG(\"t4\".\"total_revenue\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS \"day_over_day_change\"\n FROM (\n SELECT\n \"t3\".\"sale_date\",\n \"t3\".\"total_revenue\",\n \"t3\".\"prev_day_revenue\",\n LEAD(\"t3\".\"total_revenue\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS \"next_day_revenue\"\n FROM (\n SELECT\n \"t2\".\"sale_date\",\n \"t2\".\"total_revenue\",\n LAG(\"t2\".\"total_revenue\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS \"prev_day_revenue\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t0\".\"sale_date\",\n SUM(\"t0\".\"revenue\") AS \"total_revenue\"\n FROM (\n SELECT\n \"t0\".\"revenue\",\n \"t0\".\"product_category\",\n \"t0\".\"sale_date\"\n FROM \"ibis_pandas_memtable_wf7k4mmyh5hnzewc4s4rec3kg4\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"sale_date\"\n ) AS \"t1\"\n ORDER BY\n \"t1\".\"sale_date\" ASC\n ) AS \"t2\"\n ) AS \"t3\"\n ) AS \"t4\"\n) AS \"t5\"\nLIMIT 10", + "sql": "SELECT\n \"t5\".\"sale_date\",\n \"t5\".\"total_revenue\",\n \"t5\".\"prev_day_revenue\",\n \"t5\".\"next_day_revenue\",\n \"t5\".\"day_over_day_change\",\n ROUND(\n (\n CAST((\n \"t5\".\"total_revenue\" - LAG(\"t5\".\"total_revenue\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)\n ) AS DOUBLE PRECISION) / LAG(\"t5\".\"total_revenue\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)\n ) * 100,\n 2\n ) AS \"pct_change\"\nFROM (\n SELECT\n \"t4\".\"sale_date\",\n \"t4\".\"total_revenue\",\n \"t4\".\"prev_day_revenue\",\n \"t4\".\"next_day_revenue\",\n \"t4\".\"total_revenue\" - LAG(\"t4\".\"total_revenue\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS \"day_over_day_change\"\n FROM (\n SELECT\n \"t3\".\"sale_date\",\n \"t3\".\"total_revenue\",\n \"t3\".\"prev_day_revenue\",\n LEAD(\"t3\".\"total_revenue\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS \"next_day_revenue\"\n FROM (\n SELECT\n \"t2\".\"sale_date\",\n \"t2\".\"total_revenue\",\n LAG(\"t2\".\"total_revenue\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS \"prev_day_revenue\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t0\".\"sale_date\",\n SUM(\"t0\".\"revenue\") AS \"total_revenue\"\n FROM (\n SELECT\n \"t0\".\"revenue\",\n \"t0\".\"product_category\",\n \"t0\".\"sale_date\"\n FROM \"ibis_pandas_memtable_fbuywylsizfijdmqwyxy7yjnfm\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"sale_date\"\n ) AS \"t1\"\n ORDER BY\n \"t1\".\"sale_date\" ASC\n ) AS \"t2\"\n ) AS \"t3\"\n ) AS \"t4\"\n) AS \"t5\"\nLIMIT 10", "plan": "SemanticTable: daily_sales\n total_revenue [measure]\n avg_revenue [measure]\n sale_count [measure]\n-> GroupBy(sale_date)\n-> Aggregate(total_revenue)\n-> OrderBy(sale_date)\n-> Mutate(prev_day_revenue, next_day_revenue, day_over_day_change, pct_change)\n-> Limit(10)", "table": { "columns": [ @@ -944,242 +944,91 @@ ], "data": [ [ - "2024-01-09", - 1064.38, + "2024-01-19", + 1241.89, null, - 1188.05, + 2254.13, null, null ], [ - "2024-01-23", - 1188.05, - 1064.38, - 1474.04, - 123.66999999999985, - 11.62 + "2024-03-16", + 2254.13, + 1241.89, + 974.64, + 1012.24, + 81.51 ], [ - "2024-02-20", - 1474.04, - 1188.05, - 1494.23, - 285.99, - 24.07 + "2024-01-04", + 974.64, + 2254.13, + 1111.07, + -1279.4900000000002, + -56.76 ], [ - "2024-02-26", - 1494.23, - 1474.04, - 1512.68, - 20.190000000000055, - 1.37 + "2024-01-12", + 1111.07, + 974.64, + 1365.58, + 136.42999999999995, + 14.0 ], [ - "2024-02-28", - 1512.68, - 1494.23, - 1798.58, - 18.450000000000045, - 1.23 + "2024-02-12", + 1365.58, + 1111.07, + 1451.9, + 254.51, + 22.91 ], [ - "2024-03-12", - 1798.58, - 1512.68, - 1692.94, - 285.89999999999986, - 18.9 + "2024-02-21", + 1451.9, + 1365.58, + 1473.4, + 86.32000000000016, + 6.32 ], [ - "2024-03-14", - 1692.94, - 1798.58, - 1771.09, - -105.63999999999987, - -5.87 + "2024-02-22", + 1473.4, + 1451.9, + 1666.92, + 21.5, + 1.48 ], [ - "2024-03-15", + "2024-03-04", + 1666.92, + 1473.4, 1771.09, - 1692.94, - 1762.55, - 78.14999999999986, - 4.62 + 193.51999999999998, + 13.13 ], [ - "2024-03-22", - 1762.55, - 1771.09, - 2366.42, - -8.539999999999964, - -0.48 + "2024-03-29", + 1798.18, + 2458.56, + 987.39, + -660.3799999999999, + 7.87 ], [ - "2024-03-30", - 2366.42, - 1762.55, + "2024-01-08", 987.39, - 603.8700000000001, - 34.26 + 1798.18, + 1570.34, + -810.7900000000001, + -45.09 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-0bf5ac8bf4e17ca02eb8ea22ddea9201" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "sale_date", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "sale_date", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "total_revenue", - "prev_day_revenue", - "next_day_revenue", - "day_over_day_change", - "pct_change" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-0bf5ac8bf4e17ca02eb8ea22ddea9201": [ - { - "sale_date": "2024-01-09T00:00:00", - "total_revenue": 1064.38, - "prev_day_revenue": null, - "next_day_revenue": 1188.05, - "day_over_day_change": null, - "pct_change": null - }, - { - "sale_date": "2024-01-23T00:00:00", - "total_revenue": 1188.05, - "prev_day_revenue": 1064.38, - "next_day_revenue": 1474.04, - "day_over_day_change": 123.66999999999985, - "pct_change": 11.62 - }, - { - "sale_date": "2024-02-20T00:00:00", - "total_revenue": 1474.04, - "prev_day_revenue": 1188.05, - "next_day_revenue": 1494.23, - "day_over_day_change": 285.99, - "pct_change": 24.07 - }, - { - "sale_date": "2024-02-26T00:00:00", - "total_revenue": 1494.23, - "prev_day_revenue": 1474.04, - "next_day_revenue": 1512.68, - "day_over_day_change": 20.190000000000055, - "pct_change": 1.37 - }, - { - "sale_date": "2024-02-28T00:00:00", - "total_revenue": 1512.68, - "prev_day_revenue": 1494.23, - "next_day_revenue": 1798.58, - "day_over_day_change": 18.450000000000045, - "pct_change": 1.23 - }, - { - "sale_date": "2024-03-12T00:00:00", - "total_revenue": 1798.58, - "prev_day_revenue": 1512.68, - "next_day_revenue": 1692.94, - "day_over_day_change": 285.89999999999986, - "pct_change": 18.9 - }, - { - "sale_date": "2024-03-14T00:00:00", - "total_revenue": 1692.94, - "prev_day_revenue": 1798.58, - "next_day_revenue": 1771.09, - "day_over_day_change": -105.63999999999987, - "pct_change": -5.87 - }, - { - "sale_date": "2024-03-15T00:00:00", - "total_revenue": 1771.09, - "prev_day_revenue": 1692.94, - "next_day_revenue": 1762.55, - "day_over_day_change": 78.14999999999986, - "pct_change": 4.62 - }, - { - "sale_date": "2024-03-22T00:00:00", - "total_revenue": 1762.55, - "prev_day_revenue": 1771.09, - "next_day_revenue": 2366.42, - "day_over_day_change": -8.539999999999964, - "pct_change": -0.48 - }, - { - "sale_date": "2024-03-30T00:00:00", - "total_revenue": 2366.42, - "prev_day_revenue": 1762.55, - "next_day_revenue": 974.64, - "day_over_day_change": 603.8700000000001, - "pct_change": 34.26 - } - ] - } - } } }, "query_running_total": { "code": "from ibis import _\n\n# Daily revenue with cumulative total\ndaily_revenue = (\n sales_st\n .group_by(\"sale_date\")\n .aggregate(\"total_revenue\")\n .order_by(\"sale_date\")\n)\n\n# Calculate cumulative sum and running average\nwindow_unbounded = xo.window(rows=(None, 0), order_by=\"sale_date\")\n\nresult = daily_revenue.mutate(\n cumulative_revenue=_.total_revenue.cumsum(),\n days_count=lambda t: t.count().over(window_unbounded),\n avg_daily_so_far=lambda t: (t.cumulative_revenue / t.days_count).round(2)\n).limit(10)", - "sql": "SELECT\n \"t4\".\"sale_date\",\n \"t4\".\"total_revenue\",\n \"t4\".\"cumulative_revenue\",\n \"t4\".\"days_count\",\n ROUND(CAST(\"t4\".\"cumulative_revenue\" AS DOUBLE PRECISION) / \"t4\".\"days_count\", 2) AS \"avg_daily_so_far\"\nFROM (\n SELECT\n \"t3\".\"sale_date\",\n \"t3\".\"total_revenue\",\n \"t3\".\"cumulative_revenue\",\n COUNT(*) OVER (ORDER BY \"t3\".\"sale_date\" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS \"days_count\"\n FROM (\n SELECT\n \"t2\".\"sale_date\",\n \"t2\".\"total_revenue\",\n SUM(\"t2\".\"total_revenue\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS \"cumulative_revenue\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t0\".\"sale_date\",\n SUM(\"t0\".\"revenue\") AS \"total_revenue\"\n FROM (\n SELECT\n \"t0\".\"revenue\",\n \"t0\".\"product_category\",\n \"t0\".\"sale_date\"\n FROM \"ibis_pandas_memtable_wf7k4mmyh5hnzewc4s4rec3kg4\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"sale_date\"\n ) AS \"t1\"\n ORDER BY\n \"t1\".\"sale_date\" ASC\n ) AS \"t2\"\n ) AS \"t3\"\n) AS \"t4\"\nLIMIT 10", + "sql": "SELECT\n \"t4\".\"sale_date\",\n \"t4\".\"total_revenue\",\n \"t4\".\"cumulative_revenue\",\n \"t4\".\"days_count\",\n ROUND(CAST(\"t4\".\"cumulative_revenue\" AS DOUBLE PRECISION) / \"t4\".\"days_count\", 2) AS \"avg_daily_so_far\"\nFROM (\n SELECT\n \"t3\".\"sale_date\",\n \"t3\".\"total_revenue\",\n \"t3\".\"cumulative_revenue\",\n COUNT(*) OVER (ORDER BY \"t3\".\"sale_date\" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS \"days_count\"\n FROM (\n SELECT\n \"t2\".\"sale_date\",\n \"t2\".\"total_revenue\",\n SUM(\"t2\".\"total_revenue\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS \"cumulative_revenue\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t0\".\"sale_date\",\n SUM(\"t0\".\"revenue\") AS \"total_revenue\"\n FROM (\n SELECT\n \"t0\".\"revenue\",\n \"t0\".\"product_category\",\n \"t0\".\"sale_date\"\n FROM \"ibis_pandas_memtable_fbuywylsizfijdmqwyxy7yjnfm\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"sale_date\"\n ) AS \"t1\"\n ORDER BY\n \"t1\".\"sale_date\" ASC\n ) AS \"t2\"\n ) AS \"t3\"\n) AS \"t4\"\nLIMIT 10", "plan": "SemanticTable: daily_sales\n total_revenue [measure]\n avg_revenue [measure]\n sale_count [measure]\n-> GroupBy(sale_date)\n-> Aggregate(total_revenue)\n-> OrderBy(sale_date)\n-> Mutate(cumulative_revenue, days_count, avg_daily_so_far)\n-> Limit(10)", "table": { "columns": [ @@ -1193,219 +1042,79 @@ [ "2024-01-01", 1027.89, - 130566.25000000004, + 99331.94000000002, 1, - 130566.25 + 99331.94 ], [ "2024-01-02", 915.0, - 75785.36000000002, + 16429.45, 2, - 37892.68 + 8214.73 ], [ "2024-01-03", 975.01, - 61554.590000000004, + 13653.54, 3, - 20518.2 + 4551.18 ], [ "2024-01-04", 974.64, - 53137.75000000001, + 118171.70999999999, 4, - 13284.44 + 29542.93 ], [ "2024-01-05", 1087.29, - 131653.54000000004, + 100419.23000000001, 5, - 26330.71 + 20083.85 ], [ "2024-01-06", 1400.34, - 26509.030000000002, + 36737.88, 6, - 4418.17 + 6122.98 ], [ "2024-01-07", 1456.44, - 63011.030000000006, + 32697.350000000002, 7, - 9001.58 + 4671.05 ], [ "2024-01-08", 987.39, - 17112.350000000002, + 53532.499999999985, 8, - 2139.04 + 6691.56 ], [ "2024-01-09", 1064.38, - 1064.38, + 87383.66, 9, - 118.26 + 9709.3 ], [ "2024-01-10", 995.96, - 94204.55000000005, + 128007.62999999999, 10, - 9420.46 + 12800.76 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-173f4021c0a149fd61ff76b6f7254231" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "sale_date", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "sale_date", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "total_revenue", - "cumulative_revenue", - "days_count", - "avg_daily_so_far" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-173f4021c0a149fd61ff76b6f7254231": [ - { - "sale_date": "2024-01-01T00:00:00", - "total_revenue": 1027.89, - "cumulative_revenue": 130566.24999999997, - "days_count": 1, - "avg_daily_so_far": 130566.25 - }, - { - "sale_date": "2024-01-02T00:00:00", - "total_revenue": 915.0, - "cumulative_revenue": 110502.07999999996, - "days_count": 2, - "avg_daily_so_far": 55251.04 - }, - { - "sale_date": "2024-01-03T00:00:00", - "total_revenue": 975.01, - "cumulative_revenue": 78370.92999999996, - "days_count": 3, - "avg_daily_so_far": 26123.64 - }, - { - "sale_date": "2024-01-04T00:00:00", - "total_revenue": 974.64, - "cumulative_revenue": 117584.18999999997, - "days_count": 4, - "avg_daily_so_far": 29396.05 - }, - { - "sale_date": "2024-01-05T00:00:00", - "total_revenue": 1087.29, - "cumulative_revenue": 131653.53999999998, - "days_count": 5, - "avg_daily_so_far": 26330.71 - }, - { - "sale_date": "2024-01-06T00:00:00", - "total_revenue": 1400.34, - "cumulative_revenue": 93087.03999999996, - "days_count": 6, - "avg_daily_so_far": 15514.51 - }, - { - "sale_date": "2024-01-07T00:00:00", - "total_revenue": 1456.44, - "cumulative_revenue": 79827.36999999997, - "days_count": 7, - "avg_daily_so_far": 11403.91 - }, - { - "sale_date": "2024-01-08T00:00:00", - "total_revenue": 987.39, - "cumulative_revenue": 49999.64, - "days_count": 8, - "avg_daily_so_far": 6249.96 - }, - { - "sale_date": "2024-01-09T00:00:00", - "total_revenue": 1064.38, - "cumulative_revenue": 1064.38, - "days_count": 9, - "avg_daily_so_far": 118.26 - }, - { - "sale_date": "2024-01-10T00:00:00", - "total_revenue": 995.96, - "cumulative_revenue": 17120.920000000002, - "days_count": 10, - "avg_daily_so_far": 1712.09 - } - ] - } - } } }, "query_moving_average": { "code": "from ibis import _\n\n# Daily revenue\ndaily_revenue = (\n sales_st\n .group_by(\"sale_date\")\n .aggregate(\"total_revenue\")\n .order_by(\"sale_date\")\n)\n\n# 7-day moving average\nwindow_7d = xo.window(rows=(-6, 0), order_by=\"sale_date\")\n\nresult = daily_revenue.mutate(\n ma_7day=_.total_revenue.mean().over(window_7d).round(2),\n ma_7day_sum=_.total_revenue.sum().over(window_7d).round(2),\n).limit(10)", - "sql": "SELECT\n \"t3\".\"sale_date\",\n \"t3\".\"total_revenue\",\n \"t3\".\"ma_7day\",\n ROUND(\n SUM(\"t3\".\"total_revenue\") OVER (ORDER BY \"t3\".\"sale_date\" ASC ROWS BETWEEN 6 preceding AND CURRENT ROW),\n 2\n ) AS \"ma_7day_sum\"\nFROM (\n SELECT\n \"t2\".\"sale_date\",\n \"t2\".\"total_revenue\",\n ROUND(\n AVG(\"t2\".\"total_revenue\") OVER (ORDER BY \"t2\".\"sale_date\" ASC ROWS BETWEEN 6 preceding AND CURRENT ROW),\n 2\n ) AS \"ma_7day\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t0\".\"sale_date\",\n SUM(\"t0\".\"revenue\") AS \"total_revenue\"\n FROM (\n SELECT\n \"t0\".\"revenue\",\n \"t0\".\"product_category\",\n \"t0\".\"sale_date\"\n FROM \"ibis_pandas_memtable_wf7k4mmyh5hnzewc4s4rec3kg4\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"sale_date\"\n ) AS \"t1\"\n ORDER BY\n \"t1\".\"sale_date\" ASC\n ) AS \"t2\"\n) AS \"t3\"\nLIMIT 10", + "sql": "SELECT\n \"t3\".\"sale_date\",\n \"t3\".\"total_revenue\",\n \"t3\".\"ma_7day\",\n ROUND(\n SUM(\"t3\".\"total_revenue\") OVER (ORDER BY \"t3\".\"sale_date\" ASC ROWS BETWEEN 6 preceding AND CURRENT ROW),\n 2\n ) AS \"ma_7day_sum\"\nFROM (\n SELECT\n \"t2\".\"sale_date\",\n \"t2\".\"total_revenue\",\n ROUND(\n CAST(AVG(\"t2\".\"total_revenue\") OVER (ORDER BY \"t2\".\"sale_date\" ASC ROWS BETWEEN 6 preceding AND CURRENT ROW) AS DECIMAL),\n 2\n ) AS \"ma_7day\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t0\".\"sale_date\",\n SUM(\"t0\".\"revenue\") AS \"total_revenue\"\n FROM (\n SELECT\n \"t0\".\"revenue\",\n \"t0\".\"product_category\",\n \"t0\".\"sale_date\"\n FROM \"ibis_pandas_memtable_fbuywylsizfijdmqwyxy7yjnfm\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"sale_date\"\n ) AS \"t1\"\n ORDER BY\n \"t1\".\"sale_date\" ASC\n ) AS \"t2\"\n) AS \"t3\"\nLIMIT 10", "plan": "SemanticTable: daily_sales\n total_revenue [measure]\n avg_revenue [measure]\n sale_count [measure]\n-> GroupBy(sale_date)\n-> Aggregate(total_revenue)\n-> OrderBy(sale_date)\n-> Mutate(ma_7day, ma_7day_sum)\n-> Limit(10)", "table": { "columns": [ @@ -1476,140 +1185,11 @@ 7966.44 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-3d114133558af670c07d20a7e60639fa" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "sale_date", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "sale_date", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "total_revenue", - "ma_7day", - "ma_7day_sum" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-3d114133558af670c07d20a7e60639fa": [ - { - "sale_date": "2024-01-01T00:00:00", - "total_revenue": 1027.89, - "ma_7day": 1027.89, - "ma_7day_sum": 1027.89 - }, - { - "sale_date": "2024-01-02T00:00:00", - "total_revenue": 915.0, - "ma_7day": 971.45, - "ma_7day_sum": 1942.89 - }, - { - "sale_date": "2024-01-03T00:00:00", - "total_revenue": 975.01, - "ma_7day": 972.63, - "ma_7day_sum": 2917.9 - }, - { - "sale_date": "2024-01-04T00:00:00", - "total_revenue": 974.64, - "ma_7day": 973.14, - "ma_7day_sum": 3892.54 - }, - { - "sale_date": "2024-01-05T00:00:00", - "total_revenue": 1087.29, - "ma_7day": 995.97, - "ma_7day_sum": 4979.83 - }, - { - "sale_date": "2024-01-06T00:00:00", - "total_revenue": 1400.34, - "ma_7day": 1063.36, - "ma_7day_sum": 6380.17 - }, - { - "sale_date": "2024-01-07T00:00:00", - "total_revenue": 1456.44, - "ma_7day": 1119.52, - "ma_7day_sum": 7836.61 - }, - { - "sale_date": "2024-01-08T00:00:00", - "total_revenue": 987.39, - "ma_7day": 1113.73, - "ma_7day_sum": 7796.11 - }, - { - "sale_date": "2024-01-09T00:00:00", - "total_revenue": 1064.38, - "ma_7day": 1135.07, - "ma_7day_sum": 7945.49 - }, - { - "sale_date": "2024-01-10T00:00:00", - "total_revenue": 995.96, - "ma_7day": 1138.06, - "ma_7day_sum": 7966.44 - } - ] - } - } } }, "query_ranking": { "code": "from ibis import _\n\n# Aggregate by product category\ncategory_revenue = (\n sales_st\n .group_by(\"product_category\")\n .aggregate(\"total_revenue\", \"sale_count\")\n .order_by(_.total_revenue.desc())\n)\n\n# Add rank columns\nresult = category_revenue.mutate(\n rank=lambda t: xo.rank().over(xo.window(order_by=xo.desc(t.total_revenue))),\n dense_rank=lambda t: xo.dense_rank().over(xo.window(order_by=xo.desc(t.total_revenue))),\n row_number=lambda t: xo.row_number().over(xo.window(order_by=xo.desc(t.total_revenue))),\n)", - "sql": "SELECT\n \"t4\".\"product_category\",\n \"t4\".\"total_revenue\",\n \"t4\".\"sale_count\",\n \"t4\".\"rank\",\n \"t4\".\"dense_rank\",\n ROW_NUMBER() OVER (ORDER BY \"t4\".\"total_revenue\" DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS \"row_number\"\nFROM (\n SELECT\n \"t3\".\"product_category\",\n \"t3\".\"total_revenue\",\n \"t3\".\"sale_count\",\n \"t3\".\"rank\",\n DENSE_RANK() OVER (ORDER BY \"t3\".\"total_revenue\" DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS \"dense_rank\"\n FROM (\n SELECT\n \"t2\".\"product_category\",\n \"t2\".\"total_revenue\",\n \"t2\".\"sale_count\",\n RANK() OVER (ORDER BY \"t2\".\"total_revenue\" DESC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS \"rank\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t0\".\"product_category\",\n SUM(\"t0\".\"revenue\") AS \"total_revenue\",\n COUNT(*) AS \"sale_count\"\n FROM (\n SELECT\n \"t0\".\"sale_date\",\n \"t0\".\"revenue\",\n \"t0\".\"product_category\"\n FROM \"ibis_pandas_memtable_wf7k4mmyh5hnzewc4s4rec3kg4\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"product_category\"\n ) AS \"t1\"\n ORDER BY\n \"t1\".\"total_revenue\" DESC NULLS LAST\n ) AS \"t2\"\n ) AS \"t3\"\n) AS \"t4\"", + "sql": "SELECT\n \"t4\".\"product_category\",\n \"t4\".\"total_revenue\",\n \"t4\".\"sale_count\",\n \"t4\".\"rank\",\n \"t4\".\"dense_rank\",\n ROW_NUMBER() OVER (\n ORDER BY \"t4\".\"total_revenue\" DESC NULLS LAST\n ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING\n ) - 1 AS \"row_number\"\nFROM (\n SELECT\n \"t3\".\"product_category\",\n \"t3\".\"total_revenue\",\n \"t3\".\"sale_count\",\n \"t3\".\"rank\",\n DENSE_RANK() OVER (\n ORDER BY \"t3\".\"total_revenue\" DESC NULLS LAST\n ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING\n ) - 1 AS \"dense_rank\"\n FROM (\n SELECT\n \"t2\".\"product_category\",\n \"t2\".\"total_revenue\",\n \"t2\".\"sale_count\",\n RANK() OVER (\n ORDER BY \"t2\".\"total_revenue\" DESC NULLS LAST\n ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING\n ) - 1 AS \"rank\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t0\".\"product_category\",\n SUM(\"t0\".\"revenue\") AS \"total_revenue\",\n COUNT(*) AS \"sale_count\"\n FROM (\n SELECT\n \"t0\".\"sale_date\",\n \"t0\".\"revenue\",\n \"t0\".\"product_category\"\n FROM \"ibis_pandas_memtable_fbuywylsizfijdmqwyxy7yjnfm\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"product_category\"\n ) AS \"t1\"\n ORDER BY\n \"t1\".\"total_revenue\" DESC NULLS LAST\n ) AS \"t2\"\n ) AS \"t3\"\n) AS \"t4\"", "plan": "SemanticTable: daily_sales\n total_revenue [measure]\n avg_revenue [measure]\n sale_count [measure]\n-> GroupBy(product_category)\n-> Aggregate(total_revenue, sale_count)\n-> OrderBy(_CallableWrapper(_fn=_.total_revenue.desc()))\n-> Mutate(rank, dense_rank, row_number)", "table": { "columns": [ @@ -1646,106 +1226,11 @@ 2 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-a4c4adb65d2cd90d069cb567d81f4f29" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "product_category", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "product_category", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "total_revenue", - "sale_count", - "rank", - "dense_rank", - "row_number" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-a4c4adb65d2cd90d069cb567d81f4f29": [ - { - "product_category": "Home", - "total_revenue": 47712.26999999998, - "sale_count": 30, - "rank": 0, - "dense_rank": 0, - "row_number": 0 - }, - { - "product_category": "Electronics", - "total_revenue": 46555.450000000004, - "sale_count": 30, - "rank": 1, - "dense_rank": 1, - "row_number": 1 - }, - { - "product_category": "Clothing", - "total_revenue": 46158.00000000001, - "sale_count": 30, - "rank": 2, - "dense_rank": 2, - "row_number": 2 - } - ] - } - } } }, "query_week_over_week": { "code": "from ibis import _\n\n# Aggregate by week\nweekly_revenue = (\n sales_st\n .mutate(week_start=_.sale_date.truncate(\"W\"))\n .group_by(\"week_start\")\n .aggregate(\"total_revenue\")\n .order_by(\"week_start\")\n)\n\n# Calculate week-over-week changes\nresult = weekly_revenue.mutate(\n prev_week_revenue=_.total_revenue.lag(),\n wow_change=_.total_revenue - _.total_revenue.lag(),\n wow_pct_change=((_.total_revenue - _.total_revenue.lag()) / _.total_revenue.lag() * 100).round(2)\n).limit(10)", - "sql": "SELECT\n \"t5\".\"week_start\",\n \"t5\".\"total_revenue\",\n \"t5\".\"prev_week_revenue\",\n \"t5\".\"wow_change\",\n ROUND(\n (\n CAST((\n \"t5\".\"total_revenue\" - LAG(\"t5\".\"total_revenue\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)\n ) AS DOUBLE PRECISION) / LAG(\"t5\".\"total_revenue\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)\n ) * 100,\n 2\n ) AS \"wow_pct_change\"\nFROM (\n SELECT\n \"t4\".\"week_start\",\n \"t4\".\"total_revenue\",\n \"t4\".\"prev_week_revenue\",\n \"t4\".\"total_revenue\" - LAG(\"t4\".\"total_revenue\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS \"wow_change\"\n FROM (\n SELECT\n \"t3\".\"week_start\",\n \"t3\".\"total_revenue\",\n LAG(\"t3\".\"total_revenue\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS \"prev_week_revenue\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t1\".\"week_start\",\n SUM(\"t1\".\"revenue\") AS \"total_revenue\"\n FROM (\n SELECT\n \"t1\".\"sale_date\",\n \"t1\".\"revenue\",\n \"t1\".\"product_category\",\n \"t1\".\"week_start\"\n FROM (\n SELECT\n \"t0\".\"sale_date\",\n \"t0\".\"revenue\",\n \"t0\".\"product_category\",\n DATE_TRUNC('WEEK', \"t0\".\"sale_date\") AS \"week_start\"\n FROM \"ibis_pandas_memtable_wf7k4mmyh5hnzewc4s4rec3kg4\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"week_start\"\n ) AS \"t2\"\n ORDER BY\n \"t2\".\"week_start\" ASC\n ) AS \"t3\"\n ) AS \"t4\"\n) AS \"t5\"\nLIMIT 10", + "sql": "SELECT\n \"t5\".\"week_start\",\n \"t5\".\"total_revenue\",\n \"t5\".\"prev_week_revenue\",\n \"t5\".\"wow_change\",\n ROUND(\n (\n CAST((\n \"t5\".\"total_revenue\" - LAG(\"t5\".\"total_revenue\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)\n ) AS DOUBLE PRECISION) / LAG(\"t5\".\"total_revenue\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)\n ) * 100,\n 2\n ) AS \"wow_pct_change\"\nFROM (\n SELECT\n \"t4\".\"week_start\",\n \"t4\".\"total_revenue\",\n \"t4\".\"prev_week_revenue\",\n \"t4\".\"total_revenue\" - LAG(\"t4\".\"total_revenue\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS \"wow_change\"\n FROM (\n SELECT\n \"t3\".\"week_start\",\n \"t3\".\"total_revenue\",\n LAG(\"t3\".\"total_revenue\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS \"prev_week_revenue\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t1\".\"week_start\",\n SUM(\"t1\".\"revenue\") AS \"total_revenue\"\n FROM (\n SELECT\n \"t1\".\"sale_date\",\n \"t1\".\"revenue\",\n \"t1\".\"product_category\",\n \"t1\".\"week_start\"\n FROM (\n SELECT\n \"t0\".\"sale_date\",\n \"t0\".\"revenue\",\n \"t0\".\"product_category\",\n DATE_TRUNC('WEEK', \"t0\".\"sale_date\") AS \"week_start\"\n FROM \"ibis_pandas_memtable_fbuywylsizfijdmqwyxy7yjnfm\" AS \"t0\"\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"week_start\"\n ) AS \"t2\"\n ORDER BY\n \"t2\".\"week_start\" ASC\n ) AS \"t3\"\n ) AS \"t4\"\n) AS \"t5\"\nLIMIT 10", "plan": "SemanticTable: daily_sales\n total_revenue [measure]\n avg_revenue [measure]\n sale_count [measure]\n-> Mutate(week_start)\n-> GroupBy(week_start)\n-> Aggregate(total_revenue)\n-> OrderBy(week_start)\n-> Mutate(prev_week_revenue, wow_change, wow_pct_change)\n-> Limit(10)", "table": { "columns": [ @@ -1757,221 +1242,81 @@ ], "data": [ [ - "2024-01-22", - 9274.43, + "2024-02-19", + 11548.08, null, null, null ], [ - "2024-01-08", - 7972.610000000001, + "2024-01-22", 9274.43, - -1301.8199999999997, - -14.04 - ], - [ - "2024-03-11", - 13325.119999999999, 7972.610000000001, - 5352.509999999998, - 67.14 + 1301.8199999999997, + -19.69 ], [ - "2024-02-05", - 10640.87, + "2024-01-29", + 10246.21, 13325.119999999999, - -2684.249999999998, - -20.14 + -3078.91, + 10.48 ], [ - "2024-03-25", - 11609.76, - 10640.87, - 968.8899999999994, - 9.11 + "2024-01-01", + 7836.610000000001, + 11548.08, + -3711.4699999999993, + -23.52 ], [ - "2024-01-29", - 10246.21, - 11609.76, - -1363.550000000001, - -11.74 + "2024-01-15", + 8932.26, + 7836.610000000001, + 1095.6499999999996, + 13.98 ], [ - "2024-02-12", - 10703.909999999998, - 10246.21, - 457.6999999999989, - 4.47 + "2024-01-08", + 7972.610000000001, + 8932.26, + -959.6499999999996, + -10.74 ], [ - "2024-03-04", - 12560.560000000001, - 10703.909999999998, - 1856.6500000000033, - 17.35 + "2024-02-26", + 12124.92, + 9274.43, + 2850.49, + 4.44 ], [ "2024-03-18", 13650.38, - 12560.560000000001, - 1089.819999999998, - 8.68 + 12124.92, + 1525.4599999999991, + 12.58 ], [ - "2024-01-01", - 7836.610000000001, + "2024-03-25", + 11609.76, + 10640.87, + 968.8899999999994, + 45.62 + ], + [ + "2024-02-05", + 10640.87, 13650.38, - -5813.769999999999, - -42.59 + -3009.5099999999984, + -22.05 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-69be61ac94b8af4da345c2a61c78dc1e" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "week_start", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "week_start", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "total_revenue", - "prev_week_revenue", - "wow_change", - "wow_pct_change" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-69be61ac94b8af4da345c2a61c78dc1e": [ - { - "week_start": "2024-01-08T00:00:00", - "total_revenue": 7972.610000000001, - "prev_week_revenue": 12124.92, - "wow_change": -4152.3099999999995, - "wow_pct_change": -34.25 - }, - { - "week_start": "2024-03-11T00:00:00", - "total_revenue": 13325.119999999999, - "prev_week_revenue": 7972.610000000001, - "wow_change": 5352.509999999998, - "wow_pct_change": 67.14 - }, - { - "week_start": "2024-02-26T00:00:00", - "total_revenue": 12124.92, - "prev_week_revenue": 13650.38, - "wow_change": -1525.4599999999991, - "wow_pct_change": -11.18 - }, - { - "week_start": "2024-02-19T00:00:00", - "total_revenue": 11548.08, - "prev_week_revenue": 13325.119999999999, - "wow_change": -1777.039999999999, - "wow_pct_change": -13.34 - }, - { - "week_start": "2024-01-01T00:00:00", - "total_revenue": 7836.610000000001, - "prev_week_revenue": 11548.08, - "wow_change": -3711.4699999999993, - "wow_pct_change": -32.14 - }, - { - "week_start": "2024-01-15T00:00:00", - "total_revenue": 8932.26, - "prev_week_revenue": 7836.610000000001, - "wow_change": 1095.6499999999996, - "wow_pct_change": 13.98 - }, - { - "week_start": "2024-01-29T00:00:00", - "total_revenue": 10246.21, - "prev_week_revenue": null, - "wow_change": null, - "wow_pct_change": null - }, - { - "week_start": "2024-02-12T00:00:00", - "total_revenue": 10703.909999999998, - "prev_week_revenue": 10246.21, - "wow_change": 457.6999999999989, - "wow_pct_change": 4.47 - }, - { - "week_start": "2024-03-04T00:00:00", - "total_revenue": 12560.560000000001, - "prev_week_revenue": 10703.909999999998, - "wow_change": 1856.6500000000033, - "wow_pct_change": 17.35 - }, - { - "week_start": "2024-03-18T00:00:00", - "total_revenue": 13650.38, - "prev_week_revenue": 12560.560000000001, - "wow_change": 1089.819999999998, - "wow_pct_change": 8.68 - } - ] - } - } } }, "query_pct_running": { "code": "from ibis import _\n\n# Top 10 days by revenue\ntop_days = (\n sales_st\n .group_by(\"sale_date\")\n .aggregate(\"total_revenue\")\n .order_by(_.total_revenue.desc())\n .limit(10)\n)\n\n# Calculate cumulative percentage\nresult = top_days.mutate(\n cumulative_revenue=_.total_revenue.cumsum(),\n total_top10=_.total_revenue.sum(),\n pct_of_top10=(_.total_revenue.cumsum() / _.total_revenue.sum() * 100).round(2)\n)", - "sql": "SELECT\n \"t5\".\"sale_date\",\n \"t5\".\"total_revenue\",\n \"t5\".\"cumulative_revenue\",\n \"t5\".\"total_top10\",\n ROUND(\n (\n CAST(SUM(\"t5\".\"total_revenue\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS DOUBLE PRECISION) / SUM(\"t5\".\"total_revenue\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)\n ) * 100,\n 2\n ) AS \"pct_of_top10\"\nFROM (\n SELECT\n \"t4\".\"sale_date\",\n \"t4\".\"total_revenue\",\n \"t4\".\"cumulative_revenue\",\n SUM(\"t4\".\"total_revenue\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS \"total_top10\"\n FROM (\n SELECT\n \"t3\".\"sale_date\",\n \"t3\".\"total_revenue\",\n SUM(\"t3\".\"total_revenue\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS \"cumulative_revenue\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t0\".\"sale_date\",\n SUM(\"t0\".\"revenue\") AS \"total_revenue\"\n FROM (\n SELECT\n \"t0\".\"revenue\",\n \"t0\".\"product_category\",\n \"t0\".\"sale_date\"\n FROM \"ibis_pandas_memtable_wf7k4mmyh5hnzewc4s4rec3kg4\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"sale_date\"\n ) AS \"t1\"\n ORDER BY\n \"t1\".\"total_revenue\" DESC NULLS LAST\n LIMIT 10\n ) AS \"t3\"\n ) AS \"t4\"\n) AS \"t5\"", + "sql": "SELECT\n \"t5\".\"sale_date\",\n \"t5\".\"total_revenue\",\n \"t5\".\"cumulative_revenue\",\n \"t5\".\"total_top10\",\n ROUND(\n (\n CAST(SUM(\"t5\".\"total_revenue\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS DOUBLE PRECISION) / SUM(\"t5\".\"total_revenue\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)\n ) * 100,\n 2\n ) AS \"pct_of_top10\"\nFROM (\n SELECT\n \"t4\".\"sale_date\",\n \"t4\".\"total_revenue\",\n \"t4\".\"cumulative_revenue\",\n SUM(\"t4\".\"total_revenue\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS \"total_top10\"\n FROM (\n SELECT\n \"t3\".\"sale_date\",\n \"t3\".\"total_revenue\",\n SUM(\"t3\".\"total_revenue\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS \"cumulative_revenue\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t0\".\"sale_date\",\n SUM(\"t0\".\"revenue\") AS \"total_revenue\"\n FROM (\n SELECT\n \"t0\".\"revenue\",\n \"t0\".\"product_category\",\n \"t0\".\"sale_date\"\n FROM \"ibis_pandas_memtable_fbuywylsizfijdmqwyxy7yjnfm\" AS \"t0\"\n ) AS t0\n GROUP BY\n \"t0\".\"sale_date\"\n ) AS \"t1\"\n ORDER BY\n \"t1\".\"total_revenue\" DESC NULLS LAST\n LIMIT 10\n ) AS \"t3\"\n ) AS \"t4\"\n) AS \"t5\"", "plan": "SemanticTable: daily_sales\n total_revenue [measure]\n avg_revenue [measure]\n sale_count [measure]\n-> GroupBy(sale_date)\n-> Aggregate(total_revenue)\n-> OrderBy(_CallableWrapper(_fn=_.total_revenue.desc()))\n-> Limit(10)\n-> Mutate(cumulative_revenue, total_top10, pct_of_top10)", "table": { "columns": [ @@ -2053,151 +1398,11 @@ 100.0 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-ccfb8e557a5760f2556855387231ce27" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "sale_date", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "sale_date", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "total_revenue", - "cumulative_revenue", - "total_top10", - "pct_of_top10" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-ccfb8e557a5760f2556855387231ce27": [ - { - "sale_date": "2024-03-24T00:00:00", - "total_revenue": 2458.56, - "cumulative_revenue": 2458.56, - "total_top10": 22405.800000000003, - "pct_of_top10": 10.97 - }, - { - "sale_date": "2024-03-23T00:00:00", - "total_revenue": 2382.92, - "cumulative_revenue": 4841.48, - "total_top10": 22405.800000000003, - "pct_of_top10": 21.61 - }, - { - "sale_date": "2024-03-17T00:00:00", - "total_revenue": 2370.91, - "cumulative_revenue": 7212.389999999999, - "total_top10": 22405.800000000003, - "pct_of_top10": 32.19 - }, - { - "sale_date": "2024-03-30T00:00:00", - "total_revenue": 2366.42, - "cumulative_revenue": 9578.81, - "total_top10": 22405.800000000003, - "pct_of_top10": 42.75 - }, - { - "sale_date": "2024-03-16T00:00:00", - "total_revenue": 2254.13, - "cumulative_revenue": 11832.939999999999, - "total_top10": 22405.800000000003, - "pct_of_top10": 52.81 - }, - { - "sale_date": "2024-03-10T00:00:00", - "total_revenue": 2150.55, - "cumulative_revenue": 13983.489999999998, - "total_top10": 22405.800000000003, - "pct_of_top10": 62.41 - }, - { - "sale_date": "2024-03-09T00:00:00", - "total_revenue": 2147.09, - "cumulative_revenue": 16130.579999999998, - "total_top10": 22405.800000000003, - "pct_of_top10": 71.99 - }, - { - "sale_date": "2024-03-02T00:00:00", - "total_revenue": 2121.0, - "cumulative_revenue": 18251.579999999998, - "total_top10": 22405.800000000003, - "pct_of_top10": 81.46 - }, - { - "sale_date": "2024-03-03T00:00:00", - "total_revenue": 2117.39, - "cumulative_revenue": 20368.969999999998, - "total_top10": 22405.800000000003, - "pct_of_top10": 90.91 - }, - { - "sale_date": "2024-02-25T00:00:00", - "total_revenue": 2036.83, - "cumulative_revenue": 22405.799999999996, - "total_top10": 22405.800000000003, - "pct_of_top10": 100.0 - } - ] - } - } } }, "query_window_filter": { "code": "from ibis import _\n\n# Focus on weekends only\nweekend_revenue = (\n sales_st\n .mutate(is_weekend=_.sale_date.day_of_week.index().isin([5, 6]))\n .filter(_.is_weekend)\n .group_by(\"sale_date\")\n .aggregate(\"total_revenue\")\n .order_by(\"sale_date\")\n)\n\n# 3-weekend moving average\nwindow_3 = xo.window(rows=(-2, 0), order_by=\"sale_date\")\n\nresult = weekend_revenue.mutate(\n ma_3weekend=_.total_revenue.mean().over(window_3).round(2),\n prev_weekend=_.total_revenue.lag(),\n weekend_change=_.total_revenue - _.total_revenue.lag()\n).limit(10)", - "sql": "SELECT\n \"t5\".\"sale_date\",\n \"t5\".\"total_revenue\",\n \"t5\".\"ma_3weekend\",\n \"t5\".\"prev_weekend\",\n \"t5\".\"total_revenue\" - LAG(\"t5\".\"total_revenue\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS \"weekend_change\"\nFROM (\n SELECT\n \"t4\".\"sale_date\",\n \"t4\".\"total_revenue\",\n \"t4\".\"ma_3weekend\",\n LAG(\"t4\".\"total_revenue\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS \"prev_weekend\"\n FROM (\n SELECT\n \"t3\".\"sale_date\",\n \"t3\".\"total_revenue\",\n ROUND(\n AVG(\"t3\".\"total_revenue\") OVER (ORDER BY \"t3\".\"sale_date\" ASC ROWS BETWEEN 2 preceding AND CURRENT ROW),\n 2\n ) AS \"ma_3weekend\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t1\".\"sale_date\",\n SUM(\"t1\".\"revenue\") AS \"total_revenue\"\n FROM (\n SELECT\n \"t1\".\"revenue\",\n \"t1\".\"product_category\",\n \"t1\".\"is_weekend\",\n \"t1\".\"sale_date\"\n FROM (\n SELECT\n \"t0\".\"sale_date\",\n \"t0\".\"revenue\",\n \"t0\".\"product_category\",\n (\n DATE_PART('dow', \"t0\".\"sale_date\") + 6\n ) % 7 IN (5, 6) AS \"is_weekend\"\n FROM \"ibis_pandas_memtable_wf7k4mmyh5hnzewc4s4rec3kg4\" AS \"t0\"\n WHERE\n (\n DATE_PART('dow', \"t0\".\"sale_date\") + 6\n ) % 7 IN (5, 6)\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"sale_date\"\n ) AS \"t2\"\n ORDER BY\n \"t2\".\"sale_date\" ASC\n ) AS \"t3\"\n ) AS \"t4\"\n) AS \"t5\"\nLIMIT 10", + "sql": "SELECT\n \"t5\".\"sale_date\",\n \"t5\".\"total_revenue\",\n \"t5\".\"ma_3weekend\",\n \"t5\".\"prev_weekend\",\n \"t5\".\"total_revenue\" - LAG(\"t5\".\"total_revenue\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS \"weekend_change\"\nFROM (\n SELECT\n \"t4\".\"sale_date\",\n \"t4\".\"total_revenue\",\n \"t4\".\"ma_3weekend\",\n LAG(\"t4\".\"total_revenue\") OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS \"prev_weekend\"\n FROM (\n SELECT\n \"t3\".\"sale_date\",\n \"t3\".\"total_revenue\",\n ROUND(\n CAST(AVG(\"t3\".\"total_revenue\") OVER (ORDER BY \"t3\".\"sale_date\" ASC ROWS BETWEEN 2 preceding AND CURRENT ROW) AS DECIMAL),\n 2\n ) AS \"ma_3weekend\"\n FROM (\n SELECT\n *\n FROM (\n SELECT\n \"t1\".\"sale_date\",\n SUM(\"t1\".\"revenue\") AS \"total_revenue\"\n FROM (\n SELECT\n \"t1\".\"revenue\",\n \"t1\".\"product_category\",\n \"t1\".\"is_weekend\",\n \"t1\".\"sale_date\"\n FROM (\n SELECT\n \"t0\".\"sale_date\",\n \"t0\".\"revenue\",\n \"t0\".\"product_category\",\n (\n DATE_PART('dow', \"t0\".\"sale_date\") + 6\n ) % 7 IN (5, 6) AS \"is_weekend\"\n FROM \"ibis_pandas_memtable_fbuywylsizfijdmqwyxy7yjnfm\" AS \"t0\"\n WHERE\n (\n DATE_PART('dow', \"t0\".\"sale_date\") + 6\n ) % 7 IN (5, 6)\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"sale_date\"\n ) AS \"t2\"\n ORDER BY\n \"t2\".\"sale_date\" ASC\n ) AS \"t3\"\n ) AS \"t4\"\n) AS \"t5\"\nLIMIT 10", "plan": "SemanticTable: daily_sales\n total_revenue [measure]\n avg_revenue [measure]\n sale_count [measure]\n-> Mutate(is_weekend)\n-> Filter(\u03bb )\n-> GroupBy(sale_date)\n-> Aggregate(total_revenue)\n-> OrderBy(sale_date)\n-> Mutate(ma_3weekend, prev_weekend, weekend_change)\n-> Limit(10)", "table": { "columns": [ @@ -2279,146 +1484,6 @@ -105.90999999999985 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-3a819d33c9eacd7e65be7b4ec0b9932b" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "sale_date", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "sale_date", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "total_revenue", - "ma_3weekend", - "prev_weekend", - "weekend_change" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-3a819d33c9eacd7e65be7b4ec0b9932b": [ - { - "sale_date": "2024-01-06T00:00:00", - "total_revenue": 1400.34, - "ma_3weekend": 1400.34, - "prev_weekend": null, - "weekend_change": null - }, - { - "sale_date": "2024-01-07T00:00:00", - "total_revenue": 1456.44, - "ma_3weekend": 1428.39, - "prev_weekend": 1400.34, - "weekend_change": 56.100000000000136 - }, - { - "sale_date": "2024-01-13T00:00:00", - "total_revenue": 1361.31, - "ma_3weekend": 1406.03, - "prev_weekend": 1456.44, - "weekend_change": -95.13000000000011 - }, - { - "sale_date": "2024-01-14T00:00:00", - "total_revenue": 1408.77, - "ma_3weekend": 1408.84, - "prev_weekend": 1361.31, - "weekend_change": 47.460000000000036 - }, - { - "sale_date": "2024-01-20T00:00:00", - "total_revenue": 1448.3, - "ma_3weekend": 1406.13, - "prev_weekend": 1408.77, - "weekend_change": 39.52999999999997 - }, - { - "sale_date": "2024-01-21T00:00:00", - "total_revenue": 1621.16, - "ma_3weekend": 1492.74, - "prev_weekend": 1448.3, - "weekend_change": 172.86000000000013 - }, - { - "sale_date": "2024-01-27T00:00:00", - "total_revenue": 1556.55, - "ma_3weekend": 1542.0, - "prev_weekend": 1621.16, - "weekend_change": -64.61000000000013 - }, - { - "sale_date": "2024-01-28T00:00:00", - "total_revenue": 1570.34, - "ma_3weekend": 1582.68, - "prev_weekend": 1556.55, - "weekend_change": 13.789999999999964 - }, - { - "sale_date": "2024-02-03T00:00:00", - "total_revenue": 1823.62, - "ma_3weekend": 1650.17, - "prev_weekend": 1570.34, - "weekend_change": 253.27999999999997 - }, - { - "sale_date": "2024-02-04T00:00:00", - "total_revenue": 1717.71, - "ma_3weekend": 1703.89, - "prev_weekend": 1823.62, - "weekend_change": -105.90999999999985 - } - ] - } - } } } }, diff --git a/docs/web/public/bsl-data/yaml-config.json b/docs/web/public/bsl-data/yaml-config.json index f8921e6..c7ed69b 100644 --- a/docs/web/public/bsl-data/yaml-config.json +++ b/docs/web/public/bsl-data/yaml-config.json @@ -3,7 +3,7 @@ "queries": { "load_yaml_example": { "code": "from boring_semantic_layer import from_yaml\n\n# Load models from YAML file with explicit tables\nmodels = from_yaml(\n \"yaml_example.yaml\",\n tables={\n \"flights_tbl\": flights_tbl,\n \"carriers_tbl\": carriers_tbl\n }\n)\n\nflights_sm = models[\"flights\"]\ncarriers_sm = models[\"carriers\"]\n\n# Inspect the loaded models\nflights_sm.dimensions, flights_sm.measures", - "sql": "SELECT\n *\nFROM \"ibis_pandas_memtable_dzvbyqk4lrbrzjjqzanojjwt4i\"", + "sql": "SELECT\n *\nFROM \"ibis_pandas_memtable_j2mhogl5vvgy7bhqsygbvs4vpy\"", "plan": "SemanticTable: carriers\n code [dim]\n name [dim]\n carrier_count [measure]", "table": { "columns": [ @@ -28,7 +28,7 @@ }, "query_yaml_model": { "code": "# Query the YAML-defined model\nresult = (\n flights_sm\n .group_by(\"origin\")\n .aggregate(\"flight_count\", \"avg_distance\")\n)", - "sql": "SELECT\n *\nFROM (\n SELECT\n \"t1\".\"origin\",\n COUNT(*) AS \"flight_count\",\n AVG(\"t1\".\"distance\") AS \"avg_distance\"\n FROM (\n SELECT\n \"t1\".\"dest\",\n \"t1\".\"carrier\",\n \"t1\".\"year\",\n \"t1\".\"distance\",\n \"t1\".\"origin\"\n FROM (\n SELECT\n *\n FROM \"ibis_pandas_memtable_dsjzjsakybaunn5s2rifyw5f5q\" AS \"t0\"\n WHERE\n \"t0\".\"year\" > 2020\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"origin\"\n) AS \"t2\"", + "sql": "SELECT\n *\nFROM (\n SELECT\n \"t1\".\"origin\",\n COUNT(*) AS \"flight_count\",\n AVG(\"t1\".\"distance\") AS \"avg_distance\"\n FROM (\n SELECT\n \"t1\".\"dest\",\n \"t1\".\"carrier\",\n \"t1\".\"year\",\n \"t1\".\"distance\",\n \"t1\".\"destination\",\n \"t1\".\"origin\"\n FROM (\n SELECT\n \"t0\".\"origin\",\n \"t0\".\"dest\",\n \"t0\".\"carrier\",\n \"t0\".\"year\",\n \"t0\".\"distance\",\n \"t0\".\"dest\" AS \"destination\"\n FROM \"ibis_pandas_memtable_2rhxc36mlrb2hpy5vpavuy7iye\" AS \"t0\"\n WHERE\n \"t0\".\"year\" > 2020\n ) AS \"t1\"\n ) AS t1\n GROUP BY\n \"t1\".\"origin\"\n) AS \"t2\"", "plan": "SemanticTable: flights\n origin [dim]\n destination [dim]\n year [dim]\n carrier [dim]\n flight_count [measure]\n total_distance [measure]\n avg_distance [measure]\n-> Filter(\u03bb )\n-> GroupBy(origin)\n-> Aggregate(flight_count, avg_distance)", "table": { "columns": [ @@ -53,89 +53,6 @@ 2475.0 ] ] - }, - "chart": { - "type": "vega", - "spec": { - "config": { - "view": { - "continuousWidth": 300, - "continuousHeight": 300 - } - }, - "data": { - "name": "data-6d81a9b72c8565e701a64a16a2225db0" - }, - "mark": { - "type": "bar" - }, - "encoding": { - "color": { - "field": "measure", - "type": "nominal" - }, - "tooltip": [ - { - "field": "origin", - "type": "nominal" - }, - { - "field": "measure", - "type": "nominal" - }, - { - "field": "value", - "type": "quantitative" - } - ], - "x": { - "field": "origin", - "sort": null, - "type": "ordinal" - }, - "xOffset": { - "field": "measure" - }, - "y": { - "field": "value", - "type": "quantitative" - } - }, - "height": 400, - "transform": [ - { - "fold": [ - "flight_count", - "avg_distance" - ], - "as": [ - "measure", - "value" - ] - } - ], - "width": 700, - "$schema": "https://vega.github.io/schema/vega-lite/v5.20.1.json", - "datasets": { - "data-6d81a9b72c8565e701a64a16a2225db0": [ - { - "origin": "JFK", - "flight_count": 1, - "avg_distance": 2475.0 - }, - { - "origin": "LAX", - "flight_count": 1, - "avg_distance": 337.0 - }, - { - "origin": "SFO", - "flight_count": 1, - "avg_distance": 382.0 - } - ] - } - } } } }, diff --git a/examples/README.md b/examples/README.md index 59fb7a4..eed0ce5 100644 --- a/examples/README.md +++ b/examples/README.md @@ -8,19 +8,19 @@ Run the examples in order to learn the key features: ```bash # Example 1: Basic semantic tables and queries -python examples/01_basic_flights.py +python examples/basic_flights.py # Example 2: Market share and percent of total -python examples/02_percent_of_total.py +python examples/percent_of_total.py # Example 3: Window functions (rolling averages, rankings) -python examples/03_window_functions.py +python examples/window_functions.py # Example 4: Joins and foreign sums/averages -python examples/04_joins.py +python examples/joins.py # Example 5: Bucketing with 'Other' (Top N with rollup) -python examples/05_bucketing_with_other.py +python examples/bucketing_with_other.py ``` ## Examples Overview @@ -86,8 +86,7 @@ Master the "bucketing with OTHER" pattern for clean reports and visualizations: ### Tests For more advanced examples and patterns, see the test suite: -- `src/boring_semantic_layer/api/tests/test_real_world_scenarios.py` -- `src/boring_semantic_layer/api/tests/malloy_equivalence/` +- `src/boring_semantic_layer/tests/test_real_world_scenarios.py` ## Data Sources