From ae69f8cdfff4bcc2117fd3910ca3b2ea749f6b30 Mon Sep 17 00:00:00 2001 From: Arnaud Lacurie Date: Mon, 17 Nov 2025 13:45:09 +0000 Subject: [PATCH 1/9] Add COUNT aggregate function documentation --- .../Functions/aggregate_functions.rst | 11 ++ .../aggregate_functions/count.diagram | 9 ++ .../Functions/aggregate_functions/count.rst | 136 ++++++++++++++++++ ...ate-functions-documentation-queries.yamsql | 49 +++++++ 4 files changed, 205 insertions(+) create mode 100644 docs/sphinx/source/reference/Functions/aggregate_functions/count.diagram create mode 100644 docs/sphinx/source/reference/Functions/aggregate_functions/count.rst create mode 100644 yaml-tests/src/test/resources/documentation-queries/aggregate-functions-documentation-queries.yamsql diff --git a/docs/sphinx/source/reference/Functions/aggregate_functions.rst b/docs/sphinx/source/reference/Functions/aggregate_functions.rst index 5a5e4612f6..be436fdaba 100644 --- a/docs/sphinx/source/reference/Functions/aggregate_functions.rst +++ b/docs/sphinx/source/reference/Functions/aggregate_functions.rst @@ -7,6 +7,17 @@ Aggregate functions perform calculations on zero, one, or multiple rows of value List of functions (by sub-category) ################################### +Standard SQL Aggregates +------------------------ + +.. toctree:: + :maxdepth: 1 + + aggregate_functions/count + +Specialized Aggregates +---------------------- + .. toctree:: :maxdepth: 1 diff --git a/docs/sphinx/source/reference/Functions/aggregate_functions/count.diagram b/docs/sphinx/source/reference/Functions/aggregate_functions/count.diagram new file mode 100644 index 0000000000..d8cf1acaaf --- /dev/null +++ b/docs/sphinx/source/reference/Functions/aggregate_functions/count.diagram @@ -0,0 +1,9 @@ +Diagram( + Terminal('COUNT'), + Terminal('('), + Choice(0, + Terminal('*'), + NonTerminal('expression'), + ), + Terminal(')'), +) \ No newline at end of file diff --git a/docs/sphinx/source/reference/Functions/aggregate_functions/count.rst b/docs/sphinx/source/reference/Functions/aggregate_functions/count.rst new file mode 100644 index 0000000000..4266c914f7 --- /dev/null +++ b/docs/sphinx/source/reference/Functions/aggregate_functions/count.rst @@ -0,0 +1,136 @@ +===== +COUNT +===== + +.. _count: + +Counts the number of rows or non-NULL values in a group. + +Syntax +====== + +.. raw:: html + :file: count.diagram.svg + +Parameters +========== + +The function accepts two forms: + +``COUNT(*)`` + Counts all rows in the group, including rows with NULL values. + +``COUNT(expression)`` + Counts only the rows where ``expression`` is not NULL. + +Returns +======= + +Returns a ``BIGINT`` representing the count of rows or non-NULL values. If the input set is empty, returns ``0``. + +Examples +======== + +Setup +----- + +For these examples, assume we have a ``sales`` table: + +.. code-block:: sql + + CREATE TABLE sales( + id BIGINT, + product STRING, + region STRING, + amount BIGINT, + PRIMARY KEY(id)) + + INSERT INTO sales VALUES + (1, 'Widget', 'North', 100), + (2, 'Widget', 'South', 150), + (3, 'Gadget', 'North', 200), + (4, 'Gadget', 'South', NULL), + (5, 'Widget', 'North', 120) + +COUNT(*) - Count All Rows +-------------------------- + +Count all rows in the table: + +.. code-block:: sql + + SELECT COUNT(*) AS total_sales FROM sales + +.. list-table:: + :header-rows: 1 + + * - :sql:`total_sales` + * - :json:`5` + +COUNT(column) - Count Non-NULL Values +-------------------------------------- + +Count only non-NULL amounts: + +.. code-block:: sql + + SELECT COUNT(amount) AS sales_with_amount FROM sales + +.. list-table:: + :header-rows: 1 + + * - :sql:`sales_with_amount` + * - :json:`4` + +Notice that the count is 4, not 5, because the fourth row has a NULL ``amount``. + +COUNT with GROUP BY +------------------- + +Count sales per product: + +.. code-block:: sql + + SELECT product, COUNT(*) AS sales_count + FROM sales + GROUP BY product + +.. list-table:: + :header-rows: 1 + + * - :sql:`product` + - :sql:`sales_count` + * - :json:`"Widget"` + - :json:`3` + * - :json:`"Gadget"` + - :json:`2` + +Count non-NULL amounts per region: + +.. code-block:: sql + + SELECT region, COUNT(amount) AS non_null_amounts + FROM sales + GROUP BY region + +.. list-table:: + :header-rows: 1 + + * - :sql:`region` + - :sql:`non_null_amounts` + * - :json:`"North"` + - :json:`3` + * - :json:`"South"` + - :json:`1` + +The South region has 2 sales, but only 1 has a non-NULL amount. + +Important Notes +=============== + +* ``COUNT(*)`` counts all rows, including those with NULL values in any column +* ``COUNT(column)`` counts only rows where the specified column is not NULL +* When used without GROUP BY, COUNT returns a single value for the entire table +* When used with GROUP BY, COUNT returns one value per group +* The return type is always ``BIGINT`` +* **Index Requirement**: For optimal performance, queries with GROUP BY require an appropriate index. See :ref:`Indexes ` for details on creating indexes that support GROUP BY operations. diff --git a/yaml-tests/src/test/resources/documentation-queries/aggregate-functions-documentation-queries.yamsql b/yaml-tests/src/test/resources/documentation-queries/aggregate-functions-documentation-queries.yamsql new file mode 100644 index 0000000000..71a01f4187 --- /dev/null +++ b/yaml-tests/src/test/resources/documentation-queries/aggregate-functions-documentation-queries.yamsql @@ -0,0 +1,49 @@ +--- +options: + supported_version: 4.3.2.0 +--- +schema_template: + create table sales(id bigint, product string, region string, amount bigint, primary key(id)) +--- +setup: + steps: + - query: insert into sales + values (1, 'Widget', 'North', 100), + (2, 'Widget', 'South', 150), + (3, 'Gadget', 'North', 200), + (4, 'Gadget', 'South', NULL), + (5, 'Widget', 'North', 120) +--- +test_block: + name: aggregate-functions-documentation-tests + preset: single_repetition_ordered + tests: + # COUNT(*) - Count All Rows + - + - query: SELECT COUNT(*) AS total_sales FROM sales + - supported_version: 4.3.2.0 + - result: [{total_sales: 5}] + + # COUNT(column) - Count Non-NULL Values + - + - query: SELECT COUNT(amount) AS sales_with_amount FROM sales + - supported_version: 4.3.2.0 + - result: [{sales_with_amount: 4}] + + # COUNT with GROUP BY - per product + - + - query: SELECT product, COUNT(*) AS sales_count + FROM sales + GROUP BY product + - supported_version: 4.3.2.0 + - unorderedResult: [{product: "Widget", sales_count: 3}, + {product: "Gadget", sales_count: 2}] + + # COUNT with GROUP BY - non-NULL amounts per region + - + - query: SELECT region, COUNT(amount) AS non_null_amounts + FROM sales + GROUP BY region + - supported_version: 4.3.2.0 + - unorderedResult: [{region: "North", non_null_amounts: 3}, + {region: "South", non_null_amounts: 1}] From 6ddcd38fcefe0a073f8d860104211ef55f3606ac Mon Sep 17 00:00:00 2001 From: Arnaud Lacurie Date: Mon, 17 Nov 2025 13:49:33 +0000 Subject: [PATCH 2/9] Add SUM, AVG, MIN, MAX aggregate function documentation --- .../Functions/aggregate_functions.rst | 4 + .../Functions/aggregate_functions/avg.diagram | 6 + .../Functions/aggregate_functions/avg.rst | 116 +++++++++++++++++ .../Functions/aggregate_functions/max.diagram | 6 + .../Functions/aggregate_functions/max.rst | 117 ++++++++++++++++++ .../Functions/aggregate_functions/min.diagram | 6 + .../Functions/aggregate_functions/min.rst | 117 ++++++++++++++++++ .../Functions/aggregate_functions/sum.diagram | 6 + .../Functions/aggregate_functions/sum.rst | 116 +++++++++++++++++ ...ate-functions-documentation-queries.yamsql | 96 ++++++++++++++ 10 files changed, 590 insertions(+) create mode 100644 docs/sphinx/source/reference/Functions/aggregate_functions/avg.diagram create mode 100644 docs/sphinx/source/reference/Functions/aggregate_functions/avg.rst create mode 100644 docs/sphinx/source/reference/Functions/aggregate_functions/max.diagram create mode 100644 docs/sphinx/source/reference/Functions/aggregate_functions/max.rst create mode 100644 docs/sphinx/source/reference/Functions/aggregate_functions/min.diagram create mode 100644 docs/sphinx/source/reference/Functions/aggregate_functions/min.rst create mode 100644 docs/sphinx/source/reference/Functions/aggregate_functions/sum.diagram create mode 100644 docs/sphinx/source/reference/Functions/aggregate_functions/sum.rst diff --git a/docs/sphinx/source/reference/Functions/aggregate_functions.rst b/docs/sphinx/source/reference/Functions/aggregate_functions.rst index be436fdaba..df7f731ca4 100644 --- a/docs/sphinx/source/reference/Functions/aggregate_functions.rst +++ b/docs/sphinx/source/reference/Functions/aggregate_functions.rst @@ -14,6 +14,10 @@ Standard SQL Aggregates :maxdepth: 1 aggregate_functions/count + aggregate_functions/sum + aggregate_functions/avg + aggregate_functions/min + aggregate_functions/max Specialized Aggregates ---------------------- diff --git a/docs/sphinx/source/reference/Functions/aggregate_functions/avg.diagram b/docs/sphinx/source/reference/Functions/aggregate_functions/avg.diagram new file mode 100644 index 0000000000..ce0803b5ec --- /dev/null +++ b/docs/sphinx/source/reference/Functions/aggregate_functions/avg.diagram @@ -0,0 +1,6 @@ +Diagram( + Terminal('AVG'), + Terminal('('), + NonTerminal('expression'), + Terminal(')'), +) diff --git a/docs/sphinx/source/reference/Functions/aggregate_functions/avg.rst b/docs/sphinx/source/reference/Functions/aggregate_functions/avg.rst new file mode 100644 index 0000000000..37492e5356 --- /dev/null +++ b/docs/sphinx/source/reference/Functions/aggregate_functions/avg.rst @@ -0,0 +1,116 @@ +=== +AVG +=== + +.. _avg: + +Computes the average (arithmetic mean) of all non-NULL values in a group. + +Syntax +====== + +.. raw:: html + :file: avg.diagram.svg + +Parameters +========== + +``AVG(expression)`` + Calculates the average of all non-NULL values of ``expression`` in the group. NULL values are ignored. + +Returns +======= + +Returns a ``DOUBLE`` representing the average of all non-NULL values. If all values are NULL or the input set is empty, returns NULL. + +Examples +======== + +Setup +----- + +For these examples, assume we have a ``sales`` table: + +.. code-block:: sql + + CREATE TABLE sales( + id BIGINT, + product STRING, + region STRING, + amount BIGINT, + PRIMARY KEY(id)) + + INSERT INTO sales VALUES + (1, 'Widget', 'North', 100), + (2, 'Widget', 'South', 150), + (3, 'Gadget', 'North', 200), + (4, 'Gadget', 'South', NULL), + (5, 'Widget', 'North', 120) + +AVG - Average All Values +------------------------- + +Calculate the average of all amounts in the table: + +.. code-block:: sql + + SELECT AVG(amount) AS average_amount FROM sales + +.. list-table:: + :header-rows: 1 + + * - :sql:`average_amount` + * - :json:`142.5` + +Notice that the NULL value in row 4 is ignored, so the average is 570 / 4 = 142.5. + +AVG with GROUP BY +------------------ + +Calculate average amounts per product: + +.. code-block:: sql + + SELECT product, AVG(amount) AS average_amount + FROM sales + GROUP BY product + +.. list-table:: + :header-rows: 1 + + * - :sql:`product` + - :sql:`average_amount` + * - :json:`"Widget"` + - :json:`123.33333333333333` + * - :json:`"Gadget"` + - :json:`200.0` + +Calculate average amounts per region: + +.. code-block:: sql + + SELECT region, AVG(amount) AS average_amount + FROM sales + GROUP BY region + +.. list-table:: + :header-rows: 1 + + * - :sql:`region` + - :sql:`average_amount` + * - :json:`"North"` + - :json:`140.0` + * - :json:`"South"` + - :json:`150.0` + +The South region average only includes the non-NULL value (150), ignoring the NULL from the Gadget sale. + +Important Notes +=============== + +* ``AVG`` ignores NULL values in the aggregation +* When used without GROUP BY, AVG returns a single value for the entire table +* When used with GROUP BY, AVG returns one value per group +* If all values in a group are NULL, AVG returns NULL for that group +* The return type is ``DOUBLE`` (even if the input is ``BIGINT``) +* **Index Requirement**: For optimal performance, queries with GROUP BY require an appropriate index. See :ref:`Indexes ` for details on creating indexes that support GROUP BY operations. diff --git a/docs/sphinx/source/reference/Functions/aggregate_functions/max.diagram b/docs/sphinx/source/reference/Functions/aggregate_functions/max.diagram new file mode 100644 index 0000000000..ce7debc13f --- /dev/null +++ b/docs/sphinx/source/reference/Functions/aggregate_functions/max.diagram @@ -0,0 +1,6 @@ +Diagram( + Terminal('MAX'), + Terminal('('), + NonTerminal('expression'), + Terminal(')'), +) diff --git a/docs/sphinx/source/reference/Functions/aggregate_functions/max.rst b/docs/sphinx/source/reference/Functions/aggregate_functions/max.rst new file mode 100644 index 0000000000..481abd7fe1 --- /dev/null +++ b/docs/sphinx/source/reference/Functions/aggregate_functions/max.rst @@ -0,0 +1,117 @@ +=== +MAX +=== + +.. _max: + +Returns the maximum value from all non-NULL values in a group. + +Syntax +====== + +.. raw:: html + :file: max.diagram.svg + +Parameters +========== + +``MAX(expression)`` + Returns the largest non-NULL value of ``expression`` in the group. NULL values are ignored. + +Returns +======= + +Returns the maximum value with the same type as ``expression``. If all values are NULL or the input set is empty, returns NULL. + +Examples +======== + +Setup +----- + +For these examples, assume we have a ``sales`` table: + +.. code-block:: sql + + CREATE TABLE sales( + id BIGINT, + product STRING, + region STRING, + amount BIGINT, + PRIMARY KEY(id)) + + INSERT INTO sales VALUES + (1, 'Widget', 'North', 100), + (2, 'Widget', 'South', 150), + (3, 'Gadget', 'North', 200), + (4, 'Gadget', 'South', NULL), + (5, 'Widget', 'North', 120) + +MAX - Maximum Value +-------------------- + +Find the maximum amount in the table: + +.. code-block:: sql + + SELECT MAX(amount) AS max_amount FROM sales + +.. list-table:: + :header-rows: 1 + + * - :sql:`max_amount` + * - :json:`200` + +Notice that the NULL value in row 4 is ignored. + +MAX with GROUP BY +------------------ + +Find maximum amounts per product: + +.. code-block:: sql + + SELECT product, MAX(amount) AS max_amount + FROM sales + GROUP BY product + +.. list-table:: + :header-rows: 1 + + * - :sql:`product` + - :sql:`max_amount` + * - :json:`"Widget"` + - :json:`150` + * - :json:`"Gadget"` + - :json:`200` + +Find maximum amounts per region: + +.. code-block:: sql + + SELECT region, MAX(amount) AS max_amount + FROM sales + GROUP BY region + +.. list-table:: + :header-rows: 1 + + * - :sql:`region` + - :sql:`max_amount` + * - :json:`"North"` + - :json:`200` + * - :json:`"South"` + - :json:`150` + +The South region maximum only considers the non-NULL value (150), ignoring the NULL from the Gadget sale. + +Important Notes +=============== + +* ``MAX`` ignores NULL values in the aggregation +* When used without GROUP BY, MAX returns a single value for the entire table +* When used with GROUP BY, MAX returns one value per group +* If all values in a group are NULL, MAX returns NULL for that group +* The return type matches the type of the input expression +* ``MAX`` can be used with numeric types, strings (lexicographic ordering), and other comparable types +* **Index Requirement**: For optimal performance, queries with GROUP BY require an appropriate index. See :ref:`Indexes ` for details on creating indexes that support GROUP BY operations. diff --git a/docs/sphinx/source/reference/Functions/aggregate_functions/min.diagram b/docs/sphinx/source/reference/Functions/aggregate_functions/min.diagram new file mode 100644 index 0000000000..e9574ccfa1 --- /dev/null +++ b/docs/sphinx/source/reference/Functions/aggregate_functions/min.diagram @@ -0,0 +1,6 @@ +Diagram( + Terminal('MIN'), + Terminal('('), + NonTerminal('expression'), + Terminal(')'), +) diff --git a/docs/sphinx/source/reference/Functions/aggregate_functions/min.rst b/docs/sphinx/source/reference/Functions/aggregate_functions/min.rst new file mode 100644 index 0000000000..bd0717836f --- /dev/null +++ b/docs/sphinx/source/reference/Functions/aggregate_functions/min.rst @@ -0,0 +1,117 @@ +=== +MIN +=== + +.. _min: + +Returns the minimum value from all non-NULL values in a group. + +Syntax +====== + +.. raw:: html + :file: min.diagram.svg + +Parameters +========== + +``MIN(expression)`` + Returns the smallest non-NULL value of ``expression`` in the group. NULL values are ignored. + +Returns +======= + +Returns the minimum value with the same type as ``expression``. If all values are NULL or the input set is empty, returns NULL. + +Examples +======== + +Setup +----- + +For these examples, assume we have a ``sales`` table: + +.. code-block:: sql + + CREATE TABLE sales( + id BIGINT, + product STRING, + region STRING, + amount BIGINT, + PRIMARY KEY(id)) + + INSERT INTO sales VALUES + (1, 'Widget', 'North', 100), + (2, 'Widget', 'South', 150), + (3, 'Gadget', 'North', 200), + (4, 'Gadget', 'South', NULL), + (5, 'Widget', 'North', 120) + +MIN - Minimum Value +-------------------- + +Find the minimum amount in the table: + +.. code-block:: sql + + SELECT MIN(amount) AS min_amount FROM sales + +.. list-table:: + :header-rows: 1 + + * - :sql:`min_amount` + * - :json:`100` + +Notice that the NULL value in row 4 is ignored. + +MIN with GROUP BY +------------------ + +Find minimum amounts per product: + +.. code-block:: sql + + SELECT product, MIN(amount) AS min_amount + FROM sales + GROUP BY product + +.. list-table:: + :header-rows: 1 + + * - :sql:`product` + - :sql:`min_amount` + * - :json:`"Widget"` + - :json:`100` + * - :json:`"Gadget"` + - :json:`200` + +Find minimum amounts per region: + +.. code-block:: sql + + SELECT region, MIN(amount) AS min_amount + FROM sales + GROUP BY region + +.. list-table:: + :header-rows: 1 + + * - :sql:`region` + - :sql:`min_amount` + * - :json:`"North"` + - :json:`100` + * - :json:`"South"` + - :json:`150` + +The South region minimum only considers the non-NULL value (150), ignoring the NULL from the Gadget sale. + +Important Notes +=============== + +* ``MIN`` ignores NULL values in the aggregation +* When used without GROUP BY, MIN returns a single value for the entire table +* When used with GROUP BY, MIN returns one value per group +* If all values in a group are NULL, MIN returns NULL for that group +* The return type matches the type of the input expression +* ``MIN`` can be used with numeric types, strings (lexicographic ordering), and other comparable types +* **Index Requirement**: For optimal performance, queries with GROUP BY require an appropriate index. See :ref:`Indexes ` for details on creating indexes that support GROUP BY operations. diff --git a/docs/sphinx/source/reference/Functions/aggregate_functions/sum.diagram b/docs/sphinx/source/reference/Functions/aggregate_functions/sum.diagram new file mode 100644 index 0000000000..9cd30dcb7e --- /dev/null +++ b/docs/sphinx/source/reference/Functions/aggregate_functions/sum.diagram @@ -0,0 +1,6 @@ +Diagram( + Terminal('SUM'), + Terminal('('), + NonTerminal('expression'), + Terminal(')'), +) diff --git a/docs/sphinx/source/reference/Functions/aggregate_functions/sum.rst b/docs/sphinx/source/reference/Functions/aggregate_functions/sum.rst new file mode 100644 index 0000000000..a170a9f97f --- /dev/null +++ b/docs/sphinx/source/reference/Functions/aggregate_functions/sum.rst @@ -0,0 +1,116 @@ +=== +SUM +=== + +.. _sum: + +Computes the sum of all non-NULL values in a group. + +Syntax +====== + +.. raw:: html + :file: sum.diagram.svg + +Parameters +========== + +``SUM(expression)`` + Sums all non-NULL values of ``expression`` in the group. NULL values are ignored. + +Returns +======= + +Returns a ``BIGINT`` representing the sum of all non-NULL values. If all values are NULL or the input set is empty, returns NULL. + +Examples +======== + +Setup +----- + +For these examples, assume we have a ``sales`` table: + +.. code-block:: sql + + CREATE TABLE sales( + id BIGINT, + product STRING, + region STRING, + amount BIGINT, + PRIMARY KEY(id)) + + INSERT INTO sales VALUES + (1, 'Widget', 'North', 100), + (2, 'Widget', 'South', 150), + (3, 'Gadget', 'North', 200), + (4, 'Gadget', 'South', NULL), + (5, 'Widget', 'North', 120) + +SUM - Sum All Values +--------------------- + +Sum all amounts in the table: + +.. code-block:: sql + + SELECT SUM(amount) AS total_amount FROM sales + +.. list-table:: + :header-rows: 1 + + * - :sql:`total_amount` + * - :json:`570` + +Notice that the NULL value in row 4 is ignored in the sum. + +SUM with GROUP BY +------------------ + +Sum amounts per product: + +.. code-block:: sql + + SELECT product, SUM(amount) AS total_amount + FROM sales + GROUP BY product + +.. list-table:: + :header-rows: 1 + + * - :sql:`product` + - :sql:`total_amount` + * - :json:`"Widget"` + - :json:`370` + * - :json:`"Gadget"` + - :json:`200` + +Sum amounts per region: + +.. code-block:: sql + + SELECT region, SUM(amount) AS total_amount + FROM sales + GROUP BY region + +.. list-table:: + :header-rows: 1 + + * - :sql:`region` + - :sql:`total_amount` + * - :json:`"North"` + - :json:`420` + * - :json:`"South"` + - :json:`150` + +The South region sum only includes the non-NULL value (150), ignoring the NULL from the Gadget sale. + +Important Notes +=============== + +* ``SUM`` ignores NULL values in the aggregation +* When used without GROUP BY, SUM returns a single value for the entire table +* When used with GROUP BY, SUM returns one value per group +* If all values in a group are NULL, SUM returns NULL for that group +* The return type is ``BIGINT`` +* **Index Requirement**: For optimal performance, queries with GROUP BY require an appropriate index. See :ref:`Indexes ` for details on creating indexes that support GROUP BY operations. diff --git a/yaml-tests/src/test/resources/documentation-queries/aggregate-functions-documentation-queries.yamsql b/yaml-tests/src/test/resources/documentation-queries/aggregate-functions-documentation-queries.yamsql index 71a01f4187..c1a0acea1a 100644 --- a/yaml-tests/src/test/resources/documentation-queries/aggregate-functions-documentation-queries.yamsql +++ b/yaml-tests/src/test/resources/documentation-queries/aggregate-functions-documentation-queries.yamsql @@ -47,3 +47,99 @@ test_block: - supported_version: 4.3.2.0 - unorderedResult: [{region: "North", non_null_amounts: 3}, {region: "South", non_null_amounts: 1}] + + # SUM - Sum All Values + - + - query: SELECT SUM(amount) AS total_amount FROM sales + - supported_version: 4.3.2.0 + - result: [{total_amount: 570}] + + # SUM with GROUP BY - per product + - + - query: SELECT product, SUM(amount) AS total_amount + FROM sales + GROUP BY product + - supported_version: 4.3.2.0 + - unorderedResult: [{product: "Widget", total_amount: 370}, + {product: "Gadget", total_amount: 200}] + + # SUM with GROUP BY - per region + - + - query: SELECT region, SUM(amount) AS total_amount + FROM sales + GROUP BY region + - supported_version: 4.3.2.0 + - unorderedResult: [{region: "North", total_amount: 420}, + {region: "South", total_amount: 150}] + + # AVG - Average All Values + - + - query: SELECT AVG(amount) AS average_amount FROM sales + - supported_version: 4.3.2.0 + - result: [{average_amount: 142.5}] + + # AVG with GROUP BY - per product + - + - query: SELECT product, AVG(amount) AS average_amount + FROM sales + GROUP BY product + - supported_version: 4.3.2.0 + - unorderedResult: [{product: "Widget", average_amount: 123.33333333333333}, + {product: "Gadget", average_amount: 200.0}] + + # AVG with GROUP BY - per region + - + - query: SELECT region, AVG(amount) AS average_amount + FROM sales + GROUP BY region + - supported_version: 4.3.2.0 + - unorderedResult: [{region: "North", average_amount: 140.0}, + {region: "South", average_amount: 150.0}] + + # MIN - Minimum Value + - + - query: SELECT MIN(amount) AS min_amount FROM sales + - supported_version: 4.3.2.0 + - result: [{min_amount: 100}] + + # MIN with GROUP BY - per product + - + - query: SELECT product, MIN(amount) AS min_amount + FROM sales + GROUP BY product + - supported_version: 4.3.2.0 + - unorderedResult: [{product: "Widget", min_amount: 100}, + {product: "Gadget", min_amount: 200}] + + # MIN with GROUP BY - per region + - + - query: SELECT region, MIN(amount) AS min_amount + FROM sales + GROUP BY region + - supported_version: 4.3.2.0 + - unorderedResult: [{region: "North", min_amount: 100}, + {region: "South", min_amount: 150}] + + # MAX - Maximum Value + - + - query: SELECT MAX(amount) AS max_amount FROM sales + - supported_version: 4.3.2.0 + - result: [{max_amount: 200}] + + # MAX with GROUP BY - per product + - + - query: SELECT product, MAX(amount) AS max_amount + FROM sales + GROUP BY product + - supported_version: 4.3.2.0 + - unorderedResult: [{product: "Widget", max_amount: 150}, + {product: "Gadget", max_amount: 200}] + + # MAX with GROUP BY - per region + - + - query: SELECT region, MAX(amount) AS max_amount + FROM sales + GROUP BY region + - supported_version: 4.3.2.0 + - unorderedResult: [{region: "North", max_amount: 200}, + {region: "South", max_amount: 150}] From 6efe87f42d31ba6366aefb5a1e60d258a68148ea Mon Sep 17 00:00:00 2001 From: Arnaud Lacurie Date: Mon, 17 Nov 2025 13:52:43 +0000 Subject: [PATCH 3/9] Add GREATEST, LEAST, COALESCE scalar function documentation --- .../reference/Functions/scalar_functions.rst | 13 ++ .../scalar_functions/coalesce.diagram | 12 ++ .../Functions/scalar_functions/coalesce.rst | 134 ++++++++++++++++++ .../scalar_functions/greatest.diagram | 12 ++ .../Functions/scalar_functions/greatest.rst | 117 +++++++++++++++ .../Functions/scalar_functions/least.diagram | 12 ++ .../Functions/scalar_functions/least.rst | 116 +++++++++++++++ ...lar-functions-documentation-queries.yamsql | 81 +++++++++++ 8 files changed, 497 insertions(+) create mode 100644 docs/sphinx/source/reference/Functions/scalar_functions/coalesce.diagram create mode 100644 docs/sphinx/source/reference/Functions/scalar_functions/coalesce.rst create mode 100644 docs/sphinx/source/reference/Functions/scalar_functions/greatest.diagram create mode 100644 docs/sphinx/source/reference/Functions/scalar_functions/greatest.rst create mode 100644 docs/sphinx/source/reference/Functions/scalar_functions/least.diagram create mode 100644 docs/sphinx/source/reference/Functions/scalar_functions/least.rst create mode 100644 yaml-tests/src/test/resources/documentation-queries/scalar-functions-documentation-queries.yamsql diff --git a/docs/sphinx/source/reference/Functions/scalar_functions.rst b/docs/sphinx/source/reference/Functions/scalar_functions.rst index 0e6da8fe17..5777f79c36 100644 --- a/docs/sphinx/source/reference/Functions/scalar_functions.rst +++ b/docs/sphinx/source/reference/Functions/scalar_functions.rst @@ -7,6 +7,19 @@ Scalar functions are functions that take an input of one row and produce a singl List of functions (by sub-category) ################################### +Comparison Functions +-------------------- + +.. toctree:: + :maxdepth: 1 + + scalar_functions/greatest + scalar_functions/least + scalar_functions/coalesce + +Specialized Functions +--------------------- + .. toctree:: :maxdepth: 1 diff --git a/docs/sphinx/source/reference/Functions/scalar_functions/coalesce.diagram b/docs/sphinx/source/reference/Functions/scalar_functions/coalesce.diagram new file mode 100644 index 0000000000..2cde21b94e --- /dev/null +++ b/docs/sphinx/source/reference/Functions/scalar_functions/coalesce.diagram @@ -0,0 +1,12 @@ +Diagram( + Terminal('COALESCE'), + Terminal('('), + NonTerminal('expression'), + OneOrMore( + Sequence( + Terminal(','), + NonTerminal('expression') + ) + ), + Terminal(')'), +) diff --git a/docs/sphinx/source/reference/Functions/scalar_functions/coalesce.rst b/docs/sphinx/source/reference/Functions/scalar_functions/coalesce.rst new file mode 100644 index 0000000000..030c5ecbdf --- /dev/null +++ b/docs/sphinx/source/reference/Functions/scalar_functions/coalesce.rst @@ -0,0 +1,134 @@ +======== +COALESCE +======== + +.. _coalesce: + +Returns the first non-NULL value from a list of expressions. + +Syntax +====== + +.. raw:: html + :file: coalesce.diagram.svg + +Parameters +========== + +``COALESCE(expression1, expression2, ...)`` + Evaluates expressions from left to right and returns the first non-NULL value. Requires at least two expressions. + +Returns +======= + +Returns the first non-NULL value with the same type as the input expressions. If all values are NULL, returns NULL. All expressions must be of compatible types. + +Supported Types +================ + +``COALESCE`` supports all data types: + +* Primitive types: ``STRING``, ``BOOLEAN``, ``DOUBLE``, ``FLOAT``, ``INTEGER``, ``BIGINT``, ``BYTES`` +* Complex types: ``ARRAY``, ``STRUCT`` + +Examples +======== + +Setup +----- + +For these examples, assume we have a ``contacts`` table: + +.. code-block:: sql + + CREATE TABLE contacts( + id BIGINT, + name STRING, + email STRING, + phone STRING, + address STRING, + PRIMARY KEY(id)) + + INSERT INTO contacts VALUES + (1, 'Alice', 'alice@example.com', NULL, NULL), + (2, 'Bob', NULL, '555-0123', NULL), + (3, 'Charlie', NULL, NULL, '123 Main St'), + (4, 'David', NULL, NULL, NULL) + +COALESCE - Find First Available Contact Method +------------------------------------------------ + +Get the first available contact method for each person: + +.. code-block:: sql + + SELECT name, COALESCE(email, phone, address, 'No contact info') AS contact_method + FROM contacts + +.. list-table:: + :header-rows: 1 + + * - :sql:`name` + - :sql:`contact_method` + * - :json:`"Alice"` + - :json:`"alice@example.com"` + * - :json:`"Bob"` + - :json:`"555-0123"` + * - :json:`"Charlie"` + - :json:`"123 Main St"` + * - :json:`"David"` + - :json:`"No contact info"` + +COALESCE with Default Values +------------------------------ + +Provide default values for NULL fields: + +.. code-block:: sql + + SELECT name, + COALESCE(email, 'no-email@example.com') AS email, + COALESCE(phone, 'Unknown') AS phone + FROM contacts + +.. list-table:: + :header-rows: 1 + + * - :sql:`name` + - :sql:`email` + - :sql:`phone` + * - :json:`"Alice"` + - :json:`"alice@example.com"` + - :json:`"Unknown"` + * - :json:`"Bob"` + - :json:`"no-email@example.com"` + - :json:`"555-0123"` + * - :json:`"Charlie"` + - :json:`"no-email@example.com"` + - :json:`"Unknown"` + * - :json:`"David"` + - :json:`"no-email@example.com"` + - :json:`"Unknown"` + +COALESCE in UPDATE Statements +------------------------------- + +Use COALESCE to update only NULL values: + +.. code-block:: sql + + UPDATE contacts + SET email = COALESCE(email, 'default@example.com') + WHERE id = 2 + +This sets the email to ``'default@example.com'`` only if it was NULL, otherwise keeps the existing value. + +Important Notes +=============== + +* ``COALESCE`` returns the first non-NULL value from left to right +* If all values are NULL, ``COALESCE`` returns NULL +* All expressions must be of compatible types +* The function requires at least two arguments +* ``COALESCE`` supports all data types, including complex types like ``STRUCT`` and ``ARRAY`` +* Commonly used for providing default values when NULL is encountered diff --git a/docs/sphinx/source/reference/Functions/scalar_functions/greatest.diagram b/docs/sphinx/source/reference/Functions/scalar_functions/greatest.diagram new file mode 100644 index 0000000000..b2a9c23374 --- /dev/null +++ b/docs/sphinx/source/reference/Functions/scalar_functions/greatest.diagram @@ -0,0 +1,12 @@ +Diagram( + Terminal('GREATEST'), + Terminal('('), + NonTerminal('expression'), + OneOrMore( + Sequence( + Terminal(','), + NonTerminal('expression') + ) + ), + Terminal(')'), +) diff --git a/docs/sphinx/source/reference/Functions/scalar_functions/greatest.rst b/docs/sphinx/source/reference/Functions/scalar_functions/greatest.rst new file mode 100644 index 0000000000..dfc203d7dd --- /dev/null +++ b/docs/sphinx/source/reference/Functions/scalar_functions/greatest.rst @@ -0,0 +1,117 @@ +======== +GREATEST +======== + +.. _greatest: + +Returns the greatest (maximum) value from a list of expressions. + +Syntax +====== + +.. raw:: html + :file: greatest.diagram.svg + +Parameters +========== + +``GREATEST(expression1, expression2, ...)`` + Returns the largest value among all provided expressions. Requires at least two expressions. NULL values are considered smaller than any non-NULL value. + +Returns +======= + +Returns the greatest value with the same type as the input expressions. If all values are NULL, returns NULL. All expressions must be of compatible types. + +Supported Types +================ + +``GREATEST`` supports the following types: + +* ``STRING`` - Lexicographic comparison +* ``BOOLEAN`` - TRUE > FALSE +* ``DOUBLE`` - Numeric comparison +* ``FLOAT`` - Numeric comparison +* ``INTEGER`` - Numeric comparison +* ``BIGINT`` - Numeric comparison + +**Not Supported**: ``ARRAY``, ``STRUCT``, ``BYTES`` + +Examples +======== + +Setup +----- + +For these examples, assume we have a ``products`` table: + +.. code-block:: sql + + CREATE TABLE products( + id BIGINT, + name STRING, + price_usd BIGINT, + price_eur BIGINT, + price_gbp BIGINT, + PRIMARY KEY(id)) + + INSERT INTO products VALUES + (1, 'Widget', 100, 90, 80), + (2, 'Gadget', 150, 140, 120), + (3, 'Doohickey', 200, 180, 160) + +GREATEST - Find Maximum Price +------------------------------- + +Find the highest price across all currencies for each product: + +.. code-block:: sql + + SELECT name, GREATEST(price_usd, price_eur, price_gbp) AS max_price + FROM products + +.. list-table:: + :header-rows: 1 + + * - :sql:`name` + - :sql:`max_price` + * - :json:`"Widget"` + - :json:`100` + * - :json:`"Gadget"` + - :json:`150` + * - :json:`"Doohickey"` + - :json:`200` + +GREATEST with Constants +------------------------- + +Compare values with constants: + +.. code-block:: sql + + SELECT name, GREATEST(price_usd, 125) AS adjusted_price + FROM products + +.. list-table:: + :header-rows: 1 + + * - :sql:`name` + - :sql:`adjusted_price` + * - :json:`"Widget"` + - :json:`125` + * - :json:`"Gadget"` + - :json:`150` + * - :json:`"Doohickey"` + - :json:`200` + +This ensures a minimum price of 125 for all products. + +Important Notes +=============== + +* ``GREATEST`` returns the largest value among all provided expressions +* NULL values are treated as smaller than any non-NULL value +* If all values are NULL, ``GREATEST`` returns NULL +* All expressions must be of compatible types +* The function requires at least two arguments +* For string comparisons, lexicographic ordering is used diff --git a/docs/sphinx/source/reference/Functions/scalar_functions/least.diagram b/docs/sphinx/source/reference/Functions/scalar_functions/least.diagram new file mode 100644 index 0000000000..ba9640bf1f --- /dev/null +++ b/docs/sphinx/source/reference/Functions/scalar_functions/least.diagram @@ -0,0 +1,12 @@ +Diagram( + Terminal('LEAST'), + Terminal('('), + NonTerminal('expression'), + OneOrMore( + Sequence( + Terminal(','), + NonTerminal('expression') + ) + ), + Terminal(')'), +) diff --git a/docs/sphinx/source/reference/Functions/scalar_functions/least.rst b/docs/sphinx/source/reference/Functions/scalar_functions/least.rst new file mode 100644 index 0000000000..0227aa0900 --- /dev/null +++ b/docs/sphinx/source/reference/Functions/scalar_functions/least.rst @@ -0,0 +1,116 @@ +==== +LEAST +===== + +.. _least: + +Returns the least (minimum) value from a list of expressions. + +Syntax +====== + +.. raw:: html + :file: least.diagram.svg + +Parameters +========== + +``LEAST(expression1, expression2, ...)`` + Returns the smallest value among all provided expressions. Requires at least two expressions. NULL values are considered smaller than any non-NULL value. + +Returns +======= + +Returns the least value with the same type as the input expressions. If any value is NULL, returns NULL. All expressions must be of compatible types. + +Supported Types +================ + +``LEAST`` supports the following types: + +* ``STRING`` - Lexicographic comparison +* ``BOOLEAN`` - FALSE < TRUE +* ``DOUBLE`` - Numeric comparison +* ``FLOAT`` - Numeric comparison +* ``INTEGER`` - Numeric comparison +* ``BIGINT`` - Numeric comparison + +**Not Supported**: ``ARRAY``, ``STRUCT``, ``BYTES`` + +Examples +======== + +Setup +----- + +For these examples, assume we have a ``products`` table: + +.. code-block:: sql + + CREATE TABLE products( + id BIGINT, + name STRING, + price_usd BIGINT, + price_eur BIGINT, + price_gbp BIGINT, + PRIMARY KEY(id)) + + INSERT INTO products VALUES + (1, 'Widget', 100, 90, 80), + (2, 'Gadget', 150, 140, 120), + (3, 'Doohickey', 200, 180, 160) + +LEAST - Find Minimum Price +---------------------------- + +Find the lowest price across all currencies for each product: + +.. code-block:: sql + + SELECT name, LEAST(price_usd, price_eur, price_gbp) AS min_price + FROM products + +.. list-table:: + :header-rows: 1 + + * - :sql:`name` + - :sql:`min_price` + * - :json:`"Widget"` + - :json:`80` + * - :json:`"Gadget"` + - :json:`120` + * - :json:`"Doohickey"` + - :json:`160` + +LEAST with Constants +--------------------- + +Compare values with constants: + +.. code-block:: sql + + SELECT name, LEAST(price_usd, 175) AS capped_price + FROM products + +.. list-table:: + :header-rows: 1 + + * - :sql:`name` + - :sql:`capped_price` + * - :json:`"Widget"` + - :json:`100` + * - :json:`"Gadget"` + - :json:`150` + * - :json:`"Doohickey"` + - :json:`175` + +This ensures a maximum price cap of 175 for all products. + +Important Notes +=============== + +* ``LEAST`` returns the smallest value among all provided expressions +* If any value is NULL, ``LEAST`` returns NULL +* All expressions must be of compatible types +* The function requires at least two arguments +* For string comparisons, lexicographic ordering is used diff --git a/yaml-tests/src/test/resources/documentation-queries/scalar-functions-documentation-queries.yamsql b/yaml-tests/src/test/resources/documentation-queries/scalar-functions-documentation-queries.yamsql new file mode 100644 index 0000000000..81b1911a6e --- /dev/null +++ b/yaml-tests/src/test/resources/documentation-queries/scalar-functions-documentation-queries.yamsql @@ -0,0 +1,81 @@ +--- +options: + supported_version: 4.3.2.0 +--- +schema_template: + create table products(id bigint, name string, price_usd bigint, price_eur bigint, price_gbp bigint, primary key(id)) + create table contacts(id bigint, name string, email string, phone string, address string, primary key(id)) +--- +setup: + steps: + - query: insert into products + values (1, 'Widget', 100, 90, 80), + (2, 'Gadget', 150, 140, 120), + (3, 'Doohickey', 200, 180, 160) + - query: insert into contacts + values (1, 'Alice', 'alice@example.com', NULL, NULL), + (2, 'Bob', NULL, '555-0123', NULL), + (3, 'Charlie', NULL, NULL, '123 Main St'), + (4, 'David', NULL, NULL, NULL) +--- +test_block: + name: scalar-functions-documentation-tests + preset: single_repetition_ordered + tests: + # GREATEST - Find Maximum Price + - + - query: SELECT name, GREATEST(price_usd, price_eur, price_gbp) AS max_price + FROM products + - supported_version: 4.3.2.0 + - result: [{name: "Widget", max_price: 100}, + {name: "Gadget", max_price: 150}, + {name: "Doohickey", max_price: 200}] + + # GREATEST with Constants + - + - query: SELECT name, GREATEST(price_usd, 125) AS adjusted_price + FROM products + - supported_version: 4.3.2.0 + - result: [{name: "Widget", adjusted_price: 125}, + {name: "Gadget", adjusted_price: 150}, + {name: "Doohickey", adjusted_price: 200}] + + # LEAST - Find Minimum Price + - + - query: SELECT name, LEAST(price_usd, price_eur, price_gbp) AS min_price + FROM products + - supported_version: 4.3.2.0 + - result: [{name: "Widget", min_price: 80}, + {name: "Gadget", min_price: 120}, + {name: "Doohickey", min_price: 160}] + + # LEAST with Constants + - + - query: SELECT name, LEAST(price_usd, 175) AS capped_price + FROM products + - supported_version: 4.3.2.0 + - result: [{name: "Widget", capped_price: 100}, + {name: "Gadget", capped_price: 150}, + {name: "Doohickey", capped_price: 175}] + + # COALESCE - Find First Available Contact Method + - + - query: SELECT name, COALESCE(email, phone, address, 'No contact info') AS contact_method + FROM contacts + - supported_version: 4.3.2.0 + - result: [{name: "Alice", contact_method: "alice@example.com"}, + {name: "Bob", contact_method: "555-0123"}, + {name: "Charlie", contact_method: "123 Main St"}, + {name: "David", contact_method: "No contact info"}] + + # COALESCE with Default Values + - + - query: SELECT name, + COALESCE(email, 'no-email@example.com') AS email, + COALESCE(phone, 'Unknown') AS phone + FROM contacts + - supported_version: 4.3.2.0 + - result: [{name: "Alice", email: "alice@example.com", phone: "Unknown"}, + {name: "Bob", email: "no-email@example.com", phone: "555-0123"}, + {name: "Charlie", email: "no-email@example.com", phone: "Unknown"}, + {name: "David", email: "no-email@example.com", phone: "Unknown"}] From b242d5496a65e32535b1125eec7804bc97d4a5c0 Mon Sep 17 00:00:00 2001 From: Arnaud Lacurie Date: Mon, 17 Nov 2025 14:35:48 +0000 Subject: [PATCH 4/9] Fix LEAST header formatting and add test coverage for documentation queries --- .../reference/Functions/scalar_functions/least.rst | 2 +- .../src/test/java/DocumentationQueriesTests.java | 10 ++++++++++ .../aggregate-functions-documentation-queries.yamsql | 2 ++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/docs/sphinx/source/reference/Functions/scalar_functions/least.rst b/docs/sphinx/source/reference/Functions/scalar_functions/least.rst index 0227aa0900..cd5c5840c7 100644 --- a/docs/sphinx/source/reference/Functions/scalar_functions/least.rst +++ b/docs/sphinx/source/reference/Functions/scalar_functions/least.rst @@ -1,4 +1,4 @@ -==== +===== LEAST ===== diff --git a/yaml-tests/src/test/java/DocumentationQueriesTests.java b/yaml-tests/src/test/java/DocumentationQueriesTests.java index e820bad3e7..341fdedc0b 100644 --- a/yaml-tests/src/test/java/DocumentationQueriesTests.java +++ b/yaml-tests/src/test/java/DocumentationQueriesTests.java @@ -43,4 +43,14 @@ void castDocumentationQueriesTests(YamlTest.Runner runner) throws Exception { void vectorDocumentationQueriesTests(YamlTest.Runner runner) throws Exception { runner.runYamsql(PREFIX + "/vector-documentation-queries.yamsql"); } + + @TestTemplate + void aggregateFunctionsDocumentationQueriesTests(YamlTest.Runner runner) throws Exception { + runner.runYamsql(PREFIX + "/aggregate-functions-documentation-queries.yamsql"); + } + + @TestTemplate + void scalarFunctionsDocumentationQueriesTests(YamlTest.Runner runner) throws Exception { + runner.runYamsql(PREFIX + "/scalar-functions-documentation-queries.yamsql"); + } } diff --git a/yaml-tests/src/test/resources/documentation-queries/aggregate-functions-documentation-queries.yamsql b/yaml-tests/src/test/resources/documentation-queries/aggregate-functions-documentation-queries.yamsql index c1a0acea1a..fd93a01a10 100644 --- a/yaml-tests/src/test/resources/documentation-queries/aggregate-functions-documentation-queries.yamsql +++ b/yaml-tests/src/test/resources/documentation-queries/aggregate-functions-documentation-queries.yamsql @@ -4,6 +4,8 @@ options: --- schema_template: create table sales(id bigint, product string, region string, amount bigint, primary key(id)) + create index product_idx as select product from sales order by product + create index region_idx as select region from sales order by region --- setup: steps: From 7166cdbfff4c13a36fa8bf606a755cd97516cb1f Mon Sep 17 00:00:00 2001 From: Arnaud Lacurie Date: Mon, 17 Nov 2025 15:23:26 +0000 Subject: [PATCH 5/9] Add GROUP BY documentation --- .../sql_commands/DQL/GROUP_BY.diagram | 23 ++ .../reference/sql_commands/DQL/GROUP_BY.rst | 279 ++++++++++++++++++ .../test/java/DocumentationQueriesTests.java | 5 + .../group-by-documentation-queries.yamsql | 81 +++++ 4 files changed, 388 insertions(+) create mode 100644 docs/sphinx/source/reference/sql_commands/DQL/GROUP_BY.diagram create mode 100644 docs/sphinx/source/reference/sql_commands/DQL/GROUP_BY.rst create mode 100644 yaml-tests/src/test/resources/documentation-queries/group-by-documentation-queries.yamsql diff --git a/docs/sphinx/source/reference/sql_commands/DQL/GROUP_BY.diagram b/docs/sphinx/source/reference/sql_commands/DQL/GROUP_BY.diagram new file mode 100644 index 0000000000..2159486b93 --- /dev/null +++ b/docs/sphinx/source/reference/sql_commands/DQL/GROUP_BY.diagram @@ -0,0 +1,23 @@ +Diagram( + Terminal('GROUP'), + Terminal('BY'), + NonTerminal('expression'), + Optional( + Sequence( + Terminal('AS'), + NonTerminal('alias') + ) + ), + OneOrMore( + Sequence( + Terminal(','), + NonTerminal('expression'), + Optional( + Sequence( + Terminal('AS'), + NonTerminal('alias') + ) + ) + ) + ), +) diff --git a/docs/sphinx/source/reference/sql_commands/DQL/GROUP_BY.rst b/docs/sphinx/source/reference/sql_commands/DQL/GROUP_BY.rst new file mode 100644 index 0000000000..981d880eba --- /dev/null +++ b/docs/sphinx/source/reference/sql_commands/DQL/GROUP_BY.rst @@ -0,0 +1,279 @@ +======== +GROUP BY +======== + +.. _group_by: + +Groups rows that have the same values in specified columns into aggregated rows, typically used with aggregate functions. + +Syntax +====== + +.. raw:: html + :file: GROUP_BY.diagram.svg + +The GROUP BY clause is used in SELECT statements: + +.. code-block:: sql + + SELECT column1, aggregate_function(column2) + FROM table_name + GROUP BY column1 + +Parameters +========== + +``GROUP BY expression [AS alias], ...`` + Groups rows based on the values of one or more expressions. Each unique combination of expression values creates a separate group. + +``expression`` + Can be: + + - Column names + - Nested field references (e.g., ``struct_column.field``) + - Expressions or calculations + +``alias`` (optional) + An optional alias for the grouping expression + +Returns +======= + +Returns one row per unique combination of grouped values. When used with aggregate functions, computes aggregate values for each group. + +Examples +======== + +Setup +----- + +For these examples, assume we have an ``employees`` table: + +.. code-block:: sql + + CREATE TABLE employees( + id BIGINT, + department STRING, + role STRING, + salary BIGINT, + PRIMARY KEY(id)) + + CREATE INDEX dept_idx AS SELECT department FROM employees ORDER BY department + CREATE INDEX role_idx AS SELECT role FROM employees ORDER BY role + + INSERT INTO employees VALUES + (1, 'Engineering', 'Developer', 100000), + (2, 'Engineering', 'Developer', 110000), + (3, 'Engineering', 'Manager', 150000), + (4, 'Sales', 'Representative', 80000), + (5, 'Sales', 'Manager', 120000) + +GROUP BY Single Column +----------------------- + +Count employees by department: + +.. code-block:: sql + + SELECT department, COUNT(*) AS employee_count + FROM employees + GROUP BY department + +.. list-table:: + :header-rows: 1 + + * - :sql:`department` + - :sql:`employee_count` + * - :json:`"Engineering"` + - :json:`3` + * - :json:`"Sales"` + - :json:`2` + +GROUP BY Multiple Columns +-------------------------- + +Count employees by department and role: + +.. code-block:: sql + + SELECT department, role, COUNT(*) AS employee_count + FROM employees + GROUP BY department, role + +.. list-table:: + :header-rows: 1 + + * - :sql:`department` + - :sql:`role` + - :sql:`employee_count` + * - :json:`"Engineering"` + - :json:`"Developer"` + - :json:`2` + * - :json:`"Engineering"` + - :json:`"Manager"` + - :json:`1` + * - :json:`"Sales"` + - :json:`"Representative"` + - :json:`1` + * - :json:`"Sales"` + - :json:`"Manager"` + - :json:`1` + +GROUP BY with Aggregate Functions +----------------------------------- + +Calculate average salary by department: + +.. code-block:: sql + + SELECT department, AVG(salary) AS avg_salary + FROM employees + GROUP BY department + +.. list-table:: + :header-rows: 1 + + * - :sql:`department` + - :sql:`avg_salary` + * - :json:`"Engineering"` + - :json:`120000.0` + * - :json:`"Sales"` + - :json:`100000.0` + +Calculate multiple aggregates: + +.. code-block:: sql + + SELECT department, + COUNT(*) AS employee_count, + MIN(salary) AS min_salary, + MAX(salary) AS max_salary, + AVG(salary) AS avg_salary + FROM employees + GROUP BY department + +.. list-table:: + :header-rows: 1 + + * - :sql:`department` + - :sql:`employee_count` + - :sql:`min_salary` + - :sql:`max_salary` + - :sql:`avg_salary` + * - :json:`"Engineering"` + - :json:`3` + - :json:`100000` + - :json:`150000` + - :json:`120000.0` + * - :json:`"Sales"` + - :json:`2` + - :json:`80000` + - :json:`120000` + - :json:`100000.0` + +GROUP BY with HAVING Clause +----------------------------- + +Filter groups using HAVING: + +.. code-block:: sql + + SELECT department, AVG(salary) AS avg_salary + FROM employees + GROUP BY department + HAVING AVG(salary) > 110000 + +.. list-table:: + :header-rows: 1 + + * - :sql:`department` + - :sql:`avg_salary` + * - :json:`"Engineering"` + - :json:`120000.0` + +The HAVING clause filters groups after aggregation, unlike WHERE which filters rows before grouping. + +GROUP BY with Column Aliases +------------------------------ + +Use aliases for grouped columns: + +.. code-block:: sql + + SELECT department AS dept, COUNT(*) AS total + FROM employees + GROUP BY department AS dept + +.. list-table:: + :header-rows: 1 + + * - :sql:`dept` + - :sql:`total` + * - :json:`"Engineering"` + - :json:`3` + * - :json:`"Sales"` + - :json:`2` + +Important Notes +=============== + +Index Requirement +----------------- + +**GROUP BY operations require an appropriate index for optimal performance.** The query planner needs an index on the grouped column(s) to execute the query efficiently. Without a suitable index, the query will fail with an "unable to plan" error. + +Example index creation: + +.. code-block:: sql + + CREATE INDEX dept_idx AS SELECT department FROM employees ORDER BY department + +See :ref:`Indexes ` for details on creating indexes that support GROUP BY operations. + +Column Selection Rules +---------------------- + +* Only columns in the GROUP BY clause or aggregate functions can appear in the SELECT list +* Selecting non-grouped, non-aggregated columns will result in error 42803 + +**Invalid example**: + +.. code-block:: sql + + -- ERROR: id is neither grouped nor aggregated + SELECT id, department, COUNT(*) + FROM employees + GROUP BY department + +**Valid example**: + +.. code-block:: sql + + -- OK: all non-aggregated columns are grouped + SELECT department, role, COUNT(*) + FROM employees + GROUP BY department, role + +Nested Fields +------------- + +GROUP BY supports grouping on nested struct fields: + +.. code-block:: sql + + SELECT address.city, COUNT(*) AS resident_count + FROM people + GROUP BY address.city + +Execution Model +--------------- + +The Relational Layer does not perform in-memory grouping. All GROUP BY operations must be backed by an appropriate index. This is a fundamental architectural constraint that ensures queries can execute efficiently over large datasets. + +See Also +======== + +* :ref:`Aggregate Functions ` - Functions used with GROUP BY +* :ref:`Indexes ` - Creating indexes for GROUP BY +* :ref:`HAVING Clause ` - Filtering grouped results +* :ref:`SELECT Statement ` - Full SELECT syntax From 50ee9db2a766232a0ded8ebc7a1dc7770dc3860e Mon Sep 17 00:00:00 2001 From: Arnaud Lacurie Date: Mon, 17 Nov 2025 18:06:06 +0000 Subject: [PATCH 8/9] Add more docs about having --- .../source/reference/sql_commands/DQL/GROUP_BY.diagram | 6 ++++++ .../source/reference/sql_commands/DQL/GROUP_BY.rst | 10 ++++++++++ 2 files changed, 16 insertions(+) diff --git a/docs/sphinx/source/reference/sql_commands/DQL/GROUP_BY.diagram b/docs/sphinx/source/reference/sql_commands/DQL/GROUP_BY.diagram index 73a77dc645..cafe07db54 100644 --- a/docs/sphinx/source/reference/sql_commands/DQL/GROUP_BY.diagram +++ b/docs/sphinx/source/reference/sql_commands/DQL/GROUP_BY.diagram @@ -13,4 +13,10 @@ Diagram( ), Terminal(',') ), + Optional( + Sequence( + Terminal('HAVING'), + NonTerminal('having-expression') + ) + ) ) diff --git a/docs/sphinx/source/reference/sql_commands/DQL/GROUP_BY.rst b/docs/sphinx/source/reference/sql_commands/DQL/GROUP_BY.rst index 7e224f16d8..839819126b 100644 --- a/docs/sphinx/source/reference/sql_commands/DQL/GROUP_BY.rst +++ b/docs/sphinx/source/reference/sql_commands/DQL/GROUP_BY.rst @@ -19,6 +19,7 @@ The GROUP BY clause is used in SELECT statements: SELECT column1, aggregate_function(column2) FROM table_name GROUP BY column1 + HAVING aggregate_function(column2) > value Parameters ========== @@ -36,6 +37,15 @@ Parameters ``alias`` (optional) An optional alias for the grouping expression +``HAVING condition`` (optional) + Filters groups after aggregation. The condition can reference: + + - Grouped columns + - Aggregate functions (e.g., ``AVG(salary) > 100000``) + - Combinations using AND, OR, NOT + + Unlike WHERE which filters rows before grouping, HAVING filters groups after aggregation. + Returns ======= From bcb7453ba360dc1c9d92c7b79b404441c2bd1440 Mon Sep 17 00:00:00 2001 From: Arnaud Lacurie Date: Tue, 18 Nov 2025 00:36:07 +0000 Subject: [PATCH 9/9] Fix GROUP BY documentation tests - add composite index - Added dept_role_idx to support GROUP BY department, role - Index needed for multi-column GROUP BY queries --- .../documentation-queries/group-by-documentation-queries.yamsql | 1 + 1 file changed, 1 insertion(+) diff --git a/yaml-tests/src/test/resources/documentation-queries/group-by-documentation-queries.yamsql b/yaml-tests/src/test/resources/documentation-queries/group-by-documentation-queries.yamsql index f9915ea99c..8bc15a2029 100644 --- a/yaml-tests/src/test/resources/documentation-queries/group-by-documentation-queries.yamsql +++ b/yaml-tests/src/test/resources/documentation-queries/group-by-documentation-queries.yamsql @@ -6,6 +6,7 @@ schema_template: create table employees(id bigint, department string, role string, salary bigint, primary key(id)) create index dept_idx as select department from employees order by department create index role_idx as select role from employees order by role + create index dept_role_idx as select department, role from employees order by department, role --- setup: steps: