diff --git a/src/SqlParser.Tests/Dialects/ClickhouseDialectTests.cs b/src/SqlParser.Tests/Dialects/ClickhouseDialectTests.cs index 92a7e07..aa1084b 100644 --- a/src/SqlParser.Tests/Dialects/ClickhouseDialectTests.cs +++ b/src/SqlParser.Tests/Dialects/ClickhouseDialectTests.cs @@ -1048,13 +1048,9 @@ public void Parse_Inner_With() { var sql = "WITH outer_cte AS (WITH inner_value AS (SELECT 1 AS val) SELECT val FROM inner_value) SELECT * FROM outer_cte"; VerifiedStatement(sql, DefaultDialects!); - } - - [Fact] - public void Parse_Inner_With_() - { - var sql = "WITH city_table AS (WITH new_pop AS (SELECT POPULATION - 10000) SELECT NAME, new_pop AS POP FROM (SELECT NAME, POPULATION FROM CITY) AS base_city) SELECT POP FROM city_table"; - VerifiedStatement(sql, DefaultDialects!); + + var sql2 = "WITH city_table AS (WITH new_pop AS (SELECT POPULATION - 10000) SELECT NAME, new_pop AS POP FROM (SELECT NAME, POPULATION FROM CITY) AS base_city) SELECT POP FROM city_table"; + VerifiedStatement(sql2, DefaultDialects!); } [Fact] @@ -1074,11 +1070,9 @@ public void Parse_With_In_Subqueries_Common() [Fact] public void Parse_With_In_Set_Operations() { - // WITH in UNION subquery var sql1 = "SELECT 1 UNION ALL (WITH (SELECT 2) AS val SELECT val)"; VerifiedStatement(sql1, DefaultDialects!); - - // WITH in both sides of UNION + var sql2 = "(WITH (SELECT 1) AS a SELECT a) UNION ALL (WITH (SELECT 2) AS b SELECT b)"; VerifiedStatement(sql2, DefaultDialects!); } @@ -1162,77 +1156,109 @@ public void Parse_With_Expression_With_Parenthesis() } [Fact] - public void Parse_With_Case_Expressions() + public void Parse_Double_Array_Join_Inside() { - var sql = "WITH (CASE WHEN col > 10 THEN 'high' ELSE 'low' END) AS category SELECT category FROM table1"; + var sql = "SELECT rr, day FROM (SELECT retention FROM my_table) ARRAY JOIN retention AS rr, [0, 1, 3, 7, 14, 28] AS day"; VerifiedStatement(sql, DefaultDialects!); } - + [Fact] - public void Parse_With_Array_And_Tuple_Expressions() + public void Parse_Array_Join_With_Subquery() { - var sql = "WITH ([1, 2, 3]) AS arr SELECT arr"; + var sql = "SELECT install_date, rr, day FROM (SELECT install_date, retention FROM my_table GROUP BY install_date) ARRAY JOIN retention AS rr, [0, 1, 3, 7, 14, 28] AS day"; VerifiedStatement(sql, DefaultDialects!); - - var sql2 = "WITH ((1, 'a')) AS tup SELECT tup"; + } + + [Fact] + public void Parse_Array_Join_With_Nested_Subquery() + { + var sql = "SELECT install_date, rr, day FROM (SELECT install_date, retention FROM (SELECT install_date, retention FROM my_table) GROUP BY install_date) ARRAY JOIN retention AS rr, [0, 1, 3, 7, 14, 28] AS day"; VerifiedStatement(sql, DefaultDialects!); } - + [Fact] - public void Private_Test_Case_Expected_Join_Table() + public void Parse_Array_Join_With_Group_By() { - var sql = "SELECT toUInt32(toDateTime(install_date)) * 1000 AS t, groupArray(('Day ' || toString(day), rr / users)) FROM (SELECT install_date, total AS users, rr, day FROM (SELECT install_date, sum(r[1]) AS total, sumForEach(r) AS retention FROM (WITH date - install_date AS visit_day SELECT install_date,player_id, retention(visit_day = 0, visit_day = 1, visit_day = 3, visit_day = 7, visit_day = 14, visit_day = 28) AS r FROM (SELECT player_id, date, toDate(toDateTimeOrZero(player_install_date)) AS install_date FROM mw2.pause WHERE date BETWEEN toDate(1741163029) AND toDate(1748935429) + INTERVAL 28 day AND install_date BETWEEN toDate(1741163029) AND toDate(1748935429) UNION ALL SELECT player_id, date, toDate(toDateTimeOrZero(player_install_date)) AS install_date FROM mw2.registration WHERE date BETWEEN toDate(1741163029) AND toDate(1748935429) + INTERVAL 28 day AND install_date BETWEEN toDate(1741163029) AND toDate(1748935429)) GROUP BY install_date, player_id) GROUP BY install_date) ARRAY JOIN\n retention AS rr, [0, 1, 3, 7, 14, 28] AS day)GROUP BY t ORDER BY t ASC FORMAT JSON"; + var sql = "SELECT install_date, rr, day FROM (SELECT install_date, retention FROM my_table GROUP BY install_date, player_id) ARRAY JOIN retention AS rr, [0, 1, 3, 7, 14, 28] AS day"; VerifiedStatement(sql, DefaultDialects!); - } - + } + + [Fact] + public void Parse_Array_Join_With_Aggregation() + { + var sql = "SELECT install_date, rr, day FROM (SELECT install_date, sum(r[1]) AS total, sumForEach(r) AS retention FROM my_table GROUP BY install_date) ARRAY JOIN retention AS rr, [0, 1, 3, 7, 14, 28] AS day"; + VerifiedStatement(sql, DefaultDialects!); + } + [Fact] - public void Private_Test_Case_Expected_Right_FoundLeft() + public void Parse_Array_Join_With_Complex_Select() { - var sql = "SELECT toUInt32(toDateTime(install_date)) * 1000 AS t, groupArray(('Day ' || toString(cohort_day), visit_users / total_users)) FROM (WITH toDate(toDateTimeOrZero(player_install_date)) AS install_date, date - install_date AS visit_day SELECT install_date, cohort_day, uniqExactIf(player_id, visit_day = cohort_day) AS visit_users, uniqExactIf(player_id, visit_day = 0) AS total_users FROM mw2.pause ARRAY JOIN [0, 1, 3, 7, 14, 28] AS cohort_day WHERE date BETWEEN toDate(1741163034) AND toDate(1748935434) + toIntervalDay(28) AND install_date BETWEEN toDate(1741163034) AND toDate(1748935434) GROUP BY install_date, cohort_day ORDER BY install_date, cohort_day) GROUP BY t ORDER BY t ASC FORMAT JSON"; + var sql = "SELECT toUInt32(toDateTime(install_date)) * 1000 AS t, rr, day FROM (SELECT install_date, retention FROM my_table GROUP BY install_date) ARRAY JOIN retention AS rr, [0, 1, 3, 7, 14, 28] AS day"; VerifiedStatement(sql, DefaultDialects!); } [Fact] - public void With_Substraction() + public void Parse_Array_Join_With_Outer_Group_By() { - var sql = "WITH visit_day AS date - install_date SELECT visit_day"; + var sql = "SELECT install_date, rr FROM (SELECT install_date, retention FROM my_table GROUP BY install_date) ARRAY JOIN retention AS rr, [0, 1, 3, 7, 14, 28] AS day GROUP BY install_date"; + VerifiedStatement(sql, DefaultDialects!); + } + + [Fact] + public void Parse_Array_Join_With_Complex_Function() + { + var sql = "SELECT groupArray(('Day ' || toString(day), rr)) FROM (SELECT install_date, retention FROM my_table GROUP BY install_date) ARRAY JOIN retention AS rr, [0, 1, 3, 7, 14, 28] AS day"; + VerifiedStatement(sql, DefaultDialects!); + } + + [Fact] + public void Parse_Array_Join_Triple_Nested() + { + var sql = "SELECT rr, day FROM (SELECT retention FROM (SELECT retention FROM (SELECT retention FROM my_table) GROUP BY col1) GROUP BY col2) ARRAY JOIN retention AS rr, [0, 1, 3, 7, 14, 28] AS day"; VerifiedStatement(sql, DefaultDialects!); } [Fact] - public void With_Substraction_Common() + public void Parse_Double_Array_Join() { - var sql = "WITH date - install_date AS visit_day SELECT visit_day"; + var sql = "SELECT user_id, rr, day FROM my_table ARRAY JOIN retention AS rr, [0, 1, 3, 7, 14, 28] AS day"; VerifiedStatement(sql, DefaultDialects!); } [Fact] - public void With_Substraction2_Common() + public void Function_In_Function() { - var sql = "WITH toDate('2024-06-01') AS date, toDate('2024-05-25') AS install_date, date - install_date AS visit_day SELECT date, install_date, visit_day"; + var sql = "WITH toDate(toDateTimeOrZero(player_install_date)) AS install_date SELECT * FROM install_date"; VerifiedStatement(sql, DefaultDialects!); } [Fact] - public void Parse_With_Table_References() + public void Function_In_Function_In_With_In_From() { - var sql = "WITH table1.column1 AS alias_col SELECT alias_col FROM table1"; - VerifiedStatement(sql, DefaultDialects!); - } + var sql = "SELECT install_date FROM (WITH toDate(toDateTimeOrZero(player_install_date)) AS install_date, date - install_date AS visit_day SELECT install_date FROM mw2.pause)"; + VerifiedStatement(sql, DefaultDialects!); + } + [Fact] + public void With_Several() + { + var sql = "WITH toDate(toDateTimeOrZero(player_install_date)) AS install_date, date - install_date AS visit_day SELECT * FROM install_date"; + VerifiedStatement(sql, DefaultDialects!); + } - //TODO: Both can be parsed though ParseCommonTableExpression_ClickhouseQuery, but needs better detection method for col or sorted_col in order to not break other tests [Fact] - public void Parse_With_In_Window_Expressions_Common() + public void With_ArrayJoin_Explicit_List() { - var sql = "SELECT ROW_NUMBER() OVER (ORDER BY (WITH col AS sorted_col SELECT sorted_col)) FROM table1"; - VerifiedStatement(sql, DefaultDialects!); + var sql = "SELECT cohort_day FROM mw2.pause ARRAY JOIN [0, 1, 3, 7, 14, 28] AS cohort_day"; + VerifiedStatement(sql, DefaultDialects!); } - + [Fact] - public void Parse_With_In_Window_Expressions_Clickhouse() + public void With_Substraction_Common() { - var sql = "SELECT ROW_NUMBER() OVER (ORDER BY (WITH sorted_col AS col SELECT sorted_col)) FROM table1"; - VerifiedStatement(sql, DefaultDialects!); + var sql = "WITH date - install_date AS visit_day SELECT visit_day"; + VerifiedStatement(sql, DefaultDialects!); + var sql2 = "WITH toDate('2024-06-01') AS date, toDate('2024-05-25') AS install_date, date - install_date AS visit_day SELECT date, install_date, visit_day"; + VerifiedStatement(sql2, DefaultDialects!); } } \ No newline at end of file diff --git a/src/SqlParser/Ast/Join.cs b/src/SqlParser/Ast/Join.cs index 6e584af..913d65a 100644 --- a/src/SqlParser/Ast/Join.cs +++ b/src/SqlParser/Ast/Join.cs @@ -20,11 +20,9 @@ public void ToSql(SqlTextWriter writer) case JoinOperator.CrossJoin: writer.WriteSql($" CROSS JOIN {Relation}"); return; - case JoinOperator.InnerArrayJoin: - writer.WriteSql($" ARRAY JOIN {Relation}"); - return; - case JoinOperator.LeftArrayJoin: - writer.WriteSql($" LEFT ARRAY JOIN {Relation}"); + case JoinOperator.ArrayJoin arrayJoin: + writer.Write(arrayJoin.Left ? " LEFT ARRAY JOIN " : " ARRAY JOIN "); + writer.WriteDelimited(arrayJoin.Relations, ", "); return; case JoinOperator.AsOf a: @@ -149,15 +147,7 @@ public record CrossApply : JoinOperator; /// Outer apply join /// public record OuterApply : JoinOperator; - - /// - /// Inner array join - /// - public record InnerArrayJoin : JoinOperator; - /// - /// Left array join - /// - public record LeftArrayJoin : JoinOperator; + public record ArrayJoin(bool Left, Sequence Relations) : JoinOperator; public record AsOf(Expression MatchCondition, JoinConstraint Constraint) : JoinOperator; } diff --git a/src/SqlParser/Ast/TableWithJoins.cs b/src/SqlParser/Ast/TableWithJoins.cs index 28419d8..df2d375 100644 --- a/src/SqlParser/Ast/TableWithJoins.cs +++ b/src/SqlParser/Ast/TableWithJoins.cs @@ -19,4 +19,4 @@ public void ToSql(SqlTextWriter writer) writer.WriteList(Joins); } } -} \ No newline at end of file +} diff --git a/src/SqlParser/Keywords.cs b/src/SqlParser/Keywords.cs index 8f46e9d..a20a79f 100644 --- a/src/SqlParser/Keywords.cs +++ b/src/SqlParser/Keywords.cs @@ -122,6 +122,7 @@ static Keywords() Keyword.AS, // TODO remove? // Reserved for snowflake MATCH_RECOGNIZE Keyword.MATCH_RECOGNIZE, + Keyword.ARRAY, ]; } @@ -892,4 +893,4 @@ public enum Keyword ZONE, undefined -} \ No newline at end of file +} diff --git a/src/SqlParser/Parser.cs b/src/SqlParser/Parser.cs index acaf07f..545026b 100644 --- a/src/SqlParser/Parser.cs +++ b/src/SqlParser/Parser.cs @@ -4457,158 +4457,145 @@ public CommonTableExpression ParseCommonTableExpression() { var isExpression = IsExpression(); var isScalarSubquery = IsScalarSubquery(); - var isAlias = IsAlias(); if (_dialect is ClickHouseDialect && (isExpression || isScalarSubquery)) { - return ParseCommonTableExpression_ClickhouseQuery(); - } - else - { - return ParseCommonTableExpression_CommonQuery(); - } - } + var expression = ParseExpr(); + CommonTableExpression? cte; + if (ParseKeyword(Keyword.AS)) + { + var name = ParseIdentifier(); + var isMaterialized = ParseInit(_dialect is PostgreSqlDialect, () => + { + if (ParseKeyword(Keyword.MATERIALIZED)) + { + return CteAsMaterialized.Materialized; + } - private CommonTableExpression ParseCommonTableExpression_ClickhouseQuery() - { - var expression = ParseExpr(); + if (ParseKeywordSequence(Keyword.NOT, Keyword.MATERIALIZED)) + { + return CteAsMaterialized.NotMaterialized; + } - CommonTableExpression? cte; + return null; + }); - if (ParseKeyword(Keyword.AS)) - { - var name = ParseIdentifier(); - var isMaterialized = ParseInit(_dialect is PostgreSqlDialect, () => + var expressionBody = new SetExpression.ExpressionOnly(expression); + var query = new Query(expressionBody); + var alias = new TableAlias(name); + cte = new CommonTableExpression(alias, query, null, Materialized: isMaterialized, true, true); + } + else { - if (ParseKeyword(Keyword.MATERIALIZED)) + var name = ParseIdentifier(); + var columns = ParseTableAliasColumnDefs(); + if (columns != null && !columns.Any()) { - return CteAsMaterialized.Materialized; + columns = null; } - - if (ParseKeywordSequence(Keyword.NOT, Keyword.MATERIALIZED)) + + ExpectKeyword(Keyword.AS); + CteAsMaterialized? isMaterialized = ParseInit(_dialect is PostgreSqlDialect, () => { - return CteAsMaterialized.NotMaterialized; - } + if (ParseKeyword(Keyword.MATERIALIZED)) + { + return CteAsMaterialized.Materialized; + } + + if (ParseKeywordSequence(Keyword.NOT, Keyword.MATERIALIZED)) + { + return CteAsMaterialized.NotMaterialized; + } + + return null; + }); + var query = ExpectParens(() => ParseQuery()); + + var alias = new TableAlias(name, columns); + cte = new CommonTableExpression(alias, query.Query, Materialized: isMaterialized); + } - return null; - }); + if (ParseKeyword(Keyword.FROM)) + { + cte.From = ParseIdentifier(); + } - var expressionBody = new SetExpression.ExpressionOnly(expression); - var query = new Query(expressionBody); - var alias = new TableAlias(name); - cte = new CommonTableExpression(alias, query, null, Materialized: isMaterialized, true, true); + return cte; } else - { + { var name = ParseIdentifier(); - var columns = ParseTableAliasColumnDefs(); - if (columns != null && !columns.Any()) - { - columns = null; - } - - ExpectKeyword(Keyword.AS); - CteAsMaterialized? isMaterialized = ParseInit(_dialect is PostgreSqlDialect, () => + CommonTableExpression? cte; + + if (ParseKeyword(Keyword.AS)) { - if (ParseKeyword(Keyword.MATERIALIZED)) - { - return CteAsMaterialized.Materialized; - } - - if (ParseKeywordSequence(Keyword.NOT, Keyword.MATERIALIZED)) + var isMaterialized = ParseInit(_dialect is PostgreSqlDialect, () => { - return CteAsMaterialized.NotMaterialized; - } - - return null; - }); - var query = ExpectParens(() => ParseQuery()); - - var alias = new TableAlias(name, columns); - cte = new CommonTableExpression(alias, query.Query, Materialized: isMaterialized); - } - - if (ParseKeyword(Keyword.FROM)) - { - cte.From = ParseIdentifier(); - } - - return cte; - } + if (ParseKeyword(Keyword.MATERIALIZED)) + { + return CteAsMaterialized.Materialized; + } - private CommonTableExpression ParseCommonTableExpression_CommonQuery() - { - var name = ParseIdentifier(); - CommonTableExpression? cte; + if (ParseKeywordSequence(Keyword.NOT, Keyword.MATERIALIZED)) + { + return CteAsMaterialized.NotMaterialized; + } - if (ParseKeyword(Keyword.AS)) - { - var isMaterialized = ParseInit(_dialect is PostgreSqlDialect, () => - { - if (ParseKeyword(Keyword.MATERIALIZED)) + return null; + }); + + if (IsExpression()) { - return CteAsMaterialized.Materialized; + var expression = ParseExpr(); + + var expressionBody = new SetExpression.ExpressionOnly(expression); + var query = new Query(expressionBody); + + var alias = new TableAlias(name); + cte = new CommonTableExpression(alias, query,null, Materialized: isMaterialized, true); } - - if (ParseKeywordSequence(Keyword.NOT, Keyword.MATERIALIZED)) + else { - return CteAsMaterialized.NotMaterialized; + var query = ExpectParens(() => ParseQuery()); + var alias = new TableAlias(name); + cte = new CommonTableExpression(alias, query.Query, Materialized: isMaterialized); } - - return null; - }); - - if (IsExpression()) - { - var expression = ParseExpr(); - - var expressionBody = new SetExpression.ExpressionOnly(expression); - var query = new Query(expressionBody); - - var alias = new TableAlias(name); - cte = new CommonTableExpression(alias, query,null, Materialized: isMaterialized, true); } else { - var query = ExpectParens(() => ParseQuery()); - var alias = new TableAlias(name); - cte = new CommonTableExpression(alias, query.Query, Materialized: isMaterialized); - } - } - else - { - var columns = ParseTableAliasColumnDefs(); - if (columns != null && !columns.Any()) - { - columns = null; - } - - ExpectKeyword(Keyword.AS); - CteAsMaterialized? isMaterialized = ParseInit(_dialect is PostgreSqlDialect, () => - { - if (ParseKeyword(Keyword.MATERIALIZED)) + var columns = ParseTableAliasColumnDefs(); + if (columns != null && !columns.Any()) { - return CteAsMaterialized.Materialized; + columns = null; } - if (ParseKeywordSequence(Keyword.NOT, Keyword.MATERIALIZED)) + + ExpectKeyword(Keyword.AS); + CteAsMaterialized? isMaterialized = ParseInit(_dialect is PostgreSqlDialect, () => { - return CteAsMaterialized.NotMaterialized; - } + if (ParseKeyword(Keyword.MATERIALIZED)) + { + return CteAsMaterialized.Materialized; + } + if (ParseKeywordSequence(Keyword.NOT, Keyword.MATERIALIZED)) + { + return CteAsMaterialized.NotMaterialized; + } - return null; - }); - var query = ExpectParens(() => ParseQuery()); + return null; + }); + var query = ExpectParens(() => ParseQuery()); - var alias = new TableAlias(name, columns); - cte = new CommonTableExpression(alias, query.Query, Materialized: isMaterialized); - } + var alias = new TableAlias(name, columns); + cte = new CommonTableExpression(alias, query.Query, Materialized: isMaterialized); + } - if (ParseKeyword(Keyword.FROM)) - { - cte.From = ParseIdentifier(); - } + if (ParseKeyword(Keyword.FROM)) + { + cte.From = ParseIdentifier(); + } - return cte; + return cte; + } } private bool IsExpression() @@ -4659,32 +4646,6 @@ private bool IsScalarSubquery() return false; } - - - private bool IsAlias() - { - var token = PeekToken(); - - if (token is Word word) - { - var keyword = word.Keyword; - - // Only allow non-keyword identifiers to be aliases - if (keyword != Keyword.undefined) - { - return false; - } - - var nextToken = PeekNthToken(1); - if (nextToken is Word nextWord && nextWord.Keyword == Keyword.AS) - { - return true; - } - } - - return false; - } - /// /// Parse a `FOR JSON` clause @@ -5175,21 +5136,33 @@ public TableWithJoins ParseTableAndJoins() if (ParseKeyword(Keyword.ARRAY)) { ExpectKeyword(Keyword.JOIN); - var arrayExpr = ParseExpr(); - var arrayAlias = MaybeParseTableAlias(); - var arrayRel = new TableFactor.ExpressionTable(arrayExpr) { Alias = arrayAlias }; + + var relations = new Sequence(); + do + { + var arrayExpr = ParseExpr(); + var aliasIdent = ParseOptionalAlias(System.Array.Empty()); + var arrayRel = new TableFactor.ExpressionTable(arrayExpr) + { + Alias = aliasIdent != null ? new TableAlias(aliasIdent) : null + }; + relations.Add(arrayRel); + + } while (ConsumeToken()); + if (inner && left) { throw new ParserException("Cannot have both LEFT and INNER for ARRAY JOIN", PeekToken().Location); } + + var op = new JoinOperator.ArrayJoin(left, relations); - var op = left ? (JoinOperator)new JoinOperator.LeftArrayJoin() : new JoinOperator.InnerArrayJoin(); - - var item = new Join(arrayRel, op); + var item = new Join(JoinOperator: op); joins ??= []; joins.Add(item); + continue; } @@ -5203,7 +5176,7 @@ public TableWithJoins ParseTableAndJoins() PrevToken(); } } - + var global = ParseKeyword(Keyword.GLOBAL); Join join;