From c94f7543835c9019e248bcfd7a2efbab23f2ff63 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 23 Feb 2026 17:44:42 +0000 Subject: [PATCH 1/2] Add failing golden test for Colorado claims dbt/Snowflake SQL This test exposes a bracket validation bug where jinja if/else branches containing SQL with closing parens cause the validator to see brackets from *both* mutually exclusive branches, driving depth negative. https://claude.ai/code/session_014YynqZn3ATqFB64gfvD8m9 --- .../222_colorado_claims_extract.sql | 327 ++++++++++++++++++ tests/golden_test.rs | 4 + 2 files changed, 331 insertions(+) create mode 100644 tests/data/unformatted/222_colorado_claims_extract.sql diff --git a/tests/data/unformatted/222_colorado_claims_extract.sql b/tests/data/unformatted/222_colorado_claims_extract.sql new file mode 100644 index 0000000..d4fd2e8 --- /dev/null +++ b/tests/data/unformatted/222_colorado_claims_extract.sql @@ -0,0 +1,327 @@ +-- disable-parser: if statement creates invalid sql +{{ config( + materialized="table", + meta={'final_schema': 'integrations'} +) }} +/* COLORADO CLAIMS MEDICAL EXTRACT + dbtmodel: colorado_all_payers_claim_medical.sql + Purpose: + - Pull all data from transform.colorado_all_payers_claim_stage and format them as per the Colorado APCD requirements + - Remove the fields that are for internal purposes + - DETAILS of REPORT in this link: + -- https://civhc.org/wp-content/uploads/2024/12/Data-Submission-Guide-DSG-v-16-Final.pdf +*/ +WITH claims AS ( + SELECT claims.* + FROM {{ ref('colorado_all_payers_claim_stage') }} AS claims + WHERE target_month = TRY_TO_TIMESTAMP('{{ var("data_anchor_month") }}') +) +, claims_header_amts AS ( + SELECT * + , MC063::FLOAT + MC064::FLOAT + MC065::FLOAT + MC066::FLOAT + MC067::FLOAT AS amt_per_record + , IFF(MC220 = 'Y', amt_per_record, 0.0) AS amt_vision + , IFF(MC209 = 'Y', amt_per_record, 0.0) AS amt_dental + , IFF(MC209 != 'Y' AND MC220 != 'Y', amt_per_record, 0.0) AS amt_other + FROM claims +) +, claims_count AS ( + 
SELECT COUNT(*) AS claim_count + , TO_CHAR( + TRY_TO_TIMESTAMP('{{ var("data_anchor_month") }}') + , 'YYYYMM' + ) AS report_month + , SUM(amt_vision) AS total_amt_vision + , SUM(amt_dental) AS total_amt_dental + , SUM(amt_other) AS total_other_amt + , SUM(amt_per_record) AS total_amt + FROM claims_header_amts +) +, mem_eligible AS ( + SELECT COUNT( DISTINCT + CASE WHEN ME152 = 'Y' THEN member_id ELSE NULL END + ) AS cnt_mem_vision_eligible + , COUNT( DISTINCT + CASE WHEN ME020 = 'Y' THEN member_id ELSE NULL END + ) AS cnt_mem_dental_eligible + , COUNT( DISTINCT + CASE + WHEN ME018 = 'Y' OR ME123 = 'Y' THEN member_id + ELSE NULL + END + ) AS cnt_all_mem + FROM {{ ref('colorado_all_payers_member_eligibility_stage') }} +) +, claim_header_fields AS ( + SELECT a.* + , b.* + , REPLACE( + ROUND( + DIV0(a.total_amt * 1.0, b.cnt_all_mem)::FLOAT + , 2 + ) + , '.' + , '' + ) AS HD007 + , REPLACE( + ROUND( + DIV0( + a.total_amt_dental * 1.0 + , b.cnt_mem_dental_eligible + )::FLOAT + , 2 + ) + , '.' + , '' + ) AS HD009 + , REPLACE( + ROUND( + DIV0( + a.total_amt_vision * 1.0 + , b.cnt_mem_vision_eligible + )::FLOAT + , 2 + ) + , '.' 
+ , '' + ) AS HD010 + FROM claims_count AS a + , mem_eligible AS b +) +, header_stage AS ( + SELECT CONCAT_WS( + '|' + , 'HD' -- HD001 HEADER INDICATOR + , 'MC' -- HD002 RECORD TYPE + , 'COC0135' -- HD003 PAYER CODE + , 'DHP_COC0135' -- HD004 PAYER NAME + , report_month -- HD005 BEGINNING MONTH + , report_month -- HD006 ENDING MONTH + , IFNULL(claim_count, 0) -- HD007 RECORD COUNT + , HD007 -- HD008 MED_BH PMPM + , '' -- HD009 PHARMACY PMPM (leave blank) + , HD009 -- HD010 DENTAL PMPM + , HD010 -- HD011 VISION PMPM + , CASE WHEN '{{ var("file_env") }}' = 'TEST' THEN 'T' ELSE 'P' END -- HD012 FILE TYPE INDICATOR (P or T) + ) AS text_blob + , 1 AS chunk_order + FROM claim_header_fields +) +, base_stage AS ( + {% set all_columns = adapter.get_columns_in_relation(ref('colorado_all_payers_claim_stage')) %} + {% set except_col_names=["FINALIZED_DATE_EASTERN", "ADJUDICATION_ID", "LINE_ADJUDICATION_ID","PLAN_TYPE", "FIRST_SERVICE_DATE", "TARGET_MONTH", "CLAIM_STAGE_ID","BILLING_PROVIDER_RECORD_LOCATOR","RENDERING_PROVIDER_RECORD_LOCATOR"] %} + {% set col_names_to_hardcode=["MC999999"] %} + -- create data rows with pipe-delimited values + SELECT + concat_ws('|', + {%- for col in all_columns if col.name not in except_col_names %} + IFNULL( + {%- if col.name in col_names_to_hardcode %} + '20000219', '') + {%- else %} + REPLACE(REPLACE({{ col.name }},',',''),'\n',''), '') + {% endif %} + {%- if not loop.last %} {{ ',' }} + {% endif %} + {%- endfor %}) as text_blob + , 2 as chunk_order + FROM claims +) +, trailer_stage AS ( + SELECT CONCAT_WS( + '|' + , 'TR' -- TR001 TRAILER INDICATOR + , 'MC' -- TR002 RECORD TYPE + , 'COC0135' -- TR003 PAYER CODE + , 'DHP_COC0135' -- TR004 PAYER NAME + , report_month -- TR005 BEGINNING MONTH + , report_month -- TR006 ENDING MONTH + , to_char(current_timestamp, 'yyyymmdd') -- TR007 DATE CREATED + ) AS text_blob + , 3 AS chunk_order + FROM claims_count +) +, aggregated AS ( + SELECT *, TRY_TO_TIMESTAMP('{{ var("data_anchor_month") }}') AS 
target_month + FROM header_stage + UNION ALL + SELECT *, TRY_TO_TIMESTAMP('{{ var("data_anchor_month") }}') AS target_month + FROM base_stage + UNION ALL + SELECT *, TRY_TO_TIMESTAMP('{{ var("data_anchor_month") }}') AS target_month + FROM trailer_stage +) +SELECT + text_blob, chunk_order, target_month, + {{ dbt_utils.generate_surrogate_key( + [ + 'target_month', + 'text_blob' + ] + ) + }} AS claim_medical_id + FROM aggregated + ORDER BY chunk_order +)))))__SQLFMT_OUTPUT__((((( +-- disable-parser: if statement creates invalid sql +{{ config( + materialized="table", + meta={'final_schema': 'integrations'} +) }} +/* COLORADO CLAIMS MEDICAL EXTRACT + dbtmodel: colorado_all_payers_claim_medical.sql + Purpose: + - Pull all data from transform.colorado_all_payers_claim_stage and format them as per the Colorado APCD requirements + - Remove the fields that are for internal purposes + - DETAILS of REPORT in this link: + -- https://civhc.org/wp-content/uploads/2024/12/Data-Submission-Guide-DSG-v-16-Final.pdf +*/ +WITH claims AS ( + SELECT claims.* + FROM {{ ref('colorado_all_payers_claim_stage') }} AS claims + WHERE target_month = TRY_TO_TIMESTAMP('{{ var("data_anchor_month") }}') +) +, claims_header_amts AS ( + SELECT * + , MC063::FLOAT + MC064::FLOAT + MC065::FLOAT + MC066::FLOAT + MC067::FLOAT AS amt_per_record + , IFF(MC220 = 'Y', amt_per_record, 0.0) AS amt_vision + , IFF(MC209 = 'Y', amt_per_record, 0.0) AS amt_dental + , IFF(MC209 != 'Y' AND MC220 != 'Y', amt_per_record, 0.0) AS amt_other + FROM claims +) +, claims_count AS ( + SELECT COUNT(*) AS claim_count + , TO_CHAR( + TRY_TO_TIMESTAMP('{{ var("data_anchor_month") }}') + , 'YYYYMM' + ) AS report_month + , SUM(amt_vision) AS total_amt_vision + , SUM(amt_dental) AS total_amt_dental + , SUM(amt_other) AS total_other_amt + , SUM(amt_per_record) AS total_amt + FROM claims_header_amts +) +, mem_eligible AS ( + SELECT COUNT( DISTINCT + CASE WHEN ME152 = 'Y' THEN member_id ELSE NULL END + ) AS cnt_mem_vision_eligible + , 
COUNT( DISTINCT + CASE WHEN ME020 = 'Y' THEN member_id ELSE NULL END + ) AS cnt_mem_dental_eligible + , COUNT( DISTINCT + CASE + WHEN ME018 = 'Y' OR ME123 = 'Y' THEN member_id + ELSE NULL + END + ) AS cnt_all_mem + FROM {{ ref('colorado_all_payers_member_eligibility_stage') }} +) +, claim_header_fields AS ( + SELECT a.* + , b.* + , REPLACE( + ROUND( + DIV0(a.total_amt * 1.0, b.cnt_all_mem)::FLOAT + , 2 + ) + , '.' + , '' + ) AS HD007 + , REPLACE( + ROUND( + DIV0( + a.total_amt_dental * 1.0 + , b.cnt_mem_dental_eligible + )::FLOAT + , 2 + ) + , '.' + , '' + ) AS HD009 + , REPLACE( + ROUND( + DIV0( + a.total_amt_vision * 1.0 + , b.cnt_mem_vision_eligible + )::FLOAT + , 2 + ) + , '.' + , '' + ) AS HD010 + FROM claims_count AS a + , mem_eligible AS b +) +, header_stage AS ( + SELECT CONCAT_WS( + '|' + , 'HD' -- HD001 HEADER INDICATOR + , 'MC' -- HD002 RECORD TYPE + , 'COC0135' -- HD003 PAYER CODE + , 'DHP_COC0135' -- HD004 PAYER NAME + , report_month -- HD005 BEGINNING MONTH + , report_month -- HD006 ENDING MONTH + , IFNULL(claim_count, 0) -- HD007 RECORD COUNT + , HD007 -- HD008 MED_BH PMPM + , '' -- HD009 PHARMACY PMPM (leave blank) + , HD009 -- HD010 DENTAL PMPM + , HD010 -- HD011 VISION PMPM + , CASE WHEN '{{ var("file_env") }}' = 'TEST' THEN 'T' ELSE 'P' END -- HD012 FILE TYPE INDICATOR (P or T) + ) AS text_blob + , 1 AS chunk_order + FROM claim_header_fields +) +, base_stage AS ( + {% set all_columns = adapter.get_columns_in_relation(ref('colorado_all_payers_claim_stage')) %} + {% set except_col_names=["FINALIZED_DATE_EASTERN", "ADJUDICATION_ID", "LINE_ADJUDICATION_ID","PLAN_TYPE", "FIRST_SERVICE_DATE", "TARGET_MONTH", "CLAIM_STAGE_ID","BILLING_PROVIDER_RECORD_LOCATOR","RENDERING_PROVIDER_RECORD_LOCATOR"] %} + {% set col_names_to_hardcode=["MC999999"] %} + -- create data rows with pipe-delimited values + SELECT + concat_ws('|', + {%- for col in all_columns if col.name not in except_col_names %} + IFNULL( + {%- if col.name in col_names_to_hardcode %} + '20000219', 
'') + {%- else %} + REPLACE(REPLACE({{ col.name }},',',''),'\n',''), '') + {% endif %} + {%- if not loop.last %} {{ ',' }} + {% endif %} + {%- endfor %}) as text_blob + , 2 as chunk_order + FROM claims +) +, trailer_stage AS ( + SELECT CONCAT_WS( + '|' + , 'TR' -- TR001 TRAILER INDICATOR + , 'MC' -- TR002 RECORD TYPE + , 'COC0135' -- TR003 PAYER CODE + , 'DHP_COC0135' -- TR004 PAYER NAME + , report_month -- TR005 BEGINNING MONTH + , report_month -- TR006 ENDING MONTH + , to_char(current_timestamp, 'yyyymmdd') -- TR007 DATE CREATED + ) AS text_blob + , 3 AS chunk_order + FROM claims_count +) +, aggregated AS ( + SELECT *, TRY_TO_TIMESTAMP('{{ var("data_anchor_month") }}') AS target_month + FROM header_stage + UNION ALL + SELECT *, TRY_TO_TIMESTAMP('{{ var("data_anchor_month") }}') AS target_month + FROM base_stage + UNION ALL + SELECT *, TRY_TO_TIMESTAMP('{{ var("data_anchor_month") }}') AS target_month + FROM trailer_stage +) +SELECT + text_blob, chunk_order, target_month, + {{ dbt_utils.generate_surrogate_key( + [ + 'target_month', + 'text_blob' + ] + ) + }} AS claim_medical_id + FROM aggregated + ORDER BY chunk_order diff --git a/tests/golden_test.rs b/tests/golden_test.rs index bad0137..f6abe96 100644 --- a/tests/golden_test.rs +++ b/tests/golden_test.rs @@ -463,6 +463,10 @@ golden_test!( golden_unformatted_221_dbt_config_dollar_quoted, "tests/data/unformatted/221_dbt_config_dollar_quoted.sql" ); +golden_test!( + golden_unformatted_222_colorado_claims_extract, + "tests/data/unformatted/222_colorado_claims_extract.sql" +); // ============================================================================= // Unformatted golden tests — 300-series (Jinja formatting) From 1768b6c925881e3038b28332d56d92b1bc52df11 Mon Sep 17 00:00:00 2001 From: brian moore Date: Mon, 23 Feb 2026 13:49:52 -0500 Subject: [PATCH 2/2] add perf check in --- Cargo.lock | 64 ++ Cargo.toml | 1 + src/analyzer.rs | 26 +- src/formatter.rs | 12 +- src/lexer.rs | 282 ++++---- src/line.rs | 8 +- 
src/merger.rs | 8 +- src/node.rs | 33 +- src/node_manager.rs | 94 +-- src/operator_precedence.rs | 4 +- src/segment.rs | 4 +- src/splitter.rs | 16 +- .../222_colorado_claims_extract.sql | 307 ++++----- .../222_jinja_unbalanced_brackets.sql | 61 ++ tests/golden_test.rs | 618 +++++------------- 15 files changed, 698 insertions(+), 840 deletions(-) create mode 100644 tests/data/unformatted/222_jinja_unbalanced_brackets.sql diff --git a/Cargo.lock b/Cargo.lock index d570a21..bf188f7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -638,6 +638,48 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "phf" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" +dependencies = [ + "phf_macros", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" +dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_macros" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "phf_shared" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project-lite" version = "0.2.16" @@ -752,6 +794,21 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = 
"rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" + [[package]] name = "rayon" version = "1.11.0" @@ -910,6 +967,12 @@ version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" +[[package]] +name = "siphasher" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" + [[package]] name = "smallvec" version = "1.15.1" @@ -931,6 +994,7 @@ dependencies = [ "indicatif", "insta", "memchr", + "phf", "predicates", "pretty_assertions", "serde", diff --git a/Cargo.toml b/Cargo.toml index 158d2e2..dc3c1db 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,7 @@ termcolor = "1" smallvec = { version = "1", features = ["union"] } compact_str = "0.8" memchr = "2" +phf = { version = "0.11", features = ["macros"] } [dev-dependencies] pretty_assertions = "1" diff --git a/src/analyzer.rs b/src/analyzer.rs index 8557b4c..002d2e8 100644 --- a/src/analyzer.rs +++ b/src/analyzer.rs @@ -590,7 +590,7 @@ impl Analyzer { let mut line = Line::new(prev); line.append_node(idx); - if !self.arena[idx].formatting_disabled.is_empty() { + if self.arena[idx].formatting_disabled { line.formatting_disabled = true; } self.line_buffer.push(line); @@ -610,7 +610,7 @@ impl Analyzer { } for &idx in &self.node_buffer { - if !self.arena[idx].formatting_disabled.is_empty() { + if self.arena[idx].formatting_disabled { line.formatting_disabled = true; break; } @@ -825,8 +825,13 @@ impl Analyzer { } /// Post-lex validation: check for unmatched closing 
brackets. + /// Jinja `{% if %}`/`{% else %}`/`{% endif %}` branches are alternatives, + /// not sequential, so each branch is validated against the depth at the + /// block start. fn validate_brackets(&self) -> Result<(), SqlfmtError> { let mut depth = 0i32; + // Stack of (depth_at_block_start, jinja_nesting_level) for {% if %}/{% for %} etc. + let mut jinja_depth_stack: Vec = Vec::new(); for node in &self.arena { match node.token.token_type { TokenType::BracketOpen | TokenType::StatementStart => { @@ -834,13 +839,28 @@ impl Analyzer { } TokenType::BracketClose | TokenType::StatementEnd => { depth -= 1; - if depth < 0 { + if depth < 0 && jinja_depth_stack.is_empty() { return Err(SqlfmtError::Bracket(format!( "Encountered closing bracket '{}' without a matching opening bracket", node.token.text ))); } } + TokenType::JinjaBlockStart => { + jinja_depth_stack.push(depth); + } + TokenType::JinjaBlockKeyword => { + // {% else %}, {% elif %}: restore depth to block start + if let Some(&saved_depth) = jinja_depth_stack.last() { + depth = saved_depth; + } + } + TokenType::JinjaBlockEnd => { + // {% endif %}, {% endfor %}: restore depth to block start + if let Some(saved_depth) = jinja_depth_stack.pop() { + depth = saved_depth; + } + } _ => {} } } diff --git a/src/formatter.rs b/src/formatter.rs index dee1bbe..e295729 100644 --- a/src/formatter.rs +++ b/src/formatter.rs @@ -262,12 +262,8 @@ fn split_line_at_jinja(line: Line, split_pos: usize, arena: &mut Vec) -> ( prev_idx, CompactString::new(""), CompactString::from("\n"), - prev_idx - .map(|i| arena[i].open_brackets.clone()) - .unwrap_or_default(), - prev_idx - .map(|i| arena[i].open_jinja_blocks.clone()) - .unwrap_or_default(), + prev_idx.map(|i| arena[i].bracket_depth).unwrap_or(0), + prev_idx.map(|i| arena[i].jinja_depth).unwrap_or(0), ); let nl_idx = arena.len(); arena.push(nl_node); @@ -331,8 +327,8 @@ fn find_jinja_block_end( /// Adjust bracket depth of a line's first content node to a target depth. 
fn adjust_bracket_depth(line: &Line, target_depth: usize, arena: &mut [Node]) { if let Some(node_idx) = line.first_content_node_idx(arena) { - while arena[node_idx].open_brackets.len() > target_depth { - arena[node_idx].open_brackets.pop(); + if (arena[node_idx].bracket_depth as usize) > target_depth { + arena[node_idx].bracket_depth = target_depth as u16; } } } diff --git a/src/lexer.rs b/src/lexer.rs index c69c2a4..ee7ece3 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -2398,125 +2398,130 @@ fn classify_keyword<'a>( (&A_NAME, full_text) } +/// Perfect-hash set of unterminated keywords (lowercased). +static UNTERM_KEYWORDS: phf::Set<&'static str> = phf::phf_set! { + "with recursive", "with", + "select as struct", "select as value", "select all", + "select distinct", "select", + "global inner join", "global left outer join", "global left join", + "global right outer join", "global right join", "global full outer join", + "global full join", "global any join", "global join", + "any left outer join", "any left join", "any right outer join", "any right join", + "any inner join", "any full outer join", "any full join", + "paste join", + "natural full outer join", "natural full join", "natural left outer join", + "natural left join", "natural right outer join", "natural right join", + "natural inner join", "natural join", + "cross lateral join", "cross join", + "left outer join", "left semi join", "left anti join", "left asof join", "left join", + "right outer join", "right semi join", "right anti join", "right join", + "full outer join", "full join", + "inner join", "semi join", "anti join", "asof left join", "asof join", + "positional join", "any join", "lateral join", "join", + "lateral view outer", "lateral view", "lateral", + "prewhere", "where", + "group by", "cluster by", "distribute by", "sort by", + "having", "qualify", "window", + "order by", "limit", + "fetch first", "fetch next", + "for no key update", "for key share", "for update", "for share", + "when", 
"then", "else", + "partition by", + "values", "returning", "into", + "match_recognize", "connect", "start with", +}; + /// Check if a (lowercased) keyword is an unterminated keyword. fn is_unterm_keyword(kw: &str) -> bool { - matches!( - kw, - "with recursive" | "with" - | "select as struct" | "select as value" | "select all" - | "select distinct" | "select" - // select top N handled by multi-word - | "global inner join" | "global left outer join" | "global left join" - | "global right outer join" | "global right join" | "global full outer join" - | "global full join" | "global any join" | "global join" - | "any left outer join" | "any left join" | "any right outer join" | "any right join" - | "any inner join" | "any full outer join" | "any full join" - | "paste join" - | "natural full outer join" | "natural full join" | "natural left outer join" - | "natural left join" | "natural right outer join" | "natural right join" - | "natural inner join" | "natural join" - | "cross lateral join" | "cross join" - | "left outer join" | "left semi join" | "left anti join" | "left asof join" | "left join" - | "right outer join" | "right semi join" | "right anti join" | "right join" - | "full outer join" | "full join" - | "inner join" | "semi join" | "anti join" | "asof left join" | "asof join" - | "positional join" | "any join" | "lateral join" | "join" - | "lateral view outer" | "lateral view" | "lateral" - | "prewhere" | "where" - | "group by" | "cluster by" | "distribute by" | "sort by" - | "having" | "qualify" | "window" - | "order by" | "limit" - | "fetch first" | "fetch next" - | "for no key update" | "for key share" | "for update" | "for share" - | "when" | "then" | "else" - | "partition by" - | "values" | "returning" | "into" - | "match_recognize" | "connect" | "start with" - ) + UNTERM_KEYWORDS.contains(kw) } +/// Perfect-hash set of word operators (lowercased). +static WORD_OPERATORS: phf::Set<&'static str> = phf::phf_set! 
{ + "is not distinct from", + "is distinct from", + "not similar to", + "similar to", + "not ilike all", + "not ilike any", + "not like all", + "not like any", + "ilike all", + "ilike any", + "like all", + "like any", + "not between", + "not ilike", + "not like", + "not rlike", + "not regexp", + "not exists", + "global not in", + "global in", + "not in", + "is not", + "grouping sets", + "within group", + "respect nulls", + "ignore nulls", + "nulls first", + "nulls last", + "as", + "between", + "cube", + "exists", + "filter", + "ilike", + "isnull", + "in", + "interval", + "is", + "like", + "notnull", + "over", + "pivot", + "regexp", + "rlike", + "rollup", + "some", + "tablesample", + "unpivot", + "asc", + "desc", +}; + /// Check if it's a word operator. fn is_word_operator(kw: &str) -> bool { - matches!( - kw, - "is not distinct from" - | "is distinct from" - | "not similar to" - | "similar to" - | "not ilike all" - | "not ilike any" - | "not like all" - | "not like any" - | "ilike all" - | "ilike any" - | "like all" - | "like any" - | "not between" - | "not ilike" - | "not like" - | "not rlike" - | "not regexp" - | "not exists" - | "global not in" - | "global in" - | "not in" - | "is not" - | "grouping sets" - | "within group" - | "respect nulls" - | "ignore nulls" - | "nulls first" - | "nulls last" - | "as" - | "between" - | "cube" - | "exists" - | "filter" - | "ilike" - | "isnull" - | "in" - | "interval" - | "is" - | "like" - | "notnull" - | "over" - | "pivot" - | "regexp" - | "rlike" - | "rollup" - | "some" - | "tablesample" - | "unpivot" - | "asc" - | "desc" - ) + WORD_OPERATORS.contains(kw) } +/// Perfect-hash set of set operators (lowercased). +static SET_OPERATORS: phf::Set<&'static str> = phf::phf_set! 
{ + "union all by name", + "union by name", + "union all", + "union distinct", + "intersect all", + "intersect distinct", + "except all", + "except distinct", + "union all corresponding by", + "union corresponding by", + "union strict corresponding", + "union corresponding", + "intersect all corresponding", + "intersect corresponding", + "except all corresponding", + "except corresponding", + "union", + "intersect", + "except", + "minus", +}; + /// Check if it's a set operator. fn is_set_operator(kw: &str) -> bool { - matches!( - kw, - "union all by name" - | "union by name" - | "union all" - | "union distinct" - | "intersect all" - | "intersect distinct" - | "except all" - | "except distinct" - | "union all corresponding by" - | "union corresponding by" - | "union strict corresponding" - | "union corresponding" - | "intersect all corresponding" - | "intersect corresponding" - | "except all corresponding" - | "except corresponding" - | "union" - | "intersect" - | "except" - | "minus" - ) + SET_OPERATORS.contains(kw) } /// Check if it's a CREATE/ALTER FUNCTION DDL keyword. @@ -2532,56 +2537,21 @@ fn is_warehouse_ddl(kw: &str) -> bool { || kw.starts_with("alter") && kw.contains("warehouse") } +/// Perfect-hash set of unsupported DDL first words (lowercased). +static UNSUPPORTED_DDL: phf::Set<&'static str> = phf::phf_set! { + "delete", "insert", "update", "merge", "truncate", "rename", "unset", + "use", "execute", "begin", "commit", "rollback", "copy", "clone", + "cluster", "deallocate", "declare", "discard", "do", "export", + "handler", "import", "lock", "move", "prepare", "reassign", "repair", + "security", "unload", "validate", "vacuum", "analyze", "refresh", + "list", "remove", "get", "put", "describe", "show", "comment", + "add", "undrop", "cache", "clear", +}; + /// Check if first word starts an unsupported DDL. /// Note: create/alter/drop and grant/revoke are handled separately above. 
fn is_unsupported_ddl(first_word: &str) -> bool { - matches!( - first_word, - "delete" - | "insert" - | "update" - | "merge" - | "truncate" - | "rename" - | "unset" - | "use" - | "execute" - | "begin" - | "commit" - | "rollback" - | "copy" - | "clone" - | "cluster" - | "deallocate" - | "declare" - | "discard" - | "do" - | "export" - | "handler" - | "import" - | "lock" - | "move" - | "prepare" - | "reassign" - | "repair" - | "security" - | "unload" - | "validate" - | "vacuum" - | "analyze" - | "refresh" - | "list" - | "remove" - | "get" - | "put" - | "describe" - | "show" - | "comment" - | "add" - | "undrop" - | "cache" - | "clear" - ) + UNSUPPORTED_DDL.contains(first_word) } /// Try to scan a frame clause: "ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW" etc. diff --git a/src/line.rs b/src/line.rs index e06da62..b4775db 100644 --- a/src/line.rs +++ b/src/line.rs @@ -469,8 +469,8 @@ mod tests { prev, compact_str::CompactString::from(prefix), compact_str::CompactString::from(value), - smallvec::SmallVec::new(), - smallvec::SmallVec::new(), + 0, + 0, )); idx } @@ -488,7 +488,7 @@ mod tests { fn test_line_depth() { let mut arena = Vec::new(); let idx = make_node_in_arena(&mut arena, TokenType::Name, "a", ""); - arena[idx].open_brackets = smallvec::smallvec![99]; // 1 open bracket + arena[idx].bracket_depth = 1; let mut line = Line::new(None); line.append_node(idx); assert_eq!(line.depth(&arena), (1, 0)); @@ -673,7 +673,7 @@ mod tests { fn test_indentation_with_depth() { let mut arena = Vec::new(); let idx = make_node_in_arena(&mut arena, TokenType::Name, "a", ""); - arena[idx].open_brackets = smallvec::smallvec![99]; // depth 1 + arena[idx].bracket_depth = 1; // depth 1 let mut line = Line::new(None); line.append_node(idx); assert_eq!(line.indentation(&arena), " "); // 4 spaces per depth level diff --git a/src/merger.rs b/src/merger.rs index cdf26a6..3bfe11b 100644 --- a/src/merger.rs +++ b/src/merger.rs @@ -360,7 +360,7 @@ impl LineMerger { for &node_idx in 
&line.nodes { let node = &arena[node_idx]; - if !node.formatting_disabled.is_empty() { + if node.formatting_disabled { return Err(ControlFlow::CannotMerge); } if matches!( @@ -786,8 +786,8 @@ mod tests { prev, compact_str::CompactString::from(prefix), compact_str::CompactString::from(val), - smallvec::SmallVec::new(), - smallvec::SmallVec::new(), + 0, + 0, )); idx } @@ -876,7 +876,7 @@ mod tests { // Create a line with formatting disabled let a = make_node(&mut arena, TokenType::Name, "a", ""); - arena[a].formatting_disabled = smallvec::smallvec![0usize]; + arena[a].formatting_disabled = true; let nl = make_node(&mut arena, TokenType::Newline, "\n", ""); let mut disabled_line = Line::new(None); disabled_line.append_node(a); diff --git a/src/node.rs b/src/node.rs index 68abba7..99b5ff6 100644 --- a/src/node.rs +++ b/src/node.rs @@ -6,11 +6,9 @@ use crate::token::{Token, TokenType}; /// Index into the node arena (Vec). pub type NodeIndex = usize; -/// SmallVec type aliases for Node fields that are almost always small. -/// These avoid heap allocation for the common case (0-8 elements). +/// SmallVec type aliases used by NodeManager for internal bracket tracking. pub type BracketVec = SmallVec<[NodeIndex; 8]>; pub type JinjaBlockVec = SmallVec<[NodeIndex; 4]>; -pub type FmtDisabledVec = SmallVec<[NodeIndex; 2]>; /// A Node wraps a Token with formatting metadata: depth, open brackets, /// open Jinja blocks, and a link to the previous node. @@ -20,9 +18,12 @@ pub struct Node { pub previous_node: Option, pub prefix: CompactString, pub value: CompactString, - pub open_brackets: BracketVec, - pub open_jinja_blocks: JinjaBlockVec, - pub formatting_disabled: FmtDisabledVec, + /// SQL bracket depth (number of open brackets + unterm keywords). + pub bracket_depth: u16, + /// Jinja block nesting depth. + pub jinja_depth: u16, + /// Whether formatting is disabled (fmt:off region). 
+ pub formatting_disabled: bool, } impl Node { @@ -31,23 +32,23 @@ impl Node { previous_node: Option, prefix: CompactString, value: CompactString, - open_brackets: BracketVec, - open_jinja_blocks: JinjaBlockVec, + bracket_depth: u16, + jinja_depth: u16, ) -> Self { Self { token, previous_node, prefix, value, - open_brackets, - open_jinja_blocks, - formatting_disabled: SmallVec::new(), + bracket_depth, + jinja_depth, + formatting_disabled: false, } } /// Depth is (sql_bracket_depth, jinja_block_depth). pub fn depth(&self) -> (usize, usize) { - (self.open_brackets.len(), self.open_jinja_blocks.len()) + (self.bracket_depth as usize, self.jinja_depth as usize) } /// Formatted string: prefix + value. @@ -271,8 +272,8 @@ mod tests { prev, CompactString::new(""), CompactString::from(value), - SmallVec::new(), - SmallVec::new(), + 0, + 0, ) } @@ -285,8 +286,8 @@ mod tests { #[test] fn test_depth_with_brackets() { let mut node = make_node(TokenType::Name, "foo", None); - node.open_brackets = smallvec::smallvec![0, 1]; - node.open_jinja_blocks = smallvec::smallvec![2]; + node.bracket_depth = 2; + node.jinja_depth = 1; assert_eq!(node.depth(), (2, 1)); } diff --git a/src/node_manager.rs b/src/node_manager.rs index 3430f67..01c222f 100644 --- a/src/node_manager.rs +++ b/src/node_manager.rs @@ -3,7 +3,7 @@ use std::borrow::Cow; use compact_str::CompactString; use smallvec::SmallVec; -use crate::node::{BracketVec, FmtDisabledVec, JinjaBlockVec, Node, NodeIndex}; +use crate::node::{BracketVec, JinjaBlockVec, Node, NodeIndex}; use crate::token::{Token, TokenType}; /// NodeManager creates Nodes from Tokens, tracking bracket depth, @@ -15,13 +15,19 @@ pub struct NodeManager { pub open_brackets: BracketVec, /// Current jinja block stack — used by handle_jinja_block_keyword etc. pub open_jinja_blocks: JinjaBlockVec, - pub formatting_disabled: FmtDisabledVec, + /// Formatting-disabled nesting depth. >0 means formatting is disabled. 
+ /// Uses a counter instead of bool to handle nested Data token push/pop. + formatting_disabled_depth: u16, /// Running node-level brackets (including unterm keywords). Mutated in place /// to avoid cloning from arena on every node creation. node_open_brackets: BracketVec, /// Running node-level jinja block stack. Separate from open_jinja_blocks /// because external push/pop calls can temporarily diverge them. node_open_jinja: JinjaBlockVec, + /// Bracket snapshots parallel to node_open_jinja. When a jinja block is + /// opened, we snapshot node_open_brackets so we can restore it when the + /// block closes (without needing to store brackets in each Node). + node_open_jinja_bracket_snapshots: Vec, } impl NodeManager { @@ -30,9 +36,10 @@ impl NodeManager { case_sensitive_names, open_brackets: SmallVec::new(), open_jinja_blocks: SmallVec::new(), - formatting_disabled: SmallVec::new(), + formatting_disabled_depth: 0, node_open_brackets: SmallVec::new(), node_open_jinja: SmallVec::new(), + node_open_jinja_bracket_snapshots: Vec::new(), } } @@ -46,12 +53,11 @@ impl NodeManager { arena: &[Node], ) -> Node { // This matches the Python pattern where depth propagates through nodes - let (open_brackets, open_jinja_blocks) = - self.compute_open_brackets(&token, previous_node, arena); + let (bracket_depth, jinja_depth) = self.compute_open_brackets(&token, previous_node, arena); let formatting_disabled = self.compute_formatting_disabled(&token, previous_node, arena); - let (prefix, value) = if !formatting_disabled.is_empty() { + let (prefix, value) = if formatting_disabled { (token.prefix.clone(), token.text.clone()) } else { let prefix_cow = self.compute_prefix(&token, previous_node, arena); @@ -67,31 +73,34 @@ impl NodeManager { previous_node, prefix, value, - open_brackets, - open_jinja_blocks, + bracket_depth, + jinja_depth, formatting_disabled, } } - /// Compute the list of open brackets for a new node. + /// Compute the bracket and jinja depths for a new node. 
/// /// In Python sqlfmt, the Node's open_brackets includes both actual brackets /// AND unterm keywords (for depth tracking). But the NodeManager's open_brackets /// only contains actual brackets (BracketOpen, StatementStart), used by actions /// like HandleNonreservedTopLevelKeyword to decide behavior. + /// + /// Returns (bracket_depth, jinja_depth) as u16 counts instead of cloned SmallVecs. fn compute_open_brackets( &mut self, token: &Token, previous_node: Option, arena: &[Node], - ) -> (BracketVec, JinjaBlockVec) { + ) -> (u16, u16) { // Mutate running state in place instead of cloning from arena each time. - // self.node_open_brackets always equals the previous node's open_brackets + // self.node_open_brackets always equals the previous node's brackets // at this point (set at end of previous call). match previous_node { None => { self.node_open_brackets.clear(); self.node_open_jinja.clear(); + self.node_open_jinja_bracket_snapshots.clear(); } Some(prev_idx) => { let prev = &arena[prev_idx]; @@ -105,6 +114,10 @@ impl NodeManager { self.node_open_brackets.push(prev_idx); } } else if prev.is_opening_jinja_block() { + // Snapshot brackets before pushing jinja block so we can + // restore them when the block closes. + self.node_open_jinja_bracket_snapshots + .push(self.node_open_brackets.clone()); self.node_open_jinja.push(prev_idx); } } @@ -135,18 +148,20 @@ impl NodeManager { } TokenType::JinjaBlockEnd => { // Pop the jinja block and restore SQL brackets to the state - // at the time the jinja block was opened. SQL scope inside a - // jinja block doesn't leak out to the closing tag. - // Must clone from arena here — rare path (only jinja blocks). - if let Some(jinja_start_idx) = self.node_open_jinja.pop() { - self.node_open_brackets = arena[jinja_start_idx].open_brackets.clone(); + // at the time the jinja block was opened. 
+ if self.node_open_jinja.pop().is_some() { + if let Some(snapshot) = self.node_open_jinja_bracket_snapshots.pop() { + self.node_open_brackets = snapshot; + } } } TokenType::JinjaBlockKeyword => { // {% else %}, {% elif %}, etc. close the previous block section // and open a new one. Restore SQL brackets to the block start's state. - if let Some(jinja_start_idx) = self.node_open_jinja.pop() { - self.node_open_brackets = arena[jinja_start_idx].open_brackets.clone(); + if self.node_open_jinja.pop().is_some() { + if let Some(snapshot) = self.node_open_jinja_bracket_snapshots.pop() { + self.node_open_brackets = snapshot; + } } } TokenType::Semicolon => { @@ -167,41 +182,40 @@ impl NodeManager { self.open_jinja_blocks = self.node_open_jinja.clone(); ( - self.node_open_brackets.clone(), - self.node_open_jinja.clone(), + self.node_open_brackets.len() as u16, + self.node_open_jinja.len() as u16, ) } /// Compute formatting_disabled state from previous node. - /// Uses self.formatting_disabled as running state, mutating in place - /// instead of cloning from arena each time. + /// Uses self.formatting_disabled_depth as running state, mutating in place. + /// Returns bool (true = disabled) for the Node's formatting_disabled field. 
fn compute_formatting_disabled( &mut self, token: &Token, previous_node: Option, arena: &[Node], - ) -> FmtDisabledVec { + ) -> bool { if previous_node.is_none() { - self.formatting_disabled.clear(); + self.formatting_disabled_depth = 0; } if matches!(token.token_type, TokenType::FmtOff | TokenType::Data) { - // Push a marker index (the value doesn't matter, only non-emptiness is checked) - self.formatting_disabled.push(previous_node.unwrap_or(0)); + self.formatting_disabled_depth += 1; } - if !self.formatting_disabled.is_empty() { + if self.formatting_disabled_depth > 0 { if let Some(prev_idx) = previous_node { if matches!( arena[prev_idx].token.token_type, TokenType::FmtOn | TokenType::Data ) { - self.formatting_disabled.pop(); + self.formatting_disabled_depth -= 1; } } } - self.formatting_disabled.clone() + self.formatting_disabled_depth > 0 } /// Open a bracket (called after node is added to arena with its index). @@ -459,8 +473,10 @@ impl NodeManager { { return Cow::Borrowed(&*token.text); } - // Use optimized bulk lowercase, then normalize whitespace without Vec - let lower = str::to_ascii_lowercase(&token.text); + // Lowercase in-place on a String buffer (avoids str::to_ascii_lowercase + // which allocates a separate String) + let mut lower = String::from(&*token.text); + lower.make_ascii_lowercase(); if !lower.contains(|c: char| c.is_ascii_whitespace()) { return Cow::Owned(lower); } @@ -478,7 +494,9 @@ impl NodeManager { if token.text.bytes().all(|b| !b.is_ascii_uppercase()) { return Cow::Borrowed(&*token.text); } - return Cow::Owned(str::to_ascii_lowercase(&token.text)); + let mut s = String::from(&*token.text); + s.make_ascii_lowercase(); + return Cow::Owned(s); } if !self.case_sensitive_names && tt == TokenType::Name { @@ -489,7 +507,9 @@ impl NodeManager { if token.text.bytes().all(|b| !b.is_ascii_uppercase()) { return Cow::Borrowed(&*token.text); } - return Cow::Owned(str::to_ascii_lowercase(&token.text)); + let mut s = String::from(&*token.text); + 
s.make_ascii_lowercase(); + return Cow::Owned(s); } // Jinja tokens, quoted names, etc.: preserve original text @@ -499,19 +519,19 @@ impl NodeManager { /// Enable formatting (handle fmt:on). #[cfg(test)] pub fn enable_formatting(&mut self) { - self.formatting_disabled.clear(); + self.formatting_disabled_depth = 0; } /// Disable formatting (handle fmt:off). #[cfg(test)] pub fn disable_formatting(&mut self) { - self.formatting_disabled.push(0); + self.formatting_disabled_depth = 1; } /// Check if formatting is currently disabled. #[cfg(test)] pub fn is_formatting_disabled(&self) -> bool { - !self.formatting_disabled.is_empty() + self.formatting_disabled_depth > 0 } /// Reset state (for new query). @@ -522,7 +542,7 @@ impl NodeManager { pub fn reset(&mut self) { self.open_brackets.clear(); self.open_jinja_blocks.clear(); - self.formatting_disabled.clear(); + self.formatting_disabled_depth = 0; } } diff --git a/src/operator_precedence.rs b/src/operator_precedence.rs index f5fd365..da9f368 100644 --- a/src/operator_precedence.rs +++ b/src/operator_precedence.rs @@ -168,8 +168,8 @@ mod tests { None, compact_str::CompactString::new(""), compact_str::CompactString::from(value), - smallvec::SmallVec::new(), - smallvec::SmallVec::new(), + 0, + 0, ) } diff --git a/src/segment.rs b/src/segment.rs index 98e3345..63d7863 100644 --- a/src/segment.rs +++ b/src/segment.rs @@ -172,8 +172,8 @@ mod tests { if idx > 0 { Some(idx - 1) } else { None }, compact_str::CompactString::new(""), compact_str::CompactString::from(val), - smallvec::SmallVec::new(), - smallvec::SmallVec::new(), + 0, + 0, )); idx } diff --git a/src/splitter.rs b/src/splitter.rs index 08d8e68..34efcdd 100644 --- a/src/splitter.rs +++ b/src/splitter.rs @@ -195,7 +195,7 @@ impl LineSplitter { if node.divides_queries() { return (true, false); } - if !node.formatting_disabled.is_empty() { + if node.formatting_disabled { return (false, true); } @@ -294,12 +294,8 @@ impl LineSplitter { prev_idx, CompactString::new(""), 
CompactString::from("\n"), - prev_idx - .map(|i| arena[i].open_brackets.clone()) - .unwrap_or_default(), - prev_idx - .map(|i| arena[i].open_jinja_blocks.clone()) - .unwrap_or_default(), + prev_idx.map(|i| arena[i].bracket_depth).unwrap_or(0), + prev_idx.map(|i| arena[i].jinja_depth).unwrap_or(0), ); let idx = arena.len(); arena.push(nl_node); @@ -308,7 +304,7 @@ impl LineSplitter { /// Check if a node has formatting disabled. fn node_has_formatting_disabled(node_idx: NodeIndex, arena: &[Node]) -> bool { - !arena[node_idx].formatting_disabled.is_empty() + arena[node_idx].formatting_disabled } } @@ -326,8 +322,8 @@ mod tests { prev, CompactString::from(prefix), CompactString::from(val), - smallvec::SmallVec::new(), - smallvec::SmallVec::new(), + 0, + 0, )); idx } diff --git a/tests/data/unformatted/222_colorado_claims_extract.sql b/tests/data/unformatted/222_colorado_claims_extract.sql index d4fd2e8..a19f64f 100644 --- a/tests/data/unformatted/222_colorado_claims_extract.sql +++ b/tests/data/unformatted/222_colorado_claims_extract.sql @@ -175,153 +175,160 @@ SELECT - DETAILS of REPORT in this link: -- https://civhc.org/wp-content/uploads/2024/12/Data-Submission-Guide-DSG-v-16-Final.pdf */ -WITH claims AS ( - SELECT claims.* - FROM {{ ref('colorado_all_payers_claim_stage') }} AS claims - WHERE target_month = TRY_TO_TIMESTAMP('{{ var("data_anchor_month") }}') -) -, claims_header_amts AS ( - SELECT * - , MC063::FLOAT + MC064::FLOAT + MC065::FLOAT + MC066::FLOAT + MC067::FLOAT AS amt_per_record - , IFF(MC220 = 'Y', amt_per_record, 0.0) AS amt_vision - , IFF(MC209 = 'Y', amt_per_record, 0.0) AS amt_dental - , IFF(MC209 != 'Y' AND MC220 != 'Y', amt_per_record, 0.0) AS amt_other - FROM claims -) -, claims_count AS ( - SELECT COUNT(*) AS claim_count - , TO_CHAR( - TRY_TO_TIMESTAMP('{{ var("data_anchor_month") }}') - , 'YYYYMM' - ) AS report_month - , SUM(amt_vision) AS total_amt_vision - , SUM(amt_dental) AS total_amt_dental - , SUM(amt_other) AS total_other_amt - , 
SUM(amt_per_record) AS total_amt - FROM claims_header_amts -) -, mem_eligible AS ( - SELECT COUNT( DISTINCT - CASE WHEN ME152 = 'Y' THEN member_id ELSE NULL END - ) AS cnt_mem_vision_eligible - , COUNT( DISTINCT - CASE WHEN ME020 = 'Y' THEN member_id ELSE NULL END - ) AS cnt_mem_dental_eligible - , COUNT( DISTINCT - CASE - WHEN ME018 = 'Y' OR ME123 = 'Y' THEN member_id - ELSE NULL - END - ) AS cnt_all_mem - FROM {{ ref('colorado_all_payers_member_eligibility_stage') }} -) -, claim_header_fields AS ( - SELECT a.* - , b.* - , REPLACE( - ROUND( - DIV0(a.total_amt * 1.0, b.cnt_all_mem)::FLOAT - , 2 - ) - , '.' - , '' - ) AS HD007 - , REPLACE( - ROUND( - DIV0( - a.total_amt_dental * 1.0 - , b.cnt_mem_dental_eligible - )::FLOAT - , 2 - ) - , '.' - , '' - ) AS HD009 - , REPLACE( - ROUND( - DIV0( - a.total_amt_vision * 1.0 - , b.cnt_mem_vision_eligible - )::FLOAT - , 2 - ) - , '.' - , '' - ) AS HD010 - FROM claims_count AS a - , mem_eligible AS b -) -, header_stage AS ( - SELECT CONCAT_WS( - '|' - , 'HD' -- HD001 HEADER INDICATOR - , 'MC' -- HD002 RECORD TYPE - , 'COC0135' -- HD003 PAYER CODE - , 'DHP_COC0135' -- HD004 PAYER NAME - , report_month -- HD005 BEGINNING MONTH - , report_month -- HD006 ENDING MONTH - , IFNULL(claim_count, 0) -- HD007 RECORD COUNT - , HD007 -- HD008 MED_BH PMPM - , '' -- HD009 PHARMACY PMPM (leave blank) - , HD009 -- HD010 DENTAL PMPM - , HD010 -- HD011 VISION PMPM - , CASE WHEN '{{ var("file_env") }}' = 'TEST' THEN 'T' ELSE 'P' END -- HD012 FILE TYPE INDICATOR (P or T) - ) AS text_blob - , 1 AS chunk_order - FROM claim_header_fields -) -, base_stage AS ( - {% set all_columns = adapter.get_columns_in_relation(ref('colorado_all_payers_claim_stage')) %} - {% set except_col_names=["FINALIZED_DATE_EASTERN", "ADJUDICATION_ID", "LINE_ADJUDICATION_ID","PLAN_TYPE", "FIRST_SERVICE_DATE", "TARGET_MONTH", "CLAIM_STAGE_ID","BILLING_PROVIDER_RECORD_LOCATOR","RENDERING_PROVIDER_RECORD_LOCATOR"] %} - {% set col_names_to_hardcode=["MC999999"] %} - -- create data 
rows with pipe-delimited values - SELECT - concat_ws('|', - {%- for col in all_columns if col.name not in except_col_names %} - IFNULL( - {%- if col.name in col_names_to_hardcode %} - '20000219', '') - {%- else %} - REPLACE(REPLACE({{ col.name }},',',''),'\n',''), '') - {% endif %} - {%- if not loop.last %} {{ ',' }} - {% endif %} - {%- endfor %}) as text_blob - , 2 as chunk_order - FROM claims -) -, trailer_stage AS ( - SELECT CONCAT_WS( - '|' - , 'TR' -- TR001 TRAILER INDICATOR - , 'MC' -- TR002 RECORD TYPE - , 'COC0135' -- TR003 PAYER CODE - , 'DHP_COC0135' -- TR004 PAYER NAME - , report_month -- TR005 BEGINNING MONTH - , report_month -- TR006 ENDING MONTH - , to_char(current_timestamp, 'yyyymmdd') -- TR007 DATE CREATED - ) AS text_blob - , 3 AS chunk_order - FROM claims_count -) -, aggregated AS ( - SELECT *, TRY_TO_TIMESTAMP('{{ var("data_anchor_month") }}') AS target_month - FROM header_stage - UNION ALL - SELECT *, TRY_TO_TIMESTAMP('{{ var("data_anchor_month") }}') AS target_month - FROM base_stage - UNION ALL - SELECT *, TRY_TO_TIMESTAMP('{{ var("data_anchor_month") }}') AS target_month - FROM trailer_stage -) -SELECT - text_blob, chunk_order, target_month, - {{ dbt_utils.generate_surrogate_key( - [ - 'target_month', - 'text_blob' - ] - ) - }} AS claim_medical_id - FROM aggregated - ORDER BY chunk_order +with + claims as ( + select claims.* + from {{ ref("colorado_all_payers_claim_stage") }} as claims + where target_month = try_to_timestamp('{{ var("data_anchor_month") }}') + ), + claims_header_amts as ( + select + *, + mc063::float + + mc064::float + + mc065::float + + mc066::float + + mc067::float as amt_per_record, + iff(mc220 = 'Y', amt_per_record, 0.0) as amt_vision, + iff(mc209 = 'Y', amt_per_record, 0.0) as amt_dental, + iff(mc209 != 'Y' and mc220 != 'Y', amt_per_record, 0.0) as amt_other + from claims + ), + claims_count as ( + select + count(*) as claim_count, + to_char( + try_to_timestamp('{{ var("data_anchor_month") }}'), 'YYYYMM' + ) as 
report_month, + sum(amt_vision) as total_amt_vision, + sum(amt_dental) as total_amt_dental, + sum(amt_other) as total_other_amt, + sum(amt_per_record) as total_amt + from claims_header_amts + ), + mem_eligible as ( + select + count( + distinct case when me152 = 'Y' then member_id else null end + ) as cnt_mem_vision_eligible, + count( + distinct case when me020 = 'Y' then member_id else null end + ) as cnt_mem_dental_eligible, + count( + distinct + case when me018 = 'Y' or me123 = 'Y' then member_id else null end + ) as cnt_all_mem + from {{ ref("colorado_all_payers_member_eligibility_stage") }} + ), + claim_header_fields as ( + select + a.*, + b.*, + replace( + round(div0(a.total_amt * 1.0, b.cnt_all_mem)::float, 2), '.', '' + ) as hd007, + replace( + round( + div0(a.total_amt_dental * 1.0, b.cnt_mem_dental_eligible)::float, 2 + ), + '.', + '' + ) as hd009, + replace( + round( + div0(a.total_amt_vision * 1.0, b.cnt_mem_vision_eligible)::float, 2 + ), + '.', + '' + ) as hd010 + from claims_count as a, mem_eligible as b + ), + header_stage as ( + select + concat_ws( + '|', + 'HD', -- HD001 HEADER INDICATOR + 'MC', -- HD002 RECORD TYPE + 'COC0135', -- HD003 PAYER CODE + 'DHP_COC0135', -- HD004 PAYER NAME + report_month, -- HD005 BEGINNING MONTH + report_month, -- HD006 ENDING MONTH + ifnull(claim_count, 0), -- HD007 RECORD COUNT + hd007, -- HD008 MED_BH PMPM + '', -- HD009 PHARMACY PMPM (leave blank) + hd009, -- HD010 DENTAL PMPM + hd010, -- HD011 VISION PMPM + case when '{{ var("file_env") }}' = 'TEST' then 'T' else 'P' end -- HD012 FILE TYPE INDICATOR (P or T) + ) as text_blob, + 1 as chunk_order + from claim_header_fields + ), + base_stage as ( + {% set all_columns = adapter.get_columns_in_relation( + ref("colorado_all_payers_claim_stage") + ) %} + {% set except_col_names = [ + "FINALIZED_DATE_EASTERN", + "ADJUDICATION_ID", + "LINE_ADJUDICATION_ID", + "PLAN_TYPE", + "FIRST_SERVICE_DATE", + "TARGET_MONTH", + "CLAIM_STAGE_ID", + "BILLING_PROVIDER_RECORD_LOCATOR", + 
"RENDERING_PROVIDER_RECORD_LOCATOR", + ] %} + {% set col_names_to_hardcode = ["MC999999"] %} + -- create data rows with pipe-delimited values + select + concat_ws( + '|', + {%- for col in all_columns if col.name not in except_col_names %} + ifnull( + {%- if col.name in col_names_to_hardcode %} + '20000219', + '' + ) + {%- else %} + replace(replace({{ col.name }}, ',', ''), '\n', ''), '' + ) + {% endif %} + {%- if not loop.last %} {{ "," }} {% endif %} + {%- endfor %} + ) as text_blob, + 2 as chunk_order + from claims + ), + trailer_stage as ( + select + concat_ws( + '|', + 'TR', -- TR001 TRAILER INDICATOR + 'MC', -- TR002 RECORD TYPE + 'COC0135', -- TR003 PAYER CODE + 'DHP_COC0135', -- TR004 PAYER NAME + report_month, -- TR005 BEGINNING MONTH + report_month, -- TR006 ENDING MONTH + to_char(current_timestamp, 'yyyymmdd') -- TR007 DATE CREATED + ) as text_blob, + 3 as chunk_order + from claims_count + ), + aggregated as ( + select *, try_to_timestamp('{{ var("data_anchor_month") }}') as target_month + from header_stage + union all + select *, try_to_timestamp('{{ var("data_anchor_month") }}') as target_month + from base_stage + union all + select *, try_to_timestamp('{{ var("data_anchor_month") }}') as target_month + from trailer_stage + ) +select + text_blob, + chunk_order, + target_month, + {{ dbt_utils.generate_surrogate_key(["target_month", "text_blob"]) }} + as claim_medical_id +from aggregated +order by chunk_order diff --git a/tests/data/unformatted/222_jinja_unbalanced_brackets.sql b/tests/data/unformatted/222_jinja_unbalanced_brackets.sql new file mode 100644 index 0000000..c2c7eb0 --- /dev/null +++ b/tests/data/unformatted/222_jinja_unbalanced_brackets.sql @@ -0,0 +1,61 @@ +-- Jinja if/else with unbalanced brackets per branch (each branch +-- independently closes the IFNULL paren) plus Snowflake :: casts. 
+{{ config(materialized="table") }} +WITH base AS ( + SELECT a::FLOAT + b::FLOAT AS total + FROM {{ ref('source_table') }} + WHERE created_at > '{{ var("start_date") }}' +) +, dynamic_cols AS ( + {% set columns = adapter.get_columns_in_relation(ref('source_table')) %} + {% set skip_cols = ["INTERNAL_ID", "UPDATED_AT"] %} + {% set special_cols = ["LEGACY_CODE"] %} + SELECT + concat_ws('|', + {%- for col in columns if col.name not in skip_cols %} + IFNULL( + {%- if col.name in special_cols %} + '19700101', '') + {%- else %} + REPLACE({{ col.name }}, ',', ''), '') + {% endif %} + {%- if not loop.last %} {{ ',' }} + {% endif %} + {%- endfor %}) AS row_data + FROM base +) +SELECT row_data +FROM dynamic_cols +)))))__SQLFMT_OUTPUT__((((( +-- Jinja if/else with unbalanced brackets per branch (each branch +-- independently closes the IFNULL paren) plus Snowflake :: casts. +{{ config(materialized="table") }} +with + base as ( + select a::float + b::float as total + from {{ ref("source_table") }} + where created_at > '{{ var("start_date") }}' + ), + dynamic_cols as ( + {% set columns = adapter.get_columns_in_relation(ref("source_table")) %} + {% set skip_cols = ["INTERNAL_ID", "UPDATED_AT"] %} + {% set special_cols = ["LEGACY_CODE"] %} + select + concat_ws( + '|', + {%- for col in columns if col.name not in skip_cols %} + ifnull( + {%- if col.name in special_cols %} + '19700101', + '' + ) + {%- else %}replace({{ col.name }}, ',', ''), '' + ) + {% endif %} + {%- if not loop.last %} {{ "," }} {% endif %} + {%- endfor %} + ) as row_data + from base + ) +select row_data +from dynamic_cols diff --git a/tests/golden_test.rs b/tests/golden_test.rs index f6abe96..72ccb80 100644 --- a/tests/golden_test.rs +++ b/tests/golden_test.rs @@ -76,75 +76,59 @@ fn clickhouse_mode() -> Mode { } } -macro_rules! 
golden_test { - ($name:ident, $path:expr) => { - #[test] - fn $name() { - let (source, expected) = read_test_data($path); - let mode = default_mode(); - let actual = format_string(&source, &mode).unwrap_or_else(|e| { - panic!("format_string failed for {}: {}", $path, e); - }); - assert_eq!( - expected, actual, - "\n\nFormatting mismatch for {}\n\n--- expected ---\n{}\n--- actual ---\n{}\n", - $path, expected, actual - ); - // Idempotency check - let second = format_string(&actual, &mode).unwrap_or_else(|e| { - panic!("Idempotency format failed for {}: {}", $path, e); - }); - assert_eq!( - expected, second, - "\n\nIdempotency failed for {}\n\n--- expected ---\n{}\n--- second pass ---\n{}\n", - $path, expected, second - ); - } - }; +fn run_golden_test(path: &str, mode: &Mode) { + let (source, expected) = read_test_data(path); + let actual = format_string(&source, mode).unwrap_or_else(|e| { + panic!("format_string failed for {}: {}", path, e); + }); + assert_eq!( + expected, actual, + "\n\nFormatting mismatch for {}\n\n--- expected ---\n{}\n--- actual ---\n{}\n", + path, expected, actual + ); + // Idempotency check + let second = format_string(&actual, mode).unwrap_or_else(|e| { + panic!("Idempotency format failed for {}: {}", path, e); + }); + assert_eq!( + expected, second, + "\n\nIdempotency failed for {}\n\n--- expected ---\n{}\n--- second pass ---\n{}\n", + path, expected, second + ); } -macro_rules! 
golden_test_clickhouse { - ($name:ident, $path:expr) => { - #[test] - fn $name() { - let (source, expected) = read_test_data($path); - let mode = clickhouse_mode(); - let actual = format_string(&source, &mode).unwrap_or_else(|e| { - panic!("format_string failed for {}: {}", $path, e); - }); - assert_eq!( - expected, actual, - "\n\nFormatting mismatch for {}\n\n--- expected ---\n{}\n--- actual ---\n{}\n", - $path, expected, actual - ); - // Idempotency check - let second = format_string(&actual, &mode).unwrap_or_else(|e| { - panic!("Idempotency format failed for {}: {}", $path, e); - }); - assert_eq!( - expected, second, - "\n\nIdempotency failed for {}\n\n--- expected ---\n{}\n--- second pass ---\n{}\n", - $path, expected, second - ); - } +fn run_golden_error_test(path: &str) { + let content = fs::read_to_string(path) + .unwrap_or_else(|e| panic!("Failed to read error test file {}: {}", path, e)); + let source = format!("{}\n", content.trim()); + let result = format_string(&source, &default_mode()); + assert!( + result.is_err(), + "Expected error for {} but got Ok:\n{}", + path, + result.unwrap() + ); +} + +macro_rules! golden_tests { + (mode: $mode_fn:ident, $($name:ident => $path:expr),* $(,)?) => { + $( + #[test] + fn $name() { + run_golden_test($path, &$mode_fn()); + } + )* }; } -macro_rules! golden_error_test { - ($name:ident, $path:expr) => { - #[test] - fn $name() { - let content = fs::read_to_string($path) - .unwrap_or_else(|e| panic!("Failed to read error test file {}: {}", $path, e)); - let source = format!("{}\n", content.trim()); - let result = format_string(&source, &default_mode()); - assert!( - result.is_err(), - "Expected error for {} but got Ok:\n{}", - $path, - result.unwrap() - ); - } +macro_rules! golden_error_tests { + ($($name:ident => $path:expr),* $(,)?) => { + $( + #[test] + fn $name() { + run_golden_error_test($path); + } + )* }; } @@ -153,417 +137,155 @@ macro_rules! 
golden_error_test { // These files have no sentinel — input should pass through unchanged. // ============================================================================= -golden_test!( - golden_preformatted_001_select_1, - "tests/data/preformatted/001_select_1.sql" -); -golden_test!( - golden_preformatted_002_select_from_where, - "tests/data/preformatted/002_select_from_where.sql" -); -golden_test!( - golden_preformatted_003_literals, - "tests/data/preformatted/003_literals.sql" -); -golden_test!( - golden_preformatted_004_with_select, - "tests/data/preformatted/004_with_select.sql" -); -golden_test!( - golden_preformatted_005_fmt_off, - "tests/data/preformatted/005_fmt_off.sql" -); -golden_test!( - golden_preformatted_006_fmt_off_447, - "tests/data/preformatted/006_fmt_off_447.sql" -); -golden_test!( - golden_preformatted_007_fmt_off_comments, - "tests/data/preformatted/007_fmt_off_comments.sql" -); -golden_test!( - golden_preformatted_008_reserved_names, - "tests/data/preformatted/008_reserved_names.sql" -); -golden_test!( - golden_preformatted_009_empty, - "tests/data/preformatted/009_empty.sql" -); -golden_test!( - golden_preformatted_010_comment_only, - "tests/data/preformatted/010_comment_only.sql" -); -golden_test!( - golden_preformatted_301_multiline_jinjafmt, - "tests/data/preformatted/301_multiline_jinjafmt.sql" -); -golden_test!( - golden_preformatted_302_jinjafmt_multiline_str, - "tests/data/preformatted/302_jinjafmt_multiline_str.sql" -); -golden_test!( - golden_preformatted_303_jinjafmt_more_mutliline_str, - "tests/data/preformatted/303_jinjafmt_more_mutliline_str.sql" -); -golden_test!( - golden_preformatted_400_create_table, - "tests/data/preformatted/400_create_table.sql" -); -golden_test!( - golden_preformatted_401_create_row_access_policy, - "tests/data/preformatted/401_create_row_access_policy.sql" -); -golden_test!( - golden_preformatted_402_alter_table, - "tests/data/preformatted/402_alter_table.sql" -); +golden_tests! 
{ + mode: default_mode, + golden_preformatted_001_select_1 => "tests/data/preformatted/001_select_1.sql", + golden_preformatted_002_select_from_where => "tests/data/preformatted/002_select_from_where.sql", + golden_preformatted_003_literals => "tests/data/preformatted/003_literals.sql", + golden_preformatted_004_with_select => "tests/data/preformatted/004_with_select.sql", + golden_preformatted_005_fmt_off => "tests/data/preformatted/005_fmt_off.sql", + golden_preformatted_006_fmt_off_447 => "tests/data/preformatted/006_fmt_off_447.sql", + golden_preformatted_007_fmt_off_comments => "tests/data/preformatted/007_fmt_off_comments.sql", + golden_preformatted_008_reserved_names => "tests/data/preformatted/008_reserved_names.sql", + golden_preformatted_009_empty => "tests/data/preformatted/009_empty.sql", + golden_preformatted_010_comment_only => "tests/data/preformatted/010_comment_only.sql", + golden_preformatted_301_multiline_jinjafmt => "tests/data/preformatted/301_multiline_jinjafmt.sql", + golden_preformatted_302_jinjafmt_multiline_str => "tests/data/preformatted/302_jinjafmt_multiline_str.sql", + golden_preformatted_303_jinjafmt_more_mutliline_str => "tests/data/preformatted/303_jinjafmt_more_mutliline_str.sql", + golden_preformatted_400_create_table => "tests/data/preformatted/400_create_table.sql", + golden_preformatted_401_create_row_access_policy => "tests/data/preformatted/401_create_row_access_policy.sql", + golden_preformatted_402_alter_table => "tests/data/preformatted/402_alter_table.sql", +} // ============================================================================= // Unformatted golden tests — 100-series (core SQL formatting) // ============================================================================= -golden_test!( - golden_unformatted_100_select_case, - "tests/data/unformatted/100_select_case.sql" -); -golden_test!( - golden_unformatted_101_multiline, - "tests/data/unformatted/101_multiline.sql" -); -golden_test!( - 
golden_unformatted_102_lots_of_comments, - "tests/data/unformatted/102_lots_of_comments.sql" -); -golden_test!( - golden_unformatted_103_window_functions, - "tests/data/unformatted/103_window_functions.sql" -); -golden_test!( - golden_unformatted_104_joins, - "tests/data/unformatted/104_joins.sql" -); -golden_test!( - golden_unformatted_105_fmt_off, - "tests/data/unformatted/105_fmt_off.sql" -); -golden_test!( - golden_unformatted_106_leading_commas, - "tests/data/unformatted/106_leading_commas.sql" -); -golden_test!( - golden_unformatted_107_jinja_blocks, - "tests/data/unformatted/107_jinja_blocks.sql" -); -golden_test!( - golden_unformatted_108_test_block, - "tests/data/unformatted/108_test_block.sql" -); -golden_test!( - golden_unformatted_109_lateral_flatten, - "tests/data/unformatted/109_lateral_flatten.sql" -); -golden_test!( - golden_unformatted_110_other_identifiers, - "tests/data/unformatted/110_other_identifiers.sql" -); -golden_test!( - golden_unformatted_111_chained_boolean_between, - "tests/data/unformatted/111_chained_boolean_between.sql" -); -golden_test!( - golden_unformatted_112_semicolons, - "tests/data/unformatted/112_semicolons.sql" -); -golden_test!( - golden_unformatted_113_utils_group_by, - "tests/data/unformatted/113_utils_group_by.sql" -); -golden_test!( - golden_unformatted_114_unions, - "tests/data/unformatted/114_unions.sql" -); -golden_test!( - golden_unformatted_115_select_star_except, - "tests/data/unformatted/115_select_star_except.sql" -); -golden_test!( - golden_unformatted_116_chained_booleans, - "tests/data/unformatted/116_chained_booleans.sql" -); -golden_test!( - golden_unformatted_117_whitespace_in_tokens, - "tests/data/unformatted/117_whitespace_in_tokens.sql" -); -golden_test!( - golden_unformatted_118_within_group, - "tests/data/unformatted/118_within_group.sql" -); -golden_test!( - golden_unformatted_119_psycopg_placeholders, - "tests/data/unformatted/119_psycopg_placeholders.sql" -); -golden_test!( - 
golden_unformatted_120_array_literals, - "tests/data/unformatted/120_array_literals.sql" -); -golden_test!( - golden_unformatted_121_stubborn_merge_edge_cases, - "tests/data/unformatted/121_stubborn_merge_edge_cases.sql" -); -golden_test!( - golden_unformatted_122_values, - "tests/data/unformatted/122_values.sql" -); -golden_test!( - golden_unformatted_123_spark_keywords, - "tests/data/unformatted/123_spark_keywords.sql" -); -golden_test!( - golden_unformatted_124_bq_compound_types, - "tests/data/unformatted/124_bq_compound_types.sql" -); -golden_test!( - golden_unformatted_125_numeric_literals, - "tests/data/unformatted/125_numeric_literals.sql" -); -golden_test!( - golden_unformatted_126_blank_lines, - "tests/data/unformatted/126_blank_lines.sql" -); -golden_test!( - golden_unformatted_127_more_comments, - "tests/data/unformatted/127_more_comments.sql" -); -golden_test!( - golden_unformatted_128_double_slash_comments, - "tests/data/unformatted/128_double_slash_comments.sql" -); -golden_test!( - golden_unformatted_129_duckdb_joins, - "tests/data/unformatted/129_duckdb_joins.sql" -); -golden_test!( - golden_unformatted_130_athena_data_types, - "tests/data/unformatted/130_athena_data_types.sql" -); -golden_test!( - golden_unformatted_131_assignment_statement, - "tests/data/unformatted/131_assignment_statement.sql" -); -golden_test!( - golden_unformatted_132_spark_number_literals, - "tests/data/unformatted/132_spark_number_literals.sql" -); -golden_test!( - golden_unformatted_133_for_else, - "tests/data/unformatted/133_for_else.sql" -); -golden_test!( - golden_unformatted_134_databricks_type_hints, - "tests/data/unformatted/134_databricks_type_hints.sql" -); -golden_test!( - golden_unformatted_135_star_columns, - "tests/data/unformatted/135_star_columns.sql" -); -golden_test!( - golden_unformatted_136_databricks_variant, - "tests/data/unformatted/136_databricks_variant.sql" -); +golden_tests! 
{ + mode: default_mode, + golden_unformatted_100_select_case => "tests/data/unformatted/100_select_case.sql", + golden_unformatted_101_multiline => "tests/data/unformatted/101_multiline.sql", + golden_unformatted_102_lots_of_comments => "tests/data/unformatted/102_lots_of_comments.sql", + golden_unformatted_103_window_functions => "tests/data/unformatted/103_window_functions.sql", + golden_unformatted_104_joins => "tests/data/unformatted/104_joins.sql", + golden_unformatted_105_fmt_off => "tests/data/unformatted/105_fmt_off.sql", + golden_unformatted_106_leading_commas => "tests/data/unformatted/106_leading_commas.sql", + golden_unformatted_107_jinja_blocks => "tests/data/unformatted/107_jinja_blocks.sql", + golden_unformatted_108_test_block => "tests/data/unformatted/108_test_block.sql", + golden_unformatted_109_lateral_flatten => "tests/data/unformatted/109_lateral_flatten.sql", + golden_unformatted_110_other_identifiers => "tests/data/unformatted/110_other_identifiers.sql", + golden_unformatted_111_chained_boolean_between => "tests/data/unformatted/111_chained_boolean_between.sql", + golden_unformatted_112_semicolons => "tests/data/unformatted/112_semicolons.sql", + golden_unformatted_113_utils_group_by => "tests/data/unformatted/113_utils_group_by.sql", + golden_unformatted_114_unions => "tests/data/unformatted/114_unions.sql", + golden_unformatted_115_select_star_except => "tests/data/unformatted/115_select_star_except.sql", + golden_unformatted_116_chained_booleans => "tests/data/unformatted/116_chained_booleans.sql", + golden_unformatted_117_whitespace_in_tokens => "tests/data/unformatted/117_whitespace_in_tokens.sql", + golden_unformatted_118_within_group => "tests/data/unformatted/118_within_group.sql", + golden_unformatted_119_psycopg_placeholders => "tests/data/unformatted/119_psycopg_placeholders.sql", + golden_unformatted_120_array_literals => "tests/data/unformatted/120_array_literals.sql", + golden_unformatted_121_stubborn_merge_edge_cases => 
"tests/data/unformatted/121_stubborn_merge_edge_cases.sql", + golden_unformatted_122_values => "tests/data/unformatted/122_values.sql", + golden_unformatted_123_spark_keywords => "tests/data/unformatted/123_spark_keywords.sql", + golden_unformatted_124_bq_compound_types => "tests/data/unformatted/124_bq_compound_types.sql", + golden_unformatted_125_numeric_literals => "tests/data/unformatted/125_numeric_literals.sql", + golden_unformatted_126_blank_lines => "tests/data/unformatted/126_blank_lines.sql", + golden_unformatted_127_more_comments => "tests/data/unformatted/127_more_comments.sql", + golden_unformatted_128_double_slash_comments => "tests/data/unformatted/128_double_slash_comments.sql", + golden_unformatted_129_duckdb_joins => "tests/data/unformatted/129_duckdb_joins.sql", + golden_unformatted_130_athena_data_types => "tests/data/unformatted/130_athena_data_types.sql", + golden_unformatted_131_assignment_statement => "tests/data/unformatted/131_assignment_statement.sql", + golden_unformatted_132_spark_number_literals => "tests/data/unformatted/132_spark_number_literals.sql", + golden_unformatted_133_for_else => "tests/data/unformatted/133_for_else.sql", + golden_unformatted_134_databricks_type_hints => "tests/data/unformatted/134_databricks_type_hints.sql", + golden_unformatted_135_star_columns => "tests/data/unformatted/135_star_columns.sql", + golden_unformatted_136_databricks_variant => "tests/data/unformatted/136_databricks_variant.sql", +} // ============================================================================= // Unformatted golden tests — 200-series (real-world dbt models) // ============================================================================= -golden_test!( - golden_unformatted_200_base_model, - "tests/data/unformatted/200_base_model.sql" -); -golden_test!( - golden_unformatted_201_basic_snapshot, - "tests/data/unformatted/201_basic_snapshot.sql" -); -golden_test!( - golden_unformatted_202_unpivot_macro, - 
"tests/data/unformatted/202_unpivot_macro.sql" -); -golden_test!( - golden_unformatted_203_gitlab_email_domain_type, - "tests/data/unformatted/203_gitlab_email_domain_type.sql" -); -golden_test!( - golden_unformatted_204_gitlab_tag_validation, - "tests/data/unformatted/204_gitlab_tag_validation.sql" -); -golden_test!( - golden_unformatted_205_rittman_hubspot_deals, - "tests/data/unformatted/205_rittman_hubspot_deals.sql" -); -golden_test!( - golden_unformatted_206_gitlab_prep_geozone, - "tests/data/unformatted/206_gitlab_prep_geozone.sql" -); -golden_test!( - golden_unformatted_207_rittman_int_journals, - "tests/data/unformatted/207_rittman_int_journals.sql" -); -golden_test!( - golden_unformatted_208_rittman_int_plan_breakout_metrics, - "tests/data/unformatted/208_rittman_int_plan_breakout_metrics.sql" -); -golden_test!( - golden_unformatted_209_rittman_int_web_events_sessionized, - "tests/data/unformatted/209_rittman_int_web_events_sessionized.sql" -); -golden_test!( - golden_unformatted_210_gitlab_gdpr_delete, - "tests/data/unformatted/210_gitlab_gdpr_delete.sql" -); -golden_test!( - golden_unformatted_211_http_2019_cdn_17_20, - "tests/data/unformatted/211_http_2019_cdn_17_20.sql" -); -golden_test!( - golden_unformatted_212_http_2019_cms_14_02, - "tests/data/unformatted/212_http_2019_cms_14_02.sql" -); -golden_test!( - golden_unformatted_213_gitlab_fct_sales_funnel_target, - "tests/data/unformatted/213_gitlab_fct_sales_funnel_target.sql" -); -golden_test!( - golden_unformatted_214_get_unique_attributes, - "tests/data/unformatted/214_get_unique_attributes.sql" -); -golden_test!( - golden_unformatted_215_gitlab_get_backup_table_command, - "tests/data/unformatted/215_gitlab_get_backup_table_command.sql" -); -golden_test!( - golden_unformatted_216_gitlab_zuora_revenue_revenue_contract_line_source, - "tests/data/unformatted/216_gitlab_zuora_revenue_revenue_contract_line_source.sql" -); -golden_test!( - golden_unformatted_217_dbt_unit_testing_csv, - 
"tests/data/unformatted/217_dbt_unit_testing_csv.sql" -); -golden_test!( - golden_unformatted_218_multiple_c_comments, - "tests/data/unformatted/218_multiple_c_comments.sql" -); -golden_test!( - golden_unformatted_219_any_all_agg, - "tests/data/unformatted/219_any_all_agg.sql" -); -golden_test_clickhouse!( - golden_unformatted_220_clickhouse_joins, - "tests/data/unformatted/220_clickhouse_joins.sql" -); -golden_test!( - golden_unformatted_221_dbt_config_dollar_quoted, - "tests/data/unformatted/221_dbt_config_dollar_quoted.sql" -); -golden_test!( - golden_unformatted_222_colorado_claims_extract, - "tests/data/unformatted/222_colorado_claims_extract.sql" -); +golden_tests! { + mode: default_mode, + golden_unformatted_200_base_model => "tests/data/unformatted/200_base_model.sql", + golden_unformatted_201_basic_snapshot => "tests/data/unformatted/201_basic_snapshot.sql", + golden_unformatted_202_unpivot_macro => "tests/data/unformatted/202_unpivot_macro.sql", + golden_unformatted_203_gitlab_email_domain_type => "tests/data/unformatted/203_gitlab_email_domain_type.sql", + golden_unformatted_204_gitlab_tag_validation => "tests/data/unformatted/204_gitlab_tag_validation.sql", + golden_unformatted_205_rittman_hubspot_deals => "tests/data/unformatted/205_rittman_hubspot_deals.sql", + golden_unformatted_206_gitlab_prep_geozone => "tests/data/unformatted/206_gitlab_prep_geozone.sql", + golden_unformatted_207_rittman_int_journals => "tests/data/unformatted/207_rittman_int_journals.sql", + golden_unformatted_208_rittman_int_plan_breakout_metrics => "tests/data/unformatted/208_rittman_int_plan_breakout_metrics.sql", + golden_unformatted_209_rittman_int_web_events_sessionized => "tests/data/unformatted/209_rittman_int_web_events_sessionized.sql", + golden_unformatted_210_gitlab_gdpr_delete => "tests/data/unformatted/210_gitlab_gdpr_delete.sql", + golden_unformatted_211_http_2019_cdn_17_20 => "tests/data/unformatted/211_http_2019_cdn_17_20.sql", + 
golden_unformatted_212_http_2019_cms_14_02 => "tests/data/unformatted/212_http_2019_cms_14_02.sql", + golden_unformatted_213_gitlab_fct_sales_funnel_target => "tests/data/unformatted/213_gitlab_fct_sales_funnel_target.sql", + golden_unformatted_214_get_unique_attributes => "tests/data/unformatted/214_get_unique_attributes.sql", + golden_unformatted_215_gitlab_get_backup_table_command => "tests/data/unformatted/215_gitlab_get_backup_table_command.sql", + golden_unformatted_216_gitlab_zuora_revenue_revenue_contract_line_source => "tests/data/unformatted/216_gitlab_zuora_revenue_revenue_contract_line_source.sql", + golden_unformatted_217_dbt_unit_testing_csv => "tests/data/unformatted/217_dbt_unit_testing_csv.sql", + golden_unformatted_218_multiple_c_comments => "tests/data/unformatted/218_multiple_c_comments.sql", + golden_unformatted_219_any_all_agg => "tests/data/unformatted/219_any_all_agg.sql", + golden_unformatted_221_dbt_config_dollar_quoted => "tests/data/unformatted/221_dbt_config_dollar_quoted.sql", + golden_unformatted_222_colorado_claims_extract => "tests/data/unformatted/222_colorado_claims_extract.sql", + golden_unformatted_223_jinja_unbalanced_brackets => "tests/data/unformatted/223_jinja_unbalanced_brackets.sql", +} + +// ClickHouse dialect tests +golden_tests! { + mode: clickhouse_mode, + golden_unformatted_220_clickhouse_joins => "tests/data/unformatted/220_clickhouse_joins.sql", +} // ============================================================================= // Unformatted golden tests — 300-series (Jinja formatting) // ============================================================================= -golden_test!( - golden_unformatted_300_jinjafmt, - "tests/data/unformatted/300_jinjafmt.sql" -); +golden_tests! 
{ + mode: default_mode, + golden_unformatted_300_jinjafmt => "tests/data/unformatted/300_jinjafmt.sql", +} // ============================================================================= // Unformatted golden tests — 400-series (DDL/DML) // ============================================================================= -golden_test!( - golden_unformatted_400_create_fn_and_select, - "tests/data/unformatted/400_create_fn_and_select.sql" -); -golden_test!( - golden_unformatted_401_explain_select, - "tests/data/unformatted/401_explain_select.sql" -); -golden_test!( - golden_unformatted_402_delete_from_using, - "tests/data/unformatted/402_delete_from_using.sql" -); -golden_test!( - golden_unformatted_403_grant_revoke, - "tests/data/unformatted/403_grant_revoke.sql" -); -golden_test!( - golden_unformatted_404_create_function_pg_examples, - "tests/data/unformatted/404_create_function_pg_examples.sql" -); -golden_test!( - golden_unformatted_405_create_function_snowflake_examples, - "tests/data/unformatted/405_create_function_snowflake_examples.sql" -); -golden_test!( - golden_unformatted_406_create_function_bq_examples, - "tests/data/unformatted/406_create_function_bq_examples.sql" -); -golden_test!( - golden_unformatted_407_alter_function_pg_examples, - "tests/data/unformatted/407_alter_function_pg_examples.sql" -); -golden_test!( - golden_unformatted_408_alter_function_snowflake_examples, - "tests/data/unformatted/408_alter_function_snowflake_examples.sql" -); -golden_test!( - golden_unformatted_409_create_external_function, - "tests/data/unformatted/409_create_external_function.sql" -); -golden_test!( - golden_unformatted_410_create_warehouse, - "tests/data/unformatted/410_create_warehouse.sql" -); -golden_test!( - golden_unformatted_411_create_clone, - "tests/data/unformatted/411_create_clone.sql" -); -golden_test!( - golden_unformatted_412_pragma, - "tests/data/unformatted/412_pragma.sql" -); +golden_tests! 
{ + mode: default_mode, + golden_unformatted_400_create_fn_and_select => "tests/data/unformatted/400_create_fn_and_select.sql", + golden_unformatted_401_explain_select => "tests/data/unformatted/401_explain_select.sql", + golden_unformatted_402_delete_from_using => "tests/data/unformatted/402_delete_from_using.sql", + golden_unformatted_403_grant_revoke => "tests/data/unformatted/403_grant_revoke.sql", + golden_unformatted_404_create_function_pg_examples => "tests/data/unformatted/404_create_function_pg_examples.sql", + golden_unformatted_405_create_function_snowflake_examples => "tests/data/unformatted/405_create_function_snowflake_examples.sql", + golden_unformatted_406_create_function_bq_examples => "tests/data/unformatted/406_create_function_bq_examples.sql", + golden_unformatted_407_alter_function_pg_examples => "tests/data/unformatted/407_alter_function_pg_examples.sql", + golden_unformatted_408_alter_function_snowflake_examples => "tests/data/unformatted/408_alter_function_snowflake_examples.sql", + golden_unformatted_409_create_external_function => "tests/data/unformatted/409_create_external_function.sql", + golden_unformatted_410_create_warehouse => "tests/data/unformatted/410_create_warehouse.sql", + golden_unformatted_411_create_clone => "tests/data/unformatted/411_create_clone.sql", + golden_unformatted_412_pragma => "tests/data/unformatted/412_pragma.sql", +} // ============================================================================= // Unformatted golden tests — 900-series (edge cases) // ============================================================================= -golden_test!( - golden_unformatted_900_create_view, - "tests/data/unformatted/900_create_view.sql" -); -golden_test!( - golden_unformatted_999_unsupported_ddl, - "tests/data/unformatted/999_unsupported_ddl.sql" -); +golden_tests! 
{ + mode: default_mode, + golden_unformatted_900_create_view => "tests/data/unformatted/900_create_view.sql", + golden_unformatted_999_unsupported_ddl => "tests/data/unformatted/999_unsupported_ddl.sql", +} // ============================================================================= // Error golden tests (4 files) — these should produce parse errors // ============================================================================= -golden_error_test!( - golden_error_900_bad_token, - "tests/data/errors/900_bad_token.sql" -); -golden_error_test!( - golden_error_910_unopened_multiline, - "tests/data/errors/910_unopened_multiline.sql" -); -golden_error_test!( - golden_error_911_unopened_bracket, - "tests/data/errors/911_unopened_bracket.sql" -); -golden_error_test!( - golden_error_920_unterminated_multiline, - "tests/data/errors/920_unterminated_multiline.sql" -); +golden_error_tests! { + golden_error_900_bad_token => "tests/data/errors/900_bad_token.sql", + golden_error_910_unopened_multiline => "tests/data/errors/910_unopened_multiline.sql", + golden_error_911_unopened_bracket => "tests/data/errors/911_unopened_bracket.sql", + golden_error_920_unterminated_multiline => "tests/data/errors/920_unterminated_multiline.sql", +}