Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions src/line.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,15 @@ impl Line {
.find(|n| !n.is_newline())
}

/// Get the second non-newline node (useful for leading-comma lines like `, lateral`).
pub fn second_content_node<'a>(&self, arena: &'a [Node]) -> Option<&'a Node> {
self.nodes
.iter()
.map(|&i| &arena[i])
.filter(|n| !n.is_newline())
.nth(1)
}

/// Get the index of the first non-newline node.
pub fn first_content_node_idx(&self, arena: &[Node]) -> Option<NodeIndex> {
self.nodes.iter().copied().find(|&i| !arena[i].is_newline())
Expand Down
91 changes: 74 additions & 17 deletions src/merger.rs
Original file line number Diff line number Diff line change
Expand Up @@ -155,32 +155,44 @@ impl LineMerger {
}
}

/// Convert leading commas to trailing commas by merging standalone comma
/// lines with the preceding non-blank content line.
/// Python sqlfmt always uses trailing comma style.
/// Merge standalone comma lines with the FOLLOWING content line.
/// Leading-comma style: `, next_item` on one line.
/// Don't merge commas with Jinja block tags ({% if %}, {% else %}, etc.)
/// as the comma is part of the block's content, not the tag itself.
/// Uses a mark-and-sweep approach to avoid O(n²) from Vec::remove().
fn fix_standalone_commas(&self, lines: &mut Vec<Line>, arena: &[Node]) {
let len = lines.len();
let mut remove = vec![false; len];
let mut last_content_idx: Option<usize> = None;

for i in 0..len {
if remove[i] {
continue;
}
if lines[i].is_standalone_comma(arena) && i > 0 {
if let Some(pi) = last_content_idx {
if !is_jinja_block_line(&lines[pi], arena) {
let to_merge = vec![lines[pi].clone(), lines[i].clone()];
if lines[i].is_standalone_comma(arena) {
// Find next non-blank content line
let mut next_content_idx: Option<usize> = None;
for j in (i + 1)..len {
if !remove[j]
&& !lines[j].is_blank_line(arena)
&& !lines[j].is_standalone_comment_line(arena)
{
next_content_idx = Some(j);
break;
}
}
if let Some(ni) = next_content_idx {
if !is_jinja_block_line(&lines[ni], arena)
&& !line_has_interior_split_points(&lines[ni], arena)
{
let to_merge = vec![lines[i].clone(), lines[ni].clone()];
if let Ok(merged) = self.create_merged_line(&to_merge, arena) {
if let Some(merged_line) =
merged.into_iter().find(|l| !l.is_blank_line(arena))
{
lines[pi] = merged_line;
lines[ni] = merged_line;
remove[i] = true;
for j in (pi + 1)..i {
// Remove blank lines between comma and next content
for j in (i + 1)..ni {
if lines[j].is_blank_line(arena) {
remove[j] = true;
}
Expand All @@ -191,10 +203,6 @@ impl LineMerger {
}
}
}

if !lines[i].is_blank_line(arena) && !lines[i].is_standalone_comment_line(arena) {
last_content_idx = Some(i);
}
}

if remove.iter().any(|&r| r) {
Expand Down Expand Up @@ -293,6 +301,13 @@ impl LineMerger {
return Err(ControlFlow::CannotMerge);
}

// Leading-comma style: never merge across comma-starting lines.
// Must check BEFORE setting first_content_seen so that the first
// content line of a segment (e.g., `, sum(`) is not blocked.
if is_content && first_content_seen && line.starts_with_comma(arena) {
return Err(ControlFlow::CannotMerge);
}

// --- Interior standalone comment detection ---
if is_content {
if pending_interior_comment {
Expand Down Expand Up @@ -512,6 +527,10 @@ impl LineMerger {
});
}

// LATERAL: with leading commas, the comma is on the LATERAL line itself
// (e.g., `, lateral flatten(...)`) so check if the head line starts with comma
// and the second content node is LATERAL, or if the first content is LATERAL
// and the previous segment's tail ends with comma.
if first.token.token_type == crate::token::TokenType::UntermKeyword
&& first.value.eq_ignore_ascii_case("lateral")
{
Expand All @@ -525,6 +544,16 @@ impl LineMerger {
}
return self.create_merged_line(&segment.lines, arena).is_ok();
}
// Leading-comma style: `, lateral` — first content is comma, second is lateral
if first.is_comma() {
if let Some(second) = line.second_content_node(arena) {
if second.token.token_type == crate::token::TokenType::UntermKeyword
&& second.value.eq_ignore_ascii_case("lateral")
{
return self.create_merged_line(&segment.lines, arena).is_ok();
}
}
}

false
}
Expand Down Expand Up @@ -750,6 +779,34 @@ impl LineMerger {
}
}

/// Check if a line has interior nodes that would cause the splitter to re-split
/// if the line were prepended with a comma. This prevents fix_standalone_commas
/// from creating non-idempotent output.
fn line_has_interior_split_points(line: &Line, arena: &[Node]) -> bool {
let mut first_content = true;
for &idx in &line.nodes {
let node = &arena[idx];
if node.is_newline() {
continue;
}
if first_content {
first_content = false;
continue;
}
// These would cause the splitter to split before them
if node.is_operator(arena) && !node.is_bracket_operator(arena) {
return true;
}
if node.is_boolean_operator() {
return true;
}
if node.is_unterm_keyword() {
return true;
}
}
false
}

/// Check if a line starts with a Jinja block tag.
fn is_jinja_block_line(line: &Line, arena: &[Node]) -> bool {
line.first_content_node(arena)
Expand Down Expand Up @@ -923,11 +980,11 @@ mod tests {

#[test]
fn test_create_merged_line_basic() {
// Two short columns should merge onto one SELECT line
// Two short columns should stay split with leading commas
let result = format_sql("SELECT\n a,\n b\n");
assert!(
result.contains("a,"),
"Short columns should merge: {}",
result.contains(", b"),
"Short columns should use leading commas: {}",
result
);
}
Expand Down
19 changes: 11 additions & 8 deletions src/splitter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ use crate::token::{Token, TokenType};
///
/// Mirrors the Python sqlfmt LineSplitter exactly:
/// - Iterates node-by-node with split_before/split_after flags
/// - Splits AFTER commas, opening brackets, keywords, query dividers
/// - Splits BEFORE operators, keywords, closing brackets, multiline jinja
/// - Splits AFTER opening brackets, keywords, query dividers
/// - Splits BEFORE commas, operators, keywords, closing brackets, multiline jinja
/// - Uses iterative (not recursive) approach to handle very long lines
pub struct LineSplitter;

Expand Down Expand Up @@ -135,6 +135,11 @@ impl LineSplitter {
return true;
}

// Leading-comma style: split BEFORE commas so they lead the next line
if node.is_comma() {
return true;
}

false
}

Expand Down Expand Up @@ -162,9 +167,6 @@ impl LineSplitter {
fn maybe_split_after(&self, node_idx: NodeIndex, arena: &[Node]) -> (bool, bool) {
let node = &arena[node_idx];

if node.is_comma() {
return (true, false);
}
// BUT NOT after angle brackets (< for type constructors like array<int64>).
// Angle bracket content is typically short and should stay on the same line.
if node.is_opening_bracket() {
Expand Down Expand Up @@ -230,8 +232,6 @@ impl LineSplitter {
(comments, Vec::new())
} else if comments.is_empty() {
(Vec::new(), Vec::new())
} else if new_nodes.len() == 1 && arena[new_nodes[0]].token.token_type == TokenType::Comma {
(Vec::new(), comments)
} else {
// Use slice contains() instead of HashSet for small node sets
let remaining_nodes: &[NodeIndex] = if index < line.nodes.len() {
Expand Down Expand Up @@ -365,7 +365,8 @@ mod tests {
}

#[test]
fn test_split_after_comma() {
fn test_split_before_comma() {
// Leading-comma style: split BEFORE the comma
let mut arena = Vec::new();
let a = make_node(&mut arena, TokenType::Name, "a", "");
let comma = make_node(&mut arena, TokenType::Comma, ",", "");
Expand All @@ -382,6 +383,8 @@ mod tests {
"Expected at least 2 lines, got {}",
result.len()
);
// First line should have "a" (no comma)
// Second line should start with comma
}

#[test]
Expand Down
8 changes: 4 additions & 4 deletions tests/data/preformatted/002_select_from_where.sql
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
select
a_long_field_name,
another_long_field_name,
(one_field + another_field) as c,
a final_field
a_long_field_name
, another_long_field_name
, (one_field + another_field) as c
, a final_field
from my_schema."my_QUOTED_ table!"
where one_field < another_field
19 changes: 12 additions & 7 deletions tests/data/preformatted/003_literals.sql
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
select
1 + 1,
sum(1.05),
1.4 - 15.17,
-.45 + -0.99,
-0.710 - sum(-34.5),
(1 + 1) - (4 - 0.6),
3.14159
1
+ 1
, sum(1.05)
, 1.4
- 15.17
, -.45
+ -0.99
, -0.710
- sum(-34.5)
, (1 + 1)
- (4 - 0.6)
, 3.14159
9 changes: 8 additions & 1 deletion tests/data/preformatted/004_with_select.sql
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
with my_cte as (select 1, b, another_field from my_schema.my_table)
with
my_cte as (
select
1
, b
, another_field
from my_schema.my_table
)
select *
from another_cte
38 changes: 19 additions & 19 deletions tests/data/preformatted/008_reserved_names.sql
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
with
"select" as (select * from foo.select),
"case" as (select * from foo.case),
"end" as (select * from foo.end),
"as" as (select * from foo.as),
"interval" as (select * from foo.interval),
"exclude" as (select * from foo.exclude),
"using" as (select * from foo.using),
"on" as (select * from foo.on),
"and" as (select * from foo.and),
"limit" as (select * from foo.limit),
"over" as (select * from foo.over),
"between" as (select * from foo.between),
"union" as (select * from foo.union),
"explain" as (select * from foo.explain),
"grant" as (select * from foo.grant),
"create" as (select * from foo.create),
"alter" as (select * from foo.alter),
"truncate" as (select * from foo.truncate),
"drop" as (select * from foo.drop)
"select" as (select * from foo.select)
, "case" as (select * from foo.case)
, "end" as (select * from foo.end)
, "as" as (select * from foo.as)
, "interval" as (select * from foo.interval)
, "exclude" as (select * from foo.exclude)
, "using" as (select * from foo.using)
, "on" as (select * from foo.on)
, "and" as (select * from foo.and)
, "limit" as (select * from foo.limit)
, "over" as (select * from foo.over)
, "between" as (select * from foo.between)
, "union" as (select * from foo.union)
, "explain" as (select * from foo.explain)
, "grant" as (select * from foo.grant)
, "create" as (select * from foo.create)
, "alter" as (select * from foo.alter)
, "truncate" as (select * from foo.truncate)
, "drop" as (select * from foo.drop)
select 1
16 changes: 11 additions & 5 deletions tests/data/preformatted/301_multiline_jinjafmt.sql
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,18 @@ with
end_date="sysdate() + interval '1 year'",
)
}}
),
final as (
)
, final as (
select
date_day as dt,
date_trunc('month', date_day) as mnth,
date_part('day', date_day) as day_of_month
date_day as dt
, date_trunc(
'month'
, date_day
) as mnth
, date_part(
'day'
, date_day
) as day_of_month
from base_spine
)

Expand Down
5 changes: 4 additions & 1 deletion tests/data/preformatted/302_jinjafmt_multiline_str.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,7 @@
)
}}

select campaign_name, date_part, count(distinct user_id) as users
select
campaign_name
, date_part
, count(distinct user_id) as users
31 changes: 20 additions & 11 deletions tests/data/unformatted/100_select_case.sql
Original file line number Diff line number Diff line change
Expand Up @@ -25,21 +25,30 @@ select
from some_table
)))))__SQLFMT_OUTPUT__(((((
select
my_first_field,
my_second_field as an_alias,
case
my_first_field
,
my_second_field as an_alias
, case
when another_field = some_other_value then some_really_long_value
end as my_case_statement,
case
end as my_case_statement
, case
when caser = 'my_literal'
then some_really_really_long_value_to_wrap_this_next_line
else 42
end,
case when (my_field) then end_field end::numeric(10, 2) as casted_case,
(case when ending then false end) + (case when 2 then true end)::varchar(10),
another_field,
case when true then 10 end + 4,
case -- a comment with no spaces
end
,
case when (my_field) then end_field end::numeric(
10
, 2
) as casted_case
,
(case when ending then false end)
+ (case when 2 then true end)::varchar(10)
, another_field
,
case when true then 10 end
+ 4
, case -- a comment with no spaces
when something_long_that_keeps_this_from_wrapping
then something_else_long_long_long
else another_super_long_field_name
Expand Down
Loading