datastx · datastx · Feb 23, 2026 · Feb 23, 2026
diff --git a/src/line.rs b/src/line.rs
@@ -250,6 +250,15 @@ impl Line {
             .find(|n| !n.is_newline())
     }
 
+    /// Get the second non-newline node (useful for leading-comma lines like `, lateral`).
+    pub fn second_content_node<'a>(&self, arena: &'a [Node]) -> Option<&'a Node> {
+        self.nodes
+            .iter()
+            .map(|&i| &arena[i])
+            .filter(|n| !n.is_newline())
+            .nth(1)
+    }
+
     /// Get the index of the first non-newline node.
     pub fn first_content_node_idx(&self, arena: &[Node]) -> Option<NodeIndex> {
         self.nodes.iter().copied().find(|&i| !arena[i].is_newline())

diff --git a/src/merger.rs b/src/merger.rs
@@ -155,32 +155,44 @@ impl LineMerger {
         }
     }
 
-    /// Convert leading commas to trailing commas by merging standalone comma
-    /// lines with the preceding non-blank content line.
-    /// Python sqlfmt always uses trailing comma style.
+    /// Merge standalone comma lines with the FOLLOWING content line.
+    /// Leading-comma style: `, next_item` on one line.
     /// Don't merge commas with Jinja block tags ({% if %}, {% else %}, etc.)
     /// as the comma is part of the block's content, not the tag itself.
     /// Uses a mark-and-sweep approach to avoid O(n²) from Vec::remove().
     fn fix_standalone_commas(&self, lines: &mut Vec<Line>, arena: &[Node]) {
         let len = lines.len();
         let mut remove = vec![false; len];
-        let mut last_content_idx: Option<usize> = None;
 
         for i in 0..len {
             if remove[i] {
                 continue;
             }
-            if lines[i].is_standalone_comma(arena) && i > 0 {
-                if let Some(pi) = last_content_idx {
-                    if !is_jinja_block_line(&lines[pi], arena) {
-                        let to_merge = vec![lines[pi].clone(), lines[i].clone()];
+            if lines[i].is_standalone_comma(arena) {
+                // Find next non-blank content line
+                let mut next_content_idx: Option<usize> = None;
+                for j in (i + 1)..len {
+                    if !remove[j]
+                        && !lines[j].is_blank_line(arena)
+                        && !lines[j].is_standalone_comment_line(arena)
+                    {
+                        next_content_idx = Some(j);
+                        break;
+                    }
+                }
+                if let Some(ni) = next_content_idx {
+                    if !is_jinja_block_line(&lines[ni], arena)
+                        && !line_has_interior_split_points(&lines[ni], arena)
+                    {
+                        let to_merge = vec![lines[i].clone(), lines[ni].clone()];
                         if let Ok(merged) = self.create_merged_line(&to_merge, arena) {
                             if let Some(merged_line) =
                                 merged.into_iter().find(|l| !l.is_blank_line(arena))
                             {
-                                lines[pi] = merged_line;
+                                lines[ni] = merged_line;
                                 remove[i] = true;
-                                for j in (pi + 1)..i {
+                                // Remove blank lines between comma and next content
+                                for j in (i + 1)..ni {
                                     if lines[j].is_blank_line(arena) {
                                         remove[j] = true;
                                     }
@@ -191,10 +203,6 @@ impl LineMerger {
                     }
                 }
             }
-
-            if !lines[i].is_blank_line(arena) && !lines[i].is_standalone_comment_line(arena) {
-                last_content_idx = Some(i);
-            }
         }
 
         if remove.iter().any(|&r| r) {
@@ -293,6 +301,13 @@ impl LineMerger {
                 return Err(ControlFlow::CannotMerge);
             }
 
+            // Leading-comma style: never merge across comma-starting lines.
+            // Must check BEFORE setting first_content_seen so that the first
+            // content line of a segment (e.g., `, sum(`) is not blocked.
+            if is_content && first_content_seen && line.starts_with_comma(arena) {
+                return Err(ControlFlow::CannotMerge);
+            }
+
             // --- Interior standalone comment detection ---
             if is_content {
                 if pending_interior_comment {
@@ -512,6 +527,10 @@ impl LineMerger {
             });
         }
 
+        // LATERAL: with leading commas, the comma is on the LATERAL line itself
+        // (e.g., `, lateral flatten(...)`) so check if the head line starts with comma
+        // and the second content node is LATERAL, or if the first content is LATERAL
+        // and the previous segment's tail ends with comma.
         if first.token.token_type == crate::token::TokenType::UntermKeyword
             && first.value.eq_ignore_ascii_case("lateral")
         {
@@ -525,6 +544,16 @@ impl LineMerger {
             }
             return self.create_merged_line(&segment.lines, arena).is_ok();
         }
+        // Leading-comma style: `, lateral` — first content is comma, second is lateral
+        if first.is_comma() {
+            if let Some(second) = line.second_content_node(arena) {
+                if second.token.token_type == crate::token::TokenType::UntermKeyword
+                    && second.value.eq_ignore_ascii_case("lateral")
+                {
+                    return self.create_merged_line(&segment.lines, arena).is_ok();
+                }
+            }
+        }
 
         false
     }
@@ -750,6 +779,34 @@ impl LineMerger {
     }
 }
 
+/// Check if a line has interior nodes that would cause the splitter to re-split
+/// if the line were prepended with a comma. This prevents fix_standalone_commas
+/// from creating non-idempotent output.
+fn line_has_interior_split_points(line: &Line, arena: &[Node]) -> bool {
+    let mut first_content = true;
+    for &idx in &line.nodes {
+        let node = &arena[idx];
+        if node.is_newline() {
+            continue;
+        }
+        if first_content {
+            first_content = false;
+            continue;
+        }
+        // These would cause the splitter to split before them
+        if node.is_operator(arena) && !node.is_bracket_operator(arena) {
+            return true;
+        }
+        if node.is_boolean_operator() {
+            return true;
+        }
+        if node.is_unterm_keyword() {
+            return true;
+        }
+    }
+    false
+}
+
 /// Check if a line starts with a Jinja block tag.
 fn is_jinja_block_line(line: &Line, arena: &[Node]) -> bool {
     line.first_content_node(arena)
@@ -923,11 +980,11 @@ mod tests {
 
     #[test]
     fn test_create_merged_line_basic() {
-        // Two short columns should merge onto one SELECT line
+        // Two short columns should stay split with leading commas
         let result = format_sql("SELECT\n    a,\n    b\n");
         assert!(
-            result.contains("a,"),
-            "Short columns should merge: {}",
+            result.contains(", b"),
+            "Short columns should use leading commas: {}",
             result
         );
     }

diff --git a/src/splitter.rs b/src/splitter.rs
@@ -10,8 +10,8 @@ use crate::token::{Token, TokenType};
 ///
 /// Mirrors the Python sqlfmt LineSplitter exactly:
 /// - Iterates node-by-node with split_before/split_after flags
-/// - Splits AFTER commas, opening brackets, keywords, query dividers
-/// - Splits BEFORE operators, keywords, closing brackets, multiline jinja
+/// - Splits AFTER opening brackets, keywords, query dividers
+/// - Splits BEFORE commas, operators, keywords, closing brackets, multiline jinja
 /// - Uses iterative (not recursive) approach to handle very long lines
 pub struct LineSplitter;
 
@@ -135,6 +135,11 @@ impl LineSplitter {
             return true;
         }
 
+        // Leading-comma style: split BEFORE commas so they lead the next line
+        if node.is_comma() {
+            return true;
+        }
+
         false
     }
 
@@ -162,9 +167,6 @@ impl LineSplitter {
     fn maybe_split_after(&self, node_idx: NodeIndex, arena: &[Node]) -> (bool, bool) {
         let node = &arena[node_idx];
 
-        if node.is_comma() {
-            return (true, false);
-        }
         // BUT NOT after angle brackets (< for type constructors like array<int64>).
         // Angle bracket content is typically short and should stay on the same line.
         if node.is_opening_bracket() {
@@ -230,8 +232,6 @@ impl LineSplitter {
             (comments, Vec::new())
         } else if comments.is_empty() {
             (Vec::new(), Vec::new())
-        } else if new_nodes.len() == 1 && arena[new_nodes[0]].token.token_type == TokenType::Comma {
-            (Vec::new(), comments)
         } else {
             // Use slice contains() instead of HashSet for small node sets
             let remaining_nodes: &[NodeIndex] = if index < line.nodes.len() {
@@ -365,7 +365,8 @@ mod tests {
     }
 
     #[test]
-    fn test_split_after_comma() {
+    fn test_split_before_comma() {
+        // Leading-comma style: split BEFORE the comma
         let mut arena = Vec::new();
         let a = make_node(&mut arena, TokenType::Name, "a", "");
         let comma = make_node(&mut arena, TokenType::Comma, ",", "");
@@ -382,6 +383,8 @@ mod tests {
             "Expected at least 2 lines, got {}",
             result.len()
         );
+        // First line should have "a" (no comma)
+        // Second line should start with comma
     }
 
     #[test]

diff --git a/tests/data/preformatted/002_select_from_where.sql b/tests/data/preformatted/002_select_from_where.sql
@@ -1,7 +1,7 @@
 select
-    a_long_field_name,
-    another_long_field_name,
-    (one_field + another_field) as c,
-    a final_field
+    a_long_field_name
+    , another_long_field_name
+    , (one_field + another_field) as c
+    , a final_field
 from my_schema."my_QUOTED_ table!"
 where one_field < another_field
diff --git a/tests/data/preformatted/003_literals.sql b/tests/data/preformatted/003_literals.sql
@@ -1,8 +1,13 @@
 select
-    1 + 1,
-    sum(1.05),
-    1.4 - 15.17,
-    -.45 + -0.99,
-    -0.710 - sum(-34.5),
-    (1 + 1) - (4 - 0.6),
-    3.14159
+    1
+    + 1
+    , sum(1.05)
+    , 1.4
+    - 15.17
+    , -.45
+    + -0.99
+    , -0.710
+    - sum(-34.5)
+    , (1 + 1)
+    - (4 - 0.6)
+    , 3.14159
diff --git a/tests/data/preformatted/004_with_select.sql b/tests/data/preformatted/004_with_select.sql
@@ -1,3 +1,10 @@
-with my_cte as (select 1, b, another_field from my_schema.my_table)
+with
+    my_cte as (
+        select
+            1
+            , b
+            , another_field
+        from my_schema.my_table
+    )
 select *
 from another_cte
diff --git a/tests/data/preformatted/008_reserved_names.sql b/tests/data/preformatted/008_reserved_names.sql
@@ -1,21 +1,21 @@
 with
-    "select" as (select * from foo.select),
-    "case" as (select * from foo.case),
-    "end" as (select * from foo.end),
-    "as" as (select * from foo.as),
-    "interval" as (select * from foo.interval),
-    "exclude" as (select * from foo.exclude),
-    "using" as (select * from foo.using),
-    "on" as (select * from foo.on),
-    "and" as (select * from foo.and),
-    "limit" as (select * from foo.limit),
-    "over" as (select * from foo.over),
-    "between" as (select * from foo.between),
-    "union" as (select * from foo.union),
-    "explain" as (select * from foo.explain),
-    "grant" as (select * from foo.grant),
-    "create" as (select * from foo.create),
-    "alter" as (select * from foo.alter),
-    "truncate" as (select * from foo.truncate),
-    "drop" as (select * from foo.drop)
+    "select" as (select * from foo.select)
+    , "case" as (select * from foo.case)
+    , "end" as (select * from foo.end)
+    , "as" as (select * from foo.as)
+    , "interval" as (select * from foo.interval)
+    , "exclude" as (select * from foo.exclude)
+    , "using" as (select * from foo.using)
+    , "on" as (select * from foo.on)
+    , "and" as (select * from foo.and)
+    , "limit" as (select * from foo.limit)
+    , "over" as (select * from foo.over)
+    , "between" as (select * from foo.between)
+    , "union" as (select * from foo.union)
+    , "explain" as (select * from foo.explain)
+    , "grant" as (select * from foo.grant)
+    , "create" as (select * from foo.create)
+    , "alter" as (select * from foo.alter)
+    , "truncate" as (select * from foo.truncate)
+    , "drop" as (select * from foo.drop)
 select 1
diff --git a/tests/data/preformatted/301_multiline_jinjafmt.sql b/tests/data/preformatted/301_multiline_jinjafmt.sql
@@ -7,12 +7,18 @@ with
                 end_date="sysdate() + interval '1 year'",
             )
         }}
-    ),
-    final as (
+    )
+    , final as (
         select
-            date_day as dt,
-            date_trunc('month', date_day) as mnth,
-            date_part('day', date_day) as day_of_month
+            date_day as dt
+            , date_trunc(
+                'month'
+                , date_day
+            ) as mnth
+            , date_part(
+                'day'
+                , date_day
+            ) as day_of_month
         from base_spine
     )
 

diff --git a/tests/data/preformatted/302_jinjafmt_multiline_str.sql b/tests/data/preformatted/302_jinjafmt_multiline_str.sql
@@ -15,4 +15,7 @@
     )
 }}
 
-select campaign_name, date_part, count(distinct user_id) as users
+select
+    campaign_name
+    , date_part
+    , count(distinct user_id) as users
diff --git a/tests/data/unformatted/100_select_case.sql b/tests/data/unformatted/100_select_case.sql
@@ -25,21 +25,30 @@ select
 from some_table
 )))))__SQLFMT_OUTPUT__(((((
 select
-    my_first_field,
-    my_second_field as an_alias,
-    case
+    my_first_field
+    ,
+    my_second_field as an_alias
+    , case
         when another_field = some_other_value then some_really_long_value
-    end as my_case_statement,
-    case
+    end as my_case_statement
+    , case
         when caser = 'my_literal'
         then some_really_really_long_value_to_wrap_this_next_line
         else 42
-    end,
-    case when (my_field) then end_field end::numeric(10, 2) as casted_case,
-    (case when ending then false end) + (case when 2 then true end)::varchar(10),
-    another_field,
-    case when true then 10 end + 4,
-    case  -- a comment with no spaces
+    end
+    ,
+    case when (my_field) then end_field end::numeric(
+        10
+        , 2
+    ) as casted_case
+    ,
+    (case when ending then false end)
+    + (case when 2 then true end)::varchar(10)
+    , another_field
+    ,
+    case when true then 10 end
+    + 4
+    , case  -- a comment with no spaces
         when something_long_that_keeps_this_from_wrapping
         then something_else_long_long_long
         else another_super_long_field_name