From fef69dce65847c3e67059f97ec44d5bacaac9db6 Mon Sep 17 00:00:00 2001 From: Matjaz Domen Pecan Date: Tue, 7 Apr 2026 17:40:35 +0100 Subject: [PATCH] feat: recognize brace expansion patterns in Word.parts Add post-hoc brace expansion detection so downstream consumers (e.g. security tools) can identify {a,b,c} and {1..10} patterns in Word.parts without changing s-expression output. - Add WordSpanKind::BraceExpansion and detect_brace_expansions() - Add NodeKind::BraceExpansion { content } to AST - Add WordSegment::BraceExpansion and wire through segment pipeline - Extract span_to_segment() helper, collapse duplicate match arms - 15 new unit tests covering comma, range, nested, edge cases Co-Authored-By: Claude Opus 4.6 (1M context) --- src/ast.rs | 3 + src/format/mod.rs | 4 +- src/lexer/brace_expansion.rs | 189 +++++++++++++++++++++++++++++++++++ src/lexer/mod.rs | 1 + src/lexer/word_builder.rs | 2 + src/lexer/words.rs | 2 + src/parser/word_parts.rs | 41 ++++++++ src/sexp/mod.rs | 5 +- src/sexp/word.rs | 102 +++++++++++-------- 9 files changed, 304 insertions(+), 45 deletions(-) create mode 100644 src/lexer/brace_expansion.rs diff --git a/src/ast.rs b/src/ast.rs index 3fff3d4..e774a33 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -224,6 +224,9 @@ pub enum NodeKind { /// Locale string: `$"..."` LocaleString { content: String }, + /// Brace expansion: `{a,b,c}` or `{1..10}`. 
+ BraceExpansion { content: String }, + /// Arithmetic expansion: `$(( expr ))` ArithmeticExpansion { expression: Option<Box<Node>> }, diff --git a/src/format/mod.rs b/src/format/mod.rs index 06be22a..fa2fe46 100644 --- a/src/format/mod.rs +++ b/src/format/mod.rs @@ -605,7 +605,9 @@ fn process_word_value(value: &str, spans: &[crate::lexer::word_builder::WordSpan] } result.push(')'); } - WordSegment::ParamExpansion(text) | WordSegment::SimpleVar(text) => { + WordSegment::ParamExpansion(text) + | WordSegment::SimpleVar(text) + | WordSegment::BraceExpansion(text) => { result.push_str(text); } } diff --git a/src/lexer/brace_expansion.rs b/src/lexer/brace_expansion.rs new file mode 100644 index 0000000..4bf1753 --- /dev/null +++ b/src/lexer/brace_expansion.rs @@ -0,0 +1,189 @@ +//! Post-hoc brace expansion detection. +//! +//! After a word is fully built by the lexer, scans the word value +//! to identify brace expansion patterns (`{a,b,c}`, `{1..10}`) and +//! records `WordSpanKind::BraceExpansion` spans. Existing spans +//! (quotes, escapes, parameter expansions) are used to skip +//! protected regions. + +use super::word_builder::{QuotingContext, WordBuilder, WordSpan, WordSpanKind}; + +/// Scans the completed word value for brace expansion patterns and +/// records `BraceExpansion` spans for each one found. +pub(super) fn detect_brace_expansions(wb: &mut WordBuilder) { + let value = wb.value.as_bytes(); + let spans = &wb.spans; + let mut new_spans: Vec<WordSpan> = Vec::new(); + let mut i = 0; + + while i < value.len() { + if value[i] != b'{' { + i += 1; + continue; + } + + // Skip if preceded by $ (parameter expansion) + if i > 0 && value[i - 1] == b'$' { + i += 1; + continue; + } + + // Skip if inside an existing span + if span_end_at(i, spans).is_some() { + i += 1; + continue; + } + + // Try to find matching } with a comma or .. 
separator + if let Some(close) = find_brace_close(value, i, spans) { + new_spans.push(WordSpan { + start: i, + end: close + 1, + kind: WordSpanKind::BraceExpansion, + context: QuotingContext::None, + }); + i = close + 1; + } else { + i += 1; + } + } + + wb.spans.extend(new_spans); +} + +/// Returns the byte index of the matching `}` if the content between +/// `{` and `}` contains a `,` or `..` at depth 1. Returns `None` if +/// no valid brace expansion is found. +fn find_brace_close(value: &[u8], open: usize, spans: &[WordSpan]) -> Option<usize> { + let mut depth: i32 = 1; + let mut has_comma = false; + let mut has_dotdot = false; + let mut j = open + 1; + + while j < value.len() { + // Skip positions inside existing spans + if let Some(end) = span_end_at(j, spans) { + j = end; + continue; + } + + match value[j] { + b'{' => depth += 1, + b'}' => { + depth -= 1; + if depth == 0 { + if has_comma || has_dotdot { + return Some(j); + } + return None; + } + } + b',' if depth == 1 => has_comma = true, + b'.' if depth == 1 && j + 1 < value.len() && value[j + 1] == b'.' => { + has_dotdot = true; + } + _ => {} + } + j += 1; + } + + None +} + +/// If `pos` falls inside an existing span, returns the span's end offset. +fn span_end_at(pos: usize, spans: &[WordSpan]) -> Option<usize> { + spans + .iter() + .find(|s| pos >= s.start && pos < s.end) + .map(|s| s.end) +} + +#[cfg(test)] +mod tests { + use crate::lexer::word_builder::WordSpanKind; + + /// Helper: lex a single word and check for `BraceExpansion` spans. 
+ #[allow(clippy::unwrap_used)] + fn brace_spans(source: &str) -> Vec<(usize, usize)> { + let mut lexer = crate::lexer::Lexer::new(source, false); + let tok = lexer.next_token().unwrap(); + tok.spans + .iter() + .filter(|s| s.kind == WordSpanKind::BraceExpansion) + .map(|s| (s.start, s.end)) + .collect() + } + + #[test] + fn comma_form() { + let spans = brace_spans("{a,b,c}"); + assert_eq!(spans, vec![(0, 7)]); + } + + #[test] + fn range_form() { + let spans = brace_spans("{1..10}"); + assert_eq!(spans, vec![(0, 7)]); + } + + #[test] + fn mid_word() { + let spans = brace_spans("file{1,2}.txt"); + assert_eq!(spans, vec![(4, 9)]); + } + + #[test] + fn nested_braces() { + let spans = brace_spans("{a,{b,c}}"); + assert_eq!(spans, vec![(0, 9)]); + } + + #[test] + fn empty_braces_not_expansion() { + let spans = brace_spans("{}"); + assert!(spans.is_empty()); + } + + #[test] + fn single_element_not_expansion() { + let spans = brace_spans("{a}"); + assert!(spans.is_empty()); + } + + #[test] + fn trailing_comma() { + let spans = brace_spans("{a,}"); + assert_eq!(spans, vec![(0, 4)]); + } + + #[test] + fn leading_comma() { + let spans = brace_spans("{,a}"); + assert_eq!(spans, vec![(0, 4)]); + } + + #[test] + fn param_expansion_not_brace() { + // ${foo} should NOT produce a BraceExpansion span + let spans = brace_spans("${foo}"); + assert!(spans.is_empty()); + } + + #[test] + fn adjacent_brace_expansions() { + let spans = brace_spans("{a,b}{c,d}"); + assert_eq!(spans, vec![(0, 5), (5, 10)]); + } + + #[test] + fn alpha_range() { + let spans = brace_spans("{a..z}"); + assert_eq!(spans, vec![(0, 6)]); + } + + #[test] + fn range_with_step() { + let spans = brace_spans("{1..10..2}"); + assert_eq!(spans, vec![(0, 10)]); + } +} diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index 3334d61..5778e37 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -1,6 +1,7 @@ use crate::error::{RableError, Result}; use crate::token::{Token, TokenType}; +mod brace_expansion; mod expansions; 
mod heredoc; mod operators; diff --git a/src/lexer/word_builder.rs b/src/lexer/word_builder.rs index a8298d1..491a2c3 100644 --- a/src/lexer/word_builder.rs +++ b/src/lexer/word_builder.rs @@ -148,4 +148,6 @@ pub enum WordSpanKind { DeprecatedArith, /// Backslash escape: `\X` (not `\` line continuations). Escape, + /// Brace expansion: `{a,b,c}` or `{1..10}`. + BraceExpansion, } diff --git a/src/lexer/words.rs b/src/lexer/words.rs index 0bfb3e1..5c6c6d6 100644 --- a/src/lexer/words.rs +++ b/src/lexer/words.rs @@ -68,6 +68,8 @@ impl Lexer { } } + super::brace_expansion::detect_brace_expansions(&mut wb); + if wb.is_empty() { return Err(RableError::parse("unexpected character", start, line)); } diff --git a/src/parser/word_parts.rs b/src/parser/word_parts.rs index ba438ce..5ab7fee 100644 --- a/src/parser/word_parts.rs +++ b/src/parser/word_parts.rs @@ -67,6 +67,9 @@ fn segment_to_node(seg: WordSegment) -> Node { } WordSegment::SimpleVar(text) => parse_simple_var(&text), WordSegment::ParamExpansion(text) => parse_braced_param(&text), + WordSegment::BraceExpansion(text) => { + Node::empty(NodeKind::BraceExpansion { content: text }) + } } } @@ -517,4 +520,42 @@ mod tests { NodeKind::LocaleString { content } if content == "\"hello\"" )); } + + #[test] + fn brace_expansion_comma() { + let parts = decompose("{a,b,c}"); + assert_eq!(parts.len(), 1); + assert!(matches!( + &parts[0].kind, + NodeKind::BraceExpansion { content } if content == "{a,b,c}" + )); + } + + #[test] + fn brace_expansion_range() { + let parts = decompose("{1..10}"); + assert_eq!(parts.len(), 1); + assert!(matches!( + &parts[0].kind, + NodeKind::BraceExpansion { content } if content == "{1..10}" + )); + } + + #[test] + fn brace_expansion_mid_word() { + let parts = decompose("file{1,2}.txt"); + assert_eq!(parts.len(), 3); + assert!(matches!( + &parts[0].kind, + NodeKind::WordLiteral { value } if value == "file" + )); + assert!(matches!( + &parts[1].kind, + NodeKind::BraceExpansion { content } if content == 
"{1,2}" + )); + assert!(matches!( + &parts[2].kind, + NodeKind::WordLiteral { value } if value == ".txt" + )); + } } diff --git a/src/sexp/mod.rs b/src/sexp/mod.rs index 5ad6077..7d98d43 100644 --- a/src/sexp/mod.rs +++ b/src/sexp/mod.rs @@ -160,6 +160,7 @@ impl fmt::Display for NodeKind { } Self::AnsiCQuote { content } => write!(f, "$'{content}'"), Self::LocaleString { content } => write!(f, "$\"{content}\""), + Self::BraceExpansion { content } => write!(f, "{content}"), Self::ArithmeticExpansion { expression } => { write_arith_wrapper(f, "arith", expression.as_deref()) } @@ -735,7 +736,9 @@ fn write_redirect_segments( } write!(f, ")")?; } - word::WordSegment::ParamExpansion(text) | word::WordSegment::SimpleVar(text) => { + word::WordSegment::ParamExpansion(text) + | word::WordSegment::SimpleVar(text) + | word::WordSegment::BraceExpansion(text) => { write!(f, "{text}")?; } } diff --git a/src/sexp/word.rs b/src/sexp/word.rs index 33bacb4..66bfc79 100644 --- a/src/sexp/word.rs +++ b/src/sexp/word.rs @@ -30,6 +30,8 @@ pub enum WordSegment { ParamExpansion(String), /// Simple variable `$var` — raw text includes `$` prefix. SimpleVar(String), + /// Brace expansion `{a,b,c}` or `{1..10}` — raw text includes braces. + BraceExpansion(String), } /// Formats word segments into S-expression output. 
@@ -38,7 +40,8 @@ pub fn write_word_segments(f: &mut fmt::Formatter<'_>, segments: &[WordSegment]) match seg { WordSegment::Literal(text) | WordSegment::ParamExpansion(text) - | WordSegment::SimpleVar(text) => { + | WordSegment::SimpleVar(text) + | WordSegment::BraceExpansion(text) => { for ch in text.chars() { write_escaped_char(f, ch)?; } @@ -121,7 +124,6 @@ fn build_segments( spans: &[crate::lexer::word_builder::WordSpan], filter: fn(&crate::lexer::word_builder::WordSpanKind) -> bool, ) -> Vec<WordSegment> { - use crate::lexer::word_builder::{QuotingContext, WordSpanKind}; let top_level = collect_filtered_spans(spans, filter); let mut segments = Vec::new(); let mut pos = 0; @@ -131,55 +133,66 @@ { segments.push(WordSegment::Literal(text.to_string())); } - match &span.kind { - WordSpanKind::CommandSub => { - if let Some(c) = value.get(span.start + 2..span.end - 1) { - segments.push(WordSegment::CommandSubstitution(c.to_string())); - } - } - WordSpanKind::ProcessSub(dir) => { - if let Some(c) = value.get(span.start + 2..span.end - 1) { - segments.push(WordSegment::ProcessSubstitution(*dir, c.to_string())); - } + span_to_segment(&mut segments, value, span); + pos = span.end; + } + if pos < value.len() + && let Some(text) = value.get(pos..) + { + segments.push(WordSegment::Literal(text.to_string())); + } + segments +} + +/// Converts a single span into the appropriate `WordSegment` and appends it. 
+fn span_to_segment( + segments: &mut Vec<WordSegment>, + value: &str, + span: &crate::lexer::word_builder::WordSpan, +) { + use crate::lexer::word_builder::{QuotingContext, WordSpanKind}; + match &span.kind { + WordSpanKind::CommandSub => { + if let Some(c) = value.get(span.start + 2..span.end - 1) { + segments.push(WordSegment::CommandSubstitution(c.to_string())); } - WordSpanKind::AnsiCQuote => { - push_ansi_c_span(&mut segments, value, span); + } + WordSpanKind::ProcessSub(dir) => { + if let Some(c) = value.get(span.start + 2..span.end - 1) { + segments.push(WordSegment::ProcessSubstitution(*dir, c.to_string())); } - WordSpanKind::LocaleString => { - match span.context { - QuotingContext::DoubleQuote => { - // $"..." inside "..." is literal (not a locale string) - if let Some(text) = value.get(span.start..span.end) { - push_literal(&mut segments, text); - } - } - _ => { - if let Some(c) = value.get(span.start + 1..span.end) { - segments.push(WordSegment::LocaleString(c.to_string())); - } + } + WordSpanKind::AnsiCQuote => { + push_ansi_c_span(segments, value, span); + } + WordSpanKind::LocaleString => { + match span.context { + QuotingContext::DoubleQuote => { + // $"..." inside "..." 
is literal (not a locale string) + if let Some(text) = value.get(span.start..span.end) { + push_literal(segments, text); } } - } - WordSpanKind::ParamExpansion => { - if let Some(text) = value.get(span.start..span.end) { - segments.push(WordSegment::ParamExpansion(text.to_string())); + _ => { + if let Some(c) = value.get(span.start + 1..span.end) { + segments.push(WordSegment::LocaleString(c.to_string())); + } } } - WordSpanKind::SimpleVar => { - if let Some(text) = value.get(span.start..span.end) { - segments.push(WordSegment::SimpleVar(text.to_string())); - } + } + WordSpanKind::ParamExpansion | WordSpanKind::SimpleVar | WordSpanKind::BraceExpansion => { + if let Some(text) = value.get(span.start..span.end) { + let seg = match &span.kind { + WordSpanKind::ParamExpansion => WordSegment::ParamExpansion, + WordSpanKind::SimpleVar => WordSegment::SimpleVar, + WordSpanKind::BraceExpansion => WordSegment::BraceExpansion, + _ => unreachable!(), + }; + segments.push(seg(text.to_string())); } - _ => {} // filtered out by span filter } - pos = span.end; - } - if pos < value.len() - && let Some(text) = value.get(pos..) - { - segments.push(WordSegment::Literal(text.to_string())); + _ => {} // filtered out by span filter } - segments } /// Handles `$'...'` spans with context-sensitive behavior: @@ -243,7 +256,10 @@ const fn is_decomposable(kind: &crate::lexer::word_builder::WordSpanKind) -> boo if is_sexp_relevant(kind) { return true; } - matches!(kind, WordSpanKind::ParamExpansion | WordSpanKind::SimpleVar) + matches!( + kind, + WordSpanKind::ParamExpansion | WordSpanKind::SimpleVar | WordSpanKind::BraceExpansion + ) } /// Collects top-level spans matching `filter`, sorted by start offset.