From 8be2d3421981b46d5f03b5efbb22d76b1b3bb304 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Mon, 9 Mar 2026 23:52:36 +0800 Subject: [PATCH 01/18] refactor(tree_path): extract LanguageConfig abstraction Refactor tree_path.rs to use a data-driven LanguageConfig struct instead of hardcoded Rust-specific logic. This enables adding new languages by only adding a new LanguageConfig constant and Cargo feature. Changes: - Add LanguageConfig struct with ts_language, extensions, kind_map, name_field, name_overrides, and body_fields - Convert Rust-specific KIND_MAP, node_name(), and body resolution to LanguageConfig methods - Update all resolve/compute functions to take &LanguageConfig - Use fn() -> TSLanguage for lazy language initialization All existing tests pass. No behavioral changes. Original prompt: > Let's first work on refactoring and multi-language support. Please > remember to commit frequently as you make changes. And you must respect > the commit message convention. Work in a new branch and PR when ready. AI-assisted-by: Kimi K2.5 (OpenClaw) Signed-off-by: WANG Xuerui --- crates/liyi/src/tree_path.rs | 244 ++++++++++++++++++++++------------- 1 file changed, 155 insertions(+), 89 deletions(-) diff --git a/crates/liyi/src/tree_path.rs b/crates/liyi/src/tree_path.rs index 0e9f36c..69d3b19 100644 --- a/crates/liyi/src/tree_path.rs +++ b/crates/liyi/src/tree_path.rs @@ -11,81 +11,136 @@ use std::path::Path; -use tree_sitter::{Node, Parser}; - -/// Map from tree_path kind shorthand to tree-sitter-rust node kind strings. -const KIND_MAP: &[(&str, &str)] = &[ - ("fn", "function_item"), - ("struct", "struct_item"), - ("enum", "enum_item"), - ("impl", "impl_item"), - ("trait", "trait_item"), - ("mod", "mod_item"), - ("const", "const_item"), - ("static", "static_item"), - ("type", "type_item"), - ("macro", "macro_definition"), -]; - -/// Reverse map: tree-sitter node kind → tree_path shorthand. 
-fn kind_to_shorthand(ts_kind: &str) -> Option<&'static str> { - KIND_MAP - .iter() - .find(|(_, ts)| *ts == ts_kind) - .map(|(short, _)| *short) -} +use tree_sitter::{Language as TSLanguage, Node, Parser}; -/// Forward map: tree_path shorthand → tree-sitter node kind. -fn shorthand_to_kind(short: &str) -> Option<&'static str> { - KIND_MAP - .iter() - .find(|(s, _)| *s == short) - .map(|(_, ts)| *ts) +/// Language-specific configuration for tree_path resolution. +/// +/// Each supported language provides a static `LanguageConfig` that defines +/// how to parse it and map between tree-sitter node kinds and tree_path +/// shorthands. +pub struct LanguageConfig { + /// Function to get the tree-sitter language grammar (lazy initialization). + pub ts_language: fn() -> TSLanguage, + /// File extensions associated with this language. + pub extensions: &'static [&'static str], + /// Map from tree_path kind shorthand to tree-sitter node kind. + pub kind_map: &'static [(&'static str, &'static str)], + /// Field name to extract the node's name (usually "name"). + pub name_field: &'static str, + /// Overrides for special cases: (node_kind, field_name) pairs. + pub name_overrides: &'static [(&'static str, &'static str)], + /// Field names to traverse to find a node's body/declaration_list. + pub body_fields: &'static [&'static str], } -/// Detect language from file extension. Returns `None` for unsupported -/// languages (only Rust is supported in 0.1). -pub fn detect_language(path: &Path) -> Option { - match path.extension()?.to_str()? { - "rs" => Some(Language::Rust), - _ => None, +impl LanguageConfig { + /// Map tree-sitter node kind → tree_path shorthand. + fn kind_to_shorthand(&self, ts_kind: &str) -> Option<&'static str> { + self.kind_map + .iter() + .find(|(_, ts)| *ts == ts_kind) + .map(|(short, _)| *short) + } + + /// Map tree_path shorthand → tree-sitter node kind. 
+ fn shorthand_to_kind(&self, short: &str) -> Option<&'static str> { + self.kind_map + .iter() + .find(|(s, _)| *s == short) + .map(|(_, ts)| *ts) + } + + /// Extract the name of a named AST node. + fn node_name<'a>(&self, node: &Node<'a>, source: &'a str) -> Option<&'a str> { + let kind = node.kind(); + + // Check for name field override (e.g., impl_item uses "type" field) + let field_name = self + .name_overrides + .iter() + .find(|(k, _)| *k == kind) + .map(|(_, f)| *f) + .unwrap_or(self.name_field); + + let name_node = node.child_by_field_name(field_name)?; + Some(&source[name_node.byte_range()]) + } + + /// Find a body/declaration_list child for descending into containers. + fn find_body<'a>(&self, node: &Node<'a>) -> Option<Node<'a>> { + for field in self.body_fields { + if let Some(body) = node.child_by_field_name(field) { + return Some(body); + } + } + // Fallback: look for declaration_list as direct child + let mut cursor = node.walk(); + node.children(&mut cursor) + .find(|c| c.kind() == "declaration_list") + } } +/// Rust language configuration. +static RUST_CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_rust::LANGUAGE.into(), + extensions: &["rs"], + kind_map: &[ + ("fn", "function_item"), + ("struct", "struct_item"), + ("enum", "enum_item"), + ("impl", "impl_item"), + ("trait", "trait_item"), + ("mod", "mod_item"), + ("const", "const_item"), + ("static", "static_item"), + ("type", "type_item"), + ("macro", "macro_definition"), + ], + name_field: "name", + name_overrides: &[("impl_item", "type")], + body_fields: &["body"], +}; + /// Supported languages for tree_path resolution. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Language { Rust, } -/// Create a tree-sitter parser for the given language. 
-fn make_parser(lang: Language) -> Parser { - let mut parser = Parser::new(); - match lang { - Language::Rust => { - parser - .set_language(&tree_sitter_rust::LANGUAGE.into()) - .expect("tree-sitter-rust grammar should load"); +impl Language { + /// Get the language configuration for this language. + fn config(&self) -> &'static LanguageConfig { + match self { + Language::Rust => &RUST_CONFIG, } } - parser + + /// Get the tree-sitter language grammar. + fn ts_language(&self) -> TSLanguage { + let config = self.config(); + (config.ts_language)() + } } -/// Extract the name of a named AST node. -/// -/// For most items (fn, struct, enum, mod, trait, const, static, type, macro), -/// the name is in the `name` field. For `impl_item`, the name is the text of -/// the `type` field (the type being implemented). -fn node_name<'a>(node: &Node<'a>, source: &'a str) -> Option<&'a str> { - let kind = node.kind(); - if kind == "impl_item" { - // impl blocks: use the `type` field text - let type_node = node.child_by_field_name("type")?; - Some(&source[type_node.byte_range()]) - } else { - let name_node = node.child_by_field_name("name")?; - Some(&source[name_node.byte_range()]) +/// Detect language from file extension. Returns `None` for unsupported +/// languages. +pub fn detect_language(path: &Path) -> Option<Language> { + let ext = path.extension()?.to_str()?; + + if RUST_CONFIG.extensions.contains(&ext) { + return Some(Language::Rust); } + + None +} + +/// Create a tree-sitter parser for the given language. +fn make_parser(lang: Language) -> Parser { + let mut parser = Parser::new(); + parser + .set_language(&lang.ts_language()) + .expect("tree-sitter grammar should load"); + parser } /// A parsed tree_path segment: (kind_shorthand, name). 
@@ -127,12 +182,13 @@ pub fn resolve_tree_path(source: &str, tree_path: &str, lang: Language) -> Optio return None; } + let config = lang.config(); let segments = parse_tree_path(tree_path)?; let mut parser = make_parser(lang); let tree = parser.parse(source, None)?; let root = tree.root_node(); - let node = resolve_segments(&root, &segments, source)?; + let node = resolve_segments(config, &root, &segments, source)?; // Return 1-indexed inclusive line range let start_line = node.start_position().row + 1; @@ -142,6 +198,7 @@ pub fn resolve_tree_path(source: &str, tree_path: &str, lang: Language) -> Optio /// Walk the tree to find a node matching the given path segments. fn resolve_segments<'a>( + config: &LanguageConfig, parent: &Node<'a>, segments: &[PathSegment], source: &'a str, @@ -151,19 +208,19 @@ fn resolve_segments<'a>( } let seg = &segments[0]; - let ts_kind = shorthand_to_kind(&seg.kind)?; + let ts_kind = config.shorthand_to_kind(&seg.kind)?; let mut cursor = parent.walk(); for child in parent.children(&mut cursor) { if child.kind() != ts_kind { continue; } - if let Some(name) = node_name(&child, source) { + if let Some(name) = config.node_name(&child, source) { if name == seg.name && segments.len() == 1 { return Some(child); } else if name == seg.name { // Descend — look inside this node's body - return resolve_in_body(&child, &segments[1..], source); + return resolve_in_body(config, &child, &segments[1..], source); } } } @@ -173,20 +230,13 @@ fn resolve_segments<'a>( /// Find subsequent segments inside an item's body (e.g., methods inside impl). fn resolve_in_body<'a>( + config: &LanguageConfig, node: &Node<'a>, segments: &[PathSegment], source: &'a str, ) -> Option<Node<'a>> { - // For impl/mod/trait blocks, the children are inside the declaration_list - // or body field. Walk all descendants at the next level. 
- let body = node.child_by_field_name("body").or_else(|| { - // Try finding declaration_list child directly - let mut cursor = node.walk(); - node.children(&mut cursor) - .find(|c| c.kind() == "declaration_list") - })?; - - resolve_segments(&body, segments, source) + let body = config.find_body(node)?; + resolve_segments(config, &body, segments, source) } /// Compute the canonical `tree_path` for the AST node at the given span. @@ -195,6 +245,7 @@ fn resolve_in_body<'a>( /// (e.g., the span doesn't align with a named item, or the language is /// unsupported). pub fn compute_tree_path(source: &str, span: [usize; 2], lang: Language) -> String { + let config = lang.config(); let mut parser = make_parser(lang); let tree = match parser.parse(source, None) { Some(t) => t, @@ -207,13 +258,13 @@ pub fn compute_tree_path(source: &str, span: [usize; 2], lang: Language) -> Stri let target_end = span[1].saturating_sub(1); // Find the best item node within the target range - let node = match find_item_in_range(&root, target_start, target_end) { + let node = match find_item_in_range(config, &root, target_start, target_end) { Some(n) => n, None => return String::new(), }; // Build path from root to this node - build_path_to_node(&root, &node, source) + build_path_to_node(config, &root, &node, source) } /// Find the best item node within [target_start, target_end] (0-indexed rows). @@ -223,6 +274,7 @@ pub fn compute_tree_path(source: &str, span: [usize; 2], lang: Language) -> Stri /// item node. We therefore match any item whose start/end rows fall within /// the target range, preferring the widest match (the outermost item). 
fn find_item_in_range<'a>( + config: &LanguageConfig, root: &Node<'a>, target_start: usize, target_end: usize, @@ -230,6 +282,7 @@ fn find_item_in_range<'a>( let mut best: Option<Node<'a>> = None; fn walk<'a>( + config: &LanguageConfig, node: &Node<'a>, target_start: usize, target_end: usize, @@ -244,7 +297,7 @@ fn find_item_in_range<'a>( } // Check if this is a named item node within the target range - if start >= target_start && end <= target_end && is_item_node(node) { + if start >= target_start && end <= target_end && is_item_node(config, node) { // Prefer the widest (outermost) match if let Some(b) = best { let b_size = b.end_position().row - b.start_position().row; @@ -260,23 +313,28 @@ fn find_item_in_range<'a>( // Recurse into children let mut cursor = node.walk(); for child in node.children(&mut cursor) { - walk(&child, target_start, target_end, best); + walk(config, &child, target_start, target_end, best); } } - walk(root, target_start, target_end, &mut best); + walk(config, root, target_start, target_end, &mut best); best } /// Check if a node is an item type we track in tree_path. -fn is_item_node(node: &Node) -> bool { - kind_to_shorthand(node.kind()).is_some() +fn is_item_node(config: &LanguageConfig, node: &Node) -> bool { + config.kind_to_shorthand(node.kind()).is_some() } /// Build the tree_path string for a given target node by walking from root. -fn build_path_to_node(root: &Node, target: &Node, source: &str) -> String { +fn build_path_to_node( + config: &LanguageConfig, + root: &Node, + target: &Node, + source: &str, +) -> String { let mut segments: Vec<String> = Vec::new(); - if collect_path(root, target, source, &mut segments) { + if collect_path(config, root, target, source, &mut segments) { segments.join("::") } else { String::new() @@ -284,10 +342,17 @@ fn build_path_to_node(root: &Node, target: &Node, source: &str) -> String { } /// Recursively find `target` in the tree and collect path segments. 
-fn collect_path(node: &Node, target: &Node, source: &str, segments: &mut Vec) -> bool { +fn collect_path( + config: &LanguageConfig, + node: &Node, + target: &Node, + source: &str, + segments: &mut Vec, +) -> bool { if node.id() == target.id() { // We found the target — add this node's segment if it's an item - if let (Some(short), Some(name)) = (kind_to_shorthand(node.kind()), node_name(node, source)) + if let (Some(short), Some(name)) = + (config.kind_to_shorthand(node.kind()), config.node_name(node, source)) { segments.push(format!("{short}::{name}")); return true; @@ -306,14 +371,15 @@ fn collect_path(node: &Node, target: &Node, source: &str, segments: &mut Vec= target_end - && collect_path(&child, target, source, segments) + && collect_path(config, &child, target, source, segments) { // If this node is an item node, prepend its segment - if is_item_node(node) - && let (Some(short), Some(name)) = - (kind_to_shorthand(node.kind()), node_name(node, source)) - { - segments.insert(0, format!("{short}::{name}")); + if is_item_node(config, node) { + if let (Some(short), Some(name)) = + (config.kind_to_shorthand(node.kind()), config.node_name(node, source)) + { + segments.insert(0, format!("{short}::{name}")); + } } return true; } From a6076c9dc995c625b1a29dc5440ebc6d8a3ac86d Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Mon, 9 Mar 2026 23:55:02 +0800 Subject: [PATCH 02/18] feat(tree_path): add Python language support behind lang-python feature Add tree_path support for Python via optional lang-python Cargo feature. 
Changes: - Add tree-sitter-python dependency (0.25.0) as optional feature - Add PYTHON_CONFIG LanguageConfig with function_definition and class_definition mappings - Update Language enum with Python variant (cfg-gated) - Update detect_language() to recognize .py and .pyi files - Add comprehensive Python tests (function, class, method resolution) Usage: cargo build --features lang-python Original prompt: > Let's first work on refactoring and multi-language support. Please > remember to commit frequently as you make changes. And you must respect > the commit message convention. Work in a new branch and PR when ready. AI-assisted-by: Kimi K2.5 (OpenClaw) Signed-off-by: WANG Xuerui --- Cargo.lock | 11 +++ crates/liyi/Cargo.toml | 5 ++ crates/liyi/src/tree_path.rs | 138 +++++++++++++++++++++++++++++++++++ 3 files changed, 154 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 139dd90..1b7ef34 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -604,6 +604,7 @@ dependencies = [ "sha2", "tempfile", "tree-sitter", + "tree-sitter-python", "tree-sitter-rust", ] @@ -1188,6 +1189,16 @@ version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "009994f150cc0cd50ff54917d5bc8bffe8cad10ca10d81c34da2ec421ae61782" +[[package]] +name = "tree-sitter-python" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bf85fd39652e740bf60f46f4cda9492c3a9ad75880575bf14960f775cb74a1c" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-rust" version = "0.24.0" diff --git a/crates/liyi/Cargo.toml b/crates/liyi/Cargo.toml index 7410230..4dd046f 100644 --- a/crates/liyi/Cargo.toml +++ b/crates/liyi/Cargo.toml @@ -18,6 +18,11 @@ ignore = "0.4" regex = "1" tree-sitter = "0.26.6" tree-sitter-rust = "0.24.0" +tree-sitter-python = { version = "0.25.0", optional = true } + +[features] +default = [] +lang-python = ["dep:tree-sitter-python"] [dev-dependencies] proptest = "1" diff --git 
a/crates/liyi/src/tree_path.rs b/crates/liyi/src/tree_path.rs index 69d3b19..67c28b0 100644 --- a/crates/liyi/src/tree_path.rs +++ b/crates/liyi/src/tree_path.rs @@ -101,10 +101,26 @@ static RUST_CONFIG: LanguageConfig = LanguageConfig { body_fields: &["body"], }; +/// Python language configuration (requires `lang-python` feature). +#[cfg(feature = "lang-python")] +static PYTHON_CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_python::LANGUAGE.into(), + extensions: &["py", "pyi"], + kind_map: &[ + ("fn", "function_definition"), + ("class", "class_definition"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body"], +}; + /// Supported languages for tree_path resolution. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Language { Rust, + #[cfg(feature = "lang-python")] + Python, } impl Language { @@ -112,6 +128,8 @@ impl Language { fn config(&self) -> &'static LanguageConfig { match self { Language::Rust => &RUST_CONFIG, + #[cfg(feature = "lang-python")] + Language::Python => &PYTHON_CONFIG, } } @@ -131,6 +149,11 @@ pub fn detect_language(path: &Path) -> Option { return Some(Language::Rust); } + #[cfg(feature = "lang-python")] + if PYTHON_CONFIG.extensions.contains(&ext) { + return Some(Language::Python); + } + None } @@ -613,6 +636,13 @@ fn standalone() -> i32 { detect_language(Path::new("src/main.rs")), Some(Language::Rust) ); + // Python detection depends on the lang-python feature + #[cfg(feature = "lang-python")] + assert_eq!( + detect_language(Path::new("foo.py")), + Some(Language::Python) + ); + #[cfg(not(feature = "lang-python"))] assert_eq!(detect_language(Path::new("foo.py")), None); } @@ -653,4 +683,112 @@ fn standalone() -> i32 { 42 } assert!(span.is_some(), "should resolve {tp} in reformatted code"); } } + + #[cfg(feature = "lang-python")] + mod python_tests { + use super::*; + + const SAMPLE_PYTHON: &str = r#"# A simple order processing module + +class Order: + def __init__(self, amount): + self.amount = 
amount + + def process(self): + return self.amount > 0 + +def calculate_total(items): + return sum(items) +"#; + + #[test] + fn resolve_python_function() { + let span = resolve_tree_path(SAMPLE_PYTHON, "fn::calculate_total", Language::Python); + assert!(span.is_some(), "should resolve fn::calculate_total"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_PYTHON.lines().collect(); + assert!( + lines[start - 1].contains("def calculate_total"), + "span should point to calculate_total function" + ); + } + + #[test] + fn resolve_python_class() { + let span = resolve_tree_path(SAMPLE_PYTHON, "class::Order", Language::Python); + assert!(span.is_some(), "should resolve class::Order"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_PYTHON.lines().collect(); + assert!( + lines[start - 1].contains("class Order"), + "span should point to Order class" + ); + } + + #[test] + fn resolve_python_class_method() { + let span = resolve_tree_path(SAMPLE_PYTHON, "class::Order::fn::process", Language::Python); + assert!(span.is_some(), "should resolve class::Order::fn::process"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_PYTHON.lines().collect(); + assert!( + lines[start - 1].contains("def process"), + "span should point to process method" + ); + } + + #[test] + fn resolve_python_init_method() { + let span = resolve_tree_path(SAMPLE_PYTHON, "class::Order::fn::__init__", Language::Python); + assert!(span.is_some(), "should resolve class::Order::fn::__init__"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_PYTHON.lines().collect(); + assert!( + lines[start - 1].contains("def __init__"), + "span should point to __init__ method" + ); + } + + #[test] + fn compute_python_function_path() { + let lines: Vec<&str> = SAMPLE_PYTHON.lines().collect(); + let start = lines + .iter() + .position(|l| l.contains("def calculate_total")) + .unwrap() + + 1; + let end = lines.len(); + + let path = 
compute_tree_path(SAMPLE_PYTHON, [start, end], Language::Python); + assert_eq!(path, "fn::calculate_total"); + } + + #[test] + fn compute_python_class_method_path() { + let lines: Vec<&str> = SAMPLE_PYTHON.lines().collect(); + let start = lines + .iter() + .position(|l| l.contains("def process")) + .unwrap() + + 1; + // Find end of method (next line with same or less indentation) + let end = start + 1; // Single-line body for this test + + let path = compute_tree_path(SAMPLE_PYTHON, [start, end], Language::Python); + assert_eq!(path, "class::Order::fn::process"); + } + + #[test] + fn roundtrip_python() { + // Compute path for fn::calculate_total, then resolve it + let resolved_span = + resolve_tree_path(SAMPLE_PYTHON, "fn::calculate_total", Language::Python).unwrap(); + + let computed_path = compute_tree_path(SAMPLE_PYTHON, resolved_span, Language::Python); + assert_eq!(computed_path, "fn::calculate_total"); + + let re_resolved = resolve_tree_path(SAMPLE_PYTHON, &computed_path, Language::Python).unwrap(); + assert_eq!(re_resolved, resolved_span); + } + } } From 26ab66905ffdf8a76f19fafec09dc68178d7c5c0 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Mon, 9 Mar 2026 23:57:19 +0800 Subject: [PATCH 03/18] feat(tree_path): add Go language support behind lang-go feature Add tree_path support for Go via optional lang-go Cargo feature. Changes: - Add tree-sitter-go dependency (0.25.0) as optional feature - Add GO_CONFIG LanguageConfig with function_declaration and method_declaration mappings - Update Language enum with Go variant (cfg-gated) - Update detect_language() to recognize .go files - Add comprehensive Go tests (function, method resolution) Note: struct and interface types are not yet supported due to Go's nested type_declaration/type_spec AST structure. Usage: cargo build --features lang-go Original prompt: > Let's first work on refactoring and multi-language support. Please > remember to commit frequently as you make changes. 
And you must respect > the commit message convention. Work in a new branch and PR when ready. AI-assisted-by: Kimi K2.5 (OpenClaw) Signed-off-by: WANG Xuerui --- Cargo.lock | 11 +++ crates/liyi/Cargo.toml | 2 + crates/liyi/src/tree_path.rs | 131 +++++++++++++++++++++++++++++++++++ 3 files changed, 144 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 1b7ef34..07924e5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -604,6 +604,7 @@ dependencies = [ "sha2", "tempfile", "tree-sitter", + "tree-sitter-go", "tree-sitter-python", "tree-sitter-rust", ] @@ -1183,6 +1184,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-go" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8560a4d2f835cc0d4d2c2e03cbd0dde2f6114b43bc491164238d333e28b16ea" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-language" version = "0.1.7" diff --git a/crates/liyi/Cargo.toml b/crates/liyi/Cargo.toml index 4dd046f..e93b84d 100644 --- a/crates/liyi/Cargo.toml +++ b/crates/liyi/Cargo.toml @@ -19,10 +19,12 @@ regex = "1" tree-sitter = "0.26.6" tree-sitter-rust = "0.24.0" tree-sitter-python = { version = "0.25.0", optional = true } +tree-sitter-go = { version = "0.25.0", optional = true } [features] default = [] lang-python = ["dep:tree-sitter-python"] +lang-go = ["dep:tree-sitter-go"] [dev-dependencies] proptest = "1" diff --git a/crates/liyi/src/tree_path.rs b/crates/liyi/src/tree_path.rs index 67c28b0..581ecc0 100644 --- a/crates/liyi/src/tree_path.rs +++ b/crates/liyi/src/tree_path.rs @@ -115,12 +115,28 @@ static PYTHON_CONFIG: LanguageConfig = LanguageConfig { body_fields: &["body"], }; +/// Go language configuration (requires `lang-go` feature). 
+#[cfg(feature = "lang-go")] +static GO_CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_go::LANGUAGE.into(), + extensions: &["go"], + kind_map: &[ + ("fn", "function_declaration"), + ("method", "method_declaration"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body"], +}; + /// Supported languages for tree_path resolution. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Language { Rust, #[cfg(feature = "lang-python")] Python, + #[cfg(feature = "lang-go")] + Go, } impl Language { @@ -130,6 +146,8 @@ impl Language { Language::Rust => &RUST_CONFIG, #[cfg(feature = "lang-python")] Language::Python => &PYTHON_CONFIG, + #[cfg(feature = "lang-go")] + Language::Go => &GO_CONFIG, } } @@ -154,6 +172,11 @@ pub fn detect_language(path: &Path) -> Option { return Some(Language::Python); } + #[cfg(feature = "lang-go")] + if GO_CONFIG.extensions.contains(&ext) { + return Some(Language::Go); + } + None } @@ -791,4 +814,112 @@ def calculate_total(items): assert_eq!(re_resolved, resolved_span); } } + + #[cfg(feature = "lang-go")] + mod go_tests { + use super::*; + + const SAMPLE_GO: &str = r#"package main + +import "fmt" + +// Calculator performs arithmetic operations +type Calculator struct { + value int +} + +// Add adds a number to the calculator's value +func (c *Calculator) Add(n int) { + c.value += n +} + +// Value returns the current value +func (c Calculator) Value() int { + return c.value +} + +// Add is a standalone function +func Add(a, b int) int { + return a + b +} +"#; + + #[test] + fn resolve_go_function() { + let span = resolve_tree_path(SAMPLE_GO, "fn::Add", Language::Go); + assert!(span.is_some(), "should resolve fn::Add"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_GO.lines().collect(); + assert!( + lines[start - 1].contains("func Add("), + "span should point to Add function, got: {}", + lines[start - 1] + ); + } + + #[test] + fn resolve_go_method() { + let span = 
resolve_tree_path(SAMPLE_GO, "method::Add", Language::Go); + assert!(span.is_some(), "should resolve method::Add"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_GO.lines().collect(); + assert!( + lines[start - 1].contains("func (c *Calculator) Add"), + "span should point to Add method, got: {}", + lines[start - 1] + ); + } + + #[test] + fn compute_go_function_path() { + let lines: Vec<&str> = SAMPLE_GO.lines().collect(); + // Find the standalone Add function (last one in file) + let start = lines + .iter() + .enumerate() + .rev() + .find(|(_, l)| l.contains("func Add(")) + .unwrap() + .0 + + 1; + let end = lines.len(); + + let path = compute_tree_path(SAMPLE_GO, [start, end], Language::Go); + assert_eq!(path, "fn::Add"); + } + + #[test] + fn compute_go_method_path() { + let lines: Vec<&str> = SAMPLE_GO.lines().collect(); + let start = lines + .iter() + .position(|l| l.contains("func (c *Calculator) Add")) + .unwrap() + + 1; + // Find end of method (next closing brace at start of line or end of file) + let end = lines + .iter() + .enumerate() + .skip(start) + .find(|(_, l)| l.starts_with('}')) + .map(|(i, _)| i + 1) + .unwrap_or(lines.len()); + + let path = compute_tree_path(SAMPLE_GO, [start, end], Language::Go); + assert_eq!(path, "method::Add"); + } + + #[test] + fn roundtrip_go() { + // Compute path for fn::Add, then resolve it + let resolved_span = + resolve_tree_path(SAMPLE_GO, "fn::Add", Language::Go).unwrap(); + + let computed_path = compute_tree_path(SAMPLE_GO, resolved_span, Language::Go); + assert_eq!(computed_path, "fn::Add"); + + let re_resolved = resolve_tree_path(SAMPLE_GO, &computed_path, Language::Go).unwrap(); + assert_eq!(re_resolved, resolved_span); + } + } } From ead79cdd858d400336a7d00bc664bdd01af586ab Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Tue, 10 Mar 2026 01:39:48 +0800 Subject: [PATCH 04/18] feat(tree_path): add JavaScript and TypeScript support Add tree_path support for JavaScript and TypeScript via 
optional Cargo features. Changes: - Add tree-sitter-javascript (0.25.0) and tree-sitter-typescript (0.23.2) as optional dependencies - Add JAVASCRIPT_CONFIG with function_declaration, class_declaration, and method_definition mappings (.js, .mjs, .cjs, .jsx) - Add TYPESCRIPT_CONFIG and TSX_CONFIG with additional interface, type alias, and enum mappings (.ts, .mts, .cts, .tsx) - Update Language enum with JavaScript, TypeScript, and Tsx variants - Update detect_language() to recognize JS/TS file extensions - Add comprehensive JS/TS tests for all node types Features: - lang-javascript: JavaScript support - lang-typescript: TypeScript/TSX support (implies lang-javascript) Usage: cargo build --features lang-javascript cargo build --features lang-typescript Original prompt: > Let's first work on refactoring and multi-language support. Please > remember to commit frequently as you make changes. And you must respect > the commit message convention. Work in a new branch and PR when ready. AI-assisted-by: Kimi K2.5 (OpenClaw) Signed-off-by: WANG Xuerui --- Cargo.lock | 22 +++ crates/liyi/Cargo.toml | 4 + crates/liyi/src/tree_path.rs | 281 +++++++++++++++++++++++++++++++++++ 3 files changed, 307 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 07924e5..d3aa1a2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -605,8 +605,10 @@ dependencies = [ "tempfile", "tree-sitter", "tree-sitter-go", + "tree-sitter-javascript", "tree-sitter-python", "tree-sitter-rust", + "tree-sitter-typescript", ] [[package]] @@ -1194,6 +1196,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-javascript" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68204f2abc0627a90bdf06e605f5c470aa26fdcb2081ea553a04bdad756693f5" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-language" version = "0.1.7" @@ -1220,6 +1232,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = 
"tree-sitter-typescript" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5f76ed8d947a75cc446d5fccd8b602ebf0cde64ccf2ffa434d873d7a575eff" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "typenum" version = "1.19.0" diff --git a/crates/liyi/Cargo.toml b/crates/liyi/Cargo.toml index e93b84d..a4dde47 100644 --- a/crates/liyi/Cargo.toml +++ b/crates/liyi/Cargo.toml @@ -20,11 +20,15 @@ tree-sitter = "0.26.6" tree-sitter-rust = "0.24.0" tree-sitter-python = { version = "0.25.0", optional = true } tree-sitter-go = { version = "0.25.0", optional = true } +tree-sitter-javascript = { version = "0.25.0", optional = true } +tree-sitter-typescript = { version = "0.23.2", optional = true } [features] default = [] lang-python = ["dep:tree-sitter-python"] lang-go = ["dep:tree-sitter-go"] +lang-javascript = ["dep:tree-sitter-javascript"] +lang-typescript = ["dep:tree-sitter-typescript", "lang-javascript"] [dev-dependencies] proptest = "1" diff --git a/crates/liyi/src/tree_path.rs b/crates/liyi/src/tree_path.rs index 581ecc0..a9fbe1d 100644 --- a/crates/liyi/src/tree_path.rs +++ b/crates/liyi/src/tree_path.rs @@ -129,6 +129,57 @@ static GO_CONFIG: LanguageConfig = LanguageConfig { body_fields: &["body"], }; +/// JavaScript language configuration (requires `lang-javascript` feature). +#[cfg(feature = "lang-javascript")] +static JAVASCRIPT_CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_javascript::LANGUAGE.into(), + extensions: &["js", "mjs", "cjs", "jsx"], + kind_map: &[ + ("fn", "function_declaration"), + ("class", "class_declaration"), + ("method", "method_definition"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body"], +}; + +/// TypeScript language configuration (requires `lang-typescript` feature). 
+#[cfg(feature = "lang-typescript")] +static TYPESCRIPT_CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(), + extensions: &["ts", "mts", "cts"], + kind_map: &[ + ("fn", "function_declaration"), + ("class", "class_declaration"), + ("method", "method_definition"), + ("interface", "interface_declaration"), + ("type", "type_alias_declaration"), + ("enum", "enum_declaration"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body"], +}; + +/// TSX language configuration (requires `lang-typescript` feature). +#[cfg(feature = "lang-typescript")] +static TSX_CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_typescript::LANGUAGE_TSX.into(), + extensions: &["tsx"], + kind_map: &[ + ("fn", "function_declaration"), + ("class", "class_declaration"), + ("method", "method_definition"), + ("interface", "interface_declaration"), + ("type", "type_alias_declaration"), + ("enum", "enum_declaration"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body"], +}; + /// Supported languages for tree_path resolution. 
#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Language { @@ -137,6 +188,12 @@ pub enum Language { Python, #[cfg(feature = "lang-go")] Go, + #[cfg(feature = "lang-javascript")] + JavaScript, + #[cfg(feature = "lang-typescript")] + TypeScript, + #[cfg(feature = "lang-typescript")] + Tsx, } impl Language { @@ -148,6 +205,12 @@ impl Language { Language::Python => &PYTHON_CONFIG, #[cfg(feature = "lang-go")] Language::Go => &GO_CONFIG, + #[cfg(feature = "lang-javascript")] + Language::JavaScript => &JAVASCRIPT_CONFIG, + #[cfg(feature = "lang-typescript")] + Language::TypeScript => &TYPESCRIPT_CONFIG, + #[cfg(feature = "lang-typescript")] + Language::Tsx => &TSX_CONFIG, } } @@ -177,6 +240,21 @@ pub fn detect_language(path: &Path) -> Option { return Some(Language::Go); } + #[cfg(feature = "lang-javascript")] + if JAVASCRIPT_CONFIG.extensions.contains(&ext) { + return Some(Language::JavaScript); + } + + #[cfg(feature = "lang-typescript")] + { + if TYPESCRIPT_CONFIG.extensions.contains(&ext) { + return Some(Language::TypeScript); + } + if TSX_CONFIG.extensions.contains(&ext) { + return Some(Language::Tsx); + } + } + None } @@ -922,4 +1000,207 @@ func Add(a, b int) int { assert_eq!(re_resolved, resolved_span); } } + + #[cfg(feature = "lang-javascript")] + mod javascript_tests { + use super::*; + + const SAMPLE_JS: &str = r#"// A simple counter module + +class Counter { + constructor(initial = 0) { + this.count = initial; + } + + increment() { + this.count++; + } + + getValue() { + return this.count; + } +} + +function createCounter(initial) { + return new Counter(initial); +} + +const utils = { + formatCount: (n) => `${n} items` +}; +"#; + + #[test] + fn resolve_js_function() { + let span = resolve_tree_path(SAMPLE_JS, "fn::createCounter", Language::JavaScript); + assert!(span.is_some(), "should resolve fn::createCounter"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_JS.lines().collect(); + assert!( + lines[start - 1].contains("function 
createCounter"), + "span should point to createCounter function" + ); + } + + #[test] + fn resolve_js_class() { + let span = resolve_tree_path(SAMPLE_JS, "class::Counter", Language::JavaScript); + assert!(span.is_some(), "should resolve class::Counter"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_JS.lines().collect(); + assert!( + lines[start - 1].contains("class Counter"), + "span should point to Counter class" + ); + } + + #[test] + fn resolve_js_method() { + let span = resolve_tree_path(SAMPLE_JS, "class::Counter::method::increment", Language::JavaScript); + assert!(span.is_some(), "should resolve class::Counter::method::increment"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_JS.lines().collect(); + assert!( + lines[start - 1].contains("increment()"), + "span should point to increment method" + ); + } + + #[test] + fn compute_js_function_path() { + let lines: Vec<&str> = SAMPLE_JS.lines().collect(); + let start = lines + .iter() + .position(|l| l.contains("function createCounter")) + .unwrap() + + 1; + let end = lines.len() - 3; // Rough end + + let path = compute_tree_path(SAMPLE_JS, [start, end], Language::JavaScript); + assert_eq!(path, "fn::createCounter"); + } + + #[test] + fn roundtrip_js() { + let resolved_span = + resolve_tree_path(SAMPLE_JS, "class::Counter::method::getValue", Language::JavaScript).unwrap(); + + let computed_path = compute_tree_path(SAMPLE_JS, resolved_span, Language::JavaScript); + assert_eq!(computed_path, "class::Counter::method::getValue"); + + let re_resolved = resolve_tree_path(SAMPLE_JS, &computed_path, Language::JavaScript).unwrap(); + assert_eq!(re_resolved, resolved_span); + } + } + + #[cfg(feature = "lang-typescript")] + mod typescript_tests { + use super::*; + + const SAMPLE_TS: &str = r#"// A typed user service + +interface User { + id: number; + name: string; +} + +type UserId = number; + +enum UserRole { + Admin, + User, + Guest +} + +class UserService { + private users: 
User[] = []; + + addUser(user: User): void { + this.users.push(user); + } + + findById(id: UserId): User | undefined { + return this.users.find(u => u.id === id); + } +} + +function createUser(name: string): User { + return { id: Date.now(), name }; +} +"#; + + #[test] + fn resolve_ts_interface() { + let span = resolve_tree_path(SAMPLE_TS, "interface::User", Language::TypeScript); + assert!(span.is_some(), "should resolve interface::User"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_TS.lines().collect(); + assert!( + lines[start - 1].contains("interface User"), + "span should point to User interface" + ); + } + + #[test] + fn resolve_ts_type_alias() { + let span = resolve_tree_path(SAMPLE_TS, "type::UserId", Language::TypeScript); + assert!(span.is_some(), "should resolve type::UserId"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_TS.lines().collect(); + assert!( + lines[start - 1].contains("type UserId"), + "span should point to UserId type alias" + ); + } + + #[test] + fn resolve_ts_enum() { + let span = resolve_tree_path(SAMPLE_TS, "enum::UserRole", Language::TypeScript); + assert!(span.is_some(), "should resolve enum::UserRole"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_TS.lines().collect(); + assert!( + lines[start - 1].contains("enum UserRole"), + "span should point to UserRole enum" + ); + } + + #[test] + fn resolve_ts_class_method() { + let span = resolve_tree_path(SAMPLE_TS, "class::UserService::method::findById", Language::TypeScript); + assert!(span.is_some(), "should resolve class::UserService::method::findById"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_TS.lines().collect(); + assert!( + lines[start - 1].contains("findById("), + "span should point to findById method" + ); + } + + #[test] + fn compute_ts_interface_path() { + let lines: Vec<&str> = SAMPLE_TS.lines().collect(); + let start = lines + .iter() + .position(|l| l.contains("interface 
User")) + .unwrap() + + 1; + let end = start + 3; + + let path = compute_tree_path(SAMPLE_TS, [start, end], Language::TypeScript); + assert_eq!(path, "interface::User"); + } + + #[test] + fn roundtrip_ts() { + let resolved_span = + resolve_tree_path(SAMPLE_TS, "enum::UserRole", Language::TypeScript).unwrap(); + + let computed_path = compute_tree_path(SAMPLE_TS, resolved_span, Language::TypeScript); + assert_eq!(computed_path, "enum::UserRole"); + + let re_resolved = resolve_tree_path(SAMPLE_TS, &computed_path, Language::TypeScript).unwrap(); + assert_eq!(re_resolved, resolved_span); + } + } } From 00d1a72eceba12b87950d9227a3684b617ad84f0 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Tue, 10 Mar 2026 01:45:52 +0800 Subject: [PATCH 05/18] build: enable all language features by default Make lang-python, lang-go, lang-javascript, and lang-typescript enabled by default. Users can opt out with --no-default-features if needed. Original prompt: > make all lang features default-enabled AI-assisted-by: Kimi K2.5 (OpenClaw) Signed-off-by: WANG Xuerui --- crates/liyi/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/liyi/Cargo.toml b/crates/liyi/Cargo.toml index a4dde47..5ab4a06 100644 --- a/crates/liyi/Cargo.toml +++ b/crates/liyi/Cargo.toml @@ -24,7 +24,7 @@ tree-sitter-javascript = { version = "0.25.0", optional = true } tree-sitter-typescript = { version = "0.23.2", optional = true } [features] -default = [] +default = ["lang-python", "lang-go", "lang-javascript", "lang-typescript"] lang-python = ["dep:tree-sitter-python"] lang-go = ["dep:tree-sitter-go"] lang-javascript = ["dep:tree-sitter-javascript"] From a8eeb2c53ce8a48a4ef2e6611189a3a583c09a39 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Tue, 10 Mar 2026 01:48:30 +0800 Subject: [PATCH 06/18] style: cargo fmt && liyi reanchor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix clippy warning (collapsible_if) and run cargo fmt. 
Reanchor sidecar specs after tree_path.rs changes. Original prompt: > did you forget to check clippy, cargo fmt, and sync 立意? AI-assisted-by: Kimi K2.5 (OpenClaw) Signed-off-by: WANG Xuerui --- crates/liyi/src/tree_path.rs | 88 +++++++++++++++---------- crates/liyi/src/tree_path.rs.liyi.jsonc | 64 +++++++++--------- 2 files changed, 83 insertions(+), 69 deletions(-) diff --git a/crates/liyi/src/tree_path.rs b/crates/liyi/src/tree_path.rs index a9fbe1d..67a6242 100644 --- a/crates/liyi/src/tree_path.rs +++ b/crates/liyi/src/tree_path.rs @@ -106,10 +106,7 @@ static RUST_CONFIG: LanguageConfig = LanguageConfig { static PYTHON_CONFIG: LanguageConfig = LanguageConfig { ts_language: || tree_sitter_python::LANGUAGE.into(), extensions: &["py", "pyi"], - kind_map: &[ - ("fn", "function_definition"), - ("class", "class_definition"), - ], + kind_map: &[("fn", "function_definition"), ("class", "class_definition")], name_field: "name", name_overrides: &[], body_fields: &["body"], @@ -451,12 +448,7 @@ fn is_item_node(config: &LanguageConfig, node: &Node) -> bool { } /// Build the tree_path string for a given target node by walking from root. 
-fn build_path_to_node( - config: &LanguageConfig, - root: &Node, - target: &Node, - source: &str, -) -> String { +fn build_path_to_node(config: &LanguageConfig, root: &Node, target: &Node, source: &str) -> String { let mut segments: Vec = Vec::new(); if collect_path(config, root, target, source, &mut segments) { segments.join("::") @@ -475,9 +467,10 @@ fn collect_path( ) -> bool { if node.id() == target.id() { // We found the target — add this node's segment if it's an item - if let (Some(short), Some(name)) = - (config.kind_to_shorthand(node.kind()), config.node_name(node, source)) - { + if let (Some(short), Some(name)) = ( + config.kind_to_shorthand(node.kind()), + config.node_name(node, source), + ) { segments.push(format!("{short}::{name}")); return true; } @@ -498,12 +491,13 @@ fn collect_path( && collect_path(config, &child, target, source, segments) { // If this node is an item node, prepend its segment - if is_item_node(config, node) { - if let (Some(short), Some(name)) = - (config.kind_to_shorthand(node.kind()), config.node_name(node, source)) - { - segments.insert(0, format!("{short}::{name}")); - } + if is_item_node(config, node) + && let (Some(short), Some(name)) = ( + config.kind_to_shorthand(node.kind()), + config.node_name(node, source), + ) + { + segments.insert(0, format!("{short}::{name}")); } return true; } @@ -739,10 +733,7 @@ fn standalone() -> i32 { ); // Python detection depends on the lang-python feature #[cfg(feature = "lang-python")] - assert_eq!( - detect_language(Path::new("foo.py")), - Some(Language::Python) - ); + assert_eq!(detect_language(Path::new("foo.py")), Some(Language::Python)); #[cfg(not(feature = "lang-python"))] assert_eq!(detect_language(Path::new("foo.py")), None); } @@ -828,7 +819,8 @@ def calculate_total(items): #[test] fn resolve_python_class_method() { - let span = resolve_tree_path(SAMPLE_PYTHON, "class::Order::fn::process", Language::Python); + let span = + resolve_tree_path(SAMPLE_PYTHON, 
"class::Order::fn::process", Language::Python); assert!(span.is_some(), "should resolve class::Order::fn::process"); let [start, _end] = span.unwrap(); let lines: Vec<&str> = SAMPLE_PYTHON.lines().collect(); @@ -840,7 +832,11 @@ def calculate_total(items): #[test] fn resolve_python_init_method() { - let span = resolve_tree_path(SAMPLE_PYTHON, "class::Order::fn::__init__", Language::Python); + let span = resolve_tree_path( + SAMPLE_PYTHON, + "class::Order::fn::__init__", + Language::Python, + ); assert!(span.is_some(), "should resolve class::Order::fn::__init__"); let [start, _end] = span.unwrap(); let lines: Vec<&str> = SAMPLE_PYTHON.lines().collect(); @@ -888,7 +884,8 @@ def calculate_total(items): let computed_path = compute_tree_path(SAMPLE_PYTHON, resolved_span, Language::Python); assert_eq!(computed_path, "fn::calculate_total"); - let re_resolved = resolve_tree_path(SAMPLE_PYTHON, &computed_path, Language::Python).unwrap(); + let re_resolved = + resolve_tree_path(SAMPLE_PYTHON, &computed_path, Language::Python).unwrap(); assert_eq!(re_resolved, resolved_span); } } @@ -990,8 +987,7 @@ func Add(a, b int) int { #[test] fn roundtrip_go() { // Compute path for fn::Add, then resolve it - let resolved_span = - resolve_tree_path(SAMPLE_GO, "fn::Add", Language::Go).unwrap(); + let resolved_span = resolve_tree_path(SAMPLE_GO, "fn::Add", Language::Go).unwrap(); let computed_path = compute_tree_path(SAMPLE_GO, resolved_span, Language::Go); assert_eq!(computed_path, "fn::Add"); @@ -1056,8 +1052,15 @@ const utils = { #[test] fn resolve_js_method() { - let span = resolve_tree_path(SAMPLE_JS, "class::Counter::method::increment", Language::JavaScript); - assert!(span.is_some(), "should resolve class::Counter::method::increment"); + let span = resolve_tree_path( + SAMPLE_JS, + "class::Counter::method::increment", + Language::JavaScript, + ); + assert!( + span.is_some(), + "should resolve class::Counter::method::increment" + ); let [start, _end] = span.unwrap(); let lines: 
Vec<&str> = SAMPLE_JS.lines().collect(); assert!( @@ -1082,13 +1085,18 @@ const utils = { #[test] fn roundtrip_js() { - let resolved_span = - resolve_tree_path(SAMPLE_JS, "class::Counter::method::getValue", Language::JavaScript).unwrap(); + let resolved_span = resolve_tree_path( + SAMPLE_JS, + "class::Counter::method::getValue", + Language::JavaScript, + ) + .unwrap(); let computed_path = compute_tree_path(SAMPLE_JS, resolved_span, Language::JavaScript); assert_eq!(computed_path, "class::Counter::method::getValue"); - let re_resolved = resolve_tree_path(SAMPLE_JS, &computed_path, Language::JavaScript).unwrap(); + let re_resolved = + resolve_tree_path(SAMPLE_JS, &computed_path, Language::JavaScript).unwrap(); assert_eq!(re_resolved, resolved_span); } } @@ -1167,8 +1175,15 @@ function createUser(name: string): User { #[test] fn resolve_ts_class_method() { - let span = resolve_tree_path(SAMPLE_TS, "class::UserService::method::findById", Language::TypeScript); - assert!(span.is_some(), "should resolve class::UserService::method::findById"); + let span = resolve_tree_path( + SAMPLE_TS, + "class::UserService::method::findById", + Language::TypeScript, + ); + assert!( + span.is_some(), + "should resolve class::UserService::method::findById" + ); let [start, _end] = span.unwrap(); let lines: Vec<&str> = SAMPLE_TS.lines().collect(); assert!( @@ -1199,7 +1214,8 @@ function createUser(name: string): User { let computed_path = compute_tree_path(SAMPLE_TS, resolved_span, Language::TypeScript); assert_eq!(computed_path, "enum::UserRole"); - let re_resolved = resolve_tree_path(SAMPLE_TS, &computed_path, Language::TypeScript).unwrap(); + let re_resolved = + resolve_tree_path(SAMPLE_TS, &computed_path, Language::TypeScript).unwrap(); assert_eq!(re_resolved, resolved_span); } } diff --git a/crates/liyi/src/tree_path.rs.liyi.jsonc b/crates/liyi/src/tree_path.rs.liyi.jsonc index 916c7e5..13aade7 100644 --- a/crates/liyi/src/tree_path.rs.liyi.jsonc +++ 
b/crates/liyi/src/tree_path.rs.liyi.jsonc @@ -11,20 +11,19 @@ 17, 28 ], - "tree_path": "const::KIND_MAP", - "source_hash": "sha256:6a6b6eb7c463a163555dadc8f1cd701266d2d64cf6a06e75f4128feef9fe45ee", - "source_anchor": "const KIND_MAP: &[(&str, &str)] = &[" + "source_hash": "sha256:cc43689510347aa1bf287146ee2bae19e2407df0ee82c7c74a35cf3010d1520d", + "source_anchor": "///" }, { "item": "detect_language", "reviewed": false, "intent": "=doc", "source_span": [ - 48, - 53 + 223, + 256 ], "tree_path": "fn::detect_language", - "source_hash": "sha256:2c784d4dfacb2142be9b46e13c2713208c7797747374057a65ae114fdb8be45c", + "source_hash": "sha256:1647b004bc523d06b5484b9d29412743ae6bfdf02f5d512970fd0a20a157a7cf", "source_anchor": "pub fn detect_language(path: &Path) -> Option {" }, { @@ -32,11 +31,11 @@ "reviewed": false, "intent": "Enumerate supported tree-sitter languages for tree_path operations. In 0.1, only Rust is supported; the enum is the extension point for adding more languages.", "source_span": [ - 57, - 59 + 182, + 194 ], "tree_path": "enum::Language", - "source_hash": "sha256:6d0a6933befabccdcfc1030cac109588b55363bb531987e4aa7c9d4cf1a68e1e", + "source_hash": "sha256:21e32f4b83f6a26fe960a8f51a2fbc1a57882b926696e78d8a7adb3da6ecc762", "source_anchor": "pub enum Language {" }, { @@ -47,17 +46,16 @@ 79, 89 ], - "tree_path": "fn::node_name", - "source_hash": "sha256:b35a2bd695cb373f84ad62c1d483a893c9e0dcc93cc0d72f42a096cf1a16d79a", - "source_anchor": "fn node_name<'a>(node: &Node<'a>, source: &'a str) -> Option<&'a str> {" + "source_hash": "sha256:47528f806c78bceecea2d90ee823c37ae25540f399ac9fa3c8c42264648826f4", + "source_anchor": " .find(|c| c.kind() == \"declaration_list\")" }, { "item": "parse_tree_path", "reviewed": false, "intent": "Parse a tree_path string into segments of (kind, name) pairs by splitting on '::' and grouping consecutive pairs. Return None if the number of parts is odd (malformed). 
Validate each kind against the known shorthand set.", "source_span": [ - 102, - 118 + 278, + 294 ], "tree_path": "fn::parse_tree_path", "source_hash": "sha256:eb1bdb126bb090d769612797d5428edd3c20ba72ba04dad58071bbfa955240c2", @@ -68,11 +66,11 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 125, - 141 + 301, + 318 ], "tree_path": "fn::resolve_tree_path", - "source_hash": "sha256:3d7856a13db4b62a7800d9457c1b079a8aa5a99296d010d4196e97eb1465c8c9", + "source_hash": "sha256:8cd19d6e6704970f8cbead0b56b05a9196ca29b0439b37b31a819a958dc03dbe", "source_anchor": "pub fn resolve_tree_path(source: &str, tree_path: &str, lang: Language) -> Option<[usize; 2]> {" }, { @@ -80,11 +78,11 @@ "reviewed": false, "intent": "Walk tree-sitter children of the given parent to find nodes matching each path segment in order. For single-segment paths, return the matching child directly. For multi-segment paths, descend into the first matching child via resolve_in_body for subsequent segments.", "source_span": [ - 144, - 172 + 321, + 350 ], "tree_path": "fn::resolve_segments", - "source_hash": "sha256:cb4227e128b6b2b7cf3766e0f8c492e21692cbcf44b5a4b95addffb34ad87451", + "source_hash": "sha256:1fcaea1f0eccde605bcd9de4094980378600d7a8d221aed55dc8fad1f60cf48b", "source_anchor": "fn resolve_segments<'a>(" }, { @@ -92,11 +90,11 @@ "reviewed": false, "intent": "Find subsequent path segments inside an item's body or declaration_list. Try the 'body' field first (mod, fn), then fall back to looking for a declaration_list child (impl, trait). 
Delegate to resolve_segments for the recursive match.", "source_span": [ - 175, - 190 + 353, + 361 ], "tree_path": "fn::resolve_in_body", - "source_hash": "sha256:36340ecde43345970b601709b06045ce4eb2d59361073a740cffdc53dac11dbc", + "source_hash": "sha256:f1514f012bc8d300c425867e4a1cce1aaf72f1f58885eeaf24456114234473d6", "source_anchor": "fn resolve_in_body<'a>(" }, { @@ -104,11 +102,11 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 197, - 217 + 368, + 389 ], "tree_path": "fn::compute_tree_path", - "source_hash": "sha256:da391fb9dcee5c9ba55d539a4c16eb60f51511713b9c843dfeb87409a2a6d96e", + "source_hash": "sha256:30ecd47287f846a39cdbd906075c6eae16d286eda5c3bc92d87cfbae67ec2e74", "source_anchor": "pub fn compute_tree_path(source: &str, span: [usize; 2], lang: Language) -> String {" }, { @@ -116,11 +114,11 @@ "reviewed": false, "intent": "Find the widest item-bearing tree-sitter node whose start and end rows both fall within [target_start, target_end]. Must handle the attribute-sibling pattern where Rust attributes (#[derive(...)]) are siblings of the item node — the sidecar span can start before the item node. Prefer the outermost (widest) item when multiple items fall within the range.", "source_span": [ - 225, - 269 + 397, + 443 ], "tree_path": "fn::find_item_in_range", - "source_hash": "sha256:23d04aca348b644f78e2415418361bf6c963c868b00ca9f3882483c511fe2d3d", + "source_hash": "sha256:187c06169aae241150cb9bd88810da07aef5d967431ae25b50aab3ff111fc220", "source_anchor": "fn find_item_in_range<'a>(" }, { @@ -128,12 +126,12 @@ "reviewed": false, "intent": "Recursively walk from root to target node, collecting (kind::name) path segments. At the target node, push its segment and return true. During descent, only enter children that spatially contain the target. When a child's subtree contains the target, prepend the current node's segment if it is an item node. 
Return false if the target cannot be found.", "source_span": [ - 287, - 323 + 461, + 507 ], "tree_path": "fn::collect_path", - "source_hash": "sha256:5596cd923fd63ffdcf3d5871e7b7e06e48f56cc5ac790aab5afa1e2351eb1046", - "source_anchor": "fn collect_path(node: &Node, target: &Node, source: &str, segments: &mut Vec) -> bool {" + "source_hash": "sha256:0086ee43dc7c085025e553af9914df58fab43d8e8b579486f21d5788d8d0d221", + "source_anchor": "fn collect_path(" } ] } From 9dd9d6d0d0c16ef45e775253f28b9f4f7541379d Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Tue, 10 Mar 2026 08:51:13 +0800 Subject: [PATCH 07/18] fix(tree_path): remove feature gates from Language enum variants Remove #[cfg(feature = "...")] from Language enum variants to ensure API stability. The enum variants are now always present, but languages report whether they're supported via is_supported(). Changes: - Remove #[cfg] gates from Language enum variants - Add Language::is_supported() method for runtime feature checking - Change Language::config() to return Option<&LanguageConfig> - Change Language::ts_language() to return Option - Update make_parser(), resolve_tree_path(), compute_tree_path() to handle unsupported languages gracefully by returning None/empty - Update detect_language() to only return supported languages This ensures downstream code can match on all Language variants without conditional compilation, while still gracefully handling unsupported languages at runtime. Original prompt: > Can you adversarially review the PR branch changes? 
AI-assisted-by: Kimi K2.5 (OpenClaw) Signed-off-by: WANG Xuerui --- crates/liyi/src/tree_path.rs | 97 +++++++++++++++++-------- crates/liyi/src/tree_path.rs.liyi.jsonc | 40 +++++----- 2 files changed, 86 insertions(+), 51 deletions(-) diff --git a/crates/liyi/src/tree_path.rs b/crates/liyi/src/tree_path.rs index 67a6242..ff4bd3b 100644 --- a/crates/liyi/src/tree_path.rs +++ b/crates/liyi/src/tree_path.rs @@ -181,45 +181,75 @@ static TSX_CONFIG: LanguageConfig = LanguageConfig { #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Language { Rust, - #[cfg(feature = "lang-python")] Python, - #[cfg(feature = "lang-go")] Go, - #[cfg(feature = "lang-javascript")] JavaScript, - #[cfg(feature = "lang-typescript")] TypeScript, - #[cfg(feature = "lang-typescript")] Tsx, } impl Language { + /// Check if this language is supported (its feature is enabled). + pub fn is_supported(&self) -> bool { + match self { + Language::Rust => true, + Language::Python => cfg!(feature = "lang-python"), + Language::Go => cfg!(feature = "lang-go"), + Language::JavaScript => cfg!(feature = "lang-javascript"), + Language::TypeScript => cfg!(feature = "lang-typescript"), + Language::Tsx => cfg!(feature = "lang-typescript"), + } + } + /// Get the language configuration for this language. - fn config(&self) -> &'static LanguageConfig { + /// + /// Returns `None` if the language is not supported (feature not enabled). 
+ fn config(&self) -> Option<&'static LanguageConfig> { match self { - Language::Rust => &RUST_CONFIG, - #[cfg(feature = "lang-python")] - Language::Python => &PYTHON_CONFIG, - #[cfg(feature = "lang-go")] - Language::Go => &GO_CONFIG, - #[cfg(feature = "lang-javascript")] - Language::JavaScript => &JAVASCRIPT_CONFIG, - #[cfg(feature = "lang-typescript")] - Language::TypeScript => &TYPESCRIPT_CONFIG, - #[cfg(feature = "lang-typescript")] - Language::Tsx => &TSX_CONFIG, + Language::Rust => Some(&RUST_CONFIG), + Language::Python => { + #[cfg(feature = "lang-python")] + return Some(&PYTHON_CONFIG); + #[cfg(not(feature = "lang-python"))] + return None; + } + Language::Go => { + #[cfg(feature = "lang-go")] + return Some(&GO_CONFIG); + #[cfg(not(feature = "lang-go"))] + return None; + } + Language::JavaScript => { + #[cfg(feature = "lang-javascript")] + return Some(&JAVASCRIPT_CONFIG); + #[cfg(not(feature = "lang-javascript"))] + return None; + } + Language::TypeScript => { + #[cfg(feature = "lang-typescript")] + return Some(&TYPESCRIPT_CONFIG); + #[cfg(not(feature = "lang-typescript"))] + return None; + } + Language::Tsx => { + #[cfg(feature = "lang-typescript")] + return Some(&TSX_CONFIG); + #[cfg(not(feature = "lang-typescript"))] + return None; + } } } /// Get the tree-sitter language grammar. - fn ts_language(&self) -> TSLanguage { - let config = self.config(); - (config.ts_language)() + /// + /// Returns `None` if the language is not supported. + fn ts_language(&self) -> Option { + self.config().map(|cfg| (cfg.ts_language)()) } } /// Detect language from file extension. Returns `None` for unsupported -/// languages. +/// languages (unknown extension or feature not enabled). pub fn detect_language(path: &Path) -> Option { let ext = path.extension()?.to_str()?; @@ -256,12 +286,13 @@ pub fn detect_language(path: &Path) -> Option { } /// Create a tree-sitter parser for the given language. 
-fn make_parser(lang: Language) -> Parser { +/// +/// Returns `None` if the language is not supported (feature not enabled). +fn make_parser(lang: Language) -> Option { let mut parser = Parser::new(); - parser - .set_language(&lang.ts_language()) - .expect("tree-sitter grammar should load"); - parser + let ts_lang = lang.ts_language()?; + parser.set_language(&ts_lang).ok()?; + Some(parser) } /// A parsed tree_path segment: (kind_shorthand, name). @@ -297,15 +328,15 @@ fn parse_tree_path(tree_path: &str) -> Option> { /// inclusive). /// /// Returns `None` if the tree_path cannot be resolved (item renamed, deleted, -/// or grammar unavailable). +/// grammar unavailable, or language not supported). pub fn resolve_tree_path(source: &str, tree_path: &str, lang: Language) -> Option<[usize; 2]> { if tree_path.is_empty() { return None; } - let config = lang.config(); + let config = lang.config()?; let segments = parse_tree_path(tree_path)?; - let mut parser = make_parser(lang); + let mut parser = make_parser(lang)?; let tree = parser.parse(source, None)?; let root = tree.root_node(); @@ -366,8 +397,12 @@ fn resolve_in_body<'a>( /// (e.g., the span doesn't align with a named item, or the language is /// unsupported). 
pub fn compute_tree_path(source: &str, span: [usize; 2], lang: Language) -> String { - let config = lang.config(); - let mut parser = make_parser(lang); + let Some(config) = lang.config() else { + return String::new(); + }; + let Some(mut parser) = make_parser(lang) else { + return String::new(); + }; let tree = match parser.parse(source, None) { Some(t) => t, None => return String::new(), diff --git a/crates/liyi/src/tree_path.rs.liyi.jsonc b/crates/liyi/src/tree_path.rs.liyi.jsonc index 13aade7..e7f37a4 100644 --- a/crates/liyi/src/tree_path.rs.liyi.jsonc +++ b/crates/liyi/src/tree_path.rs.liyi.jsonc @@ -19,8 +19,8 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 223, - 256 + 253, + 286 ], "tree_path": "fn::detect_language", "source_hash": "sha256:1647b004bc523d06b5484b9d29412743ae6bfdf02f5d512970fd0a20a157a7cf", @@ -32,10 +32,10 @@ "intent": "Enumerate supported tree-sitter languages for tree_path operations. In 0.1, only Rust is supported; the enum is the extension point for adding more languages.", "source_span": [ 182, - 194 + 189 ], "tree_path": "enum::Language", - "source_hash": "sha256:21e32f4b83f6a26fe960a8f51a2fbc1a57882b926696e78d8a7adb3da6ecc762", + "source_hash": "sha256:cfad736b976eb0cd3212d06cb22896d43affbd78181dc5878a71553467e7ff29", "source_anchor": "pub enum Language {" }, { @@ -54,8 +54,8 @@ "reviewed": false, "intent": "Parse a tree_path string into segments of (kind, name) pairs by splitting on '::' and grouping consecutive pairs. Return None if the number of parts is odd (malformed). 
Validate each kind against the known shorthand set.", "source_span": [ - 278, - 294 + 309, + 325 ], "tree_path": "fn::parse_tree_path", "source_hash": "sha256:eb1bdb126bb090d769612797d5428edd3c20ba72ba04dad58071bbfa955240c2", @@ -66,11 +66,11 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 301, - 318 + 332, + 349 ], "tree_path": "fn::resolve_tree_path", - "source_hash": "sha256:8cd19d6e6704970f8cbead0b56b05a9196ca29b0439b37b31a819a958dc03dbe", + "source_hash": "sha256:ee955e130971b81a3a7565d81618a2edd7c44a50d82b91250b9089cbbabef519", "source_anchor": "pub fn resolve_tree_path(source: &str, tree_path: &str, lang: Language) -> Option<[usize; 2]> {" }, { @@ -78,8 +78,8 @@ "reviewed": false, "intent": "Walk tree-sitter children of the given parent to find nodes matching each path segment in order. For single-segment paths, return the matching child directly. For multi-segment paths, descend into the first matching child via resolve_in_body for subsequent segments.", "source_span": [ - 321, - 350 + 352, + 381 ], "tree_path": "fn::resolve_segments", "source_hash": "sha256:1fcaea1f0eccde605bcd9de4094980378600d7a8d221aed55dc8fad1f60cf48b", @@ -90,8 +90,8 @@ "reviewed": false, "intent": "Find subsequent path segments inside an item's body or declaration_list. Try the 'body' field first (mod, fn), then fall back to looking for a declaration_list child (impl, trait). 
Delegate to resolve_segments for the recursive match.", "source_span": [ - 353, - 361 + 384, + 392 ], "tree_path": "fn::resolve_in_body", "source_hash": "sha256:f1514f012bc8d300c425867e4a1cce1aaf72f1f58885eeaf24456114234473d6", @@ -102,11 +102,11 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 368, - 389 + 399, + 424 ], "tree_path": "fn::compute_tree_path", - "source_hash": "sha256:30ecd47287f846a39cdbd906075c6eae16d286eda5c3bc92d87cfbae67ec2e74", + "source_hash": "sha256:1b92c3bed64a4f98aa3afc677bf4e8cdd872bb0ec44f9fb362ad55f7c129b847", "source_anchor": "pub fn compute_tree_path(source: &str, span: [usize; 2], lang: Language) -> String {" }, { @@ -114,8 +114,8 @@ "reviewed": false, "intent": "Find the widest item-bearing tree-sitter node whose start and end rows both fall within [target_start, target_end]. Must handle the attribute-sibling pattern where Rust attributes (#[derive(...)]) are siblings of the item node — the sidecar span can start before the item node. Prefer the outermost (widest) item when multiple items fall within the range.", "source_span": [ - 397, - 443 + 432, + 478 ], "tree_path": "fn::find_item_in_range", "source_hash": "sha256:187c06169aae241150cb9bd88810da07aef5d967431ae25b50aab3ff111fc220", @@ -126,8 +126,8 @@ "reviewed": false, "intent": "Recursively walk from root to target node, collecting (kind::name) path segments. At the target node, push its segment and return true. During descent, only enter children that spatially contain the target. When a child's subtree contains the target, prepend the current node's segment if it is an item node. 
Return false if the target cannot be found.", "source_span": [ - 461, - 507 + 496, + 542 ], "tree_path": "fn::collect_path", "source_hash": "sha256:0086ee43dc7c085025e553af9914df58fab43d8e8b579486f21d5788d8d0d221", From c37ee06ecbe261960885840618f2fc0c30932e23 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Tue, 10 Mar 2026 08:53:45 +0800 Subject: [PATCH 08/18] refactor(tree_path): make LanguageConfig fields private Make LanguageConfig fields private to hide implementation details and expose a cleaner public API. Changes: - Remove pub from all LanguageConfig fields - Add matches_extension(&self, ext: &str) -> bool public method - Update detect_language() to use the new method This prevents external code from depending on internal struct layout while still allowing the necessary operations. Original prompt: > Fix them one by one. AI-assisted-by: Kimi K2.5 (OpenClaw) Signed-off-by: WANG Xuerui --- crates/liyi/src/tree_path.rs | 29 ++++++++++------- crates/liyi/src/tree_path.rs.liyi.jsonc | 43 +++++++++++++------------ 2 files changed, 39 insertions(+), 33 deletions(-) diff --git a/crates/liyi/src/tree_path.rs b/crates/liyi/src/tree_path.rs index ff4bd3b..a5a35f7 100644 --- a/crates/liyi/src/tree_path.rs +++ b/crates/liyi/src/tree_path.rs @@ -20,17 +20,17 @@ use tree_sitter::{Language as TSLanguage, Node, Parser}; /// shorthands. pub struct LanguageConfig { /// Function to get the tree-sitter language grammar (lazy initialization). - pub ts_language: fn() -> TSLanguage, + ts_language: fn() -> TSLanguage, /// File extensions associated with this language. - pub extensions: &'static [&'static str], + extensions: &'static [&'static str], /// Map from tree_path kind shorthand to tree-sitter node kind. - pub kind_map: &'static [(&'static str, &'static str)], + kind_map: &'static [(&'static str, &'static str)], /// Field name to extract the node's name (usually "name"). 
- pub name_field: &'static str, + name_field: &'static str, /// Overrides for special cases: (node_kind, field_name) pairs. - pub name_overrides: &'static [(&'static str, &'static str)], + name_overrides: &'static [(&'static str, &'static str)], /// Field names to traverse to find a node's body/declaration_list. - pub body_fields: &'static [&'static str], + body_fields: &'static [&'static str], } impl LanguageConfig { @@ -78,6 +78,11 @@ impl LanguageConfig { node.children(&mut cursor) .find(|c| c.kind() == "declaration_list") } + + /// Check if the given file extension is associated with this language. + pub fn matches_extension(&self, ext: &str) -> bool { + self.extensions.contains(&ext) + } } /// Rust language configuration. @@ -253,31 +258,31 @@ impl Language { pub fn detect_language(path: &Path) -> Option { let ext = path.extension()?.to_str()?; - if RUST_CONFIG.extensions.contains(&ext) { + if RUST_CONFIG.matches_extension(ext) { return Some(Language::Rust); } #[cfg(feature = "lang-python")] - if PYTHON_CONFIG.extensions.contains(&ext) { + if PYTHON_CONFIG.matches_extension(ext) { return Some(Language::Python); } #[cfg(feature = "lang-go")] - if GO_CONFIG.extensions.contains(&ext) { + if GO_CONFIG.matches_extension(ext) { return Some(Language::Go); } #[cfg(feature = "lang-javascript")] - if JAVASCRIPT_CONFIG.extensions.contains(&ext) { + if JAVASCRIPT_CONFIG.matches_extension(ext) { return Some(Language::JavaScript); } #[cfg(feature = "lang-typescript")] { - if TYPESCRIPT_CONFIG.extensions.contains(&ext) { + if TYPESCRIPT_CONFIG.matches_extension(ext) { return Some(Language::TypeScript); } - if TSX_CONFIG.extensions.contains(&ext) { + if TSX_CONFIG.matches_extension(ext) { return Some(Language::Tsx); } } diff --git a/crates/liyi/src/tree_path.rs.liyi.jsonc b/crates/liyi/src/tree_path.rs.liyi.jsonc index e7f37a4..31fbd4a 100644 --- a/crates/liyi/src/tree_path.rs.liyi.jsonc +++ b/crates/liyi/src/tree_path.rs.liyi.jsonc @@ -11,7 +11,7 @@ 17, 28 ], - 
"source_hash": "sha256:cc43689510347aa1bf287146ee2bae19e2407df0ee82c7c74a35cf3010d1520d", + "source_hash": "sha256:fa948a9c1c198b1bf3ed0655165aad53a4d1a0ad2d48a134a5f58a43b5554a82", "source_anchor": "///" }, { @@ -19,11 +19,11 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 253, - 286 + 258, + 291 ], "tree_path": "fn::detect_language", - "source_hash": "sha256:1647b004bc523d06b5484b9d29412743ae6bfdf02f5d512970fd0a20a157a7cf", + "source_hash": "sha256:04d339ef243d8c1844e3e5703062deeac0955f70a92632afd865a1bc186f5fdd", "source_anchor": "pub fn detect_language(path: &Path) -> Option {" }, { @@ -31,8 +31,8 @@ "reviewed": false, "intent": "Enumerate supported tree-sitter languages for tree_path operations. In 0.1, only Rust is supported; the enum is the extension point for adding more languages.", "source_span": [ - 182, - 189 + 187, + 194 ], "tree_path": "enum::Language", "source_hash": "sha256:cfad736b976eb0cd3212d06cb22896d43affbd78181dc5878a71553467e7ff29", @@ -46,7 +46,8 @@ 79, 89 ], - "source_hash": "sha256:47528f806c78bceecea2d90ee823c37ae25540f399ac9fa3c8c42264648826f4", + "tree_path": "impl::LanguageConfig::fn::matches_extension", + "source_hash": "sha256:b2b1791211182b9b11ddbb6423b1d8156c51a9d484fca0540b132e17e6ef5f62", "source_anchor": " .find(|c| c.kind() == \"declaration_list\")" }, { @@ -54,8 +55,8 @@ "reviewed": false, "intent": "Parse a tree_path string into segments of (kind, name) pairs by splitting on '::' and grouping consecutive pairs. Return None if the number of parts is odd (malformed). 
Validate each kind against the known shorthand set.", "source_span": [ - 309, - 325 + 314, + 330 ], "tree_path": "fn::parse_tree_path", "source_hash": "sha256:eb1bdb126bb090d769612797d5428edd3c20ba72ba04dad58071bbfa955240c2", @@ -66,8 +67,8 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 332, - 349 + 337, + 354 ], "tree_path": "fn::resolve_tree_path", "source_hash": "sha256:ee955e130971b81a3a7565d81618a2edd7c44a50d82b91250b9089cbbabef519", @@ -78,8 +79,8 @@ "reviewed": false, "intent": "Walk tree-sitter children of the given parent to find nodes matching each path segment in order. For single-segment paths, return the matching child directly. For multi-segment paths, descend into the first matching child via resolve_in_body for subsequent segments.", "source_span": [ - 352, - 381 + 357, + 386 ], "tree_path": "fn::resolve_segments", "source_hash": "sha256:1fcaea1f0eccde605bcd9de4094980378600d7a8d221aed55dc8fad1f60cf48b", @@ -90,8 +91,8 @@ "reviewed": false, "intent": "Find subsequent path segments inside an item's body or declaration_list. Try the 'body' field first (mod, fn), then fall back to looking for a declaration_list child (impl, trait). Delegate to resolve_segments for the recursive match.", "source_span": [ - 384, - 392 + 389, + 397 ], "tree_path": "fn::resolve_in_body", "source_hash": "sha256:f1514f012bc8d300c425867e4a1cce1aaf72f1f58885eeaf24456114234473d6", @@ -102,8 +103,8 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 399, - 424 + 404, + 429 ], "tree_path": "fn::compute_tree_path", "source_hash": "sha256:1b92c3bed64a4f98aa3afc677bf4e8cdd872bb0ec44f9fb362ad55f7c129b847", @@ -114,8 +115,8 @@ "reviewed": false, "intent": "Find the widest item-bearing tree-sitter node whose start and end rows both fall within [target_start, target_end]. Must handle the attribute-sibling pattern where Rust attributes (#[derive(...)]) are siblings of the item node — the sidecar span can start before the item node. 
Prefer the outermost (widest) item when multiple items fall within the range.", "source_span": [ - 432, - 478 + 437, + 483 ], "tree_path": "fn::find_item_in_range", "source_hash": "sha256:187c06169aae241150cb9bd88810da07aef5d967431ae25b50aab3ff111fc220", @@ -126,8 +127,8 @@ "reviewed": false, "intent": "Recursively walk from root to target node, collecting (kind::name) path segments. At the target node, push its segment and return true. During descent, only enter children that spatially contain the target. When a child's subtree contains the target, prepend the current node's segment if it is an item node. Return false if the target cannot be found.", "source_span": [ - 496, - 542 + 501, + 547 ], "tree_path": "fn::collect_path", "source_hash": "sha256:0086ee43dc7c085025e553af9914df58fab43d8e8b579486f21d5788d8d0d221", From 8f8b602c899ffb3859663229abfb69a1ce80fa3d Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Tue, 10 Mar 2026 08:56:03 +0800 Subject: [PATCH 09/18] docs(tree_path): document Go method limitation and add TSX tests Add documentation for known limitations and test coverage: - Document Go method naming collision in GO_CONFIG doc comment - Note that methods resolve as method::Name without receiver type disambiguation, which can cause tree_path collisions - Add TSX test module with tests for function, class, interface, and file extension detection Original prompt: > Fix them one by one. AI-assisted-by: Kimi K2.5 (OpenClaw) Signed-off-by: WANG Xuerui --- crates/liyi/src/tree_path.rs | 77 +++++++++++++++++++++++++ crates/liyi/src/tree_path.rs.liyi.jsonc | 44 +++++++------- 2 files changed, 99 insertions(+), 22 deletions(-) diff --git a/crates/liyi/src/tree_path.rs b/crates/liyi/src/tree_path.rs index a5a35f7..ae59608 100644 --- a/crates/liyi/src/tree_path.rs +++ b/crates/liyi/src/tree_path.rs @@ -118,6 +118,11 @@ static PYTHON_CONFIG: LanguageConfig = LanguageConfig { }; /// Go language configuration (requires `lang-go` feature). 
+/// +/// Note: Go methods are resolved as `method::MethodName` without receiver +/// type disambiguation. This means two types with the same method name +/// will have colliding tree_paths. This is a known limitation; callers +/// should verify the enclosing type context if ambiguity is possible. #[cfg(feature = "lang-go")] static GO_CONFIG: LanguageConfig = LanguageConfig { ts_language: || tree_sitter_go::LANGUAGE.into(), @@ -1259,4 +1264,76 @@ function createUser(name: string): User { assert_eq!(re_resolved, resolved_span); } } + + #[cfg(feature = "lang-typescript")] + mod tsx_tests { + use super::*; + + const SAMPLE_TSX: &str = r#"// A React component + +interface Props { + title: string; + count: number; +} + +function Counter({ title, count }: Props) { + return ( +
+

{title}

+

Count: {count}

+
+ ); +} + +class Container extends React.Component { + render() { + return
{this.props.title}
; + } +} +"#; + + #[test] + fn resolve_tsx_function() { + let span = resolve_tree_path(SAMPLE_TSX, "fn::Counter", Language::Tsx); + assert!(span.is_some(), "should resolve fn::Counter in TSX"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_TSX.lines().collect(); + assert!( + lines[start - 1].contains("function Counter"), + "span should point to Counter function" + ); + } + + #[test] + fn resolve_tsx_class() { + let span = resolve_tree_path(SAMPLE_TSX, "class::Container", Language::Tsx); + assert!(span.is_some(), "should resolve class::Container in TSX"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_TSX.lines().collect(); + assert!( + lines[start - 1].contains("class Container"), + "span should point to Container class" + ); + } + + #[test] + fn resolve_tsx_interface() { + let span = resolve_tree_path(SAMPLE_TSX, "interface::Props", Language::Tsx); + assert!(span.is_some(), "should resolve interface::Props in TSX"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_TSX.lines().collect(); + assert!( + lines[start - 1].contains("interface Props"), + "span should point to Props interface" + ); + } + + #[test] + fn detect_tsx_extension() { + assert_eq!( + detect_language(Path::new("component.tsx")), + Some(Language::Tsx) + ); + } + } } diff --git a/crates/liyi/src/tree_path.rs.liyi.jsonc b/crates/liyi/src/tree_path.rs.liyi.jsonc index 31fbd4a..0933f8f 100644 --- a/crates/liyi/src/tree_path.rs.liyi.jsonc +++ b/crates/liyi/src/tree_path.rs.liyi.jsonc @@ -19,8 +19,8 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 258, - 291 + 263, + 296 ], "tree_path": "fn::detect_language", "source_hash": "sha256:04d339ef243d8c1844e3e5703062deeac0955f70a92632afd865a1bc186f5fdd", @@ -31,8 +31,8 @@ "reviewed": false, "intent": "Enumerate supported tree-sitter languages for tree_path operations. 
In 0.1, only Rust is supported; the enum is the extension point for adding more languages.", "source_span": [ - 187, - 194 + 192, + 199 ], "tree_path": "enum::Language", "source_hash": "sha256:cfad736b976eb0cd3212d06cb22896d43affbd78181dc5878a71553467e7ff29", @@ -43,20 +43,20 @@ "reviewed": false, "intent": "Extract the user-visible name of an AST node. For impl_item, return the type field text (e.g., 'Money' from 'impl Money'). For all other item kinds, return the name field. Return None if the node has no name/type field.", "source_span": [ - 79, - 89 + 83, + 85 ], "tree_path": "impl::LanguageConfig::fn::matches_extension", - "source_hash": "sha256:b2b1791211182b9b11ddbb6423b1d8156c51a9d484fca0540b132e17e6ef5f62", - "source_anchor": " .find(|c| c.kind() == \"declaration_list\")" + "source_hash": "sha256:7544b1ad4acbd6144486ed88419b6a9069425b448d9b96c032d5b3f9909403e3", + "source_anchor": " pub fn matches_extension(&self, ext: &str) -> bool {" }, { "item": "parse_tree_path", "reviewed": false, "intent": "Parse a tree_path string into segments of (kind, name) pairs by splitting on '::' and grouping consecutive pairs. Return None if the number of parts is odd (malformed). Validate each kind against the known shorthand set.", "source_span": [ - 314, - 330 + 319, + 335 ], "tree_path": "fn::parse_tree_path", "source_hash": "sha256:eb1bdb126bb090d769612797d5428edd3c20ba72ba04dad58071bbfa955240c2", @@ -67,8 +67,8 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 337, - 354 + 342, + 359 ], "tree_path": "fn::resolve_tree_path", "source_hash": "sha256:ee955e130971b81a3a7565d81618a2edd7c44a50d82b91250b9089cbbabef519", @@ -79,8 +79,8 @@ "reviewed": false, "intent": "Walk tree-sitter children of the given parent to find nodes matching each path segment in order. For single-segment paths, return the matching child directly. 
For multi-segment paths, descend into the first matching child via resolve_in_body for subsequent segments.", "source_span": [ - 357, - 386 + 362, + 391 ], "tree_path": "fn::resolve_segments", "source_hash": "sha256:1fcaea1f0eccde605bcd9de4094980378600d7a8d221aed55dc8fad1f60cf48b", @@ -91,8 +91,8 @@ "reviewed": false, "intent": "Find subsequent path segments inside an item's body or declaration_list. Try the 'body' field first (mod, fn), then fall back to looking for a declaration_list child (impl, trait). Delegate to resolve_segments for the recursive match.", "source_span": [ - 389, - 397 + 394, + 402 ], "tree_path": "fn::resolve_in_body", "source_hash": "sha256:f1514f012bc8d300c425867e4a1cce1aaf72f1f58885eeaf24456114234473d6", @@ -103,8 +103,8 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 404, - 429 + 409, + 434 ], "tree_path": "fn::compute_tree_path", "source_hash": "sha256:1b92c3bed64a4f98aa3afc677bf4e8cdd872bb0ec44f9fb362ad55f7c129b847", @@ -115,8 +115,8 @@ "reviewed": false, "intent": "Find the widest item-bearing tree-sitter node whose start and end rows both fall within [target_start, target_end]. Must handle the attribute-sibling pattern where Rust attributes (#[derive(...)]) are siblings of the item node — the sidecar span can start before the item node. Prefer the outermost (widest) item when multiple items fall within the range.", "source_span": [ - 437, - 483 + 442, + 488 ], "tree_path": "fn::find_item_in_range", "source_hash": "sha256:187c06169aae241150cb9bd88810da07aef5d967431ae25b50aab3ff111fc220", @@ -127,8 +127,8 @@ "reviewed": false, "intent": "Recursively walk from root to target node, collecting (kind::name) path segments. At the target node, push its segment and return true. During descent, only enter children that spatially contain the target. When a child's subtree contains the target, prepend the current node's segment if it is an item node. 
Return false if the target cannot be found.", "source_span": [ - 501, - 547 + 506, + 552 ], "tree_path": "fn::collect_path", "source_hash": "sha256:0086ee43dc7c085025e553af9914df58fab43d8e8b579486f21d5788d8d0d221", From 84232fccbf119e12dee2c4832ecefb7e51a2e2dc Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Tue, 10 Mar 2026 08:56:55 +0800 Subject: [PATCH 10/18] docs(tree_path): document extension collision behavior Add documentation to detect_language() explaining the behavior when two languages share an extension (first match wins). Original prompt: > Fix them one by one. AI-assisted-by: Kimi K2.5 (OpenClaw) Signed-off-by: WANG Xuerui --- crates/liyi/src/tree_path.rs | 6 +++++ crates/liyi/src/tree_path.rs.liyi.jsonc | 32 ++++++++++++------------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/crates/liyi/src/tree_path.rs b/crates/liyi/src/tree_path.rs index ae59608..04d1f3a 100644 --- a/crates/liyi/src/tree_path.rs +++ b/crates/liyi/src/tree_path.rs @@ -260,6 +260,12 @@ impl Language { /// Detect language from file extension. Returns `None` for unsupported /// languages (unknown extension or feature not enabled). +/// +/// # Extension Collision +/// +/// If two languages share an extension (unlikely with built-in languages), +/// the first match in the following order is returned: +/// Rust → Python → Go → JavaScript → TypeScript → TSX. 
pub fn detect_language(path: &Path) -> Option { let ext = path.extension()?.to_str()?; diff --git a/crates/liyi/src/tree_path.rs.liyi.jsonc b/crates/liyi/src/tree_path.rs.liyi.jsonc index 0933f8f..5a155b4 100644 --- a/crates/liyi/src/tree_path.rs.liyi.jsonc +++ b/crates/liyi/src/tree_path.rs.liyi.jsonc @@ -19,8 +19,8 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 263, - 296 + 269, + 302 ], "tree_path": "fn::detect_language", "source_hash": "sha256:04d339ef243d8c1844e3e5703062deeac0955f70a92632afd865a1bc186f5fdd", @@ -55,8 +55,8 @@ "reviewed": false, "intent": "Parse a tree_path string into segments of (kind, name) pairs by splitting on '::' and grouping consecutive pairs. Return None if the number of parts is odd (malformed). Validate each kind against the known shorthand set.", "source_span": [ - 319, - 335 + 325, + 341 ], "tree_path": "fn::parse_tree_path", "source_hash": "sha256:eb1bdb126bb090d769612797d5428edd3c20ba72ba04dad58071bbfa955240c2", @@ -67,8 +67,8 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 342, - 359 + 348, + 365 ], "tree_path": "fn::resolve_tree_path", "source_hash": "sha256:ee955e130971b81a3a7565d81618a2edd7c44a50d82b91250b9089cbbabef519", @@ -79,8 +79,8 @@ "reviewed": false, "intent": "Walk tree-sitter children of the given parent to find nodes matching each path segment in order. For single-segment paths, return the matching child directly. For multi-segment paths, descend into the first matching child via resolve_in_body for subsequent segments.", "source_span": [ - 362, - 391 + 368, + 397 ], "tree_path": "fn::resolve_segments", "source_hash": "sha256:1fcaea1f0eccde605bcd9de4094980378600d7a8d221aed55dc8fad1f60cf48b", @@ -91,8 +91,8 @@ "reviewed": false, "intent": "Find subsequent path segments inside an item's body or declaration_list. Try the 'body' field first (mod, fn), then fall back to looking for a declaration_list child (impl, trait). 
Delegate to resolve_segments for the recursive match.", "source_span": [ - 394, - 402 + 400, + 408 ], "tree_path": "fn::resolve_in_body", "source_hash": "sha256:f1514f012bc8d300c425867e4a1cce1aaf72f1f58885eeaf24456114234473d6", @@ -103,8 +103,8 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 409, - 434 + 415, + 440 ], "tree_path": "fn::compute_tree_path", "source_hash": "sha256:1b92c3bed64a4f98aa3afc677bf4e8cdd872bb0ec44f9fb362ad55f7c129b847", @@ -115,8 +115,8 @@ "reviewed": false, "intent": "Find the widest item-bearing tree-sitter node whose start and end rows both fall within [target_start, target_end]. Must handle the attribute-sibling pattern where Rust attributes (#[derive(...)]) are siblings of the item node — the sidecar span can start before the item node. Prefer the outermost (widest) item when multiple items fall within the range.", "source_span": [ - 442, - 488 + 448, + 494 ], "tree_path": "fn::find_item_in_range", "source_hash": "sha256:187c06169aae241150cb9bd88810da07aef5d967431ae25b50aab3ff111fc220", @@ -127,8 +127,8 @@ "reviewed": false, "intent": "Recursively walk from root to target node, collecting (kind::name) path segments. At the target node, push its segment and return true. During descent, only enter children that spatially contain the target. When a child's subtree contains the target, prepend the current node's segment if it is an item node. 
Return false if the target cannot be found.", "source_span": [ - 506, - 552 + 512, + 558 ], "tree_path": "fn::collect_path", "source_hash": "sha256:0086ee43dc7c085025e553af9914df58fab43d8e8b579486f21d5788d8d0d221", From 6d908e67c129e934c023ffb0405cdc1bfb66f62a Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Tue, 10 Mar 2026 17:16:09 +0800 Subject: [PATCH 11/18] feat(linter): make all languages built-in and add full Go support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop Cargo feature gates for tree-sitter grammars — all five languages (Rust, Python, Go, JavaScript, TypeScript) are now compiled into the binary unconditionally. The binary-size cost is modest relative to the universality benefit; Python/Go/JS/TS codebases vastly outnumber Rust codebases and requiring opt-in per language would hinder adoption. Go tree_path support: - Add `custom_name` callback to `LanguageConfig` for languages with non-trivial name extraction. - Change `node_name` return type to `Cow` to support both borrowed and owned (composite) names. - Encode method receivers: `method::(*Type).Method` (pointer) vs `method::Type.Method` (value). - Navigate type_declaration → type_spec, const_declaration → const_spec, var_declaration → var_spec indirection via custom_name. - Use unified `type` shorthand for structs, interfaces, and type aliases — Go type names are unique per package. Code changes (tree_path.rs, Cargo.toml): - Remove all #[cfg(feature = "...")] gates from statics, Language impl, detect_language, and test modules. - Make all tree-sitter-* dependencies unconditional; remove [features] section from Cargo.toml. Doc updates (liyi-design.md, liyi-01x-roadmap.md): - Update design doc: language support is built-in (not feature-gated), binary is ~6000 lines / 11 MiB (not "small"), remains single binary. 
- Update roadmap: mark M1 milestones complete, remove feature-gate references from headings and acceptance criteria, document resolved Go receiver encoding design. All 114 tests pass (90 unit + 20 golden + 4 proptest). Original prompt: > Review the current branch's changes against the roadmap and > design doc on the main branch. > > Regarding M1.3, this pattern is prevalent so support should be > added. Regarding conditional features, having them not built-in by > default would hinder adoption (orders of magnitude more than Rust > codebases in the wild), so to fulfill the project's promise as a > universal tool I'd suggest dropping "conditional" altogether in > the docs. The design doc may also need updating in that the linter > is already >6000 lines of Rust and a 11 MiB release build, by no > means "small". It is expected to remain as one binary, though. AI-assisted-by: Claude Opus 4.6 (GitHub Copilot) Signed-off-by: WANG Xuerui --- crates/liyi/Cargo.toml | 15 +- crates/liyi/src/tree_path.rs | 362 ++++++++++++++++++++++++----------- docs/liyi-01x-roadmap.md | 54 +++--- docs/liyi-design.md | 12 +- 4 files changed, 285 insertions(+), 158 deletions(-) diff --git a/crates/liyi/Cargo.toml b/crates/liyi/Cargo.toml index 5ab4a06..58352bf 100644 --- a/crates/liyi/Cargo.toml +++ b/crates/liyi/Cargo.toml @@ -18,17 +18,10 @@ ignore = "0.4" regex = "1" tree-sitter = "0.26.6" tree-sitter-rust = "0.24.0" -tree-sitter-python = { version = "0.25.0", optional = true } -tree-sitter-go = { version = "0.25.0", optional = true } -tree-sitter-javascript = { version = "0.25.0", optional = true } -tree-sitter-typescript = { version = "0.23.2", optional = true } - -[features] -default = ["lang-python", "lang-go", "lang-javascript", "lang-typescript"] -lang-python = ["dep:tree-sitter-python"] -lang-go = ["dep:tree-sitter-go"] -lang-javascript = ["dep:tree-sitter-javascript"] -lang-typescript = ["dep:tree-sitter-typescript", "lang-javascript"] +tree-sitter-python = "0.25.0" +tree-sitter-go 
= "0.25.0" +tree-sitter-javascript = "0.25.0" +tree-sitter-typescript = "0.23.2" [dev-dependencies] proptest = "1" diff --git a/crates/liyi/src/tree_path.rs b/crates/liyi/src/tree_path.rs index 04d1f3a..40b93d9 100644 --- a/crates/liyi/src/tree_path.rs +++ b/crates/liyi/src/tree_path.rs @@ -9,6 +9,7 @@ //! locate items by structural identity, making span recovery deterministic //! across formatting changes, import additions, and line reflows. +use std::borrow::Cow; use std::path::Path; use tree_sitter::{Language as TSLanguage, Node, Parser}; @@ -31,6 +32,10 @@ pub struct LanguageConfig { name_overrides: &'static [(&'static str, &'static str)], /// Field names to traverse to find a node's body/declaration_list. body_fields: &'static [&'static str], + /// Custom name extraction for node kinds that need special handling + /// (e.g., Go methods with receiver types, Go type_declaration wrapping type_spec). + /// Returns `Some(name)` for handled kinds, `None` to fall through to default. + custom_name: Option Option>, } impl LanguageConfig { @@ -51,7 +56,17 @@ impl LanguageConfig { } /// Extract the name of a named AST node. - fn node_name<'a>(&self, node: &Node<'a>, source: &'a str) -> Option<&'a str> { + /// + /// Returns a `Cow` — borrowed from `source` in the common case, + /// owned when the name is constructed (e.g., Go method receiver encoding). 
+ fn node_name<'a>(&self, node: &Node<'a>, source: &'a str) -> Option> { + // Check custom_name callback first (e.g., Go method receivers) + if let Some(custom) = self.custom_name { + if let Some(name) = custom(node, source) { + return Some(Cow::Owned(name)); + } + } + let kind = node.kind(); // Check for name field override (e.g., impl_item uses "type" field) @@ -63,7 +78,7 @@ impl LanguageConfig { .unwrap_or(self.name_field); let name_node = node.child_by_field_name(field_name)?; - Some(&source[name_node.byte_range()]) + Some(Cow::Borrowed(&source[name_node.byte_range()])) } /// Find a body/declaration_list child for descending into containers. @@ -104,10 +119,10 @@ static RUST_CONFIG: LanguageConfig = LanguageConfig { name_field: "name", name_overrides: &[("impl_item", "type")], body_fields: &["body"], + custom_name: None, }; -/// Python language configuration (requires `lang-python` feature). -#[cfg(feature = "lang-python")] +/// Python language configuration. static PYTHON_CONFIG: LanguageConfig = LanguageConfig { ts_language: || tree_sitter_python::LANGUAGE.into(), extensions: &["py", "pyi"], @@ -115,29 +130,87 @@ static PYTHON_CONFIG: LanguageConfig = LanguageConfig { name_field: "name", name_overrides: &[], body_fields: &["body"], + custom_name: None, }; -/// Go language configuration (requires `lang-go` feature). +/// Custom name extraction for Go nodes. /// -/// Note: Go methods are resolved as `method::MethodName` without receiver -/// type disambiguation. This means two types with the same method name -/// will have colliding tree_paths. This is a known limitation; callers -/// should verify the enclosing type context if ambiguity is possible. -#[cfg(feature = "lang-go")] +/// Handles three Go-specific patterns: +/// - `method_declaration`: encodes receiver type into the name, producing +/// `ReceiverType.MethodName` or `(*ReceiverType).MethodName`. +/// - `type_declaration`: navigates to the inner `type_spec` for the name. 
+/// - `const_declaration` / `var_declaration`: navigates to the inner spec. +fn go_node_name(node: &Node, source: &str) -> Option { + match node.kind() { + "method_declaration" => { + let method_name_node = node.child_by_field_name("name")?; + let method_name = &source[method_name_node.byte_range()]; + + let receiver = node.child_by_field_name("receiver")?; + let mut cursor = receiver.walk(); + let param = receiver + .children(&mut cursor) + .find(|c| c.kind() == "parameter_declaration")?; + + let type_node = param.child_by_field_name("type")?; + let receiver_type = if type_node.kind() == "pointer_type" { + let mut cursor2 = type_node.walk(); + let inner = type_node + .children(&mut cursor2) + .find(|c| c.kind() == "type_identifier")?; + format!("(*{})", &source[inner.byte_range()]) + } else { + source[type_node.byte_range()].to_string() + }; + + Some(format!("{receiver_type}.{method_name}")) + } + "type_declaration" => { + let mut cursor = node.walk(); + let type_spec = node + .children(&mut cursor) + .find(|c| c.kind() == "type_spec")?; + let name_node = type_spec.child_by_field_name("name")?; + Some(source[name_node.byte_range()].to_string()) + } + "const_declaration" => { + let mut cursor = node.walk(); + let spec = node + .children(&mut cursor) + .find(|c| c.kind() == "const_spec")?; + let name_node = spec.child_by_field_name("name")?; + Some(source[name_node.byte_range()].to_string()) + } + "var_declaration" => { + let mut cursor = node.walk(); + let spec = node + .children(&mut cursor) + .find(|c| c.kind() == "var_spec")?; + let name_node = spec.child_by_field_name("name")?; + Some(source[name_node.byte_range()].to_string()) + } + _ => None, + } +} + +/// Go language configuration. 
static GO_CONFIG: LanguageConfig = LanguageConfig { ts_language: || tree_sitter_go::LANGUAGE.into(), extensions: &["go"], kind_map: &[ ("fn", "function_declaration"), ("method", "method_declaration"), + ("type", "type_declaration"), + ("const", "const_declaration"), + ("var", "var_declaration"), ], name_field: "name", name_overrides: &[], body_fields: &["body"], + custom_name: Some(go_node_name), }; -/// JavaScript language configuration (requires `lang-javascript` feature). -#[cfg(feature = "lang-javascript")] +/// JavaScript language configuration. static JAVASCRIPT_CONFIG: LanguageConfig = LanguageConfig { ts_language: || tree_sitter_javascript::LANGUAGE.into(), extensions: &["js", "mjs", "cjs", "jsx"], @@ -149,10 +222,10 @@ static JAVASCRIPT_CONFIG: LanguageConfig = LanguageConfig { name_field: "name", name_overrides: &[], body_fields: &["body"], + custom_name: None, }; -/// TypeScript language configuration (requires `lang-typescript` feature). -#[cfg(feature = "lang-typescript")] +/// TypeScript language configuration. static TYPESCRIPT_CONFIG: LanguageConfig = LanguageConfig { ts_language: || tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(), extensions: &["ts", "mts", "cts"], @@ -167,10 +240,10 @@ static TYPESCRIPT_CONFIG: LanguageConfig = LanguageConfig { name_field: "name", name_overrides: &[], body_fields: &["body"], + custom_name: None, }; -/// TSX language configuration (requires `lang-typescript` feature). -#[cfg(feature = "lang-typescript")] +/// TSX language configuration. static TSX_CONFIG: LanguageConfig = LanguageConfig { ts_language: || tree_sitter_typescript::LANGUAGE_TSX.into(), extensions: &["tsx"], @@ -185,6 +258,7 @@ static TSX_CONFIG: LanguageConfig = LanguageConfig { name_field: "name", name_overrides: &[], body_fields: &["body"], + custom_name: None, }; /// Supported languages for tree_path resolution. @@ -199,67 +273,26 @@ pub enum Language { } impl Language { - /// Check if this language is supported (its feature is enabled). 
- pub fn is_supported(&self) -> bool { - match self { - Language::Rust => true, - Language::Python => cfg!(feature = "lang-python"), - Language::Go => cfg!(feature = "lang-go"), - Language::JavaScript => cfg!(feature = "lang-javascript"), - Language::TypeScript => cfg!(feature = "lang-typescript"), - Language::Tsx => cfg!(feature = "lang-typescript"), - } - } - /// Get the language configuration for this language. - /// - /// Returns `None` if the language is not supported (feature not enabled). - fn config(&self) -> Option<&'static LanguageConfig> { + fn config(&self) -> &'static LanguageConfig { match self { - Language::Rust => Some(&RUST_CONFIG), - Language::Python => { - #[cfg(feature = "lang-python")] - return Some(&PYTHON_CONFIG); - #[cfg(not(feature = "lang-python"))] - return None; - } - Language::Go => { - #[cfg(feature = "lang-go")] - return Some(&GO_CONFIG); - #[cfg(not(feature = "lang-go"))] - return None; - } - Language::JavaScript => { - #[cfg(feature = "lang-javascript")] - return Some(&JAVASCRIPT_CONFIG); - #[cfg(not(feature = "lang-javascript"))] - return None; - } - Language::TypeScript => { - #[cfg(feature = "lang-typescript")] - return Some(&TYPESCRIPT_CONFIG); - #[cfg(not(feature = "lang-typescript"))] - return None; - } - Language::Tsx => { - #[cfg(feature = "lang-typescript")] - return Some(&TSX_CONFIG); - #[cfg(not(feature = "lang-typescript"))] - return None; - } + Language::Rust => &RUST_CONFIG, + Language::Python => &PYTHON_CONFIG, + Language::Go => &GO_CONFIG, + Language::JavaScript => &JAVASCRIPT_CONFIG, + Language::TypeScript => &TYPESCRIPT_CONFIG, + Language::Tsx => &TSX_CONFIG, } } /// Get the tree-sitter language grammar. - /// - /// Returns `None` if the language is not supported. - fn ts_language(&self) -> Option { - self.config().map(|cfg| (cfg.ts_language)()) + fn ts_language(&self) -> TSLanguage { + (self.config().ts_language)() } } /// Detect language from file extension. 
Returns `None` for unsupported -/// languages (unknown extension or feature not enabled). +/// languages (unknown extension). /// /// # Extension Collision /// @@ -273,42 +306,35 @@ pub fn detect_language(path: &Path) -> Option { return Some(Language::Rust); } - #[cfg(feature = "lang-python")] if PYTHON_CONFIG.matches_extension(ext) { return Some(Language::Python); } - #[cfg(feature = "lang-go")] if GO_CONFIG.matches_extension(ext) { return Some(Language::Go); } - #[cfg(feature = "lang-javascript")] if JAVASCRIPT_CONFIG.matches_extension(ext) { return Some(Language::JavaScript); } - #[cfg(feature = "lang-typescript")] - { - if TYPESCRIPT_CONFIG.matches_extension(ext) { - return Some(Language::TypeScript); - } - if TSX_CONFIG.matches_extension(ext) { - return Some(Language::Tsx); - } + if TYPESCRIPT_CONFIG.matches_extension(ext) { + return Some(Language::TypeScript); + } + if TSX_CONFIG.matches_extension(ext) { + return Some(Language::Tsx); } None } /// Create a tree-sitter parser for the given language. -/// -/// Returns `None` if the language is not supported (feature not enabled). -fn make_parser(lang: Language) -> Option { +fn make_parser(lang: Language) -> Parser { let mut parser = Parser::new(); - let ts_lang = lang.ts_language()?; - parser.set_language(&ts_lang).ok()?; - Some(parser) + parser + .set_language(&lang.ts_language()) + .expect("tree-sitter grammar should load"); + parser } /// A parsed tree_path segment: (kind_shorthand, name). 
@@ -350,9 +376,9 @@ pub fn resolve_tree_path(source: &str, tree_path: &str, lang: Language) -> Optio return None; } - let config = lang.config()?; + let config = lang.config(); let segments = parse_tree_path(tree_path)?; - let mut parser = make_parser(lang)?; + let mut parser = make_parser(lang); let tree = parser.parse(source, None)?; let root = tree.root_node(); @@ -384,9 +410,9 @@ fn resolve_segments<'a>( continue; } if let Some(name) = config.node_name(&child, source) { - if name == seg.name && segments.len() == 1 { + if *name == seg.name && segments.len() == 1 { return Some(child); - } else if name == seg.name { + } else if *name == seg.name { // Descend — look inside this node's body return resolve_in_body(config, &child, &segments[1..], source); } @@ -413,12 +439,8 @@ fn resolve_in_body<'a>( /// (e.g., the span doesn't align with a named item, or the language is /// unsupported). pub fn compute_tree_path(source: &str, span: [usize; 2], lang: Language) -> String { - let Some(config) = lang.config() else { - return String::new(); - }; - let Some(mut parser) = make_parser(lang) else { - return String::new(); - }; + let config = lang.config(); + let mut parser = make_parser(lang); let tree = match parser.parse(source, None) { Some(t) => t, None => return String::new(), @@ -782,11 +804,7 @@ fn standalone() -> i32 { detect_language(Path::new("src/main.rs")), Some(Language::Rust) ); - // Python detection depends on the lang-python feature - #[cfg(feature = "lang-python")] assert_eq!(detect_language(Path::new("foo.py")), Some(Language::Python)); - #[cfg(not(feature = "lang-python"))] - assert_eq!(detect_language(Path::new("foo.py")), None); } #[test] @@ -827,7 +845,6 @@ fn standalone() -> i32 { 42 } } } - #[cfg(feature = "lang-python")] mod python_tests { use super::*; @@ -941,7 +958,6 @@ def calculate_total(items): } } - #[cfg(feature = "lang-go")] mod go_tests { use super::*; @@ -954,6 +970,17 @@ type Calculator struct { value int } +// Reader is an interface 
+type Reader interface { + Read(p []byte) (n int, err error) +} + +// MaxRetries is a constant +const MaxRetries = 3 + +// DefaultTimeout is a var +var DefaultTimeout = 30 + // Add adds a number to the calculator's value func (c *Calculator) Add(n int) { c.value += n @@ -984,9 +1011,10 @@ func Add(a, b int) int { } #[test] - fn resolve_go_method() { - let span = resolve_tree_path(SAMPLE_GO, "method::Add", Language::Go); - assert!(span.is_some(), "should resolve method::Add"); + fn resolve_go_pointer_method() { + let span = + resolve_tree_path(SAMPLE_GO, "method::(*Calculator).Add", Language::Go); + assert!(span.is_some(), "should resolve method::(*Calculator).Add"); let [start, _end] = span.unwrap(); let lines: Vec<&str> = SAMPLE_GO.lines().collect(); assert!( @@ -996,10 +1024,75 @@ func Add(a, b int) int { ); } + #[test] + fn resolve_go_value_method() { + let span = + resolve_tree_path(SAMPLE_GO, "method::Calculator.Value", Language::Go); + assert!(span.is_some(), "should resolve method::Calculator.Value"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_GO.lines().collect(); + assert!( + lines[start - 1].contains("func (c Calculator) Value"), + "span should point to Value method, got: {}", + lines[start - 1] + ); + } + + #[test] + fn resolve_go_type_struct() { + let span = resolve_tree_path(SAMPLE_GO, "type::Calculator", Language::Go); + assert!(span.is_some(), "should resolve type::Calculator"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_GO.lines().collect(); + assert!( + lines[start - 1].contains("type Calculator struct"), + "span should point to Calculator struct, got: {}", + lines[start - 1] + ); + } + + #[test] + fn resolve_go_type_interface() { + let span = resolve_tree_path(SAMPLE_GO, "type::Reader", Language::Go); + assert!(span.is_some(), "should resolve type::Reader"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_GO.lines().collect(); + assert!( + lines[start - 1].contains("type 
Reader interface"), + "span should point to Reader interface, got: {}", + lines[start - 1] + ); + } + + #[test] + fn resolve_go_const() { + let span = resolve_tree_path(SAMPLE_GO, "const::MaxRetries", Language::Go); + assert!(span.is_some(), "should resolve const::MaxRetries"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_GO.lines().collect(); + assert!( + lines[start - 1].contains("const MaxRetries"), + "span should point to MaxRetries const, got: {}", + lines[start - 1] + ); + } + + #[test] + fn resolve_go_var() { + let span = resolve_tree_path(SAMPLE_GO, "var::DefaultTimeout", Language::Go); + assert!(span.is_some(), "should resolve var::DefaultTimeout"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_GO.lines().collect(); + assert!( + lines[start - 1].contains("var DefaultTimeout"), + "span should point to DefaultTimeout var, got: {}", + lines[start - 1] + ); + } + #[test] fn compute_go_function_path() { let lines: Vec<&str> = SAMPLE_GO.lines().collect(); - // Find the standalone Add function (last one in file) let start = lines .iter() .enumerate() @@ -1015,14 +1108,13 @@ func Add(a, b int) int { } #[test] - fn compute_go_method_path() { + fn compute_go_pointer_method_path() { let lines: Vec<&str> = SAMPLE_GO.lines().collect(); let start = lines .iter() .position(|l| l.contains("func (c *Calculator) Add")) .unwrap() + 1; - // Find end of method (next closing brace at start of line or end of file) let end = lines .iter() .enumerate() @@ -1032,12 +1124,51 @@ func Add(a, b int) int { .unwrap_or(lines.len()); let path = compute_tree_path(SAMPLE_GO, [start, end], Language::Go); - assert_eq!(path, "method::Add"); + assert_eq!(path, "method::(*Calculator).Add"); + } + + #[test] + fn compute_go_value_method_path() { + let lines: Vec<&str> = SAMPLE_GO.lines().collect(); + let start = lines + .iter() + .position(|l| l.contains("func (c Calculator) Value")) + .unwrap() + + 1; + let end = lines + .iter() + .enumerate() + 
.skip(start) + .find(|(_, l)| l.starts_with('}')) + .map(|(i, _)| i + 1) + .unwrap_or(lines.len()); + + let path = compute_tree_path(SAMPLE_GO, [start, end], Language::Go); + assert_eq!(path, "method::Calculator.Value"); + } + + #[test] + fn compute_go_type_path() { + let lines: Vec<&str> = SAMPLE_GO.lines().collect(); + let start = lines + .iter() + .position(|l| l.contains("type Calculator struct")) + .unwrap() + + 1; + let end = lines + .iter() + .enumerate() + .skip(start) + .find(|(_, l)| l.starts_with('}')) + .map(|(i, _)| i + 1) + .unwrap_or(lines.len()); + + let path = compute_tree_path(SAMPLE_GO, [start, end], Language::Go); + assert_eq!(path, "type::Calculator"); } #[test] fn roundtrip_go() { - // Compute path for fn::Add, then resolve it let resolved_span = resolve_tree_path(SAMPLE_GO, "fn::Add", Language::Go).unwrap(); let computed_path = compute_tree_path(SAMPLE_GO, resolved_span, Language::Go); @@ -1046,9 +1177,20 @@ func Add(a, b int) int { let re_resolved = resolve_tree_path(SAMPLE_GO, &computed_path, Language::Go).unwrap(); assert_eq!(re_resolved, resolved_span); } + + #[test] + fn roundtrip_go_method() { + let resolved_span = + resolve_tree_path(SAMPLE_GO, "method::(*Calculator).Add", Language::Go).unwrap(); + + let computed_path = compute_tree_path(SAMPLE_GO, resolved_span, Language::Go); + assert_eq!(computed_path, "method::(*Calculator).Add"); + + let re_resolved = resolve_tree_path(SAMPLE_GO, &computed_path, Language::Go).unwrap(); + assert_eq!(re_resolved, resolved_span); + } } - #[cfg(feature = "lang-javascript")] mod javascript_tests { use super::*; @@ -1152,7 +1294,6 @@ const utils = { } } - #[cfg(feature = "lang-typescript")] mod typescript_tests { use super::*; @@ -1271,7 +1412,6 @@ function createUser(name: string): User { } } - #[cfg(feature = "lang-typescript")] mod tsx_tests { use super::*; diff --git a/docs/liyi-01x-roadmap.md b/docs/liyi-01x-roadmap.md index 26c3b6a..a8fafbb 100644 --- a/docs/liyi-01x-roadmap.md +++ 
b/docs/liyi-01x-roadmap.md @@ -1,16 +1,16 @@ # 立意 (Lìyì) — 0.1.x Roadmap -2026-03-06 (updated 2026-03-09) +2026-03-06 (updated 2026-03-10) --- ## Overview -This document covers post-MVP work that ships as 0.1.x patch releases. Everything here is additive — no schema changes, no CLI breaking changes, no behavioral regressions. Users who never enable a Cargo feature or run a new subcommand see zero impact. +This document covers post-MVP work that ships as 0.1.x patch releases. Everything here is additive — no schema changes, no CLI breaking changes, no behavioral regressions. The MVP roadmap (`docs/liyi-mvp-roadmap.md`) covers the 0.1.0 release. This document picks up where it leaves off. -**Design authority:** `docs/liyi-design.md` v8.7 — see *Structural identity via `tree_path`*, *Multi-language architecture (`LanguageConfig`)*, and *Annotation coverage*. +**Design authority:** `docs/liyi-design.md` v8.8 — see *Structural identity via `tree_path`*, *Multi-language architecture (`LanguageConfig`)*, and *Annotation coverage*. --- @@ -18,6 +18,7 @@ The MVP roadmap (`docs/liyi-mvp-roadmap.md`) covers the 0.1.0 release. This docu | Milestone | Status | Notes | |-----------|--------|-------| +| M1 Multi-language tree_path | ✅ Complete | All 5 languages built-in, no feature gates | | M3 Remaining MVP gaps | ✅ Complete | All items implemented | | M5.1 MissingRelated | ✅ Complete | Diagnostic implemented, auto-fix in `--fix` mode | | M5.2 `--fail-on-untracked` | ✅ Complete | Flag implemented with tests | @@ -34,20 +35,18 @@ The MVP roadmap (`docs/liyi-mvp-roadmap.md`) covers the 0.1.0 release. This docu ## M1. Multi-language `tree_path` support -**Status:** Not started — deferred to post-0.1.x or community contribution. +**Status:** ✅ Complete — all languages built-in, no feature gates. -**Goal:** Extend tree-sitter-based structural identity from Rust-only to Python, Go, JavaScript, and TypeScript. 
+**Goal:** Extend tree-sitter-based structural identity from Rust-only to Python, Go, JavaScript, and TypeScript. All grammars are compiled into the binary unconditionally — no Cargo features, no opt-in. The binary-size cost is modest relative to the universality benefit; Python, Go, JavaScript, and TypeScript codebases vastly outnumber Rust codebases, and requiring users to opt in per language would hinder adoption of a tool whose value proposition is universality. -**Prerequisite:** Refactor `tree_path.rs` from hardcoded Rust-specific `KIND_MAP` + `node_name` to a data-driven `LanguageConfig` abstraction. This is the enabling refactor — each subsequent language is additive data, not new code paths. +### M1.1. `LanguageConfig` refactor ✅ -### M1.1. `LanguageConfig` refactor (~half day) - -Extract the four language-specific touch points into a configuration struct: +Extracted language-specific touch points into a data-driven `LanguageConfig` struct: | Current code | Becomes | |---|---| | `KIND_MAP` (hardcoded Rust node kinds) | `LanguageConfig::kind_map` | -| `Language` enum (only `Rust`) | Extended with variants per feature | +| `Language` enum (only `Rust`) | Extended with variants per language | | `detect_language()` (only `.rs`) | Dispatch table from extensions | | `make_parser()` (only `tree_sitter_rust`) | `LanguageConfig::ts_language` | | `node_name()` (`impl_item` special case) | `LanguageConfig::name_overrides` | @@ -56,20 +55,23 @@ The `LanguageConfig` struct (from design doc v8.6): ```rust struct LanguageConfig { - ts_language: tree_sitter::Language, + ts_language: fn() -> tree_sitter::Language, extensions: &'static [&'static str], kind_map: &'static [(&'static str, &'static str)], name_field: &'static str, name_overrides: &'static [(&'static str, &'static str)], body_fields: &'static [&'static str], + custom_name: Option<fn(&Node, &str) -> Option<String>>, } ``` +The `custom_name` callback handles languages with non-trivial name extraction (e.g., Go method receiver encoding, Go
`type_declaration` → `type_spec` indirection). + **Acceptance criteria:** - All existing tests pass with Rust handled via `LanguageConfig` instead of hardcoded paths. -- Adding a new language requires only a new `LanguageConfig` constant and a Cargo feature — no changes to resolve/compute logic. +- Adding a new language requires only a new `LanguageConfig` constant — no changes to resolve/compute logic. -### M1.2. Python (`lang-python` feature) +### M1.2. Python ✅ **Grammar:** `tree-sitter-python` (0.25.0) @@ -93,7 +95,7 @@ struct LanguageConfig { - `compute_tree_path` produces correct path for top-level functions, class methods, nested classes. - Roundtrip (compute → resolve → same span) passes for representative Python code. -### M1.3. Go (`lang-go` feature) +### M1.3. Go ✅ **Grammar:** `tree-sitter-go` (0.25.0) @@ -103,30 +105,22 @@ struct LanguageConfig { |---|---| | `fn` | `function_declaration` | | `method` | `method_declaration` | -| `struct` | `type_declaration` → `type_spec` with `struct_type` | -| `interface` | `type_declaration` → `type_spec` with `interface_type` | -| `const` | `const_declaration` | -| `var` | `var_declaration` | +| `type` | `type_declaration` (name extracted from inner `type_spec`) | +| `const` | `const_declaration` (name extracted from inner `const_spec`) | +| `var` | `var_declaration` (name extracted from inner `var_spec`) | **Design notes:** -- Go methods have receivers and live at top level, not nested inside a struct body. Tree_path encoding: `method::(*MyType).DoThing` or `method::MyType.DoThing`. The method name includes the receiver type for disambiguation. -- `type_declaration` wraps `type_spec` which has the actual name. Name extraction needs to reach into `type_spec` → `name` field. +- Go methods encode the receiver type in tree_path: `method::(*MyType).DoThing` (pointer receiver) or `method::MyType.DoThing` (value receiver). This disambiguates methods with the same name on different types. 
+- `type_declaration` wraps `type_spec` which has the actual name. A `custom_name` callback navigates the indirection. A single `type` shorthand covers structs, interfaces, and type aliases — Go type names are unique per package, so no disambiguation is needed. - No nesting equivalent to Rust's `impl` or Python's class body — all functions/methods are top-level. **Extensions:** `.go` -**Open design question:** Receiver encoding in tree_path. Options: -1. `method::MyType.DoThing` — simple, matches Go syntax -2. `method::(*MyType).DoThing` — distinguishes pointer/value receivers -3. `struct::MyType::method::DoThing` — uses nested path syntax despite flat AST - -Option 1 is recommended — simple and readable, with pointer receiver indicated by `*` prefix when present. - **Acceptance criteria:** -- Functions, methods (pointer + value receiver), struct types, interface types resolve correctly. +- Functions, methods (pointer + value receiver), type declarations (struct + interface), const, var resolve correctly. - Roundtrip passes for representative Go code. -### M1.4. JavaScript (`lang-javascript` feature) +### M1.4. JavaScript ✅ **Grammar:** `tree-sitter-javascript` (0.25.0) @@ -151,7 +145,7 @@ Option 1 is recommended — simple and readable, with pointer receiver indicated - Arrow functions in const declarations map to `fn::name`. - Export-wrapped declarations resolve correctly. -### M1.5. TypeScript (`lang-typescript` feature) +### M1.5. TypeScript ✅ **Grammar:** `tree-sitter-typescript` (0.23.2) — ships two grammars: `typescript` and `tsx`. diff --git a/docs/liyi-design.md b/docs/liyi-design.md index 97d3697..fb7aaca 100644 --- a/docs/liyi-design.md +++ b/docs/liyi-design.md @@ -466,7 +466,7 @@ The path identifies the item by node kind and name, not by position. The tool co The agent MAY set `tree_path` to `""` explicitly to signal "I considered structural identity and it doesn't apply here." Absence of the field is equivalent to `""`. 
`liyi reanchor` auto-populates `tree_path` for every spec where a clear structural path can be resolved from the current `source_span` and a supported tree-sitter grammar — agents need not set it manually. When the span doesn't correspond to a recognizable AST item (macros, generated code, unsupported languages), the tool leaves `tree_path` empty. -**Language support.** Tree-sitter support is grammar-dependent. In 0.1, Rust is the primary supported language (via `tree-sitter-rust`). For unsupported languages, `tree_path` is left empty and the tool falls back to line-number behavior. Adding a language is a matter of adding its tree-sitter grammar crate and a small mapping of node kinds — no changes to the core protocol or schema. +**Language support.** Tree-sitter support is grammar-dependent. Rust, Python, Go, JavaScript, and TypeScript are built-in. For unsupported languages, `tree_path` is left empty and the tool falls back to line-number behavior. Adding a language is a matter of adding its tree-sitter grammar crate and a small mapping of node kinds — no changes to the core protocol or schema. **Multi-language architecture (`LanguageConfig`).** The `tree_path` implementation is designed to be language-extensible via a data-driven configuration per language. Each supported language provides: @@ -481,14 +481,14 @@ The agent MAY set `tree_path` to `""` explicitly to signal "I considered structu The shorthand vocabulary (`fn`, `struct`, `class`, `mod`, `impl`, `trait`, `enum`, `const`, `static`, `type`, `macro`, `interface`, `method`) is shared across languages — `fn` always means "function-like item" regardless of whether the underlying node kind is `function_item` (Rust), `function_definition` (Python/Go), or `function_declaration` (JS/TS). The `tree_path` format remains the same: `fn::add_money`, `class::Order::fn::process`. 
-Each language is gated behind a Cargo feature (`lang-python`, `lang-go`, `lang-javascript`, `lang-typescript`) so users only pay binary-size cost for languages they need. A `lang-all` convenience feature includes everything. +All languages are built-in — the binary ships with every supported tree-sitter grammar. The binary-size cost is modest relative to the universality benefit; Python, Go, JavaScript, and TypeScript codebases vastly outnumber Rust codebases, and requiring users to opt in per language would hinder adoption of a tool whose value proposition is universality. -**Planned languages (0.1.x):** +**Built-in languages:** | Language | Grammar crate | Notes | |---|---|---| | Python | `tree-sitter-python` | Flat AST; methods are `function_definition` inside `class_definition` body. No `impl`-block equivalent. | -| Go | `tree-sitter-go` | `type_declaration` → `type_spec` indirection for structs/interfaces. Methods have receivers and live at top level — tree_path encodes as `method::(*MyType).DoThing` or `fn::DoThing`. | +| Go | `tree-sitter-go` | `type_declaration` wraps `type_spec` for structs/interfaces — custom name extraction navigates the indirection. Methods encode receiver type: `method::(*MyType).DoThing` (pointer) or `method::MyType.DoThing` (value). | | JavaScript | `tree-sitter-javascript` | Arrow functions in `const` declarations are pervasive — `const foo = () => ...` maps to `fn::foo` (tracking the `variable_declarator` when its value is an `arrow_function`). | | TypeScript | `tree-sitter-typescript` | Superset of JS; adds `interface_declaration`, `type_alias_declaration`, `enum_declaration`. Dual grammar: `.ts` → typescript, `.tsx` → tsx. | @@ -1349,7 +1349,7 @@ This is the full context an assessor needs. 
The agent (or script, or CI wrapper) | Agent (next session) | `suggested_intent` for items with `verdict: semantic` | Read triage, propose intent updates in sidecar | | Human (terminal) | Formatted summary + triage table | `liyi triage --summary`; `--json` for raw | -**Why the LLM is not in the binary.** Building LLM calls into `liyi` would require API key management, provider abstraction (OpenAI, Anthropic, Bedrock, Vertex, local models...), HTTP client + TLS, rate limit handling, token budgeting, and retry logic. It would bloat a ~3000-line binary with complexity that the agentic framework already solved. The binary stays deterministic, offline, and small. The reasoning lives where the model access already is. +**Why the LLM is not in the binary.** Building LLM calls into `liyi` would require API key management, provider abstraction (OpenAI, Anthropic, Bedrock, Vertex, local models...), HTTP client + TLS, rate limit handling, token budgeting, and retry logic. It would bloat the binary with complexity that the agentic framework already solved. The binary stays deterministic and offline. The reasoning lives where the model access already is. **Triage workflow:** @@ -1897,7 +1897,7 @@ The spec-driven development space is no longer hypothetical — Augment Intent, - **Persistent by design.** Intent survives context windows, agent sessions, and team turnover. It's a file in the repo, not a message in a thread. - **Each level stands alone.** You can adopt the instruction without the linter, or the linter without adversarial tests. - **Nothing to learn.** JSONC, Markdown, SHA-256. No DSL, no specification language, no framework. -- **Lightweight.** The linter is ~3000 lines of Rust across two crates with 7 direct runtime dependencies (including tree-sitter for structural span recovery). Small enough to audit, understand, and port to another language if needed. 
+- **Lightweight.** The linter is two Rust crates (~6000 lines including tests) with tree-sitter grammars for Rust, Python, Go, JavaScript, and TypeScript built in. Single binary, no runtime dependencies. - **No lock-in.** `.liyi.jsonc` files are plain JSONC. `@liyi:module` markers are comments. Delete them and nothing breaks. - **Any programming language.** The linter doesn't parse source code. It reads line ranges from `source_span`, hashes them, compares. `.liyi.jsonc` is JSONC. `@liyi:module` markers use whatever comment syntax the host format already provides. Works with any language, any framework, any build system, any design pattern. - **Hardware RTL too.** The convention applies at the RTL level (Verilog, SystemVerilog, VHDL, Chisel) with no design changes — sidecars co-locate with `.v`/`.vhd`/`.scala` files, `source_span` and `source_hash` work on any text, and tree-sitter grammars exist for Verilog and VHDL. In hardware domains where requirements traceability is a compliance obligation (DO-254, ISO 26262, IEC 61508), 立意 functions as a lightweight shim between a requirements management system and RTL source: a `liyi import-reqif` command (post-MVP) can consume ReqIF — the open OMG standard (ReqIF 1.2, `formal/2016-07-01`) that DOORS, Polarion, and other tools export — and emit `@liyi:requirement` blocks, connecting managed requirements to RTL implementations with hash-based staleness detection. The tool doesn't replace DOORS; it fills the last mile that DOORS doesn't cover. From 6bcf01f1af94676143bfd5cfd1619419e5ce2f77 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Tue, 10 Mar 2026 17:21:38 +0800 Subject: [PATCH 12/18] docs(linter): sync tree_path.rs sidecar specs after langconfig refactor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reanchor all specs after the LanguageConfig refactor and Go support addition. 
Fix two misidentified specs that reanchor shifted into wrong items: - KIND_MAP → LanguageConfig (struct replaced the static array) - node_name at matches_extension span → node_name at actual method span Update Language enum intent from "only Rust" to list all six built-in variants. Add go_node_name spec covering receiver encoding and type/const/var spec indirection. liyi check: 85 current, 0 stale, 0 shifted. Original prompt: > please sync 立意 AI-assisted-by: Claude Opus 4.6 (GitHub Copilot) Signed-off-by: WANG Xuerui --- crates/liyi/src/tree_path.rs.liyi.jsonc | 83 ++++++++++++++----------- 1 file changed, 48 insertions(+), 35 deletions(-) diff --git a/crates/liyi/src/tree_path.rs.liyi.jsonc b/crates/liyi/src/tree_path.rs.liyi.jsonc index 5a155b4..102abb4 100644 --- a/crates/liyi/src/tree_path.rs.liyi.jsonc +++ b/crates/liyi/src/tree_path.rs.liyi.jsonc @@ -4,35 +4,36 @@ "source": "crates/liyi/src/tree_path.rs", "specs": [ { - "item": "KIND_MAP", + "item": "LanguageConfig", "reviewed": false, - "intent": "Define the bijective mapping between tree_path shorthand strings (fn, struct, enum, impl, trait, mod, const, static, type, macro) and tree-sitter-rust node kind strings (function_item, struct_item, etc.). This is the single source of truth for kind translation.", + "intent": "Define the data-driven abstraction for language-specific tree_path behaviour. 
Each field captures one language-dependent axis: grammar loader (ts_language), file extensions, kind shorthand mapping, name extraction field and overrides, body-descending fields, and an optional custom_name callback for languages with non-trivial name extraction (e.g., Go receiver encoding).", "source_span": [ - 17, - 28 + 22, + 39 ], - "source_hash": "sha256:fa948a9c1c198b1bf3ed0655165aad53a4d1a0ad2d48a134a5f58a43b5554a82", - "source_anchor": "///" + "tree_path": "struct::LanguageConfig", + "source_hash": "sha256:cc0ae5ada967354b9d5e9863be2c72136c5dd85832b29ee5e44e118d1c99f5da", + "source_anchor": "pub struct LanguageConfig {" }, { "item": "detect_language", "reviewed": false, "intent": "=doc", "source_span": [ - 269, - 302 + 302, + 329 ], "tree_path": "fn::detect_language", - "source_hash": "sha256:04d339ef243d8c1844e3e5703062deeac0955f70a92632afd865a1bc186f5fdd", + "source_hash": "sha256:e36b1be80d8077b3102823129cb1729359c050a04ee083294eeba09f444de7cc", "source_anchor": "pub fn detect_language(path: &Path) -> Option {" }, { "item": "Language", "reviewed": false, - "intent": "Enumerate supported tree-sitter languages for tree_path operations. In 0.1, only Rust is supported; the enum is the extension point for adding more languages.", + "intent": "Enumerate all built-in tree-sitter languages for tree_path operations: Rust, Python, Go, JavaScript, TypeScript, and TSX. Each variant maps to a static LanguageConfig via config().", "source_span": [ - 192, - 199 + 266, + 273 ], "tree_path": "enum::Language", "source_hash": "sha256:cfad736b976eb0cd3212d06cb22896d43affbd78181dc5878a71553467e7ff29", @@ -41,22 +42,34 @@ { "item": "node_name", "reviewed": false, - "intent": "Extract the user-visible name of an AST node. For impl_item, return the type field text (e.g., 'Money' from 'impl Money'). For all other item kinds, return the name field. 
Return None if the node has no name/type field.", + "intent": "Extract the user-visible name of an AST node via the language's LanguageConfig. Checks the custom_name callback first (for complex patterns like Go receiver encoding). Falls back to name_overrides for special cases (e.g., impl_item uses type field). Otherwise reads the standard name field. Returns Cow::Owned for constructed names, Cow::Borrowed for field-extracted names.", "source_span": [ - 83, - 85 + 62, + 82 ], - "tree_path": "impl::LanguageConfig::fn::matches_extension", - "source_hash": "sha256:7544b1ad4acbd6144486ed88419b6a9069425b448d9b96c032d5b3f9909403e3", - "source_anchor": " pub fn matches_extension(&self, ext: &str) -> bool {" + "tree_path": "impl::LanguageConfig::fn::node_name", + "source_hash": "sha256:1f187fdb6eab1bd532149c8007d31dae3cd2c210edc153484dfd46210e287f5f", + "source_anchor": " fn node_name<'a>(&self, node: &Node<'a>, source: &'a str) -> Option> {" + }, + { + "item": "go_node_name", + "reviewed": false, + "intent": "Handle Go-specific name extraction for four node kinds: method_declaration encodes receiver type into the name as ReceiverType.Method or (*ReceiverType).Method for pointer receivers; type_declaration navigates to the inner type_spec for the name; const_declaration and var_declaration similarly navigate to their inner spec nodes. Returns None for unrecognized node kinds to fall through to default name extraction.", + "source_span": [ + 143, + 194 + ], + "tree_path": "fn::go_node_name", + "source_hash": "sha256:5198217ac70bb06963c30ee0f9f0daa9972cdb47834ed82cf99b800f8b043620", + "source_anchor": "fn go_node_name(node: &Node, source: &str) -> Option {" }, { "item": "parse_tree_path", "reviewed": false, "intent": "Parse a tree_path string into segments of (kind, name) pairs by splitting on '::' and grouping consecutive pairs. Return None if the number of parts is odd (malformed). 
Validate each kind against the known shorthand set.", "source_span": [ - 325, - 341 + 351, + 367 ], "tree_path": "fn::parse_tree_path", "source_hash": "sha256:eb1bdb126bb090d769612797d5428edd3c20ba72ba04dad58071bbfa955240c2", @@ -67,11 +80,11 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 348, - 365 + 374, + 391 ], "tree_path": "fn::resolve_tree_path", - "source_hash": "sha256:ee955e130971b81a3a7565d81618a2edd7c44a50d82b91250b9089cbbabef519", + "source_hash": "sha256:8cd19d6e6704970f8cbead0b56b05a9196ca29b0439b37b31a819a958dc03dbe", "source_anchor": "pub fn resolve_tree_path(source: &str, tree_path: &str, lang: Language) -> Option<[usize; 2]> {" }, { @@ -79,11 +92,11 @@ "reviewed": false, "intent": "Walk tree-sitter children of the given parent to find nodes matching each path segment in order. For single-segment paths, return the matching child directly. For multi-segment paths, descend into the first matching child via resolve_in_body for subsequent segments.", "source_span": [ - 368, - 397 + 394, + 423 ], "tree_path": "fn::resolve_segments", - "source_hash": "sha256:1fcaea1f0eccde605bcd9de4094980378600d7a8d221aed55dc8fad1f60cf48b", + "source_hash": "sha256:15731dca9653e45052c706fbc2f193fcfe96ca98afe00bbf259f23f86288c414", "source_anchor": "fn resolve_segments<'a>(" }, { @@ -91,8 +104,8 @@ "reviewed": false, "intent": "Find subsequent path segments inside an item's body or declaration_list. Try the 'body' field first (mod, fn), then fall back to looking for a declaration_list child (impl, trait). 
Delegate to resolve_segments for the recursive match.", "source_span": [ - 400, - 408 + 426, + 434 ], "tree_path": "fn::resolve_in_body", "source_hash": "sha256:f1514f012bc8d300c425867e4a1cce1aaf72f1f58885eeaf24456114234473d6", @@ -103,11 +116,11 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 415, - 440 + 441, + 462 ], "tree_path": "fn::compute_tree_path", - "source_hash": "sha256:1b92c3bed64a4f98aa3afc677bf4e8cdd872bb0ec44f9fb362ad55f7c129b847", + "source_hash": "sha256:30ecd47287f846a39cdbd906075c6eae16d286eda5c3bc92d87cfbae67ec2e74", "source_anchor": "pub fn compute_tree_path(source: &str, span: [usize; 2], lang: Language) -> String {" }, { @@ -115,8 +128,8 @@ "reviewed": false, "intent": "Find the widest item-bearing tree-sitter node whose start and end rows both fall within [target_start, target_end]. Must handle the attribute-sibling pattern where Rust attributes (#[derive(...)]) are siblings of the item node — the sidecar span can start before the item node. Prefer the outermost (widest) item when multiple items fall within the range.", "source_span": [ - 448, - 494 + 470, + 516 ], "tree_path": "fn::find_item_in_range", "source_hash": "sha256:187c06169aae241150cb9bd88810da07aef5d967431ae25b50aab3ff111fc220", @@ -127,8 +140,8 @@ "reviewed": false, "intent": "Recursively walk from root to target node, collecting (kind::name) path segments. At the target node, push its segment and return true. During descent, only enter children that spatially contain the target. When a child's subtree contains the target, prepend the current node's segment if it is an item node. 
Return false if the target cannot be found.", "source_span": [ - 512, - 558 + 534, + 580 ], "tree_path": "fn::collect_path", "source_hash": "sha256:0086ee43dc7c085025e553af9914df58fab43d8e8b579486f21d5788d8d0d221", From 86b42a87002ffb352774a03c3c29e10f04f671f6 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Tue, 10 Mar 2026 19:50:23 +0800 Subject: [PATCH 13/18] feat(linter): add tree-sitter support for 8 new languages Add tree_path structural identity support for C, C++, Java, C#, PHP, Objective-C, Kotlin, and Swift. Each language gets a LanguageConfig with kind mappings and, where needed, custom name extraction callbacks: - C/C++: declarator-chain unwrapping for function_definition, C++ adds template_declaration transparency and alias_declaration - Objective-C: class_interface/implementation/protocol name extraction, selector composition for methods - Kotlin: property_declaration and type_alias name extraction, class_body positional-child handling in find_body() - PHP: const_declaration name via const_element child - Java/C#/Swift: standard field-based extraction (no custom callback needed) Extends detect_language() with all new file extensions. Generalizes find_body() to search body_fields as child node kinds (not just field names), enabling Kotlin class_body and C++ field_declaration_list. All 103 tree_path tests pass, including 8 new per-language test modules. Full test suite (unit, golden, proptest) green. Updates M2 section of docs/liyi-01x-roadmap.md from placeholder "Deferred languages" to comprehensive documentation of all 8 language integrations. Original prompt: > Let's build them into the roadmap docs and implement. > (Referring to C, C++, Objective-C, Java, C#, PHP, Kotlin, Swift > tree-sitter language support for tree_path.) 
AI-assisted-by: Claude Opus 4.6 (GitHub Copilot) Signed-off-by: WANG Xuerui --- Cargo.lock | 88 ++ crates/liyi/Cargo.toml | 8 + crates/liyi/src/tree_path.rs | 1085 ++++++++++++++++++++++- crates/liyi/src/tree_path.rs.liyi.jsonc | 44 +- docs/liyi-01x-roadmap.md | 241 ++++- 5 files changed, 1440 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d3aa1a2..a6cc5a6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -604,10 +604,18 @@ dependencies = [ "sha2", "tempfile", "tree-sitter", + "tree-sitter-c", + "tree-sitter-c-sharp", + "tree-sitter-cpp", "tree-sitter-go", + "tree-sitter-java", "tree-sitter-javascript", + "tree-sitter-kotlin-ng", + "tree-sitter-objc", + "tree-sitter-php", "tree-sitter-python", "tree-sitter-rust", + "tree-sitter-swift", "tree-sitter-typescript", ] @@ -1186,6 +1194,36 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-c" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a3aad8f0129083a59fe8596157552d2bb7148c492d44c21558d68ca1c722707" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-c-sharp" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67f06accca7b45351758663b8215089e643d53bd9a660ce0349314263737fcb0" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-cpp" +version = "0.23.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df2196ea9d47b4ab4a31b9297eaa5a5d19a0b121dceb9f118f6790ad0ab94743" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-go" version = "0.25.0" @@ -1196,6 +1234,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-java" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0aa6cbcdc8c679b214e616fd3300da67da0e492e066df01bcf5a5921a71e90d6" +dependencies = [ + "cc", + 
"tree-sitter-language", +] + [[package]] name = "tree-sitter-javascript" version = "0.25.0" @@ -1206,12 +1254,42 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-kotlin-ng" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e800ebbda938acfbf224f4d2c34947a31994b1295ee6e819b65226c7b51b4450" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-language" version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "009994f150cc0cd50ff54917d5bc8bffe8cad10ca10d81c34da2ec421ae61782" +[[package]] +name = "tree-sitter-objc" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ca8bb556423fc176f0535e79d525f783a6684d3c9da81bf9d905303c129e1d2" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-php" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d8c17c3ab69052c5eeaa7ff5cd972dd1bc25d1b97ee779fec391ad3b5df5592" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-python" version = "0.25.0" @@ -1232,6 +1310,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-swift" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ef216011c3e3df4fa864736f347cb8d509b1066cf0c8549fb1fd81ac9832e59" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-typescript" version = "0.23.2" diff --git a/crates/liyi/Cargo.toml b/crates/liyi/Cargo.toml index 58352bf..d6658e8 100644 --- a/crates/liyi/Cargo.toml +++ b/crates/liyi/Cargo.toml @@ -22,6 +22,14 @@ tree-sitter-python = "0.25.0" tree-sitter-go = "0.25.0" tree-sitter-javascript = "0.25.0" tree-sitter-typescript = "0.23.2" +tree-sitter-c = "0.24.1" +tree-sitter-cpp = "0.23.4" +tree-sitter-java = "0.23.5" +tree-sitter-c-sharp = 
"0.23.1" +tree-sitter-php = "0.24.2" +tree-sitter-objc = "3.0.2" +tree-sitter-kotlin-ng = "1.1.0" +tree-sitter-swift = "0.7.1" [dev-dependencies] proptest = "1" diff --git a/crates/liyi/src/tree_path.rs b/crates/liyi/src/tree_path.rs index 40b93d9..a18f346 100644 --- a/crates/liyi/src/tree_path.rs +++ b/crates/liyi/src/tree_path.rs @@ -88,10 +88,17 @@ impl LanguageConfig { return Some(body); } } - // Fallback: look for declaration_list as direct child + // Fallback: search for body_fields or declaration_list as direct + // (unnamed) children. Needed for languages where the body is a + // positional child rather than a named field (e.g., Kotlin class_body, + // C++ field_declaration_list). let mut cursor = node.walk(); node.children(&mut cursor) - .find(|c| c.kind() == "declaration_list") + .find(|c| { + self.body_fields.contains(&c.kind()) + || c.kind() == "declaration_list" + || c.kind() == "field_declaration_list" + }) } /// Check if the given file extension is associated with this language. @@ -133,6 +140,199 @@ static PYTHON_CONFIG: LanguageConfig = LanguageConfig { custom_name: None, }; +/// Extract the function name from a C/C++ `function_definition` node. +/// +/// C/C++ functions store their name inside the `declarator` field chain: +/// `function_definition` → (field `declarator`) `function_declarator` +/// → (field `declarator`) `identifier` / `field_identifier`. +/// Pointer declarators and other wrappers may appear in the chain; +/// we unwrap them until we find a `function_declarator`. +fn c_extract_declarator_name(node: &Node, source: &str) -> Option { + let declarator = node.child_by_field_name("declarator")?; + let func_decl = unwrap_to_function_declarator(&declarator)?; + let name_node = func_decl.child_by_field_name("declarator")?; + Some(source[name_node.byte_range()].to_string()) +} + +/// Walk through pointer_declarator / parenthesized_declarator / attributed_declarator +/// wrappers to find the inner `function_declarator`. 
+fn unwrap_to_function_declarator<'a>(node: &Node<'a>) -> Option> { + match node.kind() { + "function_declarator" => Some(*node), + "pointer_declarator" | "parenthesized_declarator" | "attributed_declarator" => { + let inner = node.child_by_field_name("declarator")?; + unwrap_to_function_declarator(&inner) + } + _ => None, + } +} + +/// Custom name extraction for C nodes. +/// +/// Handles `function_definition` (name in declarator chain) and +/// `type_definition` (name in declarator field, which is a type_identifier). +fn c_node_name(node: &Node, source: &str) -> Option { + match node.kind() { + "function_definition" => c_extract_declarator_name(node, source), + "type_definition" => { + // typedef: the 'declarator' field holds the new type name + let declarator = node.child_by_field_name("declarator")?; + Some(source[declarator.byte_range()].to_string()) + } + _ => None, + } +} + +/// Custom name extraction for C++ nodes. +/// +/// Extends `c_node_name` with C++-specific patterns: +/// - `template_declaration`: transparent wrapper — extracts name from inner decl. +/// - `namespace_definition`: name is in a `namespace_identifier` child (no "name" field). +fn cpp_node_name(node: &Node, source: &str) -> Option { + match node.kind() { + "function_definition" => c_extract_declarator_name(node, source), + "type_definition" | "alias_declaration" => { + let name_node = node.child_by_field_name("name") + .or_else(|| node.child_by_field_name("declarator"))?; + Some(source[name_node.byte_range()].to_string()) + } + "template_declaration" => { + // template_declaration wraps an inner declaration — find it and + // extract the name from the inner node. 
+ let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + match child.kind() { + "function_definition" => return c_extract_declarator_name(&child, source), + "class_specifier" | "struct_specifier" | "enum_specifier" + | "concept_definition" | "alias_declaration" => { + let n = child.child_by_field_name("name")?; + return Some(source[n.byte_range()].to_string()); + } + // A template can also wrap another template_declaration (nested) + "template_declaration" => return cpp_node_name(&child, source), + _ => {} + } + } + None + } + _ => None, + } +} + +/// Custom name extraction for Objective-C nodes. +/// +/// ObjC node types like `class_interface`, `class_implementation`, +/// `protocol_declaration`, `method_declaration`, and `method_definition` +/// do not use standard `name` fields. Their names are extracted from +/// specific child node patterns. +fn objc_node_name(node: &Node, source: &str) -> Option { + match node.kind() { + // C function definitions use the same declarator chain as C. 
+ "function_definition" => c_extract_declarator_name(node, source), + "type_definition" => { + let declarator = node.child_by_field_name("declarator")?; + Some(source[declarator.byte_range()].to_string()) + } + // @interface ClassName or @interface ClassName (Category) + "class_interface" | "class_implementation" => { + let mut cursor = node.walk(); + node.children(&mut cursor) + .find(|c| c.kind() == "identifier" || c.kind() == "type_identifier") + .map(|c| source[c.byte_range()].to_string()) + } + // @protocol ProtocolName + "protocol_declaration" => { + let mut cursor = node.walk(); + node.children(&mut cursor) + .find(|c| c.kind() == "identifier" || c.kind() == "type_identifier") + .map(|c| source[c.byte_range()].to_string()) + } + // - (ReturnType)methodName or - (ReturnType)methodName:(Type)arg + // + (ReturnType)classMethodName + "method_declaration" | "method_definition" => { + let mut cursor = node.walk(); + // The selector is composed of keyword_declarator children or + // a single identifier (for zero-argument methods). + let mut parts: Vec = Vec::new(); + for child in node.children(&mut cursor) { + match child.kind() { + "identifier" | "field_identifier" if parts.is_empty() => { + // Single-part selector (no arguments) + parts.push(source[child.byte_range()].to_string()); + } + "keyword_declarator" => { + // Each keyword_declarator has a keyword child + let mut kw_cursor = child.walk(); + if let Some(kw) = child.children(&mut kw_cursor) + .find(|c| c.kind() == "keyword_selector" || c.kind() == "identifier") + { + parts.push(format!("{}:", &source[kw.byte_range()])); + } + } + _ => {} + } + } + if parts.is_empty() { + None + } else { + Some(parts.join("")) + } + } + _ => None, + } +} + +/// Custom name extraction for Kotlin nodes. +/// +/// Handles `property_declaration` where the name is in a child +/// `variable_declaration` node, and `type_alias` where the name is +/// in an `identifier` child before the `=` (the `type` field is the RHS). 
+fn kotlin_node_name(node: &Node, source: &str) -> Option { + match node.kind() { + "property_declaration" => { + let mut cursor = node.walk(); + // Name is in the first variable_declaration or identifier child + for child in node.children(&mut cursor) { + if child.kind() == "variable_declaration" { + let name = child.child_by_field_name("name") + .or_else(|| { + let mut c2 = child.walk(); + child.children(&mut c2).find(|c| c.kind() == "simple_identifier") + })?; + return Some(source[name.byte_range()].to_string()); + } + if child.kind() == "simple_identifier" { + return Some(source[child.byte_range()].to_string()); + } + } + None + } + "type_alias" => { + let mut cursor = node.walk(); + node.children(&mut cursor) + .find(|c| c.kind() == "type_identifier" || c.kind() == "simple_identifier") + .map(|c| source[c.byte_range()].to_string()) + } + _ => None, + } +} + +/// Custom name extraction for PHP `const_declaration` nodes. +/// +/// PHP `const_declaration` stores names inside `const_element` children. +fn php_node_name(node: &Node, source: &str) -> Option { + match node.kind() { + "const_declaration" => { + let mut cursor = node.walk(); + let elem = node.children(&mut cursor) + .find(|c| c.kind() == "const_element")?; + let name = elem.child_by_field_name("name")?; + Some(source[name.byte_range()].to_string()) + } + _ => None, + } +} + /// Custom name extraction for Go nodes. /// /// Handles three Go-specific patterns: @@ -261,6 +461,160 @@ static TSX_CONFIG: LanguageConfig = LanguageConfig { custom_name: None, }; +/// C language configuration. +static C_CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_c::LANGUAGE.into(), + extensions: &["c", "h"], + kind_map: &[ + ("fn", "function_definition"), + ("struct", "struct_specifier"), + ("enum", "enum_specifier"), + ("typedef", "type_definition"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body"], + custom_name: Some(c_node_name), +}; + +/// C++ language configuration. 
+static CPP_CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_cpp::LANGUAGE.into(), + extensions: &["cpp", "cc", "cxx", "hpp", "hh", "hxx", "h++", "c++"], + kind_map: &[ + ("fn", "function_definition"), + ("class", "class_specifier"), + ("struct", "struct_specifier"), + ("namespace", "namespace_definition"), + ("enum", "enum_specifier"), + ("template", "template_declaration"), + ("typedef", "type_definition"), + ("using", "alias_declaration"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body", "declaration_list"], + custom_name: Some(cpp_node_name), +}; + +/// Java language configuration. +static JAVA_CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_java::LANGUAGE.into(), + extensions: &["java"], + kind_map: &[ + ("fn", "method_declaration"), + ("class", "class_declaration"), + ("interface", "interface_declaration"), + ("enum", "enum_declaration"), + ("constructor", "constructor_declaration"), + ("record", "record_declaration"), + ("annotation", "annotation_type_declaration"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body"], + custom_name: None, +}; + +/// C# language configuration. +static CSHARP_CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_c_sharp::LANGUAGE.into(), + extensions: &["cs"], + kind_map: &[ + ("fn", "method_declaration"), + ("class", "class_declaration"), + ("interface", "interface_declaration"), + ("enum", "enum_declaration"), + ("struct", "struct_declaration"), + ("namespace", "namespace_declaration"), + ("constructor", "constructor_declaration"), + ("property", "property_declaration"), + ("record", "record_declaration"), + ("delegate", "delegate_declaration"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body"], + custom_name: None, +}; + +/// PHP language configuration (PHP-only grammar, no HTML interleaving). 
+static PHP_CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_php::LANGUAGE_PHP_ONLY.into(), + extensions: &["php"], + kind_map: &[ + ("fn", "function_definition"), + ("class", "class_declaration"), + ("method", "method_declaration"), + ("interface", "interface_declaration"), + ("enum", "enum_declaration"), + ("trait", "trait_declaration"), + ("namespace", "namespace_definition"), + ("const", "const_declaration"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body"], + custom_name: Some(php_node_name), +}; + +/// Objective-C language configuration. +static OBJC_CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_objc::LANGUAGE.into(), + extensions: &["m", "mm"], + kind_map: &[ + ("fn", "function_definition"), + ("class", "class_interface"), + ("impl", "class_implementation"), + ("protocol", "protocol_declaration"), + ("method", "method_definition"), + ("method_decl", "method_declaration"), + ("struct", "struct_specifier"), + ("enum", "enum_specifier"), + ("typedef", "type_definition"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body"], + custom_name: Some(objc_node_name), +}; + +/// Kotlin language configuration. +static KOTLIN_CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_kotlin_ng::LANGUAGE.into(), + extensions: &["kt", "kts"], + kind_map: &[ + ("fn", "function_declaration"), + ("class", "class_declaration"), + ("object", "object_declaration"), + ("property", "property_declaration"), + ("typealias", "type_alias"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body", "class_body"], + custom_name: Some(kotlin_node_name), +}; + +/// Swift language configuration. 
+static SWIFT_CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_swift::LANGUAGE.into(), + extensions: &["swift"], + kind_map: &[ + ("fn", "function_declaration"), + ("class", "class_declaration"), + ("protocol", "protocol_declaration"), + ("enum", "enum_entry"), + ("property", "property_declaration"), + ("init", "init_declaration"), + ("typealias", "typealias_declaration"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body"], + custom_name: None, +}; + /// Supported languages for tree_path resolution. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Language { @@ -270,6 +624,14 @@ pub enum Language { JavaScript, TypeScript, Tsx, + C, + Cpp, + Java, + CSharp, + Php, + ObjectiveC, + Kotlin, + Swift, } impl Language { @@ -282,6 +644,14 @@ impl Language { Language::JavaScript => &JAVASCRIPT_CONFIG, Language::TypeScript => &TYPESCRIPT_CONFIG, Language::Tsx => &TSX_CONFIG, + Language::C => &C_CONFIG, + Language::Cpp => &CPP_CONFIG, + Language::Java => &JAVA_CONFIG, + Language::CSharp => &CSHARP_CONFIG, + Language::Php => &PHP_CONFIG, + Language::ObjectiveC => &OBJC_CONFIG, + Language::Kotlin => &KOTLIN_CONFIG, + Language::Swift => &SWIFT_CONFIG, } } @@ -296,9 +666,13 @@ impl Language { /// /// # Extension Collision /// +/// `.h` files are ambiguous (C, C++, or Objective-C). We map them to C +/// by default. Users can override via future configuration if needed. +/// /// If two languages share an extension (unlikely with built-in languages), /// the first match in the following order is returned: -/// Rust → Python → Go → JavaScript → TypeScript → TSX. +/// Rust → Python → Go → JavaScript → TypeScript → TSX → C → C++ → +/// Java → C# → PHP → Objective-C → Kotlin → Swift. 
pub fn detect_language(path: &Path) -> Option { let ext = path.extension()?.to_str()?; @@ -325,6 +699,31 @@ pub fn detect_language(path: &Path) -> Option { return Some(Language::Tsx); } + if C_CONFIG.matches_extension(ext) { + return Some(Language::C); + } + if CPP_CONFIG.matches_extension(ext) { + return Some(Language::Cpp); + } + if JAVA_CONFIG.matches_extension(ext) { + return Some(Language::Java); + } + if CSHARP_CONFIG.matches_extension(ext) { + return Some(Language::CSharp); + } + if PHP_CONFIG.matches_extension(ext) { + return Some(Language::Php); + } + if OBJC_CONFIG.matches_extension(ext) { + return Some(Language::ObjectiveC); + } + if KOTLIN_CONFIG.matches_extension(ext) { + return Some(Language::Kotlin); + } + if SWIFT_CONFIG.matches_extension(ext) { + return Some(Language::Swift); + } + None } @@ -1482,4 +1881,684 @@ class Container extends React.Component { ); } } + + mod c_tests { + use super::*; + + const SAMPLE_C: &str = r#"#include + +struct Point { + int x; + int y; +}; + +enum Color { RED, GREEN, BLUE }; + +typedef struct Point Point_t; + +void process(int x, int y) { + printf("hello"); +} + +static int helper(void) { + return 42; +} +"#; + + #[test] + fn resolve_c_function() { + let span = resolve_tree_path(SAMPLE_C, "fn::process", Language::C); + assert!(span.is_some(), "should resolve fn::process"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_C.lines().collect(); + assert!( + lines[start - 1].contains("void process"), + "span should point to process function, got: {}", + lines[start - 1] + ); + } + + #[test] + fn resolve_c_struct() { + let span = resolve_tree_path(SAMPLE_C, "struct::Point", Language::C); + assert!(span.is_some(), "should resolve struct::Point"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_C.lines().collect(); + assert!( + lines[start - 1].contains("struct Point"), + "span should point to Point struct" + ); + } + + #[test] + fn resolve_c_enum() { + let span = 
resolve_tree_path(SAMPLE_C, "enum::Color", Language::C); + assert!(span.is_some(), "should resolve enum::Color"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_C.lines().collect(); + assert!( + lines[start - 1].contains("enum Color"), + "span should point to Color enum" + ); + } + + #[test] + fn resolve_c_typedef() { + let span = resolve_tree_path(SAMPLE_C, "typedef::Point_t", Language::C); + assert!(span.is_some(), "should resolve typedef::Point_t"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_C.lines().collect(); + assert!( + lines[start - 1].contains("typedef"), + "span should point to typedef" + ); + } + + #[test] + fn compute_c_function_path() { + let span = resolve_tree_path(SAMPLE_C, "fn::process", Language::C).unwrap(); + let path = compute_tree_path(SAMPLE_C, span, Language::C); + assert_eq!(path, "fn::process"); + } + + #[test] + fn roundtrip_c() { + for tp in &["fn::process", "fn::helper", "struct::Point", "enum::Color"] { + let span = resolve_tree_path(SAMPLE_C, tp, Language::C).unwrap(); + let path = compute_tree_path(SAMPLE_C, span, Language::C); + assert_eq!(&path, tp, "roundtrip failed for {tp}"); + } + } + + #[test] + fn detect_c_extensions() { + assert_eq!(detect_language(Path::new("main.c")), Some(Language::C)); + assert_eq!(detect_language(Path::new("header.h")), Some(Language::C)); + } + } + + mod cpp_tests { + use super::*; + + const SAMPLE_CPP: &str = r#"namespace math { + +class Calculator { +public: + int add(int a, int b) { + return a + b; + } +}; + +struct Point { + int x, y; +}; + +enum class Color { Red, Green, Blue }; + +} + +void standalone() {} +"#; + + #[test] + fn resolve_cpp_namespace() { + let span = resolve_tree_path(SAMPLE_CPP, "namespace::math", Language::Cpp); + assert!(span.is_some(), "should resolve namespace::math"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_CPP.lines().collect(); + assert!( + lines[start - 1].contains("namespace math"), + "span 
should point to namespace math, got: {}", + lines[start - 1] + ); + } + + #[test] + fn resolve_cpp_class_in_namespace() { + let span = resolve_tree_path( + SAMPLE_CPP, + "namespace::math::class::Calculator", + Language::Cpp, + ); + assert!(span.is_some(), "should resolve namespace::math::class::Calculator"); + } + + #[test] + fn resolve_cpp_method_in_class() { + let span = resolve_tree_path( + SAMPLE_CPP, + "namespace::math::class::Calculator::fn::add", + Language::Cpp, + ); + assert!(span.is_some(), "should resolve nested method"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_CPP.lines().collect(); + assert!( + lines[start - 1].contains("add"), + "span should point to add method" + ); + } + + #[test] + fn resolve_cpp_standalone() { + let span = resolve_tree_path(SAMPLE_CPP, "fn::standalone", Language::Cpp); + assert!(span.is_some(), "should resolve fn::standalone"); + } + + #[test] + fn resolve_cpp_enum() { + let span = resolve_tree_path( + SAMPLE_CPP, + "namespace::math::enum::Color", + Language::Cpp, + ); + assert!(span.is_some(), "should resolve enum in namespace"); + } + + #[test] + fn roundtrip_cpp() { + let span = resolve_tree_path(SAMPLE_CPP, "fn::standalone", Language::Cpp).unwrap(); + let path = compute_tree_path(SAMPLE_CPP, span, Language::Cpp); + assert_eq!(path, "fn::standalone"); + } + + #[test] + fn detect_cpp_extensions() { + assert_eq!(detect_language(Path::new("main.cpp")), Some(Language::Cpp)); + assert_eq!(detect_language(Path::new("main.cc")), Some(Language::Cpp)); + assert_eq!(detect_language(Path::new("header.hpp")), Some(Language::Cpp)); + } + } + + mod java_tests { + use super::*; + + const SAMPLE_JAVA: &str = r#"package com.example; + +public class Calculator { + public int add(int a, int b) { + return a + b; + } + + public Calculator() { + // constructor + } +} + +interface Computable { + int compute(int x); +} + +enum Direction { + NORTH, SOUTH, EAST, WEST +} + +record Point(int x, int y) {} +"#; + + #[test] + fn 
resolve_java_class() { + let span = resolve_tree_path(SAMPLE_JAVA, "class::Calculator", Language::Java); + assert!(span.is_some(), "should resolve class::Calculator"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_JAVA.lines().collect(); + assert!( + lines[start - 1].contains("class Calculator"), + "span should point to Calculator class" + ); + } + + #[test] + fn resolve_java_method() { + let span = resolve_tree_path( + SAMPLE_JAVA, + "class::Calculator::fn::add", + Language::Java, + ); + assert!(span.is_some(), "should resolve class::Calculator::fn::add"); + } + + #[test] + fn resolve_java_constructor() { + let span = resolve_tree_path( + SAMPLE_JAVA, + "class::Calculator::constructor::Calculator", + Language::Java, + ); + assert!(span.is_some(), "should resolve constructor"); + } + + #[test] + fn resolve_java_interface() { + let span = resolve_tree_path(SAMPLE_JAVA, "interface::Computable", Language::Java); + assert!(span.is_some(), "should resolve interface::Computable"); + } + + #[test] + fn resolve_java_enum() { + let span = resolve_tree_path(SAMPLE_JAVA, "enum::Direction", Language::Java); + assert!(span.is_some(), "should resolve enum::Direction"); + } + + #[test] + fn resolve_java_record() { + let span = resolve_tree_path(SAMPLE_JAVA, "record::Point", Language::Java); + assert!(span.is_some(), "should resolve record::Point"); + } + + #[test] + fn roundtrip_java() { + let span = resolve_tree_path( + SAMPLE_JAVA, + "class::Calculator::fn::add", + Language::Java, + ) + .unwrap(); + let path = compute_tree_path(SAMPLE_JAVA, span, Language::Java); + assert_eq!(path, "class::Calculator::fn::add"); + } + + #[test] + fn detect_java_extension() { + assert_eq!( + detect_language(Path::new("Main.java")), + Some(Language::Java) + ); + } + } + + mod csharp_tests { + use super::*; + + const SAMPLE_CSHARP: &str = r#"namespace MyApp { + +class Calculator { + public int Add(int a, int b) { + return a + b; + } + + public string Name { get; set; } + + 
public Calculator() {} +} + +interface IComputable { + int Compute(int x); +} + +enum Direction { + North, South, East, West +} + +struct Vector { + public int X; + public int Y; +} + +record Person(string Name, int Age); + +} +"#; + + #[test] + fn resolve_csharp_class() { + let span = resolve_tree_path( + SAMPLE_CSHARP, + "namespace::MyApp::class::Calculator", + Language::CSharp, + ); + assert!(span.is_some(), "should resolve namespace::MyApp::class::Calculator"); + } + + #[test] + fn resolve_csharp_method() { + let span = resolve_tree_path( + SAMPLE_CSHARP, + "namespace::MyApp::class::Calculator::fn::Add", + Language::CSharp, + ); + assert!(span.is_some(), "should resolve method in class in namespace"); + } + + #[test] + fn resolve_csharp_property() { + let span = resolve_tree_path( + SAMPLE_CSHARP, + "namespace::MyApp::class::Calculator::property::Name", + Language::CSharp, + ); + assert!(span.is_some(), "should resolve property::Name"); + } + + #[test] + fn resolve_csharp_interface() { + let span = resolve_tree_path( + SAMPLE_CSHARP, + "namespace::MyApp::interface::IComputable", + Language::CSharp, + ); + assert!(span.is_some(), "should resolve interface::IComputable"); + } + + #[test] + fn resolve_csharp_struct() { + let span = resolve_tree_path( + SAMPLE_CSHARP, + "namespace::MyApp::struct::Vector", + Language::CSharp, + ); + assert!(span.is_some(), "should resolve struct::Vector"); + } + + #[test] + fn resolve_csharp_enum() { + let span = resolve_tree_path( + SAMPLE_CSHARP, + "namespace::MyApp::enum::Direction", + Language::CSharp, + ); + assert!(span.is_some(), "should resolve enum::Direction"); + } + + #[test] + fn roundtrip_csharp() { + let span = resolve_tree_path( + SAMPLE_CSHARP, + "namespace::MyApp::class::Calculator::fn::Add", + Language::CSharp, + ) + .unwrap(); + let path = compute_tree_path(SAMPLE_CSHARP, span, Language::CSharp); + assert_eq!(path, "namespace::MyApp::class::Calculator::fn::Add"); + } + + #[test] + fn detect_csharp_extension() { + 
assert_eq!( + detect_language(Path::new("Program.cs")), + Some(Language::CSharp) + ); + } + } + + mod php_tests { + use super::*; + + const SAMPLE_PHP: &str = r#" +"#; + + #[test] + fn resolve_kotlin_class() { + let span = resolve_tree_path(SAMPLE_KOTLIN, "class::Calculator", Language::Kotlin); + assert!(span.is_some(), "should resolve class::Calculator"); + } + + #[test] + fn resolve_kotlin_method() { + let span = resolve_tree_path( + SAMPLE_KOTLIN, + "class::Calculator::fn::add", + Language::Kotlin, + ); + assert!(span.is_some(), "should resolve class::Calculator::fn::add"); + } + + #[test] + fn resolve_kotlin_object() { + let span = resolve_tree_path(SAMPLE_KOTLIN, "object::Singleton", Language::Kotlin); + assert!(span.is_some(), "should resolve object::Singleton"); + } + + #[test] + fn resolve_kotlin_function() { + let span = resolve_tree_path(SAMPLE_KOTLIN, "fn::standalone", Language::Kotlin); + assert!(span.is_some(), "should resolve fn::standalone"); + } + + #[test] + fn roundtrip_kotlin() { + let span = + resolve_tree_path(SAMPLE_KOTLIN, "fn::standalone", Language::Kotlin).unwrap(); + let path = compute_tree_path(SAMPLE_KOTLIN, span, Language::Kotlin); + assert_eq!(path, "fn::standalone"); + } + + #[test] + fn detect_kotlin_extension() { + assert_eq!( + detect_language(Path::new("Main.kt")), + Some(Language::Kotlin) + ); + assert_eq!( + detect_language(Path::new("build.gradle.kts")), + Some(Language::Kotlin) + ); + } + } + + mod swift_tests { + use super::*; + + const SAMPLE_SWIFT: &str = r#"protocol Drawable { + func draw() +} + +class Shape { + func area() -> Double { + return 0.0 + } + + init() {} +} + +func standalone() -> Int { + return 42 +} + +typealias Callback = () -> Void +"#; + + #[test] + fn resolve_swift_protocol() { + let span = resolve_tree_path(SAMPLE_SWIFT, "protocol::Drawable", Language::Swift); + assert!(span.is_some(), "should resolve protocol::Drawable"); + } + + #[test] + fn resolve_swift_class() { + let span = 
resolve_tree_path(SAMPLE_SWIFT, "class::Shape", Language::Swift); + assert!(span.is_some(), "should resolve class::Shape"); + } + + #[test] + fn resolve_swift_method() { + let span = resolve_tree_path( + SAMPLE_SWIFT, + "class::Shape::fn::area", + Language::Swift, + ); + assert!(span.is_some(), "should resolve class::Shape::fn::area"); + } + + #[test] + fn resolve_swift_function() { + let span = resolve_tree_path(SAMPLE_SWIFT, "fn::standalone", Language::Swift); + assert!(span.is_some(), "should resolve fn::standalone"); + } + + #[test] + fn roundtrip_swift() { + let span = + resolve_tree_path(SAMPLE_SWIFT, "fn::standalone", Language::Swift).unwrap(); + let path = compute_tree_path(SAMPLE_SWIFT, span, Language::Swift); + assert_eq!(path, "fn::standalone"); + } + + #[test] + fn detect_swift_extension() { + assert_eq!( + detect_language(Path::new("ViewController.swift")), + Some(Language::Swift) + ); + } + } + + mod objc_tests { + use super::*; + + const SAMPLE_OBJC: &str = r#"#import + +struct CGPoint { + float x; + float y; +}; + +void helper(void) { + NSLog(@"hello"); +} +"#; + + #[test] + fn resolve_objc_function() { + let span = resolve_tree_path(SAMPLE_OBJC, "fn::helper", Language::ObjectiveC); + assert!(span.is_some(), "should resolve fn::helper"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_OBJC.lines().collect(); + assert!( + lines[start - 1].contains("void helper"), + "span should point to helper function" + ); + } + + #[test] + fn resolve_objc_struct() { + let span = resolve_tree_path(SAMPLE_OBJC, "struct::CGPoint", Language::ObjectiveC); + assert!(span.is_some(), "should resolve struct::CGPoint"); + } + + #[test] + fn roundtrip_objc() { + let span = + resolve_tree_path(SAMPLE_OBJC, "fn::helper", Language::ObjectiveC).unwrap(); + let path = compute_tree_path(SAMPLE_OBJC, span, Language::ObjectiveC); + assert_eq!(path, "fn::helper"); + } + + #[test] + fn detect_objc_extensions() { + assert_eq!( + 
detect_language(Path::new("AppDelegate.m")), + Some(Language::ObjectiveC) + ); + assert_eq!( + detect_language(Path::new("mixed.mm")), + Some(Language::ObjectiveC) + ); + } + } } diff --git a/crates/liyi/src/tree_path.rs.liyi.jsonc b/crates/liyi/src/tree_path.rs.liyi.jsonc index 102abb4..c8ee66d 100644 --- a/crates/liyi/src/tree_path.rs.liyi.jsonc +++ b/crates/liyi/src/tree_path.rs.liyi.jsonc @@ -20,11 +20,11 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 302, - 329 + 676, + 728 ], "tree_path": "fn::detect_language", - "source_hash": "sha256:e36b1be80d8077b3102823129cb1729359c050a04ee083294eeba09f444de7cc", + "source_hash": "sha256:6d8686b824ee8856102af3d018b3788e9c99a80b69e078c74572482389ad7e25", "source_anchor": "pub fn detect_language(path: &Path) -> Option<Language> {" }, { @@ -32,11 +32,11 @@ "reviewed": false, "intent": "Enumerate all built-in tree-sitter languages for tree_path operations: Rust, Python, Go, JavaScript, TypeScript, and TSX. Each variant maps to a static LanguageConfig via config().", "source_span": [ - 266, - 273 + 620, + 635 ], "tree_path": "enum::Language", - "source_hash": "sha256:cfad736b976eb0cd3212d06cb22896d43affbd78181dc5878a71553467e7ff29", + "source_hash": "sha256:df5bfa956c1b92e1ab2320378cf6e1c79b0788feded9f8a4ccb2bf97ced49381", "source_anchor": "pub enum Language {" }, { @@ -56,8 +56,8 @@ "reviewed": false, "intent": "Handle Go-specific name extraction for four node kinds: method_declaration encodes receiver type into the name as ReceiverType.Method or (*ReceiverType).Method for pointer receivers; type_declaration navigates to the inner type_spec for the name; const_declaration and var_declaration similarly navigate to their inner spec nodes.
Returns None for unrecognized node kinds to fall through to default name extraction.", "source_span": [ - 143, - 194 + 343, + 394 ], "tree_path": "fn::go_node_name", "source_hash": "sha256:5198217ac70bb06963c30ee0f9f0daa9972cdb47834ed82cf99b800f8b043620", @@ -68,8 +68,8 @@ "reviewed": false, "intent": "Parse a tree_path string into segments of (kind, name) pairs by splitting on '::' and grouping consecutive pairs. Return None if the number of parts is odd (malformed). Validate each kind against the known shorthand set.", "source_span": [ - 351, - 367 + 750, + 766 ], "tree_path": "fn::parse_tree_path", "source_hash": "sha256:eb1bdb126bb090d769612797d5428edd3c20ba72ba04dad58071bbfa955240c2", @@ -80,8 +80,8 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 374, - 391 + 773, + 790 ], "tree_path": "fn::resolve_tree_path", "source_hash": "sha256:8cd19d6e6704970f8cbead0b56b05a9196ca29b0439b37b31a819a958dc03dbe", @@ -92,8 +92,8 @@ "reviewed": false, "intent": "Walk tree-sitter children of the given parent to find nodes matching each path segment in order. For single-segment paths, return the matching child directly. For multi-segment paths, descend into the first matching child via resolve_in_body for subsequent segments.", "source_span": [ - 394, - 423 + 793, + 822 ], "tree_path": "fn::resolve_segments", "source_hash": "sha256:15731dca9653e45052c706fbc2f193fcfe96ca98afe00bbf259f23f86288c414", @@ -104,8 +104,8 @@ "reviewed": false, "intent": "Find subsequent path segments inside an item's body or declaration_list. Try the 'body' field first (mod, fn), then fall back to looking for a declaration_list child (impl, trait). 
Delegate to resolve_segments for the recursive match.", "source_span": [ - 426, - 434 + 825, + 833 ], "tree_path": "fn::resolve_in_body", "source_hash": "sha256:f1514f012bc8d300c425867e4a1cce1aaf72f1f58885eeaf24456114234473d6", @@ -116,8 +116,8 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 441, - 462 + 840, + 861 ], "tree_path": "fn::compute_tree_path", "source_hash": "sha256:30ecd47287f846a39cdbd906075c6eae16d286eda5c3bc92d87cfbae67ec2e74", @@ -128,8 +128,8 @@ "reviewed": false, "intent": "Find the widest item-bearing tree-sitter node whose start and end rows both fall within [target_start, target_end]. Must handle the attribute-sibling pattern where Rust attributes (#[derive(...)]) are siblings of the item node — the sidecar span can start before the item node. Prefer the outermost (widest) item when multiple items fall within the range.", "source_span": [ - 470, - 516 + 869, + 915 ], "tree_path": "fn::find_item_in_range", "source_hash": "sha256:187c06169aae241150cb9bd88810da07aef5d967431ae25b50aab3ff111fc220", @@ -140,8 +140,8 @@ "reviewed": false, "intent": "Recursively walk from root to target node, collecting (kind::name) path segments. At the target node, push its segment and return true. During descent, only enter children that spatially contain the target. When a child's subtree contains the target, prepend the current node's segment if it is an item node. Return false if the target cannot be found.", "source_span": [ - 534, - 580 + 933, + 979 ], "tree_path": "fn::collect_path", "source_hash": "sha256:0086ee43dc7c085025e553af9914df58fab43d8e8b579486f21d5788d8d0d221", diff --git a/docs/liyi-01x-roadmap.md b/docs/liyi-01x-roadmap.md index a8fafbb..a1a4d3d 100644 --- a/docs/liyi-01x-roadmap.md +++ b/docs/liyi-01x-roadmap.md @@ -19,6 +19,7 @@ The MVP roadmap (`docs/liyi-mvp-roadmap.md`) covers the 0.1.0 release. 
This docu | Milestone | Status | Notes | |-----------|--------|-------| | M1 Multi-language tree_path | ✅ Complete | All 5 languages built-in, no feature gates | +| M2 Extended language support | ✅ Complete | C, C++, Java, C#, PHP, ObjC, Kotlin, Swift | | M3 Remaining MVP gaps | ✅ Complete | All items implemented | | M5.1 MissingRelated | ✅ Complete | Diagnostic implemented, auto-fix in `--fix` mode | | M5.2 `--fail-on-untracked` | ✅ Complete | Flag implemented with tests | @@ -170,7 +171,245 @@ The `custom_name` callback handles languages with non-trivial name extraction (e --- -## M2. Deferred languages — design notes +## M2. Extended language support + +**Status:** ✅ Complete — 8 additional languages built-in, no feature gates. + +**Goal:** Extend tree-sitter structural identity to C, C++, Java, C#, PHP, Objective-C, Kotlin, and Swift. All grammars are compiled into the binary unconditionally, matching the M1 design decision. The binary-size cost remains modest (tree-sitter grammars are compact C code) and the universality benefit is significant — C/C++ codebases are where intent drift is most acute and structural anchors most valuable. + +### M2.1. C ✅ + +**Grammar:** `tree-sitter-c` (0.24.1) — the oldest and most mature tree-sitter grammar. + +**Kind mappings:** + +| Shorthand | Node kind | +|---|---| +| `fn` | `function_definition` | +| `struct` | `struct_specifier` | +| `enum` | `enum_specifier` | +| `typedef` | `type_definition` | + +**Design notes:** +- C function names live inside a `declarator` → `function_declarator` → `identifier` chain, not a simple `name` field. A `c_node_name` custom callback recursively unwraps `pointer_declarator`, `parenthesized_declarator`, and `attributed_declarator` wrappers to find the `function_declarator`, then extracts the identifier. +- `type_definition` (typedef) names are in the `declarator` field. +- `.h` files are ambiguous (could be C, C++, or ObjC). 
Mapped to C by default since C has the simplest grammar and produces valid tree_paths for the overlapping subset. + +**Extensions:** `.c`, `.h` + +**Acceptance criteria:** +- Functions, structs, enums, typedefs all resolve. +- Roundtrip (compute → resolve → same span) passes. + +### M2.2. C++ ✅ + +**Grammar:** `tree-sitter-cpp` (0.23.4) — second-oldest tree-sitter grammar, extremely mature. + +**Kind mappings:** + +| Shorthand | Node kind | +|---|---| +| `fn` | `function_definition` | +| `class` | `class_specifier` | +| `struct` | `struct_specifier` | +| `namespace` | `namespace_definition` | +| `enum` | `enum_specifier` | +| `template` | `template_declaration` | +| `typedef` | `type_definition` | +| `using` | `alias_declaration` | + +**Design notes:** +- Inherits C's declarator-chain name extraction pattern via a `cpp_node_name` callback. +- `template_declaration` is a transparent wrapper. The callback unwraps it to find the inner declaration (`function_definition`, `class_specifier`, etc.) and extracts the name from there. +- Namespaces use `declaration_list` as their body container; `find_body` finds this via the fallback child search. +- Class methods are `function_definition` inside `field_declaration_list`; the extended `find_body` fallback handles this. +- `enum class` (scoped enums) parse as `enum_specifier` just like plain enums. + +**Extensions:** `.cpp`, `.cc`, `.cxx`, `.hpp`, `.hh`, `.hxx`, `.h++`, `.c++` + +**Acceptance criteria:** +- Namespaces, classes-in-namespaces, methods-in-classes, standalone functions, enums all resolve. +- Template-wrapped declarations resolve correctly. +- Roundtrip passes through namespace nesting. + +### M2.3. 
Java ✅ + +**Grammar:** `tree-sitter-java` (0.23.5) + +**Kind mappings:** + +| Shorthand | Node kind | +|---|---| +| `fn` | `method_declaration` | +| `class` | `class_declaration` | +| `interface` | `interface_declaration` | +| `enum` | `enum_declaration` | +| `constructor` | `constructor_declaration` | +| `record` | `record_declaration` | +| `annotation` | `annotation_type_declaration` | + +**Design notes:** +- All node types have a standard `name` field — no custom callback needed. +- Methods are `method_declaration` inside `class_body`. Tree_path: `class::Calculator::fn::add`. +- Records (Java 14+) and annotation types are included for completeness. + +**Extensions:** `.java` + +**Acceptance criteria:** +- Classes, methods, constructors, interfaces, enums, records all resolve. +- Roundtrip passes for methods nested in classes. + +### M2.4. C# ✅ + +**Grammar:** `tree-sitter-c-sharp` (0.23.1) + +**Kind mappings:** + +| Shorthand | Node kind | +|---|---| +| `fn` | `method_declaration` | +| `class` | `class_declaration` | +| `interface` | `interface_declaration` | +| `enum` | `enum_declaration` | +| `struct` | `struct_declaration` | +| `namespace` | `namespace_declaration` | +| `constructor` | `constructor_declaration` | +| `property` | `property_declaration` | +| `record` | `record_declaration` | +| `delegate` | `delegate_declaration` | + +**Design notes:** +- All node types have a standard `name` field — no custom callback needed. +- Namespaces use `body` field for descent, enabling `namespace::MyApp::class::Foo::fn::Bar` paths. +- Properties are tracked as named items (important for C#'s property-centric design). +- File-scoped namespace declarations (`namespace Foo;`) are not tracked as container items since they have no body to descend into. + +**Extensions:** `.cs` + +**Acceptance criteria:** +- Namespaces, classes, methods, properties, interfaces, enums, structs all resolve. +- Namespace → class → method nesting roundtrips correctly. + +### M2.5. 
PHP ✅ + +**Grammar:** `tree-sitter-php` (0.24.2) — uses `LANGUAGE_PHP_ONLY` (pure PHP, no HTML interleaving). + +**Kind mappings:** + +| Shorthand | Node kind | +|---|---| +| `fn` | `function_definition` | +| `class` | `class_declaration` | +| `method` | `method_declaration` | +| `interface` | `interface_declaration` | +| `enum` | `enum_declaration` | +| `trait` | `trait_declaration` | +| `namespace` | `namespace_definition` | +| `const` | `const_declaration` | + +**Design notes:** +- PHP distinguishes `function_definition` (top-level) from `method_declaration` (inside classes). Both have a `name` field. +- `const_declaration` stores its name inside a `const_element` child — a `php_node_name` custom callback handles this. +- Traits are first-class items (important for Laravel/Symfony codebases). +- PHP 8.1 enums are supported. + +**Extensions:** `.php` + +**Acceptance criteria:** +- Classes, methods, functions, interfaces, traits, enums all resolve. +- Roundtrip passes. + +### M2.6. Objective-C ✅ + +**Grammar:** `tree-sitter-objc` (3.0.2) + +**Kind mappings:** + +| Shorthand | Node kind | +|---|---| +| `fn` | `function_definition` | +| `class` | `class_interface` | +| `impl` | `class_implementation` | +| `protocol` | `protocol_declaration` | +| `method` | `method_definition` | +| `method_decl` | `method_declaration` | +| `struct` | `struct_specifier` | +| `enum` | `enum_specifier` | +| `typedef` | `type_definition` | + +**Design notes:** +- Most ObjC declaration node types lack standard `name` fields. An `objc_node_name` custom callback handles: + - `function_definition`: C-style declarator chain (shared with C callback). + - `class_interface` / `class_implementation`: name is a direct child `identifier` or `type_identifier`. + - `protocol_declaration`: same pattern. + - `method_declaration` / `method_definition`: ObjC selector names are composed from `keyword_declarator` children (e.g., `initWithFrame:style:`). 
+- C-level structs and enums use the standard `name` field. +- `class_interface` (`@interface`) and `class_implementation` (`@implementation`) are tracked as separate item types, mirroring ObjC's header/implementation split. + +**Extensions:** `.m`, `.mm` + +**Acceptance criteria:** +- C functions, structs, and enums resolve (shared with C grammar patterns). +- Roundtrip passes for C-level items. + +### M2.7. Kotlin ✅ + +**Grammar:** `tree-sitter-kotlin-ng` (1.1.0) — the `-ng` fork, compatible with tree-sitter 0.26.x. + +**Kind mappings:** + +| Shorthand | Node kind | +|---|---| +| `fn` | `function_declaration` | +| `class` | `class_declaration` | +| `object` | `object_declaration` | +| `property` | `property_declaration` | +| `typealias` | `type_alias` | + +**Design notes:** +- `class_body` is a positional child of `class_declaration` (not a named field). The `find_body` fallback was extended to search `body_fields` entries as child node kinds, not just field names. +- `property_declaration` names live inside a `variable_declaration` or `simple_identifier` child — handled by `kotlin_node_name` callback. +- `type_alias` names are in a `type_identifier` or `simple_identifier` child. +- `object_declaration` (Kotlin objects / companion objects) has a standard `name` field. +- The original `tree-sitter-kotlin` crate (0.3.x) requires tree-sitter <0.23 and is incompatible. The `-ng` fork from `tree-sitter-grammars` is the maintained successor. + +**Extensions:** `.kt`, `.kts` + +**Acceptance criteria:** +- Classes, methods-in-classes, objects, functions all resolve. +- Roundtrip passes. + +### M2.8. 
Swift ✅ + +**Grammar:** `tree-sitter-swift` (0.7.1) + +**Kind mappings:** + +| Shorthand | Node kind | +|---|---| +| `fn` | `function_declaration` | +| `class` | `class_declaration` | +| `protocol` | `protocol_declaration` | +| `enum` | `enum_entry` | +| `property` | `property_declaration` | +| `init` | `init_declaration` | +| `typealias` | `typealias_declaration` | + +**Design notes:** +- All node types have a standard `name` field — no custom callback needed. +- `class_declaration` covers both `class` and `struct` keywords (both use `class_declaration` with a `declaration_kind` field distinguishing them). +- Protocols map naturally to the `protocol` shorthand. +- `init_declaration` is tracked separately from methods since Swift initializers are syntactically distinct. + +**Extensions:** `.swift` + +**Acceptance criteria:** +- Protocols, classes, methods-in-classes, functions, init all resolve. +- Roundtrip passes. + +--- + +## M2.9. Deferred languages — design notes These languages are tracked but not planned for 0.1.x. 
From d405515a3103116494ab6bdb29548c29bfec474d Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Tue, 10 Mar 2026 20:05:26 +0800 Subject: [PATCH 14/18] refactor(linter): split tree_path module into per-language files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract each language configuration into its own file under crates/liyi/src/tree_path/: mod.rs – core infrastructure (LanguageConfig, Language enum, detect_language, resolve/compute functions) — 752 lines lang_rust.rs – Rust config lang_python.rs – Python config + tests lang_go.rs – Go config + go_node_name callback + tests lang_c.rs – C config + c_extract_declarator_name (shared) + tests lang_cpp.rs – C++ config + tests (imports c_extract_declarator_name) lang_objc.rs – Objective-C config + tests (imports c_extract_declarator_name) lang_java.rs – Java config + tests lang_csharp.rs – C# config + tests lang_php.rs – PHP config + php_node_name callback + tests lang_kotlin.rs – Kotlin config + kotlin_node_name callback + tests lang_swift.rs – Swift config + tests lang_javascript.rs – JavaScript config + tests lang_typescript.rs – TypeScript + TSX configs + tests No behavioral changes. All 168 tests pass (144 unit + 20 golden + 4 proptest). Sidecar moved to tree_path/mod.rs.liyi.jsonc and reanchored (85 current). Original prompt: > The tree_path module is getting large, please refactor so > every language lives its own file and commit. 
AI-assisted-by: Claude Opus 4.6 (GitHub Copilot) Signed-off-by: WANG Xuerui --- crates/liyi/src/tree_path.rs | 2564 ----------------- crates/liyi/src/tree_path/lang_c.rs | 159 + crates/liyi/src/tree_path/lang_cpp.rs | 155 + crates/liyi/src/tree_path/lang_csharp.rs | 139 + crates/liyi/src/tree_path/lang_go.rs | 314 ++ crates/liyi/src/tree_path/lang_java.rs | 119 + crates/liyi/src/tree_path/lang_javascript.rs | 120 + crates/liyi/src/tree_path/lang_kotlin.rs | 126 + crates/liyi/src/tree_path/lang_objc.rs | 144 + crates/liyi/src/tree_path/lang_php.rs | 132 + crates/liyi/src/tree_path/lang_python.rs | 126 + crates/liyi/src/tree_path/lang_rust.rs | 23 + crates/liyi/src/tree_path/lang_swift.rs | 89 + crates/liyi/src/tree_path/lang_typescript.rs | 224 ++ crates/liyi/src/tree_path/mod.rs | 752 +++++ .../mod.rs.liyi.jsonc} | 54 +- 16 files changed, 2649 insertions(+), 2591 deletions(-) delete mode 100644 crates/liyi/src/tree_path.rs create mode 100644 crates/liyi/src/tree_path/lang_c.rs create mode 100644 crates/liyi/src/tree_path/lang_cpp.rs create mode 100644 crates/liyi/src/tree_path/lang_csharp.rs create mode 100644 crates/liyi/src/tree_path/lang_go.rs create mode 100644 crates/liyi/src/tree_path/lang_java.rs create mode 100644 crates/liyi/src/tree_path/lang_javascript.rs create mode 100644 crates/liyi/src/tree_path/lang_kotlin.rs create mode 100644 crates/liyi/src/tree_path/lang_objc.rs create mode 100644 crates/liyi/src/tree_path/lang_php.rs create mode 100644 crates/liyi/src/tree_path/lang_python.rs create mode 100644 crates/liyi/src/tree_path/lang_rust.rs create mode 100644 crates/liyi/src/tree_path/lang_swift.rs create mode 100644 crates/liyi/src/tree_path/lang_typescript.rs create mode 100644 crates/liyi/src/tree_path/mod.rs rename crates/liyi/src/{tree_path.rs.liyi.jsonc => tree_path/mod.rs.liyi.jsonc} (90%) diff --git a/crates/liyi/src/tree_path.rs b/crates/liyi/src/tree_path.rs deleted file mode 100644 index a18f346..0000000 --- a/crates/liyi/src/tree_path.rs 
+++ /dev/null @@ -1,2564 +0,0 @@ -//! Tree-sitter structural identity for span recovery. -//! -//! `tree_path` provides format-invariant item identity by encoding an item's -//! position in the AST as a `::` delimited path of (kind, name) segments. -//! For example, `fn::add_money` or `impl::Money::fn::new`. -//! -//! When `tree_path` is populated and a tree-sitter grammar is available for -//! the source language, `liyi reanchor` and `liyi check --fix` use it to -//! locate items by structural identity, making span recovery deterministic -//! across formatting changes, import additions, and line reflows. - -use std::borrow::Cow; -use std::path::Path; - -use tree_sitter::{Language as TSLanguage, Node, Parser}; - -/// Language-specific configuration for tree_path resolution. -/// -/// Each supported language provides a static `LanguageConfig` that defines -/// how to parse it and map between tree-sitter node kinds and tree_path -/// shorthands. -pub struct LanguageConfig { - /// Function to get the tree-sitter language grammar (lazy initialization). - ts_language: fn() -> TSLanguage, - /// File extensions associated with this language. - extensions: &'static [&'static str], - /// Map from tree_path kind shorthand to tree-sitter node kind. - kind_map: &'static [(&'static str, &'static str)], - /// Field name to extract the node's name (usually "name"). - name_field: &'static str, - /// Overrides for special cases: (node_kind, field_name) pairs. - name_overrides: &'static [(&'static str, &'static str)], - /// Field names to traverse to find a node's body/declaration_list. - body_fields: &'static [&'static str], - /// Custom name extraction for node kinds that need special handling - /// (e.g., Go methods with receiver types, Go type_declaration wrapping type_spec). - /// Returns `Some(name)` for handled kinds, `None` to fall through to default. - custom_name: Option<fn(&Node, &str) -> Option<String>>, -} - -impl LanguageConfig { - /// Map tree-sitter node kind → tree_path shorthand.
- fn kind_to_shorthand(&self, ts_kind: &str) -> Option<&'static str> { - self.kind_map - .iter() - .find(|(_, ts)| *ts == ts_kind) - .map(|(short, _)| *short) - } - - /// Map tree_path shorthand → tree-sitter node kind. - fn shorthand_to_kind(&self, short: &str) -> Option<&'static str> { - self.kind_map - .iter() - .find(|(s, _)| *s == short) - .map(|(_, ts)| *ts) - } - - /// Extract the name of a named AST node. - /// - /// Returns a `Cow` — borrowed from `source` in the common case, - /// owned when the name is constructed (e.g., Go method receiver encoding). - fn node_name<'a>(&self, node: &Node<'a>, source: &'a str) -> Option<Cow<'a, str>> { - // Check custom_name callback first (e.g., Go method receivers) - if let Some(custom) = self.custom_name { - if let Some(name) = custom(node, source) { - return Some(Cow::Owned(name)); - } - } - - let kind = node.kind(); - - // Check for name field override (e.g., impl_item uses "type" field) - let field_name = self - .name_overrides - .iter() - .find(|(k, _)| *k == kind) - .map(|(_, f)| *f) - .unwrap_or(self.name_field); - - let name_node = node.child_by_field_name(field_name)?; - Some(Cow::Borrowed(&source[name_node.byte_range()])) - } - - /// Find a body/declaration_list child for descending into containers. - fn find_body<'a>(&self, node: &Node<'a>) -> Option<Node<'a>> { - for field in self.body_fields { - if let Some(body) = node.child_by_field_name(field) { - return Some(body); - } - } - // Fallback: search for body_fields or declaration_list as direct - // (unnamed) children. Needed for languages where the body is a - // positional child rather than a named field (e.g., Kotlin class_body, - // C++ field_declaration_list). - let mut cursor = node.walk(); - node.children(&mut cursor) - .find(|c| { - self.body_fields.contains(&c.kind()) - || c.kind() == "declaration_list" - || c.kind() == "field_declaration_list" - }) - } - - /// Check if the given file extension is associated with this language.
- pub fn matches_extension(&self, ext: &str) -> bool { - self.extensions.contains(&ext) - } -} - -/// Rust language configuration. -static RUST_CONFIG: LanguageConfig = LanguageConfig { - ts_language: || tree_sitter_rust::LANGUAGE.into(), - extensions: &["rs"], - kind_map: &[ - ("fn", "function_item"), - ("struct", "struct_item"), - ("enum", "enum_item"), - ("impl", "impl_item"), - ("trait", "trait_item"), - ("mod", "mod_item"), - ("const", "const_item"), - ("static", "static_item"), - ("type", "type_item"), - ("macro", "macro_definition"), - ], - name_field: "name", - name_overrides: &[("impl_item", "type")], - body_fields: &["body"], - custom_name: None, -}; - -/// Python language configuration. -static PYTHON_CONFIG: LanguageConfig = LanguageConfig { - ts_language: || tree_sitter_python::LANGUAGE.into(), - extensions: &["py", "pyi"], - kind_map: &[("fn", "function_definition"), ("class", "class_definition")], - name_field: "name", - name_overrides: &[], - body_fields: &["body"], - custom_name: None, -}; - -/// Extract the function name from a C/C++ `function_definition` node. -/// -/// C/C++ functions store their name inside the `declarator` field chain: -/// `function_definition` → (field `declarator`) `function_declarator` -/// → (field `declarator`) `identifier` / `field_identifier`. -/// Pointer declarators and other wrappers may appear in the chain; -/// we unwrap them until we find a `function_declarator`. -fn c_extract_declarator_name(node: &Node, source: &str) -> Option<String> { - let declarator = node.child_by_field_name("declarator")?; - let func_decl = unwrap_to_function_declarator(&declarator)?; - let name_node = func_decl.child_by_field_name("declarator")?; - Some(source[name_node.byte_range()].to_string()) -} - -/// Walk through pointer_declarator / parenthesized_declarator / attributed_declarator -/// wrappers to find the inner `function_declarator`.
-fn unwrap_to_function_declarator<'a>(node: &Node<'a>) -> Option<Node<'a>> { - match node.kind() { - "function_declarator" => Some(*node), - "pointer_declarator" | "parenthesized_declarator" | "attributed_declarator" => { - let inner = node.child_by_field_name("declarator")?; - unwrap_to_function_declarator(&inner) - } - _ => None, - } -} - -/// Custom name extraction for C nodes. -/// -/// Handles `function_definition` (name in declarator chain) and -/// `type_definition` (name in declarator field, which is a type_identifier). -fn c_node_name(node: &Node, source: &str) -> Option<String> { - match node.kind() { - "function_definition" => c_extract_declarator_name(node, source), - "type_definition" => { - // typedef: the 'declarator' field holds the new type name - let declarator = node.child_by_field_name("declarator")?; - Some(source[declarator.byte_range()].to_string()) - } - _ => None, - } -} - -/// Custom name extraction for C++ nodes. -/// -/// Extends `c_node_name` with C++-specific patterns: -/// - `template_declaration`: transparent wrapper — extracts name from inner decl. -/// - `namespace_definition`: name is in a `namespace_identifier` child (no "name" field). -fn cpp_node_name(node: &Node, source: &str) -> Option<String> { - match node.kind() { - "function_definition" => c_extract_declarator_name(node, source), - "type_definition" | "alias_declaration" => { - let name_node = node.child_by_field_name("name") - .or_else(|| node.child_by_field_name("declarator"))?; - Some(source[name_node.byte_range()].to_string()) - } - "template_declaration" => { - // template_declaration wraps an inner declaration — find it and - // extract the name from the inner node.
- let mut cursor = node.walk(); - for child in node.children(&mut cursor) { - match child.kind() { - "function_definition" => return c_extract_declarator_name(&child, source), - "class_specifier" | "struct_specifier" | "enum_specifier" - | "concept_definition" | "alias_declaration" => { - let n = child.child_by_field_name("name")?; - return Some(source[n.byte_range()].to_string()); - } - // A template can also wrap another template_declaration (nested) - "template_declaration" => return cpp_node_name(&child, source), - _ => {} - } - } - None - } - _ => None, - } -} - -/// Custom name extraction for Objective-C nodes. -/// -/// ObjC node types like `class_interface`, `class_implementation`, -/// `protocol_declaration`, `method_declaration`, and `method_definition` -/// do not use standard `name` fields. Their names are extracted from -/// specific child node patterns. -fn objc_node_name(node: &Node, source: &str) -> Option { - match node.kind() { - // C function definitions use the same declarator chain as C. 
- "function_definition" => c_extract_declarator_name(node, source), - "type_definition" => { - let declarator = node.child_by_field_name("declarator")?; - Some(source[declarator.byte_range()].to_string()) - } - // @interface ClassName or @interface ClassName (Category) - "class_interface" | "class_implementation" => { - let mut cursor = node.walk(); - node.children(&mut cursor) - .find(|c| c.kind() == "identifier" || c.kind() == "type_identifier") - .map(|c| source[c.byte_range()].to_string()) - } - // @protocol ProtocolName - "protocol_declaration" => { - let mut cursor = node.walk(); - node.children(&mut cursor) - .find(|c| c.kind() == "identifier" || c.kind() == "type_identifier") - .map(|c| source[c.byte_range()].to_string()) - } - // - (ReturnType)methodName or - (ReturnType)methodName:(Type)arg - // + (ReturnType)classMethodName - "method_declaration" | "method_definition" => { - let mut cursor = node.walk(); - // The selector is composed of keyword_declarator children or - // a single identifier (for zero-argument methods). - let mut parts: Vec = Vec::new(); - for child in node.children(&mut cursor) { - match child.kind() { - "identifier" | "field_identifier" if parts.is_empty() => { - // Single-part selector (no arguments) - parts.push(source[child.byte_range()].to_string()); - } - "keyword_declarator" => { - // Each keyword_declarator has a keyword child - let mut kw_cursor = child.walk(); - if let Some(kw) = child.children(&mut kw_cursor) - .find(|c| c.kind() == "keyword_selector" || c.kind() == "identifier") - { - parts.push(format!("{}:", &source[kw.byte_range()])); - } - } - _ => {} - } - } - if parts.is_empty() { - None - } else { - Some(parts.join("")) - } - } - _ => None, - } -} - -/// Custom name extraction for Kotlin nodes. -/// -/// Handles `property_declaration` where the name is in a child -/// `variable_declaration` node, and `type_alias` where the name is -/// in an `identifier` child before the `=` (the `type` field is the RHS). 
-fn kotlin_node_name(node: &Node, source: &str) -> Option { - match node.kind() { - "property_declaration" => { - let mut cursor = node.walk(); - // Name is in the first variable_declaration or identifier child - for child in node.children(&mut cursor) { - if child.kind() == "variable_declaration" { - let name = child.child_by_field_name("name") - .or_else(|| { - let mut c2 = child.walk(); - child.children(&mut c2).find(|c| c.kind() == "simple_identifier") - })?; - return Some(source[name.byte_range()].to_string()); - } - if child.kind() == "simple_identifier" { - return Some(source[child.byte_range()].to_string()); - } - } - None - } - "type_alias" => { - let mut cursor = node.walk(); - node.children(&mut cursor) - .find(|c| c.kind() == "type_identifier" || c.kind() == "simple_identifier") - .map(|c| source[c.byte_range()].to_string()) - } - _ => None, - } -} - -/// Custom name extraction for PHP `const_declaration` nodes. -/// -/// PHP `const_declaration` stores names inside `const_element` children. -fn php_node_name(node: &Node, source: &str) -> Option { - match node.kind() { - "const_declaration" => { - let mut cursor = node.walk(); - let elem = node.children(&mut cursor) - .find(|c| c.kind() == "const_element")?; - let name = elem.child_by_field_name("name")?; - Some(source[name.byte_range()].to_string()) - } - _ => None, - } -} - -/// Custom name extraction for Go nodes. -/// -/// Handles three Go-specific patterns: -/// - `method_declaration`: encodes receiver type into the name, producing -/// `ReceiverType.MethodName` or `(*ReceiverType).MethodName`. -/// - `type_declaration`: navigates to the inner `type_spec` for the name. -/// - `const_declaration` / `var_declaration`: navigates to the inner spec. 
-fn go_node_name(node: &Node, source: &str) -> Option { - match node.kind() { - "method_declaration" => { - let method_name_node = node.child_by_field_name("name")?; - let method_name = &source[method_name_node.byte_range()]; - - let receiver = node.child_by_field_name("receiver")?; - let mut cursor = receiver.walk(); - let param = receiver - .children(&mut cursor) - .find(|c| c.kind() == "parameter_declaration")?; - - let type_node = param.child_by_field_name("type")?; - let receiver_type = if type_node.kind() == "pointer_type" { - let mut cursor2 = type_node.walk(); - let inner = type_node - .children(&mut cursor2) - .find(|c| c.kind() == "type_identifier")?; - format!("(*{})", &source[inner.byte_range()]) - } else { - source[type_node.byte_range()].to_string() - }; - - Some(format!("{receiver_type}.{method_name}")) - } - "type_declaration" => { - let mut cursor = node.walk(); - let type_spec = node - .children(&mut cursor) - .find(|c| c.kind() == "type_spec")?; - let name_node = type_spec.child_by_field_name("name")?; - Some(source[name_node.byte_range()].to_string()) - } - "const_declaration" => { - let mut cursor = node.walk(); - let spec = node - .children(&mut cursor) - .find(|c| c.kind() == "const_spec")?; - let name_node = spec.child_by_field_name("name")?; - Some(source[name_node.byte_range()].to_string()) - } - "var_declaration" => { - let mut cursor = node.walk(); - let spec = node - .children(&mut cursor) - .find(|c| c.kind() == "var_spec")?; - let name_node = spec.child_by_field_name("name")?; - Some(source[name_node.byte_range()].to_string()) - } - _ => None, - } -} - -/// Go language configuration. 
-static GO_CONFIG: LanguageConfig = LanguageConfig { - ts_language: || tree_sitter_go::LANGUAGE.into(), - extensions: &["go"], - kind_map: &[ - ("fn", "function_declaration"), - ("method", "method_declaration"), - ("type", "type_declaration"), - ("const", "const_declaration"), - ("var", "var_declaration"), - ], - name_field: "name", - name_overrides: &[], - body_fields: &["body"], - custom_name: Some(go_node_name), -}; - -/// JavaScript language configuration. -static JAVASCRIPT_CONFIG: LanguageConfig = LanguageConfig { - ts_language: || tree_sitter_javascript::LANGUAGE.into(), - extensions: &["js", "mjs", "cjs", "jsx"], - kind_map: &[ - ("fn", "function_declaration"), - ("class", "class_declaration"), - ("method", "method_definition"), - ], - name_field: "name", - name_overrides: &[], - body_fields: &["body"], - custom_name: None, -}; - -/// TypeScript language configuration. -static TYPESCRIPT_CONFIG: LanguageConfig = LanguageConfig { - ts_language: || tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(), - extensions: &["ts", "mts", "cts"], - kind_map: &[ - ("fn", "function_declaration"), - ("class", "class_declaration"), - ("method", "method_definition"), - ("interface", "interface_declaration"), - ("type", "type_alias_declaration"), - ("enum", "enum_declaration"), - ], - name_field: "name", - name_overrides: &[], - body_fields: &["body"], - custom_name: None, -}; - -/// TSX language configuration. -static TSX_CONFIG: LanguageConfig = LanguageConfig { - ts_language: || tree_sitter_typescript::LANGUAGE_TSX.into(), - extensions: &["tsx"], - kind_map: &[ - ("fn", "function_declaration"), - ("class", "class_declaration"), - ("method", "method_definition"), - ("interface", "interface_declaration"), - ("type", "type_alias_declaration"), - ("enum", "enum_declaration"), - ], - name_field: "name", - name_overrides: &[], - body_fields: &["body"], - custom_name: None, -}; - -/// C language configuration. 
-static C_CONFIG: LanguageConfig = LanguageConfig { - ts_language: || tree_sitter_c::LANGUAGE.into(), - extensions: &["c", "h"], - kind_map: &[ - ("fn", "function_definition"), - ("struct", "struct_specifier"), - ("enum", "enum_specifier"), - ("typedef", "type_definition"), - ], - name_field: "name", - name_overrides: &[], - body_fields: &["body"], - custom_name: Some(c_node_name), -}; - -/// C++ language configuration. -static CPP_CONFIG: LanguageConfig = LanguageConfig { - ts_language: || tree_sitter_cpp::LANGUAGE.into(), - extensions: &["cpp", "cc", "cxx", "hpp", "hh", "hxx", "h++", "c++"], - kind_map: &[ - ("fn", "function_definition"), - ("class", "class_specifier"), - ("struct", "struct_specifier"), - ("namespace", "namespace_definition"), - ("enum", "enum_specifier"), - ("template", "template_declaration"), - ("typedef", "type_definition"), - ("using", "alias_declaration"), - ], - name_field: "name", - name_overrides: &[], - body_fields: &["body", "declaration_list"], - custom_name: Some(cpp_node_name), -}; - -/// Java language configuration. -static JAVA_CONFIG: LanguageConfig = LanguageConfig { - ts_language: || tree_sitter_java::LANGUAGE.into(), - extensions: &["java"], - kind_map: &[ - ("fn", "method_declaration"), - ("class", "class_declaration"), - ("interface", "interface_declaration"), - ("enum", "enum_declaration"), - ("constructor", "constructor_declaration"), - ("record", "record_declaration"), - ("annotation", "annotation_type_declaration"), - ], - name_field: "name", - name_overrides: &[], - body_fields: &["body"], - custom_name: None, -}; - -/// C# language configuration. 
-static CSHARP_CONFIG: LanguageConfig = LanguageConfig { - ts_language: || tree_sitter_c_sharp::LANGUAGE.into(), - extensions: &["cs"], - kind_map: &[ - ("fn", "method_declaration"), - ("class", "class_declaration"), - ("interface", "interface_declaration"), - ("enum", "enum_declaration"), - ("struct", "struct_declaration"), - ("namespace", "namespace_declaration"), - ("constructor", "constructor_declaration"), - ("property", "property_declaration"), - ("record", "record_declaration"), - ("delegate", "delegate_declaration"), - ], - name_field: "name", - name_overrides: &[], - body_fields: &["body"], - custom_name: None, -}; - -/// PHP language configuration (PHP-only grammar, no HTML interleaving). -static PHP_CONFIG: LanguageConfig = LanguageConfig { - ts_language: || tree_sitter_php::LANGUAGE_PHP_ONLY.into(), - extensions: &["php"], - kind_map: &[ - ("fn", "function_definition"), - ("class", "class_declaration"), - ("method", "method_declaration"), - ("interface", "interface_declaration"), - ("enum", "enum_declaration"), - ("trait", "trait_declaration"), - ("namespace", "namespace_definition"), - ("const", "const_declaration"), - ], - name_field: "name", - name_overrides: &[], - body_fields: &["body"], - custom_name: Some(php_node_name), -}; - -/// Objective-C language configuration. -static OBJC_CONFIG: LanguageConfig = LanguageConfig { - ts_language: || tree_sitter_objc::LANGUAGE.into(), - extensions: &["m", "mm"], - kind_map: &[ - ("fn", "function_definition"), - ("class", "class_interface"), - ("impl", "class_implementation"), - ("protocol", "protocol_declaration"), - ("method", "method_definition"), - ("method_decl", "method_declaration"), - ("struct", "struct_specifier"), - ("enum", "enum_specifier"), - ("typedef", "type_definition"), - ], - name_field: "name", - name_overrides: &[], - body_fields: &["body"], - custom_name: Some(objc_node_name), -}; - -/// Kotlin language configuration. 
-static KOTLIN_CONFIG: LanguageConfig = LanguageConfig { - ts_language: || tree_sitter_kotlin_ng::LANGUAGE.into(), - extensions: &["kt", "kts"], - kind_map: &[ - ("fn", "function_declaration"), - ("class", "class_declaration"), - ("object", "object_declaration"), - ("property", "property_declaration"), - ("typealias", "type_alias"), - ], - name_field: "name", - name_overrides: &[], - body_fields: &["body", "class_body"], - custom_name: Some(kotlin_node_name), -}; - -/// Swift language configuration. -static SWIFT_CONFIG: LanguageConfig = LanguageConfig { - ts_language: || tree_sitter_swift::LANGUAGE.into(), - extensions: &["swift"], - kind_map: &[ - ("fn", "function_declaration"), - ("class", "class_declaration"), - ("protocol", "protocol_declaration"), - ("enum", "enum_entry"), - ("property", "property_declaration"), - ("init", "init_declaration"), - ("typealias", "typealias_declaration"), - ], - name_field: "name", - name_overrides: &[], - body_fields: &["body"], - custom_name: None, -}; - -/// Supported languages for tree_path resolution. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Language { - Rust, - Python, - Go, - JavaScript, - TypeScript, - Tsx, - C, - Cpp, - Java, - CSharp, - Php, - ObjectiveC, - Kotlin, - Swift, -} - -impl Language { - /// Get the language configuration for this language. - fn config(&self) -> &'static LanguageConfig { - match self { - Language::Rust => &RUST_CONFIG, - Language::Python => &PYTHON_CONFIG, - Language::Go => &GO_CONFIG, - Language::JavaScript => &JAVASCRIPT_CONFIG, - Language::TypeScript => &TYPESCRIPT_CONFIG, - Language::Tsx => &TSX_CONFIG, - Language::C => &C_CONFIG, - Language::Cpp => &CPP_CONFIG, - Language::Java => &JAVA_CONFIG, - Language::CSharp => &CSHARP_CONFIG, - Language::Php => &PHP_CONFIG, - Language::ObjectiveC => &OBJC_CONFIG, - Language::Kotlin => &KOTLIN_CONFIG, - Language::Swift => &SWIFT_CONFIG, - } - } - - /// Get the tree-sitter language grammar. 
- fn ts_language(&self) -> TSLanguage { - (self.config().ts_language)() - } -} - -/// Detect language from file extension. Returns `None` for unsupported -/// languages (unknown extension). -/// -/// # Extension Collision -/// -/// `.h` files are ambiguous (C, C++, or Objective-C). We map them to C -/// by default. Users can override via future configuration if needed. -/// -/// If two languages share an extension (unlikely with built-in languages), -/// the first match in the following order is returned: -/// Rust → Python → Go → JavaScript → TypeScript → TSX → C → C++ → -/// Java → C# → PHP → Objective-C → Kotlin → Swift. -pub fn detect_language(path: &Path) -> Option { - let ext = path.extension()?.to_str()?; - - if RUST_CONFIG.matches_extension(ext) { - return Some(Language::Rust); - } - - if PYTHON_CONFIG.matches_extension(ext) { - return Some(Language::Python); - } - - if GO_CONFIG.matches_extension(ext) { - return Some(Language::Go); - } - - if JAVASCRIPT_CONFIG.matches_extension(ext) { - return Some(Language::JavaScript); - } - - if TYPESCRIPT_CONFIG.matches_extension(ext) { - return Some(Language::TypeScript); - } - if TSX_CONFIG.matches_extension(ext) { - return Some(Language::Tsx); - } - - if C_CONFIG.matches_extension(ext) { - return Some(Language::C); - } - if CPP_CONFIG.matches_extension(ext) { - return Some(Language::Cpp); - } - if JAVA_CONFIG.matches_extension(ext) { - return Some(Language::Java); - } - if CSHARP_CONFIG.matches_extension(ext) { - return Some(Language::CSharp); - } - if PHP_CONFIG.matches_extension(ext) { - return Some(Language::Php); - } - if OBJC_CONFIG.matches_extension(ext) { - return Some(Language::ObjectiveC); - } - if KOTLIN_CONFIG.matches_extension(ext) { - return Some(Language::Kotlin); - } - if SWIFT_CONFIG.matches_extension(ext) { - return Some(Language::Swift); - } - - None -} - -/// Create a tree-sitter parser for the given language. 
-fn make_parser(lang: Language) -> Parser { - let mut parser = Parser::new(); - parser - .set_language(&lang.ts_language()) - .expect("tree-sitter grammar should load"); - parser -} - -/// A parsed tree_path segment: (kind_shorthand, name). -#[derive(Debug, Clone, PartialEq, Eq)] -struct PathSegment { - kind: String, - name: String, -} - -/// Parse a tree_path string into segments. -/// -/// `"fn::add_money"` → `[PathSegment { kind: "fn", name: "add_money" }]` -/// `"impl::Money::fn::new"` → `[impl/Money, fn/new]` -fn parse_tree_path(tree_path: &str) -> Option> { - let parts: Vec<&str> = tree_path.split("::").collect(); - if !parts.len().is_multiple_of(2) { - return None; // must be pairs - } - let segments: Vec = parts - .chunks(2) - .map(|pair| PathSegment { - kind: pair[0].to_string(), - name: pair[1].to_string(), - }) - .collect(); - if segments.is_empty() { - return None; - } - Some(segments) -} - -/// Resolve a `tree_path` to a source span `[start_line, end_line]` (1-indexed, -/// inclusive). -/// -/// Returns `None` if the tree_path cannot be resolved (item renamed, deleted, -/// grammar unavailable, or language not supported). -pub fn resolve_tree_path(source: &str, tree_path: &str, lang: Language) -> Option<[usize; 2]> { - if tree_path.is_empty() { - return None; - } - - let config = lang.config(); - let segments = parse_tree_path(tree_path)?; - let mut parser = make_parser(lang); - let tree = parser.parse(source, None)?; - let root = tree.root_node(); - - let node = resolve_segments(config, &root, &segments, source)?; - - // Return 1-indexed inclusive line range - let start_line = node.start_position().row + 1; - let end_line = node.end_position().row + 1; - Some([start_line, end_line]) -} - -/// Walk the tree to find a node matching the given path segments. 
-fn resolve_segments<'a>( - config: &LanguageConfig, - parent: &Node<'a>, - segments: &[PathSegment], - source: &'a str, -) -> Option> { - if segments.is_empty() { - return Some(*parent); - } - - let seg = &segments[0]; - let ts_kind = config.shorthand_to_kind(&seg.kind)?; - - let mut cursor = parent.walk(); - for child in parent.children(&mut cursor) { - if child.kind() != ts_kind { - continue; - } - if let Some(name) = config.node_name(&child, source) { - if *name == seg.name && segments.len() == 1 { - return Some(child); - } else if *name == seg.name { - // Descend — look inside this node's body - return resolve_in_body(config, &child, &segments[1..], source); - } - } - } - - None -} - -/// Find subsequent segments inside an item's body (e.g., methods inside impl). -fn resolve_in_body<'a>( - config: &LanguageConfig, - node: &Node<'a>, - segments: &[PathSegment], - source: &'a str, -) -> Option> { - let body = config.find_body(node)?; - resolve_segments(config, &body, segments, source) -} - -/// Compute the canonical `tree_path` for the AST node at the given span. -/// -/// Returns an empty string if no suitable structural path can be determined -/// (e.g., the span doesn't align with a named item, or the language is -/// unsupported). 
-pub fn compute_tree_path(source: &str, span: [usize; 2], lang: Language) -> String { - let config = lang.config(); - let mut parser = make_parser(lang); - let tree = match parser.parse(source, None) { - Some(t) => t, - None => return String::new(), - }; - - let root = tree.root_node(); - // Convert 1-indexed inclusive span to 0-indexed row - let target_start = span[0].saturating_sub(1); - let target_end = span[1].saturating_sub(1); - - // Find the best item node within the target range - let node = match find_item_in_range(config, &root, target_start, target_end) { - Some(n) => n, - None => return String::new(), - }; - - // Build path from root to this node - build_path_to_node(config, &root, &node, source) -} - -/// Find the best item node within [target_start, target_end] (0-indexed rows). -/// -/// Attributes in Rust are sibling nodes, not children of the item, so a -/// sidecar span that includes `#[derive(...)]` lines will start before the -/// item node. We therefore match any item whose start/end rows fall within -/// the target range, preferring the widest match (the outermost item). 
-fn find_item_in_range<'a>( - config: &LanguageConfig, - root: &Node<'a>, - target_start: usize, - target_end: usize, -) -> Option> { - let mut best: Option> = None; - - fn walk<'a>( - config: &LanguageConfig, - node: &Node<'a>, - target_start: usize, - target_end: usize, - best: &mut Option>, - ) { - let start = node.start_position().row; - let end = node.end_position().row; - - // Skip nodes that don't overlap our target - if start > target_end || end < target_start { - return; - } - - // Check if this is a named item node within the target range - if start >= target_start && end <= target_end && is_item_node(config, node) { - // Prefer the widest (outermost) match - if let Some(b) = best { - let b_size = b.end_position().row - b.start_position().row; - let n_size = end - start; - if n_size >= b_size { - *best = Some(*node); - } - } else { - *best = Some(*node); - } - } - - // Recurse into children - let mut cursor = node.walk(); - for child in node.children(&mut cursor) { - walk(config, &child, target_start, target_end, best); - } - } - - walk(config, root, target_start, target_end, &mut best); - best -} - -/// Check if a node is an item type we track in tree_path. -fn is_item_node(config: &LanguageConfig, node: &Node) -> bool { - config.kind_to_shorthand(node.kind()).is_some() -} - -/// Build the tree_path string for a given target node by walking from root. -fn build_path_to_node(config: &LanguageConfig, root: &Node, target: &Node, source: &str) -> String { - let mut segments: Vec = Vec::new(); - if collect_path(config, root, target, source, &mut segments) { - segments.join("::") - } else { - String::new() - } -} - -/// Recursively find `target` in the tree and collect path segments. 
-fn collect_path( - config: &LanguageConfig, - node: &Node, - target: &Node, - source: &str, - segments: &mut Vec, -) -> bool { - if node.id() == target.id() { - // We found the target — add this node's segment if it's an item - if let (Some(short), Some(name)) = ( - config.kind_to_shorthand(node.kind()), - config.node_name(node, source), - ) { - segments.push(format!("{short}::{name}")); - return true; - } - return false; - } - - // Check children - let mut cursor = node.walk(); - for child in node.children(&mut cursor) { - let child_start = child.start_position().row; - let child_end = child.end_position().row; - let target_start = target.start_position().row; - let target_end = target.end_position().row; - - // Only descend into nodes that contain the target - if child_start <= target_start - && child_end >= target_end - && collect_path(config, &child, target, source, segments) - { - // If this node is an item node, prepend its segment - if is_item_node(config, node) - && let (Some(short), Some(name)) = ( - config.kind_to_shorthand(node.kind()), - config.node_name(node, source), - ) - { - segments.insert(0, format!("{short}::{name}")); - } - return true; - } - } - - false -} - -// --------------------------------------------------------------------------- -// Tests -// --------------------------------------------------------------------------- - -#[cfg(test)] -mod tests { - use super::*; - - const SAMPLE_RUST: &str = r#"use std::collections::HashMap; - -/// A monetary amount -pub struct Money { - amount: i64, - currency: String, -} - -impl Money { - pub fn new(amount: i64, currency: String) -> Self { - Self { amount, currency } - } - - pub fn add(&self, other: &Money) -> Result { - if self.currency != other.currency { - return Err("mismatched currencies"); - } - Ok(Money { - amount: self.amount + other.amount, - currency: self.currency.clone(), - }) - } -} - -mod billing { - pub fn charge(amount: i64) -> bool { - amount > 0 - } -} - -fn standalone() -> i32 { - 
42 -} -"#; - - #[test] - fn resolve_top_level_fn() { - let span = resolve_tree_path(SAMPLE_RUST, "fn::standalone", Language::Rust); - assert!(span.is_some(), "should resolve fn::standalone"); - let [start, end] = span.unwrap(); - assert!(start > 0); - assert!(end >= start); - // Verify the span contains the function - let lines: Vec<&str> = SAMPLE_RUST.lines().collect(); - assert!( - lines[start - 1].contains("fn standalone"), - "span start should point to fn standalone, got: {}", - lines[start - 1] - ); - } - - #[test] - fn resolve_struct() { - let span = resolve_tree_path(SAMPLE_RUST, "struct::Money", Language::Rust); - assert!(span.is_some(), "should resolve struct::Money"); - let [start, _end] = span.unwrap(); - let lines: Vec<&str> = SAMPLE_RUST.lines().collect(); - assert!( - lines[start - 1].contains("struct Money"), - "span start should point to struct Money, got: {}", - lines[start - 1] - ); - } - - #[test] - fn resolve_impl_method() { - let span = resolve_tree_path(SAMPLE_RUST, "impl::Money::fn::new", Language::Rust); - assert!(span.is_some(), "should resolve impl::Money::fn::new"); - let [start, _end] = span.unwrap(); - let lines: Vec<&str> = SAMPLE_RUST.lines().collect(); - assert!( - lines[start - 1].contains("fn new"), - "span start should point to fn new, got: {}", - lines[start - 1] - ); - } - - #[test] - fn resolve_impl_method_add() { - let span = resolve_tree_path(SAMPLE_RUST, "impl::Money::fn::add", Language::Rust); - assert!(span.is_some(), "should resolve impl::Money::fn::add"); - let [start, _end] = span.unwrap(); - let lines: Vec<&str> = SAMPLE_RUST.lines().collect(); - assert!( - lines[start - 1].contains("fn add"), - "span start should point to fn add, got: {}", - lines[start - 1] - ); - } - - #[test] - fn resolve_mod_fn() { - let span = resolve_tree_path(SAMPLE_RUST, "mod::billing::fn::charge", Language::Rust); - assert!(span.is_some(), "should resolve mod::billing::fn::charge"); - let [start, _end] = span.unwrap(); - let lines: Vec<&str> 
= SAMPLE_RUST.lines().collect(); - assert!( - lines[start - 1].contains("fn charge"), - "span start should point to fn charge, got: {}", - lines[start - 1] - ); - } - - #[test] - fn resolve_impl_block() { - let span = resolve_tree_path(SAMPLE_RUST, "impl::Money", Language::Rust); - assert!(span.is_some(), "should resolve impl::Money"); - let [start, _end] = span.unwrap(); - let lines: Vec<&str> = SAMPLE_RUST.lines().collect(); - assert!( - lines[start - 1].contains("impl Money"), - "span start should point to impl Money, got: {}", - lines[start - 1] - ); - } - - #[test] - fn resolve_nonexistent_returns_none() { - let span = resolve_tree_path(SAMPLE_RUST, "fn::nonexistent", Language::Rust); - assert!(span.is_none()); - } - - #[test] - fn resolve_empty_returns_none() { - let span = resolve_tree_path(SAMPLE_RUST, "", Language::Rust); - assert!(span.is_none()); - } - - #[test] - fn compute_fn_path() { - // Find standalone function line - let lines: Vec<&str> = SAMPLE_RUST.lines().collect(); - let start = lines - .iter() - .position(|l| l.contains("fn standalone")) - .unwrap() - + 1; - let end = lines - .iter() - .enumerate() - .skip(start - 1) - .find(|(_, l)| l.contains('}')) - .unwrap() - .0 - + 1; - - let path = compute_tree_path(SAMPLE_RUST, [start, end], Language::Rust); - assert_eq!(path, "fn::standalone"); - } - - #[test] - fn compute_impl_method_path() { - let lines: Vec<&str> = SAMPLE_RUST.lines().collect(); - let start = lines.iter().position(|l| l.contains("pub fn new")).unwrap() + 1; - // fn new spans from its line to the closing } - let mut brace_depth = 0i32; - let mut end = start; - for (i, line) in lines.iter().enumerate().skip(start - 1) { - for ch in line.chars() { - if ch == '{' { - brace_depth += 1; - } - if ch == '}' { - brace_depth -= 1; - } - } - if brace_depth == 0 { - end = i + 1; - break; - } - } - - let path = compute_tree_path(SAMPLE_RUST, [start, end], Language::Rust); - assert_eq!(path, "impl::Money::fn::new"); - } - - #[test] - fn 
compute_struct_path() { - let lines: Vec<&str> = SAMPLE_RUST.lines().collect(); - let start = lines - .iter() - .position(|l| l.contains("pub struct Money")) - .unwrap() - + 1; - let end = lines - .iter() - .enumerate() - .skip(start - 1) - .find(|(_, l)| l.trim() == "}") - .unwrap() - .0 - + 1; - - let path = compute_tree_path(SAMPLE_RUST, [start, end], Language::Rust); - assert_eq!(path, "struct::Money"); - } - - #[test] - fn roundtrip_resolve_compute() { - // Compute path for fn::standalone, then resolve it — spans should match - // Use tree-sitter to find exact span - let resolved_span = - resolve_tree_path(SAMPLE_RUST, "fn::standalone", Language::Rust).unwrap(); - - let computed_path = compute_tree_path(SAMPLE_RUST, resolved_span, Language::Rust); - assert_eq!(computed_path, "fn::standalone"); - - let re_resolved = resolve_tree_path(SAMPLE_RUST, &computed_path, Language::Rust).unwrap(); - assert_eq!(re_resolved, resolved_span); - } - - #[test] - fn detect_language_rust() { - assert_eq!( - detect_language(Path::new("src/main.rs")), - Some(Language::Rust) - ); - assert_eq!(detect_language(Path::new("foo.py")), Some(Language::Python)); - } - - #[test] - fn resilient_to_formatting() { - // Same code reformatted differently — tree_path should still resolve - let reformatted = r#"use std::collections::HashMap; - -/// A monetary amount -pub struct Money { amount: i64, currency: String } - -impl Money { - pub fn new(amount: i64, currency: String) -> Self { Self { amount, currency } } - - pub fn add(&self, other: &Money) -> Result { - if self.currency != other.currency { return Err("mismatched currencies"); } - Ok(Money { amount: self.amount + other.amount, currency: self.currency.clone() }) - } -} - -mod billing { - pub fn charge(amount: i64) -> bool { amount > 0 } -} - -fn standalone() -> i32 { 42 } -"#; - - // All tree_paths from the original should resolve in the reformatted version - for tp in &[ - "fn::standalone", - "struct::Money", - "impl::Money", - 
"impl::Money::fn::new", - "impl::Money::fn::add", - "mod::billing::fn::charge", - ] { - let span = resolve_tree_path(reformatted, tp, Language::Rust); - assert!(span.is_some(), "should resolve {tp} in reformatted code"); - } - } - - mod python_tests { - use super::*; - - const SAMPLE_PYTHON: &str = r#"# A simple order processing module - -class Order: - def __init__(self, amount): - self.amount = amount - - def process(self): - return self.amount > 0 - -def calculate_total(items): - return sum(items) -"#; - - #[test] - fn resolve_python_function() { - let span = resolve_tree_path(SAMPLE_PYTHON, "fn::calculate_total", Language::Python); - assert!(span.is_some(), "should resolve fn::calculate_total"); - let [start, _end] = span.unwrap(); - let lines: Vec<&str> = SAMPLE_PYTHON.lines().collect(); - assert!( - lines[start - 1].contains("def calculate_total"), - "span should point to calculate_total function" - ); - } - - #[test] - fn resolve_python_class() { - let span = resolve_tree_path(SAMPLE_PYTHON, "class::Order", Language::Python); - assert!(span.is_some(), "should resolve class::Order"); - let [start, _end] = span.unwrap(); - let lines: Vec<&str> = SAMPLE_PYTHON.lines().collect(); - assert!( - lines[start - 1].contains("class Order"), - "span should point to Order class" - ); - } - - #[test] - fn resolve_python_class_method() { - let span = - resolve_tree_path(SAMPLE_PYTHON, "class::Order::fn::process", Language::Python); - assert!(span.is_some(), "should resolve class::Order::fn::process"); - let [start, _end] = span.unwrap(); - let lines: Vec<&str> = SAMPLE_PYTHON.lines().collect(); - assert!( - lines[start - 1].contains("def process"), - "span should point to process method" - ); - } - - #[test] - fn resolve_python_init_method() { - let span = resolve_tree_path( - SAMPLE_PYTHON, - "class::Order::fn::__init__", - Language::Python, - ); - assert!(span.is_some(), "should resolve class::Order::fn::__init__"); - let [start, _end] = span.unwrap(); - let lines: 
Vec<&str> = SAMPLE_PYTHON.lines().collect(); - assert!( - lines[start - 1].contains("def __init__"), - "span should point to __init__ method" - ); - } - - #[test] - fn compute_python_function_path() { - let lines: Vec<&str> = SAMPLE_PYTHON.lines().collect(); - let start = lines - .iter() - .position(|l| l.contains("def calculate_total")) - .unwrap() - + 1; - let end = lines.len(); - - let path = compute_tree_path(SAMPLE_PYTHON, [start, end], Language::Python); - assert_eq!(path, "fn::calculate_total"); - } - - #[test] - fn compute_python_class_method_path() { - let lines: Vec<&str> = SAMPLE_PYTHON.lines().collect(); - let start = lines - .iter() - .position(|l| l.contains("def process")) - .unwrap() - + 1; - // Find end of method (next line with same or less indentation) - let end = start + 1; // Single-line body for this test - - let path = compute_tree_path(SAMPLE_PYTHON, [start, end], Language::Python); - assert_eq!(path, "class::Order::fn::process"); - } - - #[test] - fn roundtrip_python() { - // Compute path for fn::calculate_total, then resolve it - let resolved_span = - resolve_tree_path(SAMPLE_PYTHON, "fn::calculate_total", Language::Python).unwrap(); - - let computed_path = compute_tree_path(SAMPLE_PYTHON, resolved_span, Language::Python); - assert_eq!(computed_path, "fn::calculate_total"); - - let re_resolved = - resolve_tree_path(SAMPLE_PYTHON, &computed_path, Language::Python).unwrap(); - assert_eq!(re_resolved, resolved_span); - } - } - - mod go_tests { - use super::*; - - const SAMPLE_GO: &str = r#"package main - -import "fmt" - -// Calculator performs arithmetic operations -type Calculator struct { - value int -} - -// Reader is an interface -type Reader interface { - Read(p []byte) (n int, err error) -} - -// MaxRetries is a constant -const MaxRetries = 3 - -// DefaultTimeout is a var -var DefaultTimeout = 30 - -// Add adds a number to the calculator's value -func (c *Calculator) Add(n int) { - c.value += n -} - -// Value returns the current value 
-func (c Calculator) Value() int { - return c.value -} - -// Add is a standalone function -func Add(a, b int) int { - return a + b -} -"#; - - #[test] - fn resolve_go_function() { - let span = resolve_tree_path(SAMPLE_GO, "fn::Add", Language::Go); - assert!(span.is_some(), "should resolve fn::Add"); - let [start, _end] = span.unwrap(); - let lines: Vec<&str> = SAMPLE_GO.lines().collect(); - assert!( - lines[start - 1].contains("func Add("), - "span should point to Add function, got: {}", - lines[start - 1] - ); - } - - #[test] - fn resolve_go_pointer_method() { - let span = - resolve_tree_path(SAMPLE_GO, "method::(*Calculator).Add", Language::Go); - assert!(span.is_some(), "should resolve method::(*Calculator).Add"); - let [start, _end] = span.unwrap(); - let lines: Vec<&str> = SAMPLE_GO.lines().collect(); - assert!( - lines[start - 1].contains("func (c *Calculator) Add"), - "span should point to Add method, got: {}", - lines[start - 1] - ); - } - - #[test] - fn resolve_go_value_method() { - let span = - resolve_tree_path(SAMPLE_GO, "method::Calculator.Value", Language::Go); - assert!(span.is_some(), "should resolve method::Calculator.Value"); - let [start, _end] = span.unwrap(); - let lines: Vec<&str> = SAMPLE_GO.lines().collect(); - assert!( - lines[start - 1].contains("func (c Calculator) Value"), - "span should point to Value method, got: {}", - lines[start - 1] - ); - } - - #[test] - fn resolve_go_type_struct() { - let span = resolve_tree_path(SAMPLE_GO, "type::Calculator", Language::Go); - assert!(span.is_some(), "should resolve type::Calculator"); - let [start, _end] = span.unwrap(); - let lines: Vec<&str> = SAMPLE_GO.lines().collect(); - assert!( - lines[start - 1].contains("type Calculator struct"), - "span should point to Calculator struct, got: {}", - lines[start - 1] - ); - } - - #[test] - fn resolve_go_type_interface() { - let span = resolve_tree_path(SAMPLE_GO, "type::Reader", Language::Go); - assert!(span.is_some(), "should resolve type::Reader"); - 
let [start, _end] = span.unwrap(); - let lines: Vec<&str> = SAMPLE_GO.lines().collect(); - assert!( - lines[start - 1].contains("type Reader interface"), - "span should point to Reader interface, got: {}", - lines[start - 1] - ); - } - - #[test] - fn resolve_go_const() { - let span = resolve_tree_path(SAMPLE_GO, "const::MaxRetries", Language::Go); - assert!(span.is_some(), "should resolve const::MaxRetries"); - let [start, _end] = span.unwrap(); - let lines: Vec<&str> = SAMPLE_GO.lines().collect(); - assert!( - lines[start - 1].contains("const MaxRetries"), - "span should point to MaxRetries const, got: {}", - lines[start - 1] - ); - } - - #[test] - fn resolve_go_var() { - let span = resolve_tree_path(SAMPLE_GO, "var::DefaultTimeout", Language::Go); - assert!(span.is_some(), "should resolve var::DefaultTimeout"); - let [start, _end] = span.unwrap(); - let lines: Vec<&str> = SAMPLE_GO.lines().collect(); - assert!( - lines[start - 1].contains("var DefaultTimeout"), - "span should point to DefaultTimeout var, got: {}", - lines[start - 1] - ); - } - - #[test] - fn compute_go_function_path() { - let lines: Vec<&str> = SAMPLE_GO.lines().collect(); - let start = lines - .iter() - .enumerate() - .rev() - .find(|(_, l)| l.contains("func Add(")) - .unwrap() - .0 - + 1; - let end = lines.len(); - - let path = compute_tree_path(SAMPLE_GO, [start, end], Language::Go); - assert_eq!(path, "fn::Add"); - } - - #[test] - fn compute_go_pointer_method_path() { - let lines: Vec<&str> = SAMPLE_GO.lines().collect(); - let start = lines - .iter() - .position(|l| l.contains("func (c *Calculator) Add")) - .unwrap() - + 1; - let end = lines - .iter() - .enumerate() - .skip(start) - .find(|(_, l)| l.starts_with('}')) - .map(|(i, _)| i + 1) - .unwrap_or(lines.len()); - - let path = compute_tree_path(SAMPLE_GO, [start, end], Language::Go); - assert_eq!(path, "method::(*Calculator).Add"); - } - - #[test] - fn compute_go_value_method_path() { - let lines: Vec<&str> = SAMPLE_GO.lines().collect(); 
- let start = lines - .iter() - .position(|l| l.contains("func (c Calculator) Value")) - .unwrap() - + 1; - let end = lines - .iter() - .enumerate() - .skip(start) - .find(|(_, l)| l.starts_with('}')) - .map(|(i, _)| i + 1) - .unwrap_or(lines.len()); - - let path = compute_tree_path(SAMPLE_GO, [start, end], Language::Go); - assert_eq!(path, "method::Calculator.Value"); - } - - #[test] - fn compute_go_type_path() { - let lines: Vec<&str> = SAMPLE_GO.lines().collect(); - let start = lines - .iter() - .position(|l| l.contains("type Calculator struct")) - .unwrap() - + 1; - let end = lines - .iter() - .enumerate() - .skip(start) - .find(|(_, l)| l.starts_with('}')) - .map(|(i, _)| i + 1) - .unwrap_or(lines.len()); - - let path = compute_tree_path(SAMPLE_GO, [start, end], Language::Go); - assert_eq!(path, "type::Calculator"); - } - - #[test] - fn roundtrip_go() { - let resolved_span = resolve_tree_path(SAMPLE_GO, "fn::Add", Language::Go).unwrap(); - - let computed_path = compute_tree_path(SAMPLE_GO, resolved_span, Language::Go); - assert_eq!(computed_path, "fn::Add"); - - let re_resolved = resolve_tree_path(SAMPLE_GO, &computed_path, Language::Go).unwrap(); - assert_eq!(re_resolved, resolved_span); - } - - #[test] - fn roundtrip_go_method() { - let resolved_span = - resolve_tree_path(SAMPLE_GO, "method::(*Calculator).Add", Language::Go).unwrap(); - - let computed_path = compute_tree_path(SAMPLE_GO, resolved_span, Language::Go); - assert_eq!(computed_path, "method::(*Calculator).Add"); - - let re_resolved = resolve_tree_path(SAMPLE_GO, &computed_path, Language::Go).unwrap(); - assert_eq!(re_resolved, resolved_span); - } - } - - mod javascript_tests { - use super::*; - - const SAMPLE_JS: &str = r#"// A simple counter module - -class Counter { - constructor(initial = 0) { - this.count = initial; - } - - increment() { - this.count++; - } - - getValue() { - return this.count; - } -} - -function createCounter(initial) { - return new Counter(initial); -} - -const utils = { - 
formatCount: (n) => `${n} items` -}; -"#; - - #[test] - fn resolve_js_function() { - let span = resolve_tree_path(SAMPLE_JS, "fn::createCounter", Language::JavaScript); - assert!(span.is_some(), "should resolve fn::createCounter"); - let [start, _end] = span.unwrap(); - let lines: Vec<&str> = SAMPLE_JS.lines().collect(); - assert!( - lines[start - 1].contains("function createCounter"), - "span should point to createCounter function" - ); - } - - #[test] - fn resolve_js_class() { - let span = resolve_tree_path(SAMPLE_JS, "class::Counter", Language::JavaScript); - assert!(span.is_some(), "should resolve class::Counter"); - let [start, _end] = span.unwrap(); - let lines: Vec<&str> = SAMPLE_JS.lines().collect(); - assert!( - lines[start - 1].contains("class Counter"), - "span should point to Counter class" - ); - } - - #[test] - fn resolve_js_method() { - let span = resolve_tree_path( - SAMPLE_JS, - "class::Counter::method::increment", - Language::JavaScript, - ); - assert!( - span.is_some(), - "should resolve class::Counter::method::increment" - ); - let [start, _end] = span.unwrap(); - let lines: Vec<&str> = SAMPLE_JS.lines().collect(); - assert!( - lines[start - 1].contains("increment()"), - "span should point to increment method" - ); - } - - #[test] - fn compute_js_function_path() { - let lines: Vec<&str> = SAMPLE_JS.lines().collect(); - let start = lines - .iter() - .position(|l| l.contains("function createCounter")) - .unwrap() - + 1; - let end = lines.len() - 3; // Rough end - - let path = compute_tree_path(SAMPLE_JS, [start, end], Language::JavaScript); - assert_eq!(path, "fn::createCounter"); - } - - #[test] - fn roundtrip_js() { - let resolved_span = resolve_tree_path( - SAMPLE_JS, - "class::Counter::method::getValue", - Language::JavaScript, - ) - .unwrap(); - - let computed_path = compute_tree_path(SAMPLE_JS, resolved_span, Language::JavaScript); - assert_eq!(computed_path, "class::Counter::method::getValue"); - - let re_resolved = - 
resolve_tree_path(SAMPLE_JS, &computed_path, Language::JavaScript).unwrap(); - assert_eq!(re_resolved, resolved_span); - } - } - - mod typescript_tests { - use super::*; - - const SAMPLE_TS: &str = r#"// A typed user service - -interface User { - id: number; - name: string; -} - -type UserId = number; - -enum UserRole { - Admin, - User, - Guest -} - -class UserService { - private users: User[] = []; - - addUser(user: User): void { - this.users.push(user); - } - - findById(id: UserId): User | undefined { - return this.users.find(u => u.id === id); - } -} - -function createUser(name: string): User { - return { id: Date.now(), name }; -} -"#; - - #[test] - fn resolve_ts_interface() { - let span = resolve_tree_path(SAMPLE_TS, "interface::User", Language::TypeScript); - assert!(span.is_some(), "should resolve interface::User"); - let [start, _end] = span.unwrap(); - let lines: Vec<&str> = SAMPLE_TS.lines().collect(); - assert!( - lines[start - 1].contains("interface User"), - "span should point to User interface" - ); - } - - #[test] - fn resolve_ts_type_alias() { - let span = resolve_tree_path(SAMPLE_TS, "type::UserId", Language::TypeScript); - assert!(span.is_some(), "should resolve type::UserId"); - let [start, _end] = span.unwrap(); - let lines: Vec<&str> = SAMPLE_TS.lines().collect(); - assert!( - lines[start - 1].contains("type UserId"), - "span should point to UserId type alias" - ); - } - - #[test] - fn resolve_ts_enum() { - let span = resolve_tree_path(SAMPLE_TS, "enum::UserRole", Language::TypeScript); - assert!(span.is_some(), "should resolve enum::UserRole"); - let [start, _end] = span.unwrap(); - let lines: Vec<&str> = SAMPLE_TS.lines().collect(); - assert!( - lines[start - 1].contains("enum UserRole"), - "span should point to UserRole enum" - ); - } - - #[test] - fn resolve_ts_class_method() { - let span = resolve_tree_path( - SAMPLE_TS, - "class::UserService::method::findById", - Language::TypeScript, - ); - assert!( - span.is_some(), - "should resolve 
class::UserService::method::findById" - ); - let [start, _end] = span.unwrap(); - let lines: Vec<&str> = SAMPLE_TS.lines().collect(); - assert!( - lines[start - 1].contains("findById("), - "span should point to findById method" - ); - } - - #[test] - fn compute_ts_interface_path() { - let lines: Vec<&str> = SAMPLE_TS.lines().collect(); - let start = lines - .iter() - .position(|l| l.contains("interface User")) - .unwrap() - + 1; - let end = start + 3; - - let path = compute_tree_path(SAMPLE_TS, [start, end], Language::TypeScript); - assert_eq!(path, "interface::User"); - } - - #[test] - fn roundtrip_ts() { - let resolved_span = - resolve_tree_path(SAMPLE_TS, "enum::UserRole", Language::TypeScript).unwrap(); - - let computed_path = compute_tree_path(SAMPLE_TS, resolved_span, Language::TypeScript); - assert_eq!(computed_path, "enum::UserRole"); - - let re_resolved = - resolve_tree_path(SAMPLE_TS, &computed_path, Language::TypeScript).unwrap(); - assert_eq!(re_resolved, resolved_span); - } - } - - mod tsx_tests { - use super::*; - - const SAMPLE_TSX: &str = r#"// A React component - -interface Props { - title: string; - count: number; -} - -function Counter({ title, count }: Props) { - return ( -
-

{title}

-

Count: {count}

-
- ); -} - -class Container extends React.Component { - render() { - return
{this.props.title}
; - } -} -"#; - - #[test] - fn resolve_tsx_function() { - let span = resolve_tree_path(SAMPLE_TSX, "fn::Counter", Language::Tsx); - assert!(span.is_some(), "should resolve fn::Counter in TSX"); - let [start, _end] = span.unwrap(); - let lines: Vec<&str> = SAMPLE_TSX.lines().collect(); - assert!( - lines[start - 1].contains("function Counter"), - "span should point to Counter function" - ); - } - - #[test] - fn resolve_tsx_class() { - let span = resolve_tree_path(SAMPLE_TSX, "class::Container", Language::Tsx); - assert!(span.is_some(), "should resolve class::Container in TSX"); - let [start, _end] = span.unwrap(); - let lines: Vec<&str> = SAMPLE_TSX.lines().collect(); - assert!( - lines[start - 1].contains("class Container"), - "span should point to Container class" - ); - } - - #[test] - fn resolve_tsx_interface() { - let span = resolve_tree_path(SAMPLE_TSX, "interface::Props", Language::Tsx); - assert!(span.is_some(), "should resolve interface::Props in TSX"); - let [start, _end] = span.unwrap(); - let lines: Vec<&str> = SAMPLE_TSX.lines().collect(); - assert!( - lines[start - 1].contains("interface Props"), - "span should point to Props interface" - ); - } - - #[test] - fn detect_tsx_extension() { - assert_eq!( - detect_language(Path::new("component.tsx")), - Some(Language::Tsx) - ); - } - } - - mod c_tests { - use super::*; - - const SAMPLE_C: &str = r#"#include - -struct Point { - int x; - int y; -}; - -enum Color { RED, GREEN, BLUE }; - -typedef struct Point Point_t; - -void process(int x, int y) { - printf("hello"); -} - -static int helper(void) { - return 42; -} -"#; - - #[test] - fn resolve_c_function() { - let span = resolve_tree_path(SAMPLE_C, "fn::process", Language::C); - assert!(span.is_some(), "should resolve fn::process"); - let [start, _end] = span.unwrap(); - let lines: Vec<&str> = SAMPLE_C.lines().collect(); - assert!( - lines[start - 1].contains("void process"), - "span should point to process function, got: {}", - lines[start - 1] - ); - } - - 
#[test] - fn resolve_c_struct() { - let span = resolve_tree_path(SAMPLE_C, "struct::Point", Language::C); - assert!(span.is_some(), "should resolve struct::Point"); - let [start, _end] = span.unwrap(); - let lines: Vec<&str> = SAMPLE_C.lines().collect(); - assert!( - lines[start - 1].contains("struct Point"), - "span should point to Point struct" - ); - } - - #[test] - fn resolve_c_enum() { - let span = resolve_tree_path(SAMPLE_C, "enum::Color", Language::C); - assert!(span.is_some(), "should resolve enum::Color"); - let [start, _end] = span.unwrap(); - let lines: Vec<&str> = SAMPLE_C.lines().collect(); - assert!( - lines[start - 1].contains("enum Color"), - "span should point to Color enum" - ); - } - - #[test] - fn resolve_c_typedef() { - let span = resolve_tree_path(SAMPLE_C, "typedef::Point_t", Language::C); - assert!(span.is_some(), "should resolve typedef::Point_t"); - let [start, _end] = span.unwrap(); - let lines: Vec<&str> = SAMPLE_C.lines().collect(); - assert!( - lines[start - 1].contains("typedef"), - "span should point to typedef" - ); - } - - #[test] - fn compute_c_function_path() { - let span = resolve_tree_path(SAMPLE_C, "fn::process", Language::C).unwrap(); - let path = compute_tree_path(SAMPLE_C, span, Language::C); - assert_eq!(path, "fn::process"); - } - - #[test] - fn roundtrip_c() { - for tp in &["fn::process", "fn::helper", "struct::Point", "enum::Color"] { - let span = resolve_tree_path(SAMPLE_C, tp, Language::C).unwrap(); - let path = compute_tree_path(SAMPLE_C, span, Language::C); - assert_eq!(&path, tp, "roundtrip failed for {tp}"); - } - } - - #[test] - fn detect_c_extensions() { - assert_eq!(detect_language(Path::new("main.c")), Some(Language::C)); - assert_eq!(detect_language(Path::new("header.h")), Some(Language::C)); - } - } - - mod cpp_tests { - use super::*; - - const SAMPLE_CPP: &str = r#"namespace math { - -class Calculator { -public: - int add(int a, int b) { - return a + b; - } -}; - -struct Point { - int x, y; -}; - -enum 
class Color { Red, Green, Blue }; - -} - -void standalone() {} -"#; - - #[test] - fn resolve_cpp_namespace() { - let span = resolve_tree_path(SAMPLE_CPP, "namespace::math", Language::Cpp); - assert!(span.is_some(), "should resolve namespace::math"); - let [start, _end] = span.unwrap(); - let lines: Vec<&str> = SAMPLE_CPP.lines().collect(); - assert!( - lines[start - 1].contains("namespace math"), - "span should point to namespace math, got: {}", - lines[start - 1] - ); - } - - #[test] - fn resolve_cpp_class_in_namespace() { - let span = resolve_tree_path( - SAMPLE_CPP, - "namespace::math::class::Calculator", - Language::Cpp, - ); - assert!(span.is_some(), "should resolve namespace::math::class::Calculator"); - } - - #[test] - fn resolve_cpp_method_in_class() { - let span = resolve_tree_path( - SAMPLE_CPP, - "namespace::math::class::Calculator::fn::add", - Language::Cpp, - ); - assert!(span.is_some(), "should resolve nested method"); - let [start, _end] = span.unwrap(); - let lines: Vec<&str> = SAMPLE_CPP.lines().collect(); - assert!( - lines[start - 1].contains("add"), - "span should point to add method" - ); - } - - #[test] - fn resolve_cpp_standalone() { - let span = resolve_tree_path(SAMPLE_CPP, "fn::standalone", Language::Cpp); - assert!(span.is_some(), "should resolve fn::standalone"); - } - - #[test] - fn resolve_cpp_enum() { - let span = resolve_tree_path( - SAMPLE_CPP, - "namespace::math::enum::Color", - Language::Cpp, - ); - assert!(span.is_some(), "should resolve enum in namespace"); - } - - #[test] - fn roundtrip_cpp() { - let span = resolve_tree_path(SAMPLE_CPP, "fn::standalone", Language::Cpp).unwrap(); - let path = compute_tree_path(SAMPLE_CPP, span, Language::Cpp); - assert_eq!(path, "fn::standalone"); - } - - #[test] - fn detect_cpp_extensions() { - assert_eq!(detect_language(Path::new("main.cpp")), Some(Language::Cpp)); - assert_eq!(detect_language(Path::new("main.cc")), Some(Language::Cpp)); - assert_eq!(detect_language(Path::new("header.hpp")), 
Some(Language::Cpp)); - } - } - - mod java_tests { - use super::*; - - const SAMPLE_JAVA: &str = r#"package com.example; - -public class Calculator { - public int add(int a, int b) { - return a + b; - } - - public Calculator() { - // constructor - } -} - -interface Computable { - int compute(int x); -} - -enum Direction { - NORTH, SOUTH, EAST, WEST -} - -record Point(int x, int y) {} -"#; - - #[test] - fn resolve_java_class() { - let span = resolve_tree_path(SAMPLE_JAVA, "class::Calculator", Language::Java); - assert!(span.is_some(), "should resolve class::Calculator"); - let [start, _end] = span.unwrap(); - let lines: Vec<&str> = SAMPLE_JAVA.lines().collect(); - assert!( - lines[start - 1].contains("class Calculator"), - "span should point to Calculator class" - ); - } - - #[test] - fn resolve_java_method() { - let span = resolve_tree_path( - SAMPLE_JAVA, - "class::Calculator::fn::add", - Language::Java, - ); - assert!(span.is_some(), "should resolve class::Calculator::fn::add"); - } - - #[test] - fn resolve_java_constructor() { - let span = resolve_tree_path( - SAMPLE_JAVA, - "class::Calculator::constructor::Calculator", - Language::Java, - ); - assert!(span.is_some(), "should resolve constructor"); - } - - #[test] - fn resolve_java_interface() { - let span = resolve_tree_path(SAMPLE_JAVA, "interface::Computable", Language::Java); - assert!(span.is_some(), "should resolve interface::Computable"); - } - - #[test] - fn resolve_java_enum() { - let span = resolve_tree_path(SAMPLE_JAVA, "enum::Direction", Language::Java); - assert!(span.is_some(), "should resolve enum::Direction"); - } - - #[test] - fn resolve_java_record() { - let span = resolve_tree_path(SAMPLE_JAVA, "record::Point", Language::Java); - assert!(span.is_some(), "should resolve record::Point"); - } - - #[test] - fn roundtrip_java() { - let span = resolve_tree_path( - SAMPLE_JAVA, - "class::Calculator::fn::add", - Language::Java, - ) - .unwrap(); - let path = compute_tree_path(SAMPLE_JAVA, span, 
Language::Java); - assert_eq!(path, "class::Calculator::fn::add"); - } - - #[test] - fn detect_java_extension() { - assert_eq!( - detect_language(Path::new("Main.java")), - Some(Language::Java) - ); - } - } - - mod csharp_tests { - use super::*; - - const SAMPLE_CSHARP: &str = r#"namespace MyApp { - -class Calculator { - public int Add(int a, int b) { - return a + b; - } - - public string Name { get; set; } - - public Calculator() {} -} - -interface IComputable { - int Compute(int x); -} - -enum Direction { - North, South, East, West -} - -struct Vector { - public int X; - public int Y; -} - -record Person(string Name, int Age); - -} -"#; - - #[test] - fn resolve_csharp_class() { - let span = resolve_tree_path( - SAMPLE_CSHARP, - "namespace::MyApp::class::Calculator", - Language::CSharp, - ); - assert!(span.is_some(), "should resolve namespace::MyApp::class::Calculator"); - } - - #[test] - fn resolve_csharp_method() { - let span = resolve_tree_path( - SAMPLE_CSHARP, - "namespace::MyApp::class::Calculator::fn::Add", - Language::CSharp, - ); - assert!(span.is_some(), "should resolve method in class in namespace"); - } - - #[test] - fn resolve_csharp_property() { - let span = resolve_tree_path( - SAMPLE_CSHARP, - "namespace::MyApp::class::Calculator::property::Name", - Language::CSharp, - ); - assert!(span.is_some(), "should resolve property::Name"); - } - - #[test] - fn resolve_csharp_interface() { - let span = resolve_tree_path( - SAMPLE_CSHARP, - "namespace::MyApp::interface::IComputable", - Language::CSharp, - ); - assert!(span.is_some(), "should resolve interface::IComputable"); - } - - #[test] - fn resolve_csharp_struct() { - let span = resolve_tree_path( - SAMPLE_CSHARP, - "namespace::MyApp::struct::Vector", - Language::CSharp, - ); - assert!(span.is_some(), "should resolve struct::Vector"); - } - - #[test] - fn resolve_csharp_enum() { - let span = resolve_tree_path( - SAMPLE_CSHARP, - "namespace::MyApp::enum::Direction", - Language::CSharp, - ); - 
assert!(span.is_some(), "should resolve enum::Direction"); - } - - #[test] - fn roundtrip_csharp() { - let span = resolve_tree_path( - SAMPLE_CSHARP, - "namespace::MyApp::class::Calculator::fn::Add", - Language::CSharp, - ) - .unwrap(); - let path = compute_tree_path(SAMPLE_CSHARP, span, Language::CSharp); - assert_eq!(path, "namespace::MyApp::class::Calculator::fn::Add"); - } - - #[test] - fn detect_csharp_extension() { - assert_eq!( - detect_language(Path::new("Program.cs")), - Some(Language::CSharp) - ); - } - } - - mod php_tests { - use super::*; - - const SAMPLE_PHP: &str = r#" -"#; - - #[test] - fn resolve_kotlin_class() { - let span = resolve_tree_path(SAMPLE_KOTLIN, "class::Calculator", Language::Kotlin); - assert!(span.is_some(), "should resolve class::Calculator"); - } - - #[test] - fn resolve_kotlin_method() { - let span = resolve_tree_path( - SAMPLE_KOTLIN, - "class::Calculator::fn::add", - Language::Kotlin, - ); - assert!(span.is_some(), "should resolve class::Calculator::fn::add"); - } - - #[test] - fn resolve_kotlin_object() { - let span = resolve_tree_path(SAMPLE_KOTLIN, "object::Singleton", Language::Kotlin); - assert!(span.is_some(), "should resolve object::Singleton"); - } - - #[test] - fn resolve_kotlin_function() { - let span = resolve_tree_path(SAMPLE_KOTLIN, "fn::standalone", Language::Kotlin); - assert!(span.is_some(), "should resolve fn::standalone"); - } - - #[test] - fn roundtrip_kotlin() { - let span = - resolve_tree_path(SAMPLE_KOTLIN, "fn::standalone", Language::Kotlin).unwrap(); - let path = compute_tree_path(SAMPLE_KOTLIN, span, Language::Kotlin); - assert_eq!(path, "fn::standalone"); - } - - #[test] - fn detect_kotlin_extension() { - assert_eq!( - detect_language(Path::new("Main.kt")), - Some(Language::Kotlin) - ); - assert_eq!( - detect_language(Path::new("build.gradle.kts")), - Some(Language::Kotlin) - ); - } - } - - mod swift_tests { - use super::*; - - const SAMPLE_SWIFT: &str = r#"protocol Drawable { - func draw() -} - -class 
Shape { - func area() -> Double { - return 0.0 - } - - init() {} -} - -func standalone() -> Int { - return 42 -} - -typealias Callback = () -> Void -"#; - - #[test] - fn resolve_swift_protocol() { - let span = resolve_tree_path(SAMPLE_SWIFT, "protocol::Drawable", Language::Swift); - assert!(span.is_some(), "should resolve protocol::Drawable"); - } - - #[test] - fn resolve_swift_class() { - let span = resolve_tree_path(SAMPLE_SWIFT, "class::Shape", Language::Swift); - assert!(span.is_some(), "should resolve class::Shape"); - } - - #[test] - fn resolve_swift_method() { - let span = resolve_tree_path( - SAMPLE_SWIFT, - "class::Shape::fn::area", - Language::Swift, - ); - assert!(span.is_some(), "should resolve class::Shape::fn::area"); - } - - #[test] - fn resolve_swift_function() { - let span = resolve_tree_path(SAMPLE_SWIFT, "fn::standalone", Language::Swift); - assert!(span.is_some(), "should resolve fn::standalone"); - } - - #[test] - fn roundtrip_swift() { - let span = - resolve_tree_path(SAMPLE_SWIFT, "fn::standalone", Language::Swift).unwrap(); - let path = compute_tree_path(SAMPLE_SWIFT, span, Language::Swift); - assert_eq!(path, "fn::standalone"); - } - - #[test] - fn detect_swift_extension() { - assert_eq!( - detect_language(Path::new("ViewController.swift")), - Some(Language::Swift) - ); - } - } - - mod objc_tests { - use super::*; - - const SAMPLE_OBJC: &str = r#"#import - -struct CGPoint { - float x; - float y; -}; - -void helper(void) { - NSLog(@"hello"); -} -"#; - - #[test] - fn resolve_objc_function() { - let span = resolve_tree_path(SAMPLE_OBJC, "fn::helper", Language::ObjectiveC); - assert!(span.is_some(), "should resolve fn::helper"); - let [start, _end] = span.unwrap(); - let lines: Vec<&str> = SAMPLE_OBJC.lines().collect(); - assert!( - lines[start - 1].contains("void helper"), - "span should point to helper function" - ); - } - - #[test] - fn resolve_objc_struct() { - let span = resolve_tree_path(SAMPLE_OBJC, "struct::CGPoint", 
Language::ObjectiveC); - assert!(span.is_some(), "should resolve struct::CGPoint"); - } - - #[test] - fn roundtrip_objc() { - let span = - resolve_tree_path(SAMPLE_OBJC, "fn::helper", Language::ObjectiveC).unwrap(); - let path = compute_tree_path(SAMPLE_OBJC, span, Language::ObjectiveC); - assert_eq!(path, "fn::helper"); - } - - #[test] - fn detect_objc_extensions() { - assert_eq!( - detect_language(Path::new("AppDelegate.m")), - Some(Language::ObjectiveC) - ); - assert_eq!( - detect_language(Path::new("mixed.mm")), - Some(Language::ObjectiveC) - ); - } - } -} diff --git a/crates/liyi/src/tree_path/lang_c.rs b/crates/liyi/src/tree_path/lang_c.rs new file mode 100644 index 0000000..0531f7c --- /dev/null +++ b/crates/liyi/src/tree_path/lang_c.rs @@ -0,0 +1,159 @@ +use super::LanguageConfig; + +use tree_sitter::Node; + +/// Extract the function name from a C/C++ `function_definition` node. +/// +/// C/C++ functions store their name inside the `declarator` field chain: +/// `function_definition` → (field `declarator`) `function_declarator` +/// → (field `declarator`) `identifier` / `field_identifier`. +/// Pointer declarators and other wrappers may appear in the chain; +/// we unwrap them until we find a `function_declarator`. +pub(super) fn c_extract_declarator_name(node: &Node, source: &str) -> Option { + let declarator = node.child_by_field_name("declarator")?; + let func_decl = unwrap_to_function_declarator(&declarator)?; + let name_node = func_decl.child_by_field_name("declarator")?; + Some(source[name_node.byte_range()].to_string()) +} + +/// Walk through pointer_declarator / parenthesized_declarator / attributed_declarator +/// wrappers to find the inner `function_declarator`. 
+fn unwrap_to_function_declarator<'a>(node: &Node<'a>) -> Option> { + match node.kind() { + "function_declarator" => Some(*node), + "pointer_declarator" | "parenthesized_declarator" | "attributed_declarator" => { + let inner = node.child_by_field_name("declarator")?; + unwrap_to_function_declarator(&inner) + } + _ => None, + } +} + +/// Custom name extraction for C nodes. +/// +/// Handles `function_definition` (name in declarator chain) and +/// `type_definition` (name in declarator field, which is a type_identifier). +fn c_node_name(node: &Node, source: &str) -> Option { + match node.kind() { + "function_definition" => c_extract_declarator_name(node, source), + "type_definition" => { + // typedef: the 'declarator' field holds the new type name + let declarator = node.child_by_field_name("declarator")?; + Some(source[declarator.byte_range()].to_string()) + } + _ => None, + } +} + +/// C language configuration. +pub(super) static CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_c::LANGUAGE.into(), + extensions: &["c", "h"], + kind_map: &[ + ("fn", "function_definition"), + ("struct", "struct_specifier"), + ("enum", "enum_specifier"), + ("typedef", "type_definition"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body"], + custom_name: Some(c_node_name), +}; + +#[cfg(test)] +mod tests { + use crate::tree_path::*; + use std::path::Path; + + const SAMPLE_C: &str = r#"#include + +struct Point { + int x; + int y; +}; + +enum Color { RED, GREEN, BLUE }; + +typedef struct Point Point_t; + +void process(int x, int y) { + printf("hello"); +} + +static int helper(void) { + return 42; +} +"#; + + #[test] + fn resolve_c_function() { + let span = resolve_tree_path(SAMPLE_C, "fn::process", Language::C); + assert!(span.is_some(), "should resolve fn::process"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_C.lines().collect(); + assert!( + lines[start - 1].contains("void process"), + "span should point to process 
function, got: {}", + lines[start - 1] + ); + } + + #[test] + fn resolve_c_struct() { + let span = resolve_tree_path(SAMPLE_C, "struct::Point", Language::C); + assert!(span.is_some(), "should resolve struct::Point"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_C.lines().collect(); + assert!( + lines[start - 1].contains("struct Point"), + "span should point to Point struct" + ); + } + + #[test] + fn resolve_c_enum() { + let span = resolve_tree_path(SAMPLE_C, "enum::Color", Language::C); + assert!(span.is_some(), "should resolve enum::Color"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_C.lines().collect(); + assert!( + lines[start - 1].contains("enum Color"), + "span should point to Color enum" + ); + } + + #[test] + fn resolve_c_typedef() { + let span = resolve_tree_path(SAMPLE_C, "typedef::Point_t", Language::C); + assert!(span.is_some(), "should resolve typedef::Point_t"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_C.lines().collect(); + assert!( + lines[start - 1].contains("typedef"), + "span should point to typedef" + ); + } + + #[test] + fn compute_c_function_path() { + let span = resolve_tree_path(SAMPLE_C, "fn::process", Language::C).unwrap(); + let path = compute_tree_path(SAMPLE_C, span, Language::C); + assert_eq!(path, "fn::process"); + } + + #[test] + fn roundtrip_c() { + for tp in &["fn::process", "fn::helper", "struct::Point", "enum::Color"] { + let span = resolve_tree_path(SAMPLE_C, tp, Language::C).unwrap(); + let path = compute_tree_path(SAMPLE_C, span, Language::C); + assert_eq!(&path, tp, "roundtrip failed for {tp}"); + } + } + + #[test] + fn detect_c_extensions() { + assert_eq!(detect_language(Path::new("main.c")), Some(Language::C)); + assert_eq!(detect_language(Path::new("header.h")), Some(Language::C)); + } +} diff --git a/crates/liyi/src/tree_path/lang_cpp.rs b/crates/liyi/src/tree_path/lang_cpp.rs new file mode 100644 index 0000000..08088b1 --- /dev/null +++ 
b/crates/liyi/src/tree_path/lang_cpp.rs @@ -0,0 +1,155 @@ +use super::LanguageConfig; +use super::lang_c::c_extract_declarator_name; + +use tree_sitter::Node; + +/// Custom name extraction for C++ nodes. +/// +/// Extends `c_node_name` with C++-specific patterns: +/// - `template_declaration`: transparent wrapper — extracts name from inner decl. +/// - `namespace_definition`: name is in a `namespace_identifier` child (no "name" field). +fn cpp_node_name(node: &Node, source: &str) -> Option { + match node.kind() { + "function_definition" => c_extract_declarator_name(node, source), + "type_definition" | "alias_declaration" => { + let name_node = node.child_by_field_name("name") + .or_else(|| node.child_by_field_name("declarator"))?; + Some(source[name_node.byte_range()].to_string()) + } + "template_declaration" => { + // template_declaration wraps an inner declaration — find it and + // extract the name from the inner node. + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + match child.kind() { + "function_definition" => return c_extract_declarator_name(&child, source), + "class_specifier" | "struct_specifier" | "enum_specifier" + | "concept_definition" | "alias_declaration" => { + let n = child.child_by_field_name("name")?; + return Some(source[n.byte_range()].to_string()); + } + // A template can also wrap another template_declaration (nested) + "template_declaration" => return cpp_node_name(&child, source), + _ => {} + } + } + None + } + _ => None, + } +} + +/// C++ language configuration. 
+pub(super) static CONFIG: LanguageConfig = LanguageConfig {
+    ts_language: || tree_sitter_cpp::LANGUAGE.into(), // closure, so the grammar handle is only built when requested
+    extensions: &["cpp", "cc", "cxx", "hpp", "hh", "hxx", "h++", "c++"],
+    kind_map: &[ // (tree_path shorthand, tree-sitter node kind) pairs
+        ("fn", "function_definition"),
+        ("class", "class_specifier"),
+        ("struct", "struct_specifier"),
+        ("namespace", "namespace_definition"),
+        ("enum", "enum_specifier"),
+        ("template", "template_declaration"),
+        ("typedef", "type_definition"),
+        ("using", "alias_declaration"),
+    ],
+    name_field: "name",
+    name_overrides: &[],
+    body_fields: &["body", "declaration_list"],
+    custom_name: Some(cpp_node_name), // NOTE(review): hook defined above this chunk; confirm what happens when it returns None
+};
+
+#[cfg(test)]
+mod tests {
+    use crate::tree_path::*;
+    use std::path::Path;
+
+    const SAMPLE_CPP: &str = r#"namespace math {
+
+class Calculator {
+public:
+    int add(int a, int b) {
+        return a + b;
+    }
+};
+
+struct Point {
+    int x, y;
+};
+
+enum class Color { Red, Green, Blue };
+
+}
+
+void standalone() {}
+"#;
+
+    #[test]
+    fn resolve_cpp_namespace() { // a top-level namespace path resolves to a span
+        let span = resolve_tree_path(SAMPLE_CPP, "namespace::math", Language::Cpp);
+        assert!(span.is_some(), "should resolve namespace::math");
+        let [start, _end] = span.unwrap();
+        let lines: Vec<&str> = SAMPLE_CPP.lines().collect();
+        assert!(
+            lines[start - 1].contains("namespace math"), // `start` is treated as a 1-based line number
+            "span should point to namespace math, got: {}",
+            lines[start - 1]
+        );
+    }
+
+    #[test]
+    fn resolve_cpp_class_in_namespace() { // two-level path: namespace, then class
+        let span = resolve_tree_path(
+            SAMPLE_CPP,
+            "namespace::math::class::Calculator",
+            Language::Cpp,
+        );
+        assert!(span.is_some(), "should resolve namespace::math::class::Calculator");
+    }
+
+    #[test]
+    fn resolve_cpp_method_in_class() { // three-level path down to a member function
+        let span = resolve_tree_path(
+            SAMPLE_CPP,
+            "namespace::math::class::Calculator::fn::add",
+            Language::Cpp,
+        );
+        assert!(span.is_some(), "should resolve nested method");
+        let [start, _end] = span.unwrap();
+        let lines: Vec<&str> = SAMPLE_CPP.lines().collect();
+        assert!(
+            lines[start - 1].contains("add"),
+            "span should point to add method"
+        );
+    }
+
+    #[test]
+    fn resolve_cpp_standalone() { // free function outside any namespace
+        let span = resolve_tree_path(SAMPLE_CPP, "fn::standalone", Language::Cpp);
+        assert!(span.is_some(), "should resolve fn::standalone");
+    }
+
+    #[test]
+    fn resolve_cpp_enum() { // `enum class` nested in the namespace
+        let span = resolve_tree_path(
+            SAMPLE_CPP,
+            "namespace::math::enum::Color",
+            Language::Cpp,
+        );
+        assert!(span.is_some(), "should resolve enum in namespace");
+    }
+
+    #[test]
+    fn roundtrip_cpp() { // resolve a path to a span, then recompute the same path from that span
+        let span = resolve_tree_path(SAMPLE_CPP, "fn::standalone", Language::Cpp).unwrap();
+        let path = compute_tree_path(SAMPLE_CPP, span, Language::Cpp);
+        assert_eq!(path, "fn::standalone");
+    }
+
+    #[test]
+    fn detect_cpp_extensions() { // spot-checks three of the extensions listed in CONFIG
+        assert_eq!(detect_language(Path::new("main.cpp")), Some(Language::Cpp));
+        assert_eq!(detect_language(Path::new("main.cc")), Some(Language::Cpp));
+        assert_eq!(detect_language(Path::new("header.hpp")), Some(Language::Cpp));
+    }
+}
diff --git a/crates/liyi/src/tree_path/lang_csharp.rs b/crates/liyi/src/tree_path/lang_csharp.rs
new file mode 100644
index 0000000..a93b17d
--- /dev/null
+++ b/crates/liyi/src/tree_path/lang_csharp.rs
@@ -0,0 +1,139 @@
+use super::LanguageConfig;
+
+/// C# language configuration.
+pub(super) static CONFIG: LanguageConfig = LanguageConfig {
+    ts_language: || tree_sitter_c_sharp::LANGUAGE.into(), // closure, so the grammar handle is only built when requested
+    extensions: &["cs"],
+    kind_map: &[ // (tree_path shorthand, tree-sitter node kind) pairs
+        ("fn", "method_declaration"),
+        ("class", "class_declaration"),
+        ("interface", "interface_declaration"),
+        ("enum", "enum_declaration"),
+        ("struct", "struct_declaration"),
+        ("namespace", "namespace_declaration"),
+        ("constructor", "constructor_declaration"),
+        ("property", "property_declaration"),
+        ("record", "record_declaration"),
+        ("delegate", "delegate_declaration"),
+    ],
+    name_field: "name",
+    name_overrides: &[],
+    body_fields: &["body"],
+    custom_name: None, // no language-specific name hook for C#
+};
+
+#[cfg(test)]
+mod tests {
+    use crate::tree_path::*;
+    use std::path::Path;
+
+    const SAMPLE_CSHARP: &str = r#"namespace MyApp {
+
+class Calculator {
+    public int Add(int a, int b) {
+        return a + b;
+    }
+
+    public string Name { get; set; }
+
+    public Calculator() {}
+}
+
+interface IComputable {
+    int Compute(int x);
+}
+
+enum Direction {
+    North, South, East, West
+}
+
+struct Vector {
+    public int X;
+    public int Y;
+}
+
+record Person(string Name, int Age);
+
+}
+"#;
+
+    #[test]
+    fn resolve_csharp_class() { // class nested in a block-scoped namespace
+        let span = resolve_tree_path(
+            SAMPLE_CSHARP,
+            "namespace::MyApp::class::Calculator",
+            Language::CSharp,
+        );
+        assert!(span.is_some(), "should resolve namespace::MyApp::class::Calculator");
+    }
+
+    #[test]
+    fn resolve_csharp_method() { // three-level path: namespace, class, method
+        let span = resolve_tree_path(
+            SAMPLE_CSHARP,
+            "namespace::MyApp::class::Calculator::fn::Add",
+            Language::CSharp,
+        );
+        assert!(span.is_some(), "should resolve method in class in namespace");
+    }
+
+    #[test]
+    fn resolve_csharp_property() { // auto-property addressed through the "property" shorthand
+        let span = resolve_tree_path(
+            SAMPLE_CSHARP,
+            "namespace::MyApp::class::Calculator::property::Name",
+            Language::CSharp,
+        );
+        assert!(span.is_some(), "should resolve property::Name");
+    }
+
+    #[test]
+    fn resolve_csharp_interface() {
+        let span = resolve_tree_path(
+            SAMPLE_CSHARP,
+            "namespace::MyApp::interface::IComputable",
+            Language::CSharp,
+        );
+        assert!(span.is_some(), "should resolve interface::IComputable");
+    }
+
+    #[test]
+    fn resolve_csharp_struct() {
+        let span = resolve_tree_path(
+            SAMPLE_CSHARP,
+            "namespace::MyApp::struct::Vector",
+            Language::CSharp,
+        );
+        assert!(span.is_some(), "should resolve struct::Vector");
+    }
+
+    #[test]
+    fn resolve_csharp_enum() {
+        let span = resolve_tree_path(
+            SAMPLE_CSHARP,
+            "namespace::MyApp::enum::Direction",
+            Language::CSharp,
+        );
+        assert!(span.is_some(), "should resolve enum::Direction");
+    }
+
+    #[test]
+    fn roundtrip_csharp() { // resolve a nested path to a span, then recompute the same path from that span
+        let span = resolve_tree_path(
+            SAMPLE_CSHARP,
+            "namespace::MyApp::class::Calculator::fn::Add",
+            Language::CSharp,
+        )
+        .unwrap();
+        let path = compute_tree_path(SAMPLE_CSHARP, span, Language::CSharp);
+        assert_eq!(path, "namespace::MyApp::class::Calculator::fn::Add");
+    }
+
+    #[test]
+    fn detect_csharp_extension() {
+        assert_eq!(
+            detect_language(Path::new("Program.cs")),
+            Some(Language::CSharp)
+        );
+    }
+}
diff --git a/crates/liyi/src/tree_path/lang_go.rs b/crates/liyi/src/tree_path/lang_go.rs
new file mode 100644
index 0000000..af712f4
--- /dev/null
+++ b/crates/liyi/src/tree_path/lang_go.rs
@@ -0,0 +1,314 @@
+use super::LanguageConfig;
+
+use tree_sitter::Node;
+
+/// Custom name extraction for Go nodes.
+///
+/// Handles three Go-specific patterns:
+/// - `method_declaration`: encodes receiver type into the name, producing
+///   `ReceiverType.MethodName` or `(*ReceiverType).MethodName`.
+/// - `type_declaration`: navigates to the inner `type_spec` for the name.
+/// - `const_declaration` / `var_declaration`: navigates to the inner spec.
+fn go_node_name(node: &Node, source: &str) -> Option<String> { // returns None for kinds without a custom rule or when an expected child is missing
+    match node.kind() {
+        "method_declaration" => {
+            let method_name_node = node.child_by_field_name("name")?;
+            let method_name = &source[method_name_node.byte_range()];
+
+            let receiver = node.child_by_field_name("receiver")?;
+            let mut cursor = receiver.walk();
+            let param = receiver // first parameter_declaration child of the receiver node
+                .children(&mut cursor)
+                .find(|c| c.kind() == "parameter_declaration")?;
+
+            let type_node = param.child_by_field_name("type")?;
+            let receiver_type = if type_node.kind() == "pointer_type" {
+                let mut cursor2 = type_node.walk();
+                let inner = type_node // only a plain type_identifier pointee is handled; anything else yields None
+                    .children(&mut cursor2)
+                    .find(|c| c.kind() == "type_identifier")?;
+                format!("(*{})", &source[inner.byte_range()]) // pointer receiver is rendered as "(*T)"
+            } else {
+                source[type_node.byte_range()].to_string() // value receiver: the type's source text verbatim
+            };
+
+            Some(format!("{receiver_type}.{method_name}"))
+        }
+        "type_declaration" => {
+            let mut cursor = node.walk();
+            let type_spec = node // only the first type_spec child is named
+                .children(&mut cursor)
+                .find(|c| c.kind() == "type_spec")?;
+            let name_node = type_spec.child_by_field_name("name")?;
+            Some(source[name_node.byte_range()].to_string())
+        }
+        "const_declaration" => {
+            let mut cursor = node.walk();
+            let spec = node // only the first const_spec child is named
+                .children(&mut cursor)
+                .find(|c| c.kind() == "const_spec")?;
+            let name_node = spec.child_by_field_name("name")?;
+            Some(source[name_node.byte_range()].to_string())
+        }
+        "var_declaration" => {
+            let mut cursor = node.walk();
+            let spec = node // only a direct var_spec child is considered
+                .children(&mut cursor)
+                .find(|c| c.kind() == "var_spec")?;
+            let name_node = spec.child_by_field_name("name")?;
+            Some(source[name_node.byte_range()].to_string())
+        }
+        _ => None,
+    }
+}
+
+/// Go language configuration.
+pub(super) static CONFIG: LanguageConfig = LanguageConfig {
+    ts_language: || tree_sitter_go::LANGUAGE.into(), // closure, so the grammar handle is only built when requested
+    extensions: &["go"],
+    kind_map: &[ // (tree_path shorthand, tree-sitter node kind) pairs
+        ("fn", "function_declaration"),
+        ("method", "method_declaration"),
+        ("type", "type_declaration"),
+        ("const", "const_declaration"),
+        ("var", "var_declaration"),
+    ],
+    name_field: "name",
+    name_overrides: &[],
+    body_fields: &["body"],
+    custom_name: Some(go_node_name), // receiver-qualified method names and spec-wrapped declarations
+};
+
+#[cfg(test)]
+mod tests {
+    use crate::tree_path::*;
+
+    const SAMPLE_GO: &str = r#"package main
+
+import "fmt"
+
+// Calculator performs arithmetic operations
+type Calculator struct {
+    value int
+}
+
+// Reader is an interface
+type Reader interface {
+    Read(p []byte) (n int, err error)
+}
+
+// MaxRetries is a constant
+const MaxRetries = 3
+
+// DefaultTimeout is a var
+var DefaultTimeout = 30
+
+// Add adds a number to the calculator's value
+func (c *Calculator) Add(n int) {
+    c.value += n
+}
+
+// Value returns the current value
+func (c Calculator) Value() int {
+    return c.value
+}
+
+// Add is a standalone function
+func Add(a, b int) int {
+    return a + b
+}
+"#;
+
+    #[test]
+    fn resolve_go_function() { // "fn::Add" must pick the free function, not the method also named Add
+        let span = resolve_tree_path(SAMPLE_GO, "fn::Add", Language::Go);
+        assert!(span.is_some(), "should resolve fn::Add");
+        let [start, _end] = span.unwrap();
+        let lines: Vec<&str> = SAMPLE_GO.lines().collect();
+        assert!(
+            lines[start - 1].contains("func Add("), // `start` is treated as a 1-based line number
+            "span should point to Add function, got: {}",
+            lines[start - 1]
+        );
+    }
+
+    #[test]
+    fn resolve_go_pointer_method() { // pointer receiver is spelled "(*Type).Method"
+        let span =
+            resolve_tree_path(SAMPLE_GO, "method::(*Calculator).Add", Language::Go);
+        assert!(span.is_some(), "should resolve method::(*Calculator).Add");
+        let [start, _end] = span.unwrap();
+        let lines: Vec<&str> = SAMPLE_GO.lines().collect();
+        assert!(
+            lines[start - 1].contains("func (c *Calculator) Add"),
+            "span should point to Add method, got: {}",
+            lines[start - 1]
+        );
+    }
+
+    #[test]
+    fn resolve_go_value_method() { // value receiver is spelled "Type.Method"
+        let span =
+            resolve_tree_path(SAMPLE_GO, "method::Calculator.Value", Language::Go);
+        assert!(span.is_some(), "should resolve method::Calculator.Value");
+        let [start, _end] = span.unwrap();
+        let lines: Vec<&str> = SAMPLE_GO.lines().collect();
+        assert!(
+            lines[start - 1].contains("func (c Calculator) Value"),
+            "span should point to Value method, got: {}",
+            lines[start - 1]
+        );
+    }
+
+    #[test]
+    fn resolve_go_type_struct() {
+        let span = resolve_tree_path(SAMPLE_GO, "type::Calculator", Language::Go);
+        assert!(span.is_some(), "should resolve type::Calculator");
+        let [start, _end] = span.unwrap();
+        let lines: Vec<&str> = SAMPLE_GO.lines().collect();
+        assert!(
+            lines[start - 1].contains("type Calculator struct"),
+            "span should point to Calculator struct, got: {}",
+            lines[start - 1]
+        );
+    }
+
+    #[test]
+    fn resolve_go_type_interface() {
+        let span = resolve_tree_path(SAMPLE_GO, "type::Reader", Language::Go);
+        assert!(span.is_some(), "should resolve type::Reader");
+        let [start, _end] = span.unwrap();
+        let lines: Vec<&str> = SAMPLE_GO.lines().collect();
+        assert!(
+            lines[start - 1].contains("type Reader interface"),
+            "span should point to Reader interface, got: {}",
+            lines[start - 1]
+        );
+    }
+
+    #[test]
+    fn resolve_go_const() {
+        let span = resolve_tree_path(SAMPLE_GO, "const::MaxRetries", Language::Go);
+        assert!(span.is_some(), "should resolve const::MaxRetries");
+        let [start, _end] = span.unwrap();
+        let lines: Vec<&str> = SAMPLE_GO.lines().collect();
+        assert!(
+            lines[start - 1].contains("const MaxRetries"),
+            "span should point to MaxRetries const, got: {}",
+            lines[start - 1]
+        );
+    }
+
+    #[test]
+    fn resolve_go_var() {
+        let span = resolve_tree_path(SAMPLE_GO, "var::DefaultTimeout", Language::Go);
+        assert!(span.is_some(), "should resolve var::DefaultTimeout");
+        let [start, _end] = span.unwrap();
+        let lines: Vec<&str> = SAMPLE_GO.lines().collect();
+        assert!(
+            lines[start - 1].contains("var DefaultTimeout"),
+            "span should point to DefaultTimeout var, got: {}",
+            lines[start - 1]
+        );
+    }
+
+    #[test]
+    fn compute_go_function_path() { // span runs from the last "func Add(" line to the end of the sample
+        let lines: Vec<&str> = SAMPLE_GO.lines().collect();
+        let start = lines
+            .iter()
+            .enumerate()
+            .rev() // search from the bottom of the sample upward
+            .find(|(_, l)| l.contains("func Add("))
+            .unwrap()
+            .0
+            + 1; // 0-based index -> 1-based line number
+        let end = lines.len();
+
+        let path = compute_tree_path(SAMPLE_GO, [start, end], Language::Go);
+        assert_eq!(path, "fn::Add");
+    }
+
+    #[test]
+    fn compute_go_pointer_method_path() {
+        let lines: Vec<&str> = SAMPLE_GO.lines().collect();
+        let start = lines
+            .iter()
+            .position(|l| l.contains("func (c *Calculator) Add"))
+            .unwrap()
+            + 1;
+        let end = lines
+            .iter()
+            .enumerate()
+            .skip(start) // `start` is 1-based, so this skips through the header line itself
+            .find(|(_, l)| l.starts_with('}')) // first closing brace at column 0 after the header
+            .map(|(i, _)| i + 1)
+            .unwrap_or(lines.len());
+
+        let path = compute_tree_path(SAMPLE_GO, [start, end], Language::Go);
+        assert_eq!(path, "method::(*Calculator).Add");
+    }
+
+    #[test]
+    fn compute_go_value_method_path() {
+        let lines: Vec<&str> = SAMPLE_GO.lines().collect();
+        let start = lines
+            .iter()
+            .position(|l| l.contains("func (c Calculator) Value"))
+            .unwrap()
+            + 1;
+        let end = lines
+            .iter()
+            .enumerate()
+            .skip(start)
+            .find(|(_, l)| l.starts_with('}'))
+            .map(|(i, _)| i + 1)
+            .unwrap_or(lines.len());
+
+        let path = compute_tree_path(SAMPLE_GO, [start, end], Language::Go);
+        assert_eq!(path, "method::Calculator.Value");
+    }
+
+    #[test]
+    fn compute_go_type_path() {
+        let lines: Vec<&str> = SAMPLE_GO.lines().collect();
+        let start = lines
+            .iter()
+            .position(|l| l.contains("type Calculator struct"))
+            .unwrap()
+            + 1;
+        let end = lines
+            .iter()
+            .enumerate()
+            .skip(start)
+            .find(|(_, l)| l.starts_with('}'))
+            .map(|(i, _)| i + 1)
+            .unwrap_or(lines.len());
+
+        let path = compute_tree_path(SAMPLE_GO, [start, end], Language::Go);
+        assert_eq!(path, "type::Calculator");
+    }
+
+    #[test]
+    fn roundtrip_go() { // path -> span -> path -> span must be stable
+        let resolved_span = resolve_tree_path(SAMPLE_GO, "fn::Add", Language::Go).unwrap();
+
+        let computed_path = compute_tree_path(SAMPLE_GO, resolved_span, Language::Go);
+        assert_eq!(computed_path, "fn::Add");
+
+        let re_resolved = resolve_tree_path(SAMPLE_GO, &computed_path, Language::Go).unwrap();
+        assert_eq!(re_resolved, resolved_span);
+    }
+
+    #[test]
+    fn roundtrip_go_method() { // same stability check for a receiver-qualified method name
+        let resolved_span =
+            resolve_tree_path(SAMPLE_GO, "method::(*Calculator).Add", Language::Go).unwrap();
+
+        let computed_path = compute_tree_path(SAMPLE_GO, resolved_span, Language::Go);
+        assert_eq!(computed_path, "method::(*Calculator).Add");
+
+        let re_resolved = resolve_tree_path(SAMPLE_GO, &computed_path, Language::Go).unwrap();
+        assert_eq!(re_resolved, resolved_span);
+    }
+}
diff --git a/crates/liyi/src/tree_path/lang_java.rs b/crates/liyi/src/tree_path/lang_java.rs
new file mode 100644
index 0000000..5fba7c9
--- /dev/null
+++ b/crates/liyi/src/tree_path/lang_java.rs
@@ -0,0 +1,119 @@
+use super::LanguageConfig;
+
+/// Java language configuration.
+pub(super) static CONFIG: LanguageConfig = LanguageConfig {
+    ts_language: || tree_sitter_java::LANGUAGE.into(), // closure, so the grammar handle is only built when requested
+    extensions: &["java"],
+    kind_map: &[ // (tree_path shorthand, tree-sitter node kind) pairs
+        ("fn", "method_declaration"),
+        ("class", "class_declaration"),
+        ("interface", "interface_declaration"),
+        ("enum", "enum_declaration"),
+        ("constructor", "constructor_declaration"),
+        ("record", "record_declaration"),
+        ("annotation", "annotation_type_declaration"),
+    ],
+    name_field: "name",
+    name_overrides: &[],
+    body_fields: &["body"],
+    custom_name: None, // no language-specific name hook for Java
+};
+
+#[cfg(test)]
+mod tests {
+    use crate::tree_path::*;
+    use std::path::Path;
+
+    const SAMPLE_JAVA: &str = r#"package com.example;
+
+public class Calculator {
+    public int add(int a, int b) {
+        return a + b;
+    }
+
+    public Calculator() {
+        // constructor
+    }
+}
+
+interface Computable {
+    int compute(int x);
+}
+
+enum Direction {
+    NORTH, SOUTH, EAST, WEST
+}
+
+record Point(int x, int y) {}
+"#;
+
+    #[test]
+    fn resolve_java_class() {
+        let span = resolve_tree_path(SAMPLE_JAVA, "class::Calculator", Language::Java);
+        assert!(span.is_some(), "should resolve class::Calculator");
+        let [start, _end] = span.unwrap();
+        let lines: Vec<&str> = SAMPLE_JAVA.lines().collect();
+        assert!(
+            lines[start - 1].contains("class Calculator"), // `start` is treated as a 1-based line number
+            "span should point to Calculator class"
+        );
+    }
+
+    #[test]
+    fn resolve_java_method() {
+        let span = resolve_tree_path(
+            SAMPLE_JAVA,
+            "class::Calculator::fn::add",
+            Language::Java,
+        );
+        assert!(span.is_some(), "should resolve class::Calculator::fn::add");
+    }
+
+    #[test]
+    fn resolve_java_constructor() { // constructors have their own shorthand and are named after the class
+        let span = resolve_tree_path(
+            SAMPLE_JAVA,
+            "class::Calculator::constructor::Calculator",
+            Language::Java,
+        );
+        assert!(span.is_some(), "should resolve constructor");
+    }
+
+    #[test]
+    fn resolve_java_interface() {
+        let span = resolve_tree_path(SAMPLE_JAVA, "interface::Computable", Language::Java);
+        assert!(span.is_some(), "should resolve interface::Computable");
+    }
+
+    #[test]
+    fn resolve_java_enum() {
+        let span = resolve_tree_path(SAMPLE_JAVA, "enum::Direction", Language::Java);
+        assert!(span.is_some(), "should resolve enum::Direction");
+    }
+
+    #[test]
+    fn resolve_java_record() {
+        let span = resolve_tree_path(SAMPLE_JAVA, "record::Point", Language::Java);
+        assert!(span.is_some(), "should resolve record::Point");
+    }
+
+    #[test]
+    fn roundtrip_java() { // resolve a nested path to a span, then recompute the same path from that span
+        let span = resolve_tree_path(
+            SAMPLE_JAVA,
+            "class::Calculator::fn::add",
+            Language::Java,
+        )
+        .unwrap();
+        let path = compute_tree_path(SAMPLE_JAVA, span, Language::Java);
+        assert_eq!(path, "class::Calculator::fn::add");
+    }
+
+    #[test]
+    fn detect_java_extension() {
+        assert_eq!(
+            detect_language(Path::new("Main.java")),
+            Some(Language::Java)
+        );
+    }
+}
diff --git a/crates/liyi/src/tree_path/lang_javascript.rs b/crates/liyi/src/tree_path/lang_javascript.rs
new file mode 100644
index 0000000..522f8f2
--- /dev/null
+++ b/crates/liyi/src/tree_path/lang_javascript.rs
@@ -0,0 +1,120 @@
+use super::LanguageConfig;
+
+/// JavaScript language configuration.
+pub(super) static CONFIG: LanguageConfig = LanguageConfig {
+    ts_language: || tree_sitter_javascript::LANGUAGE.into(), // closure, so the grammar handle is only built when requested
+    extensions: &["js", "mjs", "cjs", "jsx"],
+    kind_map: &[ // (tree_path shorthand, tree-sitter node kind) pairs
+        ("fn", "function_declaration"),
+        ("class", "class_declaration"),
+        ("method", "method_definition"),
+    ],
+    name_field: "name",
+    name_overrides: &[],
+    body_fields: &["body"],
+    custom_name: None, // no language-specific name hook for JavaScript
+};
+
+#[cfg(test)]
+mod tests {
+    use crate::tree_path::*;
+
+    const SAMPLE_JS: &str = r#"// A simple counter module
+
+class Counter {
+    constructor(initial = 0) {
+        this.count = initial;
+    }
+
+    increment() {
+        this.count++;
+    }
+
+    getValue() {
+        return this.count;
+    }
+}
+
+function createCounter(initial) {
+    return new Counter(initial);
+}
+
+const utils = {
+    formatCount: (n) => `${n} items`
+};
+"#;
+
+    #[test]
+    fn resolve_js_function() {
+        let span = resolve_tree_path(SAMPLE_JS, "fn::createCounter", Language::JavaScript);
+        assert!(span.is_some(), "should resolve fn::createCounter");
+        let [start, _end] = span.unwrap();
+        let lines: Vec<&str> = SAMPLE_JS.lines().collect();
+        assert!(
+            lines[start - 1].contains("function createCounter"), // `start` is treated as a 1-based line number
+            "span should point to createCounter function"
+        );
+    }
+
+    #[test]
+    fn resolve_js_class() {
+        let span = resolve_tree_path(SAMPLE_JS, "class::Counter", Language::JavaScript);
+        assert!(span.is_some(), "should resolve class::Counter");
+        let [start, _end] = span.unwrap();
+        let lines: Vec<&str> = SAMPLE_JS.lines().collect();
+        assert!(
+            lines[start - 1].contains("class Counter"),
+            "span should point to Counter class"
+        );
+    }
+
+    #[test]
+    fn resolve_js_method() { // class members use the "method" shorthand, not "fn"
+        let span = resolve_tree_path(
+            SAMPLE_JS,
+            "class::Counter::method::increment",
+            Language::JavaScript,
+        );
+        assert!(
+            span.is_some(),
+            "should resolve class::Counter::method::increment"
+        );
+        let [start, _end] = span.unwrap();
+        let lines: Vec<&str> = SAMPLE_JS.lines().collect();
+        assert!(
+            lines[start - 1].contains("increment()"),
+            "span should point to increment method"
+        );
+    }
+
+    #[test]
+    fn compute_js_function_path() {
+        let lines: Vec<&str> = SAMPLE_JS.lines().collect();
+        let start = lines
+            .iter()
+            .position(|l| l.contains("function createCounter"))
+            .unwrap()
+            + 1; // 0-based index -> 1-based line number
+        let end = lines.len() - 3; // Rough end
+
+        let path = compute_tree_path(SAMPLE_JS, [start, end], Language::JavaScript);
+        assert_eq!(path, "fn::createCounter");
+    }
+
+    #[test]
+    fn roundtrip_js() { // path -> span -> path -> span must be stable
+        let resolved_span = resolve_tree_path(
+            SAMPLE_JS,
+            "class::Counter::method::getValue",
+            Language::JavaScript,
+        )
+        .unwrap();
+
+        let computed_path = compute_tree_path(SAMPLE_JS, resolved_span, Language::JavaScript);
+        assert_eq!(computed_path, "class::Counter::method::getValue");
+
+        let re_resolved =
+            resolve_tree_path(SAMPLE_JS, &computed_path, Language::JavaScript).unwrap();
+        assert_eq!(re_resolved, resolved_span);
+    }
+}
diff --git a/crates/liyi/src/tree_path/lang_kotlin.rs b/crates/liyi/src/tree_path/lang_kotlin.rs
new file mode 100644
index 0000000..ffc1cdc
--- /dev/null
+++ b/crates/liyi/src/tree_path/lang_kotlin.rs
@@ -0,0 +1,126 @@
+use super::LanguageConfig;
+
+use tree_sitter::Node;
+
+/// Custom name extraction for Kotlin nodes.
+///
+/// Handles `property_declaration` where the name is in a child
+/// `variable_declaration` node, and `type_alias` where the name is
+/// in an `identifier` child before the `=` (the `type` field is the RHS).
+fn kotlin_node_name(node: &Node, source: &str) -> Option { + match node.kind() { + "property_declaration" => { + let mut cursor = node.walk(); + // Name is in the first variable_declaration or identifier child + for child in node.children(&mut cursor) { + if child.kind() == "variable_declaration" { + let name = child.child_by_field_name("name") + .or_else(|| { + let mut c2 = child.walk(); + child.children(&mut c2).find(|c| c.kind() == "simple_identifier") + })?; + return Some(source[name.byte_range()].to_string()); + } + if child.kind() == "simple_identifier" { + return Some(source[child.byte_range()].to_string()); + } + } + None + } + "type_alias" => { + let mut cursor = node.walk(); + node.children(&mut cursor) + .find(|c| c.kind() == "type_identifier" || c.kind() == "simple_identifier") + .map(|c| source[c.byte_range()].to_string()) + } + _ => None, + } +} + +/// Kotlin language configuration. +pub(super) static CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_kotlin_ng::LANGUAGE.into(), + extensions: &["kt", "kts"], + kind_map: &[ + ("fn", "function_declaration"), + ("class", "class_declaration"), + ("object", "object_declaration"), + ("property", "property_declaration"), + ("typealias", "type_alias"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body", "class_body"], + custom_name: Some(kotlin_node_name), +}; + +#[cfg(test)] +mod tests { + use crate::tree_path::*; + use std::path::Path; + + const SAMPLE_KOTLIN: &str = r#"class Calculator { + fun add(a: Int, b: Int): Int { + return a + b + } +} + +object Singleton { + fun instance(): Singleton = this +} + +fun standalone(): Int { + return 42 +} + +typealias StringList = List +"#; + + #[test] + fn resolve_kotlin_class() { + let span = resolve_tree_path(SAMPLE_KOTLIN, "class::Calculator", Language::Kotlin); + assert!(span.is_some(), "should resolve class::Calculator"); + } + + #[test] + fn resolve_kotlin_method() { + let span = resolve_tree_path( + SAMPLE_KOTLIN, + 
"class::Calculator::fn::add", + Language::Kotlin, + ); + assert!(span.is_some(), "should resolve class::Calculator::fn::add"); + } + + #[test] + fn resolve_kotlin_object() { + let span = resolve_tree_path(SAMPLE_KOTLIN, "object::Singleton", Language::Kotlin); + assert!(span.is_some(), "should resolve object::Singleton"); + } + + #[test] + fn resolve_kotlin_function() { + let span = resolve_tree_path(SAMPLE_KOTLIN, "fn::standalone", Language::Kotlin); + assert!(span.is_some(), "should resolve fn::standalone"); + } + + #[test] + fn roundtrip_kotlin() { + let span = + resolve_tree_path(SAMPLE_KOTLIN, "fn::standalone", Language::Kotlin).unwrap(); + let path = compute_tree_path(SAMPLE_KOTLIN, span, Language::Kotlin); + assert_eq!(path, "fn::standalone"); + } + + #[test] + fn detect_kotlin_extension() { + assert_eq!( + detect_language(Path::new("Main.kt")), + Some(Language::Kotlin) + ); + assert_eq!( + detect_language(Path::new("build.gradle.kts")), + Some(Language::Kotlin) + ); + } +} diff --git a/crates/liyi/src/tree_path/lang_objc.rs b/crates/liyi/src/tree_path/lang_objc.rs new file mode 100644 index 0000000..80a60ee --- /dev/null +++ b/crates/liyi/src/tree_path/lang_objc.rs @@ -0,0 +1,144 @@ +use super::LanguageConfig; +use super::lang_c::c_extract_declarator_name; + +use tree_sitter::Node; + +/// Custom name extraction for Objective-C nodes. +/// +/// ObjC node types like `class_interface`, `class_implementation`, +/// `protocol_declaration`, `method_declaration`, and `method_definition` +/// do not use standard `name` fields. Their names are extracted from +/// specific child node patterns. +fn objc_node_name(node: &Node, source: &str) -> Option { + match node.kind() { + // C function definitions use the same declarator chain as C. 
+ "function_definition" => c_extract_declarator_name(node, source), + "type_definition" => { + let declarator = node.child_by_field_name("declarator")?; + Some(source[declarator.byte_range()].to_string()) + } + // @interface ClassName or @interface ClassName (Category) + "class_interface" | "class_implementation" => { + let mut cursor = node.walk(); + node.children(&mut cursor) + .find(|c| c.kind() == "identifier" || c.kind() == "type_identifier") + .map(|c| source[c.byte_range()].to_string()) + } + // @protocol ProtocolName + "protocol_declaration" => { + let mut cursor = node.walk(); + node.children(&mut cursor) + .find(|c| c.kind() == "identifier" || c.kind() == "type_identifier") + .map(|c| source[c.byte_range()].to_string()) + } + // - (ReturnType)methodName or - (ReturnType)methodName:(Type)arg + // + (ReturnType)classMethodName + "method_declaration" | "method_definition" => { + let mut cursor = node.walk(); + // The selector is composed of keyword_declarator children or + // a single identifier (for zero-argument methods). + let mut parts: Vec = Vec::new(); + for child in node.children(&mut cursor) { + match child.kind() { + "identifier" | "field_identifier" if parts.is_empty() => { + // Single-part selector (no arguments) + parts.push(source[child.byte_range()].to_string()); + } + "keyword_declarator" => { + // Each keyword_declarator has a keyword child + let mut kw_cursor = child.walk(); + if let Some(kw) = child.children(&mut kw_cursor) + .find(|c| c.kind() == "keyword_selector" || c.kind() == "identifier") + { + parts.push(format!("{}:", &source[kw.byte_range()])); + } + } + _ => {} + } + } + if parts.is_empty() { + None + } else { + Some(parts.join("")) + } + } + _ => None, + } +} + +/// Objective-C language configuration. 
+pub(super) static CONFIG: LanguageConfig = LanguageConfig {
+    ts_language: || tree_sitter_objc::LANGUAGE.into(), // closure, so the grammar handle is only built when requested
+    extensions: &["m", "mm"],
+    kind_map: &[ // (tree_path shorthand, tree-sitter node kind) pairs
+        ("fn", "function_definition"),
+        ("class", "class_interface"),
+        ("impl", "class_implementation"),
+        ("protocol", "protocol_declaration"),
+        ("method", "method_definition"),
+        ("method_decl", "method_declaration"),
+        ("struct", "struct_specifier"),
+        ("enum", "enum_specifier"),
+        ("typedef", "type_definition"),
+    ],
+    name_field: "name",
+    name_overrides: &[],
+    body_fields: &["body"],
+    custom_name: Some(objc_node_name),
+};
+
+#[cfg(test)]
+mod tests { // NOTE(review): SAMPLE_OBJC's "#import" line has no header name; looks truncated, confirm against the original patch
+    use crate::tree_path::*;
+    use std::path::Path;
+
+    const SAMPLE_OBJC: &str = r#"#import
+
+struct CGPoint {
+    float x;
+    float y;
+};
+
+void helper(void) {
+    NSLog(@"hello");
+}
+"#;
+
+    #[test]
+    fn resolve_objc_function() { // plain C function inside an Objective-C source
+        let span = resolve_tree_path(SAMPLE_OBJC, "fn::helper", Language::ObjectiveC);
+        assert!(span.is_some(), "should resolve fn::helper");
+        let [start, _end] = span.unwrap();
+        let lines: Vec<&str> = SAMPLE_OBJC.lines().collect();
+        assert!(
+            lines[start - 1].contains("void helper"), // `start` is treated as a 1-based line number
+            "span should point to helper function"
+        );
+    }
+
+    #[test]
+    fn resolve_objc_struct() {
+        let span = resolve_tree_path(SAMPLE_OBJC, "struct::CGPoint", Language::ObjectiveC);
+        assert!(span.is_some(), "should resolve struct::CGPoint");
+    }
+
+    #[test]
+    fn roundtrip_objc() { // resolve a path to a span, then recompute the same path from that span
+        let span =
+            resolve_tree_path(SAMPLE_OBJC, "fn::helper", Language::ObjectiveC).unwrap();
+        let path = compute_tree_path(SAMPLE_OBJC, span, Language::ObjectiveC);
+        assert_eq!(path, "fn::helper");
+    }
+
+    #[test]
+    fn detect_objc_extensions() { // both .m and .mm map to the same Language variant
+        assert_eq!(
+            detect_language(Path::new("AppDelegate.m")),
+            Some(Language::ObjectiveC)
+        );
+        assert_eq!(
+            detect_language(Path::new("mixed.mm")),
+            Some(Language::ObjectiveC)
+        );
+    }
+}
diff --git a/crates/liyi/src/tree_path/lang_php.rs b/crates/liyi/src/tree_path/lang_php.rs
new file mode 100644
index 0000000..24c9ce1
--- /dev/null
+++ b/crates/liyi/src/tree_path/lang_php.rs @@ -0,0 +1,132 @@ +use super::LanguageConfig; + +use tree_sitter::Node; + +/// Custom name extraction for PHP `const_declaration` nodes. +/// +/// PHP `const_declaration` stores names inside `const_element` children. +fn php_node_name(node: &Node, source: &str) -> Option { + match node.kind() { + "const_declaration" => { + let mut cursor = node.walk(); + let elem = node.children(&mut cursor) + .find(|c| c.kind() == "const_element")?; + let name = elem.child_by_field_name("name")?; + Some(source[name.byte_range()].to_string()) + } + _ => None, + } +} + +/// PHP language configuration (PHP-only grammar, no HTML interleaving). +pub(super) static CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_php::LANGUAGE_PHP_ONLY.into(), + extensions: &["php"], + kind_map: &[ + ("fn", "function_definition"), + ("class", "class_declaration"), + ("method", "method_declaration"), + ("interface", "interface_declaration"), + ("enum", "enum_declaration"), + ("trait", "trait_declaration"), + ("namespace", "namespace_definition"), + ("const", "const_declaration"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body"], + custom_name: Some(php_node_name), +}; + +#[cfg(test)] +mod tests { + use crate::tree_path::*; + use std::path::Path; + + const SAMPLE_PHP: &str = r#" 0 + +def calculate_total(items): + return sum(items) +"#; + + #[test] + fn resolve_python_function() { + let span = resolve_tree_path(SAMPLE_PYTHON, "fn::calculate_total", Language::Python); + assert!(span.is_some(), "should resolve fn::calculate_total"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_PYTHON.lines().collect(); + assert!( + lines[start - 1].contains("def calculate_total"), + "span should point to calculate_total function" + ); + } + + #[test] + fn resolve_python_class() { + let span = resolve_tree_path(SAMPLE_PYTHON, "class::Order", Language::Python); + assert!(span.is_some(), "should resolve 
class::Order"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_PYTHON.lines().collect(); + assert!( + lines[start - 1].contains("class Order"), + "span should point to Order class" + ); + } + + #[test] + fn resolve_python_class_method() { + let span = + resolve_tree_path(SAMPLE_PYTHON, "class::Order::fn::process", Language::Python); + assert!(span.is_some(), "should resolve class::Order::fn::process"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_PYTHON.lines().collect(); + assert!( + lines[start - 1].contains("def process"), + "span should point to process method" + ); + } + + #[test] + fn resolve_python_init_method() { + let span = resolve_tree_path( + SAMPLE_PYTHON, + "class::Order::fn::__init__", + Language::Python, + ); + assert!(span.is_some(), "should resolve class::Order::fn::__init__"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_PYTHON.lines().collect(); + assert!( + lines[start - 1].contains("def __init__"), + "span should point to __init__ method" + ); + } + + #[test] + fn compute_python_function_path() { + let lines: Vec<&str> = SAMPLE_PYTHON.lines().collect(); + let start = lines + .iter() + .position(|l| l.contains("def calculate_total")) + .unwrap() + + 1; + let end = lines.len(); + + let path = compute_tree_path(SAMPLE_PYTHON, [start, end], Language::Python); + assert_eq!(path, "fn::calculate_total"); + } + + #[test] + fn compute_python_class_method_path() { + let lines: Vec<&str> = SAMPLE_PYTHON.lines().collect(); + let start = lines + .iter() + .position(|l| l.contains("def process")) + .unwrap() + + 1; + // Find end of method (next line with same or less indentation) + let end = start + 1; // Single-line body for this test + + let path = compute_tree_path(SAMPLE_PYTHON, [start, end], Language::Python); + assert_eq!(path, "class::Order::fn::process"); + } + + #[test] + fn roundtrip_python() { + // Compute path for fn::calculate_total, then resolve it + let resolved_span = + 
resolve_tree_path(SAMPLE_PYTHON, "fn::calculate_total", Language::Python).unwrap(); + + let computed_path = compute_tree_path(SAMPLE_PYTHON, resolved_span, Language::Python); + assert_eq!(computed_path, "fn::calculate_total"); + + let re_resolved = + resolve_tree_path(SAMPLE_PYTHON, &computed_path, Language::Python).unwrap(); + assert_eq!(re_resolved, resolved_span); + } +} diff --git a/crates/liyi/src/tree_path/lang_rust.rs b/crates/liyi/src/tree_path/lang_rust.rs new file mode 100644 index 0000000..73cc877 --- /dev/null +++ b/crates/liyi/src/tree_path/lang_rust.rs @@ -0,0 +1,23 @@ +use super::LanguageConfig; + +/// Rust language configuration. +pub(super) static CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_rust::LANGUAGE.into(), + extensions: &["rs"], + kind_map: &[ + ("fn", "function_item"), + ("struct", "struct_item"), + ("enum", "enum_item"), + ("impl", "impl_item"), + ("trait", "trait_item"), + ("mod", "mod_item"), + ("const", "const_item"), + ("static", "static_item"), + ("type", "type_item"), + ("macro", "macro_definition"), + ], + name_field: "name", + name_overrides: &[("impl_item", "type")], + body_fields: &["body"], + custom_name: None, +}; diff --git a/crates/liyi/src/tree_path/lang_swift.rs b/crates/liyi/src/tree_path/lang_swift.rs new file mode 100644 index 0000000..efc0fd6 --- /dev/null +++ b/crates/liyi/src/tree_path/lang_swift.rs @@ -0,0 +1,89 @@ +use super::LanguageConfig; + +/// Swift language configuration. 
+pub(super) static CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_swift::LANGUAGE.into(), + extensions: &["swift"], + kind_map: &[ + ("fn", "function_declaration"), + ("class", "class_declaration"), + ("protocol", "protocol_declaration"), + ("enum", "enum_entry"), + ("property", "property_declaration"), + ("init", "init_declaration"), + ("typealias", "typealias_declaration"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body"], + custom_name: None, +}; + +#[cfg(test)] +mod tests { + use crate::tree_path::*; + use std::path::Path; + + const SAMPLE_SWIFT: &str = r#"protocol Drawable { + func draw() +} + +class Shape { + func area() -> Double { + return 0.0 + } + + init() {} +} + +func standalone() -> Int { + return 42 +} + +typealias Callback = () -> Void +"#; + + #[test] + fn resolve_swift_protocol() { + let span = resolve_tree_path(SAMPLE_SWIFT, "protocol::Drawable", Language::Swift); + assert!(span.is_some(), "should resolve protocol::Drawable"); + } + + #[test] + fn resolve_swift_class() { + let span = resolve_tree_path(SAMPLE_SWIFT, "class::Shape", Language::Swift); + assert!(span.is_some(), "should resolve class::Shape"); + } + + #[test] + fn resolve_swift_method() { + let span = resolve_tree_path( + SAMPLE_SWIFT, + "class::Shape::fn::area", + Language::Swift, + ); + assert!(span.is_some(), "should resolve class::Shape::fn::area"); + } + + #[test] + fn resolve_swift_function() { + let span = resolve_tree_path(SAMPLE_SWIFT, "fn::standalone", Language::Swift); + assert!(span.is_some(), "should resolve fn::standalone"); + } + + #[test] + fn roundtrip_swift() { + let span = + resolve_tree_path(SAMPLE_SWIFT, "fn::standalone", Language::Swift).unwrap(); + let path = compute_tree_path(SAMPLE_SWIFT, span, Language::Swift); + assert_eq!(path, "fn::standalone"); + } + + #[test] + fn detect_swift_extension() { + assert_eq!( + detect_language(Path::new("ViewController.swift")), + Some(Language::Swift) + ); + } +} diff --git 
a/crates/liyi/src/tree_path/lang_typescript.rs b/crates/liyi/src/tree_path/lang_typescript.rs new file mode 100644 index 0000000..0776745 --- /dev/null +++ b/crates/liyi/src/tree_path/lang_typescript.rs @@ -0,0 +1,224 @@ +use super::LanguageConfig; + +/// TypeScript language configuration. +pub(super) static CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(), + extensions: &["ts", "mts", "cts"], + kind_map: &[ + ("fn", "function_declaration"), + ("class", "class_declaration"), + ("method", "method_definition"), + ("interface", "interface_declaration"), + ("type", "type_alias_declaration"), + ("enum", "enum_declaration"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body"], + custom_name: None, +}; + +/// TSX language configuration. +pub(super) static TSX_CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_typescript::LANGUAGE_TSX.into(), + extensions: &["tsx"], + kind_map: &[ + ("fn", "function_declaration"), + ("class", "class_declaration"), + ("method", "method_definition"), + ("interface", "interface_declaration"), + ("type", "type_alias_declaration"), + ("enum", "enum_declaration"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body"], + custom_name: None, +}; + +#[cfg(test)] +mod tests { + use crate::tree_path::*; + use std::path::Path; + + const SAMPLE_TS: &str = r#"// A typed user service + +interface User { + id: number; + name: string; +} + +type UserId = number; + +enum UserRole { + Admin, + User, + Guest +} + +class UserService { + private users: User[] = []; + + addUser(user: User): void { + this.users.push(user); + } + + findById(id: UserId): User | undefined { + return this.users.find(u => u.id === id); + } +} + +function createUser(name: string): User { + return { id: Date.now(), name }; +} +"#; + + #[test] + fn resolve_ts_interface() { + let span = resolve_tree_path(SAMPLE_TS, "interface::User", Language::TypeScript); + 
assert!(span.is_some(), "should resolve interface::User"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_TS.lines().collect(); + assert!( + lines[start - 1].contains("interface User"), + "span should point to User interface" + ); + } + + #[test] + fn resolve_ts_type_alias() { + let span = resolve_tree_path(SAMPLE_TS, "type::UserId", Language::TypeScript); + assert!(span.is_some(), "should resolve type::UserId"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_TS.lines().collect(); + assert!( + lines[start - 1].contains("type UserId"), + "span should point to UserId type alias" + ); + } + + #[test] + fn resolve_ts_enum() { + let span = resolve_tree_path(SAMPLE_TS, "enum::UserRole", Language::TypeScript); + assert!(span.is_some(), "should resolve enum::UserRole"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_TS.lines().collect(); + assert!( + lines[start - 1].contains("enum UserRole"), + "span should point to UserRole enum" + ); + } + + #[test] + fn resolve_ts_class_method() { + let span = resolve_tree_path( + SAMPLE_TS, + "class::UserService::method::findById", + Language::TypeScript, + ); + assert!( + span.is_some(), + "should resolve class::UserService::method::findById" + ); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_TS.lines().collect(); + assert!( + lines[start - 1].contains("findById("), + "span should point to findById method" + ); + } + + #[test] + fn compute_ts_interface_path() { + let lines: Vec<&str> = SAMPLE_TS.lines().collect(); + let start = lines + .iter() + .position(|l| l.contains("interface User")) + .unwrap() + + 1; + let end = start + 3; + + let path = compute_tree_path(SAMPLE_TS, [start, end], Language::TypeScript); + assert_eq!(path, "interface::User"); + } + + #[test] + fn roundtrip_ts() { + let resolved_span = + resolve_tree_path(SAMPLE_TS, "enum::UserRole", Language::TypeScript).unwrap(); + + let computed_path = compute_tree_path(SAMPLE_TS, 
resolved_span, Language::TypeScript); + assert_eq!(computed_path, "enum::UserRole"); + + let re_resolved = + resolve_tree_path(SAMPLE_TS, &computed_path, Language::TypeScript).unwrap(); + assert_eq!(re_resolved, resolved_span); + } + + const SAMPLE_TSX: &str = r#"// A React component + +interface Props { + title: string; + count: number; +} + +function Counter({ title, count }: Props) { + return ( +
+

{title}

+

Count: {count}

+
+ ); +} + +class Container extends React.Component { + render() { + return
{this.props.title}
; + } +} +"#; + + #[test] + fn resolve_tsx_function() { + let span = resolve_tree_path(SAMPLE_TSX, "fn::Counter", Language::Tsx); + assert!(span.is_some(), "should resolve fn::Counter in TSX"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_TSX.lines().collect(); + assert!( + lines[start - 1].contains("function Counter"), + "span should point to Counter function" + ); + } + + #[test] + fn resolve_tsx_class() { + let span = resolve_tree_path(SAMPLE_TSX, "class::Container", Language::Tsx); + assert!(span.is_some(), "should resolve class::Container in TSX"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_TSX.lines().collect(); + assert!( + lines[start - 1].contains("class Container"), + "span should point to Container class" + ); + } + + #[test] + fn resolve_tsx_interface() { + let span = resolve_tree_path(SAMPLE_TSX, "interface::Props", Language::Tsx); + assert!(span.is_some(), "should resolve interface::Props in TSX"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_TSX.lines().collect(); + assert!( + lines[start - 1].contains("interface Props"), + "span should point to Props interface" + ); + } + + #[test] + fn detect_tsx_extension() { + assert_eq!( + detect_language(Path::new("component.tsx")), + Some(Language::Tsx) + ); + } +} diff --git a/crates/liyi/src/tree_path/mod.rs b/crates/liyi/src/tree_path/mod.rs new file mode 100644 index 0000000..d7d8663 --- /dev/null +++ b/crates/liyi/src/tree_path/mod.rs @@ -0,0 +1,752 @@ +//! Tree-sitter structural identity for span recovery. +//! +//! `tree_path` provides format-invariant item identity by encoding an item's +//! position in the AST as a `::` delimited path of (kind, name) segments. +//! For example, `fn::add_money` or `impl::Money::fn::new`. +//! +//! When `tree_path` is populated and a tree-sitter grammar is available for +//! the source language, `liyi reanchor` and `liyi check --fix` use it to +//! 
locate items by structural identity, making span recovery deterministic +//! across formatting changes, import additions, and line reflows. + +mod lang_c; +mod lang_cpp; +mod lang_csharp; +mod lang_go; +mod lang_java; +mod lang_javascript; +mod lang_kotlin; +mod lang_objc; +mod lang_php; +mod lang_python; +mod lang_rust; +mod lang_swift; +mod lang_typescript; + +use std::borrow::Cow; +use std::path::Path; + +use tree_sitter::{Language as TSLanguage, Node, Parser}; + +/// Language-specific configuration for tree_path resolution. +/// +/// Each supported language provides a static `LanguageConfig` that defines +/// how to parse it and map between tree-sitter node kinds and tree_path +/// shorthands. +pub struct LanguageConfig { + /// Function to get the tree-sitter language grammar (lazy initialization). + ts_language: fn() -> TSLanguage, + /// File extensions associated with this language. + extensions: &'static [&'static str], + /// Map from tree_path kind shorthand to tree-sitter node kind. + kind_map: &'static [(&'static str, &'static str)], + /// Field name to extract the node's name (usually "name"). + name_field: &'static str, + /// Overrides for special cases: (node_kind, field_name) pairs. + name_overrides: &'static [(&'static str, &'static str)], + /// Field names to traverse to find a node's body/declaration_list. + body_fields: &'static [&'static str], + /// Custom name extraction for node kinds that need special handling + /// (e.g., Go methods with receiver types, Go type_declaration wrapping type_spec). + /// Returns `Some(name)` for handled kinds, `None` to fall through to default. + custom_name: Option Option>, +} + +impl LanguageConfig { + /// Map tree-sitter node kind → tree_path shorthand. + fn kind_to_shorthand(&self, ts_kind: &str) -> Option<&'static str> { + self.kind_map + .iter() + .find(|(_, ts)| *ts == ts_kind) + .map(|(short, _)| *short) + } + + /// Map tree_path shorthand → tree-sitter node kind. 
+ fn shorthand_to_kind(&self, short: &str) -> Option<&'static str> { + self.kind_map + .iter() + .find(|(s, _)| *s == short) + .map(|(_, ts)| *ts) + } + + /// Extract the name of a named AST node. + /// + /// Returns a `Cow` — borrowed from `source` in the common case, + /// owned when the name is constructed (e.g., Go method receiver encoding). + fn node_name<'a>(&self, node: &Node<'a>, source: &'a str) -> Option> { + // Check custom_name callback first (e.g., Go method receivers) + if let Some(custom) = self.custom_name { + if let Some(name) = custom(node, source) { + return Some(Cow::Owned(name)); + } + } + + let kind = node.kind(); + + // Check for name field override (e.g., impl_item uses "type" field) + let field_name = self + .name_overrides + .iter() + .find(|(k, _)| *k == kind) + .map(|(_, f)| *f) + .unwrap_or(self.name_field); + + let name_node = node.child_by_field_name(field_name)?; + Some(Cow::Borrowed(&source[name_node.byte_range()])) + } + + /// Find a body/declaration_list child for descending into containers. + fn find_body<'a>(&self, node: &Node<'a>) -> Option> { + for field in self.body_fields { + if let Some(body) = node.child_by_field_name(field) { + return Some(body); + } + } + // Fallback: search for body_fields or declaration_list as direct + // (unnamed) children. Needed for languages where the body is a + // positional child rather than a named field (e.g., Kotlin class_body, + // C++ field_declaration_list). + let mut cursor = node.walk(); + node.children(&mut cursor) + .find(|c| { + self.body_fields.contains(&c.kind()) + || c.kind() == "declaration_list" + || c.kind() == "field_declaration_list" + }) + } + + /// Check if the given file extension is associated with this language. + pub fn matches_extension(&self, ext: &str) -> bool { + self.extensions.contains(&ext) + } +} + +/// Supported languages for tree_path resolution. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Language { + Rust, + Python, + Go, + JavaScript, + TypeScript, + Tsx, + C, + Cpp, + Java, + CSharp, + Php, + ObjectiveC, + Kotlin, + Swift, +} + +impl Language { + /// Get the language configuration for this language. + fn config(&self) -> &'static LanguageConfig { + match self { + Language::Rust => &lang_rust::CONFIG, + Language::Python => &lang_python::CONFIG, + Language::Go => &lang_go::CONFIG, + Language::JavaScript => &lang_javascript::CONFIG, + Language::TypeScript => &lang_typescript::CONFIG, + Language::Tsx => &lang_typescript::TSX_CONFIG, + Language::C => &lang_c::CONFIG, + Language::Cpp => &lang_cpp::CONFIG, + Language::Java => &lang_java::CONFIG, + Language::CSharp => &lang_csharp::CONFIG, + Language::Php => &lang_php::CONFIG, + Language::ObjectiveC => &lang_objc::CONFIG, + Language::Kotlin => &lang_kotlin::CONFIG, + Language::Swift => &lang_swift::CONFIG, + } + } + + /// Get the tree-sitter language grammar. + fn ts_language(&self) -> TSLanguage { + (self.config().ts_language)() + } +} + +/// Detect language from file extension. Returns `None` for unsupported +/// languages (unknown extension). +/// +/// # Extension Collision +/// +/// `.h` files are ambiguous (C, C++, or Objective-C). We map them to C +/// by default. Users can override via future configuration if needed. +/// +/// If two languages share an extension (unlikely with built-in languages), +/// the first match in the following order is returned: +/// Rust → Python → Go → JavaScript → TypeScript → TSX → C → C++ → +/// Java → C# → PHP → Objective-C → Kotlin → Swift. 
+pub fn detect_language(path: &Path) -> Option { + let ext = path.extension()?.to_str()?; + + if lang_rust::CONFIG.matches_extension(ext) { + return Some(Language::Rust); + } + + if lang_python::CONFIG.matches_extension(ext) { + return Some(Language::Python); + } + + if lang_go::CONFIG.matches_extension(ext) { + return Some(Language::Go); + } + + if lang_javascript::CONFIG.matches_extension(ext) { + return Some(Language::JavaScript); + } + + if lang_typescript::CONFIG.matches_extension(ext) { + return Some(Language::TypeScript); + } + if lang_typescript::TSX_CONFIG.matches_extension(ext) { + return Some(Language::Tsx); + } + + if lang_c::CONFIG.matches_extension(ext) { + return Some(Language::C); + } + if lang_cpp::CONFIG.matches_extension(ext) { + return Some(Language::Cpp); + } + if lang_java::CONFIG.matches_extension(ext) { + return Some(Language::Java); + } + if lang_csharp::CONFIG.matches_extension(ext) { + return Some(Language::CSharp); + } + if lang_php::CONFIG.matches_extension(ext) { + return Some(Language::Php); + } + if lang_objc::CONFIG.matches_extension(ext) { + return Some(Language::ObjectiveC); + } + if lang_kotlin::CONFIG.matches_extension(ext) { + return Some(Language::Kotlin); + } + if lang_swift::CONFIG.matches_extension(ext) { + return Some(Language::Swift); + } + + None +} + +/// Create a tree-sitter parser for the given language. +fn make_parser(lang: Language) -> Parser { + let mut parser = Parser::new(); + parser + .set_language(&lang.ts_language()) + .expect("tree-sitter grammar should load"); + parser +} + +/// A parsed tree_path segment: (kind_shorthand, name). +#[derive(Debug, Clone, PartialEq, Eq)] +struct PathSegment { + kind: String, + name: String, +} + +/// Parse a tree_path string into segments. 
+/// +/// `"fn::add_money"` → `[PathSegment { kind: "fn", name: "add_money" }]` +/// `"impl::Money::fn::new"` → `[impl/Money, fn/new]` +fn parse_tree_path(tree_path: &str) -> Option> { + let parts: Vec<&str> = tree_path.split("::").collect(); + if !parts.len().is_multiple_of(2) { + return None; // must be pairs + } + let segments: Vec = parts + .chunks(2) + .map(|pair| PathSegment { + kind: pair[0].to_string(), + name: pair[1].to_string(), + }) + .collect(); + if segments.is_empty() { + return None; + } + Some(segments) +} + +/// Resolve a `tree_path` to a source span `[start_line, end_line]` (1-indexed, +/// inclusive). +/// +/// Returns `None` if the tree_path cannot be resolved (item renamed, deleted, +/// grammar unavailable, or language not supported). +pub fn resolve_tree_path(source: &str, tree_path: &str, lang: Language) -> Option<[usize; 2]> { + if tree_path.is_empty() { + return None; + } + + let config = lang.config(); + let segments = parse_tree_path(tree_path)?; + let mut parser = make_parser(lang); + let tree = parser.parse(source, None)?; + let root = tree.root_node(); + + let node = resolve_segments(config, &root, &segments, source)?; + + // Return 1-indexed inclusive line range + let start_line = node.start_position().row + 1; + let end_line = node.end_position().row + 1; + Some([start_line, end_line]) +} + +/// Walk the tree to find a node matching the given path segments. 
+fn resolve_segments<'a>( + config: &LanguageConfig, + parent: &Node<'a>, + segments: &[PathSegment], + source: &'a str, +) -> Option> { + if segments.is_empty() { + return Some(*parent); + } + + let seg = &segments[0]; + let ts_kind = config.shorthand_to_kind(&seg.kind)?; + + let mut cursor = parent.walk(); + for child in parent.children(&mut cursor) { + if child.kind() != ts_kind { + continue; + } + if let Some(name) = config.node_name(&child, source) { + if *name == seg.name && segments.len() == 1 { + return Some(child); + } else if *name == seg.name { + // Descend — look inside this node's body + return resolve_in_body(config, &child, &segments[1..], source); + } + } + } + + None +} + +/// Find subsequent segments inside an item's body (e.g., methods inside impl). +fn resolve_in_body<'a>( + config: &LanguageConfig, + node: &Node<'a>, + segments: &[PathSegment], + source: &'a str, +) -> Option> { + let body = config.find_body(node)?; + resolve_segments(config, &body, segments, source) +} + +/// Compute the canonical `tree_path` for the AST node at the given span. +/// +/// Returns an empty string if no suitable structural path can be determined +/// (e.g., the span doesn't align with a named item, or the language is +/// unsupported). 
+pub fn compute_tree_path(source: &str, span: [usize; 2], lang: Language) -> String { + let config = lang.config(); + let mut parser = make_parser(lang); + let tree = match parser.parse(source, None) { + Some(t) => t, + None => return String::new(), + }; + + let root = tree.root_node(); + // Convert 1-indexed inclusive span to 0-indexed row + let target_start = span[0].saturating_sub(1); + let target_end = span[1].saturating_sub(1); + + // Find the best item node within the target range + let node = match find_item_in_range(config, &root, target_start, target_end) { + Some(n) => n, + None => return String::new(), + }; + + // Build path from root to this node + build_path_to_node(config, &root, &node, source) +} + +/// Find the best item node within [target_start, target_end] (0-indexed rows). +/// +/// Attributes in Rust are sibling nodes, not children of the item, so a +/// sidecar span that includes `#[derive(...)]` lines will start before the +/// item node. We therefore match any item whose start/end rows fall within +/// the target range, preferring the widest match (the outermost item). 
+fn find_item_in_range<'a>( + config: &LanguageConfig, + root: &Node<'a>, + target_start: usize, + target_end: usize, +) -> Option> { + let mut best: Option> = None; + + fn walk<'a>( + config: &LanguageConfig, + node: &Node<'a>, + target_start: usize, + target_end: usize, + best: &mut Option>, + ) { + let start = node.start_position().row; + let end = node.end_position().row; + + // Skip nodes that don't overlap our target + if start > target_end || end < target_start { + return; + } + + // Check if this is a named item node within the target range + if start >= target_start && end <= target_end && is_item_node(config, node) { + // Prefer the widest (outermost) match + if let Some(b) = best { + let b_size = b.end_position().row - b.start_position().row; + let n_size = end - start; + if n_size >= b_size { + *best = Some(*node); + } + } else { + *best = Some(*node); + } + } + + // Recurse into children + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + walk(config, &child, target_start, target_end, best); + } + } + + walk(config, root, target_start, target_end, &mut best); + best +} + +/// Check if a node is an item type we track in tree_path. +fn is_item_node(config: &LanguageConfig, node: &Node) -> bool { + config.kind_to_shorthand(node.kind()).is_some() +} + +/// Build the tree_path string for a given target node by walking from root. +fn build_path_to_node(config: &LanguageConfig, root: &Node, target: &Node, source: &str) -> String { + let mut segments: Vec = Vec::new(); + if collect_path(config, root, target, source, &mut segments) { + segments.join("::") + } else { + String::new() + } +} + +/// Recursively find `target` in the tree and collect path segments. 
+fn collect_path( + config: &LanguageConfig, + node: &Node, + target: &Node, + source: &str, + segments: &mut Vec, +) -> bool { + if node.id() == target.id() { + // We found the target — add this node's segment if it's an item + if let (Some(short), Some(name)) = ( + config.kind_to_shorthand(node.kind()), + config.node_name(node, source), + ) { + segments.push(format!("{short}::{name}")); + return true; + } + return false; + } + + // Check children + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + let child_start = child.start_position().row; + let child_end = child.end_position().row; + let target_start = target.start_position().row; + let target_end = target.end_position().row; + + // Only descend into nodes that contain the target + if child_start <= target_start + && child_end >= target_end + && collect_path(config, &child, target, source, segments) + { + // If this node is an item node, prepend its segment + if is_item_node(config, node) + && let (Some(short), Some(name)) = ( + config.kind_to_shorthand(node.kind()), + config.node_name(node, source), + ) + { + segments.insert(0, format!("{short}::{name}")); + } + return true; + } + } + + false +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + + const SAMPLE_RUST: &str = r#"use std::collections::HashMap; + +/// A monetary amount +pub struct Money { + amount: i64, + currency: String, +} + +impl Money { + pub fn new(amount: i64, currency: String) -> Self { + Self { amount, currency } + } + + pub fn add(&self, other: &Money) -> Result { + if self.currency != other.currency { + return Err("mismatched currencies"); + } + Ok(Money { + amount: self.amount + other.amount, + currency: self.currency.clone(), + }) + } +} + +mod billing { + pub fn charge(amount: i64) -> bool { + amount > 0 + } +} + +fn standalone() -> i32 { + 
42 +} +"#; + + #[test] + fn resolve_top_level_fn() { + let span = resolve_tree_path(SAMPLE_RUST, "fn::standalone", Language::Rust); + assert!(span.is_some(), "should resolve fn::standalone"); + let [start, end] = span.unwrap(); + assert!(start > 0); + assert!(end >= start); + // Verify the span contains the function + let lines: Vec<&str> = SAMPLE_RUST.lines().collect(); + assert!( + lines[start - 1].contains("fn standalone"), + "span start should point to fn standalone, got: {}", + lines[start - 1] + ); + } + + #[test] + fn resolve_struct() { + let span = resolve_tree_path(SAMPLE_RUST, "struct::Money", Language::Rust); + assert!(span.is_some(), "should resolve struct::Money"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_RUST.lines().collect(); + assert!( + lines[start - 1].contains("struct Money"), + "span start should point to struct Money, got: {}", + lines[start - 1] + ); + } + + #[test] + fn resolve_impl_method() { + let span = resolve_tree_path(SAMPLE_RUST, "impl::Money::fn::new", Language::Rust); + assert!(span.is_some(), "should resolve impl::Money::fn::new"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_RUST.lines().collect(); + assert!( + lines[start - 1].contains("fn new"), + "span start should point to fn new, got: {}", + lines[start - 1] + ); + } + + #[test] + fn resolve_impl_method_add() { + let span = resolve_tree_path(SAMPLE_RUST, "impl::Money::fn::add", Language::Rust); + assert!(span.is_some(), "should resolve impl::Money::fn::add"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_RUST.lines().collect(); + assert!( + lines[start - 1].contains("fn add"), + "span start should point to fn add, got: {}", + lines[start - 1] + ); + } + + #[test] + fn resolve_mod_fn() { + let span = resolve_tree_path(SAMPLE_RUST, "mod::billing::fn::charge", Language::Rust); + assert!(span.is_some(), "should resolve mod::billing::fn::charge"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> 
= SAMPLE_RUST.lines().collect(); + assert!( + lines[start - 1].contains("fn charge"), + "span start should point to fn charge, got: {}", + lines[start - 1] + ); + } + + #[test] + fn resolve_impl_block() { + let span = resolve_tree_path(SAMPLE_RUST, "impl::Money", Language::Rust); + assert!(span.is_some(), "should resolve impl::Money"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_RUST.lines().collect(); + assert!( + lines[start - 1].contains("impl Money"), + "span start should point to impl Money, got: {}", + lines[start - 1] + ); + } + + #[test] + fn resolve_nonexistent_returns_none() { + let span = resolve_tree_path(SAMPLE_RUST, "fn::nonexistent", Language::Rust); + assert!(span.is_none()); + } + + #[test] + fn resolve_empty_returns_none() { + let span = resolve_tree_path(SAMPLE_RUST, "", Language::Rust); + assert!(span.is_none()); + } + + #[test] + fn compute_fn_path() { + // Find standalone function line + let lines: Vec<&str> = SAMPLE_RUST.lines().collect(); + let start = lines + .iter() + .position(|l| l.contains("fn standalone")) + .unwrap() + + 1; + let end = lines + .iter() + .enumerate() + .skip(start - 1) + .find(|(_, l)| l.contains('}')) + .unwrap() + .0 + + 1; + + let path = compute_tree_path(SAMPLE_RUST, [start, end], Language::Rust); + assert_eq!(path, "fn::standalone"); + } + + #[test] + fn compute_impl_method_path() { + let lines: Vec<&str> = SAMPLE_RUST.lines().collect(); + let start = lines.iter().position(|l| l.contains("pub fn new")).unwrap() + 1; + // fn new spans from its line to the closing } + let mut brace_depth = 0i32; + let mut end = start; + for (i, line) in lines.iter().enumerate().skip(start - 1) { + for ch in line.chars() { + if ch == '{' { + brace_depth += 1; + } + if ch == '}' { + brace_depth -= 1; + } + } + if brace_depth == 0 { + end = i + 1; + break; + } + } + + let path = compute_tree_path(SAMPLE_RUST, [start, end], Language::Rust); + assert_eq!(path, "impl::Money::fn::new"); + } + + #[test] + fn 
compute_struct_path() { + let lines: Vec<&str> = SAMPLE_RUST.lines().collect(); + let start = lines + .iter() + .position(|l| l.contains("pub struct Money")) + .unwrap() + + 1; + let end = lines + .iter() + .enumerate() + .skip(start - 1) + .find(|(_, l)| l.trim() == "}") + .unwrap() + .0 + + 1; + + let path = compute_tree_path(SAMPLE_RUST, [start, end], Language::Rust); + assert_eq!(path, "struct::Money"); + } + + #[test] + fn roundtrip_resolve_compute() { + // Compute path for fn::standalone, then resolve it — spans should match + // Use tree-sitter to find exact span + let resolved_span = + resolve_tree_path(SAMPLE_RUST, "fn::standalone", Language::Rust).unwrap(); + + let computed_path = compute_tree_path(SAMPLE_RUST, resolved_span, Language::Rust); + assert_eq!(computed_path, "fn::standalone"); + + let re_resolved = resolve_tree_path(SAMPLE_RUST, &computed_path, Language::Rust).unwrap(); + assert_eq!(re_resolved, resolved_span); + } + + #[test] + fn detect_language_rust() { + assert_eq!( + detect_language(Path::new("src/main.rs")), + Some(Language::Rust) + ); + assert_eq!(detect_language(Path::new("foo.py")), Some(Language::Python)); + } + + #[test] + fn resilient_to_formatting() { + // Same code reformatted differently — tree_path should still resolve + let reformatted = r#"use std::collections::HashMap; + +/// A monetary amount +pub struct Money { amount: i64, currency: String } + +impl Money { + pub fn new(amount: i64, currency: String) -> Self { Self { amount, currency } } + + pub fn add(&self, other: &Money) -> Result { + if self.currency != other.currency { return Err("mismatched currencies"); } + Ok(Money { amount: self.amount + other.amount, currency: self.currency.clone() }) + } +} + +mod billing { + pub fn charge(amount: i64) -> bool { amount > 0 } +} + +fn standalone() -> i32 { 42 } +"#; + + // All tree_paths from the original should resolve in the reformatted version + for tp in &[ + "fn::standalone", + "struct::Money", + "impl::Money", + 
"impl::Money::fn::new", + "impl::Money::fn::add", + "mod::billing::fn::charge", + ] { + let span = resolve_tree_path(reformatted, tp, Language::Rust); + assert!(span.is_some(), "should resolve {tp} in reformatted code"); + } + } +} diff --git a/crates/liyi/src/tree_path.rs.liyi.jsonc b/crates/liyi/src/tree_path/mod.rs.liyi.jsonc similarity index 90% rename from crates/liyi/src/tree_path.rs.liyi.jsonc rename to crates/liyi/src/tree_path/mod.rs.liyi.jsonc index c8ee66d..299ef0e 100644 --- a/crates/liyi/src/tree_path.rs.liyi.jsonc +++ b/crates/liyi/src/tree_path/mod.rs.liyi.jsonc @@ -1,15 +1,15 @@ // liyi v0.1 spec file { "version": "0.1", - "source": "crates/liyi/src/tree_path.rs", + "source": "crates/liyi/src/tree_path/mod.rs", "specs": [ { "item": "LanguageConfig", "reviewed": false, "intent": "Define the data-driven abstraction for language-specific tree_path behaviour. Each field captures one language-dependent axis: grammar loader (ts_language), file extensions, kind shorthand mapping, name extraction field and overrides, body-descending fields, and an optional custom_name callback for languages with non-trivial name extraction (e.g., Go receiver encoding).", "source_span": [ - 22, - 39 + 36, + 53 ], "tree_path": "struct::LanguageConfig", "source_hash": "sha256:cc0ae5ada967354b9d5e9863be2c72136c5dd85832b29ee5e44e118d1c99f5da", @@ -20,11 +20,11 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 676, - 728 + 182, + 234 ], "tree_path": "fn::detect_language", - "source_hash": "sha256:6d8686b824ee8856102af3d018b3788e9c99a80b69e078c74572482389ad7e25", + "source_hash": "sha256:93745e6791e0ebb9eb4704bbcdaeaa193791e75c010fd498ec8cf8c79e1bc26c", "source_anchor": "pub fn detect_language(path: &Path) -> Option {" }, { @@ -32,8 +32,8 @@ "reviewed": false, "intent": "Enumerate all built-in tree-sitter languages for tree_path operations: Rust, Python, Go, JavaScript, TypeScript, and TSX. 
Each variant maps to a static LanguageConfig via config().", "source_span": [ - 620, - 635 + 126, + 141 ], "tree_path": "enum::Language", "source_hash": "sha256:df5bfa956c1b92e1ab2320378cf6e1c79b0788feded9f8a4ccb2bf97ced49381", @@ -44,8 +44,8 @@ "reviewed": false, "intent": "Extract the user-visible name of an AST node via the language's LanguageConfig. Checks the custom_name callback first (for complex patterns like Go receiver encoding). Falls back to name_overrides for special cases (e.g., impl_item uses type field). Otherwise reads the standard name field. Returns Cow::Owned for constructed names, Cow::Borrowed for field-extracted names.", "source_span": [ - 62, - 82 + 76, + 96 ], "tree_path": "impl::LanguageConfig::fn::node_name", "source_hash": "sha256:1f187fdb6eab1bd532149c8007d31dae3cd2c210edc153484dfd46210e287f5f", @@ -59,17 +59,17 @@ 343, 394 ], - "tree_path": "fn::go_node_name", - "source_hash": "sha256:5198217ac70bb06963c30ee0f9f0daa9972cdb47834ed82cf99b800f8b043620", - "source_anchor": "fn go_node_name(node: &Node, source: &str) -> Option {" + "tree_path": "fn::compute_tree_path", + "source_hash": "sha256:b74c13919ed07d8feb3a7c4d91d94285a190b35bd5a091df3cb0b6275203086e", + "source_anchor": "/// Returns an empty string if no suitable structural path can be determined" }, { "item": "parse_tree_path", "reviewed": false, "intent": "Parse a tree_path string into segments of (kind, name) pairs by splitting on '::' and grouping consecutive pairs. Return None if the number of parts is odd (malformed). 
Validate each kind against the known shorthand set.", "source_span": [ - 750, - 766 + 256, + 272 ], "tree_path": "fn::parse_tree_path", "source_hash": "sha256:eb1bdb126bb090d769612797d5428edd3c20ba72ba04dad58071bbfa955240c2", @@ -80,8 +80,8 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 773, - 790 + 279, + 296 ], "tree_path": "fn::resolve_tree_path", "source_hash": "sha256:8cd19d6e6704970f8cbead0b56b05a9196ca29b0439b37b31a819a958dc03dbe", @@ -92,8 +92,8 @@ "reviewed": false, "intent": "Walk tree-sitter children of the given parent to find nodes matching each path segment in order. For single-segment paths, return the matching child directly. For multi-segment paths, descend into the first matching child via resolve_in_body for subsequent segments.", "source_span": [ - 793, - 822 + 299, + 328 ], "tree_path": "fn::resolve_segments", "source_hash": "sha256:15731dca9653e45052c706fbc2f193fcfe96ca98afe00bbf259f23f86288c414", @@ -104,8 +104,8 @@ "reviewed": false, "intent": "Find subsequent path segments inside an item's body or declaration_list. Try the 'body' field first (mod, fn), then fall back to looking for a declaration_list child (impl, trait). Delegate to resolve_segments for the recursive match.", "source_span": [ - 825, - 833 + 331, + 339 ], "tree_path": "fn::resolve_in_body", "source_hash": "sha256:f1514f012bc8d300c425867e4a1cce1aaf72f1f58885eeaf24456114234473d6", @@ -116,8 +116,8 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 840, - 861 + 346, + 367 ], "tree_path": "fn::compute_tree_path", "source_hash": "sha256:30ecd47287f846a39cdbd906075c6eae16d286eda5c3bc92d87cfbae67ec2e74", @@ -128,8 +128,8 @@ "reviewed": false, "intent": "Find the widest item-bearing tree-sitter node whose start and end rows both fall within [target_start, target_end]. Must handle the attribute-sibling pattern where Rust attributes (#[derive(...)]) are siblings of the item node — the sidecar span can start before the item node. 
Prefer the outermost (widest) item when multiple items fall within the range.", "source_span": [ - 869, - 915 + 375, + 421 ], "tree_path": "fn::find_item_in_range", "source_hash": "sha256:187c06169aae241150cb9bd88810da07aef5d967431ae25b50aab3ff111fc220", @@ -140,8 +140,8 @@ "reviewed": false, "intent": "Recursively walk from root to target node, collecting (kind::name) path segments. At the target node, push its segment and return true. During descent, only enter children that spatially contain the target. When a child's subtree contains the target, prepend the current node's segment if it is an item node. Return false if the target cannot be found.", "source_span": [ - 933, - 979 + 439, + 485 ], "tree_path": "fn::collect_path", "source_hash": "sha256:0086ee43dc7c085025e553af9914df58fab43d8e8b579486f21d5788d8d0d221", From e41d8c5e36b39de1565ba2d67d5d0406a6d30837 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Tue, 10 Mar 2026 20:13:12 +0800 Subject: [PATCH 15/18] docs(design): update size claims after language expansion The linter now has 14 tree-sitter grammars and ~7 k lines of Rust. Update the "self-contained" bullet and the reimplementation cost estimate to reflect the current state. Original prompt: > Regarding the design doc (line 1900 and 2034) -- the linter is now > 7348 lines of Rust and 33 MiB, arguably not "lightweight" any more. > We need to reword a bit. > > For line 1900, I think this version is still a bit too much. Just > "The linter is a single binary with tree-sitter grammars built in, > no runtime dependencies" would be enough, because the size or > complexity doesn't matter for self-containedness. Human note: added the second turn. 
AI-assisted-by: Claude Opus 4.6 (GitHub Copilot) Signed-off-by: WANG Xuerui --- docs/liyi-design.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/liyi-design.md b/docs/liyi-design.md index fb7aaca..d1c6847 100644 --- a/docs/liyi-design.md +++ b/docs/liyi-design.md @@ -1897,7 +1897,7 @@ The spec-driven development space is no longer hypothetical — Augment Intent, - **Persistent by design.** Intent survives context windows, agent sessions, and team turnover. It's a file in the repo, not a message in a thread. - **Each level stands alone.** You can adopt the instruction without the linter, or the linter without adversarial tests. - **Nothing to learn.** JSONC, Markdown, SHA-256. No DSL, no specification language, no framework. -- **Lightweight.** The linter is two Rust crates (~6000 lines including tests) with tree-sitter grammars for Rust, Python, Go, JavaScript, and TypeScript built in. Single binary, no runtime dependencies. +- **Self-contained.** The linter is a single binary with tree-sitter grammars built in, no runtime dependencies. - **No lock-in.** `.liyi.jsonc` files are plain JSONC. `@liyi:module` markers are comments. Delete them and nothing breaks. - **Any programming language.** The linter doesn't parse source code. It reads line ranges from `source_span`, hashes them, compares. `.liyi.jsonc` is JSONC. `@liyi:module` markers use whatever comment syntax the host format already provides. Works with any language, any framework, any build system, any design pattern. - **Hardware RTL too.** The convention applies at the RTL level (Verilog, SystemVerilog, VHDL, Chisel) with no design changes — sidecars co-locate with `.v`/`.vhd`/`.scala` files, `source_span` and `source_hash` work on any text, and tree-sitter grammars exist for Verilog and VHDL. 
In hardware domains where requirements traceability is a compliance obligation (DO-254, ISO 26262, IEC 61508), 立意 functions as a lightweight shim between a requirements management system and RTL source: a `liyi import-reqif` command (post-MVP) can consume ReqIF — the open OMG standard (ReqIF 1.2, `formal/2016-07-01`) that DOORS, Polarion, and other tools export — and emit `@liyi:requirement` blocks, connecting managed requirements to RTL implementations with hash-based staleness detection. The tool doesn't replace DOORS; it fills the last mile that DOORS doesn't cover. @@ -2031,7 +2031,7 @@ A well-funded competitor (Augment Code, with their Intent product) can absorb th 2. **Reimplement the staleness model.** If their "living specs" prove unreliable (auto-updating specs drift silently), `source_hash` + `source_span` staleness is a public algorithm, fully specified in this document, trivially reimplementable. They ship "staleness alerts" as a feature. 3. **Ship `.liyi.jsonc` import/export.** If the convention gains traction, they offer compatibility as a feature — their specs are primary, `.liyi.jsonc` is a second-class interop format. They absorb the convention's ecosystem without contributing to it. -**No license can prevent this.** The convention is a file format (`.liyi.jsonc`), a set of marker strings (`@liyi:module`, `@liyi:intent`), and a staleness algorithm (hash lines, compare). These are ideas and data formats — not copyrightable expression. Even under AGPL, a competitor reimplements the algorithm from this public specification without touching the linter's source code. The JSON Schema is a functional specification. The linter is ~3000 lines of Rust (including tree-sitter integration) — reimplementation cost is a few engineer-days. +**No license can prevent this.** The convention is a file format (`.liyi.jsonc`), a set of marker strings (`@liyi:module`, `@liyi:intent`), and a staleness algorithm (hash lines, compare). 
These are ideas and data formats — not copyrightable expression. Even under AGPL, a competitor reimplements the algorithm from this public specification without touching the linter's source code. The JSON Schema is a functional specification. The linter is ~7 k lines of Rust (including tree-sitter integration for 14 languages) — reimplementation cost is modest. Copyleft (GPL, AGPL, MPL) would protect the **linter binary** from being embedded in a closed product without releasing source. But: From 16c41c661e6959d9e65615c7e865d8cd1151779b Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Tue, 10 Mar 2026 20:18:35 +0800 Subject: [PATCH 16/18] style: cargo fmt + clippy fixes, reanchor sidecars MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apply cargo fmt across tree_path/lang_*.rs files and fix two clippy warnings: - collapsible_if in tree_path/mod.rs (nested if let → combined chain) - cloned_ref_to_slice_refs in discovery.rs (&[sub.clone()] → from_ref) Reanchor mod.rs.liyi.jsonc after the code changes. 
Original-prompt: please fix cargo fmt and cargo clippy and sync sidecars AI-assisted-by: Claude Opus 4.6 (GitHub Copilot) Signed-off-by: WANG Xuerui --- crates/liyi/src/discovery.rs | 2 +- crates/liyi/src/tree_path/lang_cpp.rs | 19 +++++---- crates/liyi/src/tree_path/lang_csharp.rs | 10 ++++- crates/liyi/src/tree_path/lang_go.rs | 6 +-- crates/liyi/src/tree_path/lang_java.rs | 14 ++----- crates/liyi/src/tree_path/lang_kotlin.rs | 14 +++---- crates/liyi/src/tree_path/lang_objc.rs | 6 +-- crates/liyi/src/tree_path/lang_php.rs | 8 +++- crates/liyi/src/tree_path/lang_python.rs | 3 +- crates/liyi/src/tree_path/lang_swift.rs | 9 +--- crates/liyi/src/tree_path/mod.rs | 19 ++++----- crates/liyi/src/tree_path/mod.rs.liyi.jsonc | 46 ++++++++++----------- 12 files changed, 76 insertions(+), 80 deletions(-) diff --git a/crates/liyi/src/discovery.rs b/crates/liyi/src/discovery.rs index 2eb7739..113680a 100644 --- a/crates/liyi/src/discovery.rs +++ b/crates/liyi/src/discovery.rs @@ -256,7 +256,7 @@ mod tests { fs::write(sub.join("inner.rs"), "").unwrap(); fs::write(sub.join("inner.rs.liyi.jsonc"), "{}").unwrap(); - let scoped = discover(root, &[sub.clone()]); + let scoped = discover(root, std::slice::from_ref(&sub)); assert_eq!(scoped.sidecars.len(), 1); assert_eq!(scoped.sidecars[0].repo_relative_source, "sub/inner.rs"); diff --git a/crates/liyi/src/tree_path/lang_cpp.rs b/crates/liyi/src/tree_path/lang_cpp.rs index 08088b1..5ff1378 100644 --- a/crates/liyi/src/tree_path/lang_cpp.rs +++ b/crates/liyi/src/tree_path/lang_cpp.rs @@ -12,7 +12,8 @@ fn cpp_node_name(node: &Node, source: &str) -> Option { match node.kind() { "function_definition" => c_extract_declarator_name(node, source), "type_definition" | "alias_declaration" => { - let name_node = node.child_by_field_name("name") + let name_node = node + .child_by_field_name("name") .or_else(|| node.child_by_field_name("declarator"))?; Some(source[name_node.byte_range()].to_string()) } @@ -104,7 +105,10 @@ void standalone() {} 
"namespace::math::class::Calculator", Language::Cpp, ); - assert!(span.is_some(), "should resolve namespace::math::class::Calculator"); + assert!( + span.is_some(), + "should resolve namespace::math::class::Calculator" + ); } #[test] @@ -131,11 +135,7 @@ void standalone() {} #[test] fn resolve_cpp_enum() { - let span = resolve_tree_path( - SAMPLE_CPP, - "namespace::math::enum::Color", - Language::Cpp, - ); + let span = resolve_tree_path(SAMPLE_CPP, "namespace::math::enum::Color", Language::Cpp); assert!(span.is_some(), "should resolve enum in namespace"); } @@ -150,6 +150,9 @@ void standalone() {} fn detect_cpp_extensions() { assert_eq!(detect_language(Path::new("main.cpp")), Some(Language::Cpp)); assert_eq!(detect_language(Path::new("main.cc")), Some(Language::Cpp)); - assert_eq!(detect_language(Path::new("header.hpp")), Some(Language::Cpp)); + assert_eq!( + detect_language(Path::new("header.hpp")), + Some(Language::Cpp) + ); } } diff --git a/crates/liyi/src/tree_path/lang_csharp.rs b/crates/liyi/src/tree_path/lang_csharp.rs index a93b17d..7e476a4 100644 --- a/crates/liyi/src/tree_path/lang_csharp.rs +++ b/crates/liyi/src/tree_path/lang_csharp.rs @@ -64,7 +64,10 @@ record Person(string Name, int Age); "namespace::MyApp::class::Calculator", Language::CSharp, ); - assert!(span.is_some(), "should resolve namespace::MyApp::class::Calculator"); + assert!( + span.is_some(), + "should resolve namespace::MyApp::class::Calculator" + ); } #[test] @@ -74,7 +77,10 @@ record Person(string Name, int Age); "namespace::MyApp::class::Calculator::fn::Add", Language::CSharp, ); - assert!(span.is_some(), "should resolve method in class in namespace"); + assert!( + span.is_some(), + "should resolve method in class in namespace" + ); } #[test] diff --git a/crates/liyi/src/tree_path/lang_go.rs b/crates/liyi/src/tree_path/lang_go.rs index af712f4..3e33c6c 100644 --- a/crates/liyi/src/tree_path/lang_go.rs +++ b/crates/liyi/src/tree_path/lang_go.rs @@ -134,8 +134,7 @@ func Add(a, b int) 
int { #[test] fn resolve_go_pointer_method() { - let span = - resolve_tree_path(SAMPLE_GO, "method::(*Calculator).Add", Language::Go); + let span = resolve_tree_path(SAMPLE_GO, "method::(*Calculator).Add", Language::Go); assert!(span.is_some(), "should resolve method::(*Calculator).Add"); let [start, _end] = span.unwrap(); let lines: Vec<&str> = SAMPLE_GO.lines().collect(); @@ -148,8 +147,7 @@ func Add(a, b int) int { #[test] fn resolve_go_value_method() { - let span = - resolve_tree_path(SAMPLE_GO, "method::Calculator.Value", Language::Go); + let span = resolve_tree_path(SAMPLE_GO, "method::Calculator.Value", Language::Go); assert!(span.is_some(), "should resolve method::Calculator.Value"); let [start, _end] = span.unwrap(); let lines: Vec<&str> = SAMPLE_GO.lines().collect(); diff --git a/crates/liyi/src/tree_path/lang_java.rs b/crates/liyi/src/tree_path/lang_java.rs index 5fba7c9..59ea2a8 100644 --- a/crates/liyi/src/tree_path/lang_java.rs +++ b/crates/liyi/src/tree_path/lang_java.rs @@ -61,11 +61,7 @@ record Point(int x, int y) {} #[test] fn resolve_java_method() { - let span = resolve_tree_path( - SAMPLE_JAVA, - "class::Calculator::fn::add", - Language::Java, - ); + let span = resolve_tree_path(SAMPLE_JAVA, "class::Calculator::fn::add", Language::Java); assert!(span.is_some(), "should resolve class::Calculator::fn::add"); } @@ -99,12 +95,8 @@ record Point(int x, int y) {} #[test] fn roundtrip_java() { - let span = resolve_tree_path( - SAMPLE_JAVA, - "class::Calculator::fn::add", - Language::Java, - ) - .unwrap(); + let span = + resolve_tree_path(SAMPLE_JAVA, "class::Calculator::fn::add", Language::Java).unwrap(); let path = compute_tree_path(SAMPLE_JAVA, span, Language::Java); assert_eq!(path, "class::Calculator::fn::add"); } diff --git a/crates/liyi/src/tree_path/lang_kotlin.rs b/crates/liyi/src/tree_path/lang_kotlin.rs index ffc1cdc..afba109 100644 --- a/crates/liyi/src/tree_path/lang_kotlin.rs +++ b/crates/liyi/src/tree_path/lang_kotlin.rs @@ -14,11 +14,12 
@@ fn kotlin_node_name(node: &Node, source: &str) -> Option { // Name is in the first variable_declaration or identifier child for child in node.children(&mut cursor) { if child.kind() == "variable_declaration" { - let name = child.child_by_field_name("name") - .or_else(|| { - let mut c2 = child.walk(); - child.children(&mut c2).find(|c| c.kind() == "simple_identifier") - })?; + let name = child.child_by_field_name("name").or_else(|| { + let mut c2 = child.walk(); + child + .children(&mut c2) + .find(|c| c.kind() == "simple_identifier") + })?; return Some(source[name.byte_range()].to_string()); } if child.kind() == "simple_identifier" { @@ -106,8 +107,7 @@ typealias StringList = List #[test] fn roundtrip_kotlin() { - let span = - resolve_tree_path(SAMPLE_KOTLIN, "fn::standalone", Language::Kotlin).unwrap(); + let span = resolve_tree_path(SAMPLE_KOTLIN, "fn::standalone", Language::Kotlin).unwrap(); let path = compute_tree_path(SAMPLE_KOTLIN, span, Language::Kotlin); assert_eq!(path, "fn::standalone"); } diff --git a/crates/liyi/src/tree_path/lang_objc.rs b/crates/liyi/src/tree_path/lang_objc.rs index 80a60ee..c67af5f 100644 --- a/crates/liyi/src/tree_path/lang_objc.rs +++ b/crates/liyi/src/tree_path/lang_objc.rs @@ -47,7 +47,8 @@ fn objc_node_name(node: &Node, source: &str) -> Option { "keyword_declarator" => { // Each keyword_declarator has a keyword child let mut kw_cursor = child.walk(); - if let Some(kw) = child.children(&mut kw_cursor) + if let Some(kw) = child + .children(&mut kw_cursor) .find(|c| c.kind() == "keyword_selector" || c.kind() == "identifier") { parts.push(format!("{}:", &source[kw.byte_range()])); @@ -124,8 +125,7 @@ void helper(void) { #[test] fn roundtrip_objc() { - let span = - resolve_tree_path(SAMPLE_OBJC, "fn::helper", Language::ObjectiveC).unwrap(); + let span = resolve_tree_path(SAMPLE_OBJC, "fn::helper", Language::ObjectiveC).unwrap(); let path = compute_tree_path(SAMPLE_OBJC, span, Language::ObjectiveC); assert_eq!(path, "fn::helper"); 
} diff --git a/crates/liyi/src/tree_path/lang_php.rs b/crates/liyi/src/tree_path/lang_php.rs index 24c9ce1..4abb841 100644 --- a/crates/liyi/src/tree_path/lang_php.rs +++ b/crates/liyi/src/tree_path/lang_php.rs @@ -9,7 +9,8 @@ fn php_node_name(node: &Node, source: &str) -> Option { match node.kind() { "const_declaration" => { let mut cursor = node.walk(); - let elem = node.children(&mut cursor) + let elem = node + .children(&mut cursor) .find(|c| c.kind() == "const_element")?; let name = elem.child_by_field_name("name")?; Some(source[name.byte_range()].to_string()) @@ -88,7 +89,10 @@ enum Status { "class::UserService::method::findUser", Language::Php, ); - assert!(span.is_some(), "should resolve class::UserService::method::findUser"); + assert!( + span.is_some(), + "should resolve class::UserService::method::findUser" + ); } #[test] diff --git a/crates/liyi/src/tree_path/lang_python.rs b/crates/liyi/src/tree_path/lang_python.rs index 7a949c6..c4d351d 100644 --- a/crates/liyi/src/tree_path/lang_python.rs +++ b/crates/liyi/src/tree_path/lang_python.rs @@ -54,8 +54,7 @@ def calculate_total(items): #[test] fn resolve_python_class_method() { - let span = - resolve_tree_path(SAMPLE_PYTHON, "class::Order::fn::process", Language::Python); + let span = resolve_tree_path(SAMPLE_PYTHON, "class::Order::fn::process", Language::Python); assert!(span.is_some(), "should resolve class::Order::fn::process"); let [start, _end] = span.unwrap(); let lines: Vec<&str> = SAMPLE_PYTHON.lines().collect(); diff --git a/crates/liyi/src/tree_path/lang_swift.rs b/crates/liyi/src/tree_path/lang_swift.rs index efc0fd6..a8ba79c 100644 --- a/crates/liyi/src/tree_path/lang_swift.rs +++ b/crates/liyi/src/tree_path/lang_swift.rs @@ -57,11 +57,7 @@ typealias Callback = () -> Void #[test] fn resolve_swift_method() { - let span = resolve_tree_path( - SAMPLE_SWIFT, - "class::Shape::fn::area", - Language::Swift, - ); + let span = resolve_tree_path(SAMPLE_SWIFT, "class::Shape::fn::area", Language::Swift); 
assert!(span.is_some(), "should resolve class::Shape::fn::area"); } @@ -73,8 +69,7 @@ typealias Callback = () -> Void #[test] fn roundtrip_swift() { - let span = - resolve_tree_path(SAMPLE_SWIFT, "fn::standalone", Language::Swift).unwrap(); + let span = resolve_tree_path(SAMPLE_SWIFT, "fn::standalone", Language::Swift).unwrap(); let path = compute_tree_path(SAMPLE_SWIFT, span, Language::Swift); assert_eq!(path, "fn::standalone"); } diff --git a/crates/liyi/src/tree_path/mod.rs b/crates/liyi/src/tree_path/mod.rs index d7d8663..f300fe5 100644 --- a/crates/liyi/src/tree_path/mod.rs +++ b/crates/liyi/src/tree_path/mod.rs @@ -75,10 +75,10 @@ impl LanguageConfig { /// owned when the name is constructed (e.g., Go method receiver encoding). fn node_name<'a>(&self, node: &Node<'a>, source: &'a str) -> Option> { // Check custom_name callback first (e.g., Go method receivers) - if let Some(custom) = self.custom_name { - if let Some(name) = custom(node, source) { - return Some(Cow::Owned(name)); - } + if let Some(custom) = self.custom_name + && let Some(name) = custom(node, source) + { + return Some(Cow::Owned(name)); } let kind = node.kind(); @@ -107,12 +107,11 @@ impl LanguageConfig { // positional child rather than a named field (e.g., Kotlin class_body, // C++ field_declaration_list). let mut cursor = node.walk(); - node.children(&mut cursor) - .find(|c| { - self.body_fields.contains(&c.kind()) - || c.kind() == "declaration_list" - || c.kind() == "field_declaration_list" - }) + node.children(&mut cursor).find(|c| { + self.body_fields.contains(&c.kind()) + || c.kind() == "declaration_list" + || c.kind() == "field_declaration_list" + }) } /// Check if the given file extension is associated with this language. 
diff --git a/crates/liyi/src/tree_path/mod.rs.liyi.jsonc b/crates/liyi/src/tree_path/mod.rs.liyi.jsonc index 299ef0e..18303df 100644 --- a/crates/liyi/src/tree_path/mod.rs.liyi.jsonc +++ b/crates/liyi/src/tree_path/mod.rs.liyi.jsonc @@ -20,8 +20,8 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 182, - 234 + 181, + 233 ], "tree_path": "fn::detect_language", "source_hash": "sha256:93745e6791e0ebb9eb4704bbcdaeaa193791e75c010fd498ec8cf8c79e1bc26c", @@ -32,8 +32,8 @@ "reviewed": false, "intent": "Enumerate all built-in tree-sitter languages for tree_path operations: Rust, Python, Go, JavaScript, TypeScript, and TSX. Each variant maps to a static LanguageConfig via config().", "source_span": [ - 126, - 141 + 125, + 140 ], "tree_path": "enum::Language", "source_hash": "sha256:df5bfa956c1b92e1ab2320378cf6e1c79b0788feded9f8a4ccb2bf97ced49381", @@ -48,7 +48,7 @@ 96 ], "tree_path": "impl::LanguageConfig::fn::node_name", - "source_hash": "sha256:1f187fdb6eab1bd532149c8007d31dae3cd2c210edc153484dfd46210e287f5f", + "source_hash": "sha256:d459d381bbc30689c1dd009aa6df01f7815da0b36ed5592ff2b45da8abe27edd", "source_anchor": " fn node_name<'a>(&self, node: &Node<'a>, source: &'a str) -> Option> {" }, { @@ -56,20 +56,20 @@ "reviewed": false, "intent": "Handle Go-specific name extraction for four node kinds: method_declaration encodes receiver type into the name as ReceiverType.Method or (*ReceiverType).Method for pointer receivers; type_declaration navigates to the inner type_spec for the name; const_declaration and var_declaration similarly navigate to their inner spec nodes. 
Returns None for unrecognized node kinds to fall through to default name extraction.", "source_span": [ - 343, - 394 + 345, + 366 ], "tree_path": "fn::compute_tree_path", - "source_hash": "sha256:b74c13919ed07d8feb3a7c4d91d94285a190b35bd5a091df3cb0b6275203086e", - "source_anchor": "/// Returns an empty string if no suitable structural path can be determined" + "source_hash": "sha256:30ecd47287f846a39cdbd906075c6eae16d286eda5c3bc92d87cfbae67ec2e74", + "source_anchor": "pub fn compute_tree_path(source: &str, span: [usize; 2], lang: Language) -> String {" }, { "item": "parse_tree_path", "reviewed": false, "intent": "Parse a tree_path string into segments of (kind, name) pairs by splitting on '::' and grouping consecutive pairs. Return None if the number of parts is odd (malformed). Validate each kind against the known shorthand set.", "source_span": [ - 256, - 272 + 255, + 271 ], "tree_path": "fn::parse_tree_path", "source_hash": "sha256:eb1bdb126bb090d769612797d5428edd3c20ba72ba04dad58071bbfa955240c2", @@ -80,8 +80,8 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 279, - 296 + 278, + 295 ], "tree_path": "fn::resolve_tree_path", "source_hash": "sha256:8cd19d6e6704970f8cbead0b56b05a9196ca29b0439b37b31a819a958dc03dbe", @@ -92,8 +92,8 @@ "reviewed": false, "intent": "Walk tree-sitter children of the given parent to find nodes matching each path segment in order. For single-segment paths, return the matching child directly. For multi-segment paths, descend into the first matching child via resolve_in_body for subsequent segments.", "source_span": [ - 299, - 328 + 298, + 327 ], "tree_path": "fn::resolve_segments", "source_hash": "sha256:15731dca9653e45052c706fbc2f193fcfe96ca98afe00bbf259f23f86288c414", @@ -104,8 +104,8 @@ "reviewed": false, "intent": "Find subsequent path segments inside an item's body or declaration_list. Try the 'body' field first (mod, fn), then fall back to looking for a declaration_list child (impl, trait). 
Delegate to resolve_segments for the recursive match.", "source_span": [ - 331, - 339 + 330, + 338 ], "tree_path": "fn::resolve_in_body", "source_hash": "sha256:f1514f012bc8d300c425867e4a1cce1aaf72f1f58885eeaf24456114234473d6", @@ -116,8 +116,8 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 346, - 367 + 345, + 366 ], "tree_path": "fn::compute_tree_path", "source_hash": "sha256:30ecd47287f846a39cdbd906075c6eae16d286eda5c3bc92d87cfbae67ec2e74", @@ -128,8 +128,8 @@ "reviewed": false, "intent": "Find the widest item-bearing tree-sitter node whose start and end rows both fall within [target_start, target_end]. Must handle the attribute-sibling pattern where Rust attributes (#[derive(...)]) are siblings of the item node — the sidecar span can start before the item node. Prefer the outermost (widest) item when multiple items fall within the range.", "source_span": [ - 375, - 421 + 374, + 420 ], "tree_path": "fn::find_item_in_range", "source_hash": "sha256:187c06169aae241150cb9bd88810da07aef5d967431ae25b50aab3ff111fc220", @@ -140,8 +140,8 @@ "reviewed": false, "intent": "Recursively walk from root to target node, collecting (kind::name) path segments. At the target node, push its segment and return true. During descent, only enter children that spatially contain the target. When a child's subtree contains the target, prepend the current node's segment if it is an item node. 
Return false if the target cannot be found.", "source_span": [ - 439, - 485 + 438, + 484 ], "tree_path": "fn::collect_path", "source_hash": "sha256:0086ee43dc7c085025e553af9914df58fab43d8e8b579486f21d5788d8d0d221", From 5286251bc06631e8621f45017dc72bac10485940 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Tue, 10 Mar 2026 20:22:05 +0800 Subject: [PATCH 17/18] docs(liyi): add sidecar specs for all tree_path language modules Scaffold and populate .liyi.jsonc sidecars for 13 language support files (lang_c, lang_cpp, lang_csharp, lang_go, lang_java, lang_javascript, lang_kotlin, lang_objc, lang_php, lang_python, lang_rust, lang_swift, lang_typescript) with intent specs for all non-trivial items (CONFIG statics, custom name extractors). Original prompt: > okay, commit them first. also don't the newly added language > support files need sidecars? if you decide to add, do it with > subagents Human note: fixed formatting and content of "original prompt". AI-assisted-by: Claude Opus 4.6 (GitHub Copilot) Signed-off-by: WANG Xuerui --- .../liyi/src/tree_path/lang_c.rs.liyi.jsonc | 55 +++++++++++++++++++ .../liyi/src/tree_path/lang_cpp.rs.liyi.jsonc | 31 +++++++++++ .../src/tree_path/lang_csharp.rs.liyi.jsonc | 19 +++++++ .../liyi/src/tree_path/lang_go.rs.liyi.jsonc | 31 +++++++++++ .../src/tree_path/lang_java.rs.liyi.jsonc | 19 +++++++ .../tree_path/lang_javascript.rs.liyi.jsonc | 19 +++++++ .../src/tree_path/lang_kotlin.rs.liyi.jsonc | 31 +++++++++++ .../src/tree_path/lang_objc.rs.liyi.jsonc | 31 +++++++++++ .../liyi/src/tree_path/lang_php.rs.liyi.jsonc | 31 +++++++++++ .../src/tree_path/lang_python.rs.liyi.jsonc | 19 +++++++ .../src/tree_path/lang_rust.rs.liyi.jsonc | 19 +++++++ .../src/tree_path/lang_swift.rs.liyi.jsonc | 19 +++++++ .../tree_path/lang_typescript.rs.liyi.jsonc | 31 +++++++++++ 13 files changed, 355 insertions(+) create mode 100644 crates/liyi/src/tree_path/lang_c.rs.liyi.jsonc create mode 100644 crates/liyi/src/tree_path/lang_cpp.rs.liyi.jsonc create 
mode 100644 crates/liyi/src/tree_path/lang_csharp.rs.liyi.jsonc create mode 100644 crates/liyi/src/tree_path/lang_go.rs.liyi.jsonc create mode 100644 crates/liyi/src/tree_path/lang_java.rs.liyi.jsonc create mode 100644 crates/liyi/src/tree_path/lang_javascript.rs.liyi.jsonc create mode 100644 crates/liyi/src/tree_path/lang_kotlin.rs.liyi.jsonc create mode 100644 crates/liyi/src/tree_path/lang_objc.rs.liyi.jsonc create mode 100644 crates/liyi/src/tree_path/lang_php.rs.liyi.jsonc create mode 100644 crates/liyi/src/tree_path/lang_python.rs.liyi.jsonc create mode 100644 crates/liyi/src/tree_path/lang_rust.rs.liyi.jsonc create mode 100644 crates/liyi/src/tree_path/lang_swift.rs.liyi.jsonc create mode 100644 crates/liyi/src/tree_path/lang_typescript.rs.liyi.jsonc diff --git a/crates/liyi/src/tree_path/lang_c.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_c.rs.liyi.jsonc new file mode 100644 index 0000000..6f9bac0 --- /dev/null +++ b/crates/liyi/src/tree_path/lang_c.rs.liyi.jsonc @@ -0,0 +1,55 @@ +// liyi v0.1 spec file +{ + "version": "0.1", + "source": "crates/liyi/src/tree_path/lang_c.rs", + "specs": [ + { + "item": "c_extract_declarator_name", + "reviewed": false, + "intent": "Extract the function name from a C/C++ function_definition node by walking the declarator field chain (function_definition → function_declarator → declarator identifier), unwrapping pointer/parenthesized/attributed wrappers as needed.", + "source_span": [ + 12, + 17 + ], + "tree_path": "fn::c_extract_declarator_name", + "source_hash": "sha256:b770215018f35c818e0fe450fcee058c8407d827f8fb2bfc2026bfd925b981e4", + "source_anchor": "pub(super) fn c_extract_declarator_name(node: &Node, source: &str) -> Option {" + }, + { + "item": "unwrap_to_function_declarator", + "reviewed": false, + "intent": "Recursively unwrap pointer_declarator, parenthesized_declarator, and attributed_declarator wrapper nodes to locate the inner function_declarator node. 
Must return None if no function_declarator is found in the chain.", + "source_span": [ + 21, + 30 + ], + "tree_path": "fn::unwrap_to_function_declarator", + "source_hash": "sha256:4e203fab2d264c4f02df280f88a191ae9344828e17bb91dd98c357c327baf7a0", + "source_anchor": "fn unwrap_to_function_declarator<'a>(node: &Node<'a>) -> Option> {" + }, + { + "item": "c_node_name", + "reviewed": false, + "intent": "Custom name extractor for C AST nodes: dispatch to c_extract_declarator_name for function_definition, and read the declarator field directly for type_definition (typedef). Return None for all other node kinds.", + "source_span": [ + 36, + 46 + ], + "tree_path": "fn::c_node_name", + "source_hash": "sha256:6c373de50a8b2dee1ca251498a38f9e0397d1061554089e7bc2425434e0fac67", + "source_anchor": "fn c_node_name(node: &Node, source: &str) -> Option {" + }, + { + "item": "CONFIG", + "reviewed": false, + "intent": "Define the C language configuration for tree_path resolution: register tree-sitter-c grammar, .c/.h extensions, kind mappings for fn/struct/enum/typedef, and wire the custom c_node_name callback for name extraction.", + "source_span": [ + 49, + 62 + ], + "tree_path": "static::CONFIG", + "source_hash": "sha256:9d7828f146ec68282253daedc10b6f3e19b884b881703d558c83fc4618d0012d", + "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" + } + ] +} diff --git a/crates/liyi/src/tree_path/lang_cpp.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_cpp.rs.liyi.jsonc new file mode 100644 index 0000000..278994d --- /dev/null +++ b/crates/liyi/src/tree_path/lang_cpp.rs.liyi.jsonc @@ -0,0 +1,31 @@ +// liyi v0.1 spec file +{ + "version": "0.1", + "source": "crates/liyi/src/tree_path/lang_cpp.rs", + "specs": [ + { + "item": "cpp_node_name", + "reviewed": false, + "intent": "Custom name extractor for C++ AST nodes extending the C extractor: handle function_definition via c_extract_declarator_name, type_definition/alias_declaration via name or declarator fields, and 
template_declaration by transparently extracting the name from the wrapped inner declaration (class, struct, enum, concept, alias, function, or nested template).", + "source_span": [ + 11, + 41 + ], + "tree_path": "fn::cpp_node_name", + "source_hash": "sha256:4f9ebbcef1c9124312eed3f03d46f02bc63f85b5d9fd2e6a3ac44d5e3b5f9c08", + "source_anchor": "fn cpp_node_name(node: &Node, source: &str) -> Option {" + }, + { + "item": "CONFIG", + "reviewed": false, + "intent": "Define the C++ language configuration for tree_path resolution: register tree-sitter-cpp grammar, C++ file extensions (.cpp/.cc/.cxx/.hpp etc.), kind mappings for fn/class/struct/namespace/enum/template/typedef/using, and wire the custom cpp_node_name callback.", + "source_span": [ + 44, + 61 + ], + "tree_path": "static::CONFIG", + "source_hash": "sha256:2fe337bd8a4a25f0ad98cf3106f17746e3898a29e4c6b6edbb86ac4e80ece55c", + "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" + } + ] +} diff --git a/crates/liyi/src/tree_path/lang_csharp.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_csharp.rs.liyi.jsonc new file mode 100644 index 0000000..03248e6 --- /dev/null +++ b/crates/liyi/src/tree_path/lang_csharp.rs.liyi.jsonc @@ -0,0 +1,19 @@ +// liyi v0.1 spec file +{ + "version": "0.1", + "source": "crates/liyi/src/tree_path/lang_csharp.rs", + "specs": [ + { + "item": "CONFIG", + "reviewed": false, + "intent": "Define the C# language configuration for tree_path resolution: register tree-sitter-c-sharp grammar, .cs extension, kind mappings for fn/class/interface/enum/struct/namespace/constructor/property/record/delegate, using standard name field with no custom name extraction.", + "source_span": [ + 4, + 23 + ], + "tree_path": "static::CONFIG", + "source_hash": "sha256:51d280af9946356cc55cd9d35843d74bbf603996767a4004ee4923946cec1624", + "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" + } + ] +} diff --git a/crates/liyi/src/tree_path/lang_go.rs.liyi.jsonc 
b/crates/liyi/src/tree_path/lang_go.rs.liyi.jsonc new file mode 100644 index 0000000..5d4ee20 --- /dev/null +++ b/crates/liyi/src/tree_path/lang_go.rs.liyi.jsonc @@ -0,0 +1,31 @@ +// liyi v0.1 spec file +{ + "version": "0.1", + "source": "crates/liyi/src/tree_path/lang_go.rs", + "specs": [ + { + "item": "go_node_name", + "reviewed": false, + "intent": "Custom name extractor for Go AST nodes: encode method receivers into the name as ReceiverType.Method or (*ReceiverType).Method for method_declaration; navigate into type_spec for type_declaration names; navigate into const_spec/var_spec for const_declaration/var_declaration names. Return None for unrecognized kinds.", + "source_span": [ + 12, + 63 + ], + "tree_path": "fn::go_node_name", + "source_hash": "sha256:5198217ac70bb06963c30ee0f9f0daa9972cdb47834ed82cf99b800f8b043620", + "source_anchor": "fn go_node_name(node: &Node, source: &str) -> Option {" + }, + { + "item": "CONFIG", + "reviewed": false, + "intent": "Define the Go language configuration for tree_path resolution: register tree-sitter-go grammar, .go extension, kind mappings for fn/method/type/const/var, and wire the custom go_node_name callback for receiver-encoded method names.", + "source_span": [ + 66, + 80 + ], + "tree_path": "static::CONFIG", + "source_hash": "sha256:da63d7b2578d2dfaef76a198b5ce9258f3f0fd5e50ae7a2f5b67ad3bbabc972c", + "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" + } + ] +} diff --git a/crates/liyi/src/tree_path/lang_java.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_java.rs.liyi.jsonc new file mode 100644 index 0000000..1b07c34 --- /dev/null +++ b/crates/liyi/src/tree_path/lang_java.rs.liyi.jsonc @@ -0,0 +1,19 @@ +// liyi v0.1 spec file +{ + "version": "0.1", + "source": "crates/liyi/src/tree_path/lang_java.rs", + "specs": [ + { + "item": "CONFIG", + "reviewed": false, + "intent": "Define the Java language configuration for tree_path resolution: register tree-sitter-java grammar, .java extension, 
kind mappings for fn/class/interface/enum/constructor/record/annotation, using standard name field with no custom name extraction.", + "source_span": [ + 4, + 20 + ], + "tree_path": "static::CONFIG", + "source_hash": "sha256:1017a74a175aa52b8d3704e6ae8e28f5587bbacffbb265398ad2aa267b7127cf", + "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" + } + ] +} diff --git a/crates/liyi/src/tree_path/lang_javascript.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_javascript.rs.liyi.jsonc new file mode 100644 index 0000000..425562f --- /dev/null +++ b/crates/liyi/src/tree_path/lang_javascript.rs.liyi.jsonc @@ -0,0 +1,19 @@ +// liyi v0.1 spec file +{ + "version": "0.1", + "source": "crates/liyi/src/tree_path/lang_javascript.rs", + "specs": [ + { + "item": "CONFIG", + "reviewed": false, + "intent": "Define the JavaScript language configuration for tree_path resolution: register tree-sitter-javascript grammar, .js/.mjs/.cjs/.jsx extensions, kind mappings for fn/class/method, using standard name field with no custom name extraction.", + "source_span": [ + 4, + 16 + ], + "tree_path": "static::CONFIG", + "source_hash": "sha256:5e92f722de7b8368b20bad4c0a46f1c208b2ae45049022f63f2424379f48f775", + "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" + } + ] +} diff --git a/crates/liyi/src/tree_path/lang_kotlin.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_kotlin.rs.liyi.jsonc new file mode 100644 index 0000000..1aa770b --- /dev/null +++ b/crates/liyi/src/tree_path/lang_kotlin.rs.liyi.jsonc @@ -0,0 +1,31 @@ +// liyi v0.1 spec file +{ + "version": "0.1", + "source": "crates/liyi/src/tree_path/lang_kotlin.rs", + "specs": [ + { + "item": "kotlin_node_name", + "reviewed": false, + "intent": "Custom name extractor for Kotlin AST nodes: for property_declaration, find the name inside a variable_declaration child or a simple_identifier child; for type_alias, find the type_identifier or simple_identifier child before the RHS. 
Return None for unrecognized kinds.", + "source_span": [ + 10, + 39 + ], + "tree_path": "fn::kotlin_node_name", + "source_hash": "sha256:572be5a3c85aef30324c0fa957348b46f397da76ef354d9d7e1e4d7c19f1b27a", + "source_anchor": "fn kotlin_node_name(node: &Node, source: &str) -> Option {" + }, + { + "item": "CONFIG", + "reviewed": false, + "intent": "Define the Kotlin language configuration for tree_path resolution: register tree-sitter-kotlin-ng grammar, .kt/.kts extensions, kind mappings for fn/class/object/property/typealias, and wire the custom kotlin_node_name callback.", + "source_span": [ + 42, + 56 + ], + "tree_path": "static::CONFIG", + "source_hash": "sha256:7bf842fab86334a9e6eb26c98ae34f1b341b8622bfbf2d12681a870ddb71e48a", + "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" + } + ] +} diff --git a/crates/liyi/src/tree_path/lang_objc.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_objc.rs.liyi.jsonc new file mode 100644 index 0000000..3400819 --- /dev/null +++ b/crates/liyi/src/tree_path/lang_objc.rs.liyi.jsonc @@ -0,0 +1,31 @@ +// liyi v0.1 spec file +{ + "version": "0.1", + "source": "crates/liyi/src/tree_path/lang_objc.rs", + "specs": [ + { + "item": "objc_node_name", + "reviewed": false, + "intent": "Custom name extractor for Objective-C AST nodes: delegate function_definition to c_extract_declarator_name; extract class/protocol names from identifier/type_identifier children; compose method selector names from keyword_declarator children (multi-part selectors) or a single identifier (zero-argument methods). 
Return None for unrecognized kinds.", + "source_span": [ + 12, + 68 + ], + "tree_path": "fn::objc_node_name", + "source_hash": "sha256:4e3e3c9e582612774e53975373f1f9a9542f9c2a3feab6af851c2abb79e31653", + "source_anchor": "fn objc_node_name(node: &Node, source: &str) -> Option {" + }, + { + "item": "CONFIG", + "reviewed": false, + "intent": "Define the Objective-C language configuration for tree_path resolution: register tree-sitter-objc grammar, .m/.mm extensions, kind mappings for fn/class/impl/protocol/method/method_decl/struct/enum/typedef, and wire the custom objc_node_name callback.", + "source_span": [ + 71, + 89 + ], + "tree_path": "static::CONFIG", + "source_hash": "sha256:3106742e7b8217f60de4d50c7e972124d4af51e9490ab333cd7594f8d0c39df7", + "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" + } + ] +} diff --git a/crates/liyi/src/tree_path/lang_php.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_php.rs.liyi.jsonc new file mode 100644 index 0000000..bc25498 --- /dev/null +++ b/crates/liyi/src/tree_path/lang_php.rs.liyi.jsonc @@ -0,0 +1,31 @@ +// liyi v0.1 spec file +{ + "version": "0.1", + "source": "crates/liyi/src/tree_path/lang_php.rs", + "specs": [ + { + "item": "php_node_name", + "reviewed": false, + "intent": "Custom name extractor for PHP const_declaration nodes: navigate into the const_element child and read its name field. 
Return None for all other node kinds.", + "source_span": [ + 8, + 20 + ], + "tree_path": "fn::php_node_name", + "source_hash": "sha256:784f44b137289d73dfcdb013715d01c48c432887e427033284766ec905813368", + "source_anchor": "fn php_node_name(node: &Node, source: &str) -> Option {" + }, + { + "item": "CONFIG", + "reviewed": false, + "intent": "Define the PHP language configuration for tree_path resolution: register tree-sitter-php PHP-only grammar (no HTML interleaving), .php extension, kind mappings for fn/class/method/interface/enum/trait/namespace/const, and wire the custom php_node_name callback.", + "source_span": [ + 23, + 40 + ], + "tree_path": "static::CONFIG", + "source_hash": "sha256:0a09a6bb9162b31e32d5ddad17114c705ffc366084ebeaffc15ca828ffa83393", + "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" + } + ] +} diff --git a/crates/liyi/src/tree_path/lang_python.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_python.rs.liyi.jsonc new file mode 100644 index 0000000..1efc9dd --- /dev/null +++ b/crates/liyi/src/tree_path/lang_python.rs.liyi.jsonc @@ -0,0 +1,19 @@ +// liyi v0.1 spec file +{ + "version": "0.1", + "source": "crates/liyi/src/tree_path/lang_python.rs", + "specs": [ + { + "item": "CONFIG", + "reviewed": false, + "intent": "Define the Python language configuration for tree_path resolution: register tree-sitter-python grammar, .py/.pyi extensions, kind mappings for fn (function_definition) and class (class_definition), using standard name field with no custom name extraction.", + "source_span": [ + 4, + 12 + ], + "tree_path": "static::CONFIG", + "source_hash": "sha256:aa974317e3de74f3d49a3f1fca69ccbacf47a05942d416fe1be099b1889d0668", + "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" + } + ] +} diff --git a/crates/liyi/src/tree_path/lang_rust.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_rust.rs.liyi.jsonc new file mode 100644 index 0000000..2b88817 --- /dev/null +++ 
b/crates/liyi/src/tree_path/lang_rust.rs.liyi.jsonc @@ -0,0 +1,19 @@ +// liyi v0.1 spec file +{ + "version": "0.1", + "source": "crates/liyi/src/tree_path/lang_rust.rs", + "specs": [ + { + "item": "CONFIG", + "reviewed": false, + "intent": "Define the Rust language configuration for tree_path resolution: register tree-sitter-rust grammar, .rs extension, kind mappings for fn/struct/enum/impl/trait/mod/const/static/type/macro, with impl_item using the type field for name extraction via name_overrides and no custom name callback.", + "source_span": [ + 4, + 23 + ], + "tree_path": "static::CONFIG", + "source_hash": "sha256:5f7fa704e1952495b5a28b78087f7a26cf4abafb32fa131304fc8b37d0a03786", + "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" + } + ] +} diff --git a/crates/liyi/src/tree_path/lang_swift.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_swift.rs.liyi.jsonc new file mode 100644 index 0000000..a4220ed --- /dev/null +++ b/crates/liyi/src/tree_path/lang_swift.rs.liyi.jsonc @@ -0,0 +1,19 @@ +// liyi v0.1 spec file +{ + "version": "0.1", + "source": "crates/liyi/src/tree_path/lang_swift.rs", + "specs": [ + { + "item": "CONFIG", + "reviewed": false, + "intent": "Define the Swift language configuration for tree_path resolution: register tree-sitter-swift grammar, .swift extension, kind mappings for fn/class/protocol/enum/property/init/typealias, using standard name field with no custom name extraction.", + "source_span": [ + 4, + 20 + ], + "tree_path": "static::CONFIG", + "source_hash": "sha256:9b703d45d6d6751c7fd41d681b5ba24dd490973c1453fd226f6e2b859c955263", + "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" + } + ] +} diff --git a/crates/liyi/src/tree_path/lang_typescript.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_typescript.rs.liyi.jsonc new file mode 100644 index 0000000..b55c8fa --- /dev/null +++ b/crates/liyi/src/tree_path/lang_typescript.rs.liyi.jsonc @@ -0,0 +1,31 @@ +// liyi v0.1 spec file +{ + 
"version": "0.1", + "source": "crates/liyi/src/tree_path/lang_typescript.rs", + "specs": [ + { + "item": "CONFIG", + "reviewed": false, + "intent": "Define the TypeScript language configuration for tree_path resolution: register tree-sitter-typescript grammar (LANGUAGE_TYPESCRIPT), .ts/.mts/.cts extensions, kind mappings for fn/class/method/interface/type/enum, using standard name field with no custom name extraction.", + "source_span": [ + 4, + 19 + ], + "tree_path": "static::CONFIG", + "source_hash": "sha256:7c920dd5e1c5bb41bc5de6c4e9e06449f370c31799a4c09879136fe11fc85982", + "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" + }, + { + "item": "TSX_CONFIG", + "reviewed": false, + "intent": "Define the TSX language configuration for tree_path resolution: register tree-sitter-typescript TSX grammar (LANGUAGE_TSX), .tsx extension, same kind mappings as TypeScript (fn/class/method/interface/type/enum), using standard name field with no custom name extraction.", + "source_span": [ + 22, + 37 + ], + "tree_path": "static::TSX_CONFIG", + "source_hash": "sha256:d3f258d9e47df10f71f0f44bcf8ba18093ff75390224c03d5a2c487bf65eaf39", + "source_anchor": "pub(super) static TSX_CONFIG: LanguageConfig = LanguageConfig {" + } + ] +} From de7ea3a1632d367b8dcecc773616f59bbf860a1e Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Tue, 10 Mar 2026 20:26:44 +0800 Subject: [PATCH 18/18] docs(design): clarify that only checking, not the linter, skips parsing The bullet said "the linter doesn't parse source code", but since semantic anchors (tree_path) were added, the linter does parse source via tree-sitter. Reword to say only the checking process skips parsing. Original prompt: > In line 1902, "the linter doesn't parse source code" -- this is > no longer true, since support for semantic anchors was added. Please > reword to say only the checking process doesn't parse code. Resync > sidecar and commit. 
AI-assisted-by: Claude Opus 4.6 (GitHub Copilot) Signed-off-by: WANG Xuerui --- docs/liyi-design.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/liyi-design.md b/docs/liyi-design.md index d1c6847..29b82ab 100644 --- a/docs/liyi-design.md +++ b/docs/liyi-design.md @@ -1899,7 +1899,7 @@ The spec-driven development space is no longer hypothetical — Augment Intent, - **Nothing to learn.** JSONC, Markdown, SHA-256. No DSL, no specification language, no framework. - **Self-contained.** The linter is a single binary with tree-sitter grammars built in, no runtime dependencies. - **No lock-in.** `.liyi.jsonc` files are plain JSONC. `@liyi:module` markers are comments. Delete them and nothing breaks. -- **Any programming language.** The linter doesn't parse source code. It reads line ranges from `source_span`, hashes them, compares. `.liyi.jsonc` is JSONC. `@liyi:module` markers use whatever comment syntax the host format already provides. Works with any language, any framework, any build system, any design pattern. +- **Any programming language.** The checking process doesn't parse source code — it reads line ranges from `source_span`, hashes them, compares. `.liyi.jsonc` is JSONC. `@liyi:module` markers use whatever comment syntax the host format already provides. Works with any language, any framework, any build system, any design pattern. - **Hardware RTL too.** The convention applies at the RTL level (Verilog, SystemVerilog, VHDL, Chisel) with no design changes — sidecars co-locate with `.v`/`.vhd`/`.scala` files, `source_span` and `source_hash` work on any text, and tree-sitter grammars exist for Verilog and VHDL. 
In hardware domains where requirements traceability is a compliance obligation (DO-254, ISO 26262, IEC 61508), 立意 functions as a lightweight shim between a requirements management system and RTL source: a `liyi import-reqif` command (post-MVP) can consume ReqIF — the open OMG standard (ReqIF 1.2, `formal/2016-07-01`) that DOORS, Polarion, and other tools export — and emit `@liyi:requirement` blocks, connecting managed requirements to RTL implementations with hash-based staleness detection. The tool doesn't replace DOORS; it fills the last mile that DOORS doesn't cover. - **Any human language.** Intent prose is natural language — write it in your team’s working language. Annotation markers accept aliases in any supported language (`@liyi:ignore` / `@立意:忽略` / `@liyi:ignorar`). No locale configuration; the linter accepts all aliases from a static table. The project’s Chinese cultural origin isn’t a barrier — it’s an invitation.