diff --git a/Cargo.lock b/Cargo.lock index 139dd90..a6cc5a6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -604,7 +604,19 @@ dependencies = [ "sha2", "tempfile", "tree-sitter", + "tree-sitter-c", + "tree-sitter-c-sharp", + "tree-sitter-cpp", + "tree-sitter-go", + "tree-sitter-java", + "tree-sitter-javascript", + "tree-sitter-kotlin-ng", + "tree-sitter-objc", + "tree-sitter-php", + "tree-sitter-python", "tree-sitter-rust", + "tree-sitter-swift", + "tree-sitter-typescript", ] [[package]] @@ -1182,12 +1194,112 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-c" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a3aad8f0129083a59fe8596157552d2bb7148c492d44c21558d68ca1c722707" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-c-sharp" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67f06accca7b45351758663b8215089e643d53bd9a660ce0349314263737fcb0" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-cpp" +version = "0.23.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df2196ea9d47b4ab4a31b9297eaa5a5d19a0b121dceb9f118f6790ad0ab94743" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-go" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8560a4d2f835cc0d4d2c2e03cbd0dde2f6114b43bc491164238d333e28b16ea" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-java" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0aa6cbcdc8c679b214e616fd3300da67da0e492e066df01bcf5a5921a71e90d6" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-javascript" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "68204f2abc0627a90bdf06e605f5c470aa26fdcb2081ea553a04bdad756693f5" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-kotlin-ng" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e800ebbda938acfbf224f4d2c34947a31994b1295ee6e819b65226c7b51b4450" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-language" version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "009994f150cc0cd50ff54917d5bc8bffe8cad10ca10d81c34da2ec421ae61782" +[[package]] +name = "tree-sitter-objc" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ca8bb556423fc176f0535e79d525f783a6684d3c9da81bf9d905303c129e1d2" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-php" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d8c17c3ab69052c5eeaa7ff5cd972dd1bc25d1b97ee779fec391ad3b5df5592" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-python" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bf85fd39652e740bf60f46f4cda9492c3a9ad75880575bf14960f775cb74a1c" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-rust" version = "0.24.0" @@ -1198,6 +1310,26 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-swift" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ef216011c3e3df4fa864736f347cb8d509b1066cf0c8549fb1fd81ac9832e59" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-typescript" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5f76ed8d947a75cc446d5fccd8b602ebf0cde64ccf2ffa434d873d7a575eff" 
+dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "typenum" version = "1.19.0" diff --git a/crates/liyi/Cargo.toml b/crates/liyi/Cargo.toml index 7410230..d6658e8 100644 --- a/crates/liyi/Cargo.toml +++ b/crates/liyi/Cargo.toml @@ -18,6 +18,18 @@ ignore = "0.4" regex = "1" tree-sitter = "0.26.6" tree-sitter-rust = "0.24.0" +tree-sitter-python = "0.25.0" +tree-sitter-go = "0.25.0" +tree-sitter-javascript = "0.25.0" +tree-sitter-typescript = "0.23.2" +tree-sitter-c = "0.24.1" +tree-sitter-cpp = "0.23.4" +tree-sitter-java = "0.23.5" +tree-sitter-c-sharp = "0.23.1" +tree-sitter-php = "0.24.2" +tree-sitter-objc = "3.0.2" +tree-sitter-kotlin-ng = "1.1.0" +tree-sitter-swift = "0.7.1" [dev-dependencies] proptest = "1" diff --git a/crates/liyi/src/discovery.rs b/crates/liyi/src/discovery.rs index 2eb7739..113680a 100644 --- a/crates/liyi/src/discovery.rs +++ b/crates/liyi/src/discovery.rs @@ -256,7 +256,7 @@ mod tests { fs::write(sub.join("inner.rs"), "").unwrap(); fs::write(sub.join("inner.rs.liyi.jsonc"), "{}").unwrap(); - let scoped = discover(root, &[sub.clone()]); + let scoped = discover(root, std::slice::from_ref(&sub)); assert_eq!(scoped.sidecars.len(), 1); assert_eq!(scoped.sidecars[0].repo_relative_source, "sub/inner.rs"); diff --git a/crates/liyi/src/tree_path/lang_c.rs b/crates/liyi/src/tree_path/lang_c.rs new file mode 100644 index 0000000..0531f7c --- /dev/null +++ b/crates/liyi/src/tree_path/lang_c.rs @@ -0,0 +1,159 @@ +use super::LanguageConfig; + +use tree_sitter::Node; + +/// Extract the function name from a C/C++ `function_definition` node. +/// +/// C/C++ functions store their name inside the `declarator` field chain: +/// `function_definition` → (field `declarator`) `function_declarator` +/// → (field `declarator`) `identifier` / `field_identifier`. +/// Pointer declarators and other wrappers may appear in the chain; +/// we unwrap them until we find a `function_declarator`. 
+pub(super) fn c_extract_declarator_name(node: &Node, source: &str) -> Option { + let declarator = node.child_by_field_name("declarator")?; + let func_decl = unwrap_to_function_declarator(&declarator)?; + let name_node = func_decl.child_by_field_name("declarator")?; + Some(source[name_node.byte_range()].to_string()) +} + +/// Walk through pointer_declarator / parenthesized_declarator / attributed_declarator +/// wrappers to find the inner `function_declarator`. +fn unwrap_to_function_declarator<'a>(node: &Node<'a>) -> Option> { + match node.kind() { + "function_declarator" => Some(*node), + "pointer_declarator" | "parenthesized_declarator" | "attributed_declarator" => { + let inner = node.child_by_field_name("declarator")?; + unwrap_to_function_declarator(&inner) + } + _ => None, + } +} + +/// Custom name extraction for C nodes. +/// +/// Handles `function_definition` (name in declarator chain) and +/// `type_definition` (name in declarator field, which is a type_identifier). +fn c_node_name(node: &Node, source: &str) -> Option { + match node.kind() { + "function_definition" => c_extract_declarator_name(node, source), + "type_definition" => { + // typedef: the 'declarator' field holds the new type name + let declarator = node.child_by_field_name("declarator")?; + Some(source[declarator.byte_range()].to_string()) + } + _ => None, + } +} + +/// C language configuration. 
+pub(super) static CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_c::LANGUAGE.into(), + extensions: &["c", "h"], + kind_map: &[ + ("fn", "function_definition"), + ("struct", "struct_specifier"), + ("enum", "enum_specifier"), + ("typedef", "type_definition"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body"], + custom_name: Some(c_node_name), +}; + +#[cfg(test)] +mod tests { + use crate::tree_path::*; + use std::path::Path; + + const SAMPLE_C: &str = r#"#include + +struct Point { + int x; + int y; +}; + +enum Color { RED, GREEN, BLUE }; + +typedef struct Point Point_t; + +void process(int x, int y) { + printf("hello"); +} + +static int helper(void) { + return 42; +} +"#; + + #[test] + fn resolve_c_function() { + let span = resolve_tree_path(SAMPLE_C, "fn::process", Language::C); + assert!(span.is_some(), "should resolve fn::process"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_C.lines().collect(); + assert!( + lines[start - 1].contains("void process"), + "span should point to process function, got: {}", + lines[start - 1] + ); + } + + #[test] + fn resolve_c_struct() { + let span = resolve_tree_path(SAMPLE_C, "struct::Point", Language::C); + assert!(span.is_some(), "should resolve struct::Point"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_C.lines().collect(); + assert!( + lines[start - 1].contains("struct Point"), + "span should point to Point struct" + ); + } + + #[test] + fn resolve_c_enum() { + let span = resolve_tree_path(SAMPLE_C, "enum::Color", Language::C); + assert!(span.is_some(), "should resolve enum::Color"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_C.lines().collect(); + assert!( + lines[start - 1].contains("enum Color"), + "span should point to Color enum" + ); + } + + #[test] + fn resolve_c_typedef() { + let span = resolve_tree_path(SAMPLE_C, "typedef::Point_t", Language::C); + assert!(span.is_some(), "should resolve 
typedef::Point_t"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_C.lines().collect(); + assert!( + lines[start - 1].contains("typedef"), + "span should point to typedef" + ); + } + + #[test] + fn compute_c_function_path() { + let span = resolve_tree_path(SAMPLE_C, "fn::process", Language::C).unwrap(); + let path = compute_tree_path(SAMPLE_C, span, Language::C); + assert_eq!(path, "fn::process"); + } + + #[test] + fn roundtrip_c() { + for tp in &["fn::process", "fn::helper", "struct::Point", "enum::Color"] { + let span = resolve_tree_path(SAMPLE_C, tp, Language::C).unwrap(); + let path = compute_tree_path(SAMPLE_C, span, Language::C); + assert_eq!(&path, tp, "roundtrip failed for {tp}"); + } + } + + #[test] + fn detect_c_extensions() { + assert_eq!(detect_language(Path::new("main.c")), Some(Language::C)); + assert_eq!(detect_language(Path::new("header.h")), Some(Language::C)); + } +} diff --git a/crates/liyi/src/tree_path/lang_c.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_c.rs.liyi.jsonc new file mode 100644 index 0000000..6f9bac0 --- /dev/null +++ b/crates/liyi/src/tree_path/lang_c.rs.liyi.jsonc @@ -0,0 +1,55 @@ +// liyi v0.1 spec file +{ + "version": "0.1", + "source": "crates/liyi/src/tree_path/lang_c.rs", + "specs": [ + { + "item": "c_extract_declarator_name", + "reviewed": false, + "intent": "Extract the function name from a C/C++ function_definition node by walking the declarator field chain (function_definition → function_declarator → declarator identifier), unwrapping pointer/parenthesized/attributed wrappers as needed.", + "source_span": [ + 12, + 17 + ], + "tree_path": "fn::c_extract_declarator_name", + "source_hash": "sha256:b770215018f35c818e0fe450fcee058c8407d827f8fb2bfc2026bfd925b981e4", + "source_anchor": "pub(super) fn c_extract_declarator_name(node: &Node, source: &str) -> Option {" + }, + { + "item": "unwrap_to_function_declarator", + "reviewed": false, + "intent": "Recursively unwrap pointer_declarator, 
parenthesized_declarator, and attributed_declarator wrapper nodes to locate the inner function_declarator node. Must return None if no function_declarator is found in the chain.", + "source_span": [ + 21, + 30 + ], + "tree_path": "fn::unwrap_to_function_declarator", + "source_hash": "sha256:4e203fab2d264c4f02df280f88a191ae9344828e17bb91dd98c357c327baf7a0", + "source_anchor": "fn unwrap_to_function_declarator<'a>(node: &Node<'a>) -> Option> {" + }, + { + "item": "c_node_name", + "reviewed": false, + "intent": "Custom name extractor for C AST nodes: dispatch to c_extract_declarator_name for function_definition, and read the declarator field directly for type_definition (typedef). Return None for all other node kinds.", + "source_span": [ + 36, + 46 + ], + "tree_path": "fn::c_node_name", + "source_hash": "sha256:6c373de50a8b2dee1ca251498a38f9e0397d1061554089e7bc2425434e0fac67", + "source_anchor": "fn c_node_name(node: &Node, source: &str) -> Option {" + }, + { + "item": "CONFIG", + "reviewed": false, + "intent": "Define the C language configuration for tree_path resolution: register tree-sitter-c grammar, .c/.h extensions, kind mappings for fn/struct/enum/typedef, and wire the custom c_node_name callback for name extraction.", + "source_span": [ + 49, + 62 + ], + "tree_path": "static::CONFIG", + "source_hash": "sha256:9d7828f146ec68282253daedc10b6f3e19b884b881703d558c83fc4618d0012d", + "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" + } + ] +} diff --git a/crates/liyi/src/tree_path/lang_cpp.rs b/crates/liyi/src/tree_path/lang_cpp.rs new file mode 100644 index 0000000..5ff1378 --- /dev/null +++ b/crates/liyi/src/tree_path/lang_cpp.rs @@ -0,0 +1,158 @@ +use super::LanguageConfig; +use super::lang_c::c_extract_declarator_name; + +use tree_sitter::Node; + +/// Custom name extraction for C++ nodes. +/// +/// Extends `c_node_name` with C++-specific patterns: +/// - `template_declaration`: transparent wrapper — extracts name from inner decl. 
+/// - `namespace_definition`: name is in a `namespace_identifier` child (no "name" field). +fn cpp_node_name(node: &Node, source: &str) -> Option { + match node.kind() { + "function_definition" => c_extract_declarator_name(node, source), + "type_definition" | "alias_declaration" => { + let name_node = node + .child_by_field_name("name") + .or_else(|| node.child_by_field_name("declarator"))?; + Some(source[name_node.byte_range()].to_string()) + } + "template_declaration" => { + // template_declaration wraps an inner declaration — find it and + // extract the name from the inner node. + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + match child.kind() { + "function_definition" => return c_extract_declarator_name(&child, source), + "class_specifier" | "struct_specifier" | "enum_specifier" + | "concept_definition" | "alias_declaration" => { + let n = child.child_by_field_name("name")?; + return Some(source[n.byte_range()].to_string()); + } + // A template can also wrap another template_declaration (nested) + "template_declaration" => return cpp_node_name(&child, source), + _ => {} + } + } + None + } + _ => None, + } +} + +/// C++ language configuration. 
+pub(super) static CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_cpp::LANGUAGE.into(), + extensions: &["cpp", "cc", "cxx", "hpp", "hh", "hxx", "h++", "c++"], + kind_map: &[ + ("fn", "function_definition"), + ("class", "class_specifier"), + ("struct", "struct_specifier"), + ("namespace", "namespace_definition"), + ("enum", "enum_specifier"), + ("template", "template_declaration"), + ("typedef", "type_definition"), + ("using", "alias_declaration"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body", "declaration_list"], + custom_name: Some(cpp_node_name), +}; + +#[cfg(test)] +mod tests { + use crate::tree_path::*; + use std::path::Path; + + const SAMPLE_CPP: &str = r#"namespace math { + +class Calculator { +public: + int add(int a, int b) { + return a + b; + } +}; + +struct Point { + int x, y; +}; + +enum class Color { Red, Green, Blue }; + +} + +void standalone() {} +"#; + + #[test] + fn resolve_cpp_namespace() { + let span = resolve_tree_path(SAMPLE_CPP, "namespace::math", Language::Cpp); + assert!(span.is_some(), "should resolve namespace::math"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_CPP.lines().collect(); + assert!( + lines[start - 1].contains("namespace math"), + "span should point to namespace math, got: {}", + lines[start - 1] + ); + } + + #[test] + fn resolve_cpp_class_in_namespace() { + let span = resolve_tree_path( + SAMPLE_CPP, + "namespace::math::class::Calculator", + Language::Cpp, + ); + assert!( + span.is_some(), + "should resolve namespace::math::class::Calculator" + ); + } + + #[test] + fn resolve_cpp_method_in_class() { + let span = resolve_tree_path( + SAMPLE_CPP, + "namespace::math::class::Calculator::fn::add", + Language::Cpp, + ); + assert!(span.is_some(), "should resolve nested method"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_CPP.lines().collect(); + assert!( + lines[start - 1].contains("add"), + "span should point to add method" + ); 
+ } + + #[test] + fn resolve_cpp_standalone() { + let span = resolve_tree_path(SAMPLE_CPP, "fn::standalone", Language::Cpp); + assert!(span.is_some(), "should resolve fn::standalone"); + } + + #[test] + fn resolve_cpp_enum() { + let span = resolve_tree_path(SAMPLE_CPP, "namespace::math::enum::Color", Language::Cpp); + assert!(span.is_some(), "should resolve enum in namespace"); + } + + #[test] + fn roundtrip_cpp() { + let span = resolve_tree_path(SAMPLE_CPP, "fn::standalone", Language::Cpp).unwrap(); + let path = compute_tree_path(SAMPLE_CPP, span, Language::Cpp); + assert_eq!(path, "fn::standalone"); + } + + #[test] + fn detect_cpp_extensions() { + assert_eq!(detect_language(Path::new("main.cpp")), Some(Language::Cpp)); + assert_eq!(detect_language(Path::new("main.cc")), Some(Language::Cpp)); + assert_eq!( + detect_language(Path::new("header.hpp")), + Some(Language::Cpp) + ); + } +} diff --git a/crates/liyi/src/tree_path/lang_cpp.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_cpp.rs.liyi.jsonc new file mode 100644 index 0000000..278994d --- /dev/null +++ b/crates/liyi/src/tree_path/lang_cpp.rs.liyi.jsonc @@ -0,0 +1,31 @@ +// liyi v0.1 spec file +{ + "version": "0.1", + "source": "crates/liyi/src/tree_path/lang_cpp.rs", + "specs": [ + { + "item": "cpp_node_name", + "reviewed": false, + "intent": "Custom name extractor for C++ AST nodes extending the C extractor: handle function_definition via c_extract_declarator_name, type_definition/alias_declaration via name or declarator fields, and template_declaration by transparently extracting the name from the wrapped inner declaration (class, struct, enum, concept, alias, function, or nested template).", + "source_span": [ + 11, + 41 + ], + "tree_path": "fn::cpp_node_name", + "source_hash": "sha256:4f9ebbcef1c9124312eed3f03d46f02bc63f85b5d9fd2e6a3ac44d5e3b5f9c08", + "source_anchor": "fn cpp_node_name(node: &Node, source: &str) -> Option {" + }, + { + "item": "CONFIG", + "reviewed": false, + "intent": "Define the C++ 
language configuration for tree_path resolution: register tree-sitter-cpp grammar, C++ file extensions (.cpp/.cc/.cxx/.hpp etc.), kind mappings for fn/class/struct/namespace/enum/template/typedef/using, and wire the custom cpp_node_name callback.", + "source_span": [ + 44, + 61 + ], + "tree_path": "static::CONFIG", + "source_hash": "sha256:2fe337bd8a4a25f0ad98cf3106f17746e3898a29e4c6b6edbb86ac4e80ece55c", + "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" + } + ] +} diff --git a/crates/liyi/src/tree_path/lang_csharp.rs b/crates/liyi/src/tree_path/lang_csharp.rs new file mode 100644 index 0000000..7e476a4 --- /dev/null +++ b/crates/liyi/src/tree_path/lang_csharp.rs @@ -0,0 +1,145 @@ +use super::LanguageConfig; + +/// C# language configuration. +pub(super) static CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_c_sharp::LANGUAGE.into(), + extensions: &["cs"], + kind_map: &[ + ("fn", "method_declaration"), + ("class", "class_declaration"), + ("interface", "interface_declaration"), + ("enum", "enum_declaration"), + ("struct", "struct_declaration"), + ("namespace", "namespace_declaration"), + ("constructor", "constructor_declaration"), + ("property", "property_declaration"), + ("record", "record_declaration"), + ("delegate", "delegate_declaration"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body"], + custom_name: None, +}; + +#[cfg(test)] +mod tests { + use crate::tree_path::*; + use std::path::Path; + + const SAMPLE_CSHARP: &str = r#"namespace MyApp { + +class Calculator { + public int Add(int a, int b) { + return a + b; + } + + public string Name { get; set; } + + public Calculator() {} +} + +interface IComputable { + int Compute(int x); +} + +enum Direction { + North, South, East, West +} + +struct Vector { + public int X; + public int Y; +} + +record Person(string Name, int Age); + +} +"#; + + #[test] + fn resolve_csharp_class() { + let span = resolve_tree_path( + SAMPLE_CSHARP, + 
"namespace::MyApp::class::Calculator", + Language::CSharp, + ); + assert!( + span.is_some(), + "should resolve namespace::MyApp::class::Calculator" + ); + } + + #[test] + fn resolve_csharp_method() { + let span = resolve_tree_path( + SAMPLE_CSHARP, + "namespace::MyApp::class::Calculator::fn::Add", + Language::CSharp, + ); + assert!( + span.is_some(), + "should resolve method in class in namespace" + ); + } + + #[test] + fn resolve_csharp_property() { + let span = resolve_tree_path( + SAMPLE_CSHARP, + "namespace::MyApp::class::Calculator::property::Name", + Language::CSharp, + ); + assert!(span.is_some(), "should resolve property::Name"); + } + + #[test] + fn resolve_csharp_interface() { + let span = resolve_tree_path( + SAMPLE_CSHARP, + "namespace::MyApp::interface::IComputable", + Language::CSharp, + ); + assert!(span.is_some(), "should resolve interface::IComputable"); + } + + #[test] + fn resolve_csharp_struct() { + let span = resolve_tree_path( + SAMPLE_CSHARP, + "namespace::MyApp::struct::Vector", + Language::CSharp, + ); + assert!(span.is_some(), "should resolve struct::Vector"); + } + + #[test] + fn resolve_csharp_enum() { + let span = resolve_tree_path( + SAMPLE_CSHARP, + "namespace::MyApp::enum::Direction", + Language::CSharp, + ); + assert!(span.is_some(), "should resolve enum::Direction"); + } + + #[test] + fn roundtrip_csharp() { + let span = resolve_tree_path( + SAMPLE_CSHARP, + "namespace::MyApp::class::Calculator::fn::Add", + Language::CSharp, + ) + .unwrap(); + let path = compute_tree_path(SAMPLE_CSHARP, span, Language::CSharp); + assert_eq!(path, "namespace::MyApp::class::Calculator::fn::Add"); + } + + #[test] + fn detect_csharp_extension() { + assert_eq!( + detect_language(Path::new("Program.cs")), + Some(Language::CSharp) + ); + } +} diff --git a/crates/liyi/src/tree_path/lang_csharp.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_csharp.rs.liyi.jsonc new file mode 100644 index 0000000..03248e6 --- /dev/null +++ 
b/crates/liyi/src/tree_path/lang_csharp.rs.liyi.jsonc @@ -0,0 +1,19 @@ +// liyi v0.1 spec file +{ + "version": "0.1", + "source": "crates/liyi/src/tree_path/lang_csharp.rs", + "specs": [ + { + "item": "CONFIG", + "reviewed": false, + "intent": "Define the C# language configuration for tree_path resolution: register tree-sitter-c-sharp grammar, .cs extension, kind mappings for fn/class/interface/enum/struct/namespace/constructor/property/record/delegate, using standard name field with no custom name extraction.", + "source_span": [ + 4, + 23 + ], + "tree_path": "static::CONFIG", + "source_hash": "sha256:51d280af9946356cc55cd9d35843d74bbf603996767a4004ee4923946cec1624", + "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" + } + ] +} diff --git a/crates/liyi/src/tree_path/lang_go.rs b/crates/liyi/src/tree_path/lang_go.rs new file mode 100644 index 0000000..3e33c6c --- /dev/null +++ b/crates/liyi/src/tree_path/lang_go.rs @@ -0,0 +1,312 @@ +use super::LanguageConfig; + +use tree_sitter::Node; + +/// Custom name extraction for Go nodes. +/// +/// Handles three Go-specific patterns: +/// - `method_declaration`: encodes receiver type into the name, producing +/// `ReceiverType.MethodName` or `(*ReceiverType).MethodName`. +/// - `type_declaration`: navigates to the inner `type_spec` for the name. +/// - `const_declaration` / `var_declaration`: navigates to the inner spec. 
+fn go_node_name(node: &Node, source: &str) -> Option { + match node.kind() { + "method_declaration" => { + let method_name_node = node.child_by_field_name("name")?; + let method_name = &source[method_name_node.byte_range()]; + + let receiver = node.child_by_field_name("receiver")?; + let mut cursor = receiver.walk(); + let param = receiver + .children(&mut cursor) + .find(|c| c.kind() == "parameter_declaration")?; + + let type_node = param.child_by_field_name("type")?; + let receiver_type = if type_node.kind() == "pointer_type" { + let mut cursor2 = type_node.walk(); + let inner = type_node + .children(&mut cursor2) + .find(|c| c.kind() == "type_identifier")?; + format!("(*{})", &source[inner.byte_range()]) + } else { + source[type_node.byte_range()].to_string() + }; + + Some(format!("{receiver_type}.{method_name}")) + } + "type_declaration" => { + let mut cursor = node.walk(); + let type_spec = node + .children(&mut cursor) + .find(|c| c.kind() == "type_spec")?; + let name_node = type_spec.child_by_field_name("name")?; + Some(source[name_node.byte_range()].to_string()) + } + "const_declaration" => { + let mut cursor = node.walk(); + let spec = node + .children(&mut cursor) + .find(|c| c.kind() == "const_spec")?; + let name_node = spec.child_by_field_name("name")?; + Some(source[name_node.byte_range()].to_string()) + } + "var_declaration" => { + let mut cursor = node.walk(); + let spec = node + .children(&mut cursor) + .find(|c| c.kind() == "var_spec")?; + let name_node = spec.child_by_field_name("name")?; + Some(source[name_node.byte_range()].to_string()) + } + _ => None, + } +} + +/// Go language configuration. 
+pub(super) static CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_go::LANGUAGE.into(), + extensions: &["go"], + kind_map: &[ + ("fn", "function_declaration"), + ("method", "method_declaration"), + ("type", "type_declaration"), + ("const", "const_declaration"), + ("var", "var_declaration"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body"], + custom_name: Some(go_node_name), +}; + +#[cfg(test)] +mod tests { + use crate::tree_path::*; + + const SAMPLE_GO: &str = r#"package main + +import "fmt" + +// Calculator performs arithmetic operations +type Calculator struct { + value int +} + +// Reader is an interface +type Reader interface { + Read(p []byte) (n int, err error) +} + +// MaxRetries is a constant +const MaxRetries = 3 + +// DefaultTimeout is a var +var DefaultTimeout = 30 + +// Add adds a number to the calculator's value +func (c *Calculator) Add(n int) { + c.value += n +} + +// Value returns the current value +func (c Calculator) Value() int { + return c.value +} + +// Add is a standalone function +func Add(a, b int) int { + return a + b +} +"#; + + #[test] + fn resolve_go_function() { + let span = resolve_tree_path(SAMPLE_GO, "fn::Add", Language::Go); + assert!(span.is_some(), "should resolve fn::Add"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_GO.lines().collect(); + assert!( + lines[start - 1].contains("func Add("), + "span should point to Add function, got: {}", + lines[start - 1] + ); + } + + #[test] + fn resolve_go_pointer_method() { + let span = resolve_tree_path(SAMPLE_GO, "method::(*Calculator).Add", Language::Go); + assert!(span.is_some(), "should resolve method::(*Calculator).Add"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_GO.lines().collect(); + assert!( + lines[start - 1].contains("func (c *Calculator) Add"), + "span should point to Add method, got: {}", + lines[start - 1] + ); + } + + #[test] + fn resolve_go_value_method() { + let span = 
resolve_tree_path(SAMPLE_GO, "method::Calculator.Value", Language::Go); + assert!(span.is_some(), "should resolve method::Calculator.Value"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_GO.lines().collect(); + assert!( + lines[start - 1].contains("func (c Calculator) Value"), + "span should point to Value method, got: {}", + lines[start - 1] + ); + } + + #[test] + fn resolve_go_type_struct() { + let span = resolve_tree_path(SAMPLE_GO, "type::Calculator", Language::Go); + assert!(span.is_some(), "should resolve type::Calculator"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_GO.lines().collect(); + assert!( + lines[start - 1].contains("type Calculator struct"), + "span should point to Calculator struct, got: {}", + lines[start - 1] + ); + } + + #[test] + fn resolve_go_type_interface() { + let span = resolve_tree_path(SAMPLE_GO, "type::Reader", Language::Go); + assert!(span.is_some(), "should resolve type::Reader"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_GO.lines().collect(); + assert!( + lines[start - 1].contains("type Reader interface"), + "span should point to Reader interface, got: {}", + lines[start - 1] + ); + } + + #[test] + fn resolve_go_const() { + let span = resolve_tree_path(SAMPLE_GO, "const::MaxRetries", Language::Go); + assert!(span.is_some(), "should resolve const::MaxRetries"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_GO.lines().collect(); + assert!( + lines[start - 1].contains("const MaxRetries"), + "span should point to MaxRetries const, got: {}", + lines[start - 1] + ); + } + + #[test] + fn resolve_go_var() { + let span = resolve_tree_path(SAMPLE_GO, "var::DefaultTimeout", Language::Go); + assert!(span.is_some(), "should resolve var::DefaultTimeout"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_GO.lines().collect(); + assert!( + lines[start - 1].contains("var DefaultTimeout"), + "span should point to DefaultTimeout 
var, got: {}", + lines[start - 1] + ); + } + + #[test] + fn compute_go_function_path() { + let lines: Vec<&str> = SAMPLE_GO.lines().collect(); + let start = lines + .iter() + .enumerate() + .rev() + .find(|(_, l)| l.contains("func Add(")) + .unwrap() + .0 + + 1; + let end = lines.len(); + + let path = compute_tree_path(SAMPLE_GO, [start, end], Language::Go); + assert_eq!(path, "fn::Add"); + } + + #[test] + fn compute_go_pointer_method_path() { + let lines: Vec<&str> = SAMPLE_GO.lines().collect(); + let start = lines + .iter() + .position(|l| l.contains("func (c *Calculator) Add")) + .unwrap() + + 1; + let end = lines + .iter() + .enumerate() + .skip(start) + .find(|(_, l)| l.starts_with('}')) + .map(|(i, _)| i + 1) + .unwrap_or(lines.len()); + + let path = compute_tree_path(SAMPLE_GO, [start, end], Language::Go); + assert_eq!(path, "method::(*Calculator).Add"); + } + + #[test] + fn compute_go_value_method_path() { + let lines: Vec<&str> = SAMPLE_GO.lines().collect(); + let start = lines + .iter() + .position(|l| l.contains("func (c Calculator) Value")) + .unwrap() + + 1; + let end = lines + .iter() + .enumerate() + .skip(start) + .find(|(_, l)| l.starts_with('}')) + .map(|(i, _)| i + 1) + .unwrap_or(lines.len()); + + let path = compute_tree_path(SAMPLE_GO, [start, end], Language::Go); + assert_eq!(path, "method::Calculator.Value"); + } + + #[test] + fn compute_go_type_path() { + let lines: Vec<&str> = SAMPLE_GO.lines().collect(); + let start = lines + .iter() + .position(|l| l.contains("type Calculator struct")) + .unwrap() + + 1; + let end = lines + .iter() + .enumerate() + .skip(start) + .find(|(_, l)| l.starts_with('}')) + .map(|(i, _)| i + 1) + .unwrap_or(lines.len()); + + let path = compute_tree_path(SAMPLE_GO, [start, end], Language::Go); + assert_eq!(path, "type::Calculator"); + } + + #[test] + fn roundtrip_go() { + let resolved_span = resolve_tree_path(SAMPLE_GO, "fn::Add", Language::Go).unwrap(); + + let computed_path = compute_tree_path(SAMPLE_GO, 
resolved_span, Language::Go); + assert_eq!(computed_path, "fn::Add"); + + let re_resolved = resolve_tree_path(SAMPLE_GO, &computed_path, Language::Go).unwrap(); + assert_eq!(re_resolved, resolved_span); + } + + #[test] + fn roundtrip_go_method() { + let resolved_span = + resolve_tree_path(SAMPLE_GO, "method::(*Calculator).Add", Language::Go).unwrap(); + + let computed_path = compute_tree_path(SAMPLE_GO, resolved_span, Language::Go); + assert_eq!(computed_path, "method::(*Calculator).Add"); + + let re_resolved = resolve_tree_path(SAMPLE_GO, &computed_path, Language::Go).unwrap(); + assert_eq!(re_resolved, resolved_span); + } +} diff --git a/crates/liyi/src/tree_path/lang_go.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_go.rs.liyi.jsonc new file mode 100644 index 0000000..5d4ee20 --- /dev/null +++ b/crates/liyi/src/tree_path/lang_go.rs.liyi.jsonc @@ -0,0 +1,31 @@ +// liyi v0.1 spec file +{ + "version": "0.1", + "source": "crates/liyi/src/tree_path/lang_go.rs", + "specs": [ + { + "item": "go_node_name", + "reviewed": false, + "intent": "Custom name extractor for Go AST nodes: encode method receivers into the name as ReceiverType.Method or (*ReceiverType).Method for method_declaration; navigate into type_spec for type_declaration names; navigate into const_spec/var_spec for const_declaration/var_declaration names. 
Return None for unrecognized kinds.", + "source_span": [ + 12, + 63 + ], + "tree_path": "fn::go_node_name", + "source_hash": "sha256:5198217ac70bb06963c30ee0f9f0daa9972cdb47834ed82cf99b800f8b043620", + "source_anchor": "fn go_node_name(node: &Node, source: &str) -> Option {" + }, + { + "item": "CONFIG", + "reviewed": false, + "intent": "Define the Go language configuration for tree_path resolution: register tree-sitter-go grammar, .go extension, kind mappings for fn/method/type/const/var, and wire the custom go_node_name callback for receiver-encoded method names.", + "source_span": [ + 66, + 80 + ], + "tree_path": "static::CONFIG", + "source_hash": "sha256:da63d7b2578d2dfaef76a198b5ce9258f3f0fd5e50ae7a2f5b67ad3bbabc972c", + "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" + } + ] +} diff --git a/crates/liyi/src/tree_path/lang_java.rs b/crates/liyi/src/tree_path/lang_java.rs new file mode 100644 index 0000000..59ea2a8 --- /dev/null +++ b/crates/liyi/src/tree_path/lang_java.rs @@ -0,0 +1,111 @@ +use super::LanguageConfig; + +/// Java language configuration. 
+pub(super) static CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_java::LANGUAGE.into(), + extensions: &["java"], + kind_map: &[ + ("fn", "method_declaration"), + ("class", "class_declaration"), + ("interface", "interface_declaration"), + ("enum", "enum_declaration"), + ("constructor", "constructor_declaration"), + ("record", "record_declaration"), + ("annotation", "annotation_type_declaration"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body"], + custom_name: None, +}; + +#[cfg(test)] +mod tests { + use crate::tree_path::*; + use std::path::Path; + + const SAMPLE_JAVA: &str = r#"package com.example; + +public class Calculator { + public int add(int a, int b) { + return a + b; + } + + public Calculator() { + // constructor + } +} + +interface Computable { + int compute(int x); +} + +enum Direction { + NORTH, SOUTH, EAST, WEST +} + +record Point(int x, int y) {} +"#; + + #[test] + fn resolve_java_class() { + let span = resolve_tree_path(SAMPLE_JAVA, "class::Calculator", Language::Java); + assert!(span.is_some(), "should resolve class::Calculator"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_JAVA.lines().collect(); + assert!( + lines[start - 1].contains("class Calculator"), + "span should point to Calculator class" + ); + } + + #[test] + fn resolve_java_method() { + let span = resolve_tree_path(SAMPLE_JAVA, "class::Calculator::fn::add", Language::Java); + assert!(span.is_some(), "should resolve class::Calculator::fn::add"); + } + + #[test] + fn resolve_java_constructor() { + let span = resolve_tree_path( + SAMPLE_JAVA, + "class::Calculator::constructor::Calculator", + Language::Java, + ); + assert!(span.is_some(), "should resolve constructor"); + } + + #[test] + fn resolve_java_interface() { + let span = resolve_tree_path(SAMPLE_JAVA, "interface::Computable", Language::Java); + assert!(span.is_some(), "should resolve interface::Computable"); + } + + #[test] + fn resolve_java_enum() { + let 
span = resolve_tree_path(SAMPLE_JAVA, "enum::Direction", Language::Java); + assert!(span.is_some(), "should resolve enum::Direction"); + } + + #[test] + fn resolve_java_record() { + let span = resolve_tree_path(SAMPLE_JAVA, "record::Point", Language::Java); + assert!(span.is_some(), "should resolve record::Point"); + } + + #[test] + fn roundtrip_java() { + let span = + resolve_tree_path(SAMPLE_JAVA, "class::Calculator::fn::add", Language::Java).unwrap(); + let path = compute_tree_path(SAMPLE_JAVA, span, Language::Java); + assert_eq!(path, "class::Calculator::fn::add"); + } + + #[test] + fn detect_java_extension() { + assert_eq!( + detect_language(Path::new("Main.java")), + Some(Language::Java) + ); + } +} diff --git a/crates/liyi/src/tree_path/lang_java.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_java.rs.liyi.jsonc new file mode 100644 index 0000000..1b07c34 --- /dev/null +++ b/crates/liyi/src/tree_path/lang_java.rs.liyi.jsonc @@ -0,0 +1,19 @@ +// liyi v0.1 spec file +{ + "version": "0.1", + "source": "crates/liyi/src/tree_path/lang_java.rs", + "specs": [ + { + "item": "CONFIG", + "reviewed": false, + "intent": "Define the Java language configuration for tree_path resolution: register tree-sitter-java grammar, .java extension, kind mappings for fn/class/interface/enum/constructor/record/annotation, using standard name field with no custom name extraction.", + "source_span": [ + 4, + 20 + ], + "tree_path": "static::CONFIG", + "source_hash": "sha256:1017a74a175aa52b8d3704e6ae8e28f5587bbacffbb265398ad2aa267b7127cf", + "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" + } + ] +} diff --git a/crates/liyi/src/tree_path/lang_javascript.rs b/crates/liyi/src/tree_path/lang_javascript.rs new file mode 100644 index 0000000..522f8f2 --- /dev/null +++ b/crates/liyi/src/tree_path/lang_javascript.rs @@ -0,0 +1,120 @@ +use super::LanguageConfig; + +/// JavaScript language configuration. 
+pub(super) static CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_javascript::LANGUAGE.into(), + extensions: &["js", "mjs", "cjs", "jsx"], + kind_map: &[ + ("fn", "function_declaration"), + ("class", "class_declaration"), + ("method", "method_definition"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body"], + custom_name: None, +}; + +#[cfg(test)] +mod tests { + use crate::tree_path::*; + + const SAMPLE_JS: &str = r#"// A simple counter module + +class Counter { + constructor(initial = 0) { + this.count = initial; + } + + increment() { + this.count++; + } + + getValue() { + return this.count; + } +} + +function createCounter(initial) { + return new Counter(initial); +} + +const utils = { + formatCount: (n) => `${n} items` +}; +"#; + + #[test] + fn resolve_js_function() { + let span = resolve_tree_path(SAMPLE_JS, "fn::createCounter", Language::JavaScript); + assert!(span.is_some(), "should resolve fn::createCounter"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_JS.lines().collect(); + assert!( + lines[start - 1].contains("function createCounter"), + "span should point to createCounter function" + ); + } + + #[test] + fn resolve_js_class() { + let span = resolve_tree_path(SAMPLE_JS, "class::Counter", Language::JavaScript); + assert!(span.is_some(), "should resolve class::Counter"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_JS.lines().collect(); + assert!( + lines[start - 1].contains("class Counter"), + "span should point to Counter class" + ); + } + + #[test] + fn resolve_js_method() { + let span = resolve_tree_path( + SAMPLE_JS, + "class::Counter::method::increment", + Language::JavaScript, + ); + assert!( + span.is_some(), + "should resolve class::Counter::method::increment" + ); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_JS.lines().collect(); + assert!( + lines[start - 1].contains("increment()"), + "span should point to increment method" + 
); + } + + #[test] + fn compute_js_function_path() { + let lines: Vec<&str> = SAMPLE_JS.lines().collect(); + let start = lines + .iter() + .position(|l| l.contains("function createCounter")) + .unwrap() + + 1; + let end = lines.len() - 3; // Rough end + + let path = compute_tree_path(SAMPLE_JS, [start, end], Language::JavaScript); + assert_eq!(path, "fn::createCounter"); + } + + #[test] + fn roundtrip_js() { + let resolved_span = resolve_tree_path( + SAMPLE_JS, + "class::Counter::method::getValue", + Language::JavaScript, + ) + .unwrap(); + + let computed_path = compute_tree_path(SAMPLE_JS, resolved_span, Language::JavaScript); + assert_eq!(computed_path, "class::Counter::method::getValue"); + + let re_resolved = + resolve_tree_path(SAMPLE_JS, &computed_path, Language::JavaScript).unwrap(); + assert_eq!(re_resolved, resolved_span); + } +} diff --git a/crates/liyi/src/tree_path/lang_javascript.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_javascript.rs.liyi.jsonc new file mode 100644 index 0000000..425562f --- /dev/null +++ b/crates/liyi/src/tree_path/lang_javascript.rs.liyi.jsonc @@ -0,0 +1,19 @@ +// liyi v0.1 spec file +{ + "version": "0.1", + "source": "crates/liyi/src/tree_path/lang_javascript.rs", + "specs": [ + { + "item": "CONFIG", + "reviewed": false, + "intent": "Define the JavaScript language configuration for tree_path resolution: register tree-sitter-javascript grammar, .js/.mjs/.cjs/.jsx extensions, kind mappings for fn/class/method, using standard name field with no custom name extraction.", + "source_span": [ + 4, + 16 + ], + "tree_path": "static::CONFIG", + "source_hash": "sha256:5e92f722de7b8368b20bad4c0a46f1c208b2ae45049022f63f2424379f48f775", + "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" + } + ] +} diff --git a/crates/liyi/src/tree_path/lang_kotlin.rs b/crates/liyi/src/tree_path/lang_kotlin.rs new file mode 100644 index 0000000..afba109 --- /dev/null +++ b/crates/liyi/src/tree_path/lang_kotlin.rs @@ -0,0 +1,126 
@@ +use super::LanguageConfig; + +use tree_sitter::Node; + +/// Custom name extraction for Kotlin nodes. +/// +/// Handles `property_declaration` where the name is in a child +/// `variable_declaration` node, and `type_alias` where the name is +/// in an `identifier` child before the `=` (the `type` field is the RHS). +fn kotlin_node_name(node: &Node, source: &str) -> Option { + match node.kind() { + "property_declaration" => { + let mut cursor = node.walk(); + // Name is in the first variable_declaration or identifier child + for child in node.children(&mut cursor) { + if child.kind() == "variable_declaration" { + let name = child.child_by_field_name("name").or_else(|| { + let mut c2 = child.walk(); + child + .children(&mut c2) + .find(|c| c.kind() == "simple_identifier") + })?; + return Some(source[name.byte_range()].to_string()); + } + if child.kind() == "simple_identifier" { + return Some(source[child.byte_range()].to_string()); + } + } + None + } + "type_alias" => { + let mut cursor = node.walk(); + node.children(&mut cursor) + .find(|c| c.kind() == "type_identifier" || c.kind() == "simple_identifier") + .map(|c| source[c.byte_range()].to_string()) + } + _ => None, + } +} + +/// Kotlin language configuration. 
+pub(super) static CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_kotlin_ng::LANGUAGE.into(), + extensions: &["kt", "kts"], + kind_map: &[ + ("fn", "function_declaration"), + ("class", "class_declaration"), + ("object", "object_declaration"), + ("property", "property_declaration"), + ("typealias", "type_alias"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body", "class_body"], + custom_name: Some(kotlin_node_name), +}; + +#[cfg(test)] +mod tests { + use crate::tree_path::*; + use std::path::Path; + + const SAMPLE_KOTLIN: &str = r#"class Calculator { + fun add(a: Int, b: Int): Int { + return a + b + } +} + +object Singleton { + fun instance(): Singleton = this +} + +fun standalone(): Int { + return 42 +} + +typealias StringList = List +"#; + + #[test] + fn resolve_kotlin_class() { + let span = resolve_tree_path(SAMPLE_KOTLIN, "class::Calculator", Language::Kotlin); + assert!(span.is_some(), "should resolve class::Calculator"); + } + + #[test] + fn resolve_kotlin_method() { + let span = resolve_tree_path( + SAMPLE_KOTLIN, + "class::Calculator::fn::add", + Language::Kotlin, + ); + assert!(span.is_some(), "should resolve class::Calculator::fn::add"); + } + + #[test] + fn resolve_kotlin_object() { + let span = resolve_tree_path(SAMPLE_KOTLIN, "object::Singleton", Language::Kotlin); + assert!(span.is_some(), "should resolve object::Singleton"); + } + + #[test] + fn resolve_kotlin_function() { + let span = resolve_tree_path(SAMPLE_KOTLIN, "fn::standalone", Language::Kotlin); + assert!(span.is_some(), "should resolve fn::standalone"); + } + + #[test] + fn roundtrip_kotlin() { + let span = resolve_tree_path(SAMPLE_KOTLIN, "fn::standalone", Language::Kotlin).unwrap(); + let path = compute_tree_path(SAMPLE_KOTLIN, span, Language::Kotlin); + assert_eq!(path, "fn::standalone"); + } + + #[test] + fn detect_kotlin_extension() { + assert_eq!( + detect_language(Path::new("Main.kt")), + Some(Language::Kotlin) + ); + assert_eq!( + 
detect_language(Path::new("build.gradle.kts")), + Some(Language::Kotlin) + ); + } +} diff --git a/crates/liyi/src/tree_path/lang_kotlin.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_kotlin.rs.liyi.jsonc new file mode 100644 index 0000000..1aa770b --- /dev/null +++ b/crates/liyi/src/tree_path/lang_kotlin.rs.liyi.jsonc @@ -0,0 +1,31 @@ +// liyi v0.1 spec file +{ + "version": "0.1", + "source": "crates/liyi/src/tree_path/lang_kotlin.rs", + "specs": [ + { + "item": "kotlin_node_name", + "reviewed": false, + "intent": "Custom name extractor for Kotlin AST nodes: for property_declaration, find the name inside a variable_declaration child or a simple_identifier child; for type_alias, find the type_identifier or simple_identifier child before the RHS. Return None for unrecognized kinds.", + "source_span": [ + 10, + 39 + ], + "tree_path": "fn::kotlin_node_name", + "source_hash": "sha256:572be5a3c85aef30324c0fa957348b46f397da76ef354d9d7e1e4d7c19f1b27a", + "source_anchor": "fn kotlin_node_name(node: &Node, source: &str) -> Option {" + }, + { + "item": "CONFIG", + "reviewed": false, + "intent": "Define the Kotlin language configuration for tree_path resolution: register tree-sitter-kotlin-ng grammar, .kt/.kts extensions, kind mappings for fn/class/object/property/typealias, and wire the custom kotlin_node_name callback.", + "source_span": [ + 42, + 56 + ], + "tree_path": "static::CONFIG", + "source_hash": "sha256:7bf842fab86334a9e6eb26c98ae34f1b341b8622bfbf2d12681a870ddb71e48a", + "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" + } + ] +} diff --git a/crates/liyi/src/tree_path/lang_objc.rs b/crates/liyi/src/tree_path/lang_objc.rs new file mode 100644 index 0000000..c67af5f --- /dev/null +++ b/crates/liyi/src/tree_path/lang_objc.rs @@ -0,0 +1,144 @@ +use super::LanguageConfig; +use super::lang_c::c_extract_declarator_name; + +use tree_sitter::Node; + +/// Custom name extraction for Objective-C nodes. 
+/// +/// ObjC node types like `class_interface`, `class_implementation`, +/// `protocol_declaration`, `method_declaration`, and `method_definition` +/// do not use standard `name` fields. Their names are extracted from +/// specific child node patterns. +fn objc_node_name(node: &Node, source: &str) -> Option { + match node.kind() { + // C function definitions use the same declarator chain as C. + "function_definition" => c_extract_declarator_name(node, source), + "type_definition" => { + let declarator = node.child_by_field_name("declarator")?; + Some(source[declarator.byte_range()].to_string()) + } + // @interface ClassName or @interface ClassName (Category) + "class_interface" | "class_implementation" => { + let mut cursor = node.walk(); + node.children(&mut cursor) + .find(|c| c.kind() == "identifier" || c.kind() == "type_identifier") + .map(|c| source[c.byte_range()].to_string()) + } + // @protocol ProtocolName + "protocol_declaration" => { + let mut cursor = node.walk(); + node.children(&mut cursor) + .find(|c| c.kind() == "identifier" || c.kind() == "type_identifier") + .map(|c| source[c.byte_range()].to_string()) + } + // - (ReturnType)methodName or - (ReturnType)methodName:(Type)arg + // + (ReturnType)classMethodName + "method_declaration" | "method_definition" => { + let mut cursor = node.walk(); + // The selector is composed of keyword_declarator children or + // a single identifier (for zero-argument methods). 
+ let mut parts: Vec = Vec::new(); + for child in node.children(&mut cursor) { + match child.kind() { + "identifier" | "field_identifier" if parts.is_empty() => { + // Single-part selector (no arguments) + parts.push(source[child.byte_range()].to_string()); + } + "keyword_declarator" => { + // Each keyword_declarator has a keyword child + let mut kw_cursor = child.walk(); + if let Some(kw) = child + .children(&mut kw_cursor) + .find(|c| c.kind() == "keyword_selector" || c.kind() == "identifier") + { + parts.push(format!("{}:", &source[kw.byte_range()])); + } + } + _ => {} + } + } + if parts.is_empty() { + None + } else { + Some(parts.join("")) + } + } + _ => None, + } +} + +/// Objective-C language configuration. +pub(super) static CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_objc::LANGUAGE.into(), + extensions: &["m", "mm"], + kind_map: &[ + ("fn", "function_definition"), + ("class", "class_interface"), + ("impl", "class_implementation"), + ("protocol", "protocol_declaration"), + ("method", "method_definition"), + ("method_decl", "method_declaration"), + ("struct", "struct_specifier"), + ("enum", "enum_specifier"), + ("typedef", "type_definition"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body"], + custom_name: Some(objc_node_name), +}; + +#[cfg(test)] +mod tests { + use crate::tree_path::*; + use std::path::Path; + + const SAMPLE_OBJC: &str = r#"#import + +struct CGPoint { + float x; + float y; +}; + +void helper(void) { + NSLog(@"hello"); +} +"#; + + #[test] + fn resolve_objc_function() { + let span = resolve_tree_path(SAMPLE_OBJC, "fn::helper", Language::ObjectiveC); + assert!(span.is_some(), "should resolve fn::helper"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_OBJC.lines().collect(); + assert!( + lines[start - 1].contains("void helper"), + "span should point to helper function" + ); + } + + #[test] + fn resolve_objc_struct() { + let span = resolve_tree_path(SAMPLE_OBJC, 
"struct::CGPoint", Language::ObjectiveC); + assert!(span.is_some(), "should resolve struct::CGPoint"); + } + + #[test] + fn roundtrip_objc() { + let span = resolve_tree_path(SAMPLE_OBJC, "fn::helper", Language::ObjectiveC).unwrap(); + let path = compute_tree_path(SAMPLE_OBJC, span, Language::ObjectiveC); + assert_eq!(path, "fn::helper"); + } + + #[test] + fn detect_objc_extensions() { + assert_eq!( + detect_language(Path::new("AppDelegate.m")), + Some(Language::ObjectiveC) + ); + assert_eq!( + detect_language(Path::new("mixed.mm")), + Some(Language::ObjectiveC) + ); + } +} diff --git a/crates/liyi/src/tree_path/lang_objc.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_objc.rs.liyi.jsonc new file mode 100644 index 0000000..3400819 --- /dev/null +++ b/crates/liyi/src/tree_path/lang_objc.rs.liyi.jsonc @@ -0,0 +1,31 @@ +// liyi v0.1 spec file +{ + "version": "0.1", + "source": "crates/liyi/src/tree_path/lang_objc.rs", + "specs": [ + { + "item": "objc_node_name", + "reviewed": false, + "intent": "Custom name extractor for Objective-C AST nodes: delegate function_definition to c_extract_declarator_name; extract class/protocol names from identifier/type_identifier children; compose method selector names from keyword_declarator children (multi-part selectors) or a single identifier (zero-argument methods). 
Return None for unrecognized kinds.", + "source_span": [ + 12, + 68 + ], + "tree_path": "fn::objc_node_name", + "source_hash": "sha256:4e3e3c9e582612774e53975373f1f9a9542f9c2a3feab6af851c2abb79e31653", + "source_anchor": "fn objc_node_name(node: &Node, source: &str) -> Option {" + }, + { + "item": "CONFIG", + "reviewed": false, + "intent": "Define the Objective-C language configuration for tree_path resolution: register tree-sitter-objc grammar, .m/.mm extensions, kind mappings for fn/class/impl/protocol/method/method_decl/struct/enum/typedef, and wire the custom objc_node_name callback.", + "source_span": [ + 71, + 89 + ], + "tree_path": "static::CONFIG", + "source_hash": "sha256:3106742e7b8217f60de4d50c7e972124d4af51e9490ab333cd7594f8d0c39df7", + "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" + } + ] +} diff --git a/crates/liyi/src/tree_path/lang_php.rs b/crates/liyi/src/tree_path/lang_php.rs new file mode 100644 index 0000000..4abb841 --- /dev/null +++ b/crates/liyi/src/tree_path/lang_php.rs @@ -0,0 +1,136 @@ +use super::LanguageConfig; + +use tree_sitter::Node; + +/// Custom name extraction for PHP `const_declaration` nodes. +/// +/// PHP `const_declaration` stores names inside `const_element` children. +fn php_node_name(node: &Node, source: &str) -> Option { + match node.kind() { + "const_declaration" => { + let mut cursor = node.walk(); + let elem = node + .children(&mut cursor) + .find(|c| c.kind() == "const_element")?; + let name = elem.child_by_field_name("name")?; + Some(source[name.byte_range()].to_string()) + } + _ => None, + } +} + +/// PHP language configuration (PHP-only grammar, no HTML interleaving). 
+pub(super) static CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_php::LANGUAGE_PHP_ONLY.into(), + extensions: &["php"], + kind_map: &[ + ("fn", "function_definition"), + ("class", "class_declaration"), + ("method", "method_declaration"), + ("interface", "interface_declaration"), + ("enum", "enum_declaration"), + ("trait", "trait_declaration"), + ("namespace", "namespace_definition"), + ("const", "const_declaration"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body"], + custom_name: Some(php_node_name), +}; + +#[cfg(test)] +mod tests { + use crate::tree_path::*; + use std::path::Path; + + const SAMPLE_PHP: &str = r#" Option {" + }, + { + "item": "CONFIG", + "reviewed": false, + "intent": "Define the PHP language configuration for tree_path resolution: register tree-sitter-php PHP-only grammar (no HTML interleaving), .php extension, kind mappings for fn/class/method/interface/enum/trait/namespace/const, and wire the custom php_node_name callback.", + "source_span": [ + 23, + 40 + ], + "tree_path": "static::CONFIG", + "source_hash": "sha256:0a09a6bb9162b31e32d5ddad17114c705ffc366084ebeaffc15ca828ffa83393", + "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" + } + ] +} diff --git a/crates/liyi/src/tree_path/lang_python.rs b/crates/liyi/src/tree_path/lang_python.rs new file mode 100644 index 0000000..c4d351d --- /dev/null +++ b/crates/liyi/src/tree_path/lang_python.rs @@ -0,0 +1,125 @@ +use super::LanguageConfig; + +/// Python language configuration. 
+pub(super) static CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_python::LANGUAGE.into(), + extensions: &["py", "pyi"], + kind_map: &[("fn", "function_definition"), ("class", "class_definition")], + name_field: "name", + name_overrides: &[], + body_fields: &["body"], + custom_name: None, +}; + +#[cfg(test)] +mod tests { + use crate::tree_path::*; + + const SAMPLE_PYTHON: &str = r#"# A simple order processing module + +class Order: + def __init__(self, amount): + self.amount = amount + + def process(self): + return self.amount > 0 + +def calculate_total(items): + return sum(items) +"#; + + #[test] + fn resolve_python_function() { + let span = resolve_tree_path(SAMPLE_PYTHON, "fn::calculate_total", Language::Python); + assert!(span.is_some(), "should resolve fn::calculate_total"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_PYTHON.lines().collect(); + assert!( + lines[start - 1].contains("def calculate_total"), + "span should point to calculate_total function" + ); + } + + #[test] + fn resolve_python_class() { + let span = resolve_tree_path(SAMPLE_PYTHON, "class::Order", Language::Python); + assert!(span.is_some(), "should resolve class::Order"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_PYTHON.lines().collect(); + assert!( + lines[start - 1].contains("class Order"), + "span should point to Order class" + ); + } + + #[test] + fn resolve_python_class_method() { + let span = resolve_tree_path(SAMPLE_PYTHON, "class::Order::fn::process", Language::Python); + assert!(span.is_some(), "should resolve class::Order::fn::process"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_PYTHON.lines().collect(); + assert!( + lines[start - 1].contains("def process"), + "span should point to process method" + ); + } + + #[test] + fn resolve_python_init_method() { + let span = resolve_tree_path( + SAMPLE_PYTHON, + "class::Order::fn::__init__", + Language::Python, + ); + 
assert!(span.is_some(), "should resolve class::Order::fn::__init__"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_PYTHON.lines().collect(); + assert!( + lines[start - 1].contains("def __init__"), + "span should point to __init__ method" + ); + } + + #[test] + fn compute_python_function_path() { + let lines: Vec<&str> = SAMPLE_PYTHON.lines().collect(); + let start = lines + .iter() + .position(|l| l.contains("def calculate_total")) + .unwrap() + + 1; + let end = lines.len(); + + let path = compute_tree_path(SAMPLE_PYTHON, [start, end], Language::Python); + assert_eq!(path, "fn::calculate_total"); + } + + #[test] + fn compute_python_class_method_path() { + let lines: Vec<&str> = SAMPLE_PYTHON.lines().collect(); + let start = lines + .iter() + .position(|l| l.contains("def process")) + .unwrap() + + 1; + // Find end of method (next line with same or less indentation) + let end = start + 1; // Single-line body for this test + + let path = compute_tree_path(SAMPLE_PYTHON, [start, end], Language::Python); + assert_eq!(path, "class::Order::fn::process"); + } + + #[test] + fn roundtrip_python() { + // Compute path for fn::calculate_total, then resolve it + let resolved_span = + resolve_tree_path(SAMPLE_PYTHON, "fn::calculate_total", Language::Python).unwrap(); + + let computed_path = compute_tree_path(SAMPLE_PYTHON, resolved_span, Language::Python); + assert_eq!(computed_path, "fn::calculate_total"); + + let re_resolved = + resolve_tree_path(SAMPLE_PYTHON, &computed_path, Language::Python).unwrap(); + assert_eq!(re_resolved, resolved_span); + } +} diff --git a/crates/liyi/src/tree_path/lang_python.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_python.rs.liyi.jsonc new file mode 100644 index 0000000..1efc9dd --- /dev/null +++ b/crates/liyi/src/tree_path/lang_python.rs.liyi.jsonc @@ -0,0 +1,19 @@ +// liyi v0.1 spec file +{ + "version": "0.1", + "source": "crates/liyi/src/tree_path/lang_python.rs", + "specs": [ + { + "item": "CONFIG", + "reviewed": 
false, + "intent": "Define the Python language configuration for tree_path resolution: register tree-sitter-python grammar, .py/.pyi extensions, kind mappings for fn (function_definition) and class (class_definition), using standard name field with no custom name extraction.", + "source_span": [ + 4, + 12 + ], + "tree_path": "static::CONFIG", + "source_hash": "sha256:aa974317e3de74f3d49a3f1fca69ccbacf47a05942d416fe1be099b1889d0668", + "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" + } + ] +} diff --git a/crates/liyi/src/tree_path/lang_rust.rs b/crates/liyi/src/tree_path/lang_rust.rs new file mode 100644 index 0000000..73cc877 --- /dev/null +++ b/crates/liyi/src/tree_path/lang_rust.rs @@ -0,0 +1,23 @@ +use super::LanguageConfig; + +/// Rust language configuration. +pub(super) static CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_rust::LANGUAGE.into(), + extensions: &["rs"], + kind_map: &[ + ("fn", "function_item"), + ("struct", "struct_item"), + ("enum", "enum_item"), + ("impl", "impl_item"), + ("trait", "trait_item"), + ("mod", "mod_item"), + ("const", "const_item"), + ("static", "static_item"), + ("type", "type_item"), + ("macro", "macro_definition"), + ], + name_field: "name", + name_overrides: &[("impl_item", "type")], + body_fields: &["body"], + custom_name: None, +}; diff --git a/crates/liyi/src/tree_path/lang_rust.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_rust.rs.liyi.jsonc new file mode 100644 index 0000000..2b88817 --- /dev/null +++ b/crates/liyi/src/tree_path/lang_rust.rs.liyi.jsonc @@ -0,0 +1,19 @@ +// liyi v0.1 spec file +{ + "version": "0.1", + "source": "crates/liyi/src/tree_path/lang_rust.rs", + "specs": [ + { + "item": "CONFIG", + "reviewed": false, + "intent": "Define the Rust language configuration for tree_path resolution: register tree-sitter-rust grammar, .rs extension, kind mappings for fn/struct/enum/impl/trait/mod/const/static/type/macro, with impl_item using the type field for 
name extraction via name_overrides and no custom name callback.", + "source_span": [ + 4, + 23 + ], + "tree_path": "static::CONFIG", + "source_hash": "sha256:5f7fa704e1952495b5a28b78087f7a26cf4abafb32fa131304fc8b37d0a03786", + "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" + } + ] +} diff --git a/crates/liyi/src/tree_path/lang_swift.rs b/crates/liyi/src/tree_path/lang_swift.rs new file mode 100644 index 0000000..a8ba79c --- /dev/null +++ b/crates/liyi/src/tree_path/lang_swift.rs @@ -0,0 +1,84 @@ +use super::LanguageConfig; + +/// Swift language configuration. +pub(super) static CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_swift::LANGUAGE.into(), + extensions: &["swift"], + kind_map: &[ + ("fn", "function_declaration"), + ("class", "class_declaration"), + ("protocol", "protocol_declaration"), + ("enum", "enum_entry"), + ("property", "property_declaration"), + ("init", "init_declaration"), + ("typealias", "typealias_declaration"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body"], + custom_name: None, +}; + +#[cfg(test)] +mod tests { + use crate::tree_path::*; + use std::path::Path; + + const SAMPLE_SWIFT: &str = r#"protocol Drawable { + func draw() +} + +class Shape { + func area() -> Double { + return 0.0 + } + + init() {} +} + +func standalone() -> Int { + return 42 +} + +typealias Callback = () -> Void +"#; + + #[test] + fn resolve_swift_protocol() { + let span = resolve_tree_path(SAMPLE_SWIFT, "protocol::Drawable", Language::Swift); + assert!(span.is_some(), "should resolve protocol::Drawable"); + } + + #[test] + fn resolve_swift_class() { + let span = resolve_tree_path(SAMPLE_SWIFT, "class::Shape", Language::Swift); + assert!(span.is_some(), "should resolve class::Shape"); + } + + #[test] + fn resolve_swift_method() { + let span = resolve_tree_path(SAMPLE_SWIFT, "class::Shape::fn::area", Language::Swift); + assert!(span.is_some(), "should resolve class::Shape::fn::area"); + } + 
+ #[test] + fn resolve_swift_function() { + let span = resolve_tree_path(SAMPLE_SWIFT, "fn::standalone", Language::Swift); + assert!(span.is_some(), "should resolve fn::standalone"); + } + + #[test] + fn roundtrip_swift() { + let span = resolve_tree_path(SAMPLE_SWIFT, "fn::standalone", Language::Swift).unwrap(); + let path = compute_tree_path(SAMPLE_SWIFT, span, Language::Swift); + assert_eq!(path, "fn::standalone"); + } + + #[test] + fn detect_swift_extension() { + assert_eq!( + detect_language(Path::new("ViewController.swift")), + Some(Language::Swift) + ); + } +} diff --git a/crates/liyi/src/tree_path/lang_swift.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_swift.rs.liyi.jsonc new file mode 100644 index 0000000..a4220ed --- /dev/null +++ b/crates/liyi/src/tree_path/lang_swift.rs.liyi.jsonc @@ -0,0 +1,19 @@ +// liyi v0.1 spec file +{ + "version": "0.1", + "source": "crates/liyi/src/tree_path/lang_swift.rs", + "specs": [ + { + "item": "CONFIG", + "reviewed": false, + "intent": "Define the Swift language configuration for tree_path resolution: register tree-sitter-swift grammar, .swift extension, kind mappings for fn/class/protocol/enum/property/init/typealias, using standard name field with no custom name extraction.", + "source_span": [ + 4, + 20 + ], + "tree_path": "static::CONFIG", + "source_hash": "sha256:9b703d45d6d6751c7fd41d681b5ba24dd490973c1453fd226f6e2b859c955263", + "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" + } + ] +} diff --git a/crates/liyi/src/tree_path/lang_typescript.rs b/crates/liyi/src/tree_path/lang_typescript.rs new file mode 100644 index 0000000..0776745 --- /dev/null +++ b/crates/liyi/src/tree_path/lang_typescript.rs @@ -0,0 +1,224 @@ +use super::LanguageConfig; + +/// TypeScript language configuration. 
+pub(super) static CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(), + extensions: &["ts", "mts", "cts"], + kind_map: &[ + ("fn", "function_declaration"), + ("class", "class_declaration"), + ("method", "method_definition"), + ("interface", "interface_declaration"), + ("type", "type_alias_declaration"), + ("enum", "enum_declaration"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body"], + custom_name: None, +}; + +/// TSX language configuration. +pub(super) static TSX_CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_typescript::LANGUAGE_TSX.into(), + extensions: &["tsx"], + kind_map: &[ + ("fn", "function_declaration"), + ("class", "class_declaration"), + ("method", "method_definition"), + ("interface", "interface_declaration"), + ("type", "type_alias_declaration"), + ("enum", "enum_declaration"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body"], + custom_name: None, +}; + +#[cfg(test)] +mod tests { + use crate::tree_path::*; + use std::path::Path; + + const SAMPLE_TS: &str = r#"// A typed user service + +interface User { + id: number; + name: string; +} + +type UserId = number; + +enum UserRole { + Admin, + User, + Guest +} + +class UserService { + private users: User[] = []; + + addUser(user: User): void { + this.users.push(user); + } + + findById(id: UserId): User | undefined { + return this.users.find(u => u.id === id); + } +} + +function createUser(name: string): User { + return { id: Date.now(), name }; +} +"#; + + #[test] + fn resolve_ts_interface() { + let span = resolve_tree_path(SAMPLE_TS, "interface::User", Language::TypeScript); + assert!(span.is_some(), "should resolve interface::User"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_TS.lines().collect(); + assert!( + lines[start - 1].contains("interface User"), + "span should point to User interface" + ); + } + + #[test] + fn resolve_ts_type_alias() 
{ + let span = resolve_tree_path(SAMPLE_TS, "type::UserId", Language::TypeScript); + assert!(span.is_some(), "should resolve type::UserId"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_TS.lines().collect(); + assert!( + lines[start - 1].contains("type UserId"), + "span should point to UserId type alias" + ); + } + + #[test] + fn resolve_ts_enum() { + let span = resolve_tree_path(SAMPLE_TS, "enum::UserRole", Language::TypeScript); + assert!(span.is_some(), "should resolve enum::UserRole"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_TS.lines().collect(); + assert!( + lines[start - 1].contains("enum UserRole"), + "span should point to UserRole enum" + ); + } + + #[test] + fn resolve_ts_class_method() { + let span = resolve_tree_path( + SAMPLE_TS, + "class::UserService::method::findById", + Language::TypeScript, + ); + assert!( + span.is_some(), + "should resolve class::UserService::method::findById" + ); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_TS.lines().collect(); + assert!( + lines[start - 1].contains("findById("), + "span should point to findById method" + ); + } + + #[test] + fn compute_ts_interface_path() { + let lines: Vec<&str> = SAMPLE_TS.lines().collect(); + let start = lines + .iter() + .position(|l| l.contains("interface User")) + .unwrap() + + 1; + let end = start + 3; + + let path = compute_tree_path(SAMPLE_TS, [start, end], Language::TypeScript); + assert_eq!(path, "interface::User"); + } + + #[test] + fn roundtrip_ts() { + let resolved_span = + resolve_tree_path(SAMPLE_TS, "enum::UserRole", Language::TypeScript).unwrap(); + + let computed_path = compute_tree_path(SAMPLE_TS, resolved_span, Language::TypeScript); + assert_eq!(computed_path, "enum::UserRole"); + + let re_resolved = + resolve_tree_path(SAMPLE_TS, &computed_path, Language::TypeScript).unwrap(); + assert_eq!(re_resolved, resolved_span); + } + + const SAMPLE_TSX: &str = r#"// A React component + +interface Props { 
+ title: string; + count: number; +} + +function Counter({ title, count }: Props) { + return ( +
<div> + <h1>{title}</h1> + <p>Count: {count}</p> + </div>
+ ); +} + +class Container extends React.Component { + render() { + return
<div>{this.props.title}</div>
; + } +} +"#; + + #[test] + fn resolve_tsx_function() { + let span = resolve_tree_path(SAMPLE_TSX, "fn::Counter", Language::Tsx); + assert!(span.is_some(), "should resolve fn::Counter in TSX"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_TSX.lines().collect(); + assert!( + lines[start - 1].contains("function Counter"), + "span should point to Counter function" + ); + } + + #[test] + fn resolve_tsx_class() { + let span = resolve_tree_path(SAMPLE_TSX, "class::Container", Language::Tsx); + assert!(span.is_some(), "should resolve class::Container in TSX"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_TSX.lines().collect(); + assert!( + lines[start - 1].contains("class Container"), + "span should point to Container class" + ); + } + + #[test] + fn resolve_tsx_interface() { + let span = resolve_tree_path(SAMPLE_TSX, "interface::Props", Language::Tsx); + assert!(span.is_some(), "should resolve interface::Props in TSX"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_TSX.lines().collect(); + assert!( + lines[start - 1].contains("interface Props"), + "span should point to Props interface" + ); + } + + #[test] + fn detect_tsx_extension() { + assert_eq!( + detect_language(Path::new("component.tsx")), + Some(Language::Tsx) + ); + } +} diff --git a/crates/liyi/src/tree_path/lang_typescript.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_typescript.rs.liyi.jsonc new file mode 100644 index 0000000..b55c8fa --- /dev/null +++ b/crates/liyi/src/tree_path/lang_typescript.rs.liyi.jsonc @@ -0,0 +1,31 @@ +// liyi v0.1 spec file +{ + "version": "0.1", + "source": "crates/liyi/src/tree_path/lang_typescript.rs", + "specs": [ + { + "item": "CONFIG", + "reviewed": false, + "intent": "Define the TypeScript language configuration for tree_path resolution: register tree-sitter-typescript grammar (LANGUAGE_TYPESCRIPT), .ts/.mts/.cts extensions, kind mappings for fn/class/method/interface/type/enum, using standard name field 
with no custom name extraction.", + "source_span": [ + 4, + 19 + ], + "tree_path": "static::CONFIG", + "source_hash": "sha256:7c920dd5e1c5bb41bc5de6c4e9e06449f370c31799a4c09879136fe11fc85982", + "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" + }, + { + "item": "TSX_CONFIG", + "reviewed": false, + "intent": "Define the TSX language configuration for tree_path resolution: register tree-sitter-typescript TSX grammar (LANGUAGE_TSX), .tsx extension, same kind mappings as TypeScript (fn/class/method/interface/type/enum), using standard name field with no custom name extraction.", + "source_span": [ + 22, + 37 + ], + "tree_path": "static::TSX_CONFIG", + "source_hash": "sha256:d3f258d9e47df10f71f0f44bcf8ba18093ff75390224c03d5a2c487bf65eaf39", + "source_anchor": "pub(super) static TSX_CONFIG: LanguageConfig = LanguageConfig {" + } + ] +} diff --git a/crates/liyi/src/tree_path.rs b/crates/liyi/src/tree_path/mod.rs similarity index 62% rename from crates/liyi/src/tree_path.rs rename to crates/liyi/src/tree_path/mod.rs index 0e9f36c..f300fe5 100644 --- a/crates/liyi/src/tree_path.rs +++ b/crates/liyi/src/tree_path/mod.rs @@ -9,46 +9,114 @@ //! locate items by structural identity, making span recovery deterministic //! across formatting changes, import additions, and line reflows. +mod lang_c; +mod lang_cpp; +mod lang_csharp; +mod lang_go; +mod lang_java; +mod lang_javascript; +mod lang_kotlin; +mod lang_objc; +mod lang_php; +mod lang_python; +mod lang_rust; +mod lang_swift; +mod lang_typescript; + +use std::borrow::Cow; use std::path::Path; -use tree_sitter::{Node, Parser}; - -/// Map from tree_path kind shorthand to tree-sitter-rust node kind strings. 
-const KIND_MAP: &[(&str, &str)] = &[ - ("fn", "function_item"), - ("struct", "struct_item"), - ("enum", "enum_item"), - ("impl", "impl_item"), - ("trait", "trait_item"), - ("mod", "mod_item"), - ("const", "const_item"), - ("static", "static_item"), - ("type", "type_item"), - ("macro", "macro_definition"), -]; - -/// Reverse map: tree-sitter node kind → tree_path shorthand. -fn kind_to_shorthand(ts_kind: &str) -> Option<&'static str> { - KIND_MAP - .iter() - .find(|(_, ts)| *ts == ts_kind) - .map(|(short, _)| *short) -} +use tree_sitter::{Language as TSLanguage, Node, Parser}; -/// Forward map: tree_path shorthand → tree-sitter node kind. -fn shorthand_to_kind(short: &str) -> Option<&'static str> { - KIND_MAP - .iter() - .find(|(s, _)| *s == short) - .map(|(_, ts)| *ts) +/// Language-specific configuration for tree_path resolution. +/// +/// Each supported language provides a static `LanguageConfig` that defines +/// how to parse it and map between tree-sitter node kinds and tree_path +/// shorthands. +pub struct LanguageConfig { + /// Function to get the tree-sitter language grammar (lazy initialization). + ts_language: fn() -> TSLanguage, + /// File extensions associated with this language. + extensions: &'static [&'static str], + /// Map from tree_path kind shorthand to tree-sitter node kind. + kind_map: &'static [(&'static str, &'static str)], + /// Field name to extract the node's name (usually "name"). + name_field: &'static str, + /// Overrides for special cases: (node_kind, field_name) pairs. + name_overrides: &'static [(&'static str, &'static str)], + /// Field names to traverse to find a node's body/declaration_list. + body_fields: &'static [&'static str], + /// Custom name extraction for node kinds that need special handling + /// (e.g., Go methods with receiver types, Go type_declaration wrapping type_spec). + /// Returns `Some(name)` for handled kinds, `None` to fall through to default. 
+ custom_name: Option<fn(&Node, &str) -> Option<String>>, } -/// Detect language from file extension. Returns `None` for unsupported -/// languages (only Rust is supported in 0.1). -pub fn detect_language(path: &Path) -> Option<Language> { - match path.extension()?.to_str()? { - "rs" => Some(Language::Rust), - _ => None, +impl LanguageConfig { + /// Map tree-sitter node kind → tree_path shorthand. + fn kind_to_shorthand(&self, ts_kind: &str) -> Option<&'static str> { + self.kind_map + .iter() + .find(|(_, ts)| *ts == ts_kind) + .map(|(short, _)| *short) + } + + /// Map tree_path shorthand → tree-sitter node kind. + fn shorthand_to_kind(&self, short: &str) -> Option<&'static str> { + self.kind_map + .iter() + .find(|(s, _)| *s == short) + .map(|(_, ts)| *ts) + } + + /// Extract the name of a named AST node. + /// + /// Returns a `Cow` — borrowed from `source` in the common case, + /// owned when the name is constructed (e.g., Go method receiver encoding). + fn node_name<'a>(&self, node: &Node<'a>, source: &'a str) -> Option<Cow<'a, str>> { + // Check custom_name callback first (e.g., Go method receivers) + if let Some(custom) = self.custom_name + && let Some(name) = custom(node, source) + { + return Some(Cow::Owned(name)); + } + + let kind = node.kind(); + + // Check for name field override (e.g., impl_item uses "type" field) + let field_name = self + .name_overrides + .iter() + .find(|(k, _)| *k == kind) + .map(|(_, f)| *f) + .unwrap_or(self.name_field); + + let name_node = node.child_by_field_name(field_name)?; + Some(Cow::Borrowed(&source[name_node.byte_range()])) + } + + /// Find a body/declaration_list child for descending into containers. + fn find_body<'a>(&self, node: &Node<'a>) -> Option<Node<'a>> { + for field in self.body_fields { + if let Some(body) = node.child_by_field_name(field) { + return Some(body); + } + } + // Fallback: search for body_fields or declaration_list as direct + // (unnamed) children. 
Needed for languages where the body is a + // positional child rather than a named field (e.g., Kotlin class_body, + // C++ field_declaration_list). + let mut cursor = node.walk(); + node.children(&mut cursor).find(|c| { + self.body_fields.contains(&c.kind()) + || c.kind() == "declaration_list" + || c.kind() == "field_declaration_list" + }) + } + + /// Check if the given file extension is associated with this language. + pub fn matches_extension(&self, ext: &str) -> bool { + self.extensions.contains(&ext) } } @@ -56,36 +124,121 @@ pub fn detect_language(path: &Path) -> Option { #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Language { Rust, + Python, + Go, + JavaScript, + TypeScript, + Tsx, + C, + Cpp, + Java, + CSharp, + Php, + ObjectiveC, + Kotlin, + Swift, } -/// Create a tree-sitter parser for the given language. -fn make_parser(lang: Language) -> Parser { - let mut parser = Parser::new(); - match lang { - Language::Rust => { - parser - .set_language(&tree_sitter_rust::LANGUAGE.into()) - .expect("tree-sitter-rust grammar should load"); +impl Language { + /// Get the language configuration for this language. + fn config(&self) -> &'static LanguageConfig { + match self { + Language::Rust => &lang_rust::CONFIG, + Language::Python => &lang_python::CONFIG, + Language::Go => &lang_go::CONFIG, + Language::JavaScript => &lang_javascript::CONFIG, + Language::TypeScript => &lang_typescript::CONFIG, + Language::Tsx => &lang_typescript::TSX_CONFIG, + Language::C => &lang_c::CONFIG, + Language::Cpp => &lang_cpp::CONFIG, + Language::Java => &lang_java::CONFIG, + Language::CSharp => &lang_csharp::CONFIG, + Language::Php => &lang_php::CONFIG, + Language::ObjectiveC => &lang_objc::CONFIG, + Language::Kotlin => &lang_kotlin::CONFIG, + Language::Swift => &lang_swift::CONFIG, } } - parser + + /// Get the tree-sitter language grammar. + fn ts_language(&self) -> TSLanguage { + (self.config().ts_language)() + } } -/// Extract the name of a named AST node. 
+/// Detect language from file extension. Returns `None` for unsupported +/// languages (unknown extension). /// -/// For most items (fn, struct, enum, mod, trait, const, static, type, macro), -/// the name is in the `name` field. For `impl_item`, the name is the text of -/// the `type` field (the type being implemented). -fn node_name<'a>(node: &Node<'a>, source: &'a str) -> Option<&'a str> { - let kind = node.kind(); - if kind == "impl_item" { - // impl blocks: use the `type` field text - let type_node = node.child_by_field_name("type")?; - Some(&source[type_node.byte_range()]) - } else { - let name_node = node.child_by_field_name("name")?; - Some(&source[name_node.byte_range()]) +/// # Extension Collision +/// +/// `.h` files are ambiguous (C, C++, or Objective-C). We map them to C +/// by default. Users can override via future configuration if needed. +/// +/// If two languages share an extension (unlikely with built-in languages), +/// the first match in the following order is returned: +/// Rust → Python → Go → JavaScript → TypeScript → TSX → C → C++ → +/// Java → C# → PHP → Objective-C → Kotlin → Swift. 
+pub fn detect_language(path: &Path) -> Option<Language> { + let ext = path.extension()?.to_str()?; + + if lang_rust::CONFIG.matches_extension(ext) { + return Some(Language::Rust); + } + + if lang_python::CONFIG.matches_extension(ext) { + return Some(Language::Python); + } + + if lang_go::CONFIG.matches_extension(ext) { + return Some(Language::Go); + } + + if lang_javascript::CONFIG.matches_extension(ext) { + return Some(Language::JavaScript); + } + + if lang_typescript::CONFIG.matches_extension(ext) { + return Some(Language::TypeScript); + } + if lang_typescript::TSX_CONFIG.matches_extension(ext) { + return Some(Language::Tsx); + } + + if lang_c::CONFIG.matches_extension(ext) { + return Some(Language::C); + } + if lang_cpp::CONFIG.matches_extension(ext) { + return Some(Language::Cpp); + } + if lang_java::CONFIG.matches_extension(ext) { + return Some(Language::Java); + } + if lang_csharp::CONFIG.matches_extension(ext) { + return Some(Language::CSharp); } + if lang_php::CONFIG.matches_extension(ext) { + return Some(Language::Php); + } + if lang_objc::CONFIG.matches_extension(ext) { + return Some(Language::ObjectiveC); + } + if lang_kotlin::CONFIG.matches_extension(ext) { + return Some(Language::Kotlin); + } + if lang_swift::CONFIG.matches_extension(ext) { + return Some(Language::Swift); + } + + None +} + +/// Create a tree-sitter parser for the given language. +fn make_parser(lang: Language) -> Parser { + let mut parser = Parser::new(); + parser + .set_language(&lang.ts_language()) + .expect("tree-sitter grammar should load"); + parser } /// A parsed tree_path segment: (kind_shorthand, name). @@ -121,18 +274,19 @@ fn parse_tree_path(tree_path: &str) -> Option<Vec<PathSegment>> { /// inclusive). /// /// Returns `None` if the tree_path cannot be resolved (item renamed, deleted, -/// or grammar unavailable). +/// grammar unavailable, or language not supported). 
pub fn resolve_tree_path(source: &str, tree_path: &str, lang: Language) -> Option<[usize; 2]> { if tree_path.is_empty() { return None; } + let config = lang.config(); let segments = parse_tree_path(tree_path)?; let mut parser = make_parser(lang); let tree = parser.parse(source, None)?; let root = tree.root_node(); - let node = resolve_segments(&root, &segments, source)?; + let node = resolve_segments(config, &root, &segments, source)?; // Return 1-indexed inclusive line range let start_line = node.start_position().row + 1; @@ -142,6 +296,7 @@ pub fn resolve_tree_path(source: &str, tree_path: &str, lang: Language) -> Optio /// Walk the tree to find a node matching the given path segments. fn resolve_segments<'a>( + config: &LanguageConfig, parent: &Node<'a>, segments: &[PathSegment], source: &'a str, @@ -151,19 +306,19 @@ fn resolve_segments<'a>( } let seg = &segments[0]; - let ts_kind = shorthand_to_kind(&seg.kind)?; + let ts_kind = config.shorthand_to_kind(&seg.kind)?; let mut cursor = parent.walk(); for child in parent.children(&mut cursor) { if child.kind() != ts_kind { continue; } - if let Some(name) = node_name(&child, source) { - if name == seg.name && segments.len() == 1 { + if let Some(name) = config.node_name(&child, source) { + if *name == seg.name && segments.len() == 1 { return Some(child); - } else if name == seg.name { + } else if *name == seg.name { // Descend — look inside this node's body - return resolve_in_body(&child, &segments[1..], source); + return resolve_in_body(config, &child, &segments[1..], source); } } } @@ -173,20 +328,13 @@ fn resolve_segments<'a>( /// Find subsequent segments inside an item's body (e.g., methods inside impl). fn resolve_in_body<'a>( + config: &LanguageConfig, node: &Node<'a>, segments: &[PathSegment], source: &'a str, ) -> Option<Node<'a>> { - // For impl/mod/trait blocks, the children are inside the declaration_list - // or body field. Walk all descendants at the next level. 
- let body = node.child_by_field_name("body").or_else(|| { - // Try finding declaration_list child directly - let mut cursor = node.walk(); - node.children(&mut cursor) - .find(|c| c.kind() == "declaration_list") - })?; - - resolve_segments(&body, segments, source) + let body = config.find_body(node)?; + resolve_segments(config, &body, segments, source) } /// Compute the canonical `tree_path` for the AST node at the given span. @@ -195,6 +343,7 @@ fn resolve_in_body<'a>( /// (e.g., the span doesn't align with a named item, or the language is /// unsupported). pub fn compute_tree_path(source: &str, span: [usize; 2], lang: Language) -> String { + let config = lang.config(); let mut parser = make_parser(lang); let tree = match parser.parse(source, None) { Some(t) => t, @@ -207,13 +356,13 @@ pub fn compute_tree_path(source: &str, span: [usize; 2], lang: Language) -> Stri let target_end = span[1].saturating_sub(1); // Find the best item node within the target range - let node = match find_item_in_range(&root, target_start, target_end) { + let node = match find_item_in_range(config, &root, target_start, target_end) { Some(n) => n, None => return String::new(), }; // Build path from root to this node - build_path_to_node(&root, &node, source) + build_path_to_node(config, &root, &node, source) } /// Find the best item node within [target_start, target_end] (0-indexed rows). @@ -223,6 +372,7 @@ pub fn compute_tree_path(source: &str, span: [usize; 2], lang: Language) -> Stri /// item node. We therefore match any item whose start/end rows fall within /// the target range, preferring the widest match (the outermost item). 
fn find_item_in_range<'a>( + config: &LanguageConfig, root: &Node<'a>, target_start: usize, target_end: usize, @@ -230,6 +380,7 @@ fn find_item_in_range<'a>( let mut best: Option<Node<'a>> = None; fn walk<'a>( + config: &LanguageConfig, node: &Node<'a>, target_start: usize, target_end: usize, @@ -244,7 +395,7 @@ fn find_item_in_range<'a>( } // Check if this is a named item node within the target range - if start >= target_start && end <= target_end && is_item_node(node) { + if start >= target_start && end <= target_end && is_item_node(config, node) { // Prefer the widest (outermost) match if let Some(b) = best { let b_size = b.end_position().row - b.start_position().row; @@ -260,23 +411,23 @@ fn find_item_in_range<'a>( // Recurse into children let mut cursor = node.walk(); for child in node.children(&mut cursor) { - walk(&child, target_start, target_end, best); + walk(config, &child, target_start, target_end, best); } } - walk(root, target_start, target_end, &mut best); + walk(config, root, target_start, target_end, &mut best); best } /// Check if a node is an item type we track in tree_path. -fn is_item_node(node: &Node) -> bool { - kind_to_shorthand(node.kind()).is_some() +fn is_item_node(config: &LanguageConfig, node: &Node) -> bool { + config.kind_to_shorthand(node.kind()).is_some() } /// Build the tree_path string for a given target node by walking from root. -fn build_path_to_node(root: &Node, target: &Node, source: &str) -> String { +fn build_path_to_node(config: &LanguageConfig, root: &Node, target: &Node, source: &str) -> String { let mut segments: Vec<String> = Vec::new(); - if collect_path(root, target, source, &mut segments) { + if collect_path(config, root, target, source, &mut segments) { segments.join("::") } else { String::new() @@ -284,11 +435,19 @@ fn build_path_to_node(root: &Node, target: &Node, source: &str) -> String { } /// Recursively find `target` in the tree and collect path segments. 
-fn collect_path(node: &Node, target: &Node, source: &str, segments: &mut Vec<String>) -> bool { +fn collect_path( + config: &LanguageConfig, + node: &Node, + target: &Node, + source: &str, + segments: &mut Vec<String>, +) -> bool { if node.id() == target.id() { // We found the target — add this node's segment if it's an item - if let (Some(short), Some(name)) = (kind_to_shorthand(node.kind()), node_name(node, source)) - { + if let (Some(short), Some(name)) = ( + config.kind_to_shorthand(node.kind()), + config.node_name(node, source), + ) { segments.push(format!("{short}::{name}")); return true; } @@ -306,12 +465,14 @@ fn collect_path(node: &Node, target: &Node, source: &str, segments: &mut Vec= target_end - && collect_path(&child, target, source, segments) + && collect_path(config, &child, target, source, segments) { // If this node is an item node, prepend its segment - if is_item_node(node) - && let (Some(short), Some(name)) = - (kind_to_shorthand(node.kind()), node_name(node, source)) + if is_item_node(config, node) + && let (Some(short), Some(name)) = ( + config.kind_to_shorthand(node.kind()), + config.node_name(node, source), + ) { segments.insert(0, format!("{short}::{name}")); } @@ -547,7 +708,7 @@ fn standalone() -> i32 { detect_language(Path::new("src/main.rs")), Some(Language::Rust) ); - assert_eq!(detect_language(Path::new("foo.py")), None); + assert_eq!(detect_language(Path::new("foo.py")), Some(Language::Python)); } #[test] diff --git a/crates/liyi/src/tree_path.rs.liyi.jsonc b/crates/liyi/src/tree_path/mod.rs.liyi.jsonc similarity index 53% rename from crates/liyi/src/tree_path.rs.liyi.jsonc rename to crates/liyi/src/tree_path/mod.rs.liyi.jsonc index 916c7e5..18303df 100644 --- a/crates/liyi/src/tree_path.rs.liyi.jsonc +++ b/crates/liyi/src/tree_path/mod.rs.liyi.jsonc @@ -1,63 +1,75 @@ // liyi v0.1 spec file { "version": "0.1", - "source": "crates/liyi/src/tree_path.rs", + "source": "crates/liyi/src/tree_path/mod.rs", "specs": [ { - "item": "KIND_MAP", + "item": 
"LanguageConfig", "reviewed": false, - "intent": "Define the bijective mapping between tree_path shorthand strings (fn, struct, enum, impl, trait, mod, const, static, type, macro) and tree-sitter-rust node kind strings (function_item, struct_item, etc.). This is the single source of truth for kind translation.", + "intent": "Define the data-driven abstraction for language-specific tree_path behaviour. Each field captures one language-dependent axis: grammar loader (ts_language), file extensions, kind shorthand mapping, name extraction field and overrides, body-descending fields, and an optional custom_name callback for languages with non-trivial name extraction (e.g., Go receiver encoding).", "source_span": [ - 17, - 28 + 36, + 53 ], - "tree_path": "const::KIND_MAP", - "source_hash": "sha256:6a6b6eb7c463a163555dadc8f1cd701266d2d64cf6a06e75f4128feef9fe45ee", - "source_anchor": "const KIND_MAP: &[(&str, &str)] = &[" + "tree_path": "struct::LanguageConfig", + "source_hash": "sha256:cc0ae5ada967354b9d5e9863be2c72136c5dd85832b29ee5e44e118d1c99f5da", + "source_anchor": "pub struct LanguageConfig {" }, { "item": "detect_language", "reviewed": false, "intent": "=doc", "source_span": [ - 48, - 53 + 181, + 233 ], "tree_path": "fn::detect_language", - "source_hash": "sha256:2c784d4dfacb2142be9b46e13c2713208c7797747374057a65ae114fdb8be45c", + "source_hash": "sha256:93745e6791e0ebb9eb4704bbcdaeaa193791e75c010fd498ec8cf8c79e1bc26c", "source_anchor": "pub fn detect_language(path: &Path) -> Option<Language> {" }, { "item": "Language", "reviewed": false, - "intent": "Enumerate supported tree-sitter languages for tree_path operations. In 0.1, only Rust is supported; the enum is the extension point for adding more languages.", + "intent": "Enumerate all built-in tree-sitter languages for tree_path operations: Rust, Python, Go, JavaScript, TypeScript, TSX, C, C++, Java, C#, PHP, Objective-C, Kotlin, and Swift. 
Each variant maps to a static LanguageConfig via config().", "source_span": [ - 57, - 59 + 125, + 140 ], "tree_path": "enum::Language", - "source_hash": "sha256:6d0a6933befabccdcfc1030cac109588b55363bb531987e4aa7c9d4cf1a68e1e", + "source_hash": "sha256:df5bfa956c1b92e1ab2320378cf6e1c79b0788feded9f8a4ccb2bf97ced49381", "source_anchor": "pub enum Language {" }, { "item": "node_name", "reviewed": false, - "intent": "Extract the user-visible name of an AST node. For impl_item, return the type field text (e.g., 'Money' from 'impl Money'). For all other item kinds, return the name field. Return None if the node has no name/type field.", + "intent": "Extract the user-visible name of an AST node via the language's LanguageConfig. Checks the custom_name callback first (for complex patterns like Go receiver encoding). Falls back to name_overrides for special cases (e.g., impl_item uses type field). Otherwise reads the standard name field. Returns Cow::Owned for constructed names, Cow::Borrowed for field-extracted names.", "source_span": [ - 79, - 89 + 76, + 96 ], - "tree_path": "fn::node_name", - "source_hash": "sha256:b35a2bd695cb373f84ad62c1d483a893c9e0dcc93cc0d72f42a096cf1a16d79a", - "source_anchor": "fn node_name<'a>(node: &Node<'a>, source: &'a str) -> Option<&'a str> {" + "tree_path": "impl::LanguageConfig::fn::node_name", + "source_hash": "sha256:d459d381bbc30689c1dd009aa6df01f7815da0b36ed5592ff2b45da8abe27edd", + "source_anchor": " fn node_name<'a>(&self, node: &Node<'a>, source: &'a str) -> Option> {" + }, + { + "item": "go_node_name", + "reviewed": false, + "intent": "Handle Go-specific name extraction for four node kinds: method_declaration encodes receiver type into the name as ReceiverType.Method or (*ReceiverType).Method for pointer receivers; type_declaration navigates to the inner type_spec for the name; const_declaration and var_declaration similarly navigate to their inner spec nodes. 
Returns None for unrecognized node kinds to fall through to default name extraction.", + "source_span": [ + 345, + 366 + ], + "tree_path": "fn::compute_tree_path", + "source_hash": "sha256:30ecd47287f846a39cdbd906075c6eae16d286eda5c3bc92d87cfbae67ec2e74", + "source_anchor": "pub fn compute_tree_path(source: &str, span: [usize; 2], lang: Language) -> String {" }, { "item": "parse_tree_path", "reviewed": false, "intent": "Parse a tree_path string into segments of (kind, name) pairs by splitting on '::' and grouping consecutive pairs. Return None if the number of parts is odd (malformed). Validate each kind against the known shorthand set.", "source_span": [ - 102, - 118 + 255, + 271 ], "tree_path": "fn::parse_tree_path", "source_hash": "sha256:eb1bdb126bb090d769612797d5428edd3c20ba72ba04dad58071bbfa955240c2", @@ -68,11 +80,11 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 125, - 141 + 278, + 295 ], "tree_path": "fn::resolve_tree_path", - "source_hash": "sha256:3d7856a13db4b62a7800d9457c1b079a8aa5a99296d010d4196e97eb1465c8c9", + "source_hash": "sha256:8cd19d6e6704970f8cbead0b56b05a9196ca29b0439b37b31a819a958dc03dbe", "source_anchor": "pub fn resolve_tree_path(source: &str, tree_path: &str, lang: Language) -> Option<[usize; 2]> {" }, { @@ -80,11 +92,11 @@ "reviewed": false, "intent": "Walk tree-sitter children of the given parent to find nodes matching each path segment in order. For single-segment paths, return the matching child directly. 
For multi-segment paths, descend into the first matching child via resolve_in_body for subsequent segments.", "source_span": [ - 144, - 172 + 298, + 327 ], "tree_path": "fn::resolve_segments", - "source_hash": "sha256:cb4227e128b6b2b7cf3766e0f8c492e21692cbcf44b5a4b95addffb34ad87451", + "source_hash": "sha256:15731dca9653e45052c706fbc2f193fcfe96ca98afe00bbf259f23f86288c414", "source_anchor": "fn resolve_segments<'a>(" }, { @@ -92,11 +104,11 @@ "reviewed": false, "intent": "Find subsequent path segments inside an item's body or declaration_list. Try the 'body' field first (mod, fn), then fall back to looking for a declaration_list child (impl, trait). Delegate to resolve_segments for the recursive match.", "source_span": [ - 175, - 190 + 330, + 338 ], "tree_path": "fn::resolve_in_body", - "source_hash": "sha256:36340ecde43345970b601709b06045ce4eb2d59361073a740cffdc53dac11dbc", + "source_hash": "sha256:f1514f012bc8d300c425867e4a1cce1aaf72f1f58885eeaf24456114234473d6", "source_anchor": "fn resolve_in_body<'a>(" }, { @@ -104,11 +116,11 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 197, - 217 + 345, + 366 ], "tree_path": "fn::compute_tree_path", - "source_hash": "sha256:da391fb9dcee5c9ba55d539a4c16eb60f51511713b9c843dfeb87409a2a6d96e", + "source_hash": "sha256:30ecd47287f846a39cdbd906075c6eae16d286eda5c3bc92d87cfbae67ec2e74", "source_anchor": "pub fn compute_tree_path(source: &str, span: [usize; 2], lang: Language) -> String {" }, { @@ -116,11 +128,11 @@ "reviewed": false, "intent": "Find the widest item-bearing tree-sitter node whose start and end rows both fall within [target_start, target_end]. Must handle the attribute-sibling pattern where Rust attributes (#[derive(...)]) are siblings of the item node — the sidecar span can start before the item node. 
Prefer the outermost (widest) item when multiple items fall within the range.", "source_span": [ - 225, - 269 + 374, + 420 ], "tree_path": "fn::find_item_in_range", - "source_hash": "sha256:23d04aca348b644f78e2415418361bf6c963c868b00ca9f3882483c511fe2d3d", + "source_hash": "sha256:187c06169aae241150cb9bd88810da07aef5d967431ae25b50aab3ff111fc220", "source_anchor": "fn find_item_in_range<'a>(" }, { @@ -128,12 +140,12 @@ "reviewed": false, "intent": "Recursively walk from root to target node, collecting (kind::name) path segments. At the target node, push its segment and return true. During descent, only enter children that spatially contain the target. When a child's subtree contains the target, prepend the current node's segment if it is an item node. Return false if the target cannot be found.", "source_span": [ - 287, - 323 + 438, + 484 ], "tree_path": "fn::collect_path", - "source_hash": "sha256:5596cd923fd63ffdcf3d5871e7b7e06e48f56cc5ac790aab5afa1e2351eb1046", - "source_anchor": "fn collect_path(node: &Node, target: &Node, source: &str, segments: &mut Vec) -> bool {" + "source_hash": "sha256:0086ee43dc7c085025e553af9914df58fab43d8e8b579486f21d5788d8d0d221", + "source_anchor": "fn collect_path(" } ] } diff --git a/docs/liyi-01x-roadmap.md b/docs/liyi-01x-roadmap.md index 26c3b6a..a1a4d3d 100644 --- a/docs/liyi-01x-roadmap.md +++ b/docs/liyi-01x-roadmap.md @@ -1,16 +1,16 @@ # 立意 (Lìyì) — 0.1.x Roadmap -2026-03-06 (updated 2026-03-09) +2026-03-06 (updated 2026-03-10) --- ## Overview -This document covers post-MVP work that ships as 0.1.x patch releases. Everything here is additive — no schema changes, no CLI breaking changes, no behavioral regressions. Users who never enable a Cargo feature or run a new subcommand see zero impact. +This document covers post-MVP work that ships as 0.1.x patch releases. Everything here is additive — no schema changes, no CLI breaking changes, no behavioral regressions. 
The MVP roadmap (`docs/liyi-mvp-roadmap.md`) covers the 0.1.0 release. This document picks up where it leaves off. -**Design authority:** `docs/liyi-design.md` v8.7 — see *Structural identity via `tree_path`*, *Multi-language architecture (`LanguageConfig`)*, and *Annotation coverage*. +**Design authority:** `docs/liyi-design.md` v8.8 — see *Structural identity via `tree_path`*, *Multi-language architecture (`LanguageConfig`)*, and *Annotation coverage*. --- @@ -18,6 +18,8 @@ The MVP roadmap (`docs/liyi-mvp-roadmap.md`) covers the 0.1.0 release. This docu | Milestone | Status | Notes | |-----------|--------|-------| +| M1 Multi-language tree_path | ✅ Complete | All 5 languages built-in, no feature gates | +| M2 Extended language support | ✅ Complete | C, C++, Java, C#, PHP, ObjC, Kotlin, Swift | | M3 Remaining MVP gaps | ✅ Complete | All items implemented | | M5.1 MissingRelated | ✅ Complete | Diagnostic implemented, auto-fix in `--fix` mode | | M5.2 `--fail-on-untracked` | ✅ Complete | Flag implemented with tests | @@ -34,20 +36,18 @@ The MVP roadmap (`docs/liyi-mvp-roadmap.md`) covers the 0.1.0 release. This docu ## M1. Multi-language `tree_path` support -**Status:** Not started — deferred to post-0.1.x or community contribution. +**Status:** ✅ Complete — all languages built-in, no feature gates. -**Goal:** Extend tree-sitter-based structural identity from Rust-only to Python, Go, JavaScript, and TypeScript. +**Goal:** Extend tree-sitter-based structural identity from Rust-only to Python, Go, JavaScript, and TypeScript. All grammars are compiled into the binary unconditionally — no Cargo features, no opt-in. The binary-size cost is modest relative to the universality benefit; Python, Go, JavaScript, and TypeScript codebases vastly outnumber Rust codebases, and requiring users to opt in per language would hinder adoption of a tool whose value proposition is universality. 
-**Prerequisite:** Refactor `tree_path.rs` from hardcoded Rust-specific `KIND_MAP` + `node_name` to a data-driven `LanguageConfig` abstraction. This is the enabling refactor — each subsequent language is additive data, not new code paths. +### M1.1. `LanguageConfig` refactor ✅ -### M1.1. `LanguageConfig` refactor (~half day) - -Extract the four language-specific touch points into a configuration struct: +Extracted language-specific touch points into a data-driven `LanguageConfig` struct: | Current code | Becomes | |---|---| | `KIND_MAP` (hardcoded Rust node kinds) | `LanguageConfig::kind_map` | -| `Language` enum (only `Rust`) | Extended with variants per feature | +| `Language` enum (only `Rust`) | Extended with variants per language | | `detect_language()` (only `.rs`) | Dispatch table from extensions | | `make_parser()` (only `tree_sitter_rust`) | `LanguageConfig::ts_language` | | `node_name()` (`impl_item` special case) | `LanguageConfig::name_overrides` | @@ -56,20 +56,23 @@ The `LanguageConfig` struct (from design doc v8.6): ```rust struct LanguageConfig { - ts_language: tree_sitter::Language, + ts_language: fn() -> tree_sitter::Language, extensions: &'static [&'static str], kind_map: &'static [(&'static str, &'static str)], name_field: &'static str, name_overrides: &'static [(&'static str, &'static str)], body_fields: &'static [&'static str], + custom_name: Option<fn(&Node, &str) -> Option<String>>, } ``` +The `custom_name` callback handles languages with non-trivial name extraction (e.g., Go method receiver encoding, Go `type_declaration` → `type_spec` indirection). + **Acceptance criteria:** - All existing tests pass with Rust handled via `LanguageConfig` instead of hardcoded paths. -- Adding a new language requires only a new `LanguageConfig` constant and a Cargo feature — no changes to resolve/compute logic. +- Adding a new language requires only a new `LanguageConfig` constant — no changes to resolve/compute logic. -### M1.2.
Python ✅ **Grammar:** `tree-sitter-python` (0.25.0) @@ -93,7 +96,7 @@ struct LanguageConfig { - `compute_tree_path` produces correct path for top-level functions, class methods, nested classes. - Roundtrip (compute → resolve → same span) passes for representative Python code. -### M1.3. Go (`lang-go` feature) +### M1.3. Go ✅ **Grammar:** `tree-sitter-go` (0.25.0) @@ -103,30 +106,22 @@ struct LanguageConfig { |---|---| | `fn` | `function_declaration` | | `method` | `method_declaration` | -| `struct` | `type_declaration` → `type_spec` with `struct_type` | -| `interface` | `type_declaration` → `type_spec` with `interface_type` | -| `const` | `const_declaration` | -| `var` | `var_declaration` | +| `type` | `type_declaration` (name extracted from inner `type_spec`) | +| `const` | `const_declaration` (name extracted from inner `const_spec`) | +| `var` | `var_declaration` (name extracted from inner `var_spec`) | **Design notes:** -- Go methods have receivers and live at top level, not nested inside a struct body. Tree_path encoding: `method::(*MyType).DoThing` or `method::MyType.DoThing`. The method name includes the receiver type for disambiguation. -- `type_declaration` wraps `type_spec` which has the actual name. Name extraction needs to reach into `type_spec` → `name` field. +- Go methods encode the receiver type in tree_path: `method::(*MyType).DoThing` (pointer receiver) or `method::MyType.DoThing` (value receiver). This disambiguates methods with the same name on different types. +- `type_declaration` wraps `type_spec` which has the actual name. A `custom_name` callback navigates the indirection. A single `type` shorthand covers structs, interfaces, and type aliases — Go type names are unique per package, so no disambiguation is needed. - No nesting equivalent to Rust's `impl` or Python's class body — all functions/methods are top-level. **Extensions:** `.go` -**Open design question:** Receiver encoding in tree_path. Options: -1. 
`method::MyType.DoThing` — simple, matches Go syntax -2. `method::(*MyType).DoThing` — distinguishes pointer/value receivers -3. `struct::MyType::method::DoThing` — uses nested path syntax despite flat AST - -Option 1 is recommended — simple and readable, with pointer receiver indicated by `*` prefix when present. - **Acceptance criteria:** -- Functions, methods (pointer + value receiver), struct types, interface types resolve correctly. +- Functions, methods (pointer + value receiver), type declarations (struct + interface), const, var resolve correctly. - Roundtrip passes for representative Go code. -### M1.4. JavaScript (`lang-javascript` feature) +### M1.4. JavaScript ✅ **Grammar:** `tree-sitter-javascript` (0.25.0) @@ -151,7 +146,7 @@ Option 1 is recommended — simple and readable, with pointer receiver indicated - Arrow functions in const declarations map to `fn::name`. - Export-wrapped declarations resolve correctly. -### M1.5. TypeScript (`lang-typescript` feature) +### M1.5. TypeScript ✅ **Grammar:** `tree-sitter-typescript` (0.23.2) — ships two grammars: `typescript` and `tsx`. @@ -176,7 +171,245 @@ Option 1 is recommended — simple and readable, with pointer receiver indicated --- -## M2. Deferred languages — design notes +## M2. Extended language support + +**Status:** ✅ Complete — 8 additional languages built-in, no feature gates. + +**Goal:** Extend tree-sitter structural identity to C, C++, Java, C#, PHP, Objective-C, Kotlin, and Swift. All grammars are compiled into the binary unconditionally, matching the M1 design decision. The binary-size cost remains modest (tree-sitter grammars are compact C code) and the universality benefit is significant — C/C++ codebases are where intent drift is most acute and structural anchors most valuable. + +### M2.1. C ✅ + +**Grammar:** `tree-sitter-c` (0.24.1) — the oldest and most mature tree-sitter grammar. 
+ +**Kind mappings:** + +| Shorthand | Node kind | +|---|---| +| `fn` | `function_definition` | +| `struct` | `struct_specifier` | +| `enum` | `enum_specifier` | +| `typedef` | `type_definition` | + +**Design notes:** +- C function names live inside a `declarator` → `function_declarator` → `identifier` chain, not a simple `name` field. A `c_node_name` custom callback recursively unwraps `pointer_declarator`, `parenthesized_declarator`, and `attributed_declarator` wrappers to find the `function_declarator`, then extracts the identifier. +- `type_definition` (typedef) names are in the `declarator` field. +- `.h` files are ambiguous (could be C, C++, or ObjC). Mapped to C by default since C has the simplest grammar and produces valid tree_paths for the overlapping subset. + +**Extensions:** `.c`, `.h` + +**Acceptance criteria:** +- Functions, structs, enums, typedefs all resolve. +- Roundtrip (compute → resolve → same span) passes. + +### M2.2. C++ ✅ + +**Grammar:** `tree-sitter-cpp` (0.23.4) — second-oldest tree-sitter grammar, extremely mature. + +**Kind mappings:** + +| Shorthand | Node kind | +|---|---| +| `fn` | `function_definition` | +| `class` | `class_specifier` | +| `struct` | `struct_specifier` | +| `namespace` | `namespace_definition` | +| `enum` | `enum_specifier` | +| `template` | `template_declaration` | +| `typedef` | `type_definition` | +| `using` | `alias_declaration` | + +**Design notes:** +- Inherits C's declarator-chain name extraction pattern via a `cpp_node_name` callback. +- `template_declaration` is a transparent wrapper. The callback unwraps it to find the inner declaration (`function_definition`, `class_specifier`, etc.) and extracts the name from there. +- Namespaces use `declaration_list` as their body container; `find_body` finds this via the fallback child search. +- Class methods are `function_definition` inside `field_declaration_list`; the extended `find_body` fallback handles this. 
+- `enum class` (scoped enums) parse as `enum_specifier` just like plain enums. + +**Extensions:** `.cpp`, `.cc`, `.cxx`, `.hpp`, `.hh`, `.hxx`, `.h++`, `.c++` + +**Acceptance criteria:** +- Namespaces, classes-in-namespaces, methods-in-classes, standalone functions, enums all resolve. +- Template-wrapped declarations resolve correctly. +- Roundtrip passes through namespace nesting. + +### M2.3. Java ✅ + +**Grammar:** `tree-sitter-java` (0.23.5) + +**Kind mappings:** + +| Shorthand | Node kind | +|---|---| +| `fn` | `method_declaration` | +| `class` | `class_declaration` | +| `interface` | `interface_declaration` | +| `enum` | `enum_declaration` | +| `constructor` | `constructor_declaration` | +| `record` | `record_declaration` | +| `annotation` | `annotation_type_declaration` | + +**Design notes:** +- All node types have a standard `name` field — no custom callback needed. +- Methods are `method_declaration` inside `class_body`. Tree_path: `class::Calculator::fn::add`. +- Records (Java 14+) and annotation types are included for completeness. + +**Extensions:** `.java` + +**Acceptance criteria:** +- Classes, methods, constructors, interfaces, enums, records all resolve. +- Roundtrip passes for methods nested in classes. + +### M2.4. C# ✅ + +**Grammar:** `tree-sitter-c-sharp` (0.23.1) + +**Kind mappings:** + +| Shorthand | Node kind | +|---|---| +| `fn` | `method_declaration` | +| `class` | `class_declaration` | +| `interface` | `interface_declaration` | +| `enum` | `enum_declaration` | +| `struct` | `struct_declaration` | +| `namespace` | `namespace_declaration` | +| `constructor` | `constructor_declaration` | +| `property` | `property_declaration` | +| `record` | `record_declaration` | +| `delegate` | `delegate_declaration` | + +**Design notes:** +- All node types have a standard `name` field — no custom callback needed. +- Namespaces use `body` field for descent, enabling `namespace::MyApp::class::Foo::fn::Bar` paths. 
+- Properties are tracked as named items (important for C#'s property-centric design). +- File-scoped namespace declarations (`namespace Foo;`) are not tracked as container items since they have no body to descend into. + +**Extensions:** `.cs` + +**Acceptance criteria:** +- Namespaces, classes, methods, properties, interfaces, enums, structs all resolve. +- Namespace → class → method nesting roundtrips correctly. + +### M2.5. PHP ✅ + +**Grammar:** `tree-sitter-php` (0.24.2) — uses `LANGUAGE_PHP_ONLY` (pure PHP, no HTML interleaving). + +**Kind mappings:** + +| Shorthand | Node kind | +|---|---| +| `fn` | `function_definition` | +| `class` | `class_declaration` | +| `method` | `method_declaration` | +| `interface` | `interface_declaration` | +| `enum` | `enum_declaration` | +| `trait` | `trait_declaration` | +| `namespace` | `namespace_definition` | +| `const` | `const_declaration` | + +**Design notes:** +- PHP distinguishes `function_definition` (top-level) from `method_declaration` (inside classes). Both have a `name` field. +- `const_declaration` stores its name inside a `const_element` child — a `php_node_name` custom callback handles this. +- Traits are first-class items (important for Laravel/Symfony codebases). +- PHP 8.1 enums are supported. + +**Extensions:** `.php` + +**Acceptance criteria:** +- Classes, methods, functions, interfaces, traits, enums all resolve. +- Roundtrip passes. + +### M2.6. Objective-C ✅ + +**Grammar:** `tree-sitter-objc` (3.0.2) + +**Kind mappings:** + +| Shorthand | Node kind | +|---|---| +| `fn` | `function_definition` | +| `class` | `class_interface` | +| `impl` | `class_implementation` | +| `protocol` | `protocol_declaration` | +| `method` | `method_definition` | +| `method_decl` | `method_declaration` | +| `struct` | `struct_specifier` | +| `enum` | `enum_specifier` | +| `typedef` | `type_definition` | + +**Design notes:** +- Most ObjC declaration node types lack standard `name` fields. 
An `objc_node_name` custom callback handles: + - `function_definition`: C-style declarator chain (shared with C callback). + - `class_interface` / `class_implementation`: name is a direct child `identifier` or `type_identifier`. + - `protocol_declaration`: same pattern. + - `method_declaration` / `method_definition`: ObjC selector names are composed from `keyword_declarator` children (e.g., `initWithFrame:style:`). +- C-level structs and enums use the standard `name` field. +- `class_interface` (`@interface`) and `class_implementation` (`@implementation`) are tracked as separate item types, mirroring ObjC's header/implementation split. + +**Extensions:** `.m`, `.mm` + +**Acceptance criteria:** +- C functions, structs, and enums resolve (shared with C grammar patterns). +- Roundtrip passes for C-level items. + +### M2.7. Kotlin ✅ + +**Grammar:** `tree-sitter-kotlin-ng` (1.1.0) — the `-ng` fork, compatible with tree-sitter 0.26.x. + +**Kind mappings:** + +| Shorthand | Node kind | +|---|---| +| `fn` | `function_declaration` | +| `class` | `class_declaration` | +| `object` | `object_declaration` | +| `property` | `property_declaration` | +| `typealias` | `type_alias` | + +**Design notes:** +- `class_body` is a positional child of `class_declaration` (not a named field). The `find_body` fallback was extended to search `body_fields` entries as child node kinds, not just field names. +- `property_declaration` names live inside a `variable_declaration` or `simple_identifier` child — handled by `kotlin_node_name` callback. +- `type_alias` names are in a `type_identifier` or `simple_identifier` child. +- `object_declaration` (Kotlin objects / companion objects) has a standard `name` field. +- The original `tree-sitter-kotlin` crate (0.3.x) requires tree-sitter <0.23 and is incompatible. The `-ng` fork from `tree-sitter-grammars` is the maintained successor. 
+ +**Extensions:** `.kt`, `.kts` + +**Acceptance criteria:** +- Classes, methods-in-classes, objects, functions all resolve. +- Roundtrip passes. + +### M2.8. Swift ✅ + +**Grammar:** `tree-sitter-swift` (0.7.1) + +**Kind mappings:** + +| Shorthand | Node kind | +|---|---| +| `fn` | `function_declaration` | +| `class` | `class_declaration` | +| `protocol` | `protocol_declaration` | +| `enum` | `enum_entry` | +| `property` | `property_declaration` | +| `init` | `init_declaration` | +| `typealias` | `typealias_declaration` | + +**Design notes:** +- All node types have a standard `name` field — no custom callback needed. +- `class_declaration` covers both `class` and `struct` keywords (both use `class_declaration` with a `declaration_kind` field distinguishing them). +- Protocols map naturally to the `protocol` shorthand. +- `init_declaration` is tracked separately from methods since Swift initializers are syntactically distinct. + +**Extensions:** `.swift` + +**Acceptance criteria:** +- Protocols, classes, methods-in-classes, functions, init all resolve. +- Roundtrip passes. + +--- + +## M2.9. Deferred languages — design notes These languages are tracked but not planned for 0.1.x. diff --git a/docs/liyi-design.md b/docs/liyi-design.md index 97d3697..29b82ab 100644 --- a/docs/liyi-design.md +++ b/docs/liyi-design.md @@ -466,7 +466,7 @@ The path identifies the item by node kind and name, not by position. The tool co The agent MAY set `tree_path` to `""` explicitly to signal "I considered structural identity and it doesn't apply here." Absence of the field is equivalent to `""`. `liyi reanchor` auto-populates `tree_path` for every spec where a clear structural path can be resolved from the current `source_span` and a supported tree-sitter grammar — agents need not set it manually. When the span doesn't correspond to a recognizable AST item (macros, generated code, unsupported languages), the tool leaves `tree_path` empty. 
-**Language support.** Tree-sitter support is grammar-dependent. In 0.1, Rust is the primary supported language (via `tree-sitter-rust`). For unsupported languages, `tree_path` is left empty and the tool falls back to line-number behavior. Adding a language is a matter of adding its tree-sitter grammar crate and a small mapping of node kinds — no changes to the core protocol or schema. +**Language support.** Tree-sitter support is grammar-dependent. Rust, Python, Go, JavaScript, and TypeScript are built-in. For unsupported languages, `tree_path` is left empty and the tool falls back to line-number behavior. Adding a language is a matter of adding its tree-sitter grammar crate and a small mapping of node kinds — no changes to the core protocol or schema. **Multi-language architecture (`LanguageConfig`).** The `tree_path` implementation is designed to be language-extensible via a data-driven configuration per language. Each supported language provides: @@ -481,14 +481,14 @@ The agent MAY set `tree_path` to `""` explicitly to signal "I considered structu The shorthand vocabulary (`fn`, `struct`, `class`, `mod`, `impl`, `trait`, `enum`, `const`, `static`, `type`, `macro`, `interface`, `method`) is shared across languages — `fn` always means "function-like item" regardless of whether the underlying node kind is `function_item` (Rust), `function_definition` (Python), or `function_declaration` (Go/JS/TS). The `tree_path` format remains the same: `fn::add_money`, `class::Order::fn::process`. -Each language is gated behind a Cargo feature (`lang-python`, `lang-go`, `lang-javascript`, `lang-typescript`) so users only pay binary-size cost for languages they need. A `lang-all` convenience feature includes everything. +All languages are built-in — the binary ships with every supported tree-sitter grammar.
The binary-size cost is modest relative to the universality benefit; Python, Go, JavaScript, and TypeScript codebases vastly outnumber Rust codebases, and requiring users to opt in per language would hinder adoption of a tool whose value proposition is universality. -**Planned languages (0.1.x):** +**Built-in languages:** | Language | Grammar crate | Notes | |---|---|---| | Python | `tree-sitter-python` | Flat AST; methods are `function_definition` inside `class_definition` body. No `impl`-block equivalent. | -| Go | `tree-sitter-go` | `type_declaration` → `type_spec` indirection for structs/interfaces. Methods have receivers and live at top level — tree_path encodes as `method::(*MyType).DoThing` or `fn::DoThing`. | +| Go | `tree-sitter-go` | `type_declaration` wraps `type_spec` for structs/interfaces — custom name extraction navigates the indirection. Methods encode receiver type: `method::(*MyType).DoThing` (pointer) or `method::MyType.DoThing` (value). | | JavaScript | `tree-sitter-javascript` | Arrow functions in `const` declarations are pervasive — `const foo = () => ...` maps to `fn::foo` (tracking the `variable_declarator` when its value is an `arrow_function`). | | TypeScript | `tree-sitter-typescript` | Superset of JS; adds `interface_declaration`, `type_alias_declaration`, `enum_declaration`. Dual grammar: `.ts` → typescript, `.tsx` → tsx. | @@ -1349,7 +1349,7 @@ This is the full context an assessor needs. The agent (or script, or CI wrapper) | Agent (next session) | `suggested_intent` for items with `verdict: semantic` | Read triage, propose intent updates in sidecar | | Human (terminal) | Formatted summary + triage table | `liyi triage --summary`; `--json` for raw | -**Why the LLM is not in the binary.** Building LLM calls into `liyi` would require API key management, provider abstraction (OpenAI, Anthropic, Bedrock, Vertex, local models...), HTTP client + TLS, rate limit handling, token budgeting, and retry logic. 
It would bloat a ~3000-line binary with complexity that the agentic framework already solved. The binary stays deterministic, offline, and small. The reasoning lives where the model access already is. +**Why the LLM is not in the binary.** Building LLM calls into `liyi` would require API key management, provider abstraction (OpenAI, Anthropic, Bedrock, Vertex, local models...), HTTP client + TLS, rate limit handling, token budgeting, and retry logic. It would bloat the binary with complexity that the agentic framework already solved. The binary stays deterministic and offline. The reasoning lives where the model access already is. **Triage workflow:** @@ -1897,9 +1897,9 @@ The spec-driven development space is no longer hypothetical — Augment Intent, - **Persistent by design.** Intent survives context windows, agent sessions, and team turnover. It's a file in the repo, not a message in a thread. - **Each level stands alone.** You can adopt the instruction without the linter, or the linter without adversarial tests. - **Nothing to learn.** JSONC, Markdown, SHA-256. No DSL, no specification language, no framework. -- **Lightweight.** The linter is ~3000 lines of Rust across two crates with 7 direct runtime dependencies (including tree-sitter for structural span recovery). Small enough to audit, understand, and port to another language if needed. +- **Self-contained.** The linter is a single binary with tree-sitter grammars built in, no runtime dependencies. - **No lock-in.** `.liyi.jsonc` files are plain JSONC. `@liyi:module` markers are comments. Delete them and nothing breaks. -- **Any programming language.** The linter doesn't parse source code. It reads line ranges from `source_span`, hashes them, compares. `.liyi.jsonc` is JSONC. `@liyi:module` markers use whatever comment syntax the host format already provides. Works with any language, any framework, any build system, any design pattern. 
+- **Any programming language.** The checking process doesn't parse source code — it reads line ranges from `source_span`, hashes them, compares. `.liyi.jsonc` is JSONC. `@liyi:module` markers use whatever comment syntax the host format already provides. Works with any language, any framework, any build system, any design pattern. - **Hardware RTL too.** The convention applies at the RTL level (Verilog, SystemVerilog, VHDL, Chisel) with no design changes — sidecars co-locate with `.v`/`.vhd`/`.scala` files, `source_span` and `source_hash` work on any text, and tree-sitter grammars exist for Verilog and VHDL. In hardware domains where requirements traceability is a compliance obligation (DO-254, ISO 26262, IEC 61508), 立意 functions as a lightweight shim between a requirements management system and RTL source: a `liyi import-reqif` command (post-MVP) can consume ReqIF — the open OMG standard (ReqIF 1.2, `formal/2016-07-01`) that DOORS, Polarion, and other tools export — and emit `@liyi:requirement` blocks, connecting managed requirements to RTL implementations with hash-based staleness detection. The tool doesn't replace DOORS; it fills the last mile that DOORS doesn't cover. - **Any human language.** Intent prose is natural language — write it in your team’s working language. Annotation markers accept aliases in any supported language (`@liyi:ignore` / `@立意:忽略` / `@liyi:ignorar`). No locale configuration; the linter accepts all aliases from a static table. The project’s Chinese cultural origin isn’t a barrier — it’s an invitation. @@ -2031,7 +2031,7 @@ A well-funded competitor (Augment Code, with their Intent product) can absorb th 2. **Reimplement the staleness model.** If their "living specs" prove unreliable (auto-updating specs drift silently), `source_hash` + `source_span` staleness is a public algorithm, fully specified in this document, trivially reimplementable. They ship "staleness alerts" as a feature. 3. 
**Ship `.liyi.jsonc` import/export.** If the convention gains traction, they offer compatibility as a feature — their specs are primary, `.liyi.jsonc` is a second-class interop format. They absorb the convention's ecosystem without contributing to it. -**No license can prevent this.** The convention is a file format (`.liyi.jsonc`), a set of marker strings (`@liyi:module`, `@liyi:intent`), and a staleness algorithm (hash lines, compare). These are ideas and data formats — not copyrightable expression. Even under AGPL, a competitor reimplements the algorithm from this public specification without touching the linter's source code. The JSON Schema is a functional specification. The linter is ~3000 lines of Rust (including tree-sitter integration) — reimplementation cost is a few engineer-days. +**No license can prevent this.** The convention is a file format (`.liyi.jsonc`), a set of marker strings (`@liyi:module`, `@liyi:intent`), and a staleness algorithm (hash lines, compare). These are ideas and data formats — not copyrightable expression. Even under AGPL, a competitor reimplements the algorithm from this public specification without touching the linter's source code. The JSON Schema is a functional specification. The linter is ~7 k lines of Rust (including tree-sitter integration for 14 languages) — reimplementation cost is modest. Copyleft (GPL, AGPL, MPL) would protect the **linter binary** from being embedded in a closed product without releasing source. But: