From 15f201d97e01a23bc844628098940a088b94e6c3 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Wed, 11 Mar 2026 17:35:20 +0800 Subject: [PATCH 01/21] feat(tree_path): add Bash language support Add tree-sitter-bash grammar (v0.25.1) and LanguageConfig for Bash. Bash has simple function_definition nodes with no nesting complexity. - Extensions: .sh, .bash - Kind: fn -> function_definition - No custom name extraction needed - body_fields: ["body"] Original prompt: > Implement bash, ruby, and zig, one by one, then push to the currently empty PR. AI-assisted-by: Kimi K2.5 (OpenClaw) Signed-off-by: WANG Xuerui --- crates/liyi/Cargo.toml | 1 + crates/liyi/src/tree_path/lang_bash.rs | 90 +++++++++++++++++++ .../src/tree_path/lang_bash.rs.liyi.jsonc | 15 ++++ crates/liyi/src/tree_path/mod.rs | 9 +- 4 files changed, 114 insertions(+), 1 deletion(-) create mode 100644 crates/liyi/src/tree_path/lang_bash.rs create mode 100644 crates/liyi/src/tree_path/lang_bash.rs.liyi.jsonc diff --git a/crates/liyi/Cargo.toml b/crates/liyi/Cargo.toml index d6658e8..57b7052 100644 --- a/crates/liyi/Cargo.toml +++ b/crates/liyi/Cargo.toml @@ -30,6 +30,7 @@ tree-sitter-php = "0.24.2" tree-sitter-objc = "3.0.2" tree-sitter-kotlin-ng = "1.1.0" tree-sitter-swift = "0.7.1" +tree-sitter-bash = "0.25.1" [dev-dependencies] proptest = "1" diff --git a/crates/liyi/src/tree_path/lang_bash.rs b/crates/liyi/src/tree_path/lang_bash.rs new file mode 100644 index 0000000..bdc5f30 --- /dev/null +++ b/crates/liyi/src/tree_path/lang_bash.rs @@ -0,0 +1,90 @@ +use super::LanguageConfig; + +/// Bash language configuration. 
+pub(super) static CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_bash::LANGUAGE.into(), + extensions: &["sh", "bash"], + kind_map: &[("fn", "function_definition")], + name_field: "name", + name_overrides: &[], + body_fields: &["body"], + custom_name: None, +}; + +#[cfg(test)] +mod tests { + use crate::tree_path::*; + use std::path::Path; + + const SAMPLE_BASH: &str = r#"#!/bin/bash + +# Helper function +function helper() { + echo "helping" +} + +# Main function with alternate syntax +main_func() { + echo "main" +} + +# Function with no parens style (some shells) +another_func { + echo "another" +} +"#; + + #[test] + fn resolve_bash_function_with_function_keyword() { + let span = resolve_tree_path(SAMPLE_BASH, "fn::helper", Language::Bash); + assert!(span.is_some(), "should resolve fn::helper"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_BASH.lines().collect(); + assert!( + lines[start - 1].contains("function helper"), + "span should point to helper function, got: {}", + lines[start - 1] + ); + } + + #[test] + fn resolve_bash_function_with_parens_syntax() { + let span = resolve_tree_path(SAMPLE_BASH, "fn::main_func", Language::Bash); + assert!(span.is_some(), "should resolve fn::main_func"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_BASH.lines().collect(); + assert!( + lines[start - 1].contains("main_func()"), + "span should point to main_func function, got: {}", + lines[start - 1] + ); + } + + #[test] + fn compute_bash_function_path() { + // Use resolve to get the span, then verify compute produces the same path + let resolved_span = resolve_tree_path(SAMPLE_BASH, "fn::helper", Language::Bash).unwrap(); + let path = compute_tree_path(SAMPLE_BASH, resolved_span, Language::Bash); + assert_eq!(path, "fn::helper"); + } + + #[test] + fn roundtrip_bash() { + let resolved_span = resolve_tree_path(SAMPLE_BASH, "fn::helper", Language::Bash).unwrap(); + + let computed_path = 
compute_tree_path(SAMPLE_BASH, resolved_span, Language::Bash); + assert_eq!(computed_path, "fn::helper"); + + let re_resolved = resolve_tree_path(SAMPLE_BASH, &computed_path, Language::Bash).unwrap(); + assert_eq!(re_resolved, resolved_span); + } + + #[test] + fn detect_bash_extensions() { + assert_eq!(detect_language(Path::new("script.sh")), Some(Language::Bash)); + assert_eq!( + detect_language(Path::new("script.bash")), + Some(Language::Bash) + ); + } +} diff --git a/crates/liyi/src/tree_path/lang_bash.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_bash.rs.liyi.jsonc new file mode 100644 index 0000000..43337d3 --- /dev/null +++ b/crates/liyi/src/tree_path/lang_bash.rs.liyi.jsonc @@ -0,0 +1,15 @@ +// liyi v0.1 spec file +{ + "version": "0.1", + "source": "crates/liyi/src/tree_path/lang_bash.rs", + "specs": [ + { + "item": "CONFIG", + "reviewed": false, + "intent": "Define the Bash language configuration for tree_path resolution: register tree-sitter-bash grammar v0.25.1, .sh/.bash extensions, kind mapping for fn (function_definition), using standard name field with no custom name extraction and body field for function body traversal.", + "source_span": [4, 12], + "tree_path": "static::CONFIG", + "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" + } + ] +} diff --git a/crates/liyi/src/tree_path/mod.rs b/crates/liyi/src/tree_path/mod.rs index f300fe5..f92725c 100644 --- a/crates/liyi/src/tree_path/mod.rs +++ b/crates/liyi/src/tree_path/mod.rs @@ -9,6 +9,7 @@ //! locate items by structural identity, making span recovery deterministic //! across formatting changes, import additions, and line reflows. +mod lang_bash; mod lang_c; mod lang_cpp; mod lang_csharp; @@ -123,6 +124,7 @@ impl LanguageConfig { /// Supported languages for tree_path resolution. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Language { + Bash, Rust, Python, Go, @@ -143,6 +145,7 @@ impl Language { /// Get the language configuration for this language. 
fn config(&self) -> &'static LanguageConfig { match self { + Language::Bash => &lang_bash::CONFIG, Language::Rust => &lang_rust::CONFIG, Language::Python => &lang_python::CONFIG, Language::Go => &lang_go::CONFIG, @@ -176,11 +179,15 @@ impl Language { /// /// If two languages share an extension (unlikely with built-in languages), /// the first match in the following order is returned: -/// Rust → Python → Go → JavaScript → TypeScript → TSX → C → C++ → +/// Bash → Rust → Python → Go → JavaScript → TypeScript → TSX → C → C++ → /// Java → C# → PHP → Objective-C → Kotlin → Swift. pub fn detect_language(path: &Path) -> Option { let ext = path.extension()?.to_str()?; + if lang_bash::CONFIG.matches_extension(ext) { + return Some(Language::Bash); + } + if lang_rust::CONFIG.matches_extension(ext) { return Some(Language::Rust); } From 45887a12c99a236cf748eb2d8b2857889c1f669c Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Wed, 11 Mar 2026 17:35:35 +0800 Subject: [PATCH 02/21] feat(tree_path): add Ruby language support Add tree-sitter-ruby grammar (v0.23.1) and LanguageConfig for Ruby. - Extensions: .rb, .rake, .gemspec - Kinds: fn -> method, class -> class, module -> module, singleton_method -> singleton_method - Custom name extraction for singleton methods (def self.foo) to encode receiver type: "self.method_name" or "ClassName.method_name" - body_fields: ["body", "statements"] Original prompt: > Implement bash, ruby, and zig, one by one, then push to the currently empty PR. 
AI-assisted-by: Kimi K2.5 (OpenClaw) Signed-off-by: WANG Xuerui --- crates/liyi/Cargo.toml | 1 + crates/liyi/src/tree_path/lang_ruby.rs | 177 ++++++++++++++++++ .../src/tree_path/lang_ruby.rs.liyi.jsonc | 23 +++ crates/liyi/src/tree_path/mod.rs | 9 +- 4 files changed, 209 insertions(+), 1 deletion(-) create mode 100644 crates/liyi/src/tree_path/lang_ruby.rs create mode 100644 crates/liyi/src/tree_path/lang_ruby.rs.liyi.jsonc diff --git a/crates/liyi/Cargo.toml b/crates/liyi/Cargo.toml index 57b7052..59d3dc3 100644 --- a/crates/liyi/Cargo.toml +++ b/crates/liyi/Cargo.toml @@ -31,6 +31,7 @@ tree-sitter-objc = "3.0.2" tree-sitter-kotlin-ng = "1.1.0" tree-sitter-swift = "0.7.1" tree-sitter-bash = "0.25.1" +tree-sitter-ruby = "0.23.1" [dev-dependencies] proptest = "1" diff --git a/crates/liyi/src/tree_path/lang_ruby.rs b/crates/liyi/src/tree_path/lang_ruby.rs new file mode 100644 index 0000000..b575bed --- /dev/null +++ b/crates/liyi/src/tree_path/lang_ruby.rs @@ -0,0 +1,177 @@ +use super::LanguageConfig; + +use tree_sitter::Node; + +/// Custom name extraction for Ruby nodes. +/// +/// Handles `singleton_method` (class methods like `def self.foo`) which encodes +/// the class name in the path: `singleton_method::ClassName.method_name`. +fn ruby_node_name(node: &Node, source: &str) -> Option { + match node.kind() { + "singleton_method" => { + let method_name_node = node.child_by_field_name("name")?; + let method_name = &source[method_name_node.byte_range()]; + + // The object field holds the receiver (e.g., `self` or class name) + // For `def self.foo`, object is `self` + // For `def ClassName.foo`, object is the class name identifier + let object = node.child_by_field_name("object")?; + let receiver = if object.kind() == "self" { + "self".to_string() + } else { + source[object.byte_range()].to_string() + }; + + Some(format!("{receiver}.{method_name}")) + } + _ => None, + } +} + +/// Ruby language configuration. 
+pub(super) static CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_ruby::LANGUAGE.into(), + extensions: &["rb", "rake", "gemspec"], + kind_map: &[ + ("fn", "method"), + ("class", "class"), + ("module", "module"), + ("singleton_method", "singleton_method"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body", "statements"], + custom_name: Some(ruby_node_name), +}; + +#[cfg(test)] +mod tests { + use crate::tree_path::*; + use std::path::Path; + + const SAMPLE_RUBY: &str = r#"# A billing module +module Billing + class Invoice + def total + @items.sum + end + + def self.calculate_tax(amount) + amount * 0.1 + end + end + + def standalone_helper + "helper" + end +end + +class Order + def process + "processing" + end +end +"#; + + #[test] + fn resolve_ruby_module() { + let span = resolve_tree_path(SAMPLE_RUBY, "module::Billing", Language::Ruby); + assert!(span.is_some(), "should resolve module::Billing"); + } + + #[test] + fn resolve_ruby_class_in_module() { + let span = resolve_tree_path(SAMPLE_RUBY, "module::Billing::class::Invoice", Language::Ruby); + assert!(span.is_some(), "should resolve module::Billing::class::Invoice"); + } + + #[test] + fn resolve_ruby_method_in_class() { + let span = resolve_tree_path( + SAMPLE_RUBY, + "module::Billing::class::Invoice::fn::total", + Language::Ruby, + ); + assert!(span.is_some(), "should resolve nested method"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_RUBY.lines().collect(); + assert!( + lines[start - 1].contains("def total"), + "span should point to total method" + ); + } + + #[test] + fn resolve_ruby_singleton_method() { + let span = resolve_tree_path( + SAMPLE_RUBY, + "module::Billing::class::Invoice::singleton_method::self.calculate_tax", + Language::Ruby, + ); + assert!(span.is_some(), "should resolve singleton method"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_RUBY.lines().collect(); + assert!( + lines[start - 
1].contains("def self.calculate_tax"), + "span should point to class method" + ); + } + + #[test] + fn resolve_ruby_module_function() { + // standalone_helper is defined directly in the module body + let span = resolve_tree_path( + SAMPLE_RUBY, + "module::Billing::fn::standalone_helper", + Language::Ruby, + ); + assert!(span.is_some(), "should resolve module-level function"); + } + + #[test] + fn resolve_ruby_top_level_class() { + let span = resolve_tree_path(SAMPLE_RUBY, "class::Order", Language::Ruby); + assert!(span.is_some(), "should resolve top-level class"); + } + + #[test] + fn resolve_ruby_method_in_top_level_class() { + let span = resolve_tree_path(SAMPLE_RUBY, "class::Order::fn::process", Language::Ruby); + assert!(span.is_some(), "should resolve method in top-level class"); + } + + #[test] + fn compute_ruby_method_path() { + let resolved_span = resolve_tree_path( + SAMPLE_RUBY, + "module::Billing::class::Invoice::fn::total", + Language::Ruby, + ) + .unwrap(); + let path = compute_tree_path(SAMPLE_RUBY, resolved_span, Language::Ruby); + assert_eq!(path, "module::Billing::class::Invoice::fn::total"); + } + + #[test] + fn roundtrip_ruby() { + let resolved_span = + resolve_tree_path(SAMPLE_RUBY, "class::Order::fn::process", Language::Ruby).unwrap(); + + let computed_path = compute_tree_path(SAMPLE_RUBY, resolved_span, Language::Ruby); + assert_eq!(computed_path, "class::Order::fn::process"); + + let re_resolved = + resolve_tree_path(SAMPLE_RUBY, &computed_path, Language::Ruby).unwrap(); + assert_eq!(re_resolved, resolved_span); + } + + #[test] + fn detect_ruby_extensions() { + assert_eq!(detect_language(Path::new("app.rb")), Some(Language::Ruby)); + assert_eq!(detect_language(Path::new("tasks.rake")), Some(Language::Ruby)); + assert_eq!( + detect_language(Path::new("my_gem.gemspec")), + Some(Language::Ruby) + ); + } +} diff --git a/crates/liyi/src/tree_path/lang_ruby.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_ruby.rs.liyi.jsonc new file mode 100644 index 
0000000..c1aa288 --- /dev/null +++ b/crates/liyi/src/tree_path/lang_ruby.rs.liyi.jsonc @@ -0,0 +1,23 @@ +// liyi v0.1 spec file +{ + "version": "0.1", + "source": "crates/liyi/src/tree_path/lang_ruby.rs", + "specs": [ + { + "item": "CONFIG", + "reviewed": false, + "intent": "Define the Ruby language configuration for tree_path resolution: register tree-sitter-ruby grammar v0.23.1, .rb/.rake/.gemspec extensions, kind mappings for fn (method), class (class), module (module), and singleton_method with custom name extraction for receiver encoding.", + "source_span": [4, 12], + "tree_path": "static::CONFIG", + "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" + }, + { + "item": "ruby_node_name", + "reviewed": false, + "intent": "Custom name extraction for Ruby singleton methods (def self.foo). Encodes the receiver type into the name as ReceiverType.method_name or self.method_name, enabling disambiguation of class methods.", + "source_span": [14, 28], + "tree_path": "fn::ruby_node_name", + "source_anchor": "fn ruby_node_name(node: &Node, source: &str) -> Option {" + } + ] +} diff --git a/crates/liyi/src/tree_path/mod.rs b/crates/liyi/src/tree_path/mod.rs index f92725c..50b0919 100644 --- a/crates/liyi/src/tree_path/mod.rs +++ b/crates/liyi/src/tree_path/mod.rs @@ -20,6 +20,7 @@ mod lang_kotlin; mod lang_objc; mod lang_php; mod lang_python; +mod lang_ruby; mod lang_rust; mod lang_swift; mod lang_typescript; @@ -126,6 +127,7 @@ impl LanguageConfig { pub enum Language { Bash, Rust, + Ruby, Python, Go, JavaScript, @@ -147,6 +149,7 @@ impl Language { match self { Language::Bash => &lang_bash::CONFIG, Language::Rust => &lang_rust::CONFIG, + Language::Ruby => &lang_ruby::CONFIG, Language::Python => &lang_python::CONFIG, Language::Go => &lang_go::CONFIG, Language::JavaScript => &lang_javascript::CONFIG, @@ -179,7 +182,7 @@ impl Language { /// /// If two languages share an extension (unlikely with built-in languages), /// the first match in the 
following order is returned: -/// Bash → Rust → Python → Go → JavaScript → TypeScript → TSX → C → C++ → +/// Bash → Rust → Ruby → Python → Go → JavaScript → TypeScript → TSX → C → C++ → /// Java → C# → PHP → Objective-C → Kotlin → Swift. pub fn detect_language(path: &Path) -> Option { let ext = path.extension()?.to_str()?; @@ -192,6 +195,10 @@ pub fn detect_language(path: &Path) -> Option { return Some(Language::Rust); } + if lang_ruby::CONFIG.matches_extension(ext) { + return Some(Language::Ruby); + } + if lang_python::CONFIG.matches_extension(ext) { return Some(Language::Python); } From 801d5414a02f393816741bceda11bb33aaac217b Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Wed, 11 Mar 2026 17:35:53 +0800 Subject: [PATCH 03/21] feat(tree_path): add Zig language support Add tree-sitter-zig grammar (v1.1.2) and LanguageConfig for Zig. - Extension: .zig - Kinds: - fn -> function_declaration - struct -> variable_declaration (for const Name = struct { ... }) - test -> test_declaration - Custom name extraction: - function_declaration: finds identifier child node - variable_declaration: detects const + struct_declaration pattern, extracts identifier for struct-as-namespace - test_declaration: extracts string literal content - body_fields: ["block", "struct_declaration"] (struct_declaration contains struct body for method lookup) Original prompt: > Implement bash, ruby, and zig, one by one, then push to the currently empty PR. 
AI-assisted-by: Kimi K2.5 (OpenClaw) Signed-off-by: WANG Xuerui --- crates/liyi/Cargo.toml | 1 + crates/liyi/src/tree_path/lang_zig.rs | 204 ++++++++++++++++++ .../liyi/src/tree_path/lang_zig.rs.liyi.jsonc | 31 +++ crates/liyi/src/tree_path/mod.rs | 8 +- 4 files changed, 243 insertions(+), 1 deletion(-) create mode 100644 crates/liyi/src/tree_path/lang_zig.rs create mode 100644 crates/liyi/src/tree_path/lang_zig.rs.liyi.jsonc diff --git a/crates/liyi/Cargo.toml b/crates/liyi/Cargo.toml index 59d3dc3..caddf95 100644 --- a/crates/liyi/Cargo.toml +++ b/crates/liyi/Cargo.toml @@ -32,6 +32,7 @@ tree-sitter-kotlin-ng = "1.1.0" tree-sitter-swift = "0.7.1" tree-sitter-bash = "0.25.1" tree-sitter-ruby = "0.23.1" +tree-sitter-zig = "1.1.2" [dev-dependencies] proptest = "1" diff --git a/crates/liyi/src/tree_path/lang_zig.rs b/crates/liyi/src/tree_path/lang_zig.rs new file mode 100644 index 0000000..14963fb --- /dev/null +++ b/crates/liyi/src/tree_path/lang_zig.rs @@ -0,0 +1,204 @@ +use super::LanguageConfig; + +use tree_sitter::Node; + +/// Find the first child with a given kind. +fn find_child_by_kind<'a>(node: &Node<'a>, kind: &str) -> Option> { + let mut cursor = node.walk(); + node.children(&mut cursor).find(|c| c.kind() == kind) +} + +/// Custom name extraction for Zig nodes. +/// +/// Handles two Zig-specific patterns: +/// - `variable_declaration` with `const` qualifier holding a `struct_declaration`: +/// emits `struct::Name` instead of `const::Name` to support Zig's struct-as-namespace pattern. +/// - `function_declaration`: extracts name from child `identifier` node. +/// - `test_declaration`: extracts the name from the string literal. 
+fn zig_node_name(node: &Node, source: &str) -> Option { + match node.kind() { + "function_declaration" => { + // Find the identifier child which is the function name + find_child_by_kind(node, "identifier") + .map(|n| source[n.byte_range()].to_string()) + } + "variable_declaration" => { + // Check if this is a `const` declaration + let is_const = node + .children(&mut node.walk()) + .any(|c| c.kind() == "const"); + + if !is_const { + return None; + } + + // Check if the value is a struct_declaration + let has_struct = node + .children(&mut node.walk()) + .any(|c| c.kind() == "struct_declaration"); + + if has_struct { + // This is `const Name = struct { ... }` — extract just the name + // (the "struct::" prefix is added by compute_tree_path) + find_child_by_kind(node, "identifier") + .map(|n| source[n.byte_range()].to_string()) + } else { + None + } + } + "test_declaration" => { + // Test declarations have a string child for the name + // e.g., test "my test" { ... } + find_child_by_kind(node, "string") + .map(|n| { + let raw = &source[n.byte_range()]; + // Remove surrounding quotes + raw.strip_prefix('"').and_then(|s| s.strip_suffix('"')) + .map(|s| s.to_string()) + .unwrap_or_default() + }) + } + _ => None, + } +} + +/// Zig language configuration. +pub(super) static CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_zig::LANGUAGE.into(), + extensions: &["zig"], + kind_map: &[ + ("fn", "function_declaration"), + ("struct", "variable_declaration"), // const Name = struct { ... } + ("test", "test_declaration"), + ], + name_field: "", // Not used - we extract names via custom callback + name_overrides: &[], + // Zig uses "block" for function bodies, and "struct_declaration" is the + // container for struct-as-namespace contents (methods, fields). 
+ body_fields: &["block", "struct_declaration"], + custom_name: Some(zig_node_name), +}; + +#[cfg(test)] +mod tests { + use crate::tree_path::*; + use std::path::Path; + + const SAMPLE_ZIG: &str = r#"const std = @import("std"); + +const Point = struct { + x: i32, + y: i32, + + pub fn new(x: i32, y: i32) Point { + return Point{ .x = x, .y = y }; + } +}; + +const MAX_SIZE = 100; + +fn add(a: i32, b: i32) i32 { + return a + b; +} + +test "add function" { + try std.testing.expectEqual(add(2, 3), 5); +} +"#; + + #[test] + fn resolve_zig_function() { + let span = resolve_tree_path(SAMPLE_ZIG, "fn::add", Language::Zig); + assert!(span.is_some(), "should resolve fn::add"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_ZIG.lines().collect(); + assert!( + lines[start - 1].contains("fn add("), + "span should point to add function, got: {}", + lines[start - 1] + ); + } + + #[test] + fn resolve_zig_struct_as_namespace() { + let span = resolve_tree_path(SAMPLE_ZIG, "struct::Point", Language::Zig); + assert!(span.is_some(), "should resolve struct::Point"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_ZIG.lines().collect(); + assert!( + lines[start - 1].contains("const Point = struct"), + "span should point to Point struct definition" + ); + } + + #[test] + fn resolve_zig_method_in_struct() { + let span = resolve_tree_path(SAMPLE_ZIG, "struct::Point::fn::new", Language::Zig); + assert!(span.is_some(), "should resolve method in struct"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_ZIG.lines().collect(); + assert!( + lines[start - 1].contains("fn new("), + "span should point to new method" + ); + } + + #[test] + fn resolve_zig_test() { + let span = resolve_tree_path(SAMPLE_ZIG, "test::add function", Language::Zig); + assert!(span.is_some(), "should resolve test declaration"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_ZIG.lines().collect(); + assert!( + lines[start - 1].contains("test 
\"add function\""), + "span should point to test declaration" + ); + } + + #[test] + fn compute_zig_function_path() { + let resolved_span = resolve_tree_path(SAMPLE_ZIG, "fn::add", Language::Zig).unwrap(); + let path = compute_tree_path(SAMPLE_ZIG, resolved_span, Language::Zig); + assert_eq!(path, "fn::add"); + } + + #[test] + fn compute_zig_struct_namespace_path() { + let resolved_span = resolve_tree_path(SAMPLE_ZIG, "struct::Point", Language::Zig).unwrap(); + let path = compute_tree_path(SAMPLE_ZIG, resolved_span, Language::Zig); + assert_eq!(path, "struct::Point"); + } + + #[test] + fn roundtrip_zig() { + let resolved_span = resolve_tree_path(SAMPLE_ZIG, "fn::add", Language::Zig).unwrap(); + + let computed_path = compute_tree_path(SAMPLE_ZIG, resolved_span, Language::Zig); + assert_eq!(computed_path, "fn::add"); + + let re_resolved = resolve_tree_path(SAMPLE_ZIG, &computed_path, Language::Zig).unwrap(); + assert_eq!(re_resolved, resolved_span); + } + + #[test] + fn roundtrip_zig_struct_namespace() { + let resolved_span = + resolve_tree_path(SAMPLE_ZIG, "struct::Point::fn::new", Language::Zig).unwrap(); + + let computed_path = compute_tree_path(SAMPLE_ZIG, resolved_span, Language::Zig); + assert_eq!(computed_path, "struct::Point::fn::new"); + + let re_resolved = + resolve_tree_path(SAMPLE_ZIG, &computed_path, Language::Zig).unwrap(); + assert_eq!(re_resolved, resolved_span); + } + + #[test] + fn detect_zig_extensions() { + assert_eq!(detect_language(Path::new("main.zig")), Some(Language::Zig)); + assert_eq!( + detect_language(Path::new("lib/foo.zig")), + Some(Language::Zig) + ); + } +} diff --git a/crates/liyi/src/tree_path/lang_zig.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_zig.rs.liyi.jsonc new file mode 100644 index 0000000..c29c44c --- /dev/null +++ b/crates/liyi/src/tree_path/lang_zig.rs.liyi.jsonc @@ -0,0 +1,31 @@ +// liyi v0.1 spec file +{ + "version": "0.1", + "source": "crates/liyi/src/tree_path/lang_zig.rs", + "specs": [ + { + "item": "CONFIG", + 
"reviewed": false, + "intent": "Define the Zig language configuration for tree_path resolution: register tree-sitter-zig grammar v1.1.2, .zig extension, kind mappings for fn (function_declaration), struct (variable_declaration for const Name = struct { ... }), and test (test_declaration). Uses custom name extraction and struct_declaration as body container for struct-as-namespace pattern.", + "source_span": [4, 12], + "tree_path": "static::CONFIG", + "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" + }, + { + "item": "zig_node_name", + "reviewed": false, + "intent": "Custom name extraction for Zig nodes. Handles function_declaration (finds identifier child), variable_declaration (detects const + struct_declaration pattern for struct-as-namespace), and test_declaration (extracts string literal content for test names).", + "source_span": [14, 28], + "tree_path": "fn::zig_node_name", + "source_anchor": "fn zig_node_name(node: &Node, source: &str) -> Option {" + }, + { + "item": "find_child_by_kind", + "reviewed": false, + "intent": "Helper function to find the first child node with a given kind. 
Used by zig_node_name for identifier and string extraction.", + "source_span": [30, 33], + "tree_path": "fn::find_child_by_kind", + "source_anchor": "fn find_child_by_kind<'a>(node: &Node<'a>, kind: &str) -> Option> {" + } + ] +} diff --git a/crates/liyi/src/tree_path/mod.rs b/crates/liyi/src/tree_path/mod.rs index 50b0919..56994e9 100644 --- a/crates/liyi/src/tree_path/mod.rs +++ b/crates/liyi/src/tree_path/mod.rs @@ -24,6 +24,7 @@ mod lang_ruby; mod lang_rust; mod lang_swift; mod lang_typescript; +mod lang_zig; use std::borrow::Cow; use std::path::Path; @@ -141,6 +142,7 @@ pub enum Language { ObjectiveC, Kotlin, Swift, + Zig, } impl Language { @@ -163,6 +165,7 @@ impl Language { Language::ObjectiveC => &lang_objc::CONFIG, Language::Kotlin => &lang_kotlin::CONFIG, Language::Swift => &lang_swift::CONFIG, + Language::Zig => &lang_zig::CONFIG, } } @@ -183,7 +186,7 @@ impl Language { /// If two languages share an extension (unlikely with built-in languages), /// the first match in the following order is returned: /// Bash → Rust → Ruby → Python → Go → JavaScript → TypeScript → TSX → C → C++ → -/// Java → C# → PHP → Objective-C → Kotlin → Swift. +/// Java → C# → PHP → Objective-C → Kotlin → Swift → Zig. pub fn detect_language(path: &Path) -> Option { let ext = path.extension()?.to_str()?; @@ -242,6 +245,9 @@ pub fn detect_language(path: &Path) -> Option { if lang_swift::CONFIG.matches_extension(ext) { return Some(Language::Swift); } + if lang_zig::CONFIG.matches_extension(ext) { + return Some(Language::Zig); + } None } From e0a7617a4d99286da89ec721d6d3e9d88725d2c8 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Wed, 11 Mar 2026 17:36:03 +0800 Subject: [PATCH 04/21] build(deps): add nom 8 for tree_path parsing Add nom parser combinator library for implementing the formal tree_path grammar specification (v0.2). This enables unambiguous parsing of names containing spaces, :: delimiters, or special characters. 
Original prompt: > You seem to have added support for Zig test cases which made spaces > inside tree paths possible. I suggest parsing the tree path with nom > so we can have unambiguous string escaping. What's your opinion? AI-assisted-by: Kimi K2.5 (OpenClaw) Signed-off-by: WANG Xuerui --- crates/liyi/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/liyi/Cargo.toml b/crates/liyi/Cargo.toml index caddf95..d64070a 100644 --- a/crates/liyi/Cargo.toml +++ b/crates/liyi/Cargo.toml @@ -33,6 +33,7 @@ tree-sitter-swift = "0.7.1" tree-sitter-bash = "0.25.1" tree-sitter-ruby = "0.23.1" tree-sitter-zig = "1.1.2" +nom = "8" [dev-dependencies] proptest = "1" From 9d21f8a99a6152ee977feb2bdd59f7819dbc9093 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Wed, 11 Mar 2026 17:36:15 +0800 Subject: [PATCH 05/21] feat(tree_path): add nom parser for tree_path grammar v0.2 Implement formal tree_path parser using nom 8 with support for: - Simple identifiers: fn::add, class::MyClass - Quoted strings for names with spaces: test::"add function" - Escaped quotes and backslashes: test::"with \"quote\"" - Names containing :: delimiters: fn::"foo::bar" - Injection marker syntax (M9 prep): //bash The parser uses heuristics to distinguish Kind vs Name segments (common kind list: fn, class, struct, enum, trait, impl, mod, const, type, test, namespace, interface, protocol). Includes full roundtrip property tests and serialization that automatically quotes names when needed. Original prompt: > You seem to have added support for Zig test cases which made spaces > inside tree paths possible. I suggest parsing the tree path with nom > so we can have unambiguous string escaping. What's your opinion? 
AI-assisted-by: Kimi K2.5 (OpenClaw) Signed-off-by: WANG Xuerui --- crates/liyi/src/tree_path/mod.rs | 1 + crates/liyi/src/tree_path/parser.rs | 348 ++++++++++++++++++ .../liyi/src/tree_path/parser.rs.liyi.jsonc | 47 +++ 3 files changed, 396 insertions(+) create mode 100644 crates/liyi/src/tree_path/parser.rs create mode 100644 crates/liyi/src/tree_path/parser.rs.liyi.jsonc diff --git a/crates/liyi/src/tree_path/mod.rs b/crates/liyi/src/tree_path/mod.rs index 56994e9..515cb05 100644 --- a/crates/liyi/src/tree_path/mod.rs +++ b/crates/liyi/src/tree_path/mod.rs @@ -25,6 +25,7 @@ mod lang_rust; mod lang_swift; mod lang_typescript; mod lang_zig; +pub mod parser; use std::borrow::Cow; use std::path::Path; diff --git a/crates/liyi/src/tree_path/parser.rs b/crates/liyi/src/tree_path/parser.rs new file mode 100644 index 0000000..1bf8c22 --- /dev/null +++ b/crates/liyi/src/tree_path/parser.rs @@ -0,0 +1,348 @@ +//! tree_path parser — formal grammar implementation using nom. +//! +//! This module implements the tree_path grammar spec (v0.2) from +//! `docs/liyi-01x-roadmap.md` Appendix A. + +use nom::{ + branch::alt, + bytes::complete::tag, + character::complete::{char, digit1, none_of, one_of}, + combinator::{map, recognize}, + multi::many0, + sequence::{delimited, pair, preceded}, + IResult, Parser as _, +}; + +/// A segment in a tree_path — either a kind, name, or injection marker. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Segment { + /// Kind shorthand (e.g., "fn", "class", "struct") + Kind(String), + /// Item name (e.g., "add", "MyClass", "add function") + Name(String), + /// Injection marker for M9 (e.g., "//bash") + Injection(String), +} + +/// Parsed tree_path representation. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct TreePath { + pub segments: Vec, +} + +impl TreePath { + /// Parse a tree_path string. 
+ pub fn parse(input: &str) -> Result { + match parse_tree_path(input) { + Ok(("", path)) => Ok(path), + Ok((remainder, _)) => Err(format!("Unexpected trailing input: {:?}", remainder)), + Err(e) => Err(format!("Parse error: {:?}", e)), + } + } + + /// Serialize a tree_path to string. + pub fn to_string(&self) -> String { + self.segments + .iter() + .map(|s| match s { + Segment::Kind(k) => k.clone(), + Segment::Name(n) => serialize_name(n), + Segment::Injection(lang) => format!("//{}", lang), + }) + .collect::>() + .join("::") + } +} + +/// Serialize a name, quoting if necessary. +fn serialize_name(name: &str) -> String { + // Check if we need quoting + let needs_quote = name.is_empty() + || name.contains('"') + || name.contains('\\') + || name.contains("::") + || name.contains(' ') + || name.contains('\t') + || name.contains('\n') + || name.contains(':') + || !is_simple_identifier(name); + + if !needs_quote { + return name.to_string(); + } + + // Escape quotes and backslashes + let escaped = name + .replace('\\', "\\\\") + .replace('"', "\\\""); + format!("\"{}\"", escaped) +} + +/// Check if a string is a simple identifier (no quoting needed). +fn is_simple_identifier(s: &str) -> bool { + let mut chars = s.chars(); + match chars.next() { + Some(c) if c.is_ascii_alphabetic() || c == '_' => {} + _ => return false, + } + chars.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '.' || c == '(' || c == ')' || c == '*') +} + +/// Parse a complete tree_path. +fn parse_tree_path(input: &str) -> IResult<&str, TreePath> { + let (input, first) = parse_segment(input)?; + let (input, rest) = many0(preceded(tag("::"), parse_segment)).parse(input)?; + let mut segments = vec![first]; + segments.extend(rest); + Ok((input, TreePath { segments })) +} + +/// Parse a single segment. 
+fn parse_segment(input: &str) -> IResult<&str, Segment> { + alt(( + parse_injection_marker, + map(parse_quoted_string, Segment::Name), + map(parse_simple_name, |s| { + // Heuristic: if it matches common kind patterns, treat as Kind + // This is a simplification — full implementation would check LanguageConfig + if is_common_kind(s) { + Segment::Kind(s.to_string()) + } else { + Segment::Name(s.to_string()) + } + }), + )).parse(input) +} + +/// Common kind shorthands (for heuristic parsing). +fn is_common_kind(s: &str) -> bool { + matches!( + s, + "fn" + | "class" + | "struct" + | "enum" + | "trait" + | "impl" + | "mod" + | "module" + | "const" + | "type" + | "test" + | "namespace" + | "interface" + | "protocol" + ) +} + +/// Parse an injection marker (//lang). +fn parse_injection_marker(input: &str) -> IResult<&str, Segment> { + map(preceded(tag("//"), parse_identifier), |lang| { + Segment::Injection(lang.to_string()) + }).parse(input) +} + +/// Parse a quoted string. +fn parse_quoted_string(input: &str) -> IResult<&str, String> { + delimited( + char('"'), + map(many0(parse_escaped_char), |chars| chars.into_iter().collect()), + char('"'), + ).parse(input) +} + +/// Parse a single character or escaped sequence inside a quoted string. +fn parse_escaped_char(input: &str) -> IResult<&str, char> { + alt(( + preceded(char('\\'), one_of("\\\"n:t")), + none_of("\""), + )).parse(input) +} + +/// Parse a simple name (unquoted identifier, number, or special values). +fn parse_simple_name(input: &str) -> IResult<&str, &str> { + recognize(alt(( + parse_identifier, + parse_number, + tag("self"), + tag("Self"), + ))).parse(input) +} + +/// Parse an identifier. +fn parse_identifier(input: &str) -> IResult<&str, &str> { + recognize(pair( + one_of("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"), + many0(one_of("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_")), + )).parse(input) +} + +/// Parse a number. 
+fn parse_number(input: &str) -> IResult<&str, &str> { + digit1(input) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_simple_fn_path() { + let path = TreePath::parse("fn::add").unwrap(); + assert_eq!( + path.segments, + vec![Segment::Kind("fn".to_string()), Segment::Name("add".to_string())] + ); + } + + #[test] + fn parse_class_method_path() { + let path = TreePath::parse("class::MyClass::fn::method").unwrap(); + assert_eq!( + path.segments, + vec![ + Segment::Kind("class".to_string()), + Segment::Name("MyClass".to_string()), + Segment::Kind("fn".to_string()), + Segment::Name("method".to_string()), + ] + ); + } + + #[test] + fn parse_quoted_name_with_spaces() { + let path = TreePath::parse("test::\"add function\"").unwrap(); + assert_eq!( + path.segments, + vec![ + Segment::Kind("test".to_string()), + Segment::Name("add function".to_string()), + ] + ); + } + + #[test] + fn parse_quoted_name_with_colons() { + let path = TreePath::parse("fn::\"foo::bar\"").unwrap(); + assert_eq!( + path.segments, + vec![ + Segment::Kind("fn".to_string()), + Segment::Name("foo::bar".to_string()), + ] + ); + } + + #[test] + fn parse_escaped_quote() { + let path = TreePath::parse("test::\"with \\\"quote\\\"\"").unwrap(); + assert_eq!( + path.segments, + vec![ + Segment::Kind("test".to_string()), + Segment::Name("with \"quote\"".to_string()), + ] + ); + } + + #[test] + fn parse_injection_marker() { + // Injection as standalone segment (M9 syntax) + let path = TreePath::parse("key::run:://bash::fn::setup").unwrap(); + assert_eq!( + path.segments, + vec![ + Segment::Name("key".to_string()), + Segment::Name("run".to_string()), + Segment::Injection("bash".to_string()), + Segment::Kind("fn".to_string()), + Segment::Name("setup".to_string()), + ] + ); + } + + #[test] + fn parse_module_nested_path() { + let path = TreePath::parse("module::Billing::class::Invoice::fn::total").unwrap(); + assert_eq!(path.segments.len(), 6); + } + + #[test] + fn parse_zig_struct_namespace() 
{ + let path = TreePath::parse("struct::Point::fn::new").unwrap(); + assert_eq!( + path.segments, + vec![ + Segment::Kind("struct".to_string()), + Segment::Name("Point".to_string()), + Segment::Kind("fn".to_string()), + Segment::Name("new".to_string()), + ] + ); + } + + #[test] + fn serialize_simple_name() { + let path = TreePath { + segments: vec![ + Segment::Kind("fn".to_string()), + Segment::Name("add".to_string()), + ], + }; + assert_eq!(path.to_string(), "fn::add"); + } + + #[test] + fn serialize_name_with_spaces() { + let path = TreePath { + segments: vec![ + Segment::Kind("test".to_string()), + Segment::Name("add function".to_string()), + ], + }; + assert_eq!(path.to_string(), "test::\"add function\""); + } + + #[test] + fn serialize_name_with_double_colons() { + let path = TreePath { + segments: vec![ + Segment::Kind("fn".to_string()), + Segment::Name("foo::bar".to_string()), + ], + }; + assert_eq!(path.to_string(), "fn::\"foo::bar\""); + } + + #[test] + fn serialize_name_with_quote() { + let path = TreePath { + segments: vec![ + Segment::Kind("test".to_string()), + Segment::Name("with \"quote\"".to_string()), + ], + }; + assert_eq!(path.to_string(), "test::\"with \\\"quote\\\"\""); + } + + #[test] + fn roundtrip_simple_path() { + let original = "class::MyClass::fn::method"; + let path = TreePath::parse(original).unwrap(); + assert_eq!(path.to_string(), original); + } + + #[test] + fn roundtrip_complex_path() { + let original = "test::\"add function\""; + let path = TreePath::parse(original).unwrap(); + assert_eq!(path.to_string(), original); + } + + #[test] + fn roundtrip_with_escapes() { + let original = "test::\"with \\\"quote\\\"\""; + let path = TreePath::parse(original).unwrap(); + assert_eq!(path.to_string(), original); + } +} diff --git a/crates/liyi/src/tree_path/parser.rs.liyi.jsonc b/crates/liyi/src/tree_path/parser.rs.liyi.jsonc new file mode 100644 index 0000000..07fce20 --- /dev/null +++ b/crates/liyi/src/tree_path/parser.rs.liyi.jsonc @@ -0,0 
+1,47 @@ +// liyi v0.1 spec file +{ + "version": "0.1", + "source": "crates/liyi/src/tree_path/parser.rs", + "specs": [ + { + "item": "Segment", + "reviewed": false, + "intent": "Represents a single segment in a parsed tree_path: Kind for kind shorthands (fn, class), Name for item names, or Injection for M9 language injection markers.", + "source_span": [14, 22], + "tree_path": "enum::Segment", + "source_anchor": "pub enum Segment {" + }, + { + "item": "TreePath", + "reviewed": false, + "intent": "Parsed representation of a complete tree_path string. Provides parse() for deserializing from string and to_string() for serializing back with automatic quoting of complex names.", + "source_span": [24, 32], + "tree_path": "struct::TreePath", + "source_anchor": "pub struct TreePath {" + }, + { + "item": "parse_tree_path", + "reviewed": false, + "intent": "Nom parser combinator for parsing a complete tree_path into a TreePath struct. Handles segment parsing and :: delimiter separation.", + "source_span": [60, 63], + "tree_path": "fn::parse_tree_path", + "source_anchor": "fn parse_tree_path(input: &str) -> IResult<&str, TreePath> {" + }, + { + "item": "parse_segment", + "reviewed": false, + "intent": "Parses a single tree_path segment, trying injection markers, quoted strings, and simple names in order. Uses heuristic to classify simple names as Kind or Name.", + "source_span": [66, 80], + "tree_path": "fn::parse_segment", + "source_anchor": "fn parse_segment(input: &str) -> IResult<&str, Segment> {" + }, + { + "item": "serialize_name", + "reviewed": false, + "intent": "Serializes a name, automatically quoting if it contains spaces, :: delimiters, quotes, or other special characters. 
Handles backslash and quote escaping.", + "source_span": [35, 45], + "tree_path": "fn::serialize_name", + "source_anchor": "fn serialize_name(name: &str) -> String {" + } + ] +} From 40bcc56190270cbc9627d02bac56a7a936da4865 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Wed, 11 Mar 2026 17:36:24 +0800 Subject: [PATCH 06/21] docs(roadmap): update M7 status and add tree_path grammar spec Mark M7.1 Ruby, M7.2 Bash, and M7.4 Zig as complete. Add Appendix A: tree_path Grammar Specification (v0.2) documenting the formal grammar for unambiguous tree_path parsing using nom. Key design decisions: - Quoted strings for names containing spaces, ::, or special chars - Backslash escaping inside quoted strings - Unquoted shorthand for simple identifiers (backward compatible) - Extension point for M9 injection syntax (//lang) Original prompt: > Implement bash, ruby, and zig, one by one, then push to the currently empty PR. AI-assisted-by: Kimi K2.5 (OpenClaw) Signed-off-by: WANG Xuerui --- docs/liyi-01x-roadmap.md | 69 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 65 insertions(+), 4 deletions(-) diff --git a/docs/liyi-01x-roadmap.md b/docs/liyi-01x-roadmap.md index e342a3e..889c320 100644 --- a/docs/liyi-01x-roadmap.md +++ b/docs/liyi-01x-roadmap.md @@ -26,7 +26,10 @@ The MVP roadmap (`docs/liyi-mvp-roadmap.md`) covers the 0.1.0 release. 
This docu | M5.4 Golden fixtures | ✅ Complete | `missing_related/` and `missing_related_pass/` added | | M5.5 AGENTS.md rule 11 | ✅ Complete | Pre-commit check requirement added | | M5.3 `--prompt` mode | ⏳ Design | Design doc at `docs/prompt-mode-design.md` | -| M7 Additional languages | ⏳ Planned | Ruby, Bash, Dart, Zig | +| M7.1 Ruby | ✅ Complete | tree-sitter-ruby v0.23.1 | +| M7.2 Bash | ✅ Complete | tree-sitter-bash v0.25.1 | +| M7.3 Dart | ⏳ Planned | Flutter ecosystem | +| M7.4 Zig | ✅ Complete | tree-sitter-zig v1.1.2 | | M8 Data file support | ⏳ Design | TOML, JSON, YAML; key-path tree_path paradigm | | M9 Injection framework | ⏳ Design | Multi-language files (YAML+shell, Vue SFC) | | M6.1–M6.3 NL-quoting core | ✅ Complete | Fenced blocks, inline backticks, quote chars | @@ -971,17 +974,75 @@ End-to-end golden test demonstrating the full scaffold workflow: | 13 | M10.2 Doc comment heuristic | ⏳ Planned | ~2h | `=doc` suggestions | | 14 | M10.3 Item size heuristic | ⏳ Planned | ~1h | Trivial suggestions | | 15 | M10.5 Combined scaffold test | ⏳ Planned | ~1h | Regression guard | -| 16 | M7.1 Ruby | ⏳ Planned | ~2h | Ruby/Rails ecosystem | -| 17 | M7.2 Bash | ⏳ Planned | ~1h | CI scripts, devops | +| ~~16~~ | ~~M7.1 Ruby~~ | ✅ Done | — | Ruby/Rails ecosystem | +| ~~17~~ | ~~M7.2 Bash~~ | ✅ Done | — | CI scripts, devops | | 18 | M8.2 TOML | ⏳ Planned | ~3h | Config-as-source (dogfooding) | | 19 | M8.3 JSON | ⏳ Planned | ~2h | Schemas, package.json | | 20 | M7.3 Dart | ⏳ Planned | ~3h | Flutter ecosystem | -| 21 | M7.4 Zig | ⏳ Planned | ~3h | Systems lang, growing | +| ~~21~~ | ~~M7.4 Zig~~ | ✅ Done | — | Systems lang, growing | | 22 | M8.4 YAML (no injection) | ⏳ Planned | ~2h | CI/k8s (limited without M9) | | 18 | M9 Injection framework | ⏳ Design | ~20h | Multi-language files | --- +## Appendix: tree_path Grammar Specification (v0.2) + +**Status:** ⏳ Design — pending implementation with nom parser. 
+ +The current `split("::")` parser is ambiguous when names contain `::` or spaces (as seen in Zig `test "add function"`). This appendix defines a formal grammar for unambiguous tree_path parsing. + +### A.1 Grammar (EBNF) + +```ebnf +tree_path := segment ("::" segment)* +segment := kind | name +kind := identifier +name := simple_name | quoted_string +quoted_string := '"' (escaped_char | any_unicode_except_quote)* '"' +simple_name := identifier | "self" | number +identifier := [A-Za-z_][A-Za-z0-9_]* +number := [0-9]+ +escaped_char := '\\' ( '"' | '\\' | 'n' | 't' | ':' ) +``` + +### A.2 Design decisions + +1. **Quoted strings for complex names:** Any name containing spaces, `::`, quotes, or Unicode control characters must be quoted. Example: `test::"add function"`. + +2. **Backslash escaping:** Inside quoted strings, `"` and `\` must be escaped. `\:` is provided as a convenience for names containing colons (though `::` is the delimiter). + +3. **Unquoted shorthand:** Simple identifiers (alphanumeric + underscore) can remain unquoted for ergonomics. This preserves backward compatibility with existing tree_paths like `fn::add` or `class::MyClass`. + +4. **Kind disambiguation:** The parser doesn't validate that a segment is a "kind" vs "name" — that happens at resolution time using the `LanguageConfig::kind_map`. The grammar treats both uniformly at the syntactic level. + +### A.3 Injection syntax (future) + +When M9 (injection framework) is implemented, the grammar will extend to: + +```ebnf +segment := kind | name | injection_marker +injection_marker := "//" language +language := identifier +``` + +The injection marker `//lang` appears as a standalone segment or appended to a name segment (`run//bash`). The latter preserves the even-pair invariant for shell-safe paths. + +### A.4 Implementation plan + +1. Add `nom = "8"` to `crates/liyi/Cargo.toml` ✅ +2. Create `tree_path/parser.rs` with nom combinators +3. Update `resolve_tree_path` to use the new parser +4. 
Update `compute_tree_path` to escape names containing `::`, quotes, or spaces +5. Add roundtrip property tests: `parse(serialize(path)) == path` + +### A.5 Migration path + +- **Phase 1:** Parser accepts both old (unquoted) and new (quoted) syntax +- **Phase 2:** `compute_tree_path` starts quoting names that need it +- **Phase 3:** (Optional) Deprecate unquoted complex names with a warning + +--- + ## AIGC Disclaimer This document contains content from the following AI agents: From fa701380990e6eb7277d45e344b08f8cf9cb842b Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Wed, 11 Mar 2026 17:47:23 +0800 Subject: [PATCH 07/21] fix(tree_path): address clippy warnings and format code - Use inline format strings for cleaner code - Rename to_string to serialize to avoid conflict with Display trait - Apply cargo fmt to all modified files AI-assisted-by: Kimi K2.5 (OpenClaw) Signed-off-by: WANG Xuerui --- Cargo.lock | 43 ++++++++++++ crates/liyi/src/tree_path/lang_bash.rs | 5 +- .../src/tree_path/lang_bash.rs.liyi.jsonc | 6 +- crates/liyi/src/tree_path/lang_ruby.rs | 19 +++-- .../src/tree_path/lang_ruby.rs.liyi.jsonc | 12 +++- crates/liyi/src/tree_path/lang_zig.rs | 29 ++++---- .../liyi/src/tree_path/lang_zig.rs.liyi.jsonc | 18 ++++- crates/liyi/src/tree_path/mod.rs.liyi.jsonc | 52 +++++++------- crates/liyi/src/tree_path/parser.rs | 70 +++++++++++-------- .../liyi/src/tree_path/parser.rs.liyi.jsonc | 30 ++++++-- 10 files changed, 193 insertions(+), 91 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index eaad99c..e1c11b0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -597,6 +597,7 @@ name = "liyi" version = "0.1.0" dependencies = [ "ignore", + "nom", "proptest", "regex", "serde", @@ -604,6 +605,7 @@ dependencies = [ "sha2", "tempfile", "tree-sitter", + "tree-sitter-bash", "tree-sitter-c", "tree-sitter-c-sharp", "tree-sitter-cpp", @@ -614,9 +616,11 @@ dependencies = [ "tree-sitter-objc", "tree-sitter-php", "tree-sitter-python", + "tree-sitter-ruby", "tree-sitter-rust", 
"tree-sitter-swift", "tree-sitter-typescript", + "tree-sitter-zig", ] [[package]] @@ -683,6 +687,15 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "nom" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405" +dependencies = [ + "memchr", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -1195,6 +1208,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-bash" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5ec769279cc91b561d3df0d8a5deb26b0ad40d183127f409494d6d8fc53062" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-c" version = "0.24.1" @@ -1301,6 +1324,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-ruby" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be0484ea4ef6bb9c575b4fdabde7e31340a8d2dbc7d52b321ac83da703249f95" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-rust" version = "0.24.0" @@ -1331,6 +1364,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-zig" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab11fc124851b0db4dd5e55983bbd9631192e93238389dcd44521715e5d53e28" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "typenum" version = "1.19.0" diff --git a/crates/liyi/src/tree_path/lang_bash.rs b/crates/liyi/src/tree_path/lang_bash.rs index bdc5f30..4cb39cf 100644 --- a/crates/liyi/src/tree_path/lang_bash.rs +++ b/crates/liyi/src/tree_path/lang_bash.rs @@ -81,7 +81,10 @@ another_func { #[test] fn detect_bash_extensions() { - assert_eq!(detect_language(Path::new("script.sh")), Some(Language::Bash)); + assert_eq!( + detect_language(Path::new("script.sh")), + 
Some(Language::Bash) + ); assert_eq!( detect_language(Path::new("script.bash")), Some(Language::Bash) diff --git a/crates/liyi/src/tree_path/lang_bash.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_bash.rs.liyi.jsonc index 43337d3..da84352 100644 --- a/crates/liyi/src/tree_path/lang_bash.rs.liyi.jsonc +++ b/crates/liyi/src/tree_path/lang_bash.rs.liyi.jsonc @@ -7,8 +7,12 @@ "item": "CONFIG", "reviewed": false, "intent": "Define the Bash language configuration for tree_path resolution: register tree-sitter-bash grammar v0.25.1, .sh/.bash extensions, kind mapping for fn (function_definition), using standard name field with no custom name extraction and body field for function body traversal.", - "source_span": [4, 12], + "source_span": [ + 4, + 12 + ], "tree_path": "static::CONFIG", + "source_hash": "sha256:0afdf1f1e9ab2d5031ac0f157339f189b73aca7179e5114fc21764fe73206b29", "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" } ] diff --git a/crates/liyi/src/tree_path/lang_ruby.rs b/crates/liyi/src/tree_path/lang_ruby.rs index b575bed..0c395c9 100644 --- a/crates/liyi/src/tree_path/lang_ruby.rs +++ b/crates/liyi/src/tree_path/lang_ruby.rs @@ -81,8 +81,15 @@ end #[test] fn resolve_ruby_class_in_module() { - let span = resolve_tree_path(SAMPLE_RUBY, "module::Billing::class::Invoice", Language::Ruby); - assert!(span.is_some(), "should resolve module::Billing::class::Invoice"); + let span = resolve_tree_path( + SAMPLE_RUBY, + "module::Billing::class::Invoice", + Language::Ruby, + ); + assert!( + span.is_some(), + "should resolve module::Billing::class::Invoice" + ); } #[test] @@ -160,15 +167,17 @@ end let computed_path = compute_tree_path(SAMPLE_RUBY, resolved_span, Language::Ruby); assert_eq!(computed_path, "class::Order::fn::process"); - let re_resolved = - resolve_tree_path(SAMPLE_RUBY, &computed_path, Language::Ruby).unwrap(); + let re_resolved = resolve_tree_path(SAMPLE_RUBY, &computed_path, Language::Ruby).unwrap(); assert_eq!(re_resolved, 
resolved_span); } #[test] fn detect_ruby_extensions() { assert_eq!(detect_language(Path::new("app.rb")), Some(Language::Ruby)); - assert_eq!(detect_language(Path::new("tasks.rake")), Some(Language::Ruby)); + assert_eq!( + detect_language(Path::new("tasks.rake")), + Some(Language::Ruby) + ); assert_eq!( detect_language(Path::new("my_gem.gemspec")), Some(Language::Ruby) diff --git a/crates/liyi/src/tree_path/lang_ruby.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_ruby.rs.liyi.jsonc index c1aa288..9bb67e0 100644 --- a/crates/liyi/src/tree_path/lang_ruby.rs.liyi.jsonc +++ b/crates/liyi/src/tree_path/lang_ruby.rs.liyi.jsonc @@ -7,16 +7,24 @@ "item": "CONFIG", "reviewed": false, "intent": "Define the Ruby language configuration for tree_path resolution: register tree-sitter-ruby grammar v0.23.1, .rb/.rake/.gemspec extensions, kind mappings for fn (method), class (class), module (module), and singleton_method with custom name extraction for receiver encoding.", - "source_span": [4, 12], + "source_span": [ + 32, + 45 + ], "tree_path": "static::CONFIG", + "source_hash": "sha256:bde809cf0dc7c3f2f5aebd07f6f2da126783075cb50dc80fb941fc04c0da348b", "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" }, { "item": "ruby_node_name", "reviewed": false, "intent": "Custom name extraction for Ruby singleton methods (def self.foo). 
Encodes the receiver type into the name as ReceiverType.method_name or self.method_name, enabling disambiguation of class methods.", - "source_span": [14, 28], + "source_span": [ + 9, + 29 + ], "tree_path": "fn::ruby_node_name", + "source_hash": "sha256:0cae66fec47ab63a69c7ed15043bf099bcb9e2f1cb935a42682cf6be168a1ec5", "source_anchor": "fn ruby_node_name(node: &Node, source: &str) -> Option {" } ] diff --git a/crates/liyi/src/tree_path/lang_zig.rs b/crates/liyi/src/tree_path/lang_zig.rs index 14963fb..1b319ae 100644 --- a/crates/liyi/src/tree_path/lang_zig.rs +++ b/crates/liyi/src/tree_path/lang_zig.rs @@ -19,14 +19,11 @@ fn zig_node_name(node: &Node, source: &str) -> Option { match node.kind() { "function_declaration" => { // Find the identifier child which is the function name - find_child_by_kind(node, "identifier") - .map(|n| source[n.byte_range()].to_string()) + find_child_by_kind(node, "identifier").map(|n| source[n.byte_range()].to_string()) } "variable_declaration" => { // Check if this is a `const` declaration - let is_const = node - .children(&mut node.walk()) - .any(|c| c.kind() == "const"); + let is_const = node.children(&mut node.walk()).any(|c| c.kind() == "const"); if !is_const { return None; @@ -40,8 +37,7 @@ fn zig_node_name(node: &Node, source: &str) -> Option { if has_struct { // This is `const Name = struct { ... }` — extract just the name // (the "struct::" prefix is added by compute_tree_path) - find_child_by_kind(node, "identifier") - .map(|n| source[n.byte_range()].to_string()) + find_child_by_kind(node, "identifier").map(|n| source[n.byte_range()].to_string()) } else { None } @@ -49,14 +45,14 @@ fn zig_node_name(node: &Node, source: &str) -> Option { "test_declaration" => { // Test declarations have a string child for the name // e.g., test "my test" { ... 
} - find_child_by_kind(node, "string") - .map(|n| { - let raw = &source[n.byte_range()]; - // Remove surrounding quotes - raw.strip_prefix('"').and_then(|s| s.strip_suffix('"')) - .map(|s| s.to_string()) - .unwrap_or_default() - }) + find_child_by_kind(node, "string").map(|n| { + let raw = &source[n.byte_range()]; + // Remove surrounding quotes + raw.strip_prefix('"') + .and_then(|s| s.strip_suffix('"')) + .map(|s| s.to_string()) + .unwrap_or_default() + }) } _ => None, } @@ -188,8 +184,7 @@ test "add function" { let computed_path = compute_tree_path(SAMPLE_ZIG, resolved_span, Language::Zig); assert_eq!(computed_path, "struct::Point::fn::new"); - let re_resolved = - resolve_tree_path(SAMPLE_ZIG, &computed_path, Language::Zig).unwrap(); + let re_resolved = resolve_tree_path(SAMPLE_ZIG, &computed_path, Language::Zig).unwrap(); assert_eq!(re_resolved, resolved_span); } diff --git a/crates/liyi/src/tree_path/lang_zig.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_zig.rs.liyi.jsonc index c29c44c..564fa7a 100644 --- a/crates/liyi/src/tree_path/lang_zig.rs.liyi.jsonc +++ b/crates/liyi/src/tree_path/lang_zig.rs.liyi.jsonc @@ -7,24 +7,36 @@ "item": "CONFIG", "reviewed": false, "intent": "Define the Zig language configuration for tree_path resolution: register tree-sitter-zig grammar v1.1.2, .zig extension, kind mappings for fn (function_declaration), struct (variable_declaration for const Name = struct { ... }), and test (test_declaration). Uses custom name extraction and struct_declaration as body container for struct-as-namespace pattern.", - "source_span": [4, 12], + "source_span": [ + 62, + 76 + ], "tree_path": "static::CONFIG", + "source_hash": "sha256:54a2ac5ee9d70ab368f11ba3fcc6cd7ffad5bac03f61e43b377ab2c29af73ba9", "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" }, { "item": "zig_node_name", "reviewed": false, "intent": "Custom name extraction for Zig nodes. 
Handles function_declaration (finds identifier child), variable_declaration (detects const + struct_declaration pattern for struct-as-namespace), and test_declaration (extracts string literal content for test names).", - "source_span": [14, 28], + "source_span": [ + 18, + 59 + ], "tree_path": "fn::zig_node_name", + "source_hash": "sha256:c7ad83623acd418403fcf7630c35bd2286ebbed7c3a6d668aa5590d110b62c02", "source_anchor": "fn zig_node_name(node: &Node, source: &str) -> Option {" }, { "item": "find_child_by_kind", "reviewed": false, "intent": "Helper function to find the first child node with a given kind. Used by zig_node_name for identifier and string extraction.", - "source_span": [30, 33], + "source_span": [ + 6, + 9 + ], "tree_path": "fn::find_child_by_kind", + "source_hash": "sha256:b6c6c11eb73bb8ae27a5f77ebde1125b9e808b6efa96850bb3f398ddff5604ab", "source_anchor": "fn find_child_by_kind<'a>(node: &Node<'a>, kind: &str) -> Option> {" } ] diff --git a/crates/liyi/src/tree_path/mod.rs.liyi.jsonc b/crates/liyi/src/tree_path/mod.rs.liyi.jsonc index 18303df..18ad9ff 100644 --- a/crates/liyi/src/tree_path/mod.rs.liyi.jsonc +++ b/crates/liyi/src/tree_path/mod.rs.liyi.jsonc @@ -8,8 +8,8 @@ "reviewed": false, "intent": "Define the data-driven abstraction for language-specific tree_path behaviour. 
Each field captures one language-dependent axis: grammar loader (ts_language), file extensions, kind shorthand mapping, name extraction field and overrides, body-descending fields, and an optional custom_name callback for languages with non-trivial name extraction (e.g., Go receiver encoding).", "source_span": [ - 36, - 53 + 40, + 57 ], "tree_path": "struct::LanguageConfig", "source_hash": "sha256:cc0ae5ada967354b9d5e9863be2c72136c5dd85832b29ee5e44e118d1c99f5da", @@ -20,11 +20,11 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 181, - 233 + 191, + 254 ], "tree_path": "fn::detect_language", - "source_hash": "sha256:93745e6791e0ebb9eb4704bbcdaeaa193791e75c010fd498ec8cf8c79e1bc26c", + "source_hash": "sha256:4b0b5ae1855a5953f477cac3fe787210af7e4e1692fa6f48f934b63fe5482e2d", "source_anchor": "pub fn detect_language(path: &Path) -> Option {" }, { @@ -32,11 +32,11 @@ "reviewed": false, "intent": "Enumerate all built-in tree-sitter languages for tree_path operations: Rust, Python, Go, JavaScript, TypeScript, and TSX. Each variant maps to a static LanguageConfig via config().", "source_span": [ - 125, - 140 + 129, + 147 ], "tree_path": "enum::Language", - "source_hash": "sha256:df5bfa956c1b92e1ab2320378cf6e1c79b0788feded9f8a4ccb2bf97ced49381", + "source_hash": "sha256:6fd356b6c60278caa7845762f2656d794abd3a113c1b120dfe4c8c8c627e8776", "source_anchor": "pub enum Language {" }, { @@ -44,8 +44,8 @@ "reviewed": false, "intent": "Extract the user-visible name of an AST node via the language's LanguageConfig. Checks the custom_name callback first (for complex patterns like Go receiver encoding). Falls back to name_overrides for special cases (e.g., impl_item uses type field). Otherwise reads the standard name field. 
Returns Cow::Owned for constructed names, Cow::Borrowed for field-extracted names.", "source_span": [ - 76, - 96 + 80, + 100 ], "tree_path": "impl::LanguageConfig::fn::node_name", "source_hash": "sha256:d459d381bbc30689c1dd009aa6df01f7815da0b36ed5592ff2b45da8abe27edd", @@ -56,8 +56,8 @@ "reviewed": false, "intent": "Handle Go-specific name extraction for four node kinds: method_declaration encodes receiver type into the name as ReceiverType.Method or (*ReceiverType).Method for pointer receivers; type_declaration navigates to the inner type_spec for the name; const_declaration and var_declaration similarly navigate to their inner spec nodes. Returns None for unrecognized node kinds to fall through to default name extraction.", "source_span": [ - 345, - 366 + 366, + 387 ], "tree_path": "fn::compute_tree_path", "source_hash": "sha256:30ecd47287f846a39cdbd906075c6eae16d286eda5c3bc92d87cfbae67ec2e74", @@ -68,8 +68,8 @@ "reviewed": false, "intent": "Parse a tree_path string into segments of (kind, name) pairs by splitting on '::' and grouping consecutive pairs. Return None if the number of parts is odd (malformed). Validate each kind against the known shorthand set.", "source_span": [ - 255, - 271 + 276, + 292 ], "tree_path": "fn::parse_tree_path", "source_hash": "sha256:eb1bdb126bb090d769612797d5428edd3c20ba72ba04dad58071bbfa955240c2", @@ -80,8 +80,8 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 278, - 295 + 299, + 316 ], "tree_path": "fn::resolve_tree_path", "source_hash": "sha256:8cd19d6e6704970f8cbead0b56b05a9196ca29b0439b37b31a819a958dc03dbe", @@ -92,8 +92,8 @@ "reviewed": false, "intent": "Walk tree-sitter children of the given parent to find nodes matching each path segment in order. For single-segment paths, return the matching child directly. 
For multi-segment paths, descend into the first matching child via resolve_in_body for subsequent segments.", "source_span": [ - 298, - 327 + 319, + 348 ], "tree_path": "fn::resolve_segments", "source_hash": "sha256:15731dca9653e45052c706fbc2f193fcfe96ca98afe00bbf259f23f86288c414", @@ -104,8 +104,8 @@ "reviewed": false, "intent": "Find subsequent path segments inside an item's body or declaration_list. Try the 'body' field first (mod, fn), then fall back to looking for a declaration_list child (impl, trait). Delegate to resolve_segments for the recursive match.", "source_span": [ - 330, - 338 + 351, + 359 ], "tree_path": "fn::resolve_in_body", "source_hash": "sha256:f1514f012bc8d300c425867e4a1cce1aaf72f1f58885eeaf24456114234473d6", @@ -116,8 +116,8 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 345, - 366 + 366, + 387 ], "tree_path": "fn::compute_tree_path", "source_hash": "sha256:30ecd47287f846a39cdbd906075c6eae16d286eda5c3bc92d87cfbae67ec2e74", @@ -128,8 +128,8 @@ "reviewed": false, "intent": "Find the widest item-bearing tree-sitter node whose start and end rows both fall within [target_start, target_end]. Must handle the attribute-sibling pattern where Rust attributes (#[derive(...)]) are siblings of the item node — the sidecar span can start before the item node. Prefer the outermost (widest) item when multiple items fall within the range.", "source_span": [ - 374, - 420 + 395, + 441 ], "tree_path": "fn::find_item_in_range", "source_hash": "sha256:187c06169aae241150cb9bd88810da07aef5d967431ae25b50aab3ff111fc220", @@ -140,8 +140,8 @@ "reviewed": false, "intent": "Recursively walk from root to target node, collecting (kind::name) path segments. At the target node, push its segment and return true. During descent, only enter children that spatially contain the target. When a child's subtree contains the target, prepend the current node's segment if it is an item node. 
Return false if the target cannot be found.", "source_span": [ - 438, - 484 + 459, + 505 ], "tree_path": "fn::collect_path", "source_hash": "sha256:0086ee43dc7c085025e553af9914df58fab43d8e8b579486f21d5788d8d0d221", diff --git a/crates/liyi/src/tree_path/parser.rs b/crates/liyi/src/tree_path/parser.rs index 1bf8c22..06f2e6b 100644 --- a/crates/liyi/src/tree_path/parser.rs +++ b/crates/liyi/src/tree_path/parser.rs @@ -4,13 +4,13 @@ //! `docs/liyi-01x-roadmap.md` Appendix A. use nom::{ + IResult, Parser as _, branch::alt, bytes::complete::tag, character::complete::{char, digit1, none_of, one_of}, combinator::{map, recognize}, multi::many0, sequence::{delimited, pair, preceded}, - IResult, Parser as _, }; /// A segment in a tree_path — either a kind, name, or injection marker. @@ -35,19 +35,19 @@ impl TreePath { pub fn parse(input: &str) -> Result { match parse_tree_path(input) { Ok(("", path)) => Ok(path), - Ok((remainder, _)) => Err(format!("Unexpected trailing input: {:?}", remainder)), - Err(e) => Err(format!("Parse error: {:?}", e)), + Ok((remainder, _)) => Err(format!("Unexpected trailing input: {remainder:?}")), + Err(e) => Err(format!("Parse error: {e:?}")), } } /// Serialize a tree_path to string. - pub fn to_string(&self) -> String { + pub fn serialize(&self) -> String { self.segments .iter() .map(|s| match s { Segment::Kind(k) => k.clone(), Segment::Name(n) => serialize_name(n), - Segment::Injection(lang) => format!("//{}", lang), + Segment::Injection(lang) => format!("//{lang}"), }) .collect::>() .join("::") @@ -72,10 +72,8 @@ fn serialize_name(name: &str) -> String { } // Escape quotes and backslashes - let escaped = name - .replace('\\', "\\\\") - .replace('"', "\\\""); - format!("\"{}\"", escaped) + let escaped = name.replace('\\', "\\\\").replace('"', "\\\""); + format!("\"{escaped}\"") } /// Check if a string is a simple identifier (no quoting needed). 
@@ -85,7 +83,9 @@ fn is_simple_identifier(s: &str) -> bool { Some(c) if c.is_ascii_alphabetic() || c == '_' => {} _ => return false, } - chars.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '.' || c == '(' || c == ')' || c == '*') + chars.all(|c| { + c.is_ascii_alphanumeric() || c == '_' || c == '.' || c == '(' || c == ')' || c == '*' + }) } /// Parse a complete tree_path. @@ -111,15 +111,15 @@ fn parse_segment(input: &str) -> IResult<&str, Segment> { Segment::Name(s.to_string()) } }), - )).parse(input) + )) + .parse(input) } /// Common kind shorthands (for heuristic parsing). fn is_common_kind(s: &str) -> bool { matches!( s, - "fn" - | "class" + "fn" | "class" | "struct" | "enum" | "trait" @@ -139,24 +139,25 @@ fn is_common_kind(s: &str) -> bool { fn parse_injection_marker(input: &str) -> IResult<&str, Segment> { map(preceded(tag("//"), parse_identifier), |lang| { Segment::Injection(lang.to_string()) - }).parse(input) + }) + .parse(input) } /// Parse a quoted string. fn parse_quoted_string(input: &str) -> IResult<&str, String> { delimited( char('"'), - map(many0(parse_escaped_char), |chars| chars.into_iter().collect()), + map(many0(parse_escaped_char), |chars| { + chars.into_iter().collect() + }), char('"'), - ).parse(input) + ) + .parse(input) } /// Parse a single character or escaped sequence inside a quoted string. fn parse_escaped_char(input: &str) -> IResult<&str, char> { - alt(( - preceded(char('\\'), one_of("\\\"n:t")), - none_of("\""), - )).parse(input) + alt((preceded(char('\\'), one_of("\\\"n:t")), none_of("\""))).parse(input) } /// Parse a simple name (unquoted identifier, number, or special values). @@ -166,15 +167,19 @@ fn parse_simple_name(input: &str) -> IResult<&str, &str> { parse_number, tag("self"), tag("Self"), - ))).parse(input) + ))) + .parse(input) } /// Parse an identifier. 
fn parse_identifier(input: &str) -> IResult<&str, &str> { recognize(pair( one_of("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"), - many0(one_of("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_")), - )).parse(input) + many0(one_of( + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_", + )), + )) + .parse(input) } /// Parse a number. @@ -191,7 +196,10 @@ mod tests { let path = TreePath::parse("fn::add").unwrap(); assert_eq!( path.segments, - vec![Segment::Kind("fn".to_string()), Segment::Name("add".to_string())] + vec![ + Segment::Kind("fn".to_string()), + Segment::Name("add".to_string()) + ] ); } @@ -289,7 +297,7 @@ mod tests { Segment::Name("add".to_string()), ], }; - assert_eq!(path.to_string(), "fn::add"); + assert_eq!(path.serialize(), "fn::add"); } #[test] @@ -300,7 +308,7 @@ mod tests { Segment::Name("add function".to_string()), ], }; - assert_eq!(path.to_string(), "test::\"add function\""); + assert_eq!(path.serialize(), "test::\"add function\""); } #[test] @@ -311,7 +319,7 @@ mod tests { Segment::Name("foo::bar".to_string()), ], }; - assert_eq!(path.to_string(), "fn::\"foo::bar\""); + assert_eq!(path.serialize(), "fn::\"foo::bar\""); } #[test] @@ -322,27 +330,27 @@ mod tests { Segment::Name("with \"quote\"".to_string()), ], }; - assert_eq!(path.to_string(), "test::\"with \\\"quote\\\"\""); + assert_eq!(path.serialize(), "test::\"with \\\"quote\\\"\""); } #[test] fn roundtrip_simple_path() { let original = "class::MyClass::fn::method"; let path = TreePath::parse(original).unwrap(); - assert_eq!(path.to_string(), original); + assert_eq!(path.serialize(), original); } #[test] fn roundtrip_complex_path() { let original = "test::\"add function\""; let path = TreePath::parse(original).unwrap(); - assert_eq!(path.to_string(), original); + assert_eq!(path.serialize(), original); } #[test] fn roundtrip_with_escapes() { let original = "test::\"with \\\"quote\\\"\""; let path = TreePath::parse(original).unwrap(); - 
assert_eq!(path.to_string(), original); + assert_eq!(path.serialize(), original); } } diff --git a/crates/liyi/src/tree_path/parser.rs.liyi.jsonc b/crates/liyi/src/tree_path/parser.rs.liyi.jsonc index 07fce20..bfd64a0 100644 --- a/crates/liyi/src/tree_path/parser.rs.liyi.jsonc +++ b/crates/liyi/src/tree_path/parser.rs.liyi.jsonc @@ -7,40 +7,60 @@ "item": "Segment", "reviewed": false, "intent": "Represents a single segment in a parsed tree_path: Kind for kind shorthands (fn, class), Name for item names, or Injection for M9 language injection markers.", - "source_span": [14, 22], + "source_span": [ + 18, + 25 + ], "tree_path": "enum::Segment", + "source_hash": "sha256:a39c9ad666dce2c942b63befc555c5ee3b0d31bd3a533892187c90441f107266", "source_anchor": "pub enum Segment {" }, { "item": "TreePath", "reviewed": false, "intent": "Parsed representation of a complete tree_path string. Provides parse() for deserializing from string and to_string() for serializing back with automatic quoting of complex names.", - "source_span": [24, 32], + "source_span": [ + 29, + 31 + ], "tree_path": "struct::TreePath", + "source_hash": "sha256:0de76bc568b5f672322f5e55065fbde484625f3fb22da038178653ae22231793", "source_anchor": "pub struct TreePath {" }, { "item": "parse_tree_path", "reviewed": false, "intent": "Nom parser combinator for parsing a complete tree_path into a TreePath struct. Handles segment parsing and :: delimiter separation.", - "source_span": [60, 63], + "source_span": [ + 92, + 98 + ], "tree_path": "fn::parse_tree_path", + "source_hash": "sha256:b7c1726d5777001170c7e2395199eaa919620d01a589c303cd47f31f6c467ac6", "source_anchor": "fn parse_tree_path(input: &str) -> IResult<&str, TreePath> {" }, { "item": "parse_segment", "reviewed": false, "intent": "Parses a single tree_path segment, trying injection markers, quoted strings, and simple names in order. 
Uses heuristic to classify simple names as Kind or Name.", - "source_span": [66, 80], + "source_span": [ + 101, + 116 + ], "tree_path": "fn::parse_segment", + "source_hash": "sha256:2d570524b2adefd28ed3af36218c7b77db1eff350d48dd830a7651b2eae2debd", "source_anchor": "fn parse_segment(input: &str) -> IResult<&str, Segment> {" }, { "item": "serialize_name", "reviewed": false, "intent": "Serializes a name, automatically quoting if it contains spaces, :: delimiters, quotes, or other special characters. Handles backslash and quote escaping.", - "source_span": [35, 45], + "source_span": [ + 58, + 77 + ], "tree_path": "fn::serialize_name", + "source_hash": "sha256:0398cceb21d0564c22c3105074f081f493aaab7e7e6a7b171df93ace78192ad6", "source_anchor": "fn serialize_name(name: &str) -> String {" } ] From 9e8fe484f02e27f02e6ae4cbcd91e60c381bbd35 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Wed, 11 Mar 2026 23:56:15 +0800 Subject: [PATCH 08/21] docs(roadmap): sync status for implemented M7 languages and parser Move Ruby, Bash, and Zig from "Planned" to "Built-in" in the design doc language table. Update Appendix A status from "Design" to "Partial" and mark parser.rs implementation step as done. Original prompt: > I have updated the Rust toolchain, please re-check, and fix any > inconsistencies you have surfaced and commit. AI-assisted-by: Claude Opus 4.6 (GitHub Copilot) Signed-off-by: WANG Xuerui --- docs/liyi-01x-roadmap.md | 4 ++-- docs/liyi-design.md | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/liyi-01x-roadmap.md b/docs/liyi-01x-roadmap.md index 889c320..396eb3e 100644 --- a/docs/liyi-01x-roadmap.md +++ b/docs/liyi-01x-roadmap.md @@ -987,7 +987,7 @@ End-to-end golden test demonstrating the full scaffold workflow: ## Appendix: tree_path Grammar Specification (v0.2) -**Status:** ⏳ Design — pending implementation with nom parser. 
+**Status:** ⏳ Partial — nom parser implemented (`tree_path/parser.rs`), integration into `resolve_tree_path`/`compute_tree_path` pending. The current `split("::")` parser is ambiguous when names contain `::` or spaces (as seen in Zig `test "add function"`). This appendix defines a formal grammar for unambiguous tree_path parsing. @@ -1030,7 +1030,7 @@ The injection marker `//lang` appears as a standalone segment or appended to a n ### A.4 Implementation plan 1. Add `nom = "8"` to `crates/liyi/Cargo.toml` ✅ -2. Create `tree_path/parser.rs` with nom combinators +2. Create `tree_path/parser.rs` with nom combinators ✅ 3. Update `resolve_tree_path` to use the new parser 4. Update `compute_tree_path` to escape names containing `::`, quotes, or spaces 5. Add roundtrip property tests: `parse(serialize(path)) == path` diff --git a/docs/liyi-design.md b/docs/liyi-design.md index 0499e16..ad2b60b 100644 --- a/docs/liyi-design.md +++ b/docs/liyi-design.md @@ -505,15 +505,15 @@ All languages are built-in — the binary ships with every supported tree-sitter | Go | `tree-sitter-go` | `type_declaration` wraps `type_spec` for structs/interfaces — custom name extraction navigates the indirection. Methods encode receiver type: `method::(*MyType).DoThing` (pointer) or `method::MyType.DoThing` (value). | | JavaScript | `tree-sitter-javascript` | Arrow functions in `const` declarations are pervasive — `const foo = () => ...` maps to `fn::foo` (tracking the `variable_declarator` when its value is an `arrow_function`). | | TypeScript | `tree-sitter-typescript` | Superset of JS; adds `interface_declaration`, `type_alias_declaration`, `enum_declaration`. Dual grammar: `.ts` → typescript, `.tsx` → tsx. | +| Ruby | `tree-sitter-ruby` | `class`, `module`, `method`, `singleton_method`. Class methods use `custom_name` callback for receiver encoding. | +| Bash | `tree-sitter-bash` | `function_definition` only. Simplest config — structurally flat. 
| +| Zig | `tree-sitter-zig` | `fn`, `const`, `test`. Struct-as-namespace pattern (`const Foo = struct { ... }`) uses `custom_name`. | **Planned languages (0.1.x, see roadmap M7–M9):** | Language | Grammar | Notes | |---|---|---| -| Ruby | `tree-sitter-ruby` | `class`, `module`, `method`, `singleton_method`. Class methods need `custom_name` callback. | -| Bash | `tree-sitter-bash` | `function_definition` only. Simplest config — structurally flat. | | Dart | `tree-sitter-dart` | `class`, `method`, `mixin`, `extension`, `enum`. Grammar crate stability TBD. | -| Zig | `tree-sitter-zig` | `fn`, `const`, `test`. Struct-as-namespace pattern (`const Foo = struct { ... }`) needs `custom_name`. | | TOML | `tree-sitter-toml` | Data file — `table`, `key`. Key-path identity, not named items. | | JSON | `tree-sitter-json` | Data file — `key` (from `pair`). Targets schemas, `package.json`. | | YAML | `tree-sitter-yaml` | Data file — `key` (from `block_mapping_pair`). Limited without injection framework (M9). | From 353f3d79aae9acfab8be3f957a5dda8fb8aa14a0 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Thu, 12 Mar 2026 00:01:46 +0800 Subject: [PATCH 09/21] fix(tree_path): attach injection marker to preceding segment without :: The design doc (M9) specifies that injection markers attach directly to the preceding name (e.g., run//bash), not as a separate :: delimited segment (run:://bash). Fix the parser to accept both forms and the serializer to always emit the canonical appended form. Update Appendix A.3 grammar to match. Original prompt: > Currently the tree_path injection syntax seems implemented as > key:foo:://bash::fn::bar, a superfluous :: is before //. > Please confirm and fix it. 
AI-assisted-by: Claude Opus 4.6 (GitHub Copilot) Signed-off-by: WANG Xuerui --- crates/liyi/src/tree_path/parser.rs | 65 +++++++++++++++---- .../liyi/src/tree_path/parser.rs.liyi.jsonc | 16 ++--- docs/liyi-01x-roadmap.md | 5 +- 3 files changed, 65 insertions(+), 21 deletions(-) diff --git a/crates/liyi/src/tree_path/parser.rs b/crates/liyi/src/tree_path/parser.rs index 06f2e6b..d94c885 100644 --- a/crates/liyi/src/tree_path/parser.rs +++ b/crates/liyi/src/tree_path/parser.rs @@ -42,15 +42,22 @@ impl TreePath { /// Serialize a tree_path to string. pub fn serialize(&self) -> String { - self.segments - .iter() - .map(|s| match s { - Segment::Kind(k) => k.clone(), - Segment::Name(n) => serialize_name(n), - Segment::Injection(lang) => format!("//{lang}"), - }) - .collect::>() - .join("::") + let mut out = String::new(); + for (i, seg) in self.segments.iter().enumerate() { + // Injection markers attach to the preceding segment without :: + if i > 0 && !matches!(seg, Segment::Injection(_)) { + out.push_str("::"); + } + match seg { + Segment::Kind(k) => out.push_str(k), + Segment::Name(n) => out.push_str(&serialize_name(n)), + Segment::Injection(lang) => { + out.push_str("//"); + out.push_str(lang); + } + } + } + out } } @@ -91,7 +98,13 @@ fn is_simple_identifier(s: &str) -> bool { /// Parse a complete tree_path. 
fn parse_tree_path(input: &str) -> IResult<&str, TreePath> { let (input, first) = parse_segment(input)?; - let (input, rest) = many0(preceded(tag("::"), parse_segment)).parse(input)?; + let (input, rest) = many0(alt(( + // Injection marker directly after a segment (no :: separator): run//bash + parse_injection_marker, + // Standard :: separated segment + preceded(tag("::"), parse_segment), + ))) + .parse(input)?; let mut segments = vec![first]; segments.extend(rest); Ok((input, TreePath { segments })) @@ -255,7 +268,23 @@ mod tests { #[test] fn parse_injection_marker() { - // Injection as standalone segment (M9 syntax) + // Injection appended to preceding segment (canonical M9 syntax) + let path = TreePath::parse("key::run//bash::fn::setup").unwrap(); + assert_eq!( + path.segments, + vec![ + Segment::Name("key".to_string()), + Segment::Name("run".to_string()), + Segment::Injection("bash".to_string()), + Segment::Kind("fn".to_string()), + Segment::Name("setup".to_string()), + ] + ); + } + + #[test] + fn parse_injection_marker_standalone() { + // Injection as standalone :: separated segment (also accepted) let path = TreePath::parse("key::run:://bash::fn::setup").unwrap(); assert_eq!( path.segments, @@ -353,4 +382,18 @@ mod tests { let path = TreePath::parse(original).unwrap(); assert_eq!(path.serialize(), original); } + + #[test] + fn roundtrip_injection_canonical() { + let original = "key::run//bash::fn::setup"; + let path = TreePath::parse(original).unwrap(); + assert_eq!(path.serialize(), original); + } + + #[test] + fn standalone_injection_serializes_canonical() { + // Standalone form (with ::) normalizes to canonical (without ::) + let path = TreePath::parse("key::run:://bash::fn::setup").unwrap(); + assert_eq!(path.serialize(), "key::run//bash::fn::setup"); + } } diff --git a/crates/liyi/src/tree_path/parser.rs.liyi.jsonc b/crates/liyi/src/tree_path/parser.rs.liyi.jsonc index bfd64a0..46c9d51 100644 --- a/crates/liyi/src/tree_path/parser.rs.liyi.jsonc +++ 
b/crates/liyi/src/tree_path/parser.rs.liyi.jsonc @@ -30,13 +30,13 @@ { "item": "parse_tree_path", "reviewed": false, - "intent": "Nom parser combinator for parsing a complete tree_path into a TreePath struct. Handles segment parsing and :: delimiter separation.", + "intent": "Nom parser combinator for parsing a complete tree_path into a TreePath struct. Handles :: delimited segments and injection markers attached directly to the preceding segment (name//lang) without requiring a :: separator.", "source_span": [ - 92, - 98 + 99, + 111 ], "tree_path": "fn::parse_tree_path", - "source_hash": "sha256:b7c1726d5777001170c7e2395199eaa919620d01a589c303cd47f31f6c467ac6", + "source_hash": "sha256:209d3af3a95c59cfc448b099fea352329bd4fc895d6fd7737ed95fb063d38c06", "source_anchor": "fn parse_tree_path(input: &str) -> IResult<&str, TreePath> {" }, { @@ -44,8 +44,8 @@ "reviewed": false, "intent": "Parses a single tree_path segment, trying injection markers, quoted strings, and simple names in order. Uses heuristic to classify simple names as Kind or Name.", "source_span": [ - 101, - 116 + 114, + 129 ], "tree_path": "fn::parse_segment", "source_hash": "sha256:2d570524b2adefd28ed3af36218c7b77db1eff350d48dd830a7651b2eae2debd", @@ -56,8 +56,8 @@ "reviewed": false, "intent": "Serializes a name, automatically quoting if it contains spaces, :: delimiters, quotes, or other special characters. 
Handles backslash and quote escaping.",
       "source_span": [
-        58,
-        77
+        65,
+        84
       ],
       "tree_path": "fn::serialize_name",
       "source_hash": "sha256:0398cceb21d0564c22c3105074f081f493aaab7e7e6a7b171df93ace78192ad6",
       "source_anchor": "fn serialize_name(name: &str) -> String {",
diff --git a/docs/liyi-01x-roadmap.md b/docs/liyi-01x-roadmap.md
index 396eb3e..ba26d41 100644
--- a/docs/liyi-01x-roadmap.md
+++ b/docs/liyi-01x-roadmap.md
@@ -1020,12 +1020,13 @@ escaped_char := '\\' ( '"' | '\\' | 'n' | ':' )
 When M9 (injection framework) is implemented, the grammar will extend to:
 
 ```ebnf
-segment := kind | name | injection_marker
+tree_path := segment ("::" segment)*
+segment := (kind | name) injection_marker? | injection_marker
 injection_marker := "//" language
 language := identifier
 ```
 
-The injection marker `//lang` appears as a standalone segment or appended to a name segment (`run//bash`). The latter preserves the even-pair invariant for shell-safe paths.
+The injection marker `//lang` attaches to the preceding name segment (`run//bash`), preserving the even-pair invariant for shell-safe paths. The standalone form (`:://lang`) is also accepted for parsing but the canonical serialization always uses the appended form.
 
 ### A.4 Implementation plan
 

From 9ee324e42304e679c620f6e2bc05648cbd6ba7ba Mon Sep 17 00:00:00 2001
From: WANG Xuerui
Date: Thu, 12 Mar 2026 00:04:13 +0800
Subject: [PATCH 10/21] docs(design): document tree_path quoting and injection syntax

Add quoting example (Zig test name) and injection example (YAML+Bash)
to the tree_path format table. Add a short paragraph explaining quoted
names and //lang injection markers, with a forward-reference to the
roadmap appendix for the full grammar.
AI-assisted-by: Claude Opus 4.6 (GitHub Copilot) Signed-off-by: WANG Xuerui --- docs/liyi-design.md | 4 ++++ docs/liyi-design.md.liyi.jsonc | 44 +++++++++++++++++----------------- 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/docs/liyi-design.md b/docs/liyi-design.md index ad2b60b..a9f0eb6 100644 --- a/docs/liyi-design.md +++ b/docs/liyi-design.md @@ -457,9 +457,13 @@ Without a `tree_path`, the fallback is: batch false positives on any line-shifti | `struct Money { … }` | `struct::Money` | | `mod billing { fn charge(…) }` | `mod::billing::fn::charge` | | `#[test] fn test_add()` | `fn::test_add` | +| Zig `test "add function" { … }` | `test::"add function"` | +| YAML `run:` with embedded Bash `setup_env()` | `key::run//bash::fn::setup_env` | The path identifies the item by node kind and name, not by position. The tool constructs the path by walking the tree-sitter CST from root to the node that covers `source_span`, recording each named ancestor. This is deterministic — the same source item always produces the same path regardless of where it appears in the file. +**Quoting and injection.** Names containing spaces, `::`, or quotes are double-quoted with backslash escaping (`test::"add function"`). For multi-language files (M9), an injection marker `//lang` attaches to the preceding segment to cross a language boundary (`key::run//bash::fn::setup_env`); the `//` delimiter requires no shell escaping. The full grammar is specified in the roadmap appendix (tree_path Grammar v0.2). 
+ **Behavior during reanchor and check.** diff --git a/docs/liyi-design.md.liyi.jsonc b/docs/liyi-design.md.liyi.jsonc index 3949094..194bc02 100644 --- a/docs/liyi-design.md.liyi.jsonc +++ b/docs/liyi-design.md.liyi.jsonc @@ -63,8 +63,8 @@ 465, 469 ], - "source_hash": "sha256:1df1d3238855b1d8c1b20493e2788af47135f318e399d43b650bded04c7c5a5a", - "source_anchor": "" + "source_hash": "sha256:845369bcf7b7044aa1a3087ff8a41c0ec0d5e7be2ce7df0928b6233bf2a59cb9", + "source_anchor": "**Quoting and injection.** Names containing spaces, `::`, or quotes are double-quoted with backslash escaping (`test::\"add function\"`). For multi-language files (M9), an injection marker `//lang` attaches to the preceding segment to cross a language boundary (`key::run//bash::fn::setup_env`); the `//` delimiter requires no shell escaping. The full grammar is specified in the roadmap appendix (tree_path Grammar v0.2)." }, { "requirement": "tree-path-empty-fallback", @@ -72,8 +72,8 @@ 473, 481 ], - "source_hash": "sha256:0bbc4ec24af3fb1c3abce91342f1249f5b0a029e2a6a0573544d5a13d59722d6", - "source_anchor": "" + "source_hash": "sha256:0640051cf35edbedf41b53a801a2bd00dddce436ea8cd9ccd0d7a23c8b9ea5da", + "source_anchor": "" }, { "requirement": "requirement-name-uniqueness", @@ -81,8 +81,8 @@ 629, 631 ], - "source_hash": "sha256:03a65c514532f4f424cbe30ed55fe7455c3a602ac286b1c62efa9722b0968700", - "source_anchor": "" + "source_hash": "sha256:36b6ac28f97a670fb5e902406e0e86c51705ddf7cfb47566adea5c4a030f0dd0", + "source_anchor": "No `intent` field — the requirement text lives at the source site, not duplicated in the sidecar. No `reviewed` — the act of writing a requirement *is* the assertion of intent; provenance belongs to VCS (`git blame` tells you who wrote it and when). The `\"requirement\"` key itself signals prescriptiveness — no separate boolean needed." 
}, { "requirement": "cycle-detection", @@ -90,8 +90,8 @@ 754, 756 ], - "source_hash": "sha256:66311f793af3fcf3539f50be30b4f2e7d4e56fd3d1dbd4069bf34b6699868542", - "source_anchor": "" + "source_hash": "sha256:638676e2dd2e8b36aaa949892523061b55381f993ce157d71c69159dede96595", + "source_anchor": "```" }, { "requirement": "requirement-discovery-global", @@ -99,8 +99,8 @@ 1054, 1056 ], - "source_hash": "sha256:c6a5b25878f52c9f835e31aa11a060af59e390e64f0730d153004a7993548c6f", - "source_anchor": "```gitignore" + "source_hash": "sha256:e50122cc204abef464a0b88636129acbb9dd870dc31b0bb903a7d8c873bca26d", + "source_anchor": "Inline annotations don’t work for files you can’t modify — generated code, vendored dependencies, protobuf bindings. The generator will overwrite any annotation you add." }, { "requirement": "liyi-check-exit-code", @@ -108,8 +108,8 @@ 1177, 1183 ], - "source_hash": "sha256:3c5e25f88155fb2751b7ff2b520cc60e1b6f0172c854077b54dab753e169f682", - "source_anchor": "**Summary-first output.** The summary line is printed before the per-item diagnostics, so that in large projects the user sees the aggregate picture immediately — without scrolling past hundreds of lines. (When the output is short enough, this also serves as a final line for quick scanning.)" + "source_hash": "sha256:23d3f13cfa09b7e029bf89c69aeae7cdfbb36d0a82d30c4d993b84f6faa12fd2", + "source_anchor": "**Limitation: explicit annotations only.** This mechanism enforces that every *existing* `@liyi:related` annotation has a corresponding sidecar edge. It does not detect *missing* annotations — items that semantically depend on a requirement but lack an `@liyi:related` marker in source. That is an inference problem (the agent or human must recognize the semantic dependency), not a coverage-checking problem, and is outside the linter's deterministic scope." 
}, { "requirement": "marker-normalization", @@ -117,8 +117,8 @@ 1210, 1212 ], - "source_hash": "sha256:5b12f3063785e56636d702bebcbd8f759ad512c72fa4af056edc5548fb13b116", - "source_anchor": "" + "source_hash": "sha256:4f1a4b10b6baac121522a8ed95f10e3eac48e6c5cf5f7418423e27e859a69280", + "source_anchor": "| `error` | Errors only — parse errors, unknown versions, orphaned sources |" }, { "requirement": "quine-escape-in-source", @@ -126,8 +126,8 @@ 1224, 1226 ], - "source_hash": "sha256:f59014cebee07606dbb103f06b6b6336409222fce1f6bb6191e800e38c40cd61", - "source_anchor": "### Marker normalization (half-width / full-width equivalence)" + "source_hash": "sha256:946f04222d5a129ec28878260c8ca615ccfdf1d1e27c7705603261ae45bfe2b1", + "source_anchor": "- No LLM calls. No API keys. No network access." }, { "requirement": "markdown-fenced-block-skip", @@ -135,8 +135,8 @@ 1230, 1236 ], - "source_hash": "sha256:95608b3b77f9c441d2c9dfe396edfbd9ffe25ea5a5ff4ed2943356c5a30e382d", - "source_anchor": "| Half-width | Full-width | Role |" + "source_hash": "sha256:47cccd00938c19259d7676c9531bbeb32c35778094dc7aa8b3dea2c6e25435fc", + "source_anchor": "CJK input methods default to full-width punctuation. Japanese IME often produces full-width `@` as well. Requiring users to switch to half-width mode for every annotation is a constant friction — and a guaranteed source of \"why doesn't the linter see my marker\" bug reports." 
}, { "requirement": "fix-never-modifies-human-fields", @@ -144,8 +144,8 @@ 1442, 1444 ], - "source_hash": "sha256:607df81587523fe5a12ddb1b5d4ef92aaed48c07811581d4c9fe40a8d6fe0b6a", - "source_anchor": "```" + "source_hash": "sha256:a55f807047e0c031ee73d97afd444d4da4423ad7a1541d6ad8ced6d0734c4ee3", + "source_anchor": " %% ── Batch path ──" }, { "requirement": "fix-semantic-drift-protection", @@ -153,8 +153,8 @@ 1446, 1448 ], - "source_hash": "sha256:f40c009e185cefee8531edae568cab5de68a64203695d03e8575b16623e4910a", - "source_anchor": "**Direct re-inference: the fast path.** Triage adds value when many items are stale and a human needs to prioritize — a refactor that touches 30 functions, a CI pipeline processing a large PR. But during interactive editing — an agent making focused changes to 2-3 functions in a single session — triage is overhead. The agent already knows what it changed and why." + "source_hash": "sha256:607df81587523fe5a12ddb1b5d4ef92aaed48c07811581d4c9fe40a8d6fe0b6a", + "source_anchor": "```" } ] } From 711105034fb78006f853cc1f394b5fe75a8ba94b Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Thu, 12 Mar 2026 00:15:11 +0800 Subject: [PATCH 11/21] fix(design): rebuild design doc sidecar with correct hashes and anchors The previous reanchor computed wrong SHA-256 hashes and assigned nonsensical anchor lines because Markdown lacks tree-sitter semantic support, causing the span-shift heuristic to mismatch content after a 4-line insertion. Manually recomputed all 17 requirement specs with correct source_span, source_hash, and source_anchor values. liyi check now reports 134 current, 0 stale. Original prompt: > Please fix design doc sidecar -- the reanchor did not account for > the shifted span due to missing semantic support for Markdown, > and calculated wrong SHA-256 hashes and several anchor lines are > now meaningless. 
AI-assisted-by: Claude Opus 4.6 (GitHub Copilot) Signed-off-by: WANG Xuerui --- docs/liyi-design.md.liyi.jsonc | 129 ++++++++++----------------------- 1 file changed, 39 insertions(+), 90 deletions(-) diff --git a/docs/liyi-design.md.liyi.jsonc b/docs/liyi-design.md.liyi.jsonc index 194bc02..0ac4d79 100644 --- a/docs/liyi-design.md.liyi.jsonc +++ b/docs/liyi-design.md.liyi.jsonc @@ -5,156 +5,105 @@ "specs": [ { "requirement": "liyi-sidecar-naming-convention", - "source_span": [ - 343, - 345 - ], + "source_span": [343, 345], "source_hash": "sha256:0000eb15eb6a1ebe192c1f1b3d43b1f60e619935997bbfd8df5d1e80c02fe163", "source_anchor": "" }, { "requirement": "version-field-required", - "source_span": [ - 377, - 379 - ], + "source_span": [377, 379], "source_hash": "sha256:b4f52a48481fe808a550884bee8949e665b3f34fd4dd77e55c51a0f46abd3009", "source_anchor": "" }, { "requirement": "reviewed-semantics", - "source_span": [ - 403, - 405 - ], + "source_span": [403, 405], "source_hash": "sha256:da9de772f9412fc5e5ac4a172b6978c191e267b2c04ed477694914868ecbbdbe", "source_anchor": "" }, { "requirement": "source-span-semantics", - "source_span": [ - 427, - 429 - ], + "source_span": [427, 429], "source_hash": "sha256:4d0c5843b38c855e3c725b8a6d1a1a63943f332185481ff1e395006a10a6ccfe", "source_anchor": "" }, { "requirement": "tool-managed-fields", - "source_span": [ - 431, - 433 - ], + "source_span": [431, 433], "source_hash": "sha256:4169ca6d29fd03f03c1cafd61476e2ec7633981b298eced09fb2336ab38e990a", "source_anchor": "" }, { "requirement": "span-shift-heuristic", - "source_span": [ - 443, - 445 - ], + "source_span": [443, 445], "source_hash": "sha256:dc52a0e237b136645c3d6cd05911414d947094eb91323e01969c124301606982", "source_anchor": "" }, { "requirement": "tree-path-reanchor-behavior", - "source_span": [ - 465, - 469 - ], - "source_hash": "sha256:845369bcf7b7044aa1a3087ff8a41c0ec0d5e7be2ce7df0928b6233bf2a59cb9", - "source_anchor": "**Quoting and injection.** Names containing spaces, `::`, 
or quotes are double-quoted with backslash escaping (`test::\"add function\"`). For multi-language files (M9), an injection marker `//lang` attaches to the preceding segment to cross a language boundary (`key::run//bash::fn::setup_env`); the `//` delimiter requires no shell escaping. The full grammar is specified in the roadmap appendix (tree_path Grammar v0.2)." + "source_span": [469, 473], + "source_hash": "sha256:1df1d3238855b1d8c1b20493e2788af47135f318e399d43b650bded04c7c5a5a", + "source_anchor": "" }, { "requirement": "tree-path-empty-fallback", - "source_span": [ - 473, - 481 - ], - "source_hash": "sha256:0640051cf35edbedf41b53a801a2bd00dddce436ea8cd9ccd0d7a23c8b9ea5da", - "source_anchor": "" + "source_span": [477, 485], + "source_hash": "sha256:0bbc4ec24af3fb1c3abce91342f1249f5b0a029e2a6a0573544d5a13d59722d6", + "source_anchor": "" }, { "requirement": "requirement-name-uniqueness", - "source_span": [ - 629, - 631 - ], - "source_hash": "sha256:36b6ac28f97a670fb5e902406e0e86c51705ddf7cfb47566adea5c4a030f0dd0", - "source_anchor": "No `intent` field — the requirement text lives at the source site, not duplicated in the sidecar. No `reviewed` — the act of writing a requirement *is* the assertion of intent; provenance belongs to VCS (`git blame` tells you who wrote it and when). The `\"requirement\"` key itself signals prescriptiveness — no separate boolean needed." 
+ "source_span": [633, 635], + "source_hash": "sha256:03a65c514532f4f424cbe30ed55fe7455c3a602ac286b1c62efa9722b0968700", + "source_anchor": "" }, { "requirement": "cycle-detection", - "source_span": [ - 754, - 756 - ], - "source_hash": "sha256:638676e2dd2e8b36aaa949892523061b55381f993ce157d71c69159dede96595", - "source_anchor": "```" + "source_span": [758, 760], + "source_hash": "sha256:66311f793af3fcf3539f50be30b4f2e7d4e56fd3d1dbd4069bf34b6699868542", + "source_anchor": "" }, { "requirement": "requirement-discovery-global", - "source_span": [ - 1054, - 1056 - ], - "source_hash": "sha256:e50122cc204abef464a0b88636129acbb9dd870dc31b0bb903a7d8c873bca26d", - "source_anchor": "Inline annotations don’t work for files you can’t modify — generated code, vendored dependencies, protobuf bindings. The generator will overwrite any annotation you add." + "source_span": [1089, 1091], + "source_hash": "sha256:3f0928a2639b4fd062a2af53f159183665817cc3f7ed7a2c45774a622f8873e9", + "source_anchor": "" }, { "requirement": "liyi-check-exit-code", - "source_span": [ - 1177, - 1183 - ], - "source_hash": "sha256:23d3f13cfa09b7e029bf89c69aeae7cdfbb36d0a82d30c4d993b84f6faa12fd2", - "source_anchor": "**Limitation: explicit annotations only.** This mechanism enforces that every *existing* `@liyi:related` annotation has a corresponding sidecar edge. It does not detect *missing* annotations — items that semantically depend on a requirement but lack an `@liyi:related` marker in source. That is an inference problem (the agent or human must recognize the semantic dependency), not a coverage-checking problem, and is outside the linter's deterministic scope." 
+ "source_span": [1214, 1220], + "source_hash": "sha256:6ed25f14e08ea936e927150a3d0cc82f881011edcd1360b5401e7a03658f53f9", + "source_anchor": "" }, { "requirement": "marker-normalization", - "source_span": [ - 1210, - 1212 - ], - "source_hash": "sha256:4f1a4b10b6baac121522a8ed95f10e3eac48e6c5cf5f7418423e27e859a69280", - "source_anchor": "| `error` | Errors only — parse errors, unknown versions, orphaned sources |" + "source_span": [1247, 1249], + "source_hash": "sha256:b4e1f5b698b77c4c661d15cbd070e30839ecfd2f42fb77286e0a7408e294bd1c", + "source_anchor": "" }, { "requirement": "quine-escape-in-source", - "source_span": [ - 1224, - 1226 - ], - "source_hash": "sha256:946f04222d5a129ec28878260c8ca615ccfdf1d1e27c7705603261ae45bfe2b1", - "source_anchor": "- No LLM calls. No API keys. No network access." + "source_span": [1261, 1263], + "source_hash": "sha256:326dee10b54b25cc8f874d8577bb3218efa21313f1329e70dadcc83d03978e38", + "source_anchor": "" }, { "requirement": "markdown-fenced-block-skip", - "source_span": [ - 1230, - 1236 - ], - "source_hash": "sha256:47cccd00938c19259d7676c9531bbeb32c35778094dc7aa8b3dea2c6e25435fc", - "source_anchor": "CJK input methods default to full-width punctuation. Japanese IME often produces full-width `@` as well. Requiring users to switch to half-width mode for every annotation is a constant friction — and a guaranteed source of \"why doesn't the linter see my marker\" bug reports." 
+ "source_span": [1267, 1273], + "source_hash": "sha256:18e3c5d63b28b021f3ef45158fddfc73fbb2bdfd6433b63748d70df9736ba2e1", + "source_anchor": "" }, { "requirement": "fix-never-modifies-human-fields", - "source_span": [ - 1442, - 1444 - ], - "source_hash": "sha256:a55f807047e0c031ee73d97afd444d4da4423ad7a1541d6ad8ced6d0734c4ee3", - "source_anchor": " %% ── Batch path ──" + "source_span": [1479, 1481], + "source_hash": "sha256:2c59f122385a46454e274ecfe0a77e78b47b846ca4f932c151ab3baa3c4c9250", + "source_anchor": "" }, { "requirement": "fix-semantic-drift-protection", - "source_span": [ - 1446, - 1448 - ], - "source_hash": "sha256:607df81587523fe5a12ddb1b5d4ef92aaed48c07811581d4c9fe40a8d6fe0b6a", - "source_anchor": "```" + "source_span": [1483, 1485], + "source_hash": "sha256:320f7c2667bf1a8163a2ec95adb0bdfeac648780042af08a2eab4ff27d0b74a2", + "source_anchor": "" } ] } From f64c127ae453fbd36b8182fd11d914ac012687c4 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Thu, 12 Mar 2026 00:46:38 +0800 Subject: [PATCH 12/21] feat(linter): add EndRequirement marker variant and scanner support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add @liyi:end-requirement marker to pair with @liyi:requirement for deterministic span recovery in files without tree-sitter support. - Add EndRequirement { name, line } variant to SourceMarker enum - Add CANON_END_REQUIREMENT constant and 6 multilingual aliases (Chinese 需求结束, Spanish, Japanese, French, Korean) - Order end-requirement aliases before requirement aliases in ALIAS_TABLE to prevent prefix-matching collisions - Add scan_markers match arm for CANON_END_REQUIREMENT - Add requirement_spans() function to pair open/close markers by name into [start, end] span maps - Add 5 scanner tests for end-requirement parsing and pairing Original prompt: > Based on your own dogfooding experience in this session, what's > your opinion on Markdown semantic anchoring support? 
> > Agreed that we should allow deterministically tracking requirement > blocks. We may need to define the end-of-block marker more > "officially" than the current ad-hoc convention, such as > @liyi:end-requirement, then make the reanchor tool recognize that. > > Sure, please do, but for Chinese please make it "需求结束" or > "需求完" -- let me know your preference before going ahead. > > Fair point, let's go with "需求结束". Please go ahead with > implementation. AI-assisted-by: Claude Opus 4.6 (GitHub Copilot) Signed-off-by: WANG Xuerui --- crates/liyi/src/markers.rs | 92 +++++++++++++++++++++++++++ crates/liyi/src/markers.rs.liyi.jsonc | 50 +++++++-------- 2 files changed, 117 insertions(+), 25 deletions(-) diff --git a/crates/liyi/src/markers.rs b/crates/liyi/src/markers.rs index 7b4ca14..32a7499 100644 --- a/crates/liyi/src/markers.rs +++ b/crates/liyi/src/markers.rs @@ -1,3 +1,5 @@ +use std::collections::HashMap; + /// Source-file marker scanner with full-width normalization and multilingual aliases. /// /// A discovered marker in a source file. 
@@ -10,6 +12,10 @@ pub enum SourceMarker { name: String, line: usize, }, + EndRequirement { + name: String, + line: usize, + }, Related { name: String, line: usize, @@ -69,6 +75,7 @@ const CANON_TRIVIAL: &str = "\x40liyi:trivial"; const CANON_NONTRIVIAL: &str = "\x40liyi:nontrivial"; const CANON_MODULE: &str = "\x40liyi:module"; const CANON_REQUIREMENT: &str = "\x40liyi:requirement"; +const CANON_END_REQUIREMENT: &str = "\x40liyi:end-requirement"; const CANON_RELATED: &str = "\x40liyi:related"; const CANON_INTENT: &str = "\x40liyi:intent"; @@ -100,6 +107,13 @@ const ALIAS_TABLE: &[(&str, &str)] = &[ ("\x40liyi:módulo", CANON_MODULE), ("\x40立意:モジュール", CANON_MODULE), ("\x40립의:모듈", CANON_MODULE), + // end-requirement (must precede requirement — longer aliases match first) + (CANON_END_REQUIREMENT, CANON_END_REQUIREMENT), + ("\x40立意:需求结束", CANON_END_REQUIREMENT), + ("\x40liyi:fin-requisito", CANON_END_REQUIREMENT), + ("\x40立意:要件終", CANON_END_REQUIREMENT), + ("\x40liyi:fin-exigence", CANON_END_REQUIREMENT), + ("\x40립의:요건끝", CANON_END_REQUIREMENT), // requirement (CANON_REQUIREMENT, CANON_REQUIREMENT), ("\x40立意:需求", CANON_REQUIREMENT), @@ -284,6 +298,14 @@ pub fn scan_markers(content: &str) -> Vec { }); } } + CANON_END_REQUIREMENT => { + if let Some(name) = extract_name(rest) { + markers.push(SourceMarker::EndRequirement { + name, + line: line_num, + }); + } + } CANON_RELATED => { if let Some(name) = extract_name(rest) { markers.push(SourceMarker::Related { @@ -320,6 +342,32 @@ pub fn scan_markers(content: &str) -> Vec { markers } +/// Build a map from requirement name to `[start_line, end_line]` spans +/// by pairing `Requirement` and `EndRequirement` markers from a scan result. +/// +/// Only requirements that have a matching `EndRequirement` with the same +/// name are included. Unpaired markers are silently skipped (the linter +/// can diagnose those separately). 
+pub fn requirement_spans(markers: &[SourceMarker]) -> HashMap { + let mut opens: HashMap = HashMap::new(); + let mut spans: HashMap = HashMap::new(); + + for m in markers { + match m { + SourceMarker::Requirement { name, line } => { + opens.insert(name.clone(), *line); + } + SourceMarker::EndRequirement { name, line } => { + if let Some(start) = opens.remove(name) { + spans.insert(name.clone(), [start, *line]); + } + } + _ => {} + } + } + spans +} + #[cfg(test)] mod tests { use super::*; @@ -372,6 +420,50 @@ mod tests { ); } + #[test] + fn scan_end_requirement_paren() { + let m = scan_markers("\n"); + assert_eq!(m.len(), 1); + assert!( + matches!(&m[0], SourceMarker::EndRequirement { name, line: 1 } if name == "exit-codes") + ); + } + + #[test] + fn scan_end_requirement_space() { + let m = scan_markers("\n"); + assert_eq!(m.len(), 1); + assert!( + matches!(&m[0], SourceMarker::EndRequirement { name, line: 1 } if name == "exit-codes") + ); + } + + #[test] + fn scan_end_requirement_chinese_alias() { + let m = scan_markers("\n"); + assert_eq!(m.len(), 1); + assert!( + matches!(&m[0], SourceMarker::EndRequirement { name, line: 1 } if name == "exit-codes") + ); + } + + #[test] + fn scan_requirement_and_end_requirement_pair() { + let input = "\ +\n\ +Exit codes: 0 = clean, 1 = failures.\n\ +\n\ +"; + let m = scan_markers(input); + assert_eq!(m.len(), 2); + assert!( + matches!(&m[0], SourceMarker::Requirement { name, line: 1 } if name == "exit-codes") + ); + assert!( + matches!(&m[1], SourceMarker::EndRequirement { name, line: 3 } if name == "exit-codes") + ); + } + #[test] fn scan_related() { let m = scan_markers("// \x40liyi:related some_req\n"); diff --git a/crates/liyi/src/markers.rs.liyi.jsonc b/crates/liyi/src/markers.rs.liyi.jsonc index b78e1a7..8dfcd42 100644 --- a/crates/liyi/src/markers.rs.liyi.jsonc +++ b/crates/liyi/src/markers.rs.liyi.jsonc @@ -9,19 +9,19 @@ 57, 62 ], - "source_hash": "sha256:c2ad7f02250feab1d1ed85efd874b1782fcc6ea8c197de7f4a9f4517ef19d96d", 
- "source_anchor": "/// scanner matching these string constants — the classic quine-escape problem." + "source_hash": "sha256:53824f6af56160b7bd65c7b0783b5e1ffb90516088d408e8b0c9b01c648bcadb", + "source_anchor": "// Alias table — maps every accepted marker string to its canonical form." }, { "item": "SourceMarker", "reviewed": false, "intent": "Enumerate all recognized @liyi: marker types (Module, Requirement, Related, Intent, Trivial, Ignore, Nontrivial) with their associated data (name, prose, line number). Each variant carries a 1-indexed line number.", "source_span": [ - 5, - 32 + 7, + 38 ], "tree_path": "enum::SourceMarker", - "source_hash": "sha256:9914dee0aa2de6a23535773503f9d4af08d5704364faef687ab3ed9f73f4a078", + "source_hash": "sha256:03eb78a070a414027c092464920fea46fa92fa89da2ed2bcf6b03682bce2d31a", "source_anchor": "pub enum SourceMarker {" }, { @@ -29,8 +29,8 @@ "reviewed": false, "intent": "Replace full-width Unicode punctuation (@ → @, : → :, ( → (, ) → )) with ASCII equivalents so that markers written with CJK input methods are recognized.", "source_span": [ - 36, - 48 + 42, + 54 ], "tree_path": "fn::normalize_line", "source_hash": "sha256:042886b54393166d59d9cd573722d841651cc1eba4321dc0f62d44de39379b09", @@ -44,11 +44,11 @@ "reviewed": false, "intent": "Map every accepted marker alias (English canonical forms plus Chinese, Japanese, Korean, Spanish, French, Portuguese variants) to its canonical @liyi:* form. 
This table is the single source of truth for multilingual marker recognition.", "source_span": [ - 77, - 125 + 84, + 139 ], "tree_path": "const::ALIAS_TABLE", - "source_hash": "sha256:0eebe3f11964541df61a658e4aa067747b473032480a5f3f0778ec8e5c88d9f4", + "source_hash": "sha256:d546bd0d6ebccebadea5dd37108f457bbb0804cdcc31665ac948583fda1215c8", "source_anchor": "const ALIAS_TABLE: &[(&str, &str)] = &[", "related": { "marker-normalization": null @@ -59,8 +59,8 @@ "reviewed": false, "intent": "Search a normalized line for any known marker alias from ALIAS_TABLE. Return the canonical marker keyword, the byte offset of the match start, and the byte offset past the matched alias, or None if no marker is found.", "source_span": [ - 130, - 137 + 144, + 151 ], "tree_path": "fn::find_marker", "source_hash": "sha256:f69d93143a9f03203a2ea48d2342b9288d5ce42383a2d4bc0946567d0cdd6817", @@ -74,8 +74,8 @@ "reviewed": false, "intent": "Define the set of quotation-mark characters that suppress marker detection when they immediately precede the @ of a marker. Must cover ASCII quotes, typographic quotes, CJK corner brackets, and guillemets.", "source_span": [ - 169, - 181 + 183, + 195 ], "tree_path": "const::QUOTE_CHARS", "source_hash": "sha256:006f1fb3b6a729bd108d4eaef8d2aba2154414b3cbbad05787d20c006756611f", @@ -87,8 +87,8 @@ "reviewed": false, "intent": "Determine whether a byte position on a line falls inside an inline backtick code span by counting backtick characters before the position. 
Odd count means inside code, even means outside.", "source_span": [ - 186, - 197 + 200, + 211 ], "tree_path": "fn::is_in_inline_code", "source_hash": "sha256:535d27178d75557f1db35fd81223f988dfb924191959049b97c7eb9212444bae", @@ -100,8 +100,8 @@ "reviewed": false, "intent": "Return true if the character immediately before a byte position on a line is any character in the QUOTE_CHARS set (a quotation mark across locales).", "source_span": [ - 201, - 208 + 215, + 222 ], "tree_path": "fn::preceded_by_quote", "source_hash": "sha256:a88665af3e20b2f6f189402eeac2638e4f496bc80a54f9aa15e61d1ca8bfe1b2", @@ -113,8 +113,8 @@ "reviewed": false, "intent": "Return true if a line (after stripping leading whitespace) starts with ``` or ~~~, indicating a Markdown fenced code block boundary.", "source_span": [ - 212, - 215 + 226, + 229 ], "tree_path": "fn::is_fence_delimiter", "source_hash": "sha256:711e776c8924917ac4ef11c26546de9e31a80c466b2512cb854135558b6b98fe", @@ -129,8 +129,8 @@ "reviewed": false, "intent": "Extract a name argument from the text after a marker keyword: if the first non-whitespace character is '(', take the content up to the matching ')'; otherwise take the first whitespace-delimited token. Return None if no name is present.", "source_span": [ - 142, - 159 + 156, + 173 ], "tree_path": "fn::extract_name", "source_hash": "sha256:49fcb7884fcec40dcb0b329d971eddae3ee78ef7c167a0e63726c37a73ce2e50", @@ -141,11 +141,11 @@ "reviewed": false, "intent": "Scan all lines of a source file, normalize each line for full-width characters, and return a list of SourceMarker values for every recognized @liyi: marker found. Line numbers are 1-indexed. Markers inside fenced code blocks, inline backtick spans, or immediately after quotation marks are suppressed (NL-quoting). 
Handles =doc and =文档 sentinels for intent markers.", "source_span": [ - 226, - 321 + 240, + 343 ], "tree_path": "fn::scan_markers", - "source_hash": "sha256:5a9629649bab1ac1ed9efb15dfdf5ecb20973369de24b7fcaa12b8a0f12a87da", + "source_hash": "sha256:cce25919ff28dabeb132681d73e8d0aae8a090058d99e8bda6b92b3c7284ec2f", "source_anchor": "pub fn scan_markers(content: &str) -> Vec {", "related": { "quine-escape-in-source": null, From bdd6a5f0cc9ef2d596d52cc61654d0d2683d6ef4 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Thu, 12 Mar 2026 00:47:04 +0800 Subject: [PATCH 13/21] feat(linter): use marker-pair spans in check and reanchor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use requirement_spans() from the scanner to recover deterministic spans for @liyi:requirement blocks in check.rs and reanchor.rs. check.rs: - Import requirement_spans, compute marker_span_map after scanning - In Requirement branch, apply marker-pair span before hash computation so marker-bounded blocks stay current automatically reanchor.rs: - Import requirement_spans and scan_markers - For files without tree-sitter support, build marker_spans map - Use marker-pair span as fallback after tree-sitter resolution in Requirement branch This is the primary span recovery mechanism for Markdown and other files where tree-sitter grammars are unavailable. Original prompt: > Based on your own dogfooding experience in this session, what's > your opinion on Markdown semantic anchoring support? > > Agreed that we should allow deterministically tracking requirement > blocks. We may need to define the end-of-block marker more > "officially" than the current ad-hoc convention, such as > @liyi:end-requirement, then make the reanchor tool recognize that. > > Sure, please do, but for Chinese please make it "需求结束" or > "需求完" -- let me know your preference before going ahead. > > Fair point, let's go with "需求结束". Please go ahead with > implementation. 
AI-assisted-by: Claude Opus 4.6 (GitHub Copilot) Signed-off-by: WANG Xuerui --- crates/liyi/src/check.rs | 15 ++++++++++++++- crates/liyi/src/check.rs.liyi.jsonc | 16 ++++++++-------- crates/liyi/src/reanchor.rs | 14 +++++++++++++- crates/liyi/src/reanchor.rs.liyi.jsonc | 14 +++++++------- 4 files changed, 42 insertions(+), 17 deletions(-) diff --git a/crates/liyi/src/check.rs b/crates/liyi/src/check.rs index 82a0e9f..5545022 100644 --- a/crates/liyi/src/check.rs +++ b/crates/liyi/src/check.rs @@ -7,7 +7,7 @@ use crate::diagnostics::{ }; use crate::discovery::{SidecarEntry, discover}; use crate::hashing::{SpanError, hash_span}; -use crate::markers::{SourceMarker, scan_markers}; +use crate::markers::{SourceMarker, requirement_spans, scan_markers}; use crate::schema::validate_version; use crate::shift::{ShiftResult, detect_shift}; use crate::sidecar::{Spec, parse_sidecar, write_sidecar}; @@ -416,6 +416,7 @@ fn check_sidecar( }; let source_markers = scan_markers(&source_content); + let marker_span_map = requirement_spans(&source_markers); let mut modified = false; // 5. Check each spec @@ -837,6 +838,18 @@ fn check_sidecar( } Spec::Requirement(req) => { let label = req.requirement.clone(); + + // Try marker-based span recovery first: if the file has + // @liyi:end-requirement markers, use those for span. 
+ if let Some(&marker_span) = marker_span_map.get(&req.requirement) + && marker_span != req.source_span + { + req.source_span = marker_span; + if fix { + modified = true; + } + } + match hash_span(&source_content, req.source_span) { Ok((computed_hash, computed_anchor)) => { let is_current = req.source_hash.as_ref() == Some(&computed_hash); diff --git a/crates/liyi/src/check.rs.liyi.jsonc b/crates/liyi/src/check.rs.liyi.jsonc index e830496..2cdfac2 100644 --- a/crates/liyi/src/check.rs.liyi.jsonc +++ b/crates/liyi/src/check.rs.liyi.jsonc @@ -27,9 +27,9 @@ "source_hash": "sha256:13ca74d5432770cc302492fcc1b8c60cee849726441dc80547cf0d20917209f6", "source_anchor": "pub fn run_check(", "related": { + "cycle-detection": null, "requirement-discovery-global": null, - "requirement-name-uniqueness": null, - "cycle-detection": null + "requirement-name-uniqueness": null } }, { @@ -38,14 +38,14 @@ "intent": "For a single sidecar entry: parse the sidecar, validate its version, verify the source file exists, then for each spec check hash freshness (with shift detection and --fix support), review status (sidecar reviewed flag or @liyi:intent marker), trivial/ignore markers, and related-requirement edges. Write the sidecar back if --fix produced modifications.", "source_span": [ 282, - 983 + 996 ], "tree_path": "fn::check_sidecar", - "source_hash": "sha256:6e82d215922ba7812b9fd20e105f32469e581ac5d863e9e007c8c57dbd7c8484", + "source_hash": "sha256:1c65da69a12340a590f751478b7e2e9363a45504d4a727b73e09a167cc97fae6", "source_anchor": "fn check_sidecar(", "related": { - "reviewed-semantics": null, - "fix-semantic-drift-protection": null + "fix-semantic-drift-protection": null, + "reviewed-semantics": null } }, { @@ -53,8 +53,8 @@ "reviewed": false, "intent": "Read a file's contents with caching: return the cached string if already loaded, otherwise read from disk, store in the cache, and return. 
Return None on I/O failure.", "source_span": [ - 1048, - 1059 + 1061, + 1072 ], "tree_path": "fn::read_cached", "source_hash": "sha256:77c7602b283fb2e67c7953f98ef11b417c83903d96011f370b7b0421778f52c2", diff --git a/crates/liyi/src/reanchor.rs b/crates/liyi/src/reanchor.rs index 61b1648..23c934f 100644 --- a/crates/liyi/src/reanchor.rs +++ b/crates/liyi/src/reanchor.rs @@ -1,6 +1,7 @@ use std::path::{Path, PathBuf}; use crate::hashing::hash_span; +use crate::markers::{requirement_spans, scan_markers}; use crate::schema::migrate; use crate::sidecar::{Spec, parse_sidecar, write_sidecar}; use crate::tree_path::{compute_tree_path, detect_language, resolve_tree_path}; @@ -80,6 +81,15 @@ pub fn run_reanchor( let lang = detect_language(Path::new(source_path)); + // For files without tree-sitter support, build a span map from + // @liyi:requirement / @liyi:end-requirement marker pairs. + let marker_spans = if lang.is_none() { + let markers = scan_markers(&source_content); + requirement_spans(&markers) + } else { + std::collections::HashMap::new() + }; + for spec in &mut sidecar.specs { match spec { Spec::Item(item) => { @@ -118,11 +128,13 @@ pub fn run_reanchor( continue; // targeted mode only touches items } - // Tree-sitter span recovery for requirements + // Span recovery: prefer tree-sitter, then marker pairing. if let (false, Some(l)) = (req.tree_path.is_empty(), lang) && let Some(new_span) = resolve_tree_path(&source_content, &req.tree_path, l) { req.source_span = new_span; + } else if let Some(&new_span) = marker_spans.get(&req.requirement) { + req.source_span = new_span; } if let Some(l) = lang { diff --git a/crates/liyi/src/reanchor.rs.liyi.jsonc b/crates/liyi/src/reanchor.rs.liyi.jsonc index f06172f..fd520fa 100644 --- a/crates/liyi/src/reanchor.rs.liyi.jsonc +++ b/crates/liyi/src/reanchor.rs.liyi.jsonc @@ -8,8 +8,8 @@ "reviewed": false, "intent": "Expand a list of file and directory paths into concrete .liyi.jsonc file paths. 
Directories are walked recursively respecting .gitignore and .liyiignore. Files are included directly. Non-existent paths produce an error. Results are sorted and deduplicated.", "source_span": [ - 14, - 39 + 15, + 40 ], "tree_path": "fn::resolve_reanchor_targets", "source_hash": "sha256:7e38409f96830a3b46075be4b7055db02c4a7a7ed388923588570bba01bbcf25", @@ -20,18 +20,18 @@ "reviewed": false, "intent": "Re-hash source spans in a sidecar file. If do_migrate is set, run schema migration and write back. If target_item and target_span are both provided, update only that item's span and rehash. Otherwise, for every spec: if tree_path is non-empty and a tree-sitter grammar is available, locate the item by structural identity and update the span; then recompute hash/anchor. Reject if only one of --item/--span is provided. Derive the source file path by stripping the .liyi.jsonc suffix from the sidecar path.", "source_span": [ - 54, - 144 + 55, + 156 ], "tree_path": "fn::run_reanchor", - "source_hash": "sha256:28a7f0bffb54f043c3497210bc11cb4d040ca85aefef4c8f8453ce2c4afd94b8", + "source_hash": "sha256:3ee785c5a09ff156f5e606425771ab8c12f43445a62c48cc8be659ad7351e5d6", "source_anchor": "pub fn run_reanchor(", "related": { - "tree-path-reanchor-behavior": null, "tool-managed-fields": null, + "fix-never-modifies-human-fields": null, "tree-path-empty-fallback": null, "liyi-sidecar-naming-convention": null, - "fix-never-modifies-human-fields": null + "tree-path-reanchor-behavior": null } } ] From b1cbe68e7a1f084f90715b6b1b8b505e9e5231a4 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Thu, 12 Mar 2026 00:47:26 +0800 Subject: [PATCH 14/21] docs(design): specify end-requirement marker and migrate blocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update the design document to formally specify @liyi:end-requirement as the closing marker for requirement blocks, replacing the previous "not supported in 0.1" statement. 
Spec additions: - Full alias table (English, Chinese, Spanish, Japanese, French, Korean) matching the scanner implementation - Recommendation for Markdown requirement blocks - Code example showing paired open/close markers Marker migration: - Replace all 17 ad-hoc closing markers with using the corresponding requirement name from each block's opening marker - Reanchor sidecar to reflect updated spans and hashes Original prompt: > Based on your own dogfooding experience in this session, what's > your opinion on Markdown semantic anchoring support? > > Agreed that we should allow deterministically tracking requirement > blocks. We may need to define the end-of-block marker more > "officially" than the current ad-hoc convention, such as > @liyi:end-requirement, then make the reanchor tool recognize that. > > Sure, please do, but for Chinese please make it "需求结束" or > "需求完" -- let me know your preference before going ahead. > > Fair point, let's go with "需求结束". Please go ahead with > implementation. AI-assisted-by: Claude Opus 4.6 (GitHub Copilot) Signed-off-by: WANG Xuerui --- docs/liyi-design.md | 55 ++++++++++----- docs/liyi-design.md.liyi.jsonc | 119 +++++++++++++++++++++++---------- 2 files changed, 122 insertions(+), 52 deletions(-) diff --git a/docs/liyi-design.md b/docs/liyi-design.md index a9f0eb6..a6dfba8 100644 --- a/docs/liyi-design.md +++ b/docs/liyi-design.md @@ -342,7 +342,7 @@ The linter only checks for the *presence* of `@liyi:module` in a directory's fil **Naming convention.** The sidecar filename is the source filename with `.liyi.jsonc` appended: `money.rs` → `money.rs.liyi.jsonc`. Always append to the full filename, never strip the extension. This avoids ambiguity when files share a stem but differ in extension (`money.rs` and `money.py` would otherwise both claim `money.liyi.jsonc`). The rule is mechanical: one source file, one sidecar, derivable by concatenation. 
- + One per source file, co-located: @@ -376,7 +376,7 @@ The `source` path is relative to the repository root — the same path you'd pas `"version"` is required. The linter checks it and rejects unknown versions. This costs nothing now and prevents painful migration when the schema evolves (e.g., adding `"related"` edges, structured fields in post-0.1). A JSON Schema definition ships alongside the linter for editor validation and autocompletion (see *Appendix: JSON Schema* below). When the schema changes, the linter will accept both `"0.1"` and the new version during a transition window, and `liyi reanchor --migrate` will upgrade sidecar files in place. - + **`liyi reanchor --migrate` behavior.** When the schema version changes (e.g., 0.1 → 0.2), `--migrate` reads each `.liyi.jsonc`, adds any newly required fields with default values, removes deprecated fields, updates `"version"` to the new version, and writes the file back. It is idempotent — running it twice produces the same output. It does not re-hash spans or re-infer intent; it only transforms the schema envelope. Migration is always additive in 0.x: no field present in 0.1 will change meaning, only new fields may appear. @@ -402,7 +402,7 @@ After human review — either the human adds `@liyi:intent` in the source file ( `"reviewed"` defaults to `false` when absent. The linter considers an item reviewed if **either** `"reviewed": true` in the sidecar **or** `@liyi:intent` exists in source. Source intent takes precedence for adversarial testing — it's the human's assertion, not the agent's inference. See *Source-level intent* and *Security model* below. - + ### Why a single `intent` field, not structured pre/postconditions? @@ -426,11 +426,11 @@ Item-level intent carries machine metadata → JSONC wins. `source_span` is a closed interval of 1-indexed line numbers: `[42, 58]` means lines 42 through 58, inclusive. 
This matches editor line numbers, `git blame` output, and coincidentally the mathematical convention for closed intervals. `source_hash` is always `sha256:` — the SHA-256 digest of those lines after normalizing line endings to `\n` (LF). This ensures cross-platform consistency: a Windows developer with `core.autocrlf=true` and a Linux CI runner produce identical hashes for identical content. No other hash algorithm is supported in 0.1. `source_anchor` is the literal text of the first line of the span — used by the linter for efficient shift detection (see below). - + Both `source_hash` and `source_anchor` are **tool-managed fields**. The agent writes only `source_span` — the tool (`liyi reanchor`, or `liyi check --fix`) computes the hash and anchor deterministically from the source file. This is the same principle as not letting agents author lockfile checksums: the tool reads the actual bytes, so fabricated or hallucinated hashes are impossible. - + The agent records each item's line range (`source_span`) when writing the spec. The linter reads those lines from the source file, hashes them, and compares against `source_hash`. This gives per-item staleness without the linter needing to parse any language — it just reads a slice of lines. @@ -442,7 +442,7 @@ Without a `tree_path`, the fallback is: batch false positives on any line-shifti **Span-shift detection (included in 0.1).** When the linter detects a hash mismatch and no `tree_path` is available (or tree-sitter has no grammar for the language), it falls back to scanning ±100 lines for content matching the recorded hash. If the same content appears at an offset (e.g., shifted down by 3 lines because an import was added), the linter reports `SHIFTED` rather than `STALE`. With `--fix`, the span is auto-corrected in the sidecar; without `--fix`, the linter reports the shift but does not write. 
Once a delta is established for one item, subsequent items in the same file are adjusted by the same delta before checking — so a single import insertion resolves in one probe, not twenty. If no match is found within the window, the linter gives up and reports `STALE` as usual. This is the same heuristic `patch(1)` uses with a fuzz factor — a linear scan over a bounded window, ~50 lines, no parser. Combined with `liyi reanchor`, this eliminates the most common source of false positives (line-shifting edits) without language-specific tooling. For files with `tree_path` populated, tree-sitter-based anchoring supersedes this heuristic entirely — see the next section. - + ### Structural identity via `tree_path` @@ -470,7 +470,7 @@ The path identifies the item by node kind and name, not by position. The tool co 1. `liyi reanchor`: Parse the source file with tree-sitter. For each spec with a non-empty `tree_path`, query the parse tree for a node matching the path. If found, update `source_span` to the node's line range, recompute `source_hash` and `source_anchor`. If not found (item was renamed or deleted), report an error — do not silently fall back. 2. `liyi check --fix`: Same tree-sitter lookup. If the hash mismatches but the `tree_path` resolves to a valid node, update the span (the item moved but is still present). If the `tree_path` doesn't resolve, fall back to span-shift heuristic. 3. `liyi check` (without `--fix`): Use `tree_path` to verify the span points to the correct item. If it doesn't (span drifted, but `tree_path` still resolves), report `SHIFTED` with the correct target position. - + **Diagnostic clarity.** When a spec has no `tree_path` and the shift heuristic also fails, the diagnostic indicates why tree-path recovery was skipped — e.g., "no tree_path set, falling back to shift heuristic" — so that users can add the missing field or run `liyi reanchor` to auto-populate it. 
Diagnostics distinguish "no tree_path available" from "tree_path resolution failed (item may have been renamed or deleted)." @@ -482,7 +482,7 @@ The path identifies the item by node kind and name, not by position. The tool co - **Complex or contrived cases** where the agent or human determines that a tree path is non-obvious or ambiguous. The agent MAY set `tree_path` to `""` explicitly to signal "I considered structural identity and it doesn't apply here." Absence of the field is equivalent to `""`. `liyi reanchor` auto-populates `tree_path` for every spec where a clear structural path can be resolved from the current `source_span` and a supported tree-sitter grammar — agents need not set it manually. When the span doesn't correspond to a recognizable AST item (macros, generated code, unsupported languages), the tool leaves `tree_path` empty. - + **Language support.** Tree-sitter support is grammar-dependent. Rust, Python, Go, JavaScript, and TypeScript are built-in. For unsupported languages, `tree_path` is left empty and the tool falls back to line-number behavior. Adding a language is a matter of adding its tree-sitter grammar crate and a small mapping of node kinds — no changes to the core protocol or schema. @@ -632,11 +632,30 @@ No `intent` field — the requirement text lives at the source site, not duplica **Naming and scope.** Requirement names are unique per repository. The linter reports an error if two `@liyi:requirement` markers declare the same name. Names are matched as exact strings (case-sensitive) after trimming leading/trailing whitespace inside parens. The name is a human-readable identifier, not a path — it can be in any language. No character set restriction: `multi-currency-addition`, `多币种加法`, and `인출한도` are all valid names. - + **Requirements can live anywhere:** in the source file near the code they govern, in `README.md` alongside `@liyi:module`, in a dedicated requirements file, or in doc comments. 
The linter scans all non-ignored files for the marker. -**End-of-block markers.** The linter does not require an explicit end marker for requirement blocks — `source_span` in the sidecar defines the block boundaries. An optional `@liyi:end-requirement` (or `@立意:需求止`) marker is **not supported in 0.1** — the linter does not look for it. A future version could accept it for visual clarity in Markdown files where contiguous-comment heuristics don't apply; adding it would be additive and non-breaking. +**End-of-block markers.** The `@liyi:end-requirement ` marker closes a requirement block. The name must match the opening `@liyi:requirement `. When both markers are present, the linter and reanchor tool pair them by name to deterministically compute `source_span` — this is the primary span recovery mechanism for files without tree-sitter support (e.g., Markdown). The end marker uses the same name syntax (parenthesized or whitespace-delimited), full-width normalization, and multilingual aliases as the opening marker: + +| Alias | Language | +|---|---| +| `@liyi:end-requirement` | English (canonical) | +| `@立意:需求结束` | Chinese | +| `@liyi:fin-requisito` | Spanish | +| `@立意:要件終` | Japanese | +| `@liyi:fin-exigence` | French | +| `@립의:요건끝` | Korean | + +The end marker is **recommended** for Markdown requirement blocks but not required. When absent, the sidecar's recorded `source_span` is the only span authority and must be maintained manually or via tree-sitter recovery. + +Example: + +```markdown + +Exit codes: 0 = clean, 1 = failures found, 2 = internal error. + +``` ### `@立意:有关` / `@liyi:related` — dependency edges @@ -757,7 +776,7 @@ If `payment-security` changes → `multi-currency-addition` is flagged REQ CHANG The linter detects cycles (A → B → A) and reports them as errors without looping. - + **Use this sparingly.** Most teams should use flat requirements — one level of `@liyi:requirement` blocks with `@liyi:related` edges from code items. 
Requirement hierarchies are for organizations that already think in terms of system requirements decomposing into subsystem requirements (defense, aerospace, regulated industries). If you don't already have a requirement hierarchy, don't build one just because the tool allows it — the cascading noise from deep trees (a change at the root flags everything below) can be worse than the traceability it provides. @@ -1088,7 +1107,7 @@ The linter resolves `"source"` paths in `.liyi.jsonc` relative to the repository **Requirement discovery is project-global.** Positional args scope which items are checked (pass 2), not which requirements are indexed. Pass 1 always walks the full project root to discover all `@liyi:requirement` markers, regardless of CLI positional args. This ensures that `liyi check src/billing/` can resolve `@liyi:related` edges pointing to requirements defined in `docs/requirements.md` or any other location in the repo. - + This handles the common case without configuration. `.gitignore` already excludes `node_modules/`, `.venv/`, `target/`, `__pycache__/`, `build/`, etc. `.liyiignore` picks up the rest — checked-in vendored code, generated protobuf bindings, FFI stubs. 
@@ -1217,7 +1236,7 @@ Exit codes: 0 = clean, 1 = check failures (stale, unreviewed, or diverged specs) - `--fail-on-unreviewed` (default: false) — exit 1 if specs exist without `@liyi:intent` in source or `"reviewed": true` in sidecar - `--fail-on-req-changed` (default: true) — exit 1 if any reviewed spec references a requirement whose hash changed - `--fail-on-untracked` (default: true) — exit 1 if any `@liyi:requirement` marker has no sidecar entry, or any `@liyi:related` marker has no corresponding edge in the enclosing item's sidecar spec - + ### What it doesn't do @@ -1246,7 +1265,7 @@ All of the following are equivalent: **Implementation approach: normalize-then-match.** The linter runs a single normalization pass on each scanned line — replacing the four full-width characters with their half-width equivalents — before applying the marker regex. This is a four-entry `str::replace` chain (or a single `translate` table), not a regex concern. The normalization happens only on lines being scanned for markers, not on the entire file, so it has negligible cost. The alias lookup table stores only half-width forms; normalization ensures they match regardless of what the user typed. - + This is strictly more robust than the alternative (doubling every regex to accept both forms), keeps the alias table simple, and confines the full-width concern to one function in the lexer. @@ -1260,7 +1279,7 @@ This is strictly more robust than the alternative (doubling every regex to accep In **source code**, the `@` character is escaped in string constants: `\x40` in Rust, `\u0040` in JSON. This is invisible to the reader (it's inside a string literal) and prevents the scanner from matching constants in the alias table, format strings, and test data. The `@liyi:requirement(quine-escape)` in `markers.rs` enforces this invariant. - + In **documentation and prose** — Markdown files, design docs, READMEs, contributing guides — character escapes are unacceptable. 
A design document that writes `\x40liyi:module` instead of `@liyi:module` is unreadable. The scanner instead uses **natural-language context** to distinguish real markers from mentions: @@ -1270,7 +1289,7 @@ In **documentation and prose** — Markdown files, design docs, READMEs, contrib 2. **Inline code spans.** If the marker's position falls inside an inline backtick span on the same line (determined by counting backtick characters before the match position — odd count means inside code), the marker is rejected. This covers inline mentions like `` `@liyi:module` `` and `` `` ``. 3. **Preceding quote characters.** If the character immediately before the `@` is a quotation mark — ASCII quotes (`'`, `"`), typographic quotes (`'`, `'`, `"`, `"`), CJK brackets (`「`, `」`), or guillemets (`«`, `»`) — the marker is rejected. This covers natural-language quoting conventions across locales: `"@liyi:intent"`, `'@liyi:module'`, `「@liyi:requirement」`, etc. - + Together, these three checks cover every conventional way that prose references a technical term without asserting it. The scanner remains line-oriented — fenced block state is a single boolean; inline code detection is a character count within one line; preceding-char is a one-character lookbehind. No Markdown parser is needed. @@ -1478,11 +1497,11 @@ The agent instruction (rule 10) permits both paths. Teams can mandate triage for `--fix` never modifies `"intent"`, `"reviewed"`, `"related"`, or any human-authored field. It only writes tool-managed fields. This is the same contract as `eslint --fix` or `cargo clippy --fix` — mechanical corrections, no semantic changes. - + **Semantic drift protection.** When `tree_path` resolves an item to a new span, `--fix` compares the hash at the new location against the recorded `source_hash`. If the content is unchanged (pure positional shift), the span, hash, and anchor are all updated — this is a safe mechanical correction. 
If the content at the new span also changed (semantic drift), `--fix` updates `source_span` to track the item's current location but does **not** rewrite `source_hash` — the spec remains stale so the next `liyi check` flags it for human review. This prevents `--fix` from silently blessing semantic changes that may invalidate the declared intent. - + The shift heuristic (non-`tree_path` fallback) is inherently safe — it only matches when the *exact same content* is found at an offset — so no additional protection is needed there. diff --git a/docs/liyi-design.md.liyi.jsonc b/docs/liyi-design.md.liyi.jsonc index 0ac4d79..b72472b 100644 --- a/docs/liyi-design.md.liyi.jsonc +++ b/docs/liyi-design.md.liyi.jsonc @@ -5,104 +5,155 @@ "specs": [ { "requirement": "liyi-sidecar-naming-convention", - "source_span": [343, 345], - "source_hash": "sha256:0000eb15eb6a1ebe192c1f1b3d43b1f60e619935997bbfd8df5d1e80c02fe163", + "source_span": [ + 343, + 345 + ], + "source_hash": "sha256:e6619cfe8d91cfefa6b099e10a71bc40db7624e1a973783815f04d18d69ff470", "source_anchor": "" }, { "requirement": "version-field-required", - "source_span": [377, 379], - "source_hash": "sha256:b4f52a48481fe808a550884bee8949e665b3f34fd4dd77e55c51a0f46abd3009", + "source_span": [ + 377, + 379 + ], + "source_hash": "sha256:e5d076462c6128635873db10142cb9046c9cdc176bc9dbed93864fbfbda861c6", "source_anchor": "" }, { "requirement": "reviewed-semantics", - "source_span": [403, 405], - "source_hash": "sha256:da9de772f9412fc5e5ac4a172b6978c191e267b2c04ed477694914868ecbbdbe", + "source_span": [ + 403, + 405 + ], + "source_hash": "sha256:40020571498ef149fe7a67ebd78bc430320921aae1faac8b7be1c830f35ae636", "source_anchor": "" }, { "requirement": "source-span-semantics", - "source_span": [427, 429], - "source_hash": "sha256:4d0c5843b38c855e3c725b8a6d1a1a63943f332185481ff1e395006a10a6ccfe", + "source_span": [ + 427, + 429 + ], + "source_hash": "sha256:867bdee2970960b1cad64d2c07d813bdd6c0e0a8c0fae52cec7ed398fc2b9402", 
"source_anchor": "" }, { "requirement": "tool-managed-fields", - "source_span": [431, 433], - "source_hash": "sha256:4169ca6d29fd03f03c1cafd61476e2ec7633981b298eced09fb2336ab38e990a", + "source_span": [ + 431, + 433 + ], + "source_hash": "sha256:d8dc3d37bad6f8fd51309f85e67bfcb2e737d4dfa9d8d9d3227b7eec0f9a1149", "source_anchor": "" }, { "requirement": "span-shift-heuristic", - "source_span": [443, 445], - "source_hash": "sha256:dc52a0e237b136645c3d6cd05911414d947094eb91323e01969c124301606982", + "source_span": [ + 443, + 445 + ], + "source_hash": "sha256:ecf1e7a7a12c28b804bad2697be04e9c7dbadf053b7d93d7e8e7ae2fd46b6de7", "source_anchor": "" }, { "requirement": "tree-path-reanchor-behavior", - "source_span": [469, 473], - "source_hash": "sha256:1df1d3238855b1d8c1b20493e2788af47135f318e399d43b650bded04c7c5a5a", + "source_span": [ + 469, + 473 + ], + "source_hash": "sha256:e3f8460c2acdfa065b975c590abf0f4e869ff4dcfabc75674ae7ef0ef2f65600", "source_anchor": "" }, { "requirement": "tree-path-empty-fallback", - "source_span": [477, 485], - "source_hash": "sha256:0bbc4ec24af3fb1c3abce91342f1249f5b0a029e2a6a0573544d5a13d59722d6", + "source_span": [ + 477, + 485 + ], + "source_hash": "sha256:3b5b2113802d61b766fd0462d7706b0813b60cc373794d1d99f4732f6c5a6280", "source_anchor": "" }, { "requirement": "requirement-name-uniqueness", - "source_span": [633, 635], - "source_hash": "sha256:03a65c514532f4f424cbe30ed55fe7455c3a602ac286b1c62efa9722b0968700", + "source_span": [ + 633, + 635 + ], + "source_hash": "sha256:bf5fcadbb09429fc0b9a6f55b6b1ac685d5a88f134b3e89f2861dc20bc0e9928", "source_anchor": "" }, { "requirement": "cycle-detection", - "source_span": [758, 760], - "source_hash": "sha256:66311f793af3fcf3539f50be30b4f2e7d4e56fd3d1dbd4069bf34b6699868542", + "source_span": [ + 777, + 779 + ], + "source_hash": "sha256:925289a107cb255f3c48f0b5395395038278f56f7006dab3522e511cc8d7699f", "source_anchor": "" }, { "requirement": "requirement-discovery-global", - "source_span": [1089, 1091], 
- "source_hash": "sha256:3f0928a2639b4fd062a2af53f159183665817cc3f7ed7a2c45774a622f8873e9", + "source_span": [ + 1108, + 1110 + ], + "source_hash": "sha256:d01ce1382642d6ad05162bbba79d4cd35dc7715294f8cc3d6c54de9e743fa38c", "source_anchor": "" }, { "requirement": "liyi-check-exit-code", - "source_span": [1214, 1220], - "source_hash": "sha256:6ed25f14e08ea936e927150a3d0cc82f881011edcd1360b5401e7a03658f53f9", + "source_span": [ + 1233, + 1239 + ], + "source_hash": "sha256:d7c4ad4bd1d1abe361598373a376aacda6bc0d948bd611f9b8bdea0b23a74c7a", "source_anchor": "" }, { "requirement": "marker-normalization", - "source_span": [1247, 1249], - "source_hash": "sha256:b4e1f5b698b77c4c661d15cbd070e30839ecfd2f42fb77286e0a7408e294bd1c", + "source_span": [ + 1266, + 1268 + ], + "source_hash": "sha256:7d9e05684eeaf175288fbd0629a2352dbe3353416172691de4ab3dcbe435b7d3", "source_anchor": "" }, { "requirement": "quine-escape-in-source", - "source_span": [1261, 1263], - "source_hash": "sha256:326dee10b54b25cc8f874d8577bb3218efa21313f1329e70dadcc83d03978e38", + "source_span": [ + 1280, + 1282 + ], + "source_hash": "sha256:7bb4de54441a4fadbd4204d8e950cedff7874181b111f2b6789f693ac22b982b", "source_anchor": "" }, { "requirement": "markdown-fenced-block-skip", - "source_span": [1267, 1273], - "source_hash": "sha256:18e3c5d63b28b021f3ef45158fddfc73fbb2bdfd6433b63748d70df9736ba2e1", + "source_span": [ + 1286, + 1292 + ], + "source_hash": "sha256:8160197ac9ec02de2896ceac8e8edf334a43efe294ccaf976b2909a629689ceb", "source_anchor": "" }, { "requirement": "fix-never-modifies-human-fields", - "source_span": [1479, 1481], - "source_hash": "sha256:2c59f122385a46454e274ecfe0a77e78b47b846ca4f932c151ab3baa3c4c9250", + "source_span": [ + 1498, + 1500 + ], + "source_hash": "sha256:e5c1b041dab62b4533a84b2bfc73c2ca8697a9ae7a454540686c8d479305ef5f", "source_anchor": "" }, { "requirement": "fix-semantic-drift-protection", - "source_span": [1483, 1485], - "source_hash": 
"sha256:320f7c2667bf1a8163a2ec95adb0bdfeac648780042af08a2eab4ff27d0b74a2", + "source_span": [ + 1502, + 1504 + ], + "source_hash": "sha256:87f70b0fa07c386eb7478b420f8edc70f7b7bb709c416e2aaf17a1a913fa0ce5", "source_anchor": "" } ] From 2564749b88bd5f6aebf2585289c9ca41c9aa2ca9 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Thu, 12 Mar 2026 01:00:11 +0800 Subject: [PATCH 15/21] style: cargo fmt Signed-off-by: WANG Xuerui --- crates/liyi/src/markers.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/liyi/src/markers.rs b/crates/liyi/src/markers.rs index 32a7499..2b8058f 100644 --- a/crates/liyi/src/markers.rs +++ b/crates/liyi/src/markers.rs @@ -440,7 +440,9 @@ mod tests { #[test] fn scan_end_requirement_chinese_alias() { - let m = scan_markers("\n"); + let m = scan_markers( + "\n", + ); assert_eq!(m.len(), 1); assert!( matches!(&m[0], SourceMarker::EndRequirement { name, line: 1 } if name == "exit-codes") From f88742811f002baa8a0d8501f9f8777c30c21537 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Thu, 12 Mar 2026 01:07:26 +0800 Subject: [PATCH 16/21] feat(linter): compute tree_path in check --fix When check --fix updates a span (via tree-sitter recovery, shift heuristic, or filling a missing hash), also compute and update the tree_path field from the new span. This makes check --fix feature-complete with reanchor for tree_path maintenance. Previously only liyi reanchor refreshed tree_path; now the standard lint-and-fix workflow does it too, removing one reason to need a separate reanchor subcommand. Original prompt: > Let's do that now, I haven't released v0.1.0 yet exactly because > I don't feel the dogfooding experience to be smooth and pleasant > enough. Everything can still be changed. Let's revise the docs > and just drop "reanchor". 
AI-assisted-by: Claude Opus 4.6 (GitHub Copilot) Signed-off-by: WANG Xuerui --- crates/liyi/src/check.rs | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/crates/liyi/src/check.rs b/crates/liyi/src/check.rs index 5545022..c05418f 100644 --- a/crates/liyi/src/check.rs +++ b/crates/liyi/src/check.rs @@ -11,7 +11,7 @@ use crate::markers::{SourceMarker, requirement_spans, scan_markers}; use crate::schema::validate_version; use crate::shift::{ShiftResult, detect_shift}; use crate::sidecar::{Spec, parse_sidecar, write_sidecar}; -use crate::tree_path::{detect_language, resolve_tree_path}; +use crate::tree_path::{compute_tree_path, detect_language, resolve_tree_path}; // --------------------------------------------------------------------------- // Internal types @@ -445,6 +445,10 @@ fn check_sidecar( if fix { item.source_hash = Some(computed_hash.clone()); item.source_anchor = Some(computed_anchor.clone()); + let lang = detect_language(&entry.source_path); + if let Some(l) = lang { + item.tree_path = compute_tree_path(&source_content, item.source_span, l); + } modified = true; } diagnostics.push(Diagnostic { @@ -499,6 +503,9 @@ fn check_sidecar( item.source_hash = Some(h); item.source_anchor = Some(a); } + if let Some(l) = lang { + item.tree_path = compute_tree_path(&source_content, new_span, l); + } modified = true; } diagnostics.push(Diagnostic { @@ -526,6 +533,9 @@ fn check_sidecar( // Intentionally NOT updating hash — // leaves the spec stale so the next // `liyi check` flags it. 
+ if let Some(l) = lang { + item.tree_path = compute_tree_path(&source_content, new_span, l); + } modified = true; } let msg = if new_span != old_span { @@ -559,6 +569,10 @@ fn check_sidecar( item.source_hash = Some(h); item.source_anchor = Some(a); } + let lang = detect_language(&entry.source_path); + if let Some(l) = lang { + item.tree_path = compute_tree_path(&source_content, new_span, l); + } modified = true; } diagnostics.push(Diagnostic { @@ -639,6 +653,9 @@ fn check_sidecar( item.source_hash = Some(h); item.source_anchor = Some(a); } + if let Some(l) = lang { + item.tree_path = compute_tree_path(&source_content, new_span, l); + } modified = true; } diagnostics.push(Diagnostic { @@ -660,6 +677,9 @@ fn check_sidecar( // but leave hash stale. if fix { item.source_span = new_span; + if let Some(l) = lang { + item.tree_path = compute_tree_path(&source_content, new_span, l); + } modified = true; } diagnostics.push(Diagnostic { @@ -916,6 +936,9 @@ fn check_sidecar( req.source_hash = Some(h); req.source_anchor = Some(a); } + if let Some(l) = lang { + req.tree_path = compute_tree_path(&source_content, new_span, l); + } modified = true; } diagnostics.push(Diagnostic { @@ -935,6 +958,9 @@ fn check_sidecar( } else { if fix { req.source_span = new_span; + if let Some(l) = lang { + req.tree_path = compute_tree_path(&source_content, new_span, l); + } modified = true; } diagnostics.push(Diagnostic { From e01808f664d3a69c512429ad8fde6e88bd5474ec Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Thu, 12 Mar 2026 01:09:24 +0800 Subject: [PATCH 17/21] refactor: move sidecar file resolution to discovery module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move resolve_reanchor_targets from reanchor.rs to discovery.rs as resolve_sidecar_targets. The function resolves file/directory paths into concrete .liyi.jsonc paths — this is sidecar discovery, not reanchor-specific logic. 
- Add resolve_sidecar_targets to discovery.rs - Update approve.rs to import from discovery - Keep backward-compatible alias in reanchor.rs (will be removed when the reanchor subcommand is dropped) Original prompt: > Let's do that now, I haven't released v0.1.0 yet exactly because > I don't feel the dogfooding experience to be smooth and pleasant > enough. Everything can still be changed. Let's revise the docs > and just drop "reanchor". AI-assisted-by: Claude Opus 4.6 (GitHub Copilot) Signed-off-by: WANG Xuerui --- crates/liyi/src/approve.rs | 4 ++-- crates/liyi/src/discovery.rs | 31 +++++++++++++++++++++++++++++++ crates/liyi/src/reanchor.rs | 33 +++------------------------------ 3 files changed, 36 insertions(+), 32 deletions(-) diff --git a/crates/liyi/src/approve.rs b/crates/liyi/src/approve.rs index 03772ad..9b3890e 100644 --- a/crates/liyi/src/approve.rs +++ b/crates/liyi/src/approve.rs @@ -3,7 +3,7 @@ use std::io; use std::path::{Path, PathBuf}; use crate::hashing::hash_span; -use crate::reanchor::resolve_reanchor_targets; +use crate::discovery::resolve_sidecar_targets; use crate::sidecar::{Spec, parse_sidecar, write_sidecar}; /// Result of an approve operation on a single sidecar. @@ -75,7 +75,7 @@ pub fn collect_approval_candidates( paths: &[PathBuf], item_filter: Option<&str>, ) -> Result, ApproveError> { - let targets = resolve_reanchor_targets(paths).map_err(ApproveError::Parse)?; + let targets = resolve_sidecar_targets(paths).map_err(ApproveError::Parse)?; if targets.is_empty() { return Err(ApproveError::NoTargets); } diff --git a/crates/liyi/src/discovery.rs b/crates/liyi/src/discovery.rs index 0d42a10..bd55327 100644 --- a/crates/liyi/src/discovery.rs +++ b/crates/liyi/src/discovery.rs @@ -177,6 +177,37 @@ fn source_name_from_sidecar(sidecar: &Path) -> String { .to_string() } +/// Expand a list of file/directory paths into concrete `.liyi.jsonc` file +/// paths. 
If a path is a directory, walk it recursively (respecting +/// `.gitignore` and `.liyiignore`) and collect all sidecar files found. +/// If a path is a file, include it directly. +pub fn resolve_sidecar_targets(paths: &[PathBuf]) -> Result, String> { + let mut result: Vec = Vec::new(); + for p in paths { + if p.is_dir() { + let walker = WalkBuilder::new(p) + .add_custom_ignore_filename(".liyiignore") + .build(); + for entry in walker { + let entry = entry.map_err(|e| format!("walk error: {e}"))?; + if entry.file_type().is_some_and(|ft| ft.is_file()) + && let Some(name) = entry.path().file_name().and_then(|n| n.to_str()) + && name.ends_with(SIDECAR_SUFFIX) + { + result.push(entry.into_path()); + } + } + } else if p.is_file() { + result.push(p.clone()); + } else { + return Err(format!("path does not exist: {}", p.display())); + } + } + result.sort(); + result.dedup(); + Ok(result) +} + /// Compute `path` relative to `base` using pure lexical processing. fn pathdiff(path: &Path, base: &Path) -> Option { path.strip_prefix(base) diff --git a/crates/liyi/src/reanchor.rs b/crates/liyi/src/reanchor.rs index 23c934f..6d824e1 100644 --- a/crates/liyi/src/reanchor.rs +++ b/crates/liyi/src/reanchor.rs @@ -1,42 +1,15 @@ use std::path::{Path, PathBuf}; +use crate::discovery::resolve_sidecar_targets; use crate::hashing::hash_span; use crate::markers::{requirement_spans, scan_markers}; use crate::schema::migrate; use crate::sidecar::{Spec, parse_sidecar, write_sidecar}; use crate::tree_path::{compute_tree_path, detect_language, resolve_tree_path}; -const SIDECAR_SUFFIX: &str = ".liyi.jsonc"; - -/// Expand a list of file/directory paths into concrete `.liyi.jsonc` file -/// paths. If a path is a directory, walk it recursively (respecting -/// `.gitignore` and `.liyiignore`) and collect all sidecar files found. -/// If a path is a file, include it directly. +/// Backward-compatible alias for [`crate::discovery::resolve_sidecar_targets`]. 
pub fn resolve_reanchor_targets(paths: &[PathBuf]) -> Result, String> { - let mut result: Vec = Vec::new(); - for p in paths { - if p.is_dir() { - let walker = ignore::WalkBuilder::new(p) - .add_custom_ignore_filename(".liyiignore") - .build(); - for entry in walker { - let entry = entry.map_err(|e| format!("walk error: {e}"))?; - if entry.file_type().is_some_and(|ft| ft.is_file()) - && let Some(name) = entry.path().file_name().and_then(|n| n.to_str()) - && name.ends_with(SIDECAR_SUFFIX) - { - result.push(entry.into_path()); - } - } - } else if p.is_file() { - result.push(p.clone()); - } else { - return Err(format!("path does not exist: {}", p.display())); - } - } - result.sort(); - result.dedup(); - Ok(result) + resolve_sidecar_targets(paths) } /// Re-hash source spans in a sidecar file. From c2083168f48b394e5edc743642b50ec7bf596969 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Thu, 12 Mar 2026 01:11:50 +0800 Subject: [PATCH 18/21] feat(cli): replace reanchor subcommand with migrate Drop the `liyi reanchor` subcommand. Its unique capabilities are now covered by `liyi check --fix` (tree_path computation, span recovery) or the new `liyi migrate` subcommand (schema version migration). CLI changes: - Remove Reanchor variant from Commands enum - Add Migrate subcommand (takes files/dirs, runs schema migration) - Remove parse_span helper (no longer needed) - Update main.rs dispatch to use discovery::resolve_sidecar_targets Diagnostic changes: - Replace all "liyi reanchor " fix_hints with "liyi check --fix" or "liyi migrate " as appropriate Original prompt: > Let's do that now, I haven't released v0.1.0 yet exactly because > I don't feel the dogfooding experience to be smooth and pleasant > enough. Everything can still be changed. Let's revise the docs > and just drop "reanchor". 
AI-assisted-by: Claude Opus 4.6 (GitHub Copilot) Signed-off-by: WANG Xuerui --- crates/liyi-cli/src/cli.rs | 35 +++-------------------------------- crates/liyi-cli/src/main.rs | 22 ++++------------------ crates/liyi/src/check.rs | 8 ++++---- 3 files changed, 11 insertions(+), 54 deletions(-) diff --git a/crates/liyi-cli/src/cli.rs b/crates/liyi-cli/src/cli.rs index 80c9309..3b66d19 100644 --- a/crates/liyi-cli/src/cli.rs +++ b/crates/liyi-cli/src/cli.rs @@ -68,23 +68,10 @@ pub enum Commands { level: DiagnosticLevel, }, - /// Re-hash source spans in sidecar files - Reanchor { - /// Sidecar files or directories to reanchor (recursive) - #[arg(required_unless_present = "migrate")] + /// Migrate sidecar files to the current schema version + Migrate { + /// Sidecar files or directories to migrate (recursive) files: Vec, - - /// Target a specific item by name - #[arg(long, requires = "span")] - item: Option, - - /// Override span (start,end) - #[arg(long, requires = "item", value_parser = parse_span)] - span: Option<[usize; 2]>, - - /// Migrate sidecar to current schema version - #[arg(long)] - migrate: bool, }, /// Scaffold AGENTS.md or skeleton .liyi.jsonc sidecar @@ -117,19 +104,3 @@ pub enum Commands { }, } -/// Parse a "start,end" string into a [usize; 2] span. 
-fn parse_span(s: &str) -> Result<[usize; 2], String> { - let parts: Vec<&str> = s.split(',').collect(); - if parts.len() != 2 { - return Err(format!("expected format 'start,end', got '{s}'")); - } - let start: usize = parts[0] - .trim() - .parse() - .map_err(|_| format!("invalid start: '{}'", parts[0].trim()))?; - let end: usize = parts[1] - .trim() - .parse() - .map_err(|_| format!("invalid end: '{}'", parts[1].trim()))?; - Ok([start, end]) -} diff --git a/crates/liyi-cli/src/main.rs b/crates/liyi-cli/src/main.rs index 181ee34..1e0750f 100644 --- a/crates/liyi-cli/src/main.rs +++ b/crates/liyi-cli/src/main.rs @@ -89,23 +89,13 @@ fn main() { process::exit(exit_code as i32); } - Commands::Reanchor { - files, - item, - span, - migrate, - } => { - if migrate && files.is_empty() { - eprintln!("--migrate requires at least one sidecar file path"); - process::exit(2); - } - + Commands::Migrate { files } => { if files.is_empty() { eprintln!("at least one sidecar file or directory required"); process::exit(2); } - let targets = match liyi::reanchor::resolve_reanchor_targets(&files) { + let targets = match liyi::discovery::resolve_sidecar_targets(&files) { Ok(t) => t, Err(e) => { eprintln!("Error: {e}"); @@ -120,13 +110,9 @@ fn main() { let mut errors = 0; for sidecar_path in &targets { - match liyi::reanchor::run_reanchor(sidecar_path, item.as_deref(), span, migrate) { + match liyi::reanchor::run_reanchor(sidecar_path, None, None, true) { Ok(()) => { - if migrate { - println!("Migrated: {}", sidecar_path.display()); - } else { - println!("Reanchored: {}", sidecar_path.display()); - } + println!("Migrated: {}", sidecar_path.display()); } Err(e) => { eprintln!("Error ({}): {e}", sidecar_path.display()); diff --git a/crates/liyi/src/check.rs b/crates/liyi/src/check.rs index c05418f..d79985f 100644 --- a/crates/liyi/src/check.rs +++ b/crates/liyi/src/check.rs @@ -338,7 +338,7 @@ fn check_sidecar( }, severity: Severity::Error, message: e, - fix_hint: Some(format!("liyi reanchor 
--migrate {rel_sidecar}")), + fix_hint: Some(format!("liyi migrate {rel_sidecar}")), }); return; } @@ -457,7 +457,7 @@ fn check_sidecar( kind: DiagnosticKind::Stale, severity: Severity::Warning, message: "missing source_hash".into(), - fix_hint: Some(format!("liyi reanchor {rel_sidecar}")), + fix_hint: Some("liyi check --fix".into()), }); } else { // Hash mismatch — try tree_path first, then shift @@ -709,7 +709,7 @@ fn check_sidecar( }, severity: Severity::Error, message: detail, - fix_hint: Some(format!("liyi reanchor {rel_sidecar}")), + fix_hint: Some("liyi check --fix".into()), }); } } @@ -990,7 +990,7 @@ fn check_sidecar( }, severity: Severity::Error, message: detail, - fix_hint: Some(format!("liyi reanchor {rel_sidecar}")), + fix_hint: Some("liyi check --fix".into()), }); } } From 253c75c05d5bf869700d6674a08bcdd0dfd725fe Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Thu, 12 Mar 2026 01:36:54 +0800 Subject: [PATCH 19/21] docs: replace reanchor references with check --fix and migrate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The reanchor CLI subcommand was removed in prior commits. This updates all documentation, schemas, AGENTS.md, READMEs, source comments, and sidecar files to reflect the new terminology: - "liyi reanchor" → "liyi check --fix" throughout - "liyi reanchor --migrate" → "liyi migrate" - Rename requirement tree-path-reanchor-behavior → tree-path-fix-behavior - Rename triage action auto-reanchor → auto-fix - Remove deleted parse_span from cli.rs sidecar - Update intent text for affected sidecars - Clear and recompute stale hashes via check --fix Original prompt: > Let's revise the docs and just drop 'reanchor'. Remember to > commit logical changes frequently. 
AI-assisted-by: Claude Opus 4.6 (GitHub Copilot) Signed-off-by: WANG Xuerui --- AGENTS.md | 14 +-- README.md | 10 +-- README.zh.md | 10 +-- crates/liyi-cli/src/cli.rs.liyi.jsonc | 18 +--- crates/liyi-cli/src/main.rs.liyi.jsonc | 8 +- crates/liyi/src/approve.rs.liyi.jsonc | 4 +- crates/liyi/src/check.rs.liyi.jsonc | 18 ++-- crates/liyi/src/reanchor.rs | 2 +- crates/liyi/src/reanchor.rs.liyi.jsonc | 16 ++-- crates/liyi/src/tree_path/mod.rs | 2 +- docs/liyi-01x-roadmap.md | 2 +- docs/liyi-design.md | 118 ++++++++++++------------- docs/liyi-design.md.liyi.jsonc | 56 ++++++------ docs/liyi-mvp-roadmap.md | 77 +++++++--------- schema/liyi.schema.json | 6 +- schema/triage.schema.json | 4 +- 16 files changed, 165 insertions(+), 200 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index d54f96e..c09865a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -23,14 +23,14 @@ When writing or modifying code: 2. When module-level invariants are apparent, write an `@liyi:module` block — in the directory's existing module doc (`README.md`, `doc.go`, `mod.rs` doc comment, etc.) or in a dedicated `LIYI.md`. Use the doc markup language's comment syntax for the marker. 3. If a source item has a `@liyi:related ` annotation, record the dependency in `.liyi.jsonc` as `"related": {"": null}`. The tool fills in the requirement's current hash. 4. For each `@liyi:requirement ` block encountered, ensure it has a corresponding entry in the co-located `.liyi.jsonc` with `"requirement"` and `"source_span"`. (The tool fills in `"source_hash"`.) -5. If a spec has `"related"` edges referencing a requirement, do not overwrite the requirement text during inference. Re-anchor the spec (update `source_span`) but preserve the `"related"` edges. Do not write `source_hash` — the tool fills it in. +5. If a spec has `"related"` edges referencing a requirement, do not overwrite the requirement text during inference. Update the spec (update `source_span`) but preserve the `"related"` edges. 
Do not write `source_hash` — the tool fills it in. 6. Only generate adversarial tests from items that have a `@liyi:intent` annotation in source or `"reviewed": true` in the sidecar (i.e., human-reviewed intent). When `@liyi:intent` is present in source, use its prose (or the docstring for `=doc`) as the authoritative intent for test generation. 7. Tests should target boundary conditions, error-handling gaps, property violations, and semantic mismatches. Prioritize tests a subtly wrong implementation would fail. 8. Skip items annotated with `@liyi:ignore` or `@liyi:trivial`, and files matched by `.liyiignore`. Respect `@liyi:nontrivial` — if present, always infer a spec for that item and never override with `@liyi:trivial`. 9. Use a different model for test generation than the one that wrote the code, when possible. 10. When `liyi check` reports stale items, choose one of two paths: - **Direct re-inference** (preferred during interactive editing with few stale items): re-read the source, update `source_span` and `intent` in the sidecar, leave `"reviewed"` unset. Appropriate when you are the agent that just made the change, the number of stale items is small, and the changes are straightforward. - - **Triage** (preferred for batch workflows, CI, or when many items are stale): assess each item — is the change cosmetic, semantic, or an intent violation? Write the assessment to `.liyi/triage.json` following the triage report schema. For cosmetic changes, run `liyi triage --apply` to auto-reanchor. For semantic changes, propose updated intent in `suggested_intent`. For intent violations, flag for human review. Prefer triage when stale items have `"reviewed": true` or `@liyi:intent` in source — these carry human-vouched intent that deserves explicit assessment, not silent re-inference. + - **Triage** (preferred for batch workflows, CI, or when many items are stale): assess each item — is the change cosmetic, semantic, or an intent violation? 
Write the assessment to `.liyi/triage.json` following the triage report schema. For cosmetic changes, run `liyi triage --apply` to auto-fix. For semantic changes, propose updated intent in `suggested_intent`. For intent violations, flag for human review. Prefer triage when stale items have `"reviewed": true` or `@liyi:intent` in source — these carry human-vouched intent that deserves explicit assessment, not silent re-inference. 11. Before committing, run `liyi check`. If it reports coverage gaps (missing requirement specs, missing related edges), resolve **all** gaps in the same commit. Do not commit with unresolved coverage gaps — CI will reject it. ### `.liyi.jsonc` Schema (v0.1) @@ -104,7 +104,7 @@ Sidecar files must conform to the following JSON Schema. The top-level object ha }, "source_hash": { "$ref": "#/$defs/sourceHash", - "description": "Tool-managed. SHA-256 hex digest of the source lines in the span. Computed by liyi reanchor or the linter — agents should not produce this." + "description": "Tool-managed. SHA-256 hex digest of the source lines in the span. Computed by liyi check --fix — agents should not produce this." }, "source_anchor": { "type": "string", @@ -128,7 +128,7 @@ Sidecar files must conform to the following JSON Schema. The top-level object ha }, "_hints": { "type": "object", - "description": "Transient inference aids emitted by liyi init for cold-start scenarios. LLM-readable, intentionally unstructured. Stripped by liyi reanchor after initial review. Tools MUST NOT rely on any specific shape." + "description": "Transient inference aids emitted by liyi init for cold-start scenarios. LLM-readable, intentionally unstructured. Stripped by liyi check --fix after initial review. Tools MUST NOT rely on any specific shape." } } }, @@ -149,7 +149,7 @@ Sidecar files must conform to the following JSON Schema. The top-level object ha }, "source_hash": { "$ref": "#/$defs/sourceHash", - "description": "Tool-managed. 
Computed by liyi reanchor or the linter." + "description": "Tool-managed. Computed by liyi check --fix." }, "source_anchor": { "type": "string", @@ -215,8 +215,8 @@ When `liyi check` reports stale items, the agent assesses each and writes the re }, "action": { "type": "string", - "enum": ["auto-reanchor", "update-intent", "fix-code-or-update-intent", "manual-review"], - "description": "Recommended action. auto-reanchor for cosmetic, update-intent for semantic, fix-code-or-update-intent for intent-violation, manual-review for unclear." + "enum": ["auto-fix", "update-intent", "fix-code-or-update-intent", "manual-review"], + "description": "Recommended action. auto-fix for cosmetic, update-intent for semantic, fix-code-or-update-intent for intent-violation, manual-review for unclear." }, "summary": { "type": "object", diff --git a/README.md b/README.md index 5b4fea2..34e8fd8 100644 --- a/README.md +++ b/README.md @@ -30,8 +30,8 @@ liyi check --root . ## How It Works 1. **Agent infers intent** — today's agents automatically read `AGENTS.md`, which teaches them the 立意 pattern. During normal development they maintain `.liyi.jsonc` sidecar files for each code item, with `source_span` and natural-language `intent`. If they don't do it automatically, you can always tell them to. -2. **`liyi check`** — hashes source spans, detects staleness and shifts, checks review status, tracks requirement edges. Zero network, zero LLM, fully deterministic. -3. **`liyi reanchor`** — re-hashes spans after intentional code changes. Never modifies intent or review state. +2. **`liyi check`** — hashes source spans, detects staleness and shifts, checks review status, tracks requirement edges. Zero network, zero LLM, fully deterministic. With `--fix`, auto-corrects shifted spans, fills missing hashes, and computes `tree_path`. +3. **`liyi migrate`** — upgrades sidecar files when the schema version changes. Idempotent. 4. 
**Human reviews** — sets `"reviewed": true` in the sidecar to approve, or adds `@liyi:intent` in source to provide the authoritative human version. ## Progressive Adoption @@ -55,10 +55,8 @@ liyi check [OPTIONS] [PATHS]... --fail-on-req-changed Fail on changed requirements (default: true) --root Override repo root -liyi reanchor [FILE] - --item Target a specific item - --span Override span (1-indexed, inclusive) - --migrate Schema version migration +liyi migrate [FILE|DIR]... + Upgrade sidecar schema version ``` ## Exit Codes diff --git a/README.zh.md b/README.zh.md index 1f753c9..befd50b 100644 --- a/README.zh.md +++ b/README.zh.md @@ -31,8 +31,8 @@ liyi check --root . ## 工作原理 1. **由智能体推断意图** — 当今的智能体会自动读取 `AGENTS.md`,于是便掌握了《立意》设计模式。在正常开发流程中,它们便会自动为每个代码条目维护 `.liyi.jsonc` sidecar 文件,包含 `source_span` 和自然语言 `intent`。如果没有自动维护,也总可以明确告诉它这么干。 -2. **`liyi check`** — 为智能体提供的源码区间计算内容哈希,检测内容是否过时、行号是否偏移、是否被复核过,并追踪需求边。零网络访问、零 LLM 依赖、行为完全确定。 -3. **`liyi reanchor`** — 在有意的代码变更后重新计算区间哈希。不修改意图或复核状态。 +2. **`liyi check`** — 为智能体提供的源码区间计算内容哈希,检测内容是否过时、行号是否偏移、是否被复核过,并追踪需求边。零网络访问、零 LLM 依赖、行为完全确定。加 `--fix` 可自动修正偏移区间、填充缺失哈希、计算 `tree_path`。 +3. **`liyi migrate`** — 当 schema 版本变更时升级 sidecar 文件。幂等。 4. **由人类复核** — 在 `.liyi.jsonc` 中设置 `"reviewed": true` 以批准,或在源码中添加 `@liyi:intent` 以明确给出人类版本。 ## 渐进式采用 @@ -56,10 +56,8 @@ liyi check [OPTIONS] [PATHS]... --fail-on-req-changed 对已变更需求报错(默认:true) --root 覆盖仓库根目录 -liyi reanchor [FILE] - --item 指定目标条目 - --span 覆盖区间(1 起始,闭区间) - --migrate 执行 schema 版本迁移 +liyi migrate [FILE|DIR]... + 升级 sidecar schema 版本 ``` ## 退出状态码 diff --git a/crates/liyi-cli/src/cli.rs.liyi.jsonc b/crates/liyi-cli/src/cli.rs.liyi.jsonc index 3f3308e..dbeb54a 100644 --- a/crates/liyi-cli/src/cli.rs.liyi.jsonc +++ b/crates/liyi-cli/src/cli.rs.liyi.jsonc @@ -30,26 +30,14 @@ { "item": "Commands", "reviewed": false, - "intent": "Enumerate all CLI subcommands (Check, Reanchor, Init, Approve) with their full set of flags and arguments, providing defaults and mutual constraints (e.g. 
--item requires --span, file is required unless --migrate, --level filters diagnostic output).", + "intent": "Enumerate all CLI subcommands (Check, Migrate, Init, Approve) with their full set of flags and arguments, providing defaults and mutual constraints (e.g. --level filters diagnostic output).", "source_span": [ 28, - 118 + 105 ], "tree_path": "enum::Commands", - "source_hash": "sha256:383b23d5dbade566788d2673cb55fc05e8e7e33b70f4e2edd77df0245d09db65", + "source_hash": "sha256:94096c9cb7d64c3fd721c783da0c3f0482f6b548e317daac6b2c1d0ec902dd63", "source_anchor": "pub enum Commands {" - }, - { - "item": "parse_span", - "reviewed": true, - "intent": "Parse a 'start,end' string into a [usize; 2] span, rejecting inputs that are not exactly two comma-separated unsigned integers.", - "source_span": [ - 121, - 135 - ], - "tree_path": "fn::parse_span", - "source_hash": "sha256:d57d01b6fb8d7fbefc54c62e3240b46d80cc2370a7d148811caad6d809b23977", - "source_anchor": "fn parse_span(s: &str) -> Result<[usize; 2], String> {" } ] } diff --git a/crates/liyi-cli/src/main.rs.liyi.jsonc b/crates/liyi-cli/src/main.rs.liyi.jsonc index c984016..1069c8f 100644 --- a/crates/liyi-cli/src/main.rs.liyi.jsonc +++ b/crates/liyi-cli/src/main.rs.liyi.jsonc @@ -21,10 +21,10 @@ "intent": "=doc", "source_span": [ 26, - 242 + 228 ], "tree_path": "fn::main", - "source_hash": "sha256:a11828b49d8e7c5b144423442c9bab41b1d84aff99c551e159e0c03558f70b06", + "source_hash": "sha256:5c447be7165dcba8bfd3d08fff588db7b2d95309ec9c474aa30487deeec02d49", "source_anchor": "fn main() {" }, { @@ -32,8 +32,8 @@ "reviewed": true, "intent": "=doc", "source_span": [ - 248, - 250 + 234, + 236 ], "tree_path": "fn::is_tty", "source_hash": "sha256:36dcd447c8fa9e666c6682395c3148c216b7c07dce8cc88f3d76f90714207ccd", diff --git a/crates/liyi/src/approve.rs.liyi.jsonc b/crates/liyi/src/approve.rs.liyi.jsonc index 50ec563..671f32b 100644 --- a/crates/liyi/src/approve.rs.liyi.jsonc +++ b/crates/liyi/src/approve.rs.liyi.jsonc @@ -60,13 
+60,13 @@ 130 ], "tree_path": "fn::collect_approval_candidates", - "source_hash": "sha256:1fc98ea41edd3bed9c13d936635399d5e27a81f1f8acec326368b1eb532bda7c", + "source_hash": "sha256:f16f381eb67bf6126a0acfe022aac1ee575696b1d32a5cb46cee6bf2eb15a1d2", "source_anchor": "pub fn collect_approval_candidates(" }, { "item": "apply_approval_decisions", "reviewed": false, - "intent": "Apply a parallel slice of Decision values to the candidates, grouped by sidecar file. For Yes: set reviewed=true and reanchor hashes. For No: set reviewed=false. For Skip: no mutation. Write back modified sidecars unless dry_run. Returns per-sidecar ApproveResult.", + "intent": "Apply a parallel slice of Decision values to the candidates, grouped by sidecar file. For Yes: set reviewed=true and fill hashes. For No: set reviewed=false. For Skip: no mutation. Write back modified sidecars unless dry_run. Returns per-sidecar ApproveResult.", "source_span": [ 137, 206 diff --git a/crates/liyi/src/check.rs.liyi.jsonc b/crates/liyi/src/check.rs.liyi.jsonc index 2cdfac2..7f620bd 100644 --- a/crates/liyi/src/check.rs.liyi.jsonc +++ b/crates/liyi/src/check.rs.liyi.jsonc @@ -28,24 +28,24 @@ "source_anchor": "pub fn run_check(", "related": { "cycle-detection": null, - "requirement-discovery-global": null, - "requirement-name-uniqueness": null + "requirement-name-uniqueness": null, + "requirement-discovery-global": null } }, { "item": "check_sidecar", "reviewed": false, - "intent": "For a single sidecar entry: parse the sidecar, validate its version, verify the source file exists, then for each spec check hash freshness (with shift detection and --fix support), review status (sidecar reviewed flag or @liyi:intent marker), trivial/ignore markers, and related-requirement edges. 
Write the sidecar back if --fix produced modifications.", + "intent": "For a single sidecar entry: parse the sidecar, validate its version, verify the source file exists, then for each spec check hash freshness (with shift detection, tree_path computation, and --fix support), review status (sidecar reviewed flag or @liyi:intent marker), trivial/ignore markers, and related-requirement edges. Write the sidecar back if --fix produced modifications.", "source_span": [ 282, - 996 + 1022 ], "tree_path": "fn::check_sidecar", - "source_hash": "sha256:1c65da69a12340a590f751478b7e2e9363a45504d4a727b73e09a167cc97fae6", + "source_hash": "sha256:e1919fe4c31925d44e75df6525ba3716524fb7dc6436b85e49cdbafa54fa6fef", "source_anchor": "fn check_sidecar(", "related": { - "fix-semantic-drift-protection": null, - "reviewed-semantics": null + "reviewed-semantics": null, + "fix-semantic-drift-protection": null } }, { @@ -53,8 +53,8 @@ "reviewed": false, "intent": "Read a file's contents with caching: return the cached string if already loaded, otherwise read from disk, store in the cache, and return. Return None on I/O failure.", "source_span": [ - 1061, - 1072 + 1087, + 1098 ], "tree_path": "fn::read_cached", "source_hash": "sha256:77c7602b283fb2e67c7953f98ef11b417c83903d96011f370b7b0421778f52c2", diff --git a/crates/liyi/src/reanchor.rs b/crates/liyi/src/reanchor.rs index 6d824e1..ccac66c 100644 --- a/crates/liyi/src/reanchor.rs +++ b/crates/liyi/src/reanchor.rs @@ -21,7 +21,7 @@ pub fn resolve_reanchor_targets(paths: &[PathBuf]) -> Result, Strin /// span. Then recompute hash/anchor. When tree_path is empty or the language is /// unsupported, fall back to re-hashing at the recorded span. 
// @liyi:related tool-managed-fields -// @liyi:related tree-path-reanchor-behavior +// @liyi:related tree-path-fix-behavior // @liyi:related tree-path-empty-fallback // @liyi:related fix-never-modifies-human-fields // @liyi:related liyi-sidecar-naming-convention diff --git a/crates/liyi/src/reanchor.rs.liyi.jsonc b/crates/liyi/src/reanchor.rs.liyi.jsonc index fd520fa..cf0a936 100644 --- a/crates/liyi/src/reanchor.rs.liyi.jsonc +++ b/crates/liyi/src/reanchor.rs.liyi.jsonc @@ -6,13 +6,13 @@ { "item": "resolve_reanchor_targets", "reviewed": false, - "intent": "Expand a list of file and directory paths into concrete .liyi.jsonc file paths. Directories are walked recursively respecting .gitignore and .liyiignore. Files are included directly. Non-existent paths produce an error. Results are sorted and deduplicated.", + "intent": "Backward-compatible alias that delegates to `discovery::resolve_sidecar_targets`.", "source_span": [ - 15, - 40 + 11, + 13 ], "tree_path": "fn::resolve_reanchor_targets", - "source_hash": "sha256:7e38409f96830a3b46075be4b7055db02c4a7a7ed388923588570bba01bbcf25", + "source_hash": "sha256:da4d96d82f119c72456ff6b56219b8c109c2b2a2a26f1282967e6317b30e00c8", "source_anchor": "pub fn resolve_reanchor_targets(paths: &[PathBuf]) -> Result, String> {" }, { @@ -20,18 +20,18 @@ "reviewed": false, "intent": "Re-hash source spans in a sidecar file. If do_migrate is set, run schema migration and write back. If target_item and target_span are both provided, update only that item's span and rehash. Otherwise, for every spec: if tree_path is non-empty and a tree-sitter grammar is available, locate the item by structural identity and update the span; then recompute hash/anchor. Reject if only one of --item/--span is provided. 
Derive the source file path by stripping the .liyi.jsonc suffix from the sidecar path.", "source_span": [ - 55, - 156 + 28, + 129 ], "tree_path": "fn::run_reanchor", "source_hash": "sha256:3ee785c5a09ff156f5e606425771ab8c12f43445a62c48cc8be659ad7351e5d6", "source_anchor": "pub fn run_reanchor(", "related": { "tool-managed-fields": null, - "fix-never-modifies-human-fields": null, "tree-path-empty-fallback": null, "liyi-sidecar-naming-convention": null, - "tree-path-reanchor-behavior": null + "fix-never-modifies-human-fields": null, + "tree-path-fix-behavior": null } } ] diff --git a/crates/liyi/src/tree_path/mod.rs b/crates/liyi/src/tree_path/mod.rs index 515cb05..5627bdd 100644 --- a/crates/liyi/src/tree_path/mod.rs +++ b/crates/liyi/src/tree_path/mod.rs @@ -5,7 +5,7 @@ //! For example, `fn::add_money` or `impl::Money::fn::new`. //! //! When `tree_path` is populated and a tree-sitter grammar is available for -//! the source language, `liyi reanchor` and `liyi check --fix` use it to +//! the source language, `liyi check --fix` uses it to //! locate items by structural identity, making span recovery deterministic //! across formatting changes, import additions, and line reflows. diff --git a/docs/liyi-01x-roadmap.md b/docs/liyi-01x-roadmap.md index ba26d41..43b74ac 100644 --- a/docs/liyi-01x-roadmap.md +++ b/docs/liyi-01x-roadmap.md @@ -680,7 +680,7 @@ The primary mechanism for transitioning intent from "agent-inferred" to "human-a - Interactive by default when stdin is a TTY: show intent + source span, prompt `[y]es / [n]o / [e]dit / [s]kip`. - Batch mode via `--yes` or when non-TTY. - `--dry-run`, `--item ` flags. -- Reanchors on approval (fills `source_hash`, `source_anchor`). +- Fills `source_hash` and `source_anchor` on approval. ### M3.2. 
`liyi init` — scaffold command ✅ diff --git a/docs/liyi-design.md b/docs/liyi-design.md index a6dfba8..590d50e 100644 --- a/docs/liyi-design.md +++ b/docs/liyi-design.md @@ -372,13 +372,13 @@ The `source` path is relative to the repository root — the same path you'd pas } ``` -`source_hash`, `source_anchor`, and `tree_path` are tool-managed — the agent writes only `source_span` and the tool fills in the rest (see *Per-item staleness* and *Structural identity via `tree_path`* below). Agents MAY write `tree_path` if they can infer the AST path, but the tool will overwrite it with the canonical form on the next `liyi reanchor`. `"intent": "=doc"` is a reserved sentinel meaning "the docstring already captures intent" — the agent uses it when the source docstring contains behavioral requirements (constraints, error conditions, properties), not just a functional summary (see *`"=doc"` in the sidecar* below). +`source_hash`, `source_anchor`, and `tree_path` are tool-managed — the agent writes only `source_span` and the tool fills in the rest (see *Per-item staleness* and *Structural identity via `tree_path`* below). Agents MAY write `tree_path` if they can infer the AST path, but the tool will overwrite it with the canonical form on the next `liyi check --fix`. `"intent": "=doc"` is a reserved sentinel meaning "the docstring already captures intent" — the agent uses it when the source docstring contains behavioral requirements (constraints, error conditions, properties), not just a functional summary (see *`"=doc"` in the sidecar* below). -`"version"` is required. The linter checks it and rejects unknown versions. This costs nothing now and prevents painful migration when the schema evolves (e.g., adding `"related"` edges, structured fields in post-0.1). A JSON Schema definition ships alongside the linter for editor validation and autocompletion (see *Appendix: JSON Schema* below). 
When the schema changes, the linter will accept both `"0.1"` and the new version during a transition window, and `liyi reanchor --migrate` will upgrade sidecar files in place. +`"version"` is required. The linter checks it and rejects unknown versions. This costs nothing now and prevents painful migration when the schema evolves (e.g., adding `"related"` edges, structured fields in post-0.1). A JSON Schema definition ships alongside the linter for editor validation and autocompletion (see *Appendix: JSON Schema* below). When the schema changes, the linter will accept both `"0.1"` and the new version during a transition window, and `liyi migrate` will upgrade sidecar files in place. -**`liyi reanchor --migrate` behavior.** When the schema version changes (e.g., 0.1 → 0.2), `--migrate` reads each `.liyi.jsonc`, adds any newly required fields with default values, removes deprecated fields, updates `"version"` to the new version, and writes the file back. It is idempotent — running it twice produces the same output. It does not re-hash spans or re-infer intent; it only transforms the schema envelope. Migration is always additive in 0.x: no field present in 0.1 will change meaning, only new fields may appear. +**`liyi migrate` behavior.** When the schema version changes (e.g., 0.1 → 0.2), `liyi migrate` reads each `.liyi.jsonc`, adds any newly required fields with default values, removes deprecated fields, updates `"version"` to the new version, and writes the file back. It is idempotent — running it twice produces the same output. It does not re-hash spans or re-infer intent; it only transforms the schema envelope. Migration is always additive in 0.x: no field present in 0.1 will change meaning, only new fields may appear. After human review — either the human adds `@liyi:intent` in the source file (see *Source-level intent* below), or sets `"reviewed": true` in the sidecar via CLI or IDE code action. Both paths mark the item as reviewed. 
When `"reviewed"` is set to `true`, `"confidence"` is removed — a human voucher replaces agent self-assessment. If the source later changes and the agent re-infers (producing a new unreviewed spec), `"confidence"` reappears: @@ -429,7 +429,7 @@ Item-level intent carries machine metadata → JSONC wins. -Both `source_hash` and `source_anchor` are **tool-managed fields**. The agent writes only `source_span` — the tool (`liyi reanchor`, or `liyi check --fix`) computes the hash and anchor deterministically from the source file. This is the same principle as not letting agents author lockfile checksums: the tool reads the actual bytes, so fabricated or hallucinated hashes are impossible. +Both `source_hash` and `source_anchor` are **tool-managed fields**. The agent writes only `source_span` — the tool (`liyi check --fix`) computes the hash and anchor deterministically from the source file. This is the same principle as not letting agents author lockfile checksums: the tool reads the actual bytes, so fabricated or hallucinated hashes are impossible. The agent records each item's line range (`source_span`) when writing the spec. The linter reads those lines from the source file, hashes them, and compares against `source_hash`. This gives per-item staleness without the linter needing to parse any language — it just reads a slice of lines. @@ -441,12 +441,12 @@ The correct mitigation is language-aware span anchoring — resolving spec posit Without a `tree_path`, the fallback is: batch false positives on any line-shifting edit, corrected on the next agent inference pass. The damage is transient and mechanical — the agent re-reads the file, re-records spans, re-hashes — but noisy in CI until it does. Still fewer false positives than file-level hashing (where a docstring typo marks every spec in the file stale with no way to distinguish which items actually changed). 
-**Span-shift detection (included in 0.1).** When the linter detects a hash mismatch and no `tree_path` is available (or tree-sitter has no grammar for the language), it falls back to scanning ±100 lines for content matching the recorded hash. If the same content appears at an offset (e.g., shifted down by 3 lines because an import was added), the linter reports `SHIFTED` rather than `STALE`. With `--fix`, the span is auto-corrected in the sidecar; without `--fix`, the linter reports the shift but does not write. Once a delta is established for one item, subsequent items in the same file are adjusted by the same delta before checking — so a single import insertion resolves in one probe, not twenty. If no match is found within the window, the linter gives up and reports `STALE` as usual. This is the same heuristic `patch(1)` uses with a fuzz factor — a linear scan over a bounded window, ~50 lines, no parser. Combined with `liyi reanchor`, this eliminates the most common source of false positives (line-shifting edits) without language-specific tooling. For files with `tree_path` populated, tree-sitter-based anchoring supersedes this heuristic entirely — see the next section. +**Span-shift detection (included in 0.1).** When the linter detects a hash mismatch and no `tree_path` is available (or tree-sitter has no grammar for the language), it falls back to scanning ±100 lines for content matching the recorded hash. If the same content appears at an offset (e.g., shifted down by 3 lines because an import was added), the linter reports `SHIFTED` rather than `STALE`. With `--fix`, the span is auto-corrected in the sidecar; without `--fix`, the linter reports the shift but does not write. Once a delta is established for one item, subsequent items in the same file are adjusted by the same delta before checking — so a single import insertion resolves in one probe, not twenty. If no match is found within the window, the linter gives up and reports `STALE` as usual. 
This is the same heuristic `patch(1)` uses with a fuzz factor — a linear scan over a bounded window, ~50 lines, no parser. Combined with `liyi check --fix`, this eliminates the most common source of false positives (line-shifting edits) without language-specific tooling. For files with `tree_path` populated, tree-sitter-based anchoring supersedes this heuristic entirely — see the next section. ### Structural identity via `tree_path` -`tree_path` is an optional field on both `itemSpec` and `requirementSpec` that provides **structural identity** — matching a spec to its source item by AST node path rather than line number. When present and non-empty, `liyi reanchor` and `liyi check --fix` use tree-sitter to locate the item by its structural position in the parse tree, then update `source_span` to the item's current line range. This makes span recovery deterministic across formatting changes, import additions, line reflows, and any other edit that moves lines without changing the item's identity. +`tree_path` is an optional field on both `itemSpec` and `requirementSpec` that provides **structural identity** — matching a spec to its source item by AST node path rather than line number. When present and non-empty, `liyi check --fix` uses tree-sitter to locate the item by its structural position in the parse tree, then updates `source_span` to the item's current line range. This makes span recovery deterministic across formatting changes, import additions, line reflows, and any other edit that moves lines without changing the item's identity. **Format.** A `tree_path` is a `::` delimited path of tree-sitter node kinds and name tokens that uniquely identifies an item within a file. Examples: @@ -464,15 +464,14 @@ The path identifies the item by node kind and name, not by position. The tool co **Quoting and injection.** Names containing spaces, `::`, or quotes are double-quoted with backslash escaping (`test::"add function"`). 
For multi-language files (M9), an injection marker `//lang` attaches to the preceding segment to cross a language boundary (`key::run//bash::fn::setup_env`); the `//` delimiter requires no shell escaping. The full grammar is specified in the roadmap appendix (tree_path Grammar v0.2). -**Behavior during reanchor and check.** +**Behavior during check.** - -1. `liyi reanchor`: Parse the source file with tree-sitter. For each spec with a non-empty `tree_path`, query the parse tree for a node matching the path. If found, update `source_span` to the node's line range, recompute `source_hash` and `source_anchor`. If not found (item was renamed or deleted), report an error — do not silently fall back. -2. `liyi check --fix`: Same tree-sitter lookup. If the hash mismatches but the `tree_path` resolves to a valid node, update the span (the item moved but is still present). If the `tree_path` doesn't resolve, fall back to span-shift heuristic. -3. `liyi check` (without `--fix`): Use `tree_path` to verify the span points to the correct item. If it doesn't (span drifted, but `tree_path` still resolves), report `SHIFTED` with the correct target position. - + +1. `liyi check --fix`: Parse the source file with tree-sitter. For each spec with a non-empty `tree_path`, query the parse tree for a node matching the path. If found and the content is unchanged (pure positional shift), update `source_span`, `source_hash`, and `source_anchor`. If found but the content also changed (semantic drift), update `source_span` to track the item's location but leave `source_hash` unchanged — the spec remains stale for review. If the `tree_path` doesn't resolve, fall back to span-shift heuristic. +2. `liyi check` (without `--fix`): Use `tree_path` to verify the span points to the correct item. If it doesn't (span drifted, but `tree_path` still resolves), report `SHIFTED` with the correct target position. 
+ -**Diagnostic clarity.** When a spec has no `tree_path` and the shift heuristic also fails, the diagnostic indicates why tree-path recovery was skipped — e.g., "no tree_path set, falling back to shift heuristic" — so that users can add the missing field or run `liyi reanchor` to auto-populate it. Diagnostics distinguish "no tree_path available" from "tree_path resolution failed (item may have been renamed or deleted)." +**Diagnostic clarity.** When a spec has no `tree_path` and the shift heuristic also fails, the diagnostic indicates why tree-path recovery was skipped — e.g., "no tree_path set, falling back to shift heuristic" — so that users can run `liyi check --fix` to auto-populate it. Diagnostics distinguish "no tree_path available" from "tree_path resolution failed (item may have been renamed or deleted)." **Empty string fallback.** When `tree_path` is `""` (empty string) or absent, the tool falls back to the current line-number-based behavior — span-shift heuristic, `source_anchor` matching, delta propagation. This accommodates: @@ -481,7 +480,7 @@ The path identifies the item by node kind and name, not by position. The tool co - **Generated code** where tree-sitter may not produce useful node kinds. - **Complex or contrived cases** where the agent or human determines that a tree path is non-obvious or ambiguous. -The agent MAY set `tree_path` to `""` explicitly to signal "I considered structural identity and it doesn't apply here." Absence of the field is equivalent to `""`. `liyi reanchor` auto-populates `tree_path` for every spec where a clear structural path can be resolved from the current `source_span` and a supported tree-sitter grammar — agents need not set it manually. When the span doesn't correspond to a recognizable AST item (macros, generated code, unsupported languages), the tool leaves `tree_path` empty. +The agent MAY set `tree_path` to `""` explicitly to signal "I considered structural identity and it doesn't apply here." 
Absence of the field is equivalent to `""`. `liyi check --fix` auto-populates `tree_path` for every spec where a clear structural path can be resolved from the current `source_span` and a supported tree-sitter grammar — agents need not set it manually. When the span doesn't correspond to a recognizable AST item (macros, generated code, unsupported languages), the tool leaves `tree_path` empty. **Language support.** Tree-sitter support is grammar-dependent. Rust, Python, Go, JavaScript, and TypeScript are built-in. For unsupported languages, `tree_path` is left empty and the tool falls back to line-number behavior. Adding a language is a matter of adding its tree-sitter grammar crate and a small mapping of node kinds — no changes to the core protocol or schema. @@ -636,7 +635,7 @@ No `intent` field — the requirement text lives at the source site, not duplica **Requirements can live anywhere:** in the source file near the code they govern, in `README.md` alongside `@liyi:module`, in a dedicated requirements file, or in doc comments. The linter scans all non-ignored files for the marker. -**End-of-block markers.** The `@liyi:end-requirement ` marker closes a requirement block. The name must match the opening `@liyi:requirement `. When both markers are present, the linter and reanchor tool pair them by name to deterministically compute `source_span` — this is the primary span recovery mechanism for files without tree-sitter support (e.g., Markdown). The end marker uses the same name syntax (parenthesized or whitespace-delimited), full-width normalization, and multilingual aliases as the opening marker: +**End-of-block markers.** The `@liyi:end-requirement ` marker closes a requirement block. The name must match the opening `@liyi:requirement `. When both markers are present, the linter pairs them by name to deterministically compute `source_span` — this is the primary span recovery mechanism for files without tree-sitter support (e.g., Markdown). 
The end marker uses the same name syntax (parenthesized or whitespace-delimited), full-width normalization, and multilingual aliases as the opening marker: | Alias | Language | |---|---| @@ -728,31 +727,26 @@ Exit code: `--fail-on-req-changed` (default: true) — exit 1 if any reviewed sp This closes the **spec rot gap**: when requirements change, the requirement hash changes, and all items with `"related"` edges to that requirement are transitively flagged. The human reviews whether the code still satisfies the updated requirement. No silent re-inference over a potentially broken implementation — the requirement text is the anchor. -### `liyi reanchor` +### Tool-managed fields and `liyi check --fix` -`source_span` is the only positional field the agent writes. `source_hash` and `source_anchor` are tool-managed — computed by `liyi reanchor` (or the linter on first run) from the actual source file. Humans never compute them by hand. +`source_span` is the only positional field the agent writes. `source_hash`, `source_anchor`, and `tree_path` are tool-managed — computed by `liyi check --fix` from the actual source file. Humans never compute them by hand. -`liyi reanchor` is also the tool that populates hashes for new entries. When an agent writes a sidecar with `source_span` but no `source_hash`, running `liyi reanchor` (or `liyi check --fix`) reads the source lines, computes the SHA-256, and fills in both `source_hash` and `source_anchor`. This means a fresh agent-written sidecar is incomplete until the tool runs — by design. +`liyi check --fix` also populates hashes for new entries. When an agent writes a sidecar with `source_span` but no `source_hash`, running `liyi check --fix` reads the source lines, computes the SHA-256, and fills in `source_hash`, `source_anchor`, and `tree_path`. This means a fresh agent-written sidecar is incomplete until the tool runs — by design. 
-For resolving CI failures without an agent pass, the `liyi reanchor` subcommand re-hashes existing spans. It accepts one or more sidecar files or directories (recursive): +For resolving CI failures without an agent pass, `liyi check --fix` re-hashes existing spans. It accepts a `--root` flag or operates on the current directory: ```bash -$ liyi reanchor src/billing/money.rs.liyi.jsonc +$ liyi check --fix add_money [42, 58]: hash updated (source changed at same span) convert_currency [60, 85]: hash unchanged -$ liyi reanchor crates/ # reanchor all sidecars under crates/ -$ liyi reanchor a.rs.liyi.jsonc b.rs.liyi.jsonc + billing_handler [10, 35]: ↕ SHIFTED [10,35]→[12,37], hash updated ``` -This handles the case where code at those lines changed but lines didn't shift — the human has reviewed the change and is confirming "the intent still holds." The tool computes the new hash; the human never touches it. +This handles the common cases: missing hashes on fresh sidecars, positional shifts after line-changing edits, and re-hashing after the human has reviewed a change and confirmed "the intent still holds." The tool computes the new hash; the human never touches it. -If lines shifted, the span points to wrong lines. Resolution paths: +If lines shifted and tree-sitter recovery isn't available, `--fix` uses the span-shift heuristic (±100 lines, delta propagation) to auto-correct. If neither tree-sitter nor the heuristic can locate the item, the spec remains `STALE` for the agent or human to re-record the span. -- **The agent finds it** — the standard path. The agent understands code structure, re-records the span. -- **The human specifies it** — `liyi reanchor --item add_money --span 45,61`. The human looked it up in the editor ("go to definition"), the tool computes the hash. -- **Post-MVP: `--find`** — simple heuristics (grep for `fn add_money`, `def add_money`, etc.) to locate the item and update the span. Not a parser, but covers the common case. 
- -`liyi reanchor` is a thin wrapper on the same hashing logic used by `liyi check`. No LLM calls. +`liyi check --fix` is deterministic. No LLM calls. ### Prescriptive specs without code @@ -971,7 +965,7 @@ The second case matters for the scaffold workflow (see *Tree-sitter item discove The source-level path (`@liyi:intent`) and the sidecar path (`"reviewed": true`) serve different needs: - **No `"reviewed"` field to forge.** The security concern — an agent writing `"reviewed": true` directly — dissolves. Review is visible in source diffs, attributable via `git blame` on the actual source file, and covered by CODEOWNERS. An agent would have to write `@liyi:intent` in source to fake review, which is conspicuous in code review. -- **Merge conflicts become trivial.** If humans never touch the sidecar, it's fully regenerable — `liyi reanchor` after merge, zero human intervention. Same model as `Cargo.lock` or `pnpm-lock.yaml`. +- **Merge conflicts become trivial.** If humans never touch the sidecar, it's fully regenerable — `liyi check --fix` after merge, zero human intervention. Same model as `Cargo.lock` or `pnpm-lock.yaml`. - **Review is visible where it matters.** A `@liyi:intent` block above a function is visible in the normal code review flow — no need to open a separate `.liyi.jsonc` diff tab. The sidecar retains: `"item"`, `"reviewed"` (optional, defaults to `false`), `"intent"` (the agent's *inferred* intent or `"=doc"`), `"source_span"`, `"source_hash"`, `"source_anchor"`, `"confidence"`, and `"related"`. The agent writes `"item"`, `"intent"`, `"source_span"`, `"confidence"`, and `"related"`. The tool fills in `"source_hash"` and `"source_anchor"`. The human (or CLI/IDE) sets `"reviewed": true`. 
@@ -1321,7 +1315,7 @@ All three paths converge on the same report schema and the same `--validate` / ` |---|---|---| | `liyi triage --prompt` | Assemble a self-contained prompt from `liyi check --json` output — includes stale items with full context, the triage schema, assessment instructions, and output format spec. Print to stdout. | No | | `liyi triage --validate ` | Validate an agent-produced triage report against the schema; check that every assessed item corresponds to a real stale item | No | -| `liyi triage --apply [file]` | Auto-reanchor items with `cosmetic` verdict; present `semantic` items with suggested intents; flag `intent-violation` items for human review | No | +| `liyi triage --apply [file]` | Auto-fix items with `cosmetic` verdict; present `semantic` items with suggested intents; flag `intent-violation` items for human review | No | | `liyi triage --summary [file]` | Print human-readable summary of a triage report | No | The `--prompt` flag is the bridge for CI/script pipelines that have an `llm` CLI or API wrapper but no full agentic framework: @@ -1414,7 +1408,7 @@ This is the full context an assessor needs. The agent (or script, or CI wrapper) | Verdict | Meaning | Default action | |---|---|---| -| `cosmetic` | Variable rename, reformatting, comment edit — no behavioral change | Auto-reanchor (no human review needed) | +| `cosmetic` | Variable rename, reformatting, comment edit — no behavioral change | Auto-fix (no human review needed) | | `semantic` | Code legitimately evolved — intent is stale but code is correct | Update intent (human reviews suggested intent) | | `intent-violation` | Code contradicts declared intent — either code is wrong or intent is wrong | Fix code or update intent (human decides) | | `unclear` | LLM can't determine with sufficient confidence | Manual review (human decides) | @@ -1431,7 +1425,7 @@ This is the full context an assessor needs. 
The agent (or script, or CI wrapper) | `change_summary` | string | What changed in the code (1–2 sentences) | | `invariant_summary` | string | What stayed the same (1–2 sentences) | | `reasoning` | string | Why the verdict was assigned (2–3 sentences, citable in reviews) | -| `action` | enum | auto-reanchor / update-intent / fix-code-or-update-intent / manual-review | +| `action` | enum | auto-fix / update-intent / fix-code-or-update-intent / manual-review | | `suggested_intent` | string? | Proposed new intent text (only for `semantic` verdict) | | `impact` | array | Transitively affected items via `related` graph | @@ -1442,7 +1436,7 @@ This is the full context an assessor needs. The agent (or script, or CI wrapper) | CI/PR comment | `summary` + items with verdict ≠ cosmetic | Format as markdown table in PR comment | | Dashboard | `summary` for aggregate view; items for drill-down | Read JSON, render charts / tables | | LSP | Items for inline diagnostics at `source_span` | Watch `.liyi/triage.json`, map items to diagnostic locations | -| `liyi triage --apply` | Items with `verdict: cosmetic` | Auto-reanchor those items (write back to sidecars) | +| `liyi triage --apply` | Items with `verdict: cosmetic` | Auto-fix those items (write back to sidecars) | | Agent (next session) | `suggested_intent` for items with `verdict: semantic` | Read triage, propose intent updates in sidecar | | Human (terminal) | Formatted summary + triage table | `liyi triage --summary`; `--json` for raw | @@ -1460,7 +1454,7 @@ flowchart TD %% ── Batch path ── Decision -- "batch path
(many items, CI)" --> Triage["Agent triages
reads stale items, reasons about each
(or: liyi triage --prompt | llm-call)
"] - Triage -- "writes .liyi/triage.json" --> Apply["liyi triage --apply
auto-reanchors cosmetic items,
prints remaining for review
"] + Triage -- "writes .liyi/triage.json" --> Apply["liyi triage --apply
auto-fixes cosmetic items,
prints remaining for review
"] Apply --> HumanB["Human reviews
reads triage report or PR comment,
accepts suggested intents or fixes code
"] ``` @@ -1476,7 +1470,7 @@ The tradeoff: direct re-inference does not distinguish intent violations from le |---|---|---| | Agent just wrote or modified the code in the current session | Direct re-inference | The agent has full context; triage would assess its own changes against its own prior inference — little value added. | | Few stale items (≤ 5) from straightforward changes | Direct re-inference | The overhead of a structured triage report exceeds the classification benefit. | -| Many stale items from a large refactor or merge | Triage | The structured report helps humans prioritize which items need re-review vs. auto-reanchor. | +| Many stale items from a large refactor or merge | Triage | The structured report helps humans prioritize which items need re-review vs. auto-fix. | | Stale items from changes made by a *different* agent or human | Triage | The assessing agent lacks the original author's context; the old-intent-vs-new-code comparison is more valuable. | | CI pipeline processing a PR | Triage | Batch assessment with structured output is the natural fit for non-interactive workflows. | | Items with `"reviewed": true` or `@liyi:intent` in source | Triage recommended | These items have human-vouched intent. When they go stale, the change deserves explicit assessment against the human's stated intent, not silent re-inference. | @@ -1505,11 +1499,9 @@ The agent instruction (rule 10) permits both paths. Teams can mandate triage for The shift heuristic (non-`tree_path` fallback) is inherently safe — it only matches when the *exact same content* is found at an offset — so no additional protection is needed there. -`liyi reanchor` remains as the explicit manual tool for targeted re-hashing (e.g., `liyi reanchor --item add_money --span 45,61`). `--fix` is the batch equivalent for CI and post-merge workflows. - ### Implementation -~3000 lines of Rust across two crates (`liyi` library + `liyi-cli` binary), organized as a Cargo workspace under `crates/`. 
Core check logic is ~900 lines; the remainder covers tree-sitter-based span recovery, CLI, diagnostics, span-shift detection, `--fix` write-back, marker normalization, `reanchor`, and `approve`. Dependencies: `serde`, `serde_json`, `sha2`, `ignore`, `regex`, `tree-sitter`, `tree-sitter-rust` (library); `clap` (CLI). +~3000 lines of Rust across two crates (`liyi` library + `liyi-cli` binary), organized as a Cargo workspace under `crates/`. Core check logic is ~900 lines; the remainder covers tree-sitter-based span recovery, CLI, diagnostics, span-shift detection, `--fix` write-back, marker normalization, `migrate`, and `approve`. Dependencies: `serde`, `serde_json`, `sha2`, `ignore`, `regex`, `tree-sitter`, `tree-sitter-rust` (library); `clap` (CLI). No config file reader. `.liyiignore` handles file exclusion; config-based ignore patterns are a post-MVP consideration. @@ -1517,7 +1509,7 @@ No config file reader. `.liyiignore` handles file exclusion; config-based ignore **Performance.** The linter's work is directory walking + line slicing + SHA-256 hashing — all I/O-bound and parallelizable. A monorepo with 10,000 source files and proportional sidecars should complete in seconds. The `ignore` crate already handles `.gitignore`/`.liyiignore` filtering efficiently. -**Merge conflicts in sidecars.** Two branches editing the same source file will both update `source_span`/`source_hash` in the co-located `.liyi.jsonc`, causing a merge conflict. Resolution: `liyi reanchor` after merge, same model as `pnpm install` / `yarn install` resolving lockfile conflicts — re-run the tool, the derived fields are recomputed from the merged source. True intent-text conflicts (both branches edited the same item's `intent` prose) are rare and handled by normal git conflict resolution. +**Merge conflicts in sidecars.** Two branches editing the same source file will both update `source_span`/`source_hash` in the co-located `.liyi.jsonc`, causing a merge conflict. 
Resolution: `liyi check --fix` after merge, same model as `pnpm install` / `yarn install` resolving lockfile conflicts — re-run the tool, the derived fields are recomputed from the merged source. True intent-text conflicts (both branches edited the same item's `intent` prose) are rare and handled by normal git conflict resolution. ### Diagnostic catalog @@ -1527,7 +1519,7 @@ Every diagnostic the linter can emit, with its severity, audience, exit code con | Audience | Meaning | Examples | |---|---|---| -| `tool` | Fixable by `liyi reanchor` or `liyi check --fix` — no reasoning required | missing `source_hash`, SHIFTED | +| `tool` | Fixable by `liyi check --fix` — no reasoning required | missing `source_hash`, SHIFTED | | `agent` | Fixable by agent re-inference or sidecar editing — requires reading source but no human judgment | STALE (content changed), UNTRACKED, MISSING RELATED | | `human` | Requires human judgment — review, approval, or design decision | unreviewed, intent-violation, unknown requirement | @@ -1538,7 +1530,7 @@ This distinction was motivated by dogfooding experience: an AI agent maintaining | Spec current and reviewed | info | — | 0 | `: : ✓ reviewed, current` | — | | Spec current but unreviewed | warning | human | 1 if `--fail-on-unreviewed` | `: : ⚠ unreviewed` | `liyi approve ` | | Source hash mismatch (stale) | warning | agent | 1 if `--fail-on-stale` | `: : ⚠ STALE — source changed since spec was written` | — | -| Missing source_hash (fresh spec) | warning | tool | 1 if `--fail-on-stale` | `: : ⚠ missing source_hash` | `liyi reanchor ` | +| Missing source_hash (fresh spec) | warning | tool | 1 if `--fail-on-stale` | `: : ⚠ missing source_hash` | `liyi check --fix` | | Source hash found at offset (shifted) | info | tool | 0 (auto-corrected with `--fix`) | `: : ↕ SHIFTED [old]→[new]` | `liyi check --fix` | | Referenced requirement hash changed | warning | agent | 1 if `--fail-on-req-changed` | `: : ⚠ REQ CHANGED — requirement "" updated` | — | | 
`@liyi:related X` where X doesn't exist | error | human | 1 | `: : ✗ ERROR — unknown requirement ""` | — | @@ -1547,13 +1539,13 @@ This distinction was motivated by dogfooding experience: an AI agent maintaining | Requirement with no referencing items | info | — | 0 | `: : · requirement has no related items` | — | | Item annotated `@liyi:trivial` | info | — | 0 | `: : · trivial` | — | | Item annotated `@liyi:ignore` | info | — | 0 | `: : · ignored` | — | -| `source_span` past EOF | error | tool | 1 | `: : ✗ source_span [s, e] extends past end of file ( lines)` | `liyi reanchor ` | +| `source_span` past EOF | error | tool | 1 | `: : ✗ source_span [s, e] extends past end of file ( lines)` | `liyi check --fix` | | Inverted or zero-length `source_span` | error | human | 1 | `: : ✗ invalid source_span [e, s]` | — | | Malformed `source_hash` | error | human | 1 | `: : ✗ malformed source_hash` | — | | Duplicate item + span | warning | human | 0 | `: : ⚠ duplicate entry` | — | | Source file deleted / not found | error | human | 1 | `: ✗ source file not found — spec is orphaned` | — | | Malformed JSONC | error | human | 2 | `: ✗ parse error: ` | — | -| Unknown `"version"` | error | tool | 2 | `: ✗ unknown version ""` | `liyi reanchor --migrate ` | +| Unknown `"version"` | error | tool | 2 | `: ✗ unknown version ""` | `liyi migrate ` | | Cycle in requirement hierarchy | error | human | 1 | `: : ✗ requirement cycle detected: ` | — | | Ambiguous sidecar (duplicate naming) | warning | human | 0 | `: ⚠ ambiguous sidecar — both .liyi.jsonc and .liyi.jsonc exist` | — | @@ -1589,14 +1581,14 @@ When writing or modifying code: 2. When module-level invariants are apparent, write an `@liyi:module` block — in the directory's existing module doc (`README.md`, `doc.go`, `mod.rs` doc comment, etc.) or in a dedicated `LIYI.md`. Use the doc markup language's comment syntax for the marker. 3. 
If a source item has a `@liyi:related ` annotation, record the dependency in `.liyi.jsonc` as `"related": {"": null}`. The tool fills in the requirement's current hash. 4. For each `@liyi:requirement ` block encountered, ensure it has a corresponding entry in the co-located `.liyi.jsonc` with `"requirement"` and `"source_span"`. (The tool fills in `"source_hash"`.) -5. If a spec has `"related"` edges referencing a requirement, do not overwrite the requirement text during inference. Re-anchor the spec (update `source_span`) but preserve the `"related"` edges. Do not write `source_hash` — the tool fills it in. +5. If a spec has `"related"` edges referencing a requirement, do not overwrite the requirement text during inference. Update the spec (update `source_span`) but preserve the `"related"` edges. Do not write `source_hash` — the tool fills it in. 6. Only generate adversarial tests from items that have a `@liyi:intent` annotation in source or `"reviewed": true` in the sidecar (i.e., human-reviewed intent). When `@liyi:intent` is present in source, use its prose (or the docstring for `=doc`) as the authoritative intent for test generation. 7. Tests should target boundary conditions, error-handling gaps, property violations, and semantic mismatches. Prioritize tests a subtly wrong implementation would fail. 8. Skip items annotated with `@liyi:ignore` or `@liyi:trivial`, and files matched by `.liyiignore`. Respect `@liyi:nontrivial` — if present, always infer a spec for that item and never override with `@liyi:trivial`. 9. Use a different model for test generation than the one that wrote the code, when possible. 10. When `liyi check` reports stale items, choose one of two paths: - **Direct re-inference** (preferred during interactive editing with few stale items): re-read the source, update `source_span` and `intent` in the sidecar, leave `"reviewed"` unset. 
Appropriate when you are the agent that just made the change, the number of stale items is small, and the changes are straightforward. - - **Triage** (preferred for batch workflows, CI, or when many items are stale): assess each item — is the change cosmetic, semantic, or an intent violation? Write the assessment to `.liyi/triage.json` following the triage report schema. For cosmetic changes, run `liyi triage --apply` to auto-reanchor. For semantic changes, propose updated intent in `suggested_intent`. For intent violations, flag for human review. Prefer triage when stale items have `"reviewed": true` or `@liyi:intent` in source — these carry human-vouched intent that deserves explicit assessment, not silent re-inference. + - **Triage** (preferred for batch workflows, CI, or when many items are stale): assess each item — is the change cosmetic, semantic, or an intent violation? Write the assessment to `.liyi/triage.json` following the triage report schema. For cosmetic changes, run `liyi triage --apply` to auto-fix. For semantic changes, propose updated intent in `suggested_intent`. For intent violations, flag for human review. Prefer triage when stale items have `"reviewed": true` or `@liyi:intent` in source — these carry human-vouched intent that deserves explicit assessment, not silent re-inference. 11. Before committing, run `liyi check`. If it reports coverage gaps (missing requirement specs, missing related edges), resolve **all** gaps in the same commit. When running in agent mode, consume the `liyi check --prompt` output and apply its instructions. Do not commit with unresolved coverage gaps — CI will reject it. 
``` @@ -1678,7 +1670,7 @@ An LSP (Language Server Protocol) server wraps `liyi check` output as editor dia | LSP Feature | 立意 Use | |---|---| | **Diagnostics** | Inline warnings at STALE, REQ CHANGED, and unreviewed sites | -| **Code Actions** | \"Accept inferred intent\" (sets `\"reviewed\": true` in sidecar), \"Assert intent in source\" (inserts `@liyi:intent`), \"Reanchor span\", \"Go to requirement\", \"Challenge\" (on-demand semantic verification via LLM) | +| **Code Actions** | \"Accept inferred intent\" (sets `\"reviewed\": true` in sidecar), \"Assert intent in source\" (inserts `@liyi:intent`), \"Fix span\", \"Go to requirement\", \"Challenge\" (on-demand semantic verification via LLM) | | **Hover** | Show the intent spec when hovering over a specced item | | **Go to Definition** | Jump from `@liyi:related X` to the `@liyi:requirement X` block | @@ -1694,11 +1686,11 @@ Candidate tools: |---|---| | `liyi_check` | Run `liyi check` on a path, return structured results (stale, reviewed, diverged) | | `liyi_check_json` | Run `liyi check --json` — return full context for stale items, suitable for agent-driven triage | -| `liyi_reanchor` | Re-hash spans for a given file | +| `liyi_fix` | Fix spans and fill tool-managed fields for a given path | | `liyi_get_requirement` | Look up a named requirement — return its text, location, and current hash | | `liyi_list_related` | List all items with `"related"` edges to a given requirement | | `liyi_triage_validate` | Validate an agent-produced triage report against the schema | -| `liyi_triage_apply` | Apply a validated triage report — auto-reanchor cosmetic items | +| `liyi_triage_apply` | Apply a validated triage report — auto-fix cosmetic items | The MCP tools provide *context for* reasoning and *application of* results. The reasoning itself (triage assessment, challenge verdicts) happens in the agent — which already has model access, conversation context, and the AGENTS.md instruction. 
This avoids duplicating LLM call logic inside the MCP server. @@ -1718,7 +1710,7 @@ For each unapproved item in the target file(s), display: Prompt: `approve? [y]es / [n]o / [e]dit intent / [s]kip` -- **y** — set `"reviewed": true`, update `source_hash` and `source_anchor` via reanchor. +- **y** — set `"reviewed": true`, update `source_hash` and `source_anchor`. - **n** — set `"reviewed": false` (explicit rejection). Leave hash unchanged. - **e** — open `$EDITOR` with the intent text. After save, re-display and re-prompt. - **s** — skip without changing anything. @@ -1731,7 +1723,7 @@ liyi approve --yes src/money.rs "add_money" # approve specific item liyi approve --yes . # approve all sidecars under cwd ``` -Sets `"reviewed": true` and reanchors without prompting. +Sets `"reviewed": true` and fixes hashes without prompting. **Flags:** - `--yes` — non-interactive, approve all matched items. @@ -1870,7 +1862,7 @@ After the agent processes this scaffold, the sidecar might look like: } ``` -Note: the agent removed the `impl::Money` container entry (containers are often not worth speccing independently), used `=doc` for the well-documented struct, `=trivial` for the getter, and wrote explicit intent for the rest. The `_hints` fields are gone — `liyi reanchor` strips them. +Note: the agent removed the `impl::Money` container entry (containers are often not worth speccing independently), used `=doc` for the well-documented struct, `=trivial` for the getter, and wrote explicit intent for the rest. The `_hints` fields are gone — `liyi check --fix` strips them. #### `_hints` — cold-start inference aids @@ -1893,7 +1885,7 @@ When `liyi init ` creates a skeleton sidecar for an existing file u - The absence of a schema contract *is* the contract. Downstream tooling cannot build on `_hints` because the shape is not guaranteed. This prevents accidental coupling to an ephemeral inference aid. - `liyi init` can freely evolve what hints it emits without breaking anything. 
-**Lifecycle.** `liyi init` writes `_hints` → the agent reads hints, infers intent, fills the `"intent"` field → `liyi reanchor` strips `_hints` from all spec entries. The linter ignores `_hints` (does not error on its presence). Hints are never committed in steady-state sidecars — they exist only during the cold-start inference window. +**Lifecycle.** `liyi init` writes `_hints` → the agent reads hints, infers intent, fills the `"intent"` field → `liyi check --fix` strips `_hints` from all spec entries. The linter ignores `_hints` (does not error on its presence). Hints are never committed in steady-state sidecars — they exist only during the cold-start inference window. **Per-item, not per-file.** Each spec entry gets its own `_hints` based on that item's span. A function with 47 commits and 3 bug fixes gets different hints than the simple getter next to it. @@ -2077,7 +2069,7 @@ This section estimates the effort to *build* 立意 itself — the linter, the c | Agent instruction (AGENTS.md paragraph) | 1 hour | 15 minutes | | `@liyi:module` convention + examples | 30 minutes | 10 minutes | | `.liyi.jsonc` examples for a demo repo | 1–2 hours | 20 minutes | -| CI linter (`liyi check` + `liyi reanchor` + `liyi approve` + `liyi init`, ~3000 lines) | 3–5 days | 2–4 hours | +| CI linter (`liyi check` + `liyi check --fix` + `liyi approve` + `liyi init` + `liyi migrate`, ~3000 lines) | 3–5 days | 2–4 hours | | Blog post explaining the practice | 1 day | 2–3 hours | | **Total** | **3–5 days** | **Half a day** | @@ -2085,7 +2077,7 @@ This section estimates the effort to *build* 立意 itself — the linter, the c ## What This Is -- A **CI linter** — `liyi check` + `liyi reanchor`, ~3000 lines across two crates (with tree-sitter-based span recovery). The enforcement mechanism. +- A **CI linter** — `liyi check` + `liyi check --fix`, ~3000 lines across two crates (with tree-sitter-based span recovery). The enforcement mechanism. 
- A **spec convention** — `@liyi:module` blocks (module intent) + `@liyi:requirement` blocks (named requirements) + `.liyi.jsonc` (item-level intent and requirement tracking, JSONC). - A **dependency model** — `@liyi:related` edges from code items to named requirements, with transitive staleness. - A **triage protocol** (post-MVP) — `liyi check --json` provides rich stale-item context; an agent (using whatever model it already has) assesses each item and writes a structured report; `liyi triage --apply` acts on the report. The binary stays deterministic and offline; the LLM reasoning lives in the agentic workflow. @@ -2128,7 +2120,7 @@ Each level is independently valuable. Stop wherever the cost outweighs the benef | **1. The review** | Review inferred intent in PRs — set `"reviewed": true` in sidecar (quick) or add `@liyi:intent` in source (explicit) | The review surface for intent is typically ~10% of the code surface — a few lines of spec per item instead of the full implementation. You catch wrong intent before wrong code gets tested. Careless review undermines adversarial testing quality — see *Why careless review is self-limiting* in the Security Model. | Seconds per item | | **2. The docs** | Add `## 立意` sections to READMEs / doc comments | Module-level invariants are documented, visible in rendered docs, discoverable by agents and humans. This is just good documentation practice. | 5 min per module | | **3. The linter** | Run `liyi check` in CI | Stale specs fail the build. You know which items changed since their intent was written. Deterministic enforcement. | Install a binary | -| **3.5. Triage** | When stale items are flagged, the agent assesses each: cosmetic, semantic, or intent violation. `liyi triage --apply` auto-reanchors cosmetics. Skippable — agents can directly re-infer intent instead (see *Direct re-inference* in the triage section). Triage is most valuable for batch workflows (CI, large PRs) and when stale items carry human-reviewed intent. 
| Noise from refactors and renames is eliminated automatically. Remaining items are sorted by action type — update intent, fix code, or manual review. Graph-aware impact propagation flags transitively affected items. | Agent follows the triage instruction (or skips triage and re-infers directly) | +| **3.5. Triage** | When stale items are flagged, the agent assesses each: cosmetic, semantic, or intent violation. `liyi triage --apply` auto-fixes cosmetics. Skippable — agents can directly re-infer intent instead (see *Direct re-inference* in the triage section). Triage is most valuable for batch workflows (CI, large PRs) and when stale items carry human-reviewed intent. | Noise from refactors and renames is eliminated automatically. Remaining items are sorted by action type — update intent, fix code, or manual review. Graph-aware impact propagation flags transitively affected items. | Agent follows the triage instruction (or skips triage and re-infers directly) | | **4. Challenge** | Click "Challenge" on a specced item in the editor, or include challenge in the agent workflow | A second model verifies code against intent, or intent against requirement. On-demand semantic verification — no pipeline, no test files. The trust gap between reviewing intent and trusting it blindly closes. | One click / prompt per item | | **5. Requirements** | Write `@liyi:requirement` blocks and `@liyi:related` annotations for critical-path items | Requirements are tracked, hashable, versionable. When a requirement changes, all related items are transitively flagged. Challenge verifies intent actually covers the requirement, not just that hashes match. | Minutes per requirement | | **6. The adversarial tests** | Configure a different model for test generation from reviewed specs | A second model reads the *intent* (not the code) and tries to break the implementation. Different training data, different blind spots. 
| Agent configuration | @@ -2218,7 +2210,7 @@ What the day-to-day experience looks like once all deliverables exist: 1. **Write code.** (Or have an agent write it.) The agent instruction in AGENTS.md tells it to also generate `.liyi.jsonc` specs alongside the code. 2. **Review intent, not implementation.** The agent infers intent and writes the sidecar. Read the inferred intent (via IDE hover or in the sidecar diff). If correct, accept it — either set `"reviewed": true` (one click, zero source noise) or add `@liyi:intent=doc` in source (one line, maximum visibility). If wrong, correct the intent: write `@liyi:intent ` in source with your own words, or edit the docstring. The review surface is ~10% of the code surface per item — a few lines of constraints and invariants instead of the full implementation. 3. **CI runs `liyi check`.** The linter verifies that existing specs aren't stale (source hash matches) and reports unreviewed specs. Stale specs fail the build. -4. **Handle staleness.** When stale items are flagged, the agent takes one of two paths. **Direct re-inference** (the fast path): the agent re-reads the source, updates `source_span` and `intent` in the sidecar, and leaves `"reviewed"` unset — appropriate during interactive editing with few stale items. **Triage** (the batch path, optional): the agent assesses each stale item as cosmetic, semantic, or intent violation, writes `.liyi/triage.json`, and `liyi triage --apply` auto-reanchors cosmetics. Triage is most valuable for large PRs, CI pipelines, or when stale items have human-reviewed intent (`"reviewed": true` or `@liyi:intent`). +4. **Handle staleness.** When stale items are flagged, the agent takes one of two paths. **Direct re-inference** (the fast path): the agent re-reads the source, updates `source_span` and `intent` in the sidecar, and leaves `"reviewed"` unset — appropriate during interactive editing with few stale items. 
**Triage** (the batch path, optional): the agent assesses each stale item as cosmetic, semantic, or intent violation, writes `.liyi/triage.json`, and `liyi triage --apply` auto-fixes cosmetics. Triage is most valuable for large PRs, CI pipelines, or when stale items have human-reviewed intent (`"reviewed": true` or `@liyi:intent`). 5. **Adversarial testing (optional).** A different model reads the reviewed intents and generates tests designed to break the implementation. Different training data, different blind spots. 6. **Iterate.** When source changes, the hash mismatches, the spec is flagged stale, the agent re-infers or triages, the human re-reviews. The cycle is fast because reviewing intent is fast. @@ -2305,13 +2297,13 @@ The success criterion remains: at least one team reports catching a real defect ### 4. `source_span` brittleness in 0.1 (mitigated by `tree_path`) -Line-number-based spans mean that any edit changing line counts (adding an import, inserting a blank line) invalidates every spec whose `source_span` falls at or below the edit point. The span-shift heuristic (±100-line scan, delta propagation) handles uniform shifts — the most common case — and reports `SHIFTED` (auto-corrected) rather than `STALE`. `liyi reanchor` handles non-uniform shifts manually. +Line-number-based spans mean that any edit changing line counts (adding an import, inserting a blank line) invalidates every spec whose `source_span` falls at or below the edit point. The span-shift heuristic (±100-line scan, delta propagation) handles uniform shifts — the most common case — and reports `SHIFTED` (auto-corrected) rather than `STALE`. `liyi check --fix` handles non-uniform shifts when `tree_path` is available. **v8.4 update:** This risk prompted the introduction of `tree_path` in 0.1 (see *Structural identity via `tree_path`*). When `tree_path` is populated, span recovery is deterministic — the tool locates the item by AST identity regardless of how lines shifted. 
The span-shift heuristic remains as a fallback for items without a `tree_path` (macros, generated code, unsupported languages). -The remaining friction for items without `tree_path`: between agent sessions, manual edits that shift lines without an agent re-inference will produce CI noise until the developer runs `liyi reanchor`. This is the same class of friction as lockfile conflicts (run `pnpm install` after merge), but it's friction nonetheless. For supported languages (Rust in 0.1), `tree_path` eliminates this friction entirely. +The remaining friction for items without `tree_path`: between agent sessions, manual edits that shift lines without an agent re-inference will produce CI noise until the developer runs `liyi check --fix`. This is the same class of friction as lockfile conflicts (run `pnpm install` after merge), but it's friction nonetheless. For supported languages (Rust in 0.1), `tree_path` eliminates this friction entirely. -**Mitigation in 0.1:** `tree_path` structural anchoring (primary), span-shift auto-correction (fallback), `liyi reanchor`, agent re-inference on next pass. +**Mitigation in 0.1:** `tree_path` structural anchoring (primary), span-shift auto-correction (fallback), `liyi check --fix`, agent re-inference on next pass. ### 5. Convention absorption and licensing (added 2026-03-06) @@ -2388,7 +2380,7 @@ pub fn add_money(a: Money, b: Money) -> Result { ### 2. Sidecar (after agent + tool) -The agent writes `source_span` and `intent`. The tool (`liyi reanchor` or the linter on first run) fills in `source_hash` and `source_anchor`. The result on disk: +The agent writes `source_span` and `intent`. The tool (`liyi check --fix`) fills in `source_hash` and `source_anchor`. The result on disk: ```jsonc // src/billing/money.rs.liyi.jsonc @@ -2551,7 +2543,7 @@ The agent re-infers (updating `source_span`; the tool recomputes `source_hash`), }, "source_hash": { "$ref": "#/$defs/sourceHash", - "description": "Tool-managed. 
SHA-256 hex digest of the source lines in the span. Computed by liyi reanchor or the linter — agents should not produce this." + "description": "Tool-managed. SHA-256 hex digest of the source lines in the span. Computed by liyi check --fix — agents should not produce this." }, "source_anchor": { "type": "string", @@ -2592,7 +2584,7 @@ The agent re-infers (updating `source_span`; the tool recomputes `source_hash`), }, "source_hash": { "$ref": "#/$defs/sourceHash", - "description": "Tool-managed. Computed by liyi reanchor or the linter." + "description": "Tool-managed. Computed by liyi check --fix." }, "source_anchor": { "type": "string", diff --git a/docs/liyi-design.md.liyi.jsonc b/docs/liyi-design.md.liyi.jsonc index b72472b..3337542 100644 --- a/docs/liyi-design.md.liyi.jsonc +++ b/docs/liyi-design.md.liyi.jsonc @@ -18,7 +18,7 @@ 377, 379 ], - "source_hash": "sha256:e5d076462c6128635873db10142cb9046c9cdc176bc9dbed93864fbfbda861c6", + "source_hash": "sha256:afe22aa0b9f9625fea107924c2e3d860cd3f10699ed9f7ea996041b1114fec5a", "source_anchor": "" }, { @@ -45,7 +45,7 @@ 431, 433 ], - "source_hash": "sha256:d8dc3d37bad6f8fd51309f85e67bfcb2e737d4dfa9d8d9d3227b7eec0f9a1149", + "source_hash": "sha256:4bc9df80baf61c519493cf82d6fd325a99acf9f2fa56ffbf5c08d15ce2f99994", "source_anchor": "" }, { @@ -54,32 +54,32 @@ 443, 445 ], - "source_hash": "sha256:ecf1e7a7a12c28b804bad2697be04e9c7dbadf053b7d93d7e8e7ae2fd46b6de7", + "source_hash": "sha256:6c9f4b6a6cdb20ca339372b8776bc96c8819c12168f0f881679bd44d1542fc25", "source_anchor": "" }, { - "requirement": "tree-path-reanchor-behavior", + "requirement": "tree-path-fix-behavior", "source_span": [ 469, - 473 + 472 ], - "source_hash": "sha256:e3f8460c2acdfa065b975c590abf0f4e869ff4dcfabc75674ae7ef0ef2f65600", - "source_anchor": "" + "source_hash": "sha256:5dca1e26d0fb939c33b2214db262701daf9a67cc8a2d13a6f58d65b413b5512c", + "source_anchor": "" }, { "requirement": "tree-path-empty-fallback", "source_span": [ - 477, - 485 + 476, + 484 ], - 
"source_hash": "sha256:3b5b2113802d61b766fd0462d7706b0813b60cc373794d1d99f4732f6c5a6280", + "source_hash": "sha256:aa9a9bd3d6f00ac3aa69d4c4dc33bbedf2ca0b7781be40492679ebfcd21b7340", "source_anchor": "" }, { "requirement": "requirement-name-uniqueness", "source_span": [ - 633, - 635 + 632, + 634 ], "source_hash": "sha256:bf5fcadbb09429fc0b9a6f55b6b1ac685d5a88f134b3e89f2861dc20bc0e9928", "source_anchor": "" @@ -87,8 +87,8 @@ { "requirement": "cycle-detection", "source_span": [ - 777, - 779 + 771, + 773 ], "source_hash": "sha256:925289a107cb255f3c48f0b5395395038278f56f7006dab3522e511cc8d7699f", "source_anchor": "" @@ -96,8 +96,8 @@ { "requirement": "requirement-discovery-global", "source_span": [ - 1108, - 1110 + 1102, + 1104 ], "source_hash": "sha256:d01ce1382642d6ad05162bbba79d4cd35dc7715294f8cc3d6c54de9e743fa38c", "source_anchor": "" @@ -105,8 +105,8 @@ { "requirement": "liyi-check-exit-code", "source_span": [ - 1233, - 1239 + 1227, + 1233 ], "source_hash": "sha256:d7c4ad4bd1d1abe361598373a376aacda6bc0d948bd611f9b8bdea0b23a74c7a", "source_anchor": "" @@ -114,8 +114,8 @@ { "requirement": "marker-normalization", "source_span": [ - 1266, - 1268 + 1260, + 1262 ], "source_hash": "sha256:7d9e05684eeaf175288fbd0629a2352dbe3353416172691de4ab3dcbe435b7d3", "source_anchor": "" @@ -123,8 +123,8 @@ { "requirement": "quine-escape-in-source", "source_span": [ - 1280, - 1282 + 1274, + 1276 ], "source_hash": "sha256:7bb4de54441a4fadbd4204d8e950cedff7874181b111f2b6789f693ac22b982b", "source_anchor": "" @@ -132,8 +132,8 @@ { "requirement": "markdown-fenced-block-skip", "source_span": [ - 1286, - 1292 + 1280, + 1286 ], "source_hash": "sha256:8160197ac9ec02de2896ceac8e8edf334a43efe294ccaf976b2909a629689ceb", "source_anchor": "" @@ -141,8 +141,8 @@ { "requirement": "fix-never-modifies-human-fields", "source_span": [ - 1498, - 1500 + 1492, + 1494 ], "source_hash": "sha256:e5c1b041dab62b4533a84b2bfc73c2ca8697a9ae7a454540686c8d479305ef5f", "source_anchor": "" @@ -150,8 +150,8 @@ { 
"requirement": "fix-semantic-drift-protection", "source_span": [ - 1502, - 1504 + 1496, + 1498 ], "source_hash": "sha256:87f70b0fa07c386eb7478b420f8edc70f7b7bb709c416e2aaf17a1a913fa0ce5", "source_anchor": "" diff --git a/docs/liyi-mvp-roadmap.md b/docs/liyi-mvp-roadmap.md index dedc57d..3e8a391 100644 --- a/docs/liyi-mvp-roadmap.md +++ b/docs/liyi-mvp-roadmap.md @@ -11,7 +11,7 @@ This document is the implementation plan for 立意 v0.1 — the CI linter, the **Deliverables:** 1. `liyi check` — the CI linter binary (Rust) ✅ -2. `liyi reanchor` — the span re-hashing tool (subcommand of the same binary) ✅ +2. `liyi check --fix` — the span fixing tool (fills hashes, corrects shifts, computes tree_path) ✅ 3. `liyi.schema.json` — the JSON Schema for `.liyi.jsonc` v0.1 ✅ 4. Agent instruction — the ~12-line AGENTS.md paragraph ✅ 5. Demo repo — the linter's own codebase, dogfooded with `.liyi.jsonc` specs and `@liyi:module` markers ✅ @@ -25,14 +25,14 @@ This document is the implementation plan for 立意 v0.1 — the CI linter, the | Module | Status | Notes | |--------|--------|-------| -| `cli.rs` | ✅ Done | `check`, `reanchor`, `init`, `approve` subcommands, all planned flags | +| `cli.rs` | ✅ Done | `check`, `init`, `approve`, `migrate` subcommands, all planned flags | | `discovery.rs` | ✅ Done | `.liyiignore` support, ambiguous sidecar detection, scope filtering | | `sidecar.rs` | ✅ Done | JSONC comment stripping, serde, `deny_unknown_fields`, `tree_path` field | | `markers.rs` | ✅ Done | All 7 marker types, fullwidth normalization, multilingual aliases | | `hashing.rs` | ✅ Done | SHA-256, CRLF normalization, all `SpanError` variants | | `shift.rs` | ✅ Done | ±100-line scan with anchor hint shortcut | | `check.rs` | ✅ Done | Two-pass logic, `--fix` write-back, `--dry-run`, tree-sitter span recovery via `tree_path`, semantic drift protection, all 4 post-pass diagnostics wired | -| `reanchor.rs` | ✅ Done | Targeted + batch re-hashing, multi-file/directory support, `--migrate` 
scaffold, tree-sitter span recovery | +| `reanchor.rs` | ✅ Done | Internal module for batch re-hashing and `--migrate` logic, now invoked via `check --fix` and `migrate` subcommands | | `tree_path.rs` | ✅ Done | Tree-sitter structural identity & span recovery (R6). Resolve, compute, auto-populate. Rust grammar. | | `diagnostics.rs` | ✅ Done | All diagnostic types, formatting, exit codes, summary line output | | `schema.rs` | ✅ Done | Accepts `"0.1"` only, migration scaffold | @@ -80,7 +80,7 @@ All `DiagnosticKind` variants are defined and emitted: |------|--------|-------| | `shift_proptest.rs` | ✅ Done | 4 property-based tests: insert/delete shifts, content modification, hint agreement | | CI (GitHub Actions) | ✅ Done | Workflow: `cargo test`, `cargo clippy`, `cargo fmt --check`, `liyi check --root .` (dogfood) | -| Dogfooding locally | ✅ Done | Full loop confirmed: agent changes code → `liyi check` detects staleness → agent reanchors specs. CI wired. | +| Dogfooding locally | ✅ Done | Full loop confirmed: agent changes code → `liyi check` detects staleness → agent fixes specs. CI wired. | | Summary line output | ✅ Done | Prints "N current, M stale, K unreviewed, ..." after diagnostics | | `liyi init` subcommand | ✅ Done | Scaffold AGENTS.md or skeleton `.liyi.jsonc` sidecar | | `liyi approve` subcommand | ✅ Done | Batch (`--yes`) and interactive modes, `--dry-run`, `--item` filter | @@ -127,13 +127,13 @@ All `DiagnosticKind` variants are defined and emitted: │ - hash │ └────────────────────┘ │ spans │ │ - check │ ┌────────────────────┐ - │ review │ │ liyi reanchor │ - │ - resolve│ │ │ - │ related│ │ Fills source_hash, │ - │ edges │ │ source_anchor from │ - │ │ │ actual source file │ - │ Exit 0/1/2│ │ bytes. No LLM. 
│ - └───────────┘ └────────────────────┘ + │ review │ + │ - resolve│ + │ related│ + │ edges │ + │ │ + │ Exit 0/1/2│ + └───────────┘ Post-MVP triage workflow: ┌────────────────────────────────────────────┐ @@ -144,7 +144,7 @@ All `DiagnosticKind` variants are defined and emitted: │ liyi triage --validate │ │ → schema check │ │ liyi triage --apply │ - │ → auto-reanchor cosmetic items │ + │ → auto-fix cosmetic items │ │ → present semantic/violation for review │ └────────────────────────────────────────────┘ ``` @@ -156,15 +156,14 @@ A single Rust binary with subcommands: | Subcommand | Purpose | |---|---| | `liyi check [paths...]` | Lint: staleness, review status, requirement tracking | -| `liyi check --fix` | Lint + auto-correct shifted spans, fill missing hashes | +| `liyi check --fix` | Lint + auto-correct shifted spans, fill missing hashes, compute tree_path | | `liyi check --json` | Machine-readable output with full context for each stale item (feeds `liyi triage`) | | `liyi approve [paths...] 
[--yes]` | Interactive review: mark specs as human-approved | | `liyi init [source-file]` | Scaffold AGENTS.md or skeleton `.liyi.jsonc` sidecar | -| `liyi reanchor [--item --span ]` | Manual span re-hashing (accepts files or directories, recursive) | -| `liyi reanchor --migrate` | Schema version migration (no-op in 0.1, scaffolded) | +| `liyi migrate ` | Schema version migration (no-op in 0.1, scaffolded) | | `liyi triage --prompt` | Assemble a self-contained LLM prompt from stale items (post-MVP) | | `liyi triage --validate ` | Validate an agent-produced triage report against the schema (post-MVP) | -| `liyi triage --apply [file]` | Auto-reanchor cosmetic items, present remaining for review (post-MVP) | +| `liyi triage --apply [file]` | Auto-fix cosmetic items, present remaining for review (post-MVP) | | `liyi triage --summary [file]` | Human-readable summary of a triage report (post-MVP) | ### Crate structure @@ -183,7 +182,7 @@ liyi/ │ │ │ ├── markers.rs ← Source marker scanning (@liyi:*, normalization) │ │ │ ├── hashing.rs ← source_span → SHA-256, anchor extraction │ │ │ ├── shift.rs ← Span-shift detection -│ │ │ ├── reanchor.rs ← reanchor subcommand logic +│ │ │ ├── reanchor.rs ← internal re-hashing and migrate logic │ │ │ ├── tree_path.rs ← Tree-sitter structural identity & span recovery │ │ │ ├── diagnostics.rs ← Diagnostic types, formatting, exit codes │ │ │ ├── schema.rs ← Version validation, migration scaffold @@ -255,20 +254,11 @@ enum Command { root: Option, }, - /// Re-hash source spans in sidecar files - Reanchor { - /// Sidecar files or directories to reanchor (recursive) + /// Migrate sidecar schema version + Migrate { + /// Sidecar files or directories to migrate (recursive) files: Vec, - - /// Target a specific item by name - #[arg(long)] - item: Option, - - /// Override span (start,end) - #[arg(long, value_parser = parse_span)] - span: Option<(usize, usize)>, - - /// Migrate sidecar to current schema version + }, #[arg(long)] migrate: bool, }, 
@@ -572,13 +562,13 @@ For each `.liyi.jsonc` in scope: e. Check `@liyi:trivial` / `@liyi:ignore` within or immediately before the span. If found, mark as trivial/ignored (skip review requirement). f. If `related` is present: for each requirement name, look up in the pass-1 map. If not found → `ERROR: unknown requirement`. If found and hash differs from recorded hash → `REQ CHANGED`. 5. For each `Spec::Requirement`: - a. Hash the `source_span`. If `source_hash` present and mismatches → STALE (requirement text changed but sidecar not updated — run `liyi reanchor`). + a. Hash the `source_span`. If `source_hash` present and mismatches → STALE (requirement text changed but sidecar not updated — run `liyi check --fix`). 6. Report requirements from pass 1 that have no referencing items (informational). **`--fix` behavior (integrated into pass 2):** When `--fix` is active: -- Fill in missing `source_hash` and `source_anchor` (same as `reanchor`). +- Fill in missing `source_hash` and `source_anchor` (same behavior as `check --fix`). - Auto-correct SHIFTED spans (write new span, recompute hash/anchor). - Attempt tree-path re-resolution **before** validating span boundaries — if `tree_path` is set and the current `source_span` is past EOF or otherwise invalid, resolve via tree-sitter first. - Write modified sidecars back to disk. @@ -611,23 +601,22 @@ enum ExitCode { --- -### 8. `reanchor.rs` — Reanchor Subcommand +### 8. `reanchor.rs` — Internal Re-hashing Module -**Purpose:** Re-hash source spans in sidecar files. Manual tool for fixing spans after line shifts. Accepts one or more sidecar files or directories (recursive). When `tree_path` is populated, uses tree-sitter to locate items by structural identity before re-hashing. +**Purpose:** Internal module for re-hashing source spans in sidecar files. Invoked by `liyi check --fix` (for span correction and hash filling) and `liyi migrate` (for schema version upgrades). 
When `tree_path` is populated, uses tree-sitter to locate items by structural identity before re-hashing. **Behavior:** 1. Parse the target sidecar(s). If a directory is given, discover all `.liyi.jsonc` files under it recursively. -2. If `--item` and `--span` are specified: find the named item, update its span, recompute hash/anchor. -3. If neither: for every spec in the sidecar: +2. For every spec in the sidecar: a. If `tree_path` is non-empty and a tree-sitter grammar is available for the source language: parse the source file, locate the item by structural identity, update `source_span` to the item's current line range, recompute hash/anchor. This handles formatting changes, import additions, and any line-shifting edits. b. Otherwise: recompute hash/anchor from the source file at the recorded span (existing behavior). This handles "code changed at the same span" (human confirms intent still holds → re-hash). -4. If `--migrate`: update `"version"` to current (no-op in 0.1, but the scaffold ensures the flag exists and the code path handles future versions). -5. Write modified sidecar back. +3. If migrating: update `"version"` to current (no-op in 0.1, but the scaffold ensures the code path handles future versions). +4. Write modified sidecar back. **Constraints:** -- `reanchor` never modifies `intent`, `reviewed`, or `related`. -- If the source file doesn't exist, emit an error (can't reanchor an orphaned spec). +- Never modifies `intent`, `reviewed`, or `related`. +- If the source file doesn't exist, emit an error (can't fix an orphaned spec). - Idempotent: running twice produces the same output. **Size estimate:** ~60 lines. @@ -773,7 +762,7 @@ The convention defines 7 marker types that the linter recognizes in source files ## Key Constraints ### 1. No language-specific parsing (core path) -The linter's core check path reads line ranges and hashes bytes. It does not parse any programming language. 
Source markers are found by string matching on individual lines (after normalization). This is the core design constraint that makes the tool work with any language. Tree-sitter is used **only** for `tree_path` span recovery in `liyi reanchor` and `liyi check --fix` — it is an optional enhancement, not a requirement. When tree-sitter has no grammar for a language (or `tree_path` is empty), the tool falls back to the language-agnostic line-number behavior. +The linter's core check path reads line ranges and hashes bytes. It does not parse any programming language. Source markers are found by string matching on individual lines (after normalization). This is the core design constraint that makes the tool work with any language. Tree-sitter is used **only** for `tree_path` span recovery in `liyi check --fix` — it is an optional enhancement, not a requirement. When tree-sitter has no grammar for a language (or `tree_path` is empty), the tool falls back to the language-agnostic line-number behavior. ### 2. No LLM calls, no network access The linter is fully offline and deterministic. SHA-256 hashing, file I/O, string matching. No API keys, no configuration for models, no telemetry. @@ -789,7 +778,7 @@ Configuration is expressed through: | Field | Written by | Never written by | |---|---|---| | `item`, `intent`, `source_span`, `confidence`, `related` (names) | Agent | — | -| `source_hash`, `source_anchor`, `tree_path`, `related` (hashes) | `liyi reanchor` / `liyi check --fix` | Agent (may write initial `tree_path`), human | +| `source_hash`, `source_anchor`, `tree_path`, `related` (hashes) | `liyi check --fix` | Agent (may write initial `tree_path`), human | | `reviewed` | Human (CLI / IDE) | Agent (security model) | ### 5. Exit code contract @@ -815,7 +804,7 @@ All must-have and nice-to-have items are now complete. #### R1. `liyi approve` — interactive review command ✅ -Implemented in `crates/liyi/src/approve.rs`. 
Interactive by default when stdin is a TTY (show intent + source span, prompt y/n/s). Batch mode via `--yes` or when non-TTY. `--dry-run`, `--item ` flags. Reanchors on approval (fills `source_hash`, `source_anchor`). +Implemented in `crates/liyi/src/approve.rs`. Interactive by default when stdin is a TTY (show intent + source span, prompt y/n/s). Batch mode via `--yes` or when non-TTY. `--dry-run`, `--item ` flags. Fills `source_hash` and `source_anchor` on approval. #### R2. `liyi init` — scaffold command ✅ @@ -938,7 +927,7 @@ The linter's own codebase has `.liyi.jsonc` specs. CI runs `liyi check`. This is 1. **`liyi check` runs on a real codebase** — the linter's own source — and produces correct diagnostics. ✅ (43 unit + 22 golden/integration tests pass) 2. **All golden-file tests pass** — covering every diagnostic in the catalog. ✅ (all 15+ planned fixtures exist) -3. **`liyi reanchor` re-hashes spans** correctly, including `--item`/`--span` targeting. ✅ +3. **`liyi check --fix` fills tool-managed fields** correctly. ✅ 4. **The agent instruction works** — an LLM reading `AGENTS.md` produces valid `.liyi.jsonc` files that `liyi check` can lint. ✅ 5. **CI is green** — GitHub Actions runs `liyi check` on every push. ✅ 6. **The binary is small** — single static binary, <5 MB, zero runtime dependencies. ✅ diff --git a/schema/liyi.schema.json b/schema/liyi.schema.json index 3ed3ce1..00aba76 100644 --- a/schema/liyi.schema.json +++ b/schema/liyi.schema.json @@ -64,7 +64,7 @@ }, "source_hash": { "$ref": "#/$defs/sourceHash", - "description": "Tool-managed. SHA-256 hex digest of the source lines in the span. Computed by liyi reanchor or the linter — agents should not produce this." + "description": "Tool-managed. SHA-256 hex digest of the source lines in the span. Computed by liyi check --fix — agents should not produce this." 
}, "source_anchor": { "type": "string", @@ -88,7 +88,7 @@ }, "_hints": { "type": "object", - "description": "Transient inference aids emitted by liyi init for cold-start scenarios. LLM-readable, intentionally unstructured. Stripped by liyi reanchor after initial review. Tools MUST NOT rely on any specific shape." + "description": "Transient inference aids emitted by liyi init for cold-start scenarios. LLM-readable, intentionally unstructured. Stripped by liyi check --fix after initial review. Tools MUST NOT rely on any specific shape." } } }, @@ -109,7 +109,7 @@ }, "source_hash": { "$ref": "#/$defs/sourceHash", - "description": "Tool-managed. Computed by liyi reanchor or the linter." + "description": "Tool-managed. Computed by liyi check --fix." }, "source_anchor": { "type": "string", diff --git a/schema/triage.schema.json b/schema/triage.schema.json index cbf0035..1e479ac 100644 --- a/schema/triage.schema.json +++ b/schema/triage.schema.json @@ -49,8 +49,8 @@ }, "action": { "type": "string", - "enum": ["auto-reanchor", "update-intent", "fix-code-or-update-intent", "manual-review"], - "description": "Recommended action. auto-reanchor for cosmetic, update-intent for semantic, fix-code-or-update-intent for intent-violation, manual-review for unclear." + "enum": ["auto-fix", "update-intent", "fix-code-or-update-intent", "manual-review"], + "description": "Recommended action. auto-fix for cosmetic, update-intent for semantic, fix-code-or-update-intent for intent-violation, manual-review for unclear." }, "summary": { "type": "object", From fbb5b76b766a5d72a306e35193e6aaa09fff285b Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Thu, 12 Mar 2026 01:47:54 +0800 Subject: [PATCH 20/21] fix(linter): deterministic related edge output order Replace HashMap with BTreeMap for the `related` field in ItemSpec so that `check --fix` and `write_sidecar` produce stable, alphabetically sorted key order. 
Original prompt: > please make `check --fix` related edge output order > deterministic AI-assisted-by: Claude Opus 4.6 (GitHub Copilot) Signed-off-by: WANG Xuerui --- crates/liyi/src/check.rs | 4 ++-- crates/liyi/src/check.rs.liyi.jsonc | 10 +++++----- crates/liyi/src/sidecar.rs | 4 ++-- crates/liyi/src/sidecar.rs.liyi.jsonc | 4 ++-- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/crates/liyi/src/check.rs b/crates/liyi/src/check.rs index d79985f..ddbbd80 100644 --- a/crates/liyi/src/check.rs +++ b/crates/liyi/src/check.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use std::collections::{BTreeMap, HashMap}; use std::fs; use std::path::{Path, PathBuf}; @@ -835,7 +835,7 @@ fn check_sidecar( item.related.as_ref().is_some_and(|r| r.contains_key(name)); if !has_edge { if fix { - let related = item.related.get_or_insert_with(HashMap::new); + let related = item.related.get_or_insert_with(BTreeMap::new); related.insert(name.clone(), None); modified = true; } diff --git a/crates/liyi/src/check.rs.liyi.jsonc b/crates/liyi/src/check.rs.liyi.jsonc index 7f620bd..d6539ee 100644 --- a/crates/liyi/src/check.rs.liyi.jsonc +++ b/crates/liyi/src/check.rs.liyi.jsonc @@ -28,8 +28,8 @@ "source_anchor": "pub fn run_check(", "related": { "cycle-detection": null, - "requirement-name-uniqueness": null, - "requirement-discovery-global": null + "requirement-discovery-global": null, + "requirement-name-uniqueness": null } }, { @@ -41,11 +41,11 @@ 1022 ], "tree_path": "fn::check_sidecar", - "source_hash": "sha256:e1919fe4c31925d44e75df6525ba3716524fb7dc6436b85e49cdbafa54fa6fef", + "source_hash": "sha256:4dcb913f15f9da76399b61a71f597592b285655b03c3cba15f7817bc5531ce9a", "source_anchor": "fn check_sidecar(", "related": { - "reviewed-semantics": null, - "fix-semantic-drift-protection": null + "fix-semantic-drift-protection": null, + "reviewed-semantics": null } }, { diff --git a/crates/liyi/src/sidecar.rs b/crates/liyi/src/sidecar.rs index 7e2df00..4fdbba3 100644 --- 
a/crates/liyi/src/sidecar.rs +++ b/crates/liyi/src/sidecar.rs @@ -1,5 +1,5 @@ use serde::{Deserialize, Serialize}; -use std::collections::HashMap; +use std::collections::BTreeMap; /// Top-level `.liyi.jsonc` file representation #[derive(Debug, Serialize, Deserialize)] @@ -44,7 +44,7 @@ pub struct ItemSpec { pub confidence: Option, #[serde(skip_serializing_if = "Option::is_none")] - pub related: Option>>, + pub related: Option>>, } /// Details of a module requirement/invariant diff --git a/crates/liyi/src/sidecar.rs.liyi.jsonc b/crates/liyi/src/sidecar.rs.liyi.jsonc index 52f204a..29b51cc 100644 --- a/crates/liyi/src/sidecar.rs.liyi.jsonc +++ b/crates/liyi/src/sidecar.rs.liyi.jsonc @@ -46,8 +46,8 @@ 47, 60 ], - "source_hash": "sha256:80fb1f6f3b1f13c07d70d2cae39a895914c96f8e3ae18a908c4f0cb5ac6eafc0", - "source_anchor": " pub related: Option>>," + "source_hash": "sha256:534ce4d553fb5d621cb1f4f0b3c9fc03a4d8ae20d12da9c2e80dc3dad5f8dd52", + "source_anchor": " pub related: Option>>," }, { "item": "strip_jsonc_comments", From cff969829207c448cd508d4b1ef269795dc6b8df Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Thu, 12 Mar 2026 01:51:10 +0800 Subject: [PATCH 21/21] style: cargo fmt && liyi check --fix Signed-off-by: WANG Xuerui --- crates/liyi-cli/src/cli.rs | 1 - crates/liyi/src/approve.rs | 2 +- crates/liyi/src/check.rs | 24 ++++++++++++++++-------- crates/liyi/src/check.rs.liyi.jsonc | 8 ++++---- 4 files changed, 21 insertions(+), 14 deletions(-) diff --git a/crates/liyi-cli/src/cli.rs b/crates/liyi-cli/src/cli.rs index 3b66d19..7bff8b6 100644 --- a/crates/liyi-cli/src/cli.rs +++ b/crates/liyi-cli/src/cli.rs @@ -103,4 +103,3 @@ pub enum Commands { item: Option, }, } - diff --git a/crates/liyi/src/approve.rs b/crates/liyi/src/approve.rs index 9b3890e..d08505a 100644 --- a/crates/liyi/src/approve.rs +++ b/crates/liyi/src/approve.rs @@ -2,8 +2,8 @@ use std::fs; use std::io; use std::path::{Path, PathBuf}; -use crate::hashing::hash_span; use 
crate::discovery::resolve_sidecar_targets; +use crate::hashing::hash_span; use crate::sidecar::{Spec, parse_sidecar, write_sidecar}; /// Result of an approve operation on a single sidecar. diff --git a/crates/liyi/src/check.rs b/crates/liyi/src/check.rs index ddbbd80..d0cb06d 100644 --- a/crates/liyi/src/check.rs +++ b/crates/liyi/src/check.rs @@ -447,7 +447,8 @@ fn check_sidecar( item.source_anchor = Some(computed_anchor.clone()); let lang = detect_language(&entry.source_path); if let Some(l) = lang { - item.tree_path = compute_tree_path(&source_content, item.source_span, l); + item.tree_path = + compute_tree_path(&source_content, item.source_span, l); } modified = true; } @@ -504,7 +505,8 @@ fn check_sidecar( item.source_anchor = Some(a); } if let Some(l) = lang { - item.tree_path = compute_tree_path(&source_content, new_span, l); + item.tree_path = + compute_tree_path(&source_content, new_span, l); } modified = true; } @@ -534,7 +536,8 @@ fn check_sidecar( // leaves the spec stale so the next // `liyi check` flags it. 
if let Some(l) = lang { - item.tree_path = compute_tree_path(&source_content, new_span, l); + item.tree_path = + compute_tree_path(&source_content, new_span, l); } modified = true; } @@ -571,7 +574,8 @@ fn check_sidecar( } let lang = detect_language(&entry.source_path); if let Some(l) = lang { - item.tree_path = compute_tree_path(&source_content, new_span, l); + item.tree_path = + compute_tree_path(&source_content, new_span, l); } modified = true; } @@ -654,7 +658,8 @@ fn check_sidecar( item.source_anchor = Some(a); } if let Some(l) = lang { - item.tree_path = compute_tree_path(&source_content, new_span, l); + item.tree_path = + compute_tree_path(&source_content, new_span, l); } modified = true; } @@ -678,7 +683,8 @@ fn check_sidecar( if fix { item.source_span = new_span; if let Some(l) = lang { - item.tree_path = compute_tree_path(&source_content, new_span, l); + item.tree_path = + compute_tree_path(&source_content, new_span, l); } modified = true; } @@ -937,7 +943,8 @@ fn check_sidecar( req.source_anchor = Some(a); } if let Some(l) = lang { - req.tree_path = compute_tree_path(&source_content, new_span, l); + req.tree_path = + compute_tree_path(&source_content, new_span, l); } modified = true; } @@ -959,7 +966,8 @@ fn check_sidecar( if fix { req.source_span = new_span; if let Some(l) = lang { - req.tree_path = compute_tree_path(&source_content, new_span, l); + req.tree_path = + compute_tree_path(&source_content, new_span, l); } modified = true; } diff --git a/crates/liyi/src/check.rs.liyi.jsonc b/crates/liyi/src/check.rs.liyi.jsonc index d6539ee..47aaa84 100644 --- a/crates/liyi/src/check.rs.liyi.jsonc +++ b/crates/liyi/src/check.rs.liyi.jsonc @@ -38,10 +38,10 @@ "intent": "For a single sidecar entry: parse the sidecar, validate its version, verify the source file exists, then for each spec check hash freshness (with shift detection, tree_path computation, and --fix support), review status (sidecar reviewed flag or @liyi:intent marker), trivial/ignore markers, and 
related-requirement edges. Write the sidecar back if --fix produced modifications.", "source_span": [ 282, - 1022 + 1030 ], "tree_path": "fn::check_sidecar", - "source_hash": "sha256:4dcb913f15f9da76399b61a71f597592b285655b03c3cba15f7817bc5531ce9a", + "source_hash": "sha256:15af1ed0747e0e2f748ef2c59dc1fcdf916f08893cd6f0488e8070ea96aae6a9", "source_anchor": "fn check_sidecar(", "related": { "fix-semantic-drift-protection": null, @@ -53,8 +53,8 @@ "reviewed": false, "intent": "Read a file's contents with caching: return the cached string if already loaded, otherwise read from disk, store in the cache, and return. Return None on I/O failure.", "source_span": [ - 1087, - 1098 + 1095, + 1106 ], "tree_path": "fn::read_cached", "source_hash": "sha256:77c7602b283fb2e67c7953f98ef11b417c83903d96011f370b7b0421778f52c2",