diff --git a/AGENTS.md b/AGENTS.md index d54f96e..c09865a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -23,14 +23,14 @@ When writing or modifying code: 2. When module-level invariants are apparent, write an `@liyi:module` block — in the directory's existing module doc (`README.md`, `doc.go`, `mod.rs` doc comment, etc.) or in a dedicated `LIYI.md`. Use the doc markup language's comment syntax for the marker. 3. If a source item has a `@liyi:related ` annotation, record the dependency in `.liyi.jsonc` as `"related": {"": null}`. The tool fills in the requirement's current hash. 4. For each `@liyi:requirement ` block encountered, ensure it has a corresponding entry in the co-located `.liyi.jsonc` with `"requirement"` and `"source_span"`. (The tool fills in `"source_hash"`.) -5. If a spec has `"related"` edges referencing a requirement, do not overwrite the requirement text during inference. Re-anchor the spec (update `source_span`) but preserve the `"related"` edges. Do not write `source_hash` — the tool fills it in. +5. If a spec has `"related"` edges referencing a requirement, do not overwrite the requirement text during inference. Update the spec (update `source_span`) but preserve the `"related"` edges. Do not write `source_hash` — the tool fills it in. 6. Only generate adversarial tests from items that have a `@liyi:intent` annotation in source or `"reviewed": true` in the sidecar (i.e., human-reviewed intent). When `@liyi:intent` is present in source, use its prose (or the docstring for `=doc`) as the authoritative intent for test generation. 7. Tests should target boundary conditions, error-handling gaps, property violations, and semantic mismatches. Prioritize tests a subtly wrong implementation would fail. 8. Skip items annotated with `@liyi:ignore` or `@liyi:trivial`, and files matched by `.liyiignore`. Respect `@liyi:nontrivial` — if present, always infer a spec for that item and never override with `@liyi:trivial`. 9. 
Use a different model for test generation than the one that wrote the code, when possible. 10. When `liyi check` reports stale items, choose one of two paths: - **Direct re-inference** (preferred during interactive editing with few stale items): re-read the source, update `source_span` and `intent` in the sidecar, leave `"reviewed"` unset. Appropriate when you are the agent that just made the change, the number of stale items is small, and the changes are straightforward. - - **Triage** (preferred for batch workflows, CI, or when many items are stale): assess each item — is the change cosmetic, semantic, or an intent violation? Write the assessment to `.liyi/triage.json` following the triage report schema. For cosmetic changes, run `liyi triage --apply` to auto-reanchor. For semantic changes, propose updated intent in `suggested_intent`. For intent violations, flag for human review. Prefer triage when stale items have `"reviewed": true` or `@liyi:intent` in source — these carry human-vouched intent that deserves explicit assessment, not silent re-inference. + - **Triage** (preferred for batch workflows, CI, or when many items are stale): assess each item — is the change cosmetic, semantic, or an intent violation? Write the assessment to `.liyi/triage.json` following the triage report schema. For cosmetic changes, run `liyi triage --apply` to auto-fix. For semantic changes, propose updated intent in `suggested_intent`. For intent violations, flag for human review. Prefer triage when stale items have `"reviewed": true` or `@liyi:intent` in source — these carry human-vouched intent that deserves explicit assessment, not silent re-inference. 11. Before committing, run `liyi check`. If it reports coverage gaps (missing requirement specs, missing related edges), resolve **all** gaps in the same commit. Do not commit with unresolved coverage gaps — CI will reject it. 
### `.liyi.jsonc` Schema (v0.1) @@ -104,7 +104,7 @@ Sidecar files must conform to the following JSON Schema. The top-level object ha }, "source_hash": { "$ref": "#/$defs/sourceHash", - "description": "Tool-managed. SHA-256 hex digest of the source lines in the span. Computed by liyi reanchor or the linter — agents should not produce this." + "description": "Tool-managed. SHA-256 hex digest of the source lines in the span. Computed by liyi check --fix — agents should not produce this." }, "source_anchor": { "type": "string", @@ -128,7 +128,7 @@ Sidecar files must conform to the following JSON Schema. The top-level object ha }, "_hints": { "type": "object", - "description": "Transient inference aids emitted by liyi init for cold-start scenarios. LLM-readable, intentionally unstructured. Stripped by liyi reanchor after initial review. Tools MUST NOT rely on any specific shape." + "description": "Transient inference aids emitted by liyi init for cold-start scenarios. LLM-readable, intentionally unstructured. Stripped by liyi check --fix after initial review. Tools MUST NOT rely on any specific shape." } } }, @@ -149,7 +149,7 @@ Sidecar files must conform to the following JSON Schema. The top-level object ha }, "source_hash": { "$ref": "#/$defs/sourceHash", - "description": "Tool-managed. Computed by liyi reanchor or the linter." + "description": "Tool-managed. Computed by liyi check --fix." }, "source_anchor": { "type": "string", @@ -215,8 +215,8 @@ When `liyi check` reports stale items, the agent assesses each and writes the re }, "action": { "type": "string", - "enum": ["auto-reanchor", "update-intent", "fix-code-or-update-intent", "manual-review"], - "description": "Recommended action. auto-reanchor for cosmetic, update-intent for semantic, fix-code-or-update-intent for intent-violation, manual-review for unclear." + "enum": ["auto-fix", "update-intent", "fix-code-or-update-intent", "manual-review"], + "description": "Recommended action. 
auto-fix for cosmetic, update-intent for semantic, fix-code-or-update-intent for intent-violation, manual-review for unclear." }, "summary": { "type": "object", diff --git a/Cargo.lock b/Cargo.lock index eaad99c..e1c11b0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -597,6 +597,7 @@ name = "liyi" version = "0.1.0" dependencies = [ "ignore", + "nom", "proptest", "regex", "serde", @@ -604,6 +605,7 @@ dependencies = [ "sha2", "tempfile", "tree-sitter", + "tree-sitter-bash", "tree-sitter-c", "tree-sitter-c-sharp", "tree-sitter-cpp", @@ -614,9 +616,11 @@ dependencies = [ "tree-sitter-objc", "tree-sitter-php", "tree-sitter-python", + "tree-sitter-ruby", "tree-sitter-rust", "tree-sitter-swift", "tree-sitter-typescript", + "tree-sitter-zig", ] [[package]] @@ -683,6 +687,15 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "nom" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405" +dependencies = [ + "memchr", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -1195,6 +1208,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-bash" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5ec769279cc91b561d3df0d8a5deb26b0ad40d183127f409494d6d8fc53062" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-c" version = "0.24.1" @@ -1301,6 +1324,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-ruby" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be0484ea4ef6bb9c575b4fdabde7e31340a8d2dbc7d52b321ac83da703249f95" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-rust" version = "0.24.0" @@ -1331,6 +1364,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-zig" +version = "1.1.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab11fc124851b0db4dd5e55983bbd9631192e93238389dcd44521715e5d53e28" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "typenum" version = "1.19.0" diff --git a/README.md b/README.md index 5b4fea2..34e8fd8 100644 --- a/README.md +++ b/README.md @@ -30,8 +30,8 @@ liyi check --root . ## How It Works 1. **Agent infers intent** — today's agents automatically read `AGENTS.md`, which teaches them the 立意 pattern. During normal development they maintain `.liyi.jsonc` sidecar files for each code item, with `source_span` and natural-language `intent`. If they don't do it automatically, you can always tell them to. -2. **`liyi check`** — hashes source spans, detects staleness and shifts, checks review status, tracks requirement edges. Zero network, zero LLM, fully deterministic. -3. **`liyi reanchor`** — re-hashes spans after intentional code changes. Never modifies intent or review state. +2. **`liyi check`** — hashes source spans, detects staleness and shifts, checks review status, tracks requirement edges. Zero network, zero LLM, fully deterministic. With `--fix`, auto-corrects shifted spans, fills missing hashes, and computes `tree_path`. +3. **`liyi migrate`** — upgrades sidecar files when the schema version changes. Idempotent. 4. **Human reviews** — sets `"reviewed": true` in the sidecar to approve, or adds `@liyi:intent` in source to provide the authoritative human version. ## Progressive Adoption @@ -55,10 +55,8 @@ liyi check [OPTIONS] [PATHS]... --fail-on-req-changed Fail on changed requirements (default: true) --root Override repo root -liyi reanchor [FILE] - --item Target a specific item - --span Override span (1-indexed, inclusive) - --migrate Schema version migration +liyi migrate [FILE|DIR]... 
+ Upgrade sidecar schema version ``` ## Exit Codes diff --git a/README.zh.md b/README.zh.md index 1f753c9..befd50b 100644 --- a/README.zh.md +++ b/README.zh.md @@ -31,8 +31,8 @@ liyi check --root . ## 工作原理 1. **由智能体推断意图** — 当今的智能体会自动读取 `AGENTS.md`,于是便掌握了《立意》设计模式。在正常开发流程中,它们便会自动为每个代码条目维护 `.liyi.jsonc` sidecar 文件,包含 `source_span` 和自然语言 `intent`。如果没有自动维护,也总可以明确告诉它这么干。 -2. **`liyi check`** — 为智能体提供的源码区间计算内容哈希,检测内容是否过时、行号是否偏移、是否被复核过,并追踪需求边。零网络访问、零 LLM 依赖、行为完全确定。 -3. **`liyi reanchor`** — 在有意的代码变更后重新计算区间哈希。不修改意图或复核状态。 +2. **`liyi check`** — 为智能体提供的源码区间计算内容哈希,检测内容是否过时、行号是否偏移、是否被复核过,并追踪需求边。零网络访问、零 LLM 依赖、行为完全确定。加 `--fix` 可自动修正偏移区间、填充缺失哈希、计算 `tree_path`。 +3. **`liyi migrate`** — 当 schema 版本变更时升级 sidecar 文件。幂等。 4. **由人类复核** — 在 `.liyi.jsonc` 中设置 `"reviewed": true` 以批准,或在源码中添加 `@liyi:intent` 以明确给出人类版本。 ## 渐进式采用 @@ -56,10 +56,8 @@ liyi check [OPTIONS] [PATHS]... --fail-on-req-changed 对已变更需求报错(默认:true) --root 覆盖仓库根目录 -liyi reanchor [FILE] - --item 指定目标条目 - --span 覆盖区间(1 起始,闭区间) - --migrate 执行 schema 版本迁移 +liyi migrate [FILE|DIR]... 
+ 升级 sidecar schema 版本 ``` ## 退出状态码 diff --git a/crates/liyi-cli/src/cli.rs b/crates/liyi-cli/src/cli.rs index 80c9309..7bff8b6 100644 --- a/crates/liyi-cli/src/cli.rs +++ b/crates/liyi-cli/src/cli.rs @@ -68,23 +68,10 @@ pub enum Commands { level: DiagnosticLevel, }, - /// Re-hash source spans in sidecar files - Reanchor { - /// Sidecar files or directories to reanchor (recursive) - #[arg(required_unless_present = "migrate")] + /// Migrate sidecar files to the current schema version + Migrate { + /// Sidecar files or directories to migrate (recursive) files: Vec, - - /// Target a specific item by name - #[arg(long, requires = "span")] - item: Option, - - /// Override span (start,end) - #[arg(long, requires = "item", value_parser = parse_span)] - span: Option<[usize; 2]>, - - /// Migrate sidecar to current schema version - #[arg(long)] - migrate: bool, }, /// Scaffold AGENTS.md or skeleton .liyi.jsonc sidecar @@ -116,20 +103,3 @@ pub enum Commands { item: Option, }, } - -/// Parse a "start,end" string into a [usize; 2] span. -fn parse_span(s: &str) -> Result<[usize; 2], String> { - let parts: Vec<&str> = s.split(',').collect(); - if parts.len() != 2 { - return Err(format!("expected format 'start,end', got '{s}'")); - } - let start: usize = parts[0] - .trim() - .parse() - .map_err(|_| format!("invalid start: '{}'", parts[0].trim()))?; - let end: usize = parts[1] - .trim() - .parse() - .map_err(|_| format!("invalid end: '{}'", parts[1].trim()))?; - Ok([start, end]) -} diff --git a/crates/liyi-cli/src/cli.rs.liyi.jsonc b/crates/liyi-cli/src/cli.rs.liyi.jsonc index 3f3308e..dbeb54a 100644 --- a/crates/liyi-cli/src/cli.rs.liyi.jsonc +++ b/crates/liyi-cli/src/cli.rs.liyi.jsonc @@ -30,26 +30,14 @@ { "item": "Commands", "reviewed": false, - "intent": "Enumerate all CLI subcommands (Check, Reanchor, Init, Approve) with their full set of flags and arguments, providing defaults and mutual constraints (e.g. 
--item requires --span, file is required unless --migrate, --level filters diagnostic output).", + "intent": "Enumerate all CLI subcommands (Check, Migrate, Init, Approve) with their full set of flags and arguments, providing defaults and mutual constraints (e.g. --level filters diagnostic output).", "source_span": [ 28, - 118 + 105 ], "tree_path": "enum::Commands", - "source_hash": "sha256:383b23d5dbade566788d2673cb55fc05e8e7e33b70f4e2edd77df0245d09db65", + "source_hash": "sha256:94096c9cb7d64c3fd721c783da0c3f0482f6b548e317daac6b2c1d0ec902dd63", "source_anchor": "pub enum Commands {" - }, - { - "item": "parse_span", - "reviewed": true, - "intent": "Parse a 'start,end' string into a [usize; 2] span, rejecting inputs that are not exactly two comma-separated unsigned integers.", - "source_span": [ - 121, - 135 - ], - "tree_path": "fn::parse_span", - "source_hash": "sha256:d57d01b6fb8d7fbefc54c62e3240b46d80cc2370a7d148811caad6d809b23977", - "source_anchor": "fn parse_span(s: &str) -> Result<[usize; 2], String> {" } ] } diff --git a/crates/liyi-cli/src/main.rs b/crates/liyi-cli/src/main.rs index 181ee34..1e0750f 100644 --- a/crates/liyi-cli/src/main.rs +++ b/crates/liyi-cli/src/main.rs @@ -89,23 +89,13 @@ fn main() { process::exit(exit_code as i32); } - Commands::Reanchor { - files, - item, - span, - migrate, - } => { - if migrate && files.is_empty() { - eprintln!("--migrate requires at least one sidecar file path"); - process::exit(2); - } - + Commands::Migrate { files } => { if files.is_empty() { eprintln!("at least one sidecar file or directory required"); process::exit(2); } - let targets = match liyi::reanchor::resolve_reanchor_targets(&files) { + let targets = match liyi::discovery::resolve_sidecar_targets(&files) { Ok(t) => t, Err(e) => { eprintln!("Error: {e}"); @@ -120,13 +110,9 @@ fn main() { let mut errors = 0; for sidecar_path in &targets { - match liyi::reanchor::run_reanchor(sidecar_path, item.as_deref(), span, migrate) { + match 
liyi::reanchor::run_reanchor(sidecar_path, None, None, true) { Ok(()) => { - if migrate { - println!("Migrated: {}", sidecar_path.display()); - } else { - println!("Reanchored: {}", sidecar_path.display()); - } + println!("Migrated: {}", sidecar_path.display()); } Err(e) => { eprintln!("Error ({}): {e}", sidecar_path.display()); diff --git a/crates/liyi-cli/src/main.rs.liyi.jsonc b/crates/liyi-cli/src/main.rs.liyi.jsonc index c984016..1069c8f 100644 --- a/crates/liyi-cli/src/main.rs.liyi.jsonc +++ b/crates/liyi-cli/src/main.rs.liyi.jsonc @@ -21,10 +21,10 @@ "intent": "=doc", "source_span": [ 26, - 242 + 228 ], "tree_path": "fn::main", - "source_hash": "sha256:a11828b49d8e7c5b144423442c9bab41b1d84aff99c551e159e0c03558f70b06", + "source_hash": "sha256:5c447be7165dcba8bfd3d08fff588db7b2d95309ec9c474aa30487deeec02d49", "source_anchor": "fn main() {" }, { @@ -32,8 +32,8 @@ "reviewed": true, "intent": "=doc", "source_span": [ - 248, - 250 + 234, + 236 ], "tree_path": "fn::is_tty", "source_hash": "sha256:36dcd447c8fa9e666c6682395c3148c216b7c07dce8cc88f3d76f90714207ccd", diff --git a/crates/liyi/Cargo.toml b/crates/liyi/Cargo.toml index d6658e8..d64070a 100644 --- a/crates/liyi/Cargo.toml +++ b/crates/liyi/Cargo.toml @@ -30,6 +30,10 @@ tree-sitter-php = "0.24.2" tree-sitter-objc = "3.0.2" tree-sitter-kotlin-ng = "1.1.0" tree-sitter-swift = "0.7.1" +tree-sitter-bash = "0.25.1" +tree-sitter-ruby = "0.23.1" +tree-sitter-zig = "1.1.2" +nom = "8" [dev-dependencies] proptest = "1" diff --git a/crates/liyi/src/approve.rs b/crates/liyi/src/approve.rs index 03772ad..d08505a 100644 --- a/crates/liyi/src/approve.rs +++ b/crates/liyi/src/approve.rs @@ -2,8 +2,8 @@ use std::fs; use std::io; use std::path::{Path, PathBuf}; +use crate::discovery::resolve_sidecar_targets; use crate::hashing::hash_span; -use crate::reanchor::resolve_reanchor_targets; use crate::sidecar::{Spec, parse_sidecar, write_sidecar}; /// Result of an approve operation on a single sidecar. 
@@ -75,7 +75,7 @@ pub fn collect_approval_candidates( paths: &[PathBuf], item_filter: Option<&str>, ) -> Result, ApproveError> { - let targets = resolve_reanchor_targets(paths).map_err(ApproveError::Parse)?; + let targets = resolve_sidecar_targets(paths).map_err(ApproveError::Parse)?; if targets.is_empty() { return Err(ApproveError::NoTargets); } diff --git a/crates/liyi/src/approve.rs.liyi.jsonc b/crates/liyi/src/approve.rs.liyi.jsonc index 50ec563..671f32b 100644 --- a/crates/liyi/src/approve.rs.liyi.jsonc +++ b/crates/liyi/src/approve.rs.liyi.jsonc @@ -60,13 +60,13 @@ 130 ], "tree_path": "fn::collect_approval_candidates", - "source_hash": "sha256:1fc98ea41edd3bed9c13d936635399d5e27a81f1f8acec326368b1eb532bda7c", + "source_hash": "sha256:f16f381eb67bf6126a0acfe022aac1ee575696b1d32a5cb46cee6bf2eb15a1d2", "source_anchor": "pub fn collect_approval_candidates(" }, { "item": "apply_approval_decisions", "reviewed": false, - "intent": "Apply a parallel slice of Decision values to the candidates, grouped by sidecar file. For Yes: set reviewed=true and reanchor hashes. For No: set reviewed=false. For Skip: no mutation. Write back modified sidecars unless dry_run. Returns per-sidecar ApproveResult.", + "intent": "Apply a parallel slice of Decision values to the candidates, grouped by sidecar file. For Yes: set reviewed=true and fill hashes. For No: set reviewed=false. For Skip: no mutation. Write back modified sidecars unless dry_run. 
Returns per-sidecar ApproveResult.", "source_span": [ 137, 206 diff --git a/crates/liyi/src/check.rs b/crates/liyi/src/check.rs index 82a0e9f..d0cb06d 100644 --- a/crates/liyi/src/check.rs +++ b/crates/liyi/src/check.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use std::collections::{BTreeMap, HashMap}; use std::fs; use std::path::{Path, PathBuf}; @@ -7,11 +7,11 @@ use crate::diagnostics::{ }; use crate::discovery::{SidecarEntry, discover}; use crate::hashing::{SpanError, hash_span}; -use crate::markers::{SourceMarker, scan_markers}; +use crate::markers::{SourceMarker, requirement_spans, scan_markers}; use crate::schema::validate_version; use crate::shift::{ShiftResult, detect_shift}; use crate::sidecar::{Spec, parse_sidecar, write_sidecar}; -use crate::tree_path::{detect_language, resolve_tree_path}; +use crate::tree_path::{compute_tree_path, detect_language, resolve_tree_path}; // --------------------------------------------------------------------------- // Internal types @@ -338,7 +338,7 @@ fn check_sidecar( }, severity: Severity::Error, message: e, - fix_hint: Some(format!("liyi reanchor --migrate {rel_sidecar}")), + fix_hint: Some(format!("liyi migrate {rel_sidecar}")), }); return; } @@ -416,6 +416,7 @@ fn check_sidecar( }; let source_markers = scan_markers(&source_content); + let marker_span_map = requirement_spans(&source_markers); let mut modified = false; // 5. 
Check each spec @@ -444,6 +445,11 @@ fn check_sidecar( if fix { item.source_hash = Some(computed_hash.clone()); item.source_anchor = Some(computed_anchor.clone()); + let lang = detect_language(&entry.source_path); + if let Some(l) = lang { + item.tree_path = + compute_tree_path(&source_content, item.source_span, l); + } modified = true; } diagnostics.push(Diagnostic { @@ -452,7 +458,7 @@ fn check_sidecar( kind: DiagnosticKind::Stale, severity: Severity::Warning, message: "missing source_hash".into(), - fix_hint: Some(format!("liyi reanchor {rel_sidecar}")), + fix_hint: Some("liyi check --fix".into()), }); } else { // Hash mismatch — try tree_path first, then shift @@ -498,6 +504,10 @@ fn check_sidecar( item.source_hash = Some(h); item.source_anchor = Some(a); } + if let Some(l) = lang { + item.tree_path = + compute_tree_path(&source_content, new_span, l); + } modified = true; } diagnostics.push(Diagnostic { @@ -525,6 +535,10 @@ fn check_sidecar( // Intentionally NOT updating hash — // leaves the spec stale so the next // `liyi check` flags it. + if let Some(l) = lang { + item.tree_path = + compute_tree_path(&source_content, new_span, l); + } modified = true; } let msg = if new_span != old_span { @@ -558,6 +572,11 @@ fn check_sidecar( item.source_hash = Some(h); item.source_anchor = Some(a); } + let lang = detect_language(&entry.source_path); + if let Some(l) = lang { + item.tree_path = + compute_tree_path(&source_content, new_span, l); + } modified = true; } diagnostics.push(Diagnostic { @@ -638,6 +657,10 @@ fn check_sidecar( item.source_hash = Some(h); item.source_anchor = Some(a); } + if let Some(l) = lang { + item.tree_path = + compute_tree_path(&source_content, new_span, l); + } modified = true; } diagnostics.push(Diagnostic { @@ -659,6 +682,10 @@ fn check_sidecar( // but leave hash stale. 
if fix { item.source_span = new_span; + if let Some(l) = lang { + item.tree_path = + compute_tree_path(&source_content, new_span, l); + } modified = true; } diagnostics.push(Diagnostic { @@ -688,7 +715,7 @@ fn check_sidecar( }, severity: Severity::Error, message: detail, - fix_hint: Some(format!("liyi reanchor {rel_sidecar}")), + fix_hint: Some("liyi check --fix".into()), }); } } @@ -814,7 +841,7 @@ fn check_sidecar( item.related.as_ref().is_some_and(|r| r.contains_key(name)); if !has_edge { if fix { - let related = item.related.get_or_insert_with(HashMap::new); + let related = item.related.get_or_insert_with(BTreeMap::new); related.insert(name.clone(), None); modified = true; } @@ -837,6 +864,18 @@ fn check_sidecar( } Spec::Requirement(req) => { let label = req.requirement.clone(); + + // Try marker-based span recovery first: if the file has + // @liyi:end-requirement markers, use those for span. + if let Some(&marker_span) = marker_span_map.get(&req.requirement) + && marker_span != req.source_span + { + req.source_span = marker_span; + if fix { + modified = true; + } + } + match hash_span(&source_content, req.source_span) { Ok((computed_hash, computed_anchor)) => { let is_current = req.source_hash.as_ref() == Some(&computed_hash); @@ -903,6 +942,10 @@ fn check_sidecar( req.source_hash = Some(h); req.source_anchor = Some(a); } + if let Some(l) = lang { + req.tree_path = + compute_tree_path(&source_content, new_span, l); + } modified = true; } diagnostics.push(Diagnostic { @@ -922,6 +965,10 @@ fn check_sidecar( } else { if fix { req.source_span = new_span; + if let Some(l) = lang { + req.tree_path = + compute_tree_path(&source_content, new_span, l); + } modified = true; } diagnostics.push(Diagnostic { @@ -951,7 +998,7 @@ fn check_sidecar( }, severity: Severity::Error, message: detail, - fix_hint: Some(format!("liyi reanchor {rel_sidecar}")), + fix_hint: Some("liyi check --fix".into()), }); } } diff --git a/crates/liyi/src/check.rs.liyi.jsonc 
b/crates/liyi/src/check.rs.liyi.jsonc index e830496..47aaa84 100644 --- a/crates/liyi/src/check.rs.liyi.jsonc +++ b/crates/liyi/src/check.rs.liyi.jsonc @@ -27,25 +27,25 @@ "source_hash": "sha256:13ca74d5432770cc302492fcc1b8c60cee849726441dc80547cf0d20917209f6", "source_anchor": "pub fn run_check(", "related": { + "cycle-detection": null, "requirement-discovery-global": null, - "requirement-name-uniqueness": null, - "cycle-detection": null + "requirement-name-uniqueness": null } }, { "item": "check_sidecar", "reviewed": false, - "intent": "For a single sidecar entry: parse the sidecar, validate its version, verify the source file exists, then for each spec check hash freshness (with shift detection and --fix support), review status (sidecar reviewed flag or @liyi:intent marker), trivial/ignore markers, and related-requirement edges. Write the sidecar back if --fix produced modifications.", + "intent": "For a single sidecar entry: parse the sidecar, validate its version, verify the source file exists, then for each spec check hash freshness (with shift detection, tree_path computation, and --fix support), review status (sidecar reviewed flag or @liyi:intent marker), trivial/ignore markers, and related-requirement edges. Write the sidecar back if --fix produced modifications.", "source_span": [ 282, - 983 + 1030 ], "tree_path": "fn::check_sidecar", - "source_hash": "sha256:6e82d215922ba7812b9fd20e105f32469e581ac5d863e9e007c8c57dbd7c8484", + "source_hash": "sha256:15af1ed0747e0e2f748ef2c59dc1fcdf916f08893cd6f0488e8070ea96aae6a9", "source_anchor": "fn check_sidecar(", "related": { - "reviewed-semantics": null, - "fix-semantic-drift-protection": null + "fix-semantic-drift-protection": null, + "reviewed-semantics": null } }, { @@ -53,8 +53,8 @@ "reviewed": false, "intent": "Read a file's contents with caching: return the cached string if already loaded, otherwise read from disk, store in the cache, and return. 
Return None on I/O failure.", "source_span": [ - 1048, - 1059 + 1095, + 1106 ], "tree_path": "fn::read_cached", "source_hash": "sha256:77c7602b283fb2e67c7953f98ef11b417c83903d96011f370b7b0421778f52c2", diff --git a/crates/liyi/src/discovery.rs b/crates/liyi/src/discovery.rs index 0d42a10..bd55327 100644 --- a/crates/liyi/src/discovery.rs +++ b/crates/liyi/src/discovery.rs @@ -177,6 +177,37 @@ fn source_name_from_sidecar(sidecar: &Path) -> String { .to_string() } +/// Expand a list of file/directory paths into concrete `.liyi.jsonc` file +/// paths. If a path is a directory, walk it recursively (respecting +/// `.gitignore` and `.liyiignore`) and collect all sidecar files found. +/// If a path is a file, include it directly. +pub fn resolve_sidecar_targets(paths: &[PathBuf]) -> Result, String> { + let mut result: Vec = Vec::new(); + for p in paths { + if p.is_dir() { + let walker = WalkBuilder::new(p) + .add_custom_ignore_filename(".liyiignore") + .build(); + for entry in walker { + let entry = entry.map_err(|e| format!("walk error: {e}"))?; + if entry.file_type().is_some_and(|ft| ft.is_file()) + && let Some(name) = entry.path().file_name().and_then(|n| n.to_str()) + && name.ends_with(SIDECAR_SUFFIX) + { + result.push(entry.into_path()); + } + } + } else if p.is_file() { + result.push(p.clone()); + } else { + return Err(format!("path does not exist: {}", p.display())); + } + } + result.sort(); + result.dedup(); + Ok(result) +} + /// Compute `path` relative to `base` using pure lexical processing. fn pathdiff(path: &Path, base: &Path) -> Option { path.strip_prefix(base) diff --git a/crates/liyi/src/markers.rs b/crates/liyi/src/markers.rs index 7b4ca14..2b8058f 100644 --- a/crates/liyi/src/markers.rs +++ b/crates/liyi/src/markers.rs @@ -1,3 +1,5 @@ +use std::collections::HashMap; + /// Source-file marker scanner with full-width normalization and multilingual aliases. /// /// A discovered marker in a source file. 
@@ -10,6 +12,10 @@ pub enum SourceMarker { name: String, line: usize, }, + EndRequirement { + name: String, + line: usize, + }, Related { name: String, line: usize, @@ -69,6 +75,7 @@ const CANON_TRIVIAL: &str = "\x40liyi:trivial"; const CANON_NONTRIVIAL: &str = "\x40liyi:nontrivial"; const CANON_MODULE: &str = "\x40liyi:module"; const CANON_REQUIREMENT: &str = "\x40liyi:requirement"; +const CANON_END_REQUIREMENT: &str = "\x40liyi:end-requirement"; const CANON_RELATED: &str = "\x40liyi:related"; const CANON_INTENT: &str = "\x40liyi:intent"; @@ -100,6 +107,13 @@ const ALIAS_TABLE: &[(&str, &str)] = &[ ("\x40liyi:módulo", CANON_MODULE), ("\x40立意:モジュール", CANON_MODULE), ("\x40립의:모듈", CANON_MODULE), + // end-requirement (must precede requirement — longer aliases match first) + (CANON_END_REQUIREMENT, CANON_END_REQUIREMENT), + ("\x40立意:需求结束", CANON_END_REQUIREMENT), + ("\x40liyi:fin-requisito", CANON_END_REQUIREMENT), + ("\x40立意:要件終", CANON_END_REQUIREMENT), + ("\x40liyi:fin-exigence", CANON_END_REQUIREMENT), + ("\x40립의:요건끝", CANON_END_REQUIREMENT), // requirement (CANON_REQUIREMENT, CANON_REQUIREMENT), ("\x40立意:需求", CANON_REQUIREMENT), @@ -284,6 +298,14 @@ pub fn scan_markers(content: &str) -> Vec { }); } } + CANON_END_REQUIREMENT => { + if let Some(name) = extract_name(rest) { + markers.push(SourceMarker::EndRequirement { + name, + line: line_num, + }); + } + } CANON_RELATED => { if let Some(name) = extract_name(rest) { markers.push(SourceMarker::Related { @@ -320,6 +342,32 @@ pub fn scan_markers(content: &str) -> Vec { markers } +/// Build a map from requirement name to `[start_line, end_line]` spans +/// by pairing `Requirement` and `EndRequirement` markers from a scan result. +/// +/// Only requirements that have a matching `EndRequirement` with the same +/// name are included. Unpaired markers are silently skipped (the linter +/// can diagnose those separately). 
+pub fn requirement_spans(markers: &[SourceMarker]) -> HashMap { + let mut opens: HashMap = HashMap::new(); + let mut spans: HashMap = HashMap::new(); + + for m in markers { + match m { + SourceMarker::Requirement { name, line } => { + opens.insert(name.clone(), *line); + } + SourceMarker::EndRequirement { name, line } => { + if let Some(start) = opens.remove(name) { + spans.insert(name.clone(), [start, *line]); + } + } + _ => {} + } + } + spans +} + #[cfg(test)] mod tests { use super::*; @@ -372,6 +420,52 @@ mod tests { ); } + #[test] + fn scan_end_requirement_paren() { + let m = scan_markers("\n"); + assert_eq!(m.len(), 1); + assert!( + matches!(&m[0], SourceMarker::EndRequirement { name, line: 1 } if name == "exit-codes") + ); + } + + #[test] + fn scan_end_requirement_space() { + let m = scan_markers("\n"); + assert_eq!(m.len(), 1); + assert!( + matches!(&m[0], SourceMarker::EndRequirement { name, line: 1 } if name == "exit-codes") + ); + } + + #[test] + fn scan_end_requirement_chinese_alias() { + let m = scan_markers( + "\n", + ); + assert_eq!(m.len(), 1); + assert!( + matches!(&m[0], SourceMarker::EndRequirement { name, line: 1 } if name == "exit-codes") + ); + } + + #[test] + fn scan_requirement_and_end_requirement_pair() { + let input = "\ +\n\ +Exit codes: 0 = clean, 1 = failures.\n\ +\n\ +"; + let m = scan_markers(input); + assert_eq!(m.len(), 2); + assert!( + matches!(&m[0], SourceMarker::Requirement { name, line: 1 } if name == "exit-codes") + ); + assert!( + matches!(&m[1], SourceMarker::EndRequirement { name, line: 3 } if name == "exit-codes") + ); + } + #[test] fn scan_related() { let m = scan_markers("// \x40liyi:related some_req\n"); diff --git a/crates/liyi/src/markers.rs.liyi.jsonc b/crates/liyi/src/markers.rs.liyi.jsonc index b78e1a7..8dfcd42 100644 --- a/crates/liyi/src/markers.rs.liyi.jsonc +++ b/crates/liyi/src/markers.rs.liyi.jsonc @@ -9,19 +9,19 @@ 57, 62 ], - "source_hash": 
"sha256:c2ad7f02250feab1d1ed85efd874b1782fcc6ea8c197de7f4a9f4517ef19d96d", - "source_anchor": "/// scanner matching these string constants — the classic quine-escape problem." + "source_hash": "sha256:53824f6af56160b7bd65c7b0783b5e1ffb90516088d408e8b0c9b01c648bcadb", + "source_anchor": "// Alias table — maps every accepted marker string to its canonical form." }, { "item": "SourceMarker", "reviewed": false, "intent": "Enumerate all recognized @liyi: marker types (Module, Requirement, Related, Intent, Trivial, Ignore, Nontrivial) with their associated data (name, prose, line number). Each variant carries a 1-indexed line number.", "source_span": [ - 5, - 32 + 7, + 38 ], "tree_path": "enum::SourceMarker", - "source_hash": "sha256:9914dee0aa2de6a23535773503f9d4af08d5704364faef687ab3ed9f73f4a078", + "source_hash": "sha256:03eb78a070a414027c092464920fea46fa92fa89da2ed2bcf6b03682bce2d31a", "source_anchor": "pub enum SourceMarker {" }, { @@ -29,8 +29,8 @@ "reviewed": false, "intent": "Replace full-width Unicode punctuation (@ → @, : → :, ( → (, ) → )) with ASCII equivalents so that markers written with CJK input methods are recognized.", "source_span": [ - 36, - 48 + 42, + 54 ], "tree_path": "fn::normalize_line", "source_hash": "sha256:042886b54393166d59d9cd573722d841651cc1eba4321dc0f62d44de39379b09", @@ -44,11 +44,11 @@ "reviewed": false, "intent": "Map every accepted marker alias (English canonical forms plus Chinese, Japanese, Korean, Spanish, French, Portuguese variants) to its canonical @liyi:* form. 
This table is the single source of truth for multilingual marker recognition.", "source_span": [ - 77, - 125 + 84, + 139 ], "tree_path": "const::ALIAS_TABLE", - "source_hash": "sha256:0eebe3f11964541df61a658e4aa067747b473032480a5f3f0778ec8e5c88d9f4", + "source_hash": "sha256:d546bd0d6ebccebadea5dd37108f457bbb0804cdcc31665ac948583fda1215c8", "source_anchor": "const ALIAS_TABLE: &[(&str, &str)] = &[", "related": { "marker-normalization": null @@ -59,8 +59,8 @@ "reviewed": false, "intent": "Search a normalized line for any known marker alias from ALIAS_TABLE. Return the canonical marker keyword, the byte offset of the match start, and the byte offset past the matched alias, or None if no marker is found.", "source_span": [ - 130, - 137 + 144, + 151 ], "tree_path": "fn::find_marker", "source_hash": "sha256:f69d93143a9f03203a2ea48d2342b9288d5ce42383a2d4bc0946567d0cdd6817", @@ -74,8 +74,8 @@ "reviewed": false, "intent": "Define the set of quotation-mark characters that suppress marker detection when they immediately precede the @ of a marker. Must cover ASCII quotes, typographic quotes, CJK corner brackets, and guillemets.", "source_span": [ - 169, - 181 + 183, + 195 ], "tree_path": "const::QUOTE_CHARS", "source_hash": "sha256:006f1fb3b6a729bd108d4eaef8d2aba2154414b3cbbad05787d20c006756611f", @@ -87,8 +87,8 @@ "reviewed": false, "intent": "Determine whether a byte position on a line falls inside an inline backtick code span by counting backtick characters before the position. 
Odd count means inside code, even means outside.", "source_span": [ - 186, - 197 + 200, + 211 ], "tree_path": "fn::is_in_inline_code", "source_hash": "sha256:535d27178d75557f1db35fd81223f988dfb924191959049b97c7eb9212444bae", @@ -100,8 +100,8 @@ "reviewed": false, "intent": "Return true if the character immediately before a byte position on a line is any character in the QUOTE_CHARS set (a quotation mark across locales).", "source_span": [ - 201, - 208 + 215, + 222 ], "tree_path": "fn::preceded_by_quote", "source_hash": "sha256:a88665af3e20b2f6f189402eeac2638e4f496bc80a54f9aa15e61d1ca8bfe1b2", @@ -113,8 +113,8 @@ "reviewed": false, "intent": "Return true if a line (after stripping leading whitespace) starts with ``` or ~~~, indicating a Markdown fenced code block boundary.", "source_span": [ - 212, - 215 + 226, + 229 ], "tree_path": "fn::is_fence_delimiter", "source_hash": "sha256:711e776c8924917ac4ef11c26546de9e31a80c466b2512cb854135558b6b98fe", @@ -129,8 +129,8 @@ "reviewed": false, "intent": "Extract a name argument from the text after a marker keyword: if the first non-whitespace character is '(', take the content up to the matching ')'; otherwise take the first whitespace-delimited token. Return None if no name is present.", "source_span": [ - 142, - 159 + 156, + 173 ], "tree_path": "fn::extract_name", "source_hash": "sha256:49fcb7884fcec40dcb0b329d971eddae3ee78ef7c167a0e63726c37a73ce2e50", @@ -141,11 +141,11 @@ "reviewed": false, "intent": "Scan all lines of a source file, normalize each line for full-width characters, and return a list of SourceMarker values for every recognized @liyi: marker found. Line numbers are 1-indexed. Markers inside fenced code blocks, inline backtick spans, or immediately after quotation marks are suppressed (NL-quoting). 
Handles =doc and =文档 sentinels for intent markers.", "source_span": [ - 226, - 321 + 240, + 343 ], "tree_path": "fn::scan_markers", - "source_hash": "sha256:5a9629649bab1ac1ed9efb15dfdf5ecb20973369de24b7fcaa12b8a0f12a87da", + "source_hash": "sha256:cce25919ff28dabeb132681d73e8d0aae8a090058d99e8bda6b92b3c7284ec2f", "source_anchor": "pub fn scan_markers(content: &str) -> Vec {", "related": { "quine-escape-in-source": null, diff --git a/crates/liyi/src/reanchor.rs b/crates/liyi/src/reanchor.rs index 61b1648..ccac66c 100644 --- a/crates/liyi/src/reanchor.rs +++ b/crates/liyi/src/reanchor.rs @@ -1,41 +1,15 @@ use std::path::{Path, PathBuf}; +use crate::discovery::resolve_sidecar_targets; use crate::hashing::hash_span; +use crate::markers::{requirement_spans, scan_markers}; use crate::schema::migrate; use crate::sidecar::{Spec, parse_sidecar, write_sidecar}; use crate::tree_path::{compute_tree_path, detect_language, resolve_tree_path}; -const SIDECAR_SUFFIX: &str = ".liyi.jsonc"; - -/// Expand a list of file/directory paths into concrete `.liyi.jsonc` file -/// paths. If a path is a directory, walk it recursively (respecting -/// `.gitignore` and `.liyiignore`) and collect all sidecar files found. -/// If a path is a file, include it directly. +/// Backward-compatible alias for [`crate::discovery::resolve_sidecar_targets`]. 
pub fn resolve_reanchor_targets(paths: &[PathBuf]) -> Result, String> { - let mut result: Vec = Vec::new(); - for p in paths { - if p.is_dir() { - let walker = ignore::WalkBuilder::new(p) - .add_custom_ignore_filename(".liyiignore") - .build(); - for entry in walker { - let entry = entry.map_err(|e| format!("walk error: {e}"))?; - if entry.file_type().is_some_and(|ft| ft.is_file()) - && let Some(name) = entry.path().file_name().and_then(|n| n.to_str()) - && name.ends_with(SIDECAR_SUFFIX) - { - result.push(entry.into_path()); - } - } - } else if p.is_file() { - result.push(p.clone()); - } else { - return Err(format!("path does not exist: {}", p.display())); - } - } - result.sort(); - result.dedup(); - Ok(result) + resolve_sidecar_targets(paths) } /// Re-hash source spans in a sidecar file. @@ -47,7 +21,7 @@ pub fn resolve_reanchor_targets(paths: &[PathBuf]) -> Result, Strin /// span. Then recompute hash/anchor. When tree_path is empty or the language is /// unsupported, fall back to re-hashing at the recorded span. // @liyi:related tool-managed-fields -// @liyi:related tree-path-reanchor-behavior +// @liyi:related tree-path-fix-behavior // @liyi:related tree-path-empty-fallback // @liyi:related fix-never-modifies-human-fields // @liyi:related liyi-sidecar-naming-convention @@ -80,6 +54,15 @@ pub fn run_reanchor( let lang = detect_language(Path::new(source_path)); + // For files without tree-sitter support, build a span map from + // @liyi:requirement / @liyi:end-requirement marker pairs. + let marker_spans = if lang.is_none() { + let markers = scan_markers(&source_content); + requirement_spans(&markers) + } else { + std::collections::HashMap::new() + }; + for spec in &mut sidecar.specs { match spec { Spec::Item(item) => { @@ -118,11 +101,13 @@ pub fn run_reanchor( continue; // targeted mode only touches items } - // Tree-sitter span recovery for requirements + // Span recovery: prefer tree-sitter, then marker pairing. 
if let (false, Some(l)) = (req.tree_path.is_empty(), lang) && let Some(new_span) = resolve_tree_path(&source_content, &req.tree_path, l) { req.source_span = new_span; + } else if let Some(&new_span) = marker_spans.get(&req.requirement) { + req.source_span = new_span; } if let Some(l) = lang { diff --git a/crates/liyi/src/reanchor.rs.liyi.jsonc b/crates/liyi/src/reanchor.rs.liyi.jsonc index f06172f..cf0a936 100644 --- a/crates/liyi/src/reanchor.rs.liyi.jsonc +++ b/crates/liyi/src/reanchor.rs.liyi.jsonc @@ -6,13 +6,13 @@ { "item": "resolve_reanchor_targets", "reviewed": false, - "intent": "Expand a list of file and directory paths into concrete .liyi.jsonc file paths. Directories are walked recursively respecting .gitignore and .liyiignore. Files are included directly. Non-existent paths produce an error. Results are sorted and deduplicated.", + "intent": "Backward-compatible alias that delegates to `discovery::resolve_sidecar_targets`.", "source_span": [ - 14, - 39 + 11, + 13 ], "tree_path": "fn::resolve_reanchor_targets", - "source_hash": "sha256:7e38409f96830a3b46075be4b7055db02c4a7a7ed388923588570bba01bbcf25", + "source_hash": "sha256:da4d96d82f119c72456ff6b56219b8c109c2b2a2a26f1282967e6317b30e00c8", "source_anchor": "pub fn resolve_reanchor_targets(paths: &[PathBuf]) -> Result, String> {" }, { @@ -20,18 +20,18 @@ "reviewed": false, "intent": "Re-hash source spans in a sidecar file. If do_migrate is set, run schema migration and write back. If target_item and target_span are both provided, update only that item's span and rehash. Otherwise, for every spec: if tree_path is non-empty and a tree-sitter grammar is available, locate the item by structural identity and update the span; then recompute hash/anchor. Reject if only one of --item/--span is provided. 
Derive the source file path by stripping the .liyi.jsonc suffix from the sidecar path.", "source_span": [ - 54, - 144 + 28, + 129 ], "tree_path": "fn::run_reanchor", - "source_hash": "sha256:28a7f0bffb54f043c3497210bc11cb4d040ca85aefef4c8f8453ce2c4afd94b8", + "source_hash": "sha256:3ee785c5a09ff156f5e606425771ab8c12f43445a62c48cc8be659ad7351e5d6", "source_anchor": "pub fn run_reanchor(", "related": { - "tree-path-reanchor-behavior": null, "tool-managed-fields": null, "tree-path-empty-fallback": null, "liyi-sidecar-naming-convention": null, - "fix-never-modifies-human-fields": null + "fix-never-modifies-human-fields": null, + "tree-path-fix-behavior": null } } ] diff --git a/crates/liyi/src/sidecar.rs b/crates/liyi/src/sidecar.rs index 7e2df00..4fdbba3 100644 --- a/crates/liyi/src/sidecar.rs +++ b/crates/liyi/src/sidecar.rs @@ -1,5 +1,5 @@ use serde::{Deserialize, Serialize}; -use std::collections::HashMap; +use std::collections::BTreeMap; /// Top-level `.liyi.jsonc` file representation #[derive(Debug, Serialize, Deserialize)] @@ -44,7 +44,7 @@ pub struct ItemSpec { pub confidence: Option, #[serde(skip_serializing_if = "Option::is_none")] - pub related: Option>>, + pub related: Option>>, } /// Details of a module requirement/invariant diff --git a/crates/liyi/src/sidecar.rs.liyi.jsonc b/crates/liyi/src/sidecar.rs.liyi.jsonc index 52f204a..29b51cc 100644 --- a/crates/liyi/src/sidecar.rs.liyi.jsonc +++ b/crates/liyi/src/sidecar.rs.liyi.jsonc @@ -46,8 +46,8 @@ 47, 60 ], - "source_hash": "sha256:80fb1f6f3b1f13c07d70d2cae39a895914c96f8e3ae18a908c4f0cb5ac6eafc0", - "source_anchor": " pub related: Option>>," + "source_hash": "sha256:534ce4d553fb5d621cb1f4f0b3c9fc03a4d8ae20d12da9c2e80dc3dad5f8dd52", + "source_anchor": " pub related: Option>>," }, { "item": "strip_jsonc_comments", diff --git a/crates/liyi/src/tree_path/lang_bash.rs b/crates/liyi/src/tree_path/lang_bash.rs new file mode 100644 index 0000000..4cb39cf --- /dev/null +++ b/crates/liyi/src/tree_path/lang_bash.rs 
@@ -0,0 +1,93 @@ +use super::LanguageConfig; + +/// Bash language configuration. +pub(super) static CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_bash::LANGUAGE.into(), + extensions: &["sh", "bash"], + kind_map: &[("fn", "function_definition")], + name_field: "name", + name_overrides: &[], + body_fields: &["body"], + custom_name: None, +}; + +#[cfg(test)] +mod tests { + use crate::tree_path::*; + use std::path::Path; + + const SAMPLE_BASH: &str = r#"#!/bin/bash + +# Helper function +function helper() { + echo "helping" +} + +# Main function with alternate syntax +main_func() { + echo "main" +} + +# Function with no parens style (some shells) +another_func { + echo "another" +} +"#; + + #[test] + fn resolve_bash_function_with_function_keyword() { + let span = resolve_tree_path(SAMPLE_BASH, "fn::helper", Language::Bash); + assert!(span.is_some(), "should resolve fn::helper"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_BASH.lines().collect(); + assert!( + lines[start - 1].contains("function helper"), + "span should point to helper function, got: {}", + lines[start - 1] + ); + } + + #[test] + fn resolve_bash_function_with_parens_syntax() { + let span = resolve_tree_path(SAMPLE_BASH, "fn::main_func", Language::Bash); + assert!(span.is_some(), "should resolve fn::main_func"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_BASH.lines().collect(); + assert!( + lines[start - 1].contains("main_func()"), + "span should point to main_func function, got: {}", + lines[start - 1] + ); + } + + #[test] + fn compute_bash_function_path() { + // Use resolve to get the span, then verify compute produces the same path + let resolved_span = resolve_tree_path(SAMPLE_BASH, "fn::helper", Language::Bash).unwrap(); + let path = compute_tree_path(SAMPLE_BASH, resolved_span, Language::Bash); + assert_eq!(path, "fn::helper"); + } + + #[test] + fn roundtrip_bash() { + let resolved_span = resolve_tree_path(SAMPLE_BASH, 
"fn::helper", Language::Bash).unwrap(); + + let computed_path = compute_tree_path(SAMPLE_BASH, resolved_span, Language::Bash); + assert_eq!(computed_path, "fn::helper"); + + let re_resolved = resolve_tree_path(SAMPLE_BASH, &computed_path, Language::Bash).unwrap(); + assert_eq!(re_resolved, resolved_span); + } + + #[test] + fn detect_bash_extensions() { + assert_eq!( + detect_language(Path::new("script.sh")), + Some(Language::Bash) + ); + assert_eq!( + detect_language(Path::new("script.bash")), + Some(Language::Bash) + ); + } +} diff --git a/crates/liyi/src/tree_path/lang_bash.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_bash.rs.liyi.jsonc new file mode 100644 index 0000000..da84352 --- /dev/null +++ b/crates/liyi/src/tree_path/lang_bash.rs.liyi.jsonc @@ -0,0 +1,19 @@ +// liyi v0.1 spec file +{ + "version": "0.1", + "source": "crates/liyi/src/tree_path/lang_bash.rs", + "specs": [ + { + "item": "CONFIG", + "reviewed": false, + "intent": "Define the Bash language configuration for tree_path resolution: register tree-sitter-bash grammar v0.25.1, .sh/.bash extensions, kind mapping for fn (function_definition), using standard name field with no custom name extraction and body field for function body traversal.", + "source_span": [ + 4, + 12 + ], + "tree_path": "static::CONFIG", + "source_hash": "sha256:0afdf1f1e9ab2d5031ac0f157339f189b73aca7179e5114fc21764fe73206b29", + "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" + } + ] +} diff --git a/crates/liyi/src/tree_path/lang_ruby.rs b/crates/liyi/src/tree_path/lang_ruby.rs new file mode 100644 index 0000000..0c395c9 --- /dev/null +++ b/crates/liyi/src/tree_path/lang_ruby.rs @@ -0,0 +1,186 @@ +use super::LanguageConfig; + +use tree_sitter::Node; + +/// Custom name extraction for Ruby nodes. +/// +/// Handles `singleton_method` (class methods like `def self.foo`) which encodes +/// the class name in the path: `singleton_method::ClassName.method_name`. 
+fn ruby_node_name(node: &Node, source: &str) -> Option { + match node.kind() { + "singleton_method" => { + let method_name_node = node.child_by_field_name("name")?; + let method_name = &source[method_name_node.byte_range()]; + + // The object field holds the receiver (e.g., `self` or class name) + // For `def self.foo`, object is `self` + // For `def ClassName.foo`, object is the class name identifier + let object = node.child_by_field_name("object")?; + let receiver = if object.kind() == "self" { + "self".to_string() + } else { + source[object.byte_range()].to_string() + }; + + Some(format!("{receiver}.{method_name}")) + } + _ => None, + } +} + +/// Ruby language configuration. +pub(super) static CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_ruby::LANGUAGE.into(), + extensions: &["rb", "rake", "gemspec"], + kind_map: &[ + ("fn", "method"), + ("class", "class"), + ("module", "module"), + ("singleton_method", "singleton_method"), + ], + name_field: "name", + name_overrides: &[], + body_fields: &["body", "statements"], + custom_name: Some(ruby_node_name), +}; + +#[cfg(test)] +mod tests { + use crate::tree_path::*; + use std::path::Path; + + const SAMPLE_RUBY: &str = r#"# A billing module +module Billing + class Invoice + def total + @items.sum + end + + def self.calculate_tax(amount) + amount * 0.1 + end + end + + def standalone_helper + "helper" + end +end + +class Order + def process + "processing" + end +end +"#; + + #[test] + fn resolve_ruby_module() { + let span = resolve_tree_path(SAMPLE_RUBY, "module::Billing", Language::Ruby); + assert!(span.is_some(), "should resolve module::Billing"); + } + + #[test] + fn resolve_ruby_class_in_module() { + let span = resolve_tree_path( + SAMPLE_RUBY, + "module::Billing::class::Invoice", + Language::Ruby, + ); + assert!( + span.is_some(), + "should resolve module::Billing::class::Invoice" + ); + } + + #[test] + fn resolve_ruby_method_in_class() { + let span = resolve_tree_path( + SAMPLE_RUBY, + 
"module::Billing::class::Invoice::fn::total", + Language::Ruby, + ); + assert!(span.is_some(), "should resolve nested method"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_RUBY.lines().collect(); + assert!( + lines[start - 1].contains("def total"), + "span should point to total method" + ); + } + + #[test] + fn resolve_ruby_singleton_method() { + let span = resolve_tree_path( + SAMPLE_RUBY, + "module::Billing::class::Invoice::singleton_method::self.calculate_tax", + Language::Ruby, + ); + assert!(span.is_some(), "should resolve singleton method"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_RUBY.lines().collect(); + assert!( + lines[start - 1].contains("def self.calculate_tax"), + "span should point to class method" + ); + } + + #[test] + fn resolve_ruby_module_function() { + // standalone_helper is defined directly in the module body + let span = resolve_tree_path( + SAMPLE_RUBY, + "module::Billing::fn::standalone_helper", + Language::Ruby, + ); + assert!(span.is_some(), "should resolve module-level function"); + } + + #[test] + fn resolve_ruby_top_level_class() { + let span = resolve_tree_path(SAMPLE_RUBY, "class::Order", Language::Ruby); + assert!(span.is_some(), "should resolve top-level class"); + } + + #[test] + fn resolve_ruby_method_in_top_level_class() { + let span = resolve_tree_path(SAMPLE_RUBY, "class::Order::fn::process", Language::Ruby); + assert!(span.is_some(), "should resolve method in top-level class"); + } + + #[test] + fn compute_ruby_method_path() { + let resolved_span = resolve_tree_path( + SAMPLE_RUBY, + "module::Billing::class::Invoice::fn::total", + Language::Ruby, + ) + .unwrap(); + let path = compute_tree_path(SAMPLE_RUBY, resolved_span, Language::Ruby); + assert_eq!(path, "module::Billing::class::Invoice::fn::total"); + } + + #[test] + fn roundtrip_ruby() { + let resolved_span = + resolve_tree_path(SAMPLE_RUBY, "class::Order::fn::process", Language::Ruby).unwrap(); + + let computed_path = 
compute_tree_path(SAMPLE_RUBY, resolved_span, Language::Ruby); + assert_eq!(computed_path, "class::Order::fn::process"); + + let re_resolved = resolve_tree_path(SAMPLE_RUBY, &computed_path, Language::Ruby).unwrap(); + assert_eq!(re_resolved, resolved_span); + } + + #[test] + fn detect_ruby_extensions() { + assert_eq!(detect_language(Path::new("app.rb")), Some(Language::Ruby)); + assert_eq!( + detect_language(Path::new("tasks.rake")), + Some(Language::Ruby) + ); + assert_eq!( + detect_language(Path::new("my_gem.gemspec")), + Some(Language::Ruby) + ); + } +} diff --git a/crates/liyi/src/tree_path/lang_ruby.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_ruby.rs.liyi.jsonc new file mode 100644 index 0000000..9bb67e0 --- /dev/null +++ b/crates/liyi/src/tree_path/lang_ruby.rs.liyi.jsonc @@ -0,0 +1,31 @@ +// liyi v0.1 spec file +{ + "version": "0.1", + "source": "crates/liyi/src/tree_path/lang_ruby.rs", + "specs": [ + { + "item": "CONFIG", + "reviewed": false, + "intent": "Define the Ruby language configuration for tree_path resolution: register tree-sitter-ruby grammar v0.23.1, .rb/.rake/.gemspec extensions, kind mappings for fn (method), class (class), module (module), and singleton_method with custom name extraction for receiver encoding.", + "source_span": [ + 32, + 45 + ], + "tree_path": "static::CONFIG", + "source_hash": "sha256:bde809cf0dc7c3f2f5aebd07f6f2da126783075cb50dc80fb941fc04c0da348b", + "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" + }, + { + "item": "ruby_node_name", + "reviewed": false, + "intent": "Custom name extraction for Ruby singleton methods (def self.foo). 
Encodes the receiver type into the name as ReceiverType.method_name or self.method_name, enabling disambiguation of class methods.", + "source_span": [ + 9, + 29 + ], + "tree_path": "fn::ruby_node_name", + "source_hash": "sha256:0cae66fec47ab63a69c7ed15043bf099bcb9e2f1cb935a42682cf6be168a1ec5", + "source_anchor": "fn ruby_node_name(node: &Node, source: &str) -> Option {" + } + ] +} diff --git a/crates/liyi/src/tree_path/lang_zig.rs b/crates/liyi/src/tree_path/lang_zig.rs new file mode 100644 index 0000000..1b319ae --- /dev/null +++ b/crates/liyi/src/tree_path/lang_zig.rs @@ -0,0 +1,199 @@ +use super::LanguageConfig; + +use tree_sitter::Node; + +/// Find the first child with a given kind. +fn find_child_by_kind<'a>(node: &Node<'a>, kind: &str) -> Option> { + let mut cursor = node.walk(); + node.children(&mut cursor).find(|c| c.kind() == kind) +} + +/// Custom name extraction for Zig nodes. +/// +/// Handles two Zig-specific patterns: +/// - `variable_declaration` with `const` qualifier holding a `struct_declaration`: +/// emits `struct::Name` instead of `const::Name` to support Zig's struct-as-namespace pattern. +/// - `function_declaration`: extracts name from child `identifier` node. +/// - `test_declaration`: extracts the name from the string literal. +fn zig_node_name(node: &Node, source: &str) -> Option { + match node.kind() { + "function_declaration" => { + // Find the identifier child which is the function name + find_child_by_kind(node, "identifier").map(|n| source[n.byte_range()].to_string()) + } + "variable_declaration" => { + // Check if this is a `const` declaration + let is_const = node.children(&mut node.walk()).any(|c| c.kind() == "const"); + + if !is_const { + return None; + } + + // Check if the value is a struct_declaration + let has_struct = node + .children(&mut node.walk()) + .any(|c| c.kind() == "struct_declaration"); + + if has_struct { + // This is `const Name = struct { ... 
}` — extract just the name + // (the "struct::" prefix is added by compute_tree_path) + find_child_by_kind(node, "identifier").map(|n| source[n.byte_range()].to_string()) + } else { + None + } + } + "test_declaration" => { + // Test declarations have a string child for the name + // e.g., test "my test" { ... } + find_child_by_kind(node, "string").map(|n| { + let raw = &source[n.byte_range()]; + // Remove surrounding quotes + raw.strip_prefix('"') + .and_then(|s| s.strip_suffix('"')) + .map(|s| s.to_string()) + .unwrap_or_default() + }) + } + _ => None, + } +} + +/// Zig language configuration. +pub(super) static CONFIG: LanguageConfig = LanguageConfig { + ts_language: || tree_sitter_zig::LANGUAGE.into(), + extensions: &["zig"], + kind_map: &[ + ("fn", "function_declaration"), + ("struct", "variable_declaration"), // const Name = struct { ... } + ("test", "test_declaration"), + ], + name_field: "", // Not used - we extract names via custom callback + name_overrides: &[], + // Zig uses "block" for function bodies, and "struct_declaration" is the + // container for struct-as-namespace contents (methods, fields). 
+ body_fields: &["block", "struct_declaration"], + custom_name: Some(zig_node_name), +}; + +#[cfg(test)] +mod tests { + use crate::tree_path::*; + use std::path::Path; + + const SAMPLE_ZIG: &str = r#"const std = @import("std"); + +const Point = struct { + x: i32, + y: i32, + + pub fn new(x: i32, y: i32) Point { + return Point{ .x = x, .y = y }; + } +}; + +const MAX_SIZE = 100; + +fn add(a: i32, b: i32) i32 { + return a + b; +} + +test "add function" { + try std.testing.expectEqual(add(2, 3), 5); +} +"#; + + #[test] + fn resolve_zig_function() { + let span = resolve_tree_path(SAMPLE_ZIG, "fn::add", Language::Zig); + assert!(span.is_some(), "should resolve fn::add"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_ZIG.lines().collect(); + assert!( + lines[start - 1].contains("fn add("), + "span should point to add function, got: {}", + lines[start - 1] + ); + } + + #[test] + fn resolve_zig_struct_as_namespace() { + let span = resolve_tree_path(SAMPLE_ZIG, "struct::Point", Language::Zig); + assert!(span.is_some(), "should resolve struct::Point"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_ZIG.lines().collect(); + assert!( + lines[start - 1].contains("const Point = struct"), + "span should point to Point struct definition" + ); + } + + #[test] + fn resolve_zig_method_in_struct() { + let span = resolve_tree_path(SAMPLE_ZIG, "struct::Point::fn::new", Language::Zig); + assert!(span.is_some(), "should resolve method in struct"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_ZIG.lines().collect(); + assert!( + lines[start - 1].contains("fn new("), + "span should point to new method" + ); + } + + #[test] + fn resolve_zig_test() { + let span = resolve_tree_path(SAMPLE_ZIG, "test::add function", Language::Zig); + assert!(span.is_some(), "should resolve test declaration"); + let [start, _end] = span.unwrap(); + let lines: Vec<&str> = SAMPLE_ZIG.lines().collect(); + assert!( + lines[start - 1].contains("test 
\"add function\""), + "span should point to test declaration" + ); + } + + #[test] + fn compute_zig_function_path() { + let resolved_span = resolve_tree_path(SAMPLE_ZIG, "fn::add", Language::Zig).unwrap(); + let path = compute_tree_path(SAMPLE_ZIG, resolved_span, Language::Zig); + assert_eq!(path, "fn::add"); + } + + #[test] + fn compute_zig_struct_namespace_path() { + let resolved_span = resolve_tree_path(SAMPLE_ZIG, "struct::Point", Language::Zig).unwrap(); + let path = compute_tree_path(SAMPLE_ZIG, resolved_span, Language::Zig); + assert_eq!(path, "struct::Point"); + } + + #[test] + fn roundtrip_zig() { + let resolved_span = resolve_tree_path(SAMPLE_ZIG, "fn::add", Language::Zig).unwrap(); + + let computed_path = compute_tree_path(SAMPLE_ZIG, resolved_span, Language::Zig); + assert_eq!(computed_path, "fn::add"); + + let re_resolved = resolve_tree_path(SAMPLE_ZIG, &computed_path, Language::Zig).unwrap(); + assert_eq!(re_resolved, resolved_span); + } + + #[test] + fn roundtrip_zig_struct_namespace() { + let resolved_span = + resolve_tree_path(SAMPLE_ZIG, "struct::Point::fn::new", Language::Zig).unwrap(); + + let computed_path = compute_tree_path(SAMPLE_ZIG, resolved_span, Language::Zig); + assert_eq!(computed_path, "struct::Point::fn::new"); + + let re_resolved = resolve_tree_path(SAMPLE_ZIG, &computed_path, Language::Zig).unwrap(); + assert_eq!(re_resolved, resolved_span); + } + + #[test] + fn detect_zig_extensions() { + assert_eq!(detect_language(Path::new("main.zig")), Some(Language::Zig)); + assert_eq!( + detect_language(Path::new("lib/foo.zig")), + Some(Language::Zig) + ); + } +} diff --git a/crates/liyi/src/tree_path/lang_zig.rs.liyi.jsonc b/crates/liyi/src/tree_path/lang_zig.rs.liyi.jsonc new file mode 100644 index 0000000..564fa7a --- /dev/null +++ b/crates/liyi/src/tree_path/lang_zig.rs.liyi.jsonc @@ -0,0 +1,43 @@ +// liyi v0.1 spec file +{ + "version": "0.1", + "source": "crates/liyi/src/tree_path/lang_zig.rs", + "specs": [ + { + "item": "CONFIG", + 
"reviewed": false, + "intent": "Define the Zig language configuration for tree_path resolution: register tree-sitter-zig grammar v1.1.2, .zig extension, kind mappings for fn (function_declaration), struct (variable_declaration for const Name = struct { ... }), and test (test_declaration). Uses custom name extraction and struct_declaration as body container for struct-as-namespace pattern.", + "source_span": [ + 62, + 76 + ], + "tree_path": "static::CONFIG", + "source_hash": "sha256:54a2ac5ee9d70ab368f11ba3fcc6cd7ffad5bac03f61e43b377ab2c29af73ba9", + "source_anchor": "pub(super) static CONFIG: LanguageConfig = LanguageConfig {" + }, + { + "item": "zig_node_name", + "reviewed": false, + "intent": "Custom name extraction for Zig nodes. Handles function_declaration (finds identifier child), variable_declaration (detects const + struct_declaration pattern for struct-as-namespace), and test_declaration (extracts string literal content for test names).", + "source_span": [ + 18, + 59 + ], + "tree_path": "fn::zig_node_name", + "source_hash": "sha256:c7ad83623acd418403fcf7630c35bd2286ebbed7c3a6d668aa5590d110b62c02", + "source_anchor": "fn zig_node_name(node: &Node, source: &str) -> Option {" + }, + { + "item": "find_child_by_kind", + "reviewed": false, + "intent": "Helper function to find the first child node with a given kind. Used by zig_node_name for identifier and string extraction.", + "source_span": [ + 6, + 9 + ], + "tree_path": "fn::find_child_by_kind", + "source_hash": "sha256:b6c6c11eb73bb8ae27a5f77ebde1125b9e808b6efa96850bb3f398ddff5604ab", + "source_anchor": "fn find_child_by_kind<'a>(node: &Node<'a>, kind: &str) -> Option> {" + } + ] +} diff --git a/crates/liyi/src/tree_path/mod.rs b/crates/liyi/src/tree_path/mod.rs index f300fe5..5627bdd 100644 --- a/crates/liyi/src/tree_path/mod.rs +++ b/crates/liyi/src/tree_path/mod.rs @@ -5,10 +5,11 @@ //! For example, `fn::add_money` or `impl::Money::fn::new`. //! //! 
When `tree_path` is populated and a tree-sitter grammar is available for -//! the source language, `liyi reanchor` and `liyi check --fix` use it to +//! the source language, `liyi check --fix` uses it to //! locate items by structural identity, making span recovery deterministic //! across formatting changes, import additions, and line reflows. +mod lang_bash; mod lang_c; mod lang_cpp; mod lang_csharp; @@ -19,9 +20,12 @@ mod lang_kotlin; mod lang_objc; mod lang_php; mod lang_python; +mod lang_ruby; mod lang_rust; mod lang_swift; mod lang_typescript; +mod lang_zig; +pub mod parser; use std::borrow::Cow; use std::path::Path; @@ -123,7 +127,9 @@ impl LanguageConfig { /// Supported languages for tree_path resolution. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Language { + Bash, Rust, + Ruby, Python, Go, JavaScript, @@ -137,13 +143,16 @@ pub enum Language { ObjectiveC, Kotlin, Swift, + Zig, } impl Language { /// Get the language configuration for this language. fn config(&self) -> &'static LanguageConfig { match self { + Language::Bash => &lang_bash::CONFIG, Language::Rust => &lang_rust::CONFIG, + Language::Ruby => &lang_ruby::CONFIG, Language::Python => &lang_python::CONFIG, Language::Go => &lang_go::CONFIG, Language::JavaScript => &lang_javascript::CONFIG, @@ -157,6 +166,7 @@ impl Language { Language::ObjectiveC => &lang_objc::CONFIG, Language::Kotlin => &lang_kotlin::CONFIG, Language::Swift => &lang_swift::CONFIG, + Language::Zig => &lang_zig::CONFIG, } } @@ -176,15 +186,23 @@ impl Language { /// /// If two languages share an extension (unlikely with built-in languages), /// the first match in the following order is returned: -/// Rust → Python → Go → JavaScript → TypeScript → TSX → C → C++ → -/// Java → C# → PHP → Objective-C → Kotlin → Swift. +/// Bash → Rust → Ruby → Python → Go → JavaScript → TypeScript → TSX → C → C++ → +/// Java → C# → PHP → Objective-C → Kotlin → Swift → Zig. 
pub fn detect_language(path: &Path) -> Option { let ext = path.extension()?.to_str()?; + if lang_bash::CONFIG.matches_extension(ext) { + return Some(Language::Bash); + } + if lang_rust::CONFIG.matches_extension(ext) { return Some(Language::Rust); } + if lang_ruby::CONFIG.matches_extension(ext) { + return Some(Language::Ruby); + } + if lang_python::CONFIG.matches_extension(ext) { return Some(Language::Python); } @@ -228,6 +246,9 @@ pub fn detect_language(path: &Path) -> Option { if lang_swift::CONFIG.matches_extension(ext) { return Some(Language::Swift); } + if lang_zig::CONFIG.matches_extension(ext) { + return Some(Language::Zig); + } None } diff --git a/crates/liyi/src/tree_path/mod.rs.liyi.jsonc b/crates/liyi/src/tree_path/mod.rs.liyi.jsonc index 18303df..18ad9ff 100644 --- a/crates/liyi/src/tree_path/mod.rs.liyi.jsonc +++ b/crates/liyi/src/tree_path/mod.rs.liyi.jsonc @@ -8,8 +8,8 @@ "reviewed": false, "intent": "Define the data-driven abstraction for language-specific tree_path behaviour. 
Each field captures one language-dependent axis: grammar loader (ts_language), file extensions, kind shorthand mapping, name extraction field and overrides, body-descending fields, and an optional custom_name callback for languages with non-trivial name extraction (e.g., Go receiver encoding).", "source_span": [ - 36, - 53 + 40, + 57 ], "tree_path": "struct::LanguageConfig", "source_hash": "sha256:cc0ae5ada967354b9d5e9863be2c72136c5dd85832b29ee5e44e118d1c99f5da", @@ -20,11 +20,11 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 181, - 233 + 191, + 254 ], "tree_path": "fn::detect_language", - "source_hash": "sha256:93745e6791e0ebb9eb4704bbcdaeaa193791e75c010fd498ec8cf8c79e1bc26c", + "source_hash": "sha256:4b0b5ae1855a5953f477cac3fe787210af7e4e1692fa6f48f934b63fe5482e2d", "source_anchor": "pub fn detect_language(path: &Path) -> Option {" }, { @@ -32,11 +32,11 @@ "reviewed": false, "intent": "Enumerate all built-in tree-sitter languages for tree_path operations: Rust, Python, Go, JavaScript, TypeScript, and TSX. Each variant maps to a static LanguageConfig via config().", "source_span": [ - 125, - 140 + 129, + 147 ], "tree_path": "enum::Language", - "source_hash": "sha256:df5bfa956c1b92e1ab2320378cf6e1c79b0788feded9f8a4ccb2bf97ced49381", + "source_hash": "sha256:6fd356b6c60278caa7845762f2656d794abd3a113c1b120dfe4c8c8c627e8776", "source_anchor": "pub enum Language {" }, { @@ -44,8 +44,8 @@ "reviewed": false, "intent": "Extract the user-visible name of an AST node via the language's LanguageConfig. Checks the custom_name callback first (for complex patterns like Go receiver encoding). Falls back to name_overrides for special cases (e.g., impl_item uses type field). Otherwise reads the standard name field. 
Returns Cow::Owned for constructed names, Cow::Borrowed for field-extracted names.", "source_span": [ - 76, - 96 + 80, + 100 ], "tree_path": "impl::LanguageConfig::fn::node_name", "source_hash": "sha256:d459d381bbc30689c1dd009aa6df01f7815da0b36ed5592ff2b45da8abe27edd", @@ -56,8 +56,8 @@ "reviewed": false, "intent": "Handle Go-specific name extraction for four node kinds: method_declaration encodes receiver type into the name as ReceiverType.Method or (*ReceiverType).Method for pointer receivers; type_declaration navigates to the inner type_spec for the name; const_declaration and var_declaration similarly navigate to their inner spec nodes. Returns None for unrecognized node kinds to fall through to default name extraction.", "source_span": [ - 345, - 366 + 366, + 387 ], "tree_path": "fn::compute_tree_path", "source_hash": "sha256:30ecd47287f846a39cdbd906075c6eae16d286eda5c3bc92d87cfbae67ec2e74", @@ -68,8 +68,8 @@ "reviewed": false, "intent": "Parse a tree_path string into segments of (kind, name) pairs by splitting on '::' and grouping consecutive pairs. Return None if the number of parts is odd (malformed). Validate each kind against the known shorthand set.", "source_span": [ - 255, - 271 + 276, + 292 ], "tree_path": "fn::parse_tree_path", "source_hash": "sha256:eb1bdb126bb090d769612797d5428edd3c20ba72ba04dad58071bbfa955240c2", @@ -80,8 +80,8 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 278, - 295 + 299, + 316 ], "tree_path": "fn::resolve_tree_path", "source_hash": "sha256:8cd19d6e6704970f8cbead0b56b05a9196ca29b0439b37b31a819a958dc03dbe", @@ -92,8 +92,8 @@ "reviewed": false, "intent": "Walk tree-sitter children of the given parent to find nodes matching each path segment in order. For single-segment paths, return the matching child directly. 
For multi-segment paths, descend into the first matching child via resolve_in_body for subsequent segments.", "source_span": [ - 298, - 327 + 319, + 348 ], "tree_path": "fn::resolve_segments", "source_hash": "sha256:15731dca9653e45052c706fbc2f193fcfe96ca98afe00bbf259f23f86288c414", @@ -104,8 +104,8 @@ "reviewed": false, "intent": "Find subsequent path segments inside an item's body or declaration_list. Try the 'body' field first (mod, fn), then fall back to looking for a declaration_list child (impl, trait). Delegate to resolve_segments for the recursive match.", "source_span": [ - 330, - 338 + 351, + 359 ], "tree_path": "fn::resolve_in_body", "source_hash": "sha256:f1514f012bc8d300c425867e4a1cce1aaf72f1f58885eeaf24456114234473d6", @@ -116,8 +116,8 @@ "reviewed": false, "intent": "=doc", "source_span": [ - 345, - 366 + 366, + 387 ], "tree_path": "fn::compute_tree_path", "source_hash": "sha256:30ecd47287f846a39cdbd906075c6eae16d286eda5c3bc92d87cfbae67ec2e74", @@ -128,8 +128,8 @@ "reviewed": false, "intent": "Find the widest item-bearing tree-sitter node whose start and end rows both fall within [target_start, target_end]. Must handle the attribute-sibling pattern where Rust attributes (#[derive(...)]) are siblings of the item node — the sidecar span can start before the item node. Prefer the outermost (widest) item when multiple items fall within the range.", "source_span": [ - 374, - 420 + 395, + 441 ], "tree_path": "fn::find_item_in_range", "source_hash": "sha256:187c06169aae241150cb9bd88810da07aef5d967431ae25b50aab3ff111fc220", @@ -140,8 +140,8 @@ "reviewed": false, "intent": "Recursively walk from root to target node, collecting (kind::name) path segments. At the target node, push its segment and return true. During descent, only enter children that spatially contain the target. When a child's subtree contains the target, prepend the current node's segment if it is an item node. 
Return false if the target cannot be found.", "source_span": [ - 438, - 484 + 459, + 505 ], "tree_path": "fn::collect_path", "source_hash": "sha256:0086ee43dc7c085025e553af9914df58fab43d8e8b579486f21d5788d8d0d221", diff --git a/crates/liyi/src/tree_path/parser.rs b/crates/liyi/src/tree_path/parser.rs new file mode 100644 index 0000000..d94c885 --- /dev/null +++ b/crates/liyi/src/tree_path/parser.rs @@ -0,0 +1,399 @@ +//! tree_path parser — formal grammar implementation using nom. +//! +//! This module implements the tree_path grammar spec (v0.2) from +//! `docs/liyi-01x-roadmap.md` Appendix A. + +use nom::{ + IResult, Parser as _, + branch::alt, + bytes::complete::tag, + character::complete::{char, digit1, none_of, one_of}, + combinator::{map, recognize}, + multi::many0, + sequence::{delimited, pair, preceded}, +}; + +/// A segment in a tree_path — either a kind, name, or injection marker. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Segment { + /// Kind shorthand (e.g., "fn", "class", "struct") + Kind(String), + /// Item name (e.g., "add", "MyClass", "add function") + Name(String), + /// Injection marker for M9 (e.g., "//bash") + Injection(String), +} + +/// Parsed tree_path representation. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct TreePath { + pub segments: Vec, +} + +impl TreePath { + /// Parse a tree_path string. + pub fn parse(input: &str) -> Result { + match parse_tree_path(input) { + Ok(("", path)) => Ok(path), + Ok((remainder, _)) => Err(format!("Unexpected trailing input: {remainder:?}")), + Err(e) => Err(format!("Parse error: {e:?}")), + } + } + + /// Serialize a tree_path to string. 
+ pub fn serialize(&self) -> String { + let mut out = String::new(); + for (i, seg) in self.segments.iter().enumerate() { + // Injection markers attach to the preceding segment without :: + if i > 0 && !matches!(seg, Segment::Injection(_)) { + out.push_str("::"); + } + match seg { + Segment::Kind(k) => out.push_str(k), + Segment::Name(n) => out.push_str(&serialize_name(n)), + Segment::Injection(lang) => { + out.push_str("//"); + out.push_str(lang); + } + } + } + out + } +} + +/// Serialize a name, quoting if necessary. +fn serialize_name(name: &str) -> String { + // Check if we need quoting + let needs_quote = name.is_empty() + || name.contains('"') + || name.contains('\\') + || name.contains("::") + || name.contains(' ') + || name.contains('\t') + || name.contains('\n') + || name.contains(':') + || !is_simple_identifier(name); + + if !needs_quote { + return name.to_string(); + } + + // Escape quotes and backslashes + let escaped = name.replace('\\', "\\\\").replace('"', "\\\""); + format!("\"{escaped}\"") +} + +/// Check if a string is a simple identifier (no quoting needed). +fn is_simple_identifier(s: &str) -> bool { + let mut chars = s.chars(); + match chars.next() { + Some(c) if c.is_ascii_alphabetic() || c == '_' => {} + _ => return false, + } + chars.all(|c| { + c.is_ascii_alphanumeric() || c == '_' || c == '.' || c == '(' || c == ')' || c == '*' + }) +} + +/// Parse a complete tree_path. +fn parse_tree_path(input: &str) -> IResult<&str, TreePath> { + let (input, first) = parse_segment(input)?; + let (input, rest) = many0(alt(( + // Injection marker directly after a segment (no :: separator): run//bash + parse_injection_marker, + // Standard :: separated segment + preceded(tag("::"), parse_segment), + ))) + .parse(input)?; + let mut segments = vec![first]; + segments.extend(rest); + Ok((input, TreePath { segments })) +} + +/// Parse a single segment. 
+fn parse_segment(input: &str) -> IResult<&str, Segment> { + alt(( + parse_injection_marker, + map(parse_quoted_string, Segment::Name), + map(parse_simple_name, |s| { + // Heuristic: if it matches common kind patterns, treat as Kind + // This is a simplification — full implementation would check LanguageConfig + if is_common_kind(s) { + Segment::Kind(s.to_string()) + } else { + Segment::Name(s.to_string()) + } + }), + )) + .parse(input) +} + +/// Common kind shorthands (for heuristic parsing). +fn is_common_kind(s: &str) -> bool { + matches!( + s, + "fn" | "class" + | "struct" + | "enum" + | "trait" + | "impl" + | "mod" + | "module" + | "const" + | "type" + | "test" + | "namespace" + | "interface" + | "protocol" + ) +} + +/// Parse an injection marker (//lang). +fn parse_injection_marker(input: &str) -> IResult<&str, Segment> { + map(preceded(tag("//"), parse_identifier), |lang| { + Segment::Injection(lang.to_string()) + }) + .parse(input) +} + +/// Parse a quoted string. +fn parse_quoted_string(input: &str) -> IResult<&str, String> { + delimited( + char('"'), + map(many0(parse_escaped_char), |chars| { + chars.into_iter().collect() + }), + char('"'), + ) + .parse(input) +} + +/// Parse a single character or escaped sequence inside a quoted string. +fn parse_escaped_char(input: &str) -> IResult<&str, char> { + alt((preceded(char('\\'), one_of("\\\"n:t")), none_of("\""))).parse(input) +} + +/// Parse a simple name (unquoted identifier, number, or special values). +fn parse_simple_name(input: &str) -> IResult<&str, &str> { + recognize(alt(( + parse_identifier, + parse_number, + tag("self"), + tag("Self"), + ))) + .parse(input) +} + +/// Parse an identifier. +fn parse_identifier(input: &str) -> IResult<&str, &str> { + recognize(pair( + one_of("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"), + many0(one_of( + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_", + )), + )) + .parse(input) +} + +/// Parse a number. 
+fn parse_number(input: &str) -> IResult<&str, &str> { + digit1(input) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_simple_fn_path() { + let path = TreePath::parse("fn::add").unwrap(); + assert_eq!( + path.segments, + vec![ + Segment::Kind("fn".to_string()), + Segment::Name("add".to_string()) + ] + ); + } + + #[test] + fn parse_class_method_path() { + let path = TreePath::parse("class::MyClass::fn::method").unwrap(); + assert_eq!( + path.segments, + vec![ + Segment::Kind("class".to_string()), + Segment::Name("MyClass".to_string()), + Segment::Kind("fn".to_string()), + Segment::Name("method".to_string()), + ] + ); + } + + #[test] + fn parse_quoted_name_with_spaces() { + let path = TreePath::parse("test::\"add function\"").unwrap(); + assert_eq!( + path.segments, + vec![ + Segment::Kind("test".to_string()), + Segment::Name("add function".to_string()), + ] + ); + } + + #[test] + fn parse_quoted_name_with_colons() { + let path = TreePath::parse("fn::\"foo::bar\"").unwrap(); + assert_eq!( + path.segments, + vec![ + Segment::Kind("fn".to_string()), + Segment::Name("foo::bar".to_string()), + ] + ); + } + + #[test] + fn parse_escaped_quote() { + let path = TreePath::parse("test::\"with \\\"quote\\\"\"").unwrap(); + assert_eq!( + path.segments, + vec![ + Segment::Kind("test".to_string()), + Segment::Name("with \"quote\"".to_string()), + ] + ); + } + + #[test] + fn parse_injection_marker() { + // Injection appended to preceding segment (canonical M9 syntax) + let path = TreePath::parse("key::run//bash::fn::setup").unwrap(); + assert_eq!( + path.segments, + vec![ + Segment::Name("key".to_string()), + Segment::Name("run".to_string()), + Segment::Injection("bash".to_string()), + Segment::Kind("fn".to_string()), + Segment::Name("setup".to_string()), + ] + ); + } + + #[test] + fn parse_injection_marker_standalone() { + // Injection as standalone :: separated segment (also accepted) + let path = TreePath::parse("key::run:://bash::fn::setup").unwrap(); + 
assert_eq!( + path.segments, + vec![ + Segment::Name("key".to_string()), + Segment::Name("run".to_string()), + Segment::Injection("bash".to_string()), + Segment::Kind("fn".to_string()), + Segment::Name("setup".to_string()), + ] + ); + } + + #[test] + fn parse_module_nested_path() { + let path = TreePath::parse("module::Billing::class::Invoice::fn::total").unwrap(); + assert_eq!(path.segments.len(), 6); + } + + #[test] + fn parse_zig_struct_namespace() { + let path = TreePath::parse("struct::Point::fn::new").unwrap(); + assert_eq!( + path.segments, + vec![ + Segment::Kind("struct".to_string()), + Segment::Name("Point".to_string()), + Segment::Kind("fn".to_string()), + Segment::Name("new".to_string()), + ] + ); + } + + #[test] + fn serialize_simple_name() { + let path = TreePath { + segments: vec![ + Segment::Kind("fn".to_string()), + Segment::Name("add".to_string()), + ], + }; + assert_eq!(path.serialize(), "fn::add"); + } + + #[test] + fn serialize_name_with_spaces() { + let path = TreePath { + segments: vec![ + Segment::Kind("test".to_string()), + Segment::Name("add function".to_string()), + ], + }; + assert_eq!(path.serialize(), "test::\"add function\""); + } + + #[test] + fn serialize_name_with_double_colons() { + let path = TreePath { + segments: vec![ + Segment::Kind("fn".to_string()), + Segment::Name("foo::bar".to_string()), + ], + }; + assert_eq!(path.serialize(), "fn::\"foo::bar\""); + } + + #[test] + fn serialize_name_with_quote() { + let path = TreePath { + segments: vec![ + Segment::Kind("test".to_string()), + Segment::Name("with \"quote\"".to_string()), + ], + }; + assert_eq!(path.serialize(), "test::\"with \\\"quote\\\"\""); + } + + #[test] + fn roundtrip_simple_path() { + let original = "class::MyClass::fn::method"; + let path = TreePath::parse(original).unwrap(); + assert_eq!(path.serialize(), original); + } + + #[test] + fn roundtrip_complex_path() { + let original = "test::\"add function\""; + let path = TreePath::parse(original).unwrap(); + 
assert_eq!(path.serialize(), original); + } + + #[test] + fn roundtrip_with_escapes() { + let original = "test::\"with \\\"quote\\\"\""; + let path = TreePath::parse(original).unwrap(); + assert_eq!(path.serialize(), original); + } + + #[test] + fn roundtrip_injection_canonical() { + let original = "key::run//bash::fn::setup"; + let path = TreePath::parse(original).unwrap(); + assert_eq!(path.serialize(), original); + } + + #[test] + fn standalone_injection_serializes_canonical() { + // Standalone form (with ::) normalizes to canonical (without ::) + let path = TreePath::parse("key::run:://bash::fn::setup").unwrap(); + assert_eq!(path.serialize(), "key::run//bash::fn::setup"); + } +} diff --git a/crates/liyi/src/tree_path/parser.rs.liyi.jsonc b/crates/liyi/src/tree_path/parser.rs.liyi.jsonc new file mode 100644 index 0000000..46c9d51 --- /dev/null +++ b/crates/liyi/src/tree_path/parser.rs.liyi.jsonc @@ -0,0 +1,67 @@ +// liyi v0.1 spec file +{ + "version": "0.1", + "source": "crates/liyi/src/tree_path/parser.rs", + "specs": [ + { + "item": "Segment", + "reviewed": false, + "intent": "Represents a single segment in a parsed tree_path: Kind for kind shorthands (fn, class), Name for item names, or Injection for M9 language injection markers.", + "source_span": [ + 18, + 25 + ], + "tree_path": "enum::Segment", + "source_hash": "sha256:a39c9ad666dce2c942b63befc555c5ee3b0d31bd3a533892187c90441f107266", + "source_anchor": "pub enum Segment {" + }, + { + "item": "TreePath", + "reviewed": false, + "intent": "Parsed representation of a complete tree_path string. 
Provides parse() for deserializing from string and serialize() for serializing back with automatic quoting of complex names.", + "source_span": [ + 29, + 31 + ], + "tree_path": "struct::TreePath", + "source_hash": "sha256:0de76bc568b5f672322f5e55065fbde484625f3fb22da038178653ae22231793", + "source_anchor": "pub struct TreePath {" + }, + { + "item": "parse_tree_path", + "reviewed": false, + "intent": "Nom parser combinator for parsing a complete tree_path into a TreePath struct. Handles :: delimited segments and injection markers attached directly to the preceding segment (name//lang) without requiring a :: separator.", + "source_span": [ + 99, + 111 + ], + "tree_path": "fn::parse_tree_path", + "source_hash": "sha256:209d3af3a95c59cfc448b099fea352329bd4fc895d6fd7737ed95fb063d38c06", + "source_anchor": "fn parse_tree_path(input: &str) -> IResult<&str, TreePath> {" + }, + { + "item": "parse_segment", + "reviewed": false, + "intent": "Parses a single tree_path segment, trying injection markers, quoted strings, and simple names in order. Uses heuristic to classify simple names as Kind or Name.", + "source_span": [ + 114, + 129 + ], + "tree_path": "fn::parse_segment", + "source_hash": "sha256:2d570524b2adefd28ed3af36218c7b77db1eff350d48dd830a7651b2eae2debd", + "source_anchor": "fn parse_segment(input: &str) -> IResult<&str, Segment> {" + }, + { + "item": "serialize_name", + "reviewed": false, + "intent": "Serializes a name, automatically quoting if it contains spaces, :: delimiters, quotes, or other special characters. 
Handles backslash and quote escaping.", + "source_span": [ + 65, + 84 + ], + "tree_path": "fn::serialize_name", + "source_hash": "sha256:0398cceb21d0564c22c3105074f081f493aaab7e7e6a7b171df93ace78192ad6", + "source_anchor": "fn serialize_name(name: &str) -> String {" + } + ] +} diff --git a/docs/liyi-01x-roadmap.md b/docs/liyi-01x-roadmap.md index e342a3e..43b74ac 100644 --- a/docs/liyi-01x-roadmap.md +++ b/docs/liyi-01x-roadmap.md @@ -26,7 +26,10 @@ The MVP roadmap (`docs/liyi-mvp-roadmap.md`) covers the 0.1.0 release. This docu | M5.4 Golden fixtures | ✅ Complete | `missing_related/` and `missing_related_pass/` added | | M5.5 AGENTS.md rule 11 | ✅ Complete | Pre-commit check requirement added | | M5.3 `--prompt` mode | ⏳ Design | Design doc at `docs/prompt-mode-design.md` | -| M7 Additional languages | ⏳ Planned | Ruby, Bash, Dart, Zig | +| M7.1 Ruby | ✅ Complete | tree-sitter-ruby v0.23.1 | +| M7.2 Bash | ✅ Complete | tree-sitter-bash v0.25.1 | +| M7.3 Dart | ⏳ Planned | Flutter ecosystem | +| M7.4 Zig | ✅ Complete | tree-sitter-zig v1.1.2 | | M8 Data file support | ⏳ Design | TOML, JSON, YAML; key-path tree_path paradigm | | M9 Injection framework | ⏳ Design | Multi-language files (YAML+shell, Vue SFC) | | M6.1–M6.3 NL-quoting core | ✅ Complete | Fenced blocks, inline backticks, quote chars | @@ -677,7 +680,7 @@ The primary mechanism for transitioning intent from "agent-inferred" to "human-a - Interactive by default when stdin is a TTY: show intent + source span, prompt `[y]es / [n]o / [e]dit / [s]kip`. - Batch mode via `--yes` or when non-TTY. - `--dry-run`, `--item ` flags. -- Reanchors on approval (fills `source_hash`, `source_anchor`). +- Fills `source_hash` and `source_anchor` on approval. ### M3.2. 
`liyi init` — scaffold command ✅ @@ -971,17 +974,76 @@ End-to-end golden test demonstrating the full scaffold workflow: | 13 | M10.2 Doc comment heuristic | ⏳ Planned | ~2h | `=doc` suggestions | | 14 | M10.3 Item size heuristic | ⏳ Planned | ~1h | Trivial suggestions | | 15 | M10.5 Combined scaffold test | ⏳ Planned | ~1h | Regression guard | -| 16 | M7.1 Ruby | ⏳ Planned | ~2h | Ruby/Rails ecosystem | -| 17 | M7.2 Bash | ⏳ Planned | ~1h | CI scripts, devops | +| ~~16~~ | ~~M7.1 Ruby~~ | ✅ Done | — | Ruby/Rails ecosystem | +| ~~17~~ | ~~M7.2 Bash~~ | ✅ Done | — | CI scripts, devops | | 18 | M8.2 TOML | ⏳ Planned | ~3h | Config-as-source (dogfooding) | | 19 | M8.3 JSON | ⏳ Planned | ~2h | Schemas, package.json | | 20 | M7.3 Dart | ⏳ Planned | ~3h | Flutter ecosystem | -| 21 | M7.4 Zig | ⏳ Planned | ~3h | Systems lang, growing | +| ~~21~~ | ~~M7.4 Zig~~ | ✅ Done | — | Systems lang, growing | | 22 | M8.4 YAML (no injection) | ⏳ Planned | ~2h | CI/k8s (limited without M9) | | 18 | M9 Injection framework | ⏳ Design | ~20h | Multi-language files | --- +## Appendix: tree_path Grammar Specification (v0.2) + +**Status:** ⏳ Partial — nom parser implemented (`tree_path/parser.rs`), integration into `resolve_tree_path`/`compute_tree_path` pending. + +The current `split("::")` parser is ambiguous when names contain `::` or spaces (as seen in Zig `test "add function"`). This appendix defines a formal grammar for unambiguous tree_path parsing. + +### A.1 Grammar (EBNF) + +```ebnf +tree_path := segment ("::" segment)* +segment := kind | name +kind := identifier +name := simple_name | quoted_string +simple_name := identifier | "self" | number +quoted_string:= '"' (escaped_char | any_unicode_except_quote)* '"' +identifier := [A-Za-z_][A-Za-z0-9_]* +number := [0-9]+ +escaped_char := '\\' ( '"' | '\\' | 'n' | ':' ) +``` + +### A.2 Design decisions + +1. **Quoted strings for complex names:** Any name containing spaces, `::`, quotes, or Unicode control characters must be quoted. 
Example: `test::"add function"`. + +2. **Backslash escaping:** Inside quoted strings, `"` and `\` must be escaped. `\:` is provided as a convenience for names containing colons (though `::` is the delimiter). + +3. **Unquoted shorthand:** Simple identifiers (alphanumeric + underscore) can remain unquoted for ergonomics. This preserves backward compatibility with existing tree_paths like `fn::add` or `class::MyClass`. + +4. **Kind disambiguation:** The parser doesn't validate that a segment is a "kind" vs "name" — that happens at resolution time using the `LanguageConfig::kind_map`. The grammar treats both uniformly at the syntactic level. + +### A.3 Injection syntax (future) + +When M9 (injection framework) is implemented, the grammar will extend to: + +```ebnf +tree_path := segment ("::" segment)* +segment := (kind | name) injection_marker? | injection_marker +injection_marker := "//" language +language := identifier +``` + +The injection marker `//lang` attaches to the preceding name segment (`run//bash`), preserving the even-pair invariant for shell-safe paths. The standalone form (`:://lang`) is also accepted for parsing but the canonical serialization always uses the appended form. + +### A.4 Implementation plan + +1. Add `nom = "8"` to `crates/liyi/Cargo.toml` ✅ +2. Create `tree_path/parser.rs` with nom combinators ✅ +3. Update `resolve_tree_path` to use the new parser +4. Update `compute_tree_path` to escape names containing `::`, quotes, or spaces +5. 
Add roundtrip property tests: `parse(serialize(path)) == path` + +### A.5 Migration path + +- **Phase 1:** Parser accepts both old (unquoted) and new (quoted) syntax +- **Phase 2:** `compute_tree_path` starts quoting names that need it +- **Phase 3:** (Optional) Deprecate unquoted complex names with a warning + +--- + ## AIGC Disclaimer This document contains content from the following AI agents: diff --git a/docs/liyi-design.md b/docs/liyi-design.md index 0499e16..590d50e 100644 --- a/docs/liyi-design.md +++ b/docs/liyi-design.md @@ -342,7 +342,7 @@ The linter only checks for the *presence* of `@liyi:module` in a directory's fil **Naming convention.** The sidecar filename is the source filename with `.liyi.jsonc` appended: `money.rs` → `money.rs.liyi.jsonc`. Always append to the full filename, never strip the extension. This avoids ambiguity when files share a stem but differ in extension (`money.rs` and `money.py` would otherwise both claim `money.liyi.jsonc`). The rule is mechanical: one source file, one sidecar, derivable by concatenation. - + One per source file, co-located: @@ -372,13 +372,13 @@ The `source` path is relative to the repository root — the same path you'd pas } ``` -`source_hash`, `source_anchor`, and `tree_path` are tool-managed — the agent writes only `source_span` and the tool fills in the rest (see *Per-item staleness* and *Structural identity via `tree_path`* below). Agents MAY write `tree_path` if they can infer the AST path, but the tool will overwrite it with the canonical form on the next `liyi reanchor`. `"intent": "=doc"` is a reserved sentinel meaning "the docstring already captures intent" — the agent uses it when the source docstring contains behavioral requirements (constraints, error conditions, properties), not just a functional summary (see *`"=doc"` in the sidecar* below). 
+`source_hash`, `source_anchor`, and `tree_path` are tool-managed — the agent writes only `source_span` and the tool fills in the rest (see *Per-item staleness* and *Structural identity via `tree_path`* below). Agents MAY write `tree_path` if they can infer the AST path, but the tool will overwrite it with the canonical form on the next `liyi check --fix`. `"intent": "=doc"` is a reserved sentinel meaning "the docstring already captures intent" — the agent uses it when the source docstring contains behavioral requirements (constraints, error conditions, properties), not just a functional summary (see *`"=doc"` in the sidecar* below). -`"version"` is required. The linter checks it and rejects unknown versions. This costs nothing now and prevents painful migration when the schema evolves (e.g., adding `"related"` edges, structured fields in post-0.1). A JSON Schema definition ships alongside the linter for editor validation and autocompletion (see *Appendix: JSON Schema* below). When the schema changes, the linter will accept both `"0.1"` and the new version during a transition window, and `liyi reanchor --migrate` will upgrade sidecar files in place. - +`"version"` is required. The linter checks it and rejects unknown versions. This costs nothing now and prevents painful migration when the schema evolves (e.g., adding `"related"` edges, structured fields in post-0.1). A JSON Schema definition ships alongside the linter for editor validation and autocompletion (see *Appendix: JSON Schema* below). When the schema changes, the linter will accept both `"0.1"` and the new version during a transition window, and `liyi migrate` will upgrade sidecar files in place. + -**`liyi reanchor --migrate` behavior.** When the schema version changes (e.g., 0.1 → 0.2), `--migrate` reads each `.liyi.jsonc`, adds any newly required fields with default values, removes deprecated fields, updates `"version"` to the new version, and writes the file back. 
It is idempotent — running it twice produces the same output. It does not re-hash spans or re-infer intent; it only transforms the schema envelope. Migration is always additive in 0.x: no field present in 0.1 will change meaning, only new fields may appear. +**`liyi migrate` behavior.** When the schema version changes (e.g., 0.1 → 0.2), `--migrate` reads each `.liyi.jsonc`, adds any newly required fields with default values, removes deprecated fields, updates `"version"` to the new version, and writes the file back. It is idempotent — running it twice produces the same output. It does not re-hash spans or re-infer intent; it only transforms the schema envelope. Migration is always additive in 0.x: no field present in 0.1 will change meaning, only new fields may appear. After human review — either the human adds `@liyi:intent` in the source file (see *Source-level intent* below), or sets `"reviewed": true` in the sidecar via CLI or IDE code action. Both paths mark the item as reviewed. When `"reviewed"` is set to `true`, `"confidence"` is removed — a human voucher replaces agent self-assessment. If the source later changes and the agent re-infers (producing a new unreviewed spec), `"confidence"` reappears: @@ -402,7 +402,7 @@ After human review — either the human adds `@liyi:intent` in the source file ( `"reviewed"` defaults to `false` when absent. The linter considers an item reviewed if **either** `"reviewed": true` in the sidecar **or** `@liyi:intent` exists in source. Source intent takes precedence for adversarial testing — it's the human's assertion, not the agent's inference. See *Source-level intent* and *Security model* below. - + ### Why a single `intent` field, not structured pre/postconditions? @@ -426,11 +426,11 @@ Item-level intent carries machine metadata → JSONC wins. `source_span` is a closed interval of 1-indexed line numbers: `[42, 58]` means lines 42 through 58, inclusive. 
This matches editor line numbers, `git blame` output, and coincidentally the mathematical convention for closed intervals. `source_hash` is always `sha256:<hex>` — the SHA-256 digest of those lines after normalizing line endings to `\n` (LF). This ensures cross-platform consistency: a Windows developer with `core.autocrlf=true` and a Linux CI runner produce identical hashes for identical content. No other hash algorithm is supported in 0.1. `source_anchor` is the literal text of the first line of the span — used by the linter for efficient shift detection (see below).
-
+
-Both `source_hash` and `source_anchor` are **tool-managed fields**. The agent writes only `source_span` — the tool (`liyi reanchor`, or `liyi check --fix`) computes the hash and anchor deterministically from the source file. This is the same principle as not letting agents author lockfile checksums: the tool reads the actual bytes, so fabricated or hallucinated hashes are impossible.
-
+`source_hash`, `source_anchor`, and `tree_path` are **tool-managed fields**. The agent writes only `source_span` — the tool (`liyi check --fix`) computes the hash, anchor, and tree path deterministically from the source file. This is the same principle as not letting agents author lockfile checksums: the tool reads the actual bytes, so fabricated or hallucinated hashes are impossible.
+
The agent records each item's line range (`source_span`) when writing the spec. The linter reads those lines from the source file, hashes them, and compares against `source_hash`. This gives per-item staleness without the linter needing to parse any language — it just reads a slice of lines.
Still fewer false positives than file-level hashing (where a docstring typo marks every spec in the file stale with no way to distinguish which items actually changed). -**Span-shift detection (included in 0.1).** When the linter detects a hash mismatch and no `tree_path` is available (or tree-sitter has no grammar for the language), it falls back to scanning ±100 lines for content matching the recorded hash. If the same content appears at an offset (e.g., shifted down by 3 lines because an import was added), the linter reports `SHIFTED` rather than `STALE`. With `--fix`, the span is auto-corrected in the sidecar; without `--fix`, the linter reports the shift but does not write. Once a delta is established for one item, subsequent items in the same file are adjusted by the same delta before checking — so a single import insertion resolves in one probe, not twenty. If no match is found within the window, the linter gives up and reports `STALE` as usual. This is the same heuristic `patch(1)` uses with a fuzz factor — a linear scan over a bounded window, ~50 lines, no parser. Combined with `liyi reanchor`, this eliminates the most common source of false positives (line-shifting edits) without language-specific tooling. For files with `tree_path` populated, tree-sitter-based anchoring supersedes this heuristic entirely — see the next section. - +**Span-shift detection (included in 0.1).** When the linter detects a hash mismatch and no `tree_path` is available (or tree-sitter has no grammar for the language), it falls back to scanning ±100 lines for content matching the recorded hash. If the same content appears at an offset (e.g., shifted down by 3 lines because an import was added), the linter reports `SHIFTED` rather than `STALE`. With `--fix`, the span is auto-corrected in the sidecar; without `--fix`, the linter reports the shift but does not write. 
Once a delta is established for one item, subsequent items in the same file are adjusted by the same delta before checking — so a single import insertion resolves in one probe, not twenty. If no match is found within the window, the linter gives up and reports `STALE` as usual. This is the same heuristic `patch(1)` uses with a fuzz factor — a linear scan over a bounded window, ~50 lines, no parser. Combined with `liyi check --fix`, this eliminates the most common source of false positives (line-shifting edits) without language-specific tooling. For files with `tree_path` populated, tree-sitter-based anchoring supersedes this heuristic entirely — see the next section.
+
### Structural identity via `tree_path`
-`tree_path` is an optional field on both `itemSpec` and `requirementSpec` that provides **structural identity** — matching a spec to its source item by AST node path rather than line number. When present and non-empty, `liyi reanchor` and `liyi check --fix` use tree-sitter to locate the item by its structural position in the parse tree, then update `source_span` to the item's current line range. This makes span recovery deterministic across formatting changes, import additions, line reflows, and any other edit that moves lines without changing the item's identity.
+`tree_path` is an optional field on both `itemSpec` and `requirementSpec` that provides **structural identity** — matching a spec to its source item by AST node path rather than line number. When present and non-empty, `liyi check --fix` uses tree-sitter to locate the item by its structural position in the parse tree, then updates `source_span` to the item's current line range. This makes span recovery deterministic across formatting changes, import additions, line reflows, and any other edit that moves lines without changing the item's identity.
**Format.** A `tree_path` is a `::` delimited path of tree-sitter node kinds and name tokens that uniquely identifies an item within a file.
Examples: @@ -457,18 +457,21 @@ Without a `tree_path`, the fallback is: batch false positives on any line-shifti | `struct Money { … }` | `struct::Money` | | `mod billing { fn charge(…) }` | `mod::billing::fn::charge` | | `#[test] fn test_add()` | `fn::test_add` | +| Zig `test "add function" { … }` | `test::"add function"` | +| YAML `run:` with embedded Bash `setup_env()` | `key::run//bash::fn::setup_env` | The path identifies the item by node kind and name, not by position. The tool constructs the path by walking the tree-sitter CST from root to the node that covers `source_span`, recording each named ancestor. This is deterministic — the same source item always produces the same path regardless of where it appears in the file. -**Behavior during reanchor and check.** +**Quoting and injection.** Names containing spaces, `::`, or quotes are double-quoted with backslash escaping (`test::"add function"`). For multi-language files (M9), an injection marker `//lang` attaches to the preceding segment to cross a language boundary (`key::run//bash::fn::setup_env`); the `//` delimiter requires no shell escaping. The full grammar is specified in the roadmap appendix (tree_path Grammar v0.2). - -1. `liyi reanchor`: Parse the source file with tree-sitter. For each spec with a non-empty `tree_path`, query the parse tree for a node matching the path. If found, update `source_span` to the node's line range, recompute `source_hash` and `source_anchor`. If not found (item was renamed or deleted), report an error — do not silently fall back. -2. `liyi check --fix`: Same tree-sitter lookup. If the hash mismatches but the `tree_path` resolves to a valid node, update the span (the item moved but is still present). If the `tree_path` doesn't resolve, fall back to span-shift heuristic. -3. `liyi check` (without `--fix`): Use `tree_path` to verify the span points to the correct item. 
If it doesn't (span drifted, but `tree_path` still resolves), report `SHIFTED` with the correct target position. - +**Behavior during check.** -**Diagnostic clarity.** When a spec has no `tree_path` and the shift heuristic also fails, the diagnostic indicates why tree-path recovery was skipped — e.g., "no tree_path set, falling back to shift heuristic" — so that users can add the missing field or run `liyi reanchor` to auto-populate it. Diagnostics distinguish "no tree_path available" from "tree_path resolution failed (item may have been renamed or deleted)." + +1. `liyi check --fix`: Parse the source file with tree-sitter. For each spec with a non-empty `tree_path`, query the parse tree for a node matching the path. If found and the content is unchanged (pure positional shift), update `source_span`, `source_hash`, and `source_anchor`. If found but the content also changed (semantic drift), update `source_span` to track the item's location but leave `source_hash` unchanged — the spec remains stale for review. If the `tree_path` doesn't resolve, fall back to span-shift heuristic. +2. `liyi check` (without `--fix`): Use `tree_path` to verify the span points to the correct item. If it doesn't (span drifted, but `tree_path` still resolves), report `SHIFTED` with the correct target position. + + +**Diagnostic clarity.** When a spec has no `tree_path` and the shift heuristic also fails, the diagnostic indicates why tree-path recovery was skipped — e.g., "no tree_path set, falling back to shift heuristic" — so that users can run `liyi check --fix` to auto-populate it. Diagnostics distinguish "no tree_path available" from "tree_path resolution failed (item may have been renamed or deleted)." **Empty string fallback.** When `tree_path` is `""` (empty string) or absent, the tool falls back to the current line-number-based behavior — span-shift heuristic, `source_anchor` matching, delta propagation. 
This accommodates: @@ -477,8 +480,8 @@ The path identifies the item by node kind and name, not by position. The tool co - **Generated code** where tree-sitter may not produce useful node kinds. - **Complex or contrived cases** where the agent or human determines that a tree path is non-obvious or ambiguous. -The agent MAY set `tree_path` to `""` explicitly to signal "I considered structural identity and it doesn't apply here." Absence of the field is equivalent to `""`. `liyi reanchor` auto-populates `tree_path` for every spec where a clear structural path can be resolved from the current `source_span` and a supported tree-sitter grammar — agents need not set it manually. When the span doesn't correspond to a recognizable AST item (macros, generated code, unsupported languages), the tool leaves `tree_path` empty. - +The agent MAY set `tree_path` to `""` explicitly to signal "I considered structural identity and it doesn't apply here." Absence of the field is equivalent to `""`. `liyi check --fix` auto-populates `tree_path` for every spec where a clear structural path can be resolved from the current `source_span` and a supported tree-sitter grammar — agents need not set it manually. When the span doesn't correspond to a recognizable AST item (macros, generated code, unsupported languages), the tool leaves `tree_path` empty. + **Language support.** Tree-sitter support is grammar-dependent. Rust, Python, Go, JavaScript, and TypeScript are built-in. For unsupported languages, `tree_path` is left empty and the tool falls back to line-number behavior. Adding a language is a matter of adding its tree-sitter grammar crate and a small mapping of node kinds — no changes to the core protocol or schema. @@ -505,15 +508,15 @@ All languages are built-in — the binary ships with every supported tree-sitter | Go | `tree-sitter-go` | `type_declaration` wraps `type_spec` for structs/interfaces — custom name extraction navigates the indirection. 
Methods encode receiver type: `method::(*MyType).DoThing` (pointer) or `method::MyType.DoThing` (value). | | JavaScript | `tree-sitter-javascript` | Arrow functions in `const` declarations are pervasive — `const foo = () => ...` maps to `fn::foo` (tracking the `variable_declarator` when its value is an `arrow_function`). | | TypeScript | `tree-sitter-typescript` | Superset of JS; adds `interface_declaration`, `type_alias_declaration`, `enum_declaration`. Dual grammar: `.ts` → typescript, `.tsx` → tsx. | +| Ruby | `tree-sitter-ruby` | `class`, `module`, `method`, `singleton_method`. Class methods use `custom_name` callback for receiver encoding. | +| Bash | `tree-sitter-bash` | `function_definition` only. Simplest config — structurally flat. | +| Zig | `tree-sitter-zig` | `fn`, `const`, `test`. Struct-as-namespace pattern (`const Foo = struct { ... }`) uses `custom_name`. | **Planned languages (0.1.x, see roadmap M7–M9):** | Language | Grammar | Notes | |---|---|---| -| Ruby | `tree-sitter-ruby` | `class`, `module`, `method`, `singleton_method`. Class methods need `custom_name` callback. | -| Bash | `tree-sitter-bash` | `function_definition` only. Simplest config — structurally flat. | | Dart | `tree-sitter-dart` | `class`, `method`, `mixin`, `extension`, `enum`. Grammar crate stability TBD. | -| Zig | `tree-sitter-zig` | `fn`, `const`, `test`. Struct-as-namespace pattern (`const Foo = struct { ... }`) needs `custom_name`. | | TOML | `tree-sitter-toml` | Data file — `table`, `key`. Key-path identity, not named items. | | JSON | `tree-sitter-json` | Data file — `key` (from `pair`). Targets schemas, `package.json`. | | YAML | `tree-sitter-yaml` | Data file — `key` (from `block_mapping_pair`). Limited without injection framework (M9). | @@ -628,11 +631,30 @@ No `intent` field — the requirement text lives at the source site, not duplica **Naming and scope.** Requirement names are unique per repository. 
The linter reports an error if two `@liyi:requirement` markers declare the same name. Names are matched as exact strings (case-sensitive) after trimming leading/trailing whitespace inside parens. The name is a human-readable identifier, not a path — it can be in any language. No character set restriction: `multi-currency-addition`, `多币种加法`, and `인출한도` are all valid names.
-
+
**Requirements can live anywhere:** in the source file near the code they govern, in `README.md` alongside `@liyi:module`, in a dedicated requirements file, or in doc comments. The linter scans all non-ignored files for the marker.
-**End-of-block markers.** The linter does not require an explicit end marker for requirement blocks — `source_span` in the sidecar defines the block boundaries. An optional `@liyi:end-requirement` (or `@立意:需求止`) marker is **not supported in 0.1** — the linter does not look for it. A future version could accept it for visual clarity in Markdown files where contiguous-comment heuristics don't apply; adding it would be additive and non-breaking.
+**End-of-block markers.** The `@liyi:end-requirement <name>` marker closes a requirement block. The name must match the opening `@liyi:requirement <name>`. When both markers are present, the linter pairs them by name to deterministically compute `source_span` — this is the primary span recovery mechanism for files without tree-sitter support (e.g., Markdown). The end marker uses the same name syntax (parenthesized or whitespace-delimited), full-width normalization, and multilingual aliases as the opening marker:
+
+| Alias | Language |
+|---|---|
+| `@liyi:end-requirement` | English (canonical) |
+| `@立意:需求结束` | Chinese |
+| `@liyi:fin-requisito` | Spanish |
+| `@立意:要件終` | Japanese |
+| `@liyi:fin-exigence` | French |
+| `@립의:요건끝` | Korean |
+
+The end marker is **recommended** for Markdown requirement blocks but not required.
When absent, the sidecar's recorded `source_span` is the only span authority and must be maintained manually or via tree-sitter recovery.
+
+Example:
+
+```markdown
+<!-- @liyi:requirement(cli-exit-codes) -->
+Exit codes: 0 = clean, 1 = failures found, 2 = internal error.
+<!-- @liyi:end-requirement(cli-exit-codes) -->
+```

### `@立意:有关` / `@liyi:related` — dependency edges
@@ -705,31 +727,26 @@ Exit code:
`--fail-on-req-changed` (default: true) — exit 1 if any reviewed spec references a requirement whose hash changed
This closes the **spec rot gap**: when requirements change, the requirement hash changes, and all items with `"related"` edges to that requirement are transitively flagged. The human reviews whether the code still satisfies the updated requirement. No silent re-inference over a potentially broken implementation — the requirement text is the anchor.
-### `liyi reanchor`
+### Tool-managed fields and `liyi check --fix`
-`source_span` is the only positional field the agent writes. `source_hash` and `source_anchor` are tool-managed — computed by `liyi reanchor` (or the linter on first run) from the actual source file. Humans never compute them by hand.
+`source_span` is the only positional field the agent writes. `source_hash`, `source_anchor`, and `tree_path` are tool-managed — computed by `liyi check --fix` from the actual source file. Humans never compute them by hand.
-`liyi reanchor` is also the tool that populates hashes for new entries. When an agent writes a sidecar with `source_span` but no `source_hash`, running `liyi reanchor` (or `liyi check --fix`) reads the source lines, computes the SHA-256, and fills in both `source_hash` and `source_anchor`. This means a fresh agent-written sidecar is incomplete until the tool runs — by design.
+`liyi check --fix` also populates hashes for new entries. When an agent writes a sidecar with `source_span` but no `source_hash`, running `liyi check --fix` reads the source lines, computes the SHA-256, and fills in `source_hash`, `source_anchor`, and `tree_path`.
This means a fresh agent-written sidecar is incomplete until the tool runs — by design.
-For resolving CI failures without an agent pass, the `liyi reanchor` subcommand re-hashes existing spans. It accepts one or more sidecar files or directories (recursive):
+For resolving CI failures without an agent pass, `liyi check --fix` re-hashes existing spans. It accepts a `--root` flag or operates on the current directory:
```bash
-$ liyi reanchor src/billing/money.rs.liyi.jsonc
-  add_money [42, 58]: hash updated (source changed at same span)
+$ liyi check --fix
+  add_money [42, 58]: ⚠ STALE — source changed at same span, left for review
  convert_currency [60, 85]: hash unchanged
-$ liyi reanchor crates/ # reanchor all sidecars under crates/
-$ liyi reanchor a.rs.liyi.jsonc b.rs.liyi.jsonc
+  billing_handler [10, 35]: ↕ SHIFTED [10,35]→[12,37], span updated
```
-This handles the case where code at those lines changed but lines didn't shift — the human has reviewed the change and is confirming "the intent still holds." The tool computes the new hash; the human never touches it.
-
-If lines shifted, the span points to wrong lines. Resolution paths:
+This handles the common cases: missing hashes on fresh sidecars and positional shifts after line-changing edits. Content that actually changed is reported `STALE` rather than silently re-hashed — confirming "the intent still holds" is an explicit review step, not a `--fix` side effect (see *Semantic drift protection*). In every case the tool computes the hashes; the human never touches them.
If neither tree-sitter nor the heuristic can locate the item, the spec remains `STALE` for the agent or human to re-record the span. -`liyi reanchor` is a thin wrapper on the same hashing logic used by `liyi check`. No LLM calls. +`liyi check --fix` is deterministic. No LLM calls. ### Prescriptive specs without code @@ -753,7 +770,7 @@ If `payment-security` changes → `multi-currency-addition` is flagged REQ CHANG The linter detects cycles (A → B → A) and reports them as errors without looping. - + **Use this sparingly.** Most teams should use flat requirements — one level of `@liyi:requirement` blocks with `@liyi:related` edges from code items. Requirement hierarchies are for organizations that already think in terms of system requirements decomposing into subsystem requirements (defense, aerospace, regulated industries). If you don't already have a requirement hierarchy, don't build one just because the tool allows it — the cascading noise from deep trees (a change at the root flags everything below) can be worse than the traceability it provides. @@ -948,7 +965,7 @@ The second case matters for the scaffold workflow (see *Tree-sitter item discove The source-level path (`@liyi:intent`) and the sidecar path (`"reviewed": true`) serve different needs: - **No `"reviewed"` field to forge.** The security concern — an agent writing `"reviewed": true` directly — dissolves. Review is visible in source diffs, attributable via `git blame` on the actual source file, and covered by CODEOWNERS. An agent would have to write `@liyi:intent` in source to fake review, which is conspicuous in code review. -- **Merge conflicts become trivial.** If humans never touch the sidecar, it's fully regenerable — `liyi reanchor` after merge, zero human intervention. Same model as `Cargo.lock` or `pnpm-lock.yaml`. +- **Merge conflicts become trivial.** If humans never touch the sidecar, it's fully regenerable — `liyi check --fix` after merge, zero human intervention. 
Same model as `Cargo.lock` or `pnpm-lock.yaml`. - **Review is visible where it matters.** A `@liyi:intent` block above a function is visible in the normal code review flow — no need to open a separate `.liyi.jsonc` diff tab. The sidecar retains: `"item"`, `"reviewed"` (optional, defaults to `false`), `"intent"` (the agent's *inferred* intent or `"=doc"`), `"source_span"`, `"source_hash"`, `"source_anchor"`, `"confidence"`, and `"related"`. The agent writes `"item"`, `"intent"`, `"source_span"`, `"confidence"`, and `"related"`. The tool fills in `"source_hash"` and `"source_anchor"`. The human (or CLI/IDE) sets `"reviewed": true`. @@ -1084,7 +1101,7 @@ The linter resolves `"source"` paths in `.liyi.jsonc` relative to the repository **Requirement discovery is project-global.** Positional args scope which items are checked (pass 2), not which requirements are indexed. Pass 1 always walks the full project root to discover all `@liyi:requirement` markers, regardless of CLI positional args. This ensures that `liyi check src/billing/` can resolve `@liyi:related` edges pointing to requirements defined in `docs/requirements.md` or any other location in the repo. - + This handles the common case without configuration. `.gitignore` already excludes `node_modules/`, `.venv/`, `target/`, `__pycache__/`, `build/`, etc. `.liyiignore` picks up the rest — checked-in vendored code, generated protobuf bindings, FFI stubs. 
@@ -1213,7 +1230,7 @@ Exit codes: 0 = clean, 1 = check failures (stale, unreviewed, or diverged specs) - `--fail-on-unreviewed` (default: false) — exit 1 if specs exist without `@liyi:intent` in source or `"reviewed": true` in sidecar - `--fail-on-req-changed` (default: true) — exit 1 if any reviewed spec references a requirement whose hash changed - `--fail-on-untracked` (default: true) — exit 1 if any `@liyi:requirement` marker has no sidecar entry, or any `@liyi:related` marker has no corresponding edge in the enclosing item's sidecar spec - + ### What it doesn't do @@ -1242,7 +1259,7 @@ All of the following are equivalent: **Implementation approach: normalize-then-match.** The linter runs a single normalization pass on each scanned line — replacing the four full-width characters with their half-width equivalents — before applying the marker regex. This is a four-entry `str::replace` chain (or a single `translate` table), not a regex concern. The normalization happens only on lines being scanned for markers, not on the entire file, so it has negligible cost. The alias lookup table stores only half-width forms; normalization ensures they match regardless of what the user typed. - + This is strictly more robust than the alternative (doubling every regex to accept both forms), keeps the alias table simple, and confines the full-width concern to one function in the lexer. @@ -1256,7 +1273,7 @@ This is strictly more robust than the alternative (doubling every regex to accep In **source code**, the `@` character is escaped in string constants: `\x40` in Rust, `\u0040` in JSON. This is invisible to the reader (it's inside a string literal) and prevents the scanner from matching constants in the alias table, format strings, and test data. The `@liyi:requirement(quine-escape)` in `markers.rs` enforces this invariant. - + In **documentation and prose** — Markdown files, design docs, READMEs, contributing guides — character escapes are unacceptable. 
A design document that writes `\x40liyi:module` instead of `@liyi:module` is unreadable. The scanner instead uses **natural-language context** to distinguish real markers from mentions:
@@ -1266,7 +1283,7 @@ In **documentation and prose** — Markdown files, design docs, READMEs, contrib
2. **Inline code spans.** If the marker's position falls inside an inline backtick span on the same line (determined by counting backtick characters before the match position — odd count means inside code), the marker is rejected. This covers inline mentions like `` `@liyi:module` `` and `` `<!-- @liyi:module -->` ``.
3. **Preceding quote characters.** If the character immediately before the `@` is a quotation mark — ASCII quotes (`'`, `"`), typographic quotes (`‘`, `’`, `“`, `”`), CJK brackets (`「`, `」`), or guillemets (`«`, `»`) — the marker is rejected. This covers natural-language quoting conventions across locales: `“@liyi:intent”`, `‘@liyi:module’`, `「@liyi:requirement」`, etc.
-
+
Together, these three checks cover every conventional way that prose references a technical term without asserting it. The scanner remains line-oriented — fenced block state is a single boolean; inline code detection is a character count within one line; preceding-char is a one-character lookbehind. No Markdown parser is needed.
| No |
| `liyi triage --validate <file>` | Validate an agent-produced triage report against the schema; check that every assessed item corresponds to a real stale item | No |
-| `liyi triage --apply [file]` | Auto-reanchor items with `cosmetic` verdict; present `semantic` items with suggested intents; flag `intent-violation` items for human review | No |
+| `liyi triage --apply [file]` | Auto-fix items with `cosmetic` verdict; present `semantic` items with suggested intents; flag `intent-violation` items for human review | No |
| `liyi triage --summary [file]` | Print human-readable summary of a triage report | No |

The `--prompt` flag is the bridge for CI/script pipelines that have an `llm` CLI or API wrapper but no full agentic framework:
The agent (or script, or CI wrapper) | `change_summary` | string | What changed in the code (1–2 sentences) | | `invariant_summary` | string | What stayed the same (1–2 sentences) | | `reasoning` | string | Why the verdict was assigned (2–3 sentences, citable in reviews) | -| `action` | enum | auto-reanchor / update-intent / fix-code-or-update-intent / manual-review | +| `action` | enum | auto-fix / update-intent / fix-code-or-update-intent / manual-review | | `suggested_intent` | string? | Proposed new intent text (only for `semantic` verdict) | | `impact` | array | Transitively affected items via `related` graph | @@ -1419,7 +1436,7 @@ This is the full context an assessor needs. The agent (or script, or CI wrapper) | CI/PR comment | `summary` + items with verdict ≠ cosmetic | Format as markdown table in PR comment | | Dashboard | `summary` for aggregate view; items for drill-down | Read JSON, render charts / tables | | LSP | Items for inline diagnostics at `source_span` | Watch `.liyi/triage.json`, map items to diagnostic locations | -| `liyi triage --apply` | Items with `verdict: cosmetic` | Auto-reanchor those items (write back to sidecars) | +| `liyi triage --apply` | Items with `verdict: cosmetic` | Auto-fix those items (write back to sidecars) | | Agent (next session) | `suggested_intent` for items with `verdict: semantic` | Read triage, propose intent updates in sidecar | | Human (terminal) | Formatted summary + triage table | `liyi triage --summary`; `--json` for raw | @@ -1437,7 +1454,7 @@ flowchart TD %% ── Batch path ── Decision -- "batch path
(many items, CI)" --> Triage["Agent triages
reads stale items, reasons about each
(or: liyi triage --prompt | llm-call)
"] - Triage -- "writes .liyi/triage.json" --> Apply["liyi triage --apply
auto-reanchors cosmetic items,
prints remaining for review
"] + Triage -- "writes .liyi/triage.json" --> Apply["liyi triage --apply
auto-fixes cosmetic items,
prints remaining for review
"] Apply --> HumanB["Human reviews
reads triage report or PR comment,
accepts suggested intents or fixes code
"] ``` @@ -1453,7 +1470,7 @@ The tradeoff: direct re-inference does not distinguish intent violations from le |---|---|---| | Agent just wrote or modified the code in the current session | Direct re-inference | The agent has full context; triage would assess its own changes against its own prior inference — little value added. | | Few stale items (≤ 5) from straightforward changes | Direct re-inference | The overhead of a structured triage report exceeds the classification benefit. | -| Many stale items from a large refactor or merge | Triage | The structured report helps humans prioritize which items need re-review vs. auto-reanchor. | +| Many stale items from a large refactor or merge | Triage | The structured report helps humans prioritize which items need re-review vs. auto-fix. | | Stale items from changes made by a *different* agent or human | Triage | The assessing agent lacks the original author's context; the old-intent-vs-new-code comparison is more valuable. | | CI pipeline processing a PR | Triage | Batch assessment with structured output is the natural fit for non-interactive workflows. | | Items with `"reviewed": true` or `@liyi:intent` in source | Triage recommended | These items have human-vouched intent. When they go stale, the change deserves explicit assessment against the human's stated intent, not silent re-inference. | @@ -1474,19 +1491,17 @@ The agent instruction (rule 10) permits both paths. Teams can mandate triage for `--fix` never modifies `"intent"`, `"reviewed"`, `"related"`, or any human-authored field. It only writes tool-managed fields. This is the same contract as `eslint --fix` or `cargo clippy --fix` — mechanical corrections, no semantic changes. - + **Semantic drift protection.** When `tree_path` resolves an item to a new span, `--fix` compares the hash at the new location against the recorded `source_hash`. 
If the content is unchanged (pure positional shift), the span, hash, and anchor are all updated — this is a safe mechanical correction. If the content at the new span also changed (semantic drift), `--fix` updates `source_span` to track the item's current location but does **not** rewrite `source_hash` — the spec remains stale so the next `liyi check` flags it for human review. This prevents `--fix` from silently blessing semantic changes that may invalidate the declared intent. - + The shift heuristic (non-`tree_path` fallback) is inherently safe — it only matches when the *exact same content* is found at an offset — so no additional protection is needed there. -`liyi reanchor` remains as the explicit manual tool for targeted re-hashing (e.g., `liyi reanchor --item add_money --span 45,61`). `--fix` is the batch equivalent for CI and post-merge workflows. - ### Implementation -~3000 lines of Rust across two crates (`liyi` library + `liyi-cli` binary), organized as a Cargo workspace under `crates/`. Core check logic is ~900 lines; the remainder covers tree-sitter-based span recovery, CLI, diagnostics, span-shift detection, `--fix` write-back, marker normalization, `reanchor`, and `approve`. Dependencies: `serde`, `serde_json`, `sha2`, `ignore`, `regex`, `tree-sitter`, `tree-sitter-rust` (library); `clap` (CLI). +~3000 lines of Rust across two crates (`liyi` library + `liyi-cli` binary), organized as a Cargo workspace under `crates/`. Core check logic is ~900 lines; the remainder covers tree-sitter-based span recovery, CLI, diagnostics, span-shift detection, `--fix` write-back, marker normalization, `migrate`, and `approve`. Dependencies: `serde`, `serde_json`, `sha2`, `ignore`, `regex`, `tree-sitter`, `tree-sitter-rust` (library); `clap` (CLI). No config file reader. `.liyiignore` handles file exclusion; config-based ignore patterns are a post-MVP consideration. @@ -1494,7 +1509,7 @@ No config file reader. 
`.liyiignore` handles file exclusion; config-based ignore **Performance.** The linter's work is directory walking + line slicing + SHA-256 hashing — all I/O-bound and parallelizable. A monorepo with 10,000 source files and proportional sidecars should complete in seconds. The `ignore` crate already handles `.gitignore`/`.liyiignore` filtering efficiently. -**Merge conflicts in sidecars.** Two branches editing the same source file will both update `source_span`/`source_hash` in the co-located `.liyi.jsonc`, causing a merge conflict. Resolution: `liyi reanchor` after merge, same model as `pnpm install` / `yarn install` resolving lockfile conflicts — re-run the tool, the derived fields are recomputed from the merged source. True intent-text conflicts (both branches edited the same item's `intent` prose) are rare and handled by normal git conflict resolution. +**Merge conflicts in sidecars.** Two branches editing the same source file will both update `source_span`/`source_hash` in the co-located `.liyi.jsonc`, causing a merge conflict. Resolution: `liyi check --fix` after merge, same model as `pnpm install` / `yarn install` resolving lockfile conflicts — re-run the tool, the derived fields are recomputed from the merged source. True intent-text conflicts (both branches edited the same item's `intent` prose) are rare and handled by normal git conflict resolution. 
### Diagnostic catalog @@ -1504,7 +1519,7 @@ Every diagnostic the linter can emit, with its severity, audience, exit code con | Audience | Meaning | Examples | |---|---|---| -| `tool` | Fixable by `liyi reanchor` or `liyi check --fix` — no reasoning required | missing `source_hash`, SHIFTED | +| `tool` | Fixable by `liyi check --fix` — no reasoning required | missing `source_hash`, SHIFTED | | `agent` | Fixable by agent re-inference or sidecar editing — requires reading source but no human judgment | STALE (content changed), UNTRACKED, MISSING RELATED | | `human` | Requires human judgment — review, approval, or design decision | unreviewed, intent-violation, unknown requirement | @@ -1515,7 +1530,7 @@ This distinction was motivated by dogfooding experience: an AI agent maintaining | Spec current and reviewed | info | — | 0 | `: : ✓ reviewed, current` | — | | Spec current but unreviewed | warning | human | 1 if `--fail-on-unreviewed` | `: : ⚠ unreviewed` | `liyi approve ` | | Source hash mismatch (stale) | warning | agent | 1 if `--fail-on-stale` | `: : ⚠ STALE — source changed since spec was written` | — | -| Missing source_hash (fresh spec) | warning | tool | 1 if `--fail-on-stale` | `: : ⚠ missing source_hash` | `liyi reanchor ` | +| Missing source_hash (fresh spec) | warning | tool | 1 if `--fail-on-stale` | `: : ⚠ missing source_hash` | `liyi check --fix` | | Source hash found at offset (shifted) | info | tool | 0 (auto-corrected with `--fix`) | `: : ↕ SHIFTED [old]→[new]` | `liyi check --fix` | | Referenced requirement hash changed | warning | agent | 1 if `--fail-on-req-changed` | `: : ⚠ REQ CHANGED — requirement "" updated` | — | | `@liyi:related X` where X doesn't exist | error | human | 1 | `: : ✗ ERROR — unknown requirement ""` | — | @@ -1524,13 +1539,13 @@ This distinction was motivated by dogfooding experience: an AI agent maintaining | Requirement with no referencing items | info | — | 0 | `: : · requirement has no related items` | — | | Item annotated 
`@liyi:trivial` | info | — | 0 | `: : · trivial` | — | | Item annotated `@liyi:ignore` | info | — | 0 | `: : · ignored` | — | -| `source_span` past EOF | error | tool | 1 | `: : ✗ source_span [s, e] extends past end of file ( lines)` | `liyi reanchor ` | +| `source_span` past EOF | error | tool | 1 | `: : ✗ source_span [s, e] extends past end of file ( lines)` | `liyi check --fix` | | Inverted or zero-length `source_span` | error | human | 1 | `: : ✗ invalid source_span [e, s]` | — | | Malformed `source_hash` | error | human | 1 | `: : ✗ malformed source_hash` | — | | Duplicate item + span | warning | human | 0 | `: : ⚠ duplicate entry` | — | | Source file deleted / not found | error | human | 1 | `: ✗ source file not found — spec is orphaned` | — | | Malformed JSONC | error | human | 2 | `: ✗ parse error: ` | — | -| Unknown `"version"` | error | tool | 2 | `: ✗ unknown version ""` | `liyi reanchor --migrate ` | +| Unknown `"version"` | error | tool | 2 | `: ✗ unknown version ""` | `liyi migrate ` | | Cycle in requirement hierarchy | error | human | 1 | `: : ✗ requirement cycle detected: ` | — | | Ambiguous sidecar (duplicate naming) | warning | human | 0 | `: ⚠ ambiguous sidecar — both .liyi.jsonc and .liyi.jsonc exist` | — | @@ -1566,14 +1581,14 @@ When writing or modifying code: 2. When module-level invariants are apparent, write an `@liyi:module` block — in the directory's existing module doc (`README.md`, `doc.go`, `mod.rs` doc comment, etc.) or in a dedicated `LIYI.md`. Use the doc markup language's comment syntax for the marker. 3. If a source item has a `@liyi:related ` annotation, record the dependency in `.liyi.jsonc` as `"related": {"": null}`. The tool fills in the requirement's current hash. 4. For each `@liyi:requirement ` block encountered, ensure it has a corresponding entry in the co-located `.liyi.jsonc` with `"requirement"` and `"source_span"`. (The tool fills in `"source_hash"`.) -5. 
If a spec has `"related"` edges referencing a requirement, do not overwrite the requirement text during inference. Re-anchor the spec (update `source_span`) but preserve the `"related"` edges. Do not write `source_hash` — the tool fills it in. +5. If a spec has `"related"` edges referencing a requirement, do not overwrite the requirement text during inference. Update the spec (update `source_span`) but preserve the `"related"` edges. Do not write `source_hash` — the tool fills it in. 6. Only generate adversarial tests from items that have a `@liyi:intent` annotation in source or `"reviewed": true` in the sidecar (i.e., human-reviewed intent). When `@liyi:intent` is present in source, use its prose (or the docstring for `=doc`) as the authoritative intent for test generation. 7. Tests should target boundary conditions, error-handling gaps, property violations, and semantic mismatches. Prioritize tests a subtly wrong implementation would fail. 8. Skip items annotated with `@liyi:ignore` or `@liyi:trivial`, and files matched by `.liyiignore`. Respect `@liyi:nontrivial` — if present, always infer a spec for that item and never override with `@liyi:trivial`. 9. Use a different model for test generation than the one that wrote the code, when possible. 10. When `liyi check` reports stale items, choose one of two paths: - **Direct re-inference** (preferred during interactive editing with few stale items): re-read the source, update `source_span` and `intent` in the sidecar, leave `"reviewed"` unset. Appropriate when you are the agent that just made the change, the number of stale items is small, and the changes are straightforward. - - **Triage** (preferred for batch workflows, CI, or when many items are stale): assess each item — is the change cosmetic, semantic, or an intent violation? Write the assessment to `.liyi/triage.json` following the triage report schema. For cosmetic changes, run `liyi triage --apply` to auto-reanchor. 
For semantic changes, propose updated intent in `suggested_intent`. For intent violations, flag for human review. Prefer triage when stale items have `"reviewed": true` or `@liyi:intent` in source — these carry human-vouched intent that deserves explicit assessment, not silent re-inference. + - **Triage** (preferred for batch workflows, CI, or when many items are stale): assess each item — is the change cosmetic, semantic, or an intent violation? Write the assessment to `.liyi/triage.json` following the triage report schema. For cosmetic changes, run `liyi triage --apply` to auto-fix. For semantic changes, propose updated intent in `suggested_intent`. For intent violations, flag for human review. Prefer triage when stale items have `"reviewed": true` or `@liyi:intent` in source — these carry human-vouched intent that deserves explicit assessment, not silent re-inference. 11. Before committing, run `liyi check`. If it reports coverage gaps (missing requirement specs, missing related edges), resolve **all** gaps in the same commit. When running in agent mode, consume the `liyi check --prompt` output and apply its instructions. Do not commit with unresolved coverage gaps — CI will reject it. 
``` @@ -1655,7 +1670,7 @@ An LSP (Language Server Protocol) server wraps `liyi check` output as editor dia | LSP Feature | 立意 Use | |---|---| | **Diagnostics** | Inline warnings at STALE, REQ CHANGED, and unreviewed sites | -| **Code Actions** | \"Accept inferred intent\" (sets `\"reviewed\": true` in sidecar), \"Assert intent in source\" (inserts `@liyi:intent`), \"Reanchor span\", \"Go to requirement\", \"Challenge\" (on-demand semantic verification via LLM) | +| **Code Actions** | \"Accept inferred intent\" (sets `\"reviewed\": true` in sidecar), \"Assert intent in source\" (inserts `@liyi:intent`), \"Fix span\", \"Go to requirement\", \"Challenge\" (on-demand semantic verification via LLM) | | **Hover** | Show the intent spec when hovering over a specced item | | **Go to Definition** | Jump from `@liyi:related X` to the `@liyi:requirement X` block | @@ -1671,11 +1686,11 @@ Candidate tools: |---|---| | `liyi_check` | Run `liyi check` on a path, return structured results (stale, reviewed, diverged) | | `liyi_check_json` | Run `liyi check --json` — return full context for stale items, suitable for agent-driven triage | -| `liyi_reanchor` | Re-hash spans for a given file | +| `liyi_fix` | Fix spans and fill tool-managed fields for a given path | | `liyi_get_requirement` | Look up a named requirement — return its text, location, and current hash | | `liyi_list_related` | List all items with `"related"` edges to a given requirement | | `liyi_triage_validate` | Validate an agent-produced triage report against the schema | -| `liyi_triage_apply` | Apply a validated triage report — auto-reanchor cosmetic items | +| `liyi_triage_apply` | Apply a validated triage report — auto-fix cosmetic items | The MCP tools provide *context for* reasoning and *application of* results. The reasoning itself (triage assessment, challenge verdicts) happens in the agent — which already has model access, conversation context, and the AGENTS.md instruction. 
This avoids duplicating LLM call logic inside the MCP server. @@ -1695,7 +1710,7 @@ For each unapproved item in the target file(s), display: Prompt: `approve? [y]es / [n]o / [e]dit intent / [s]kip` -- **y** — set `"reviewed": true`, update `source_hash` and `source_anchor` via reanchor. +- **y** — set `"reviewed": true`, update `source_hash` and `source_anchor`. - **n** — set `"reviewed": false` (explicit rejection). Leave hash unchanged. - **e** — open `$EDITOR` with the intent text. After save, re-display and re-prompt. - **s** — skip without changing anything. @@ -1708,7 +1723,7 @@ liyi approve --yes src/money.rs "add_money" # approve specific item liyi approve --yes . # approve all sidecars under cwd ``` -Sets `"reviewed": true` and reanchors without prompting. +Sets `"reviewed": true` and fixes hashes without prompting. **Flags:** - `--yes` — non-interactive, approve all matched items. @@ -1847,7 +1862,7 @@ After the agent processes this scaffold, the sidecar might look like: } ``` -Note: the agent removed the `impl::Money` container entry (containers are often not worth speccing independently), used `=doc` for the well-documented struct, `=trivial` for the getter, and wrote explicit intent for the rest. The `_hints` fields are gone — `liyi reanchor` strips them. +Note: the agent removed the `impl::Money` container entry (containers are often not worth speccing independently), used `=doc` for the well-documented struct, `=trivial` for the getter, and wrote explicit intent for the rest. The `_hints` fields are gone — `liyi check --fix` strips them. #### `_hints` — cold-start inference aids @@ -1870,7 +1885,7 @@ When `liyi init ` creates a skeleton sidecar for an existing file u - The absence of a schema contract *is* the contract. Downstream tooling cannot build on `_hints` because the shape is not guaranteed. This prevents accidental coupling to an ephemeral inference aid. - `liyi init` can freely evolve what hints it emits without breaking anything. 
-**Lifecycle.** `liyi init` writes `_hints` → the agent reads hints, infers intent, fills the `"intent"` field → `liyi reanchor` strips `_hints` from all spec entries. The linter ignores `_hints` (does not error on its presence). Hints are never committed in steady-state sidecars — they exist only during the cold-start inference window. +**Lifecycle.** `liyi init` writes `_hints` → the agent reads hints, infers intent, fills the `"intent"` field → `liyi check --fix` strips `_hints` from all spec entries. The linter ignores `_hints` (does not error on its presence). Hints are never committed in steady-state sidecars — they exist only during the cold-start inference window. **Per-item, not per-file.** Each spec entry gets its own `_hints` based on that item's span. A function with 47 commits and 3 bug fixes gets different hints than the simple getter next to it. @@ -2054,7 +2069,7 @@ This section estimates the effort to *build* 立意 itself — the linter, the c | Agent instruction (AGENTS.md paragraph) | 1 hour | 15 minutes | | `@liyi:module` convention + examples | 30 minutes | 10 minutes | | `.liyi.jsonc` examples for a demo repo | 1–2 hours | 20 minutes | -| CI linter (`liyi check` + `liyi reanchor` + `liyi approve` + `liyi init`, ~3000 lines) | 3–5 days | 2–4 hours | +| CI linter (`liyi check` + `liyi check --fix` + `liyi approve` + `liyi init` + `liyi migrate`, ~3000 lines) | 3–5 days | 2–4 hours | | Blog post explaining the practice | 1 day | 2–3 hours | | **Total** | **3–5 days** | **Half a day** | @@ -2062,7 +2077,7 @@ This section estimates the effort to *build* 立意 itself — the linter, the c ## What This Is -- A **CI linter** — `liyi check` + `liyi reanchor`, ~3000 lines across two crates (with tree-sitter-based span recovery). The enforcement mechanism. +- A **CI linter** — `liyi check` + `liyi check --fix`, ~3000 lines across two crates (with tree-sitter-based span recovery). The enforcement mechanism. 
- A **spec convention** — `@liyi:module` blocks (module intent) + `@liyi:requirement` blocks (named requirements) + `.liyi.jsonc` (item-level intent and requirement tracking, JSONC). - A **dependency model** — `@liyi:related` edges from code items to named requirements, with transitive staleness. - A **triage protocol** (post-MVP) — `liyi check --json` provides rich stale-item context; an agent (using whatever model it already has) assesses each item and writes a structured report; `liyi triage --apply` acts on the report. The binary stays deterministic and offline; the LLM reasoning lives in the agentic workflow. @@ -2105,7 +2120,7 @@ Each level is independently valuable. Stop wherever the cost outweighs the benef | **1. The review** | Review inferred intent in PRs — set `"reviewed": true` in sidecar (quick) or add `@liyi:intent` in source (explicit) | The review surface for intent is typically ~10% of the code surface — a few lines of spec per item instead of the full implementation. You catch wrong intent before wrong code gets tested. Careless review undermines adversarial testing quality — see *Why careless review is self-limiting* in the Security Model. | Seconds per item | | **2. The docs** | Add `## 立意` sections to READMEs / doc comments | Module-level invariants are documented, visible in rendered docs, discoverable by agents and humans. This is just good documentation practice. | 5 min per module | | **3. The linter** | Run `liyi check` in CI | Stale specs fail the build. You know which items changed since their intent was written. Deterministic enforcement. | Install a binary | -| **3.5. Triage** | When stale items are flagged, the agent assesses each: cosmetic, semantic, or intent violation. `liyi triage --apply` auto-reanchors cosmetics. Skippable — agents can directly re-infer intent instead (see *Direct re-inference* in the triage section). Triage is most valuable for batch workflows (CI, large PRs) and when stale items carry human-reviewed intent. 
| Noise from refactors and renames is eliminated automatically. Remaining items are sorted by action type — update intent, fix code, or manual review. Graph-aware impact propagation flags transitively affected items. | Agent follows the triage instruction (or skips triage and re-infers directly) | +| **3.5. Triage** | When stale items are flagged, the agent assesses each: cosmetic, semantic, or intent violation. `liyi triage --apply` auto-fixes cosmetics. Skippable — agents can directly re-infer intent instead (see *Direct re-inference* in the triage section). Triage is most valuable for batch workflows (CI, large PRs) and when stale items carry human-reviewed intent. | Noise from refactors and renames is eliminated automatically. Remaining items are sorted by action type — update intent, fix code, or manual review. Graph-aware impact propagation flags transitively affected items. | Agent follows the triage instruction (or skips triage and re-infers directly) | | **4. Challenge** | Click "Challenge" on a specced item in the editor, or include challenge in the agent workflow | A second model verifies code against intent, or intent against requirement. On-demand semantic verification — no pipeline, no test files. The trust gap between reviewing intent and trusting it blindly closes. | One click / prompt per item | | **5. Requirements** | Write `@liyi:requirement` blocks and `@liyi:related` annotations for critical-path items | Requirements are tracked, hashable, versionable. When a requirement changes, all related items are transitively flagged. Challenge verifies intent actually covers the requirement, not just that hashes match. | Minutes per requirement | | **6. The adversarial tests** | Configure a different model for test generation from reviewed specs | A second model reads the *intent* (not the code) and tries to break the implementation. Different training data, different blind spots. 
| Agent configuration | @@ -2195,7 +2210,7 @@ What the day-to-day experience looks like once all deliverables exist: 1. **Write code.** (Or have an agent write it.) The agent instruction in AGENTS.md tells it to also generate `.liyi.jsonc` specs alongside the code. 2. **Review intent, not implementation.** The agent infers intent and writes the sidecar. Read the inferred intent (via IDE hover or in the sidecar diff). If correct, accept it — either set `"reviewed": true` (one click, zero source noise) or add `@liyi:intent=doc` in source (one line, maximum visibility). If wrong, correct the intent: write `@liyi:intent ` in source with your own words, or edit the docstring. The review surface is ~10% of the code surface per item — a few lines of constraints and invariants instead of the full implementation. 3. **CI runs `liyi check`.** The linter verifies that existing specs aren't stale (source hash matches) and reports unreviewed specs. Stale specs fail the build. -4. **Handle staleness.** When stale items are flagged, the agent takes one of two paths. **Direct re-inference** (the fast path): the agent re-reads the source, updates `source_span` and `intent` in the sidecar, and leaves `"reviewed"` unset — appropriate during interactive editing with few stale items. **Triage** (the batch path, optional): the agent assesses each stale item as cosmetic, semantic, or intent violation, writes `.liyi/triage.json`, and `liyi triage --apply` auto-reanchors cosmetics. Triage is most valuable for large PRs, CI pipelines, or when stale items have human-reviewed intent (`"reviewed": true` or `@liyi:intent`). +4. **Handle staleness.** When stale items are flagged, the agent takes one of two paths. **Direct re-inference** (the fast path): the agent re-reads the source, updates `source_span` and `intent` in the sidecar, and leaves `"reviewed"` unset — appropriate during interactive editing with few stale items. 
**Triage** (the batch path, optional): the agent assesses each stale item as cosmetic, semantic, or intent violation, writes `.liyi/triage.json`, and `liyi triage --apply` auto-fixes cosmetics. Triage is most valuable for large PRs, CI pipelines, or when stale items have human-reviewed intent (`"reviewed": true` or `@liyi:intent`). 5. **Adversarial testing (optional).** A different model reads the reviewed intents and generates tests designed to break the implementation. Different training data, different blind spots. 6. **Iterate.** When source changes, the hash mismatches, the spec is flagged stale, the agent re-infers or triages, the human re-reviews. The cycle is fast because reviewing intent is fast. @@ -2282,13 +2297,13 @@ The success criterion remains: at least one team reports catching a real defect ### 4. `source_span` brittleness in 0.1 (mitigated by `tree_path`) -Line-number-based spans mean that any edit changing line counts (adding an import, inserting a blank line) invalidates every spec whose `source_span` falls at or below the edit point. The span-shift heuristic (±100-line scan, delta propagation) handles uniform shifts — the most common case — and reports `SHIFTED` (auto-corrected) rather than `STALE`. `liyi reanchor` handles non-uniform shifts manually. +Line-number-based spans mean that any edit changing line counts (adding an import, inserting a blank line) invalidates every spec whose `source_span` falls at or below the edit point. The span-shift heuristic (±100-line scan, delta propagation) handles uniform shifts — the most common case — and reports `SHIFTED` (auto-corrected) rather than `STALE`. `liyi check --fix` handles non-uniform shifts when `tree_path` is available. **v8.4 update:** This risk prompted the introduction of `tree_path` in 0.1 (see *Structural identity via `tree_path`*). When `tree_path` is populated, span recovery is deterministic — the tool locates the item by AST identity regardless of how lines shifted. 
The span-shift heuristic remains as a fallback for items without a `tree_path` (macros, generated code, unsupported languages). -The remaining friction for items without `tree_path`: between agent sessions, manual edits that shift lines without an agent re-inference will produce CI noise until the developer runs `liyi reanchor`. This is the same class of friction as lockfile conflicts (run `pnpm install` after merge), but it's friction nonetheless. For supported languages (Rust in 0.1), `tree_path` eliminates this friction entirely. +The remaining friction for items without `tree_path`: between agent sessions, manual edits that shift lines without an agent re-inference will produce CI noise until the developer runs `liyi check --fix`. This is the same class of friction as lockfile conflicts (run `pnpm install` after merge), but it's friction nonetheless. For supported languages (Rust in 0.1), `tree_path` eliminates this friction entirely. -**Mitigation in 0.1:** `tree_path` structural anchoring (primary), span-shift auto-correction (fallback), `liyi reanchor`, agent re-inference on next pass. +**Mitigation in 0.1:** `tree_path` structural anchoring (primary), span-shift auto-correction (fallback), `liyi check --fix`, agent re-inference on next pass. ### 5. Convention absorption and licensing (added 2026-03-06) @@ -2365,7 +2380,7 @@ pub fn add_money(a: Money, b: Money) -> Result { ### 2. Sidecar (after agent + tool) -The agent writes `source_span` and `intent`. The tool (`liyi reanchor` or the linter on first run) fills in `source_hash` and `source_anchor`. The result on disk: +The agent writes `source_span` and `intent`. The tool (`liyi check --fix`) fills in `source_hash` and `source_anchor`. The result on disk: ```jsonc // src/billing/money.rs.liyi.jsonc @@ -2528,7 +2543,7 @@ The agent re-infers (updating `source_span`; the tool recomputes `source_hash`), }, "source_hash": { "$ref": "#/$defs/sourceHash", - "description": "Tool-managed. 
SHA-256 hex digest of the source lines in the span. Computed by liyi reanchor or the linter — agents should not produce this." + "description": "Tool-managed. SHA-256 hex digest of the source lines in the span. Computed by liyi check --fix — agents should not produce this." }, "source_anchor": { "type": "string", @@ -2569,7 +2584,7 @@ The agent re-infers (updating `source_span`; the tool recomputes `source_hash`), }, "source_hash": { "$ref": "#/$defs/sourceHash", - "description": "Tool-managed. Computed by liyi reanchor or the linter." + "description": "Tool-managed. Computed by liyi check --fix." }, "source_anchor": { "type": "string", diff --git a/docs/liyi-design.md.liyi.jsonc b/docs/liyi-design.md.liyi.jsonc index 3949094..3337542 100644 --- a/docs/liyi-design.md.liyi.jsonc +++ b/docs/liyi-design.md.liyi.jsonc @@ -9,7 +9,7 @@ 343, 345 ], - "source_hash": "sha256:0000eb15eb6a1ebe192c1f1b3d43b1f60e619935997bbfd8df5d1e80c02fe163", + "source_hash": "sha256:e6619cfe8d91cfefa6b099e10a71bc40db7624e1a973783815f04d18d69ff470", "source_anchor": "" }, { @@ -18,7 +18,7 @@ 377, 379 ], - "source_hash": "sha256:b4f52a48481fe808a550884bee8949e665b3f34fd4dd77e55c51a0f46abd3009", + "source_hash": "sha256:afe22aa0b9f9625fea107924c2e3d860cd3f10699ed9f7ea996041b1114fec5a", "source_anchor": "" }, { @@ -27,7 +27,7 @@ 403, 405 ], - "source_hash": "sha256:da9de772f9412fc5e5ac4a172b6978c191e267b2c04ed477694914868ecbbdbe", + "source_hash": "sha256:40020571498ef149fe7a67ebd78bc430320921aae1faac8b7be1c830f35ae636", "source_anchor": "" }, { @@ -36,7 +36,7 @@ 427, 429 ], - "source_hash": "sha256:4d0c5843b38c855e3c725b8a6d1a1a63943f332185481ff1e395006a10a6ccfe", + "source_hash": "sha256:867bdee2970960b1cad64d2c07d813bdd6c0e0a8c0fae52cec7ed398fc2b9402", "source_anchor": "" }, { @@ -45,7 +45,7 @@ 431, 433 ], - "source_hash": "sha256:4169ca6d29fd03f03c1cafd61476e2ec7633981b298eced09fb2336ab38e990a", + "source_hash": "sha256:4bc9df80baf61c519493cf82d6fd325a99acf9f2fa56ffbf5c08d15ce2f99994", 
"source_anchor": "" }, { @@ -54,107 +54,107 @@ 443, 445 ], - "source_hash": "sha256:dc52a0e237b136645c3d6cd05911414d947094eb91323e01969c124301606982", + "source_hash": "sha256:6c9f4b6a6cdb20ca339372b8776bc96c8819c12168f0f881679bd44d1542fc25", "source_anchor": "" }, { - "requirement": "tree-path-reanchor-behavior", + "requirement": "tree-path-fix-behavior", "source_span": [ - 465, - 469 + 469, + 472 ], - "source_hash": "sha256:1df1d3238855b1d8c1b20493e2788af47135f318e399d43b650bded04c7c5a5a", - "source_anchor": "" + "source_hash": "sha256:5dca1e26d0fb939c33b2214db262701daf9a67cc8a2d13a6f58d65b413b5512c", + "source_anchor": "" }, { "requirement": "tree-path-empty-fallback", "source_span": [ - 473, - 481 + 476, + 484 ], - "source_hash": "sha256:0bbc4ec24af3fb1c3abce91342f1249f5b0a029e2a6a0573544d5a13d59722d6", + "source_hash": "sha256:aa9a9bd3d6f00ac3aa69d4c4dc33bbedf2ca0b7781be40492679ebfcd21b7340", "source_anchor": "" }, { "requirement": "requirement-name-uniqueness", "source_span": [ - 629, - 631 + 632, + 634 ], - "source_hash": "sha256:03a65c514532f4f424cbe30ed55fe7455c3a602ac286b1c62efa9722b0968700", + "source_hash": "sha256:bf5fcadbb09429fc0b9a6f55b6b1ac685d5a88f134b3e89f2861dc20bc0e9928", "source_anchor": "" }, { "requirement": "cycle-detection", "source_span": [ - 754, - 756 + 771, + 773 ], - "source_hash": "sha256:66311f793af3fcf3539f50be30b4f2e7d4e56fd3d1dbd4069bf34b6699868542", + "source_hash": "sha256:925289a107cb255f3c48f0b5395395038278f56f7006dab3522e511cc8d7699f", "source_anchor": "" }, { "requirement": "requirement-discovery-global", "source_span": [ - 1054, - 1056 + 1102, + 1104 ], - "source_hash": "sha256:c6a5b25878f52c9f835e31aa11a060af59e390e64f0730d153004a7993548c6f", - "source_anchor": "```gitignore" + "source_hash": "sha256:d01ce1382642d6ad05162bbba79d4cd35dc7715294f8cc3d6c54de9e743fa38c", + "source_anchor": "" }, { "requirement": "liyi-check-exit-code", "source_span": [ - 1177, - 1183 + 1227, + 1233 ], - "source_hash": 
"sha256:3c5e25f88155fb2751b7ff2b520cc60e1b6f0172c854077b54dab753e169f682", - "source_anchor": "**Summary-first output.** The summary line is printed before the per-item diagnostics, so that in large projects the user sees the aggregate picture immediately — without scrolling past hundreds of lines. (When the output is short enough, this also serves as a final line for quick scanning.)" + "source_hash": "sha256:d7c4ad4bd1d1abe361598373a376aacda6bc0d948bd611f9b8bdea0b23a74c7a", + "source_anchor": "" }, { "requirement": "marker-normalization", "source_span": [ - 1210, - 1212 + 1260, + 1262 ], - "source_hash": "sha256:5b12f3063785e56636d702bebcbd8f759ad512c72fa4af056edc5548fb13b116", - "source_anchor": "" + "source_hash": "sha256:7d9e05684eeaf175288fbd0629a2352dbe3353416172691de4ab3dcbe435b7d3", + "source_anchor": "" }, { "requirement": "quine-escape-in-source", "source_span": [ - 1224, - 1226 + 1274, + 1276 ], - "source_hash": "sha256:f59014cebee07606dbb103f06b6b6336409222fce1f6bb6191e800e38c40cd61", - "source_anchor": "### Marker normalization (half-width / full-width equivalence)" + "source_hash": "sha256:7bb4de54441a4fadbd4204d8e950cedff7874181b111f2b6789f693ac22b982b", + "source_anchor": "" }, { "requirement": "markdown-fenced-block-skip", "source_span": [ - 1230, - 1236 + 1280, + 1286 ], - "source_hash": "sha256:95608b3b77f9c441d2c9dfe396edfbd9ffe25ea5a5ff4ed2943356c5a30e382d", - "source_anchor": "| Half-width | Full-width | Role |" + "source_hash": "sha256:8160197ac9ec02de2896ceac8e8edf334a43efe294ccaf976b2909a629689ceb", + "source_anchor": "" }, { "requirement": "fix-never-modifies-human-fields", "source_span": [ - 1442, - 1444 + 1492, + 1494 ], - "source_hash": "sha256:607df81587523fe5a12ddb1b5d4ef92aaed48c07811581d4c9fe40a8d6fe0b6a", - "source_anchor": "```" + "source_hash": "sha256:e5c1b041dab62b4533a84b2bfc73c2ca8697a9ae7a454540686c8d479305ef5f", + "source_anchor": "" }, { "requirement": "fix-semantic-drift-protection", "source_span": [ - 1446, - 1448 + 
1496, + 1498 ], - "source_hash": "sha256:f40c009e185cefee8531edae568cab5de68a64203695d03e8575b16623e4910a", - "source_anchor": "**Direct re-inference: the fast path.** Triage adds value when many items are stale and a human needs to prioritize — a refactor that touches 30 functions, a CI pipeline processing a large PR. But during interactive editing — an agent making focused changes to 2-3 functions in a single session — triage is overhead. The agent already knows what it changed and why." + "source_hash": "sha256:87f70b0fa07c386eb7478b420f8edc70f7b7bb709c416e2aaf17a1a913fa0ce5", + "source_anchor": "" } ] } diff --git a/docs/liyi-mvp-roadmap.md b/docs/liyi-mvp-roadmap.md index dedc57d..3e8a391 100644 --- a/docs/liyi-mvp-roadmap.md +++ b/docs/liyi-mvp-roadmap.md @@ -11,7 +11,7 @@ This document is the implementation plan for 立意 v0.1 — the CI linter, the **Deliverables:** 1. `liyi check` — the CI linter binary (Rust) ✅ -2. `liyi reanchor` — the span re-hashing tool (subcommand of the same binary) ✅ +2. `liyi check --fix` — the span fixing tool (fills hashes, corrects shifts, computes tree_path) ✅ 3. `liyi.schema.json` — the JSON Schema for `.liyi.jsonc` v0.1 ✅ 4. Agent instruction — the ~12-line AGENTS.md paragraph ✅ 5. 
Demo repo — the linter's own codebase, dogfooded with `.liyi.jsonc` specs and `@liyi:module` markers ✅ @@ -25,14 +25,14 @@ This document is the implementation plan for 立意 v0.1 — the CI linter, the | Module | Status | Notes | |--------|--------|-------| -| `cli.rs` | ✅ Done | `check`, `reanchor`, `init`, `approve` subcommands, all planned flags | +| `cli.rs` | ✅ Done | `check`, `init`, `approve`, `migrate` subcommands, all planned flags | | `discovery.rs` | ✅ Done | `.liyiignore` support, ambiguous sidecar detection, scope filtering | | `sidecar.rs` | ✅ Done | JSONC comment stripping, serde, `deny_unknown_fields`, `tree_path` field | | `markers.rs` | ✅ Done | All 7 marker types, fullwidth normalization, multilingual aliases | | `hashing.rs` | ✅ Done | SHA-256, CRLF normalization, all `SpanError` variants | | `shift.rs` | ✅ Done | ±100-line scan with anchor hint shortcut | | `check.rs` | ✅ Done | Two-pass logic, `--fix` write-back, `--dry-run`, tree-sitter span recovery via `tree_path`, semantic drift protection, all 4 post-pass diagnostics wired | -| `reanchor.rs` | ✅ Done | Targeted + batch re-hashing, multi-file/directory support, `--migrate` scaffold, tree-sitter span recovery | +| `reanchor.rs` | ✅ Done | Internal module for batch re-hashing and `--migrate` logic, now invoked via `check --fix` and `migrate` subcommands | | `tree_path.rs` | ✅ Done | Tree-sitter structural identity & span recovery (R6). Resolve, compute, auto-populate. Rust grammar. 
| | `diagnostics.rs` | ✅ Done | All diagnostic types, formatting, exit codes, summary line output | | `schema.rs` | ✅ Done | Accepts `"0.1"` only, migration scaffold | @@ -80,7 +80,7 @@ All `DiagnosticKind` variants are defined and emitted: |------|--------|-------| | `shift_proptest.rs` | ✅ Done | 4 property-based tests: insert/delete shifts, content modification, hint agreement | | CI (GitHub Actions) | ✅ Done | Workflow: `cargo test`, `cargo clippy`, `cargo fmt --check`, `liyi check --root .` (dogfood) | -| Dogfooding locally | ✅ Done | Full loop confirmed: agent changes code → `liyi check` detects staleness → agent reanchors specs. CI wired. | +| Dogfooding locally | ✅ Done | Full loop confirmed: agent changes code → `liyi check` detects staleness → agent fixes specs. CI wired. | | Summary line output | ✅ Done | Prints "N current, M stale, K unreviewed, ..." after diagnostics | | `liyi init` subcommand | ✅ Done | Scaffold AGENTS.md or skeleton `.liyi.jsonc` sidecar | | `liyi approve` subcommand | ✅ Done | Batch (`--yes`) and interactive modes, `--dry-run`, `--item` filter | @@ -127,13 +127,13 @@ All `DiagnosticKind` variants are defined and emitted: │ - hash │ └────────────────────┘ │ spans │ │ - check │ ┌────────────────────┐ - │ review │ │ liyi reanchor │ - │ - resolve│ │ │ - │ related│ │ Fills source_hash, │ - │ edges │ │ source_anchor from │ - │ │ │ actual source file │ - │ Exit 0/1/2│ │ bytes. No LLM. 
│ - └───────────┘ └────────────────────┘ + │ review │ + │ - resolve│ + │ related│ + │ edges │ + │ │ + │ Exit 0/1/2│ + └───────────┘ Post-MVP triage workflow: ┌────────────────────────────────────────────┐ @@ -144,7 +144,7 @@ All `DiagnosticKind` variants are defined and emitted: │ liyi triage --validate │ │ → schema check │ │ liyi triage --apply │ - │ → auto-reanchor cosmetic items │ + │ → auto-fix cosmetic items │ │ → present semantic/violation for review │ └────────────────────────────────────────────┘ ``` @@ -156,15 +156,14 @@ A single Rust binary with subcommands: | Subcommand | Purpose | |---|---| | `liyi check [paths...]` | Lint: staleness, review status, requirement tracking | -| `liyi check --fix` | Lint + auto-correct shifted spans, fill missing hashes | +| `liyi check --fix` | Lint + auto-correct shifted spans, fill missing hashes, compute tree_path | | `liyi check --json` | Machine-readable output with full context for each stale item (feeds `liyi triage`) | | `liyi approve [paths...] 
[--yes]` | Interactive review: mark specs as human-approved | | `liyi init [source-file]` | Scaffold AGENTS.md or skeleton `.liyi.jsonc` sidecar | -| `liyi reanchor [--item --span ]` | Manual span re-hashing (accepts files or directories, recursive) | -| `liyi reanchor --migrate` | Schema version migration (no-op in 0.1, scaffolded) | +| `liyi migrate ` | Schema version migration (no-op in 0.1, scaffolded) | | `liyi triage --prompt` | Assemble a self-contained LLM prompt from stale items (post-MVP) | | `liyi triage --validate ` | Validate an agent-produced triage report against the schema (post-MVP) | -| `liyi triage --apply [file]` | Auto-reanchor cosmetic items, present remaining for review (post-MVP) | +| `liyi triage --apply [file]` | Auto-fix cosmetic items, present remaining for review (post-MVP) | | `liyi triage --summary [file]` | Human-readable summary of a triage report (post-MVP) | ### Crate structure @@ -183,7 +182,7 @@ liyi/ │ │ │ ├── markers.rs ← Source marker scanning (@liyi:*, normalization) │ │ │ ├── hashing.rs ← source_span → SHA-256, anchor extraction │ │ │ ├── shift.rs ← Span-shift detection -│ │ │ ├── reanchor.rs ← reanchor subcommand logic +│ │ │ ├── reanchor.rs ← internal re-hashing and migrate logic │ │ │ ├── tree_path.rs ← Tree-sitter structural identity & span recovery │ │ │ ├── diagnostics.rs ← Diagnostic types, formatting, exit codes │ │ │ ├── schema.rs ← Version validation, migration scaffold @@ -255,20 +254,8 @@ enum Command { root: Option, }, - /// Re-hash source spans in sidecar files - Reanchor { - /// Sidecar files or directories to reanchor (recursive) + /// Migrate sidecar schema version + Migrate { + /// Sidecar files or directories to migrate (recursive) files: Vec, - - /// Target a specific item by name - #[arg(long)] - item: Option, - - /// Override span (start,end) - #[arg(long, value_parser = parse_span)] - span: Option<(usize, usize)>, - - /// Migrate sidecar to current schema version + }, - #[arg(long)] - migrate: bool, - },
@@ -572,13 +562,13 @@ For each `.liyi.jsonc` in scope: e. Check `@liyi:trivial` / `@liyi:ignore` within or immediately before the span. If found, mark as trivial/ignored (skip review requirement). f. If `related` is present: for each requirement name, look up in the pass-1 map. If not found → `ERROR: unknown requirement`. If found and hash differs from recorded hash → `REQ CHANGED`. 5. For each `Spec::Requirement`: - a. Hash the `source_span`. If `source_hash` present and mismatches → STALE (requirement text changed but sidecar not updated — run `liyi reanchor`). + a. Hash the `source_span`. If `source_hash` present and mismatches → STALE (requirement text changed but sidecar not updated — run `liyi check --fix`). 6. Report requirements from pass 1 that have no referencing items (informational). **`--fix` behavior (integrated into pass 2):** When `--fix` is active: -- Fill in missing `source_hash` and `source_anchor` (same as `reanchor`). +- Fill in missing `source_hash` and `source_anchor` (via the internal `reanchor.rs` module). - Auto-correct SHIFTED spans (write new span, recompute hash/anchor). - Attempt tree-path re-resolution **before** validating span boundaries — if `tree_path` is set and the current `source_span` is past EOF or otherwise invalid, resolve via tree-sitter first. - Write modified sidecars back to disk. @@ -611,23 +601,22 @@ enum ExitCode { --- -### 8. `reanchor.rs` — Reanchor Subcommand +### 8. `reanchor.rs` — Internal Re-hashing Module -**Purpose:** Re-hash source spans in sidecar files. Manual tool for fixing spans after line shifts. Accepts one or more sidecar files or directories (recursive). When `tree_path` is populated, uses tree-sitter to locate items by structural identity before re-hashing. +**Purpose:** Internal module for re-hashing source spans in sidecar files. Invoked by `liyi check --fix` (for span correction and hash filling) and `liyi migrate` (for schema version upgrades). 
When `tree_path` is populated, uses tree-sitter to locate items by structural identity before re-hashing. **Behavior:** 1. Parse the target sidecar(s). If a directory is given, discover all `.liyi.jsonc` files under it recursively. -2. If `--item` and `--span` are specified: find the named item, update its span, recompute hash/anchor. -3. If neither: for every spec in the sidecar: +2. For every spec in the sidecar: a. If `tree_path` is non-empty and a tree-sitter grammar is available for the source language: parse the source file, locate the item by structural identity, update `source_span` to the item's current line range, recompute hash/anchor. This handles formatting changes, import additions, and any line-shifting edits. b. Otherwise: recompute hash/anchor from the source file at the recorded span (existing behavior). This handles "code changed at the same span" (human confirms intent still holds → re-hash). -4. If `--migrate`: update `"version"` to current (no-op in 0.1, but the scaffold ensures the flag exists and the code path handles future versions). -5. Write modified sidecar back. +3. If migrating: update `"version"` to current (no-op in 0.1, but the scaffold ensures the code path handles future versions). +4. Write modified sidecar back. **Constraints:** -- `reanchor` never modifies `intent`, `reviewed`, or `related`. -- If the source file doesn't exist, emit an error (can't reanchor an orphaned spec). +- Never modifies `intent`, `reviewed`, or `related`. +- If the source file doesn't exist, emit an error (can't fix an orphaned spec). - Idempotent: running twice produces the same output. **Size estimate:** ~60 lines. @@ -773,7 +762,7 @@ The convention defines 7 marker types that the linter recognizes in source files ## Key Constraints ### 1. No language-specific parsing (core path) -The linter's core check path reads line ranges and hashes bytes. It does not parse any programming language. 
Source markers are found by string matching on individual lines (after normalization). This is the core design constraint that makes the tool work with any language. Tree-sitter is used **only** for `tree_path` span recovery in `liyi reanchor` and `liyi check --fix` — it is an optional enhancement, not a requirement. When tree-sitter has no grammar for a language (or `tree_path` is empty), the tool falls back to the language-agnostic line-number behavior. +The linter's core check path reads line ranges and hashes bytes. It does not parse any programming language. Source markers are found by string matching on individual lines (after normalization). This is the core design constraint that makes the tool work with any language. Tree-sitter is used **only** for `tree_path` span recovery in `liyi check --fix` — it is an optional enhancement, not a requirement. When tree-sitter has no grammar for a language (or `tree_path` is empty), the tool falls back to the language-agnostic line-number behavior. ### 2. No LLM calls, no network access The linter is fully offline and deterministic. SHA-256 hashing, file I/O, string matching. No API keys, no configuration for models, no telemetry. @@ -789,7 +778,7 @@ Configuration is expressed through: | Field | Written by | Never written by | |---|---|---| | `item`, `intent`, `source_span`, `confidence`, `related` (names) | Agent | — | -| `source_hash`, `source_anchor`, `tree_path`, `related` (hashes) | `liyi reanchor` / `liyi check --fix` | Agent (may write initial `tree_path`), human | +| `source_hash`, `source_anchor`, `tree_path`, `related` (hashes) | `liyi check --fix` | Agent (may write initial `tree_path`), human | | `reviewed` | Human (CLI / IDE) | Agent (security model) | ### 5. Exit code contract @@ -815,7 +804,7 @@ All must-have and nice-to-have items are now complete. #### R1. `liyi approve` — interactive review command ✅ -Implemented in `crates/liyi/src/approve.rs`. 
Interactive by default when stdin is a TTY (show intent + source span, prompt y/n/s). Batch mode via `--yes` or when non-TTY. `--dry-run`, `--item ` flags. Reanchors on approval (fills `source_hash`, `source_anchor`). +Implemented in `crates/liyi/src/approve.rs`. Interactive by default when stdin is a TTY (show intent + source span, prompt y/n/s). Batch mode via `--yes` or when non-TTY. `--dry-run`, `--item ` flags. Fills `source_hash` and `source_anchor` on approval. #### R2. `liyi init` — scaffold command ✅ @@ -938,7 +927,7 @@ The linter's own codebase has `.liyi.jsonc` specs. CI runs `liyi check`. This is 1. **`liyi check` runs on a real codebase** — the linter's own source — and produces correct diagnostics. ✅ (43 unit + 22 golden/integration tests pass) 2. **All golden-file tests pass** — covering every diagnostic in the catalog. ✅ (all 15+ planned fixtures exist) -3. **`liyi reanchor` re-hashes spans** correctly, including `--item`/`--span` targeting. ✅ +3. **`liyi check --fix` fills tool-managed fields** correctly. ✅ 4. **The agent instruction works** — an LLM reading `AGENTS.md` produces valid `.liyi.jsonc` files that `liyi check` can lint. ✅ 5. **CI is green** — GitHub Actions runs `liyi check` on every push. ✅ 6. **The binary is small** — single static binary, <5 MB, zero runtime dependencies. ✅ diff --git a/schema/liyi.schema.json b/schema/liyi.schema.json index 3ed3ce1..00aba76 100644 --- a/schema/liyi.schema.json +++ b/schema/liyi.schema.json @@ -64,7 +64,7 @@ }, "source_hash": { "$ref": "#/$defs/sourceHash", - "description": "Tool-managed. SHA-256 hex digest of the source lines in the span. Computed by liyi reanchor or the linter — agents should not produce this." + "description": "Tool-managed. SHA-256 hex digest of the source lines in the span. Computed by liyi check --fix — agents should not produce this." 
}, "source_anchor": { "type": "string", @@ -88,7 +88,7 @@ }, "_hints": { "type": "object", - "description": "Transient inference aids emitted by liyi init for cold-start scenarios. LLM-readable, intentionally unstructured. Stripped by liyi reanchor after initial review. Tools MUST NOT rely on any specific shape." + "description": "Transient inference aids emitted by liyi init for cold-start scenarios. LLM-readable, intentionally unstructured. Stripped by liyi check --fix after initial review. Tools MUST NOT rely on any specific shape." } } }, @@ -109,7 +109,7 @@ }, "source_hash": { "$ref": "#/$defs/sourceHash", - "description": "Tool-managed. Computed by liyi reanchor or the linter." + "description": "Tool-managed. Computed by liyi check --fix." }, "source_anchor": { "type": "string", diff --git a/schema/triage.schema.json b/schema/triage.schema.json index cbf0035..1e479ac 100644 --- a/schema/triage.schema.json +++ b/schema/triage.schema.json @@ -49,8 +49,8 @@ }, "action": { "type": "string", - "enum": ["auto-reanchor", "update-intent", "fix-code-or-update-intent", "manual-review"], - "description": "Recommended action. auto-reanchor for cosmetic, update-intent for semantic, fix-code-or-update-intent for intent-violation, manual-review for unclear." + "enum": ["auto-fix", "update-intent", "fix-code-or-update-intent", "manual-review"], + "description": "Recommended action. auto-fix for cosmetic, update-intent for semantic, fix-code-or-update-intent for intent-violation, manual-review for unclear." }, "summary": { "type": "object",