diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 5e4d2578..85fdae5b 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -238,7 +238,7 @@ GIT git.rs status, diff, log 85-99% ✓ add, commit, push branch, checkout -CODE SEARCH grep_cmd.rs grep 60-80% ✓ +CODE SEARCH grep_cmd.rs grep 0-80% ✓ diff_cmd.rs diff 70-85% ✓ find_cmd.rs find 50-70% ✓ diff --git a/CLAUDE.md b/CLAUDE.md index ab512961..2f990946 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -209,7 +209,7 @@ rtk gain --history | grep proxy | Module | Purpose | Token Strategy | |--------|---------|----------------| | git.rs | Git operations | Stat summaries + compact diffs | -| grep_cmd.rs | Code search | Group by file, truncate lines | +| grep_cmd.rs | Code search | Raw passthrough <=50 matches, group by file >50 | | ls.rs | Directory listing | Tree format, aggregate counts | | read.rs | File reading | Filter-level based stripping | | runner.rs | Command execution | Stderr only (err), failures only (test) | diff --git a/INSTALL.md b/INSTALL.md index 98457d09..aaba7d2c 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -281,7 +281,7 @@ cp ~/.claude/settings.json.bak ~/.claude/settings.json ```bash rtk ls . # Compact tree view rtk read file.rs # Optimized reading -rtk grep "pattern" . # Grouped search results +rtk grep "pattern" . # Search results (raw <=50, grouped >50) ``` ### Git diff --git a/README.md b/README.md index d818e2af..38d97f49 100644 --- a/README.md +++ b/README.md @@ -139,7 +139,7 @@ rtk read file.rs # Smart file reading rtk read file.rs -l aggressive # Signatures only (strips bodies) rtk smart file.rs # 2-line heuristic code summary rtk find "*.rs" . # Compact find results -rtk grep "pattern" . # Grouped search results +rtk grep "pattern" . # Search results (raw <=50, grouped >50) rtk diff file1 file2 # Condensed diff ``` diff --git a/README_es.md b/README_es.md index c05da936..7ed98a39 100644 --- a/README_es.md +++ b/README_es.md @@ -108,7 +108,7 @@ Cuatro estrategias: rtk ls . 
# Arbol de directorios optimizado rtk read file.rs # Lectura inteligente rtk find "*.rs" . # Resultados compactos -rtk grep "pattern" . # Busqueda agrupada por archivo +rtk grep "pattern" . # Resultados crudos <=50, agrupados >50 ``` ### Git diff --git a/README_fr.md b/README_fr.md index b8c71734..1dc7bd1b 100644 --- a/README_fr.md +++ b/README_fr.md @@ -119,7 +119,7 @@ rtk ls . # Arbre de repertoires optimise rtk read file.rs # Lecture intelligente rtk read file.rs -l aggressive # Signatures uniquement rtk find "*.rs" . # Resultats compacts -rtk grep "pattern" . # Resultats groupes par fichier +rtk grep "pattern" . # Resultats bruts <=50, groupes >50 rtk diff file1 file2 # Diff condense ``` diff --git a/README_ja.md b/README_ja.md index a6e7dc22..251ad345 100644 --- a/README_ja.md +++ b/README_ja.md @@ -108,7 +108,7 @@ git status # 自動的に rtk git status に書き換え rtk ls . # 最適化されたディレクトリツリー rtk read file.rs # スマートファイル読み取り rtk find "*.rs" . # コンパクトな検索結果 -rtk grep "pattern" . # ファイル別グループ化検索 +rtk grep "pattern" . # 50件以下は生出力、50件超はグループ化 ``` ### Git diff --git a/README_ko.md b/README_ko.md index b9eca724..69824140 100644 --- a/README_ko.md +++ b/README_ko.md @@ -108,7 +108,7 @@ git status # 자동으로 rtk git status로 재작성 rtk ls . # 최적화된 디렉토리 트리 rtk read file.rs # 스마트 파일 읽기 rtk find "*.rs" . # 컴팩트한 검색 결과 -rtk grep "pattern" . # 파일별 그룹화 검색 +rtk grep "pattern" . # 50개 이하 원본 출력, 50개 초과 그룹화 ``` ### Git diff --git a/README_zh.md b/README_zh.md index bd7fce8d..45b20aac 100644 --- a/README_zh.md +++ b/README_zh.md @@ -109,7 +109,7 @@ git status # 自动重写为 rtk git status rtk ls . # 优化的目录树 rtk read file.rs # 智能文件读取 rtk find "*.rs" . # 紧凑的查找结果 -rtk grep "pattern" . # 按文件分组的搜索结果 +rtk grep "pattern" . 
# 不超过50条原始输出,超过50条分组 ``` ### Git diff --git a/docs/AUDIT_GUIDE.md b/docs/AUDIT_GUIDE.md index 8bcebdff..421953fd 100644 --- a/docs/AUDIT_GUIDE.md +++ b/docs/AUDIT_GUIDE.md @@ -270,7 +270,7 @@ Savings % = (Saved / Input) × 100 | `rtk vitest run` | 94-99% | Show failures only | | `rtk find` | 75% | Tree format | | `rtk pnpm list` | 70-90% | Compact dependencies | -| `rtk grep` | 70% | Truncate + group | +| `rtk grep` | 0-70% | Raw passthrough <=50 matches, group >50 | ## Database Management diff --git a/docs/FEATURES.md b/docs/FEATURES.md index 061a604a..4740732a 100644 --- a/docs/FEATURES.md +++ b/docs/FEATURES.md @@ -202,7 +202,7 @@ Supporte a la fois la syntaxe RTK et la syntaxe native `find` (`-name`, `-type`, ### `rtk grep` -- Recherche dans le contenu -**Objectif :** Remplace `grep` et `rg` avec une sortie groupee par fichier, tronquee. +**Objectif :** Remplace `grep` et `rg`. Passthrough brut pour <=50 resultats (format standard `file:line:content`), sortie groupee par fichier pour >50 resultats. Utilise `--no-ignore` pour correspondre au comportement de `grep -r`. **Syntaxe :** ```bash diff --git a/src/filter.rs b/src/filter.rs index d6d9d19b..5830ad91 100644 --- a/src/filter.rs +++ b/src/filter.rs @@ -338,47 +338,13 @@ pub fn smart_truncate(content: &str, max_lines: usize, _lang: &Language) -> Stri return content.to_string(); } - let mut result = Vec::with_capacity(max_lines); - let mut kept_lines = 0; - let mut skipped_section = false; - - for line in &lines { - let trimmed = line.trim(); - - // Always keep signatures and important structural elements - let is_important = FUNC_SIGNATURE.is_match(trimmed) - || IMPORT_PATTERN.is_match(trimmed) - || trimmed.starts_with("pub ") - || trimmed.starts_with("export ") - || trimmed == "}" - || trimmed == "{"; - - if is_important || kept_lines < max_lines / 2 { - if skipped_section { - result.push(format!( - " // ... 
{} lines omitted", - lines.len() - kept_lines - )); - skipped_section = false; - } - result.push((*line).to_string()); - kept_lines += 1; - } else { - skipped_section = true; - } - - if kept_lines >= max_lines - 1 { - break; - } - } - - if skipped_section || kept_lines < lines.len() { - result.push(format!( - "// ... {} more lines (total: {})", - lines.len() - kept_lines, - lines.len() - )); - } + // Clean truncation: take first max_lines lines only. + // The old approach inserted synthetic "// ... N lines omitted" markers + // that AI agents treated as file content, causing parsing confusion + // and retry loops that burned more tokens than the filtering saved. + let mut result: Vec<String> = lines[..max_lines].iter().map(|l| (*l).to_string()).collect(); + let omitted = lines.len() - max_lines; + result.push(format!("[{} more lines]", omitted)); result.join("\n") } @@ -491,4 +457,34 @@ fn main() { assert!(!result.contains("// This is a comment")); assert!(result.contains("fn main()")); } + + #[test] + fn test_smart_truncate_no_annotations() { + let input = "line1\nline2\nline3\nline4\nline5\nline6\nline7\nline8\nline9\nline10\n"; + let output = smart_truncate(input, 3, &Language::Unknown); + // Must NOT contain old-style "// ... 
N lines omitted" annotations + assert!( + !output.contains("// ..."), + "smart_truncate must not insert synthetic comment annotations" + ); + // Must contain clean truncation marker + assert!(output.contains("[7 more lines]")); + // Must preserve first 3 lines verbatim + assert!(output.starts_with("line1\nline2\nline3\n")); + } + + #[test] + fn test_smart_truncate_no_truncation_when_under_limit() { + let input = "a\nb\nc\n"; + let output = smart_truncate(input, 10, &Language::Unknown); + assert_eq!(output, input); + assert!(!output.contains("more lines")); + } + + #[test] + fn test_smart_truncate_exact_limit() { + let input = "a\nb\nc"; + let output = smart_truncate(input, 3, &Language::Unknown); + assert_eq!(output, input); + } } diff --git a/src/grep_cmd.rs b/src/grep_cmd.rs index c1819dde..056b59e3 100644 --- a/src/grep_cmd.rs +++ b/src/grep_cmd.rs @@ -26,7 +26,10 @@ pub fn run( let rg_pattern = pattern.replace(r"\|", "|"); let mut rg_cmd = resolved_command("rg"); - rg_cmd.args(["-n", "--no-heading", &rg_pattern, path]); + // --no-ignore: match grep -r behavior (don't skip gitignored files). + // Without this, rg returns 0 matches for files in .gitignore, causing + // false negatives that make AI agents draw wrong conclusions. + rg_cmd.args(["-n", "--no-heading", "--no-ignore", &rg_pattern, path]); if let Some(ft) = file_type { rg_cmd.arg("--type").arg(ft); @@ -76,67 +79,113 @@ pub fn run( return Ok(()); } - let mut by_file: HashMap> = HashMap::new(); - let mut total = 0; - - // Compile context regex once (instead of per-line in clean_line) - let context_re = if context_only { - Regex::new(&format!("(?i).{{0,20}}{}.*", regex::escape(pattern))).ok() + // Count total matches to decide output strategy + let total_matches = stdout.lines().count(); + + // Passthrough threshold: small/medium results go through raw so AI agents + // can parse standard grep output. Only summarize large results where + // token savings actually matter. 
The grouped format ("X matches in YF:") + // confuses AI agents and causes retry loops that burn more tokens than + // the filtering saves. + let passthrough_threshold = 50; + + let rtk_output = if total_matches <= passthrough_threshold { + // Small result set: pass through raw rg/grep output. + // Truncate individual lines but preserve standard file:line:content format. + let mut out = String::new(); + let mut shown = 0; + for line in stdout.lines() { + if shown >= max_results { + break; + } + let parts: Vec<&str> = line.splitn(3, ':').collect(); + if parts.len() == 3 { + let file = parts[0]; + let line_num = parts[1]; + let content = parts[2].trim(); + if content.len() <= max_line_len { + out.push_str(&format!("{}:{}:{}\n", file, line_num, content)); + } else { + let truncated: String = content.chars().take(max_line_len - 3).collect(); + out.push_str(&format!("{}:{}:{}...\n", file, line_num, truncated)); + } + } else { + // Non-standard line (e.g., context separator), pass through + out.push_str(line); + out.push('\n'); + } + shown += 1; + } + if total_matches > max_results { + out.push_str(&format!("... 
+{} matches\n", total_matches - max_results)); + } + out } else { - None - }; - - for line in stdout.lines() { - let parts: Vec<&str> = line.splitn(3, ':').collect(); + // Large result set: use grouped format for token savings + let mut by_file: HashMap> = HashMap::new(); - let (file, line_num, content) = if parts.len() == 3 { - let ln = parts[1].parse().unwrap_or(0); - (parts[0].to_string(), ln, parts[2]) - } else if parts.len() == 2 { - let ln = parts[0].parse().unwrap_or(0); - (path.to_string(), ln, parts[1]) + let context_re = if context_only { + Regex::new(&format!("(?i).{{0,20}}{}.*", regex::escape(pattern))).ok() } else { - continue; + None }; - total += 1; - let cleaned = clean_line(content, max_line_len, context_re.as_ref(), pattern); - by_file.entry(file).or_default().push((line_num, cleaned)); - } - - let mut rtk_output = String::new(); - rtk_output.push_str(&format!("{} matches in {}F:\n\n", total, by_file.len())); + for line in stdout.lines() { + let parts: Vec<&str> = line.splitn(3, ':').collect(); - let mut shown = 0; - let mut files: Vec<_> = by_file.iter().collect(); - files.sort_by_key(|(f, _)| *f); + let (file, line_num, content) = if parts.len() == 3 { + let ln = parts[1].parse().unwrap_or(0); + (parts[0].to_string(), ln, parts[2]) + } else if parts.len() == 2 { + let ln = parts[0].parse().unwrap_or(0); + (path.to_string(), ln, parts[1]) + } else { + continue; + }; - for (file, matches) in files { - if shown >= max_results { - break; + let cleaned = clean_line(content, max_line_len, context_re.as_ref(), pattern); + by_file.entry(file).or_default().push((line_num, cleaned)); } - let file_display = compact_path(file); - rtk_output.push_str(&format!("[file] {} ({}):\n", file_display, matches.len())); + let mut out = String::new(); + out.push_str(&format!( + "{} matches in {} files:\n\n", + total_matches, + by_file.len() + )); + + let mut shown = 0; + let mut files: Vec<_> = by_file.iter().collect(); + files.sort_by_key(|(f, _)| *f); let per_file = 
config::limits().grep_max_per_file; - for (line_num, content) in matches.iter().take(per_file) { - rtk_output.push_str(&format!(" {:>4}: {}\n", line_num, content)); - shown += 1; + for (file, matches) in files { if shown >= max_results { break; } - } - if matches.len() > per_file { - rtk_output.push_str(&format!(" +{}\n", matches.len() - per_file)); + let file_display = compact_path(file); + out.push_str(&format!("[file] {} ({}):\n", file_display, matches.len())); + + for (line_num, content) in matches.iter().take(per_file) { + out.push_str(&format!(" {:>4}: {}\n", line_num, content)); + shown += 1; + if shown >= max_results { + break; + } + } + + if matches.len() > per_file { + out.push_str(&format!(" +{}\n", matches.len() - per_file)); + } + out.push('\n'); } - rtk_output.push('\n'); - } - if total > shown { - rtk_output.push_str(&format!("... +{}\n", total - shown)); - } + if total_matches > shown { + out.push_str(&format!("... +{}\n", total_matches - shown)); + } + out + }; print!("{}", rtk_output); timer.track( @@ -297,4 +346,23 @@ mod tests { } // If rg is not installed, skip gracefully (test still passes) } + + #[test] + fn test_rg_no_ignore_flag_accepted() { + // Verify rg accepts --no-ignore (used to match grep -r behavior) + let mut cmd = resolved_command("rg"); + cmd.args([ + "-n", + "--no-heading", + "--no-ignore", + "NONEXISTENT_PATTERN_12345", + ".", + ]); + if let Ok(output) = cmd.output() { + assert!( + output.status.code() == Some(1) || output.status.success(), + "rg --no-ignore should be accepted" + ); + } + } }