diff --git a/.leankg b/.leankg new file mode 100644 index 0000000..401b7a9 Binary files /dev/null and b/.leankg differ diff --git a/Cargo.lock b/Cargo.lock index d3f34fb..361c6f3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1389,6 +1389,7 @@ dependencies = [ "glob", "ignore", "notify", + "once_cell", "parking_lot", "pulldown-cmark", "rayon", diff --git a/Cargo.toml b/Cargo.toml index 291272d..b84df36 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,7 @@ sha2 = "0.10" rayon = "1.10" pulldown-cmark = "0.12" regex = "1" +once_cell = "1" schemars = "0.8" async-trait = "0.1" dirs = "5" diff --git a/docs/design/hld-leankg.md b/docs/design/hld-leankg.md index 8519a63..325c1f3 100644 --- a/docs/design/hld-leankg.md +++ b/docs/design/hld-leankg.md @@ -19,10 +19,11 @@ - Removed unbounded `elements_cache` and `relationships_cache` - Reduced SQLite memory: cache 64MB→16MB, mmap 256MB→64MB - Lazy parser initialization (TODO) - - Cached regex patterns (TODO) + - Cached regex patterns (COMPLETED) - Connection reuse in watcher (TODO) - Cursor-based relationship iteration (TODO) - File→relationships index for dependent lookup (TODO) + - Removed busy-loop sleep from AsyncFileWatcher (COMPLETED) - Target: Reduce idle CPU from 61% to <5% - v1.20 - Stats API + Adaptive Loading: - Add `GET /api/graph/stats` endpoint returning full DB histogram (nodes_by_type, edges_by_type, nodes_by_depth, folders, services) diff --git a/docs/requirement/prd-leankg.md b/docs/requirement/prd-leankg.md index 29f9509..76c792a 100644 --- a/docs/requirement/prd-leankg.md +++ b/docs/requirement/prd-leankg.md @@ -15,16 +15,17 @@ - Source 2: `prd-leankg-v2.0-enhancements.md` (v2.0, 2026-03-27) - Source 3: `prd-leankg-gitnexus-enhancements.md` (v1.0, 2026-03-27) ### v1.21 (IN PROGRESS) - CPU Optimization - **US-21.1:** Reduce idle CPU from 61% to <5% - **US-21.2:** Cache TTL tuning (300s→60s, max entries 1000→100) - COMPLETED - **US-21.3:** Remove unbounded `elements_cache` and 
`relationships_cache` - COMPLETED - **US-21.4:** SQLite memory reduction (cache 64MB→16MB, mmap 256MB→64MB) - COMPLETED - **US-21.5:** Lazy parser initialization - PENDING -- **US-21.6:** Cached regex patterns - PENDING +- **US-21.6:** Cached regex patterns - COMPLETED - **US-21.7:** Connection reuse in file watcher - PENDING - **US-21.8:** Cursor-based relationship iteration - PENDING - **US-21.9:** File→relationships index for dependent lookup - PENDING +- **US-21.10:** Remove busy-loop sleep from AsyncFileWatcher - COMPLETED - **Root Causes:** - Tree-sitter AST deep traversal (recursive on every node) - `all_relationships()` loads entire graph into memory diff --git a/src/indexer/extractor.rs b/src/indexer/extractor.rs index cd687e8..ecb7c45 100644 --- a/src/indexer/extractor.rs +++ b/src/indexer/extractor.rs @@ -1,4 +1,5 @@ use crate::db::models::{CodeElement, Relationship}; +use crate::indexer::regex_cache::{KOTLIN_SYNTHETIC_IMPORT, VIEWBINDING_VAR}; use regex::Regex; use std::path::Path; use tree_sitter::{Node, Tree}; @@ -262,9 +263,7 @@ impl<'a> EntityExtractor<'a> { source_path: &str, relationships: &mut Vec, ) { - let synthetic_re = Regex::new(r#"import\s+kotlin\.android\.synthetic\.(\w+)\.\*"#).unwrap(); - - for cap in synthetic_re.captures_iter(content) { + for cap in KOTLIN_SYNTHETIC_IMPORT.captures_iter(content) { if let Some(layout_name) = cap.get(1) { let layout_file = format!("res/layout/{}.xml", layout_name.as_str()); relationships.push(Relationship { @@ -331,13 +330,11 @@ impl<'a> EntityExtractor<'a> { source_path: &str, relationships: &mut Vec, ) { - let binding_var_re = Regex::new(r#"(\w+Binding)\s+(\w+)\s*="#).unwrap(); - let binding_class_names: std::collections::HashSet = binding_var_re + let binding_class_names: std::collections::HashSet = VIEWBINDING_VAR .captures_iter(content) .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string())) .collect(); - let _dot_access_re = Regex::new(r#"(\w+)\.(\w+)"#).unwrap(); let mut seen: 
std::collections::HashSet = std::collections::HashSet::new(); for binding_name in binding_class_names { diff --git a/src/indexer/mod.rs b/src/indexer/mod.rs index 5dd0d2e..0fe0f3d 100644 --- a/src/indexer/mod.rs +++ b/src/indexer/mod.rs @@ -4,6 +4,7 @@ pub mod git; pub mod microservice; pub mod parser; pub mod process_processor; +pub mod regex_cache; pub mod terraform; pub mod android_hilt; diff --git a/src/indexer/regex_cache.rs b/src/indexer/regex_cache.rs new file mode 100644 index 0000000..3536fbf --- /dev/null +++ b/src/indexer/regex_cache.rs @@ -0,0 +1,175 @@ +//! Cached compiled regex patterns for performance optimization +//! +//! Regex patterns are compiled once and reused across all indexing operations. + +use once_cell::sync::Lazy; +use regex::Regex; + +// ============================================================================ +// Android-related patterns (used in extractor.rs) +// ============================================================================ + +/// Kotlin synthetic import: `import kotlin.android.synthetic..*` +pub static KOTLIN_SYNTHETIC_IMPORT: Lazy = + Lazy::new(|| Regex::new(r#"import\s+kotlin\.android\.synthetic\.(\w+)\.\*"#).unwrap()); + +/// ViewBinding variable declaration: `val/var Binding = ...` +pub static VIEWBINDING_VAR: Lazy = + Lazy::new(|| Regex::new(r#"(\w+Binding)\s+(\w+)\s*="#).unwrap()); + +/// Property access pattern: `.` +pub static PROPERTY_ACCESS: Lazy = Lazy::new(|| Regex::new(r#"(\w+)\.(\w+)"#).unwrap()); + +// ============================================================================ +// Microservice patterns (used in microservice.rs) +// ============================================================================ + +/// gRPC client pattern for Kubernetes DNS +pub static GRPC_CLIENT: Lazy = + Lazy::new(|| Regex::new(r#"(?m)grpc\.NewClient\s*\(\s*"([^"]+)"[,\s]""#).unwrap()); + +/// YAML address pattern: `be__address = ...` +pub static YAML_ADDRESS: Lazy = + Lazy::new(|| 
Regex::new(r#"be_(\w+)_address\s*[=:]\s*["']([^"']+)["']"#).unwrap()); + +// ============================================================================ +// Config file patterns (used in config_extractor.rs) +// ============================================================================ + +/// Comment lines in config files +pub static CONFIG_COMMENT: Lazy = Lazy::new(|| Regex::new(r"(?m)^\s*//.*$").unwrap()); + +/// Gradle/Cargo dependency section header +pub static DEPENDENCY_SECTION: Lazy = + Lazy::new(|| Regex::new(r"^\[(.*dependencies.*)\]").unwrap()); + +/// Gradle/Cargo dependency line: `name = version` +pub static DEPENDENCY_LINE: Lazy = + Lazy::new(|| Regex::new(r#"^([a-zA-Z0-9_\-]+)\s*=\s*(.*)"#).unwrap()); + +// ============================================================================ +// Go module patterns (used in config_extractor.rs) +// ============================================================================ + +/// Go require block single line +pub static GO_REQUIRE_SINGLE: Lazy = + Lazy::new(|| Regex::new(r"^\s*require\s+([^\s]+)\s+(v[^\s]+)").unwrap()); + +/// Go require block start +pub static GO_REQUIRE_BLOCK_START: Lazy = + Lazy::new(|| Regex::new(r"^\s*require\s*\(\s*$").unwrap()); + +/// Go require block end +pub static GO_REQUIRE_BLOCK_END: Lazy = Lazy::new(|| Regex::new(r"^\s*\)\s*$").unwrap()); + +/// Go require block dependency line +pub static GO_REQUIRE_LINE: Lazy = + Lazy::new(|| Regex::new(r"^\s*([^\s]+)\s+(v[^\s]+)").unwrap()); + +// ============================================================================ +// Terraform patterns (used in terraform.rs) +// ============================================================================ + +pub static TF_RESOURCE: Lazy = + Lazy::new(|| Regex::new(r#"(?m)^resource\s+"([^"]+)"\s+"([^"]+)""#).unwrap()); + +pub static TF_DATA: Lazy = + Lazy::new(|| Regex::new(r#"(?m)^data\s+"([^"]+)"\s+"([^"]+)""#).unwrap()); + +pub static TF_VARIABLE: Lazy = + Lazy::new(|| 
Regex::new(r#"(?m)^variable\s+"([^"]+)""#).unwrap()); + +pub static TF_OUTPUT: Lazy = Lazy::new(|| Regex::new(r#"(?m)^output\s+"([^"]+)""#).unwrap()); + +pub static TF_MODULE: Lazy = Lazy::new(|| Regex::new(r#"(?m)^module\s+"([^"]+)""#).unwrap()); + +pub static TF_PROVIDER: Lazy = + Lazy::new(|| Regex::new(r#"(?m)^provider\s+"([^"]+)""#).unwrap()); + +// ============================================================================ +// Android manifest patterns (used in android_manifest.rs) +// ============================================================================ + +// Note: ANDROID_MANIFEST_TAG uses backreference \1 which is not supported +// by the regex crate - it must be created dynamically in android_manifest.rs + +pub static ANDROID_NAME_ATTR: Lazy = + Lazy::new(|| Regex::new(r#"android:name\s*=\s*["']([^"']+)["']"#).unwrap()); + +// ============================================================================ +// XML layout patterns (used in xml_layout.rs) +// ============================================================================ + +/// XML element pattern (self-closing or with content) +pub static XML_ELEMENT: Lazy = Lazy::new(|| { + Regex::new(r"<([a-zA-Z][a-zA-Z0-9_.]*)\s[^>]*>|").unwrap() +}); + +/// Android ID declaration on `android:id`; only the `@+id/` form is matched (not `@id/`) +pub static ANDROID_ID: Lazy = + Lazy::new(|| Regex::new(r#"android:id\s*=\s*["']@\+id/([^"']+)["']"#).unwrap()); + +/// Android onClick handler +pub static ANDROID_ONCLICK: Lazy = + Lazy::new(|| Regex::new(r#"android:onClick\s*=\s*["']([^"']+)["']"#).unwrap()); + +/// View ID extraction: `@+id/` +pub static VIEW_ID_PLUS: Lazy = + Lazy::new(|| Regex::new(r"@\+id/([a-zA-Z_][a-zA-Z0-9_]*)").unwrap()); + +/// View ID reference: `@id/` +pub static VIEW_ID: Lazy = + Lazy::new(|| Regex::new(r"@id/([a-zA-Z_][a-zA-Z0-9_]*)").unwrap()); + +/// Tools context attribute +pub static TOOLS_CONTEXT: Lazy = + Lazy::new(|| Regex::new(r#"tools:context\s*=\s*["']([^"']+)["']"#).unwrap()); + +/// Class name in layout: 
`android:name = "."` +pub static LAYOUT_CLASS: Lazy = + Lazy::new(|| Regex::new(r#"android:name\s*=\s*["']([^"']+\.)([^"']+)["']"#).unwrap()); + +/// Style reference +pub static STYLE_REF: Lazy = + Lazy::new(|| Regex::new(r#"style\s*=\s*["']@style/([^"']+)["']"#).unwrap()); + +// ============================================================================ +// Android resources patterns (used in android_resources.rs) +// ============================================================================ + +pub static STRING_RESOURCE: Lazy = + Lazy::new(|| Regex::new(r#"]*>([^<]*)"#).unwrap()); + +pub static COLOR_RESOURCE: Lazy = + Lazy::new(|| Regex::new(r#"]*>([^<]*)"#).unwrap()); + +pub static DIMEN_RESOURCE: Lazy = + Lazy::new(|| Regex::new(r#"]*>([^<]*)"#).unwrap()); + +pub static THEME_RESOURCE: Lazy = + Lazy::new(|| Regex::new(r#"]*>[\s\S]*?"#).unwrap()); + +pub static BOOL_RESOURCE: Lazy = + Lazy::new(|| Regex::new(r#"]*>([^<]*)"#).unwrap()); + +pub static INTEGER_RESOURCE: Lazy = + Lazy::new(|| Regex::new(r#"]*>([^<]*)"#).unwrap()); + +// ============================================================================ +// Maven patterns (used in maven_extractor.rs) +// ============================================================================ + +pub static MAVEN_DEPENDENCY: Lazy = + Lazy::new(|| Regex::new(r"([\s\S]*?)").unwrap()); + +// ============================================================================ +// Build system patterns (used in mod.rs) +// ============================================================================ + +/// Gradle include statement: `include("")` +pub static GRADLE_INCLUDE: Lazy = + Lazy::new(|| Regex::new(r#"include\(["']([^"']+)["']\)"#).unwrap()); + +/// Maven module declaration: `name` +pub static MAVEN_MODULE: Lazy = + Lazy::new(|| Regex::new(r"([^<]+)").unwrap()); diff --git a/src/watcher/notify_handler.rs b/src/watcher/notify_handler.rs index dd079d3..a2c5c80 100644 --- a/src/watcher/notify_handler.rs +++ 
b/src/watcher/notify_handler.rs @@ -88,6 +88,7 @@ pub struct AsyncFileWatcher { impl AsyncFileWatcher { pub async fn run(self) { loop { + // NOTE(review): assumes recv_event() blocks until an event arrives - confirm against its definition if let Some(event) = self.inner.recv_event() { for path in event.paths { let change = FileChange { @@ -97,7 +98,8 @@ impl AsyncFileWatcher { let _ = self.tokio_tx.send(change).await; } } - tokio::time::sleep(Duration::from_millis(100)).await; + // No sleep between iterations. NOTE(review): if recv_event() can return None without blocking + // (non-blocking poll or closed channel), this loop spins without ever yielding - verify } } }