Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .leankg
Binary file not shown.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ sha2 = "0.10"
rayon = "1.10"
pulldown-cmark = "0.12"
regex = "1"
once_cell = "1"
schemars = "0.8"
async-trait = "0.1"
dirs = "5"
Expand Down
3 changes: 2 additions & 1 deletion docs/design/hld-leankg.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,11 @@
- Removed unbounded `elements_cache` and `relationships_cache`
- Reduced SQLite memory: cache 64MB→16MB, mmap 256MB→64MB
- Lazy parser initialization (TODO)
- Cached regex patterns (TODO)
- Cached regex patterns (COMPLETED)
- Connection reuse in watcher (TODO)
- Cursor-based relationship iteration (TODO)
- File→relationships index for dependent lookup (TODO)
- Removed busy-loop sleep from AsyncFileWatcher (COMPLETED)
- Target: Reduce idle CPU from 61% to <5%
- v1.20 - Stats API + Adaptive Loading:
- Add `GET /api/graph/stats` endpoint returning full DB histogram (nodes_by_type, edges_by_type, nodes_by_depth, folders, services)
Expand Down
4 changes: 3 additions & 1 deletion docs/requirement/prd-leankg.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,18 @@
- Source 2: `prd-leankg-v2.0-enhancements.md` (v2.0, 2026-03-27)
- Source 3: `prd-leankg-gitnexus-enhancements.md` (v1.0, 2026-03-27)

<<<<<<< HEAD
### v1.21 (IN PROGRESS) - CPU Optimization
- **US-21.1:** Reduce idle CPU from 61% to <5%
- **US-21.2:** Cache TTL tuning (300s→60s, max entries 1000→100) - COMPLETED
- **US-21.3:** Remove unbounded `elements_cache` and `relationships_cache` - COMPLETED
- **US-21.4:** SQLite memory reduction (cache 64MB→16MB, mmap 256MB→64MB) - COMPLETED
- **US-21.5:** Lazy parser initialization - PENDING
- **US-21.6:** Cached regex patterns - PENDING
- **US-21.6:** Cached regex patterns - COMPLETED
- **US-21.7:** Connection reuse in file watcher - PENDING
- **US-21.8:** Cursor-based relationship iteration - PENDING
- **US-21.9:** File→relationships index for dependent lookup - PENDING
- **US-21.10:** Remove busy-loop sleep from AsyncFileWatcher - COMPLETED
- **Root Causes:**
- Tree-sitter AST deep traversal (recursive on every node)
- `all_relationships()` loads entire graph into memory
Expand Down
9 changes: 3 additions & 6 deletions src/indexer/extractor.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use crate::db::models::{CodeElement, Relationship};
use crate::indexer::regex_cache::{KOTLIN_SYNTHETIC_IMPORT, VIEWBINDING_VAR};
use regex::Regex;
use std::path::Path;
use tree_sitter::{Node, Tree};
Expand Down Expand Up @@ -262,9 +263,7 @@ impl<'a> EntityExtractor<'a> {
source_path: &str,
relationships: &mut Vec<Relationship>,
) {
let synthetic_re = Regex::new(r#"import\s+kotlin\.android\.synthetic\.(\w+)\.\*"#).unwrap();

for cap in synthetic_re.captures_iter(content) {
for cap in KOTLIN_SYNTHETIC_IMPORT.captures_iter(content) {
if let Some(layout_name) = cap.get(1) {
let layout_file = format!("res/layout/{}.xml", layout_name.as_str());
relationships.push(Relationship {
Expand Down Expand Up @@ -331,13 +330,11 @@ impl<'a> EntityExtractor<'a> {
source_path: &str,
relationships: &mut Vec<Relationship>,
) {
let binding_var_re = Regex::new(r#"(\w+Binding)\s+(\w+)\s*="#).unwrap();
let binding_class_names: std::collections::HashSet<String> = binding_var_re
let binding_class_names: std::collections::HashSet<String> = VIEWBINDING_VAR
.captures_iter(content)
.filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string()))
.collect();

let _dot_access_re = Regex::new(r#"(\w+)\.(\w+)"#).unwrap();
let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();

for binding_name in binding_class_names {
Expand Down
1 change: 1 addition & 0 deletions src/indexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ pub mod git;
pub mod microservice;
pub mod parser;
pub mod process_processor;
pub mod regex_cache;
pub mod terraform;

pub mod android_hilt;
Expand Down
175 changes: 175 additions & 0 deletions src/indexer/regex_cache.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
//! Cached compiled regex patterns for performance optimization
//!
//! Regex patterns are compiled once and reused across all indexing operations.

use once_cell::sync::Lazy;
use regex::Regex;

// ============================================================================
// Android-related patterns (used in extractor.rs)
// ============================================================================

/// Kotlin synthetic view import; capture group 1 is the layout name.
///
/// Accepts the import form generated for the Kotlin Android Extensions plugin,
/// `import kotlinx.android.synthetic.<sourceSet>.<layout>.*`, and — for
/// backward compatibility — the shorter `import kotlin.android.synthetic.<layout>.*`
/// spelling, which was the only form the previous pattern accepted.
///
/// The optional non-capturing `(?:\w+\.)?` segment absorbs the source-set
/// component (e.g. `main.`) so that group 1 is always the segment directly
/// before the trailing `.*`.
pub static KOTLIN_SYNTHETIC_IMPORT: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r#"import\s+kotlinx?\.android\.synthetic\.(?:\w+\.)?(\w+)\.\*"#).unwrap()
});

/// Typed binding declaration of the shape `<Name>Binding <identifier> =`.
///
/// Capture group 1 is the type name ending in `Binding`; group 2 is the
/// identifier that follows it, up to the `=` sign.
pub static VIEWBINDING_VAR: Lazy<Regex> = Lazy::new(|| {
    let pattern = r#"(\w+Binding)\s+(\w+)\s*="#;
    Regex::new(pattern).unwrap()
});

/// Dotted access `<left>.<right>`.
///
/// Capture group 1 is the word before the dot, group 2 the word after it.
pub static PROPERTY_ACCESS: Lazy<Regex> = Lazy::new(|| {
    let pattern = r#"(\w+)\.(\w+)"#;
    Regex::new(pattern).unwrap()
});

// ============================================================================
// Microservice patterns (used in microservice.rs)
// ============================================================================

/// gRPC client construction: `grpc.NewClient("<target>"` followed by a comma
/// or whitespace.
///
/// Capture group 1 is the quoted target string passed as the first argument.
///
/// The earlier pattern ended with a stray `"` after `[,\s]`, which meant it
/// only matched when a second string literal immediately followed the target;
/// a call such as `grpc.NewClient("svc:443", opts...)` was never matched.
pub static GRPC_CLIENT: Lazy<Regex> =
    Lazy::new(|| Regex::new(r#"(?m)grpc\.NewClient\s*\(\s*"([^"]+)"[,\s]"#).unwrap());

/// Service address entry: `be_<service>_address` followed by `=` or `:` and a
/// quoted value.
///
/// Capture group 1 is the `<service>` part of the key; group 2 is the value
/// between the (single or double) quotes.
pub static YAML_ADDRESS: Lazy<Regex> = Lazy::new(|| {
    let pattern = r#"be_(\w+)_address\s*[=:]\s*["']([^"']+)["']"#;
    Regex::new(pattern).unwrap()
});

// ============================================================================
// Config file patterns (used in config_extractor.rs)
// ============================================================================

/// A whole line that is a `//` comment, optionally preceded by whitespace.
///
/// `(?m)` lets `^` and `$` anchor at every line boundary of the haystack.
pub static CONFIG_COMMENT: Lazy<Regex> = Lazy::new(|| {
    let pattern = r"(?m)^\s*//.*$";
    Regex::new(pattern).unwrap()
});

/// Bracketed section header whose name contains `dependencies`.
///
/// Capture group 1 is the text between the brackets. There is no multi-line
/// flag, so `^` anchors at the start of the haystack only — presumably this is
/// applied one line at a time (confirm against the caller).
pub static DEPENDENCY_SECTION: Lazy<Regex> = Lazy::new(|| {
    let pattern = r"^\[(.*dependencies.*)\]";
    Regex::new(pattern).unwrap()
});

/// Dependency entry of the form `name = value`.
///
/// Capture group 1 is the name (ASCII letters, digits, `_`, `-`); group 2 is
/// everything after the `=` and any following whitespace.
pub static DEPENDENCY_LINE: Lazy<Regex> = Lazy::new(|| {
    let pattern = r#"^([a-zA-Z0-9_\-]+)\s*=\s*(.*)"#;
    Regex::new(pattern).unwrap()
});

// ============================================================================
// Go module patterns (used in config_extractor.rs)
// ============================================================================

/// Single-line `require <module> <version>` directive.
///
/// Capture group 1 is the module path; group 2 is the version, which must
/// start with `v`.
pub static GO_REQUIRE_SINGLE: Lazy<Regex> = Lazy::new(|| {
    let pattern = r"^\s*require\s+([^\s]+)\s+(v[^\s]+)";
    Regex::new(pattern).unwrap()
});

/// Opening line of a parenthesised require block: `require (`.
pub static GO_REQUIRE_BLOCK_START: Lazy<Regex> = Lazy::new(|| {
    let pattern = r"^\s*require\s*\(\s*$";
    Regex::new(pattern).unwrap()
});

/// Closing line of a parenthesised block: a lone `)` with optional
/// surrounding whitespace.
pub static GO_REQUIRE_BLOCK_END: Lazy<Regex> = Lazy::new(|| {
    let pattern = r"^\s*\)\s*$";
    Regex::new(pattern).unwrap()
});

/// Dependency line inside a require block: `<module> <version>`.
///
/// Capture group 1 is the module path; group 2 is the version, which must
/// start with `v`.
pub static GO_REQUIRE_LINE: Lazy<Regex> = Lazy::new(|| {
    let pattern = r"^\s*([^\s]+)\s+(v[^\s]+)";
    Regex::new(pattern).unwrap()
});

// ============================================================================
// Terraform patterns (used in terraform.rs)
// ============================================================================

/// `resource "<a>" "<b>"` at the start of a line.
///
/// Capture groups 1 and 2 are the first and second quoted labels.
pub static TF_RESOURCE: Lazy<Regex> = Lazy::new(|| {
    let pattern = r#"(?m)^resource\s+"([^"]+)"\s+"([^"]+)""#;
    Regex::new(pattern).unwrap()
});

/// `data "<a>" "<b>"` at the start of a line.
///
/// Capture groups 1 and 2 are the first and second quoted labels.
pub static TF_DATA: Lazy<Regex> = Lazy::new(|| {
    let pattern = r#"(?m)^data\s+"([^"]+)"\s+"([^"]+)""#;
    Regex::new(pattern).unwrap()
});

/// `variable "<name>"` at the start of a line; group 1 is the quoted name.
pub static TF_VARIABLE: Lazy<Regex> = Lazy::new(|| {
    let pattern = r#"(?m)^variable\s+"([^"]+)""#;
    Regex::new(pattern).unwrap()
});

/// `output "<name>"` at the start of a line; group 1 is the quoted name.
pub static TF_OUTPUT: Lazy<Regex> = Lazy::new(|| {
    let pattern = r#"(?m)^output\s+"([^"]+)""#;
    Regex::new(pattern).unwrap()
});

/// `module "<name>"` at the start of a line; group 1 is the quoted name.
pub static TF_MODULE: Lazy<Regex> = Lazy::new(|| {
    let pattern = r#"(?m)^module\s+"([^"]+)""#;
    Regex::new(pattern).unwrap()
});

/// `provider "<name>"` at the start of a line; group 1 is the quoted name.
pub static TF_PROVIDER: Lazy<Regex> = Lazy::new(|| {
    let pattern = r#"(?m)^provider\s+"([^"]+)""#;
    Regex::new(pattern).unwrap()
});

// ============================================================================
// Android manifest patterns (used in android_manifest.rs)
// ============================================================================

// Note: ANDROID_MANIFEST_TAG uses backreference \1 which is not supported
// by the regex crate - it must be created dynamically in android_manifest.rs

/// `android:name` attribute with a single- or double-quoted value.
///
/// Capture group 1 is the attribute value between the quotes.
pub static ANDROID_NAME_ATTR: Lazy<Regex> = Lazy::new(|| {
    let pattern = r#"android:name\s*=\s*["']([^"']+)["']"#;
    Regex::new(pattern).unwrap()
});

// ============================================================================
// XML layout patterns (used in xml_layout.rs)
// ============================================================================

/// XML tag: either an opening tag or a closing tag.
///
/// The first alternative matches `<Name ...>` where the name is followed by at
/// least one whitespace character (so a bare `<Name>` with nothing after the
/// name is not matched); the tag name is capture group 1. The second
/// alternative matches `</Name>`; the tag name is capture group 2.
pub static XML_ELEMENT: Lazy<Regex> = Lazy::new(|| {
    let pattern = r"<([a-zA-Z][a-zA-Z0-9_.]*)\s[^>]*>|</([a-zA-Z][a-zA-Z0-9_.]*)\s*>";
    Regex::new(pattern).unwrap()
});

/// `android:id` attribute declaring a new ID: `android:id="@+id/<name>"`.
///
/// Only the `@+id/` form is matched (a plain `@id/` value is not). Capture
/// group 1 is the name after `@+id/`.
pub static ANDROID_ID: Lazy<Regex> = Lazy::new(|| {
    let pattern = r#"android:id\s*=\s*["']@\+id/([^"']+)["']"#;
    Regex::new(pattern).unwrap()
});

/// `android:onClick` attribute; capture group 1 is the quoted value.
pub static ANDROID_ONCLICK: Lazy<Regex> = Lazy::new(|| {
    let pattern = r#"android:onClick\s*=\s*["']([^"']+)["']"#;
    Regex::new(pattern).unwrap()
});

/// `@+id/<name>` anywhere in the text; capture group 1 is the identifier.
pub static VIEW_ID_PLUS: Lazy<Regex> = Lazy::new(|| {
    let pattern = r"@\+id/([a-zA-Z_][a-zA-Z0-9_]*)";
    Regex::new(pattern).unwrap()
});

/// `@id/<name>` anywhere in the text; capture group 1 is the identifier.
pub static VIEW_ID: Lazy<Regex> = Lazy::new(|| {
    let pattern = r"@id/([a-zA-Z_][a-zA-Z0-9_]*)";
    Regex::new(pattern).unwrap()
});

/// `tools:context` attribute; capture group 1 is the quoted value.
pub static TOOLS_CONTEXT: Lazy<Regex> = Lazy::new(|| {
    let pattern = r#"tools:context\s*=\s*["']([^"']+)["']"#;
    Regex::new(pattern).unwrap()
});

/// `android:name` attribute whose value contains at least one dot.
///
/// Capture group 1 is the leading part of the value up to and including a
/// dot (matched greedily); group 2 is the remainder of the value.
pub static LAYOUT_CLASS: Lazy<Regex> = Lazy::new(|| {
    let pattern = r#"android:name\s*=\s*["']([^"']+\.)([^"']+)["']"#;
    Regex::new(pattern).unwrap()
});

/// `style="@style/<name>"` attribute; capture group 1 is the style name.
pub static STYLE_REF: Lazy<Regex> = Lazy::new(|| {
    let pattern = r#"style\s*=\s*["']@style/([^"']+)["']"#;
    Regex::new(pattern).unwrap()
});

// ============================================================================
// Android resources patterns (used in android_resources.rs)
// ============================================================================

/// `<string name="...">text</string>` element.
///
/// `name` must be the first attribute and double-quoted. Capture group 1 is
/// the name; group 2 is the element text (which may not contain `<`).
pub static STRING_RESOURCE: Lazy<Regex> = Lazy::new(|| {
    let pattern = r#"<string\s+name\s*=\s*"([^"]+)"[^>]*>([^<]*)</string>"#;
    Regex::new(pattern).unwrap()
});

/// `<color name="...">text</color>` element; group 1 is the name, group 2
/// the element text.
pub static COLOR_RESOURCE: Lazy<Regex> = Lazy::new(|| {
    let pattern = r#"<color\s+name\s*=\s*"([^"]+)"[^>]*>([^<]*)</color>"#;
    Regex::new(pattern).unwrap()
});

/// `<dimen name="...">text</dimen>` element; group 1 is the name, group 2
/// the element text.
pub static DIMEN_RESOURCE: Lazy<Regex> = Lazy::new(|| {
    let pattern = r#"<dimen\s+name\s*=\s*"([^"]+)"[^>]*>([^<]*)</dimen>"#;
    Regex::new(pattern).unwrap()
});

/// `<theme name="...">...</theme>` element.
///
/// Only the name is captured (group 1); the body is matched lazily and may
/// span multiple lines and contain nested tags.
pub static THEME_RESOURCE: Lazy<Regex> = Lazy::new(|| {
    let pattern = r#"<theme\s+name\s*=\s*"([^"]+)"[^>]*>[\s\S]*?</theme>"#;
    Regex::new(pattern).unwrap()
});

/// `<bool name="...">text</bool>` element; group 1 is the name, group 2
/// the element text.
pub static BOOL_RESOURCE: Lazy<Regex> = Lazy::new(|| {
    let pattern = r#"<bool\s+name\s*=\s*"([^"]+)"[^>]*>([^<]*)</bool>"#;
    Regex::new(pattern).unwrap()
});

/// `<integer name="...">text</integer>` element; group 1 is the name,
/// group 2 the element text.
pub static INTEGER_RESOURCE: Lazy<Regex> = Lazy::new(|| {
    let pattern = r#"<integer\s+name\s*=\s*"([^"]+)"[^>]*>([^<]*)</integer>"#;
    Regex::new(pattern).unwrap()
});

// ============================================================================
// Maven patterns (used in maven_extractor.rs)
// ============================================================================

/// `<dependency>...</dependency>` element.
///
/// Capture group 1 is the element body, matched lazily so that each match
/// ends at the nearest closing tag; the body may span multiple lines.
pub static MAVEN_DEPENDENCY: Lazy<Regex> = Lazy::new(|| {
    let pattern = r"<dependency>([\s\S]*?)</dependency>";
    Regex::new(pattern).unwrap()
});

// ============================================================================
// Build system patterns (used in mod.rs)
// ============================================================================

/// Gradle include call with one quoted argument: `include("<module>")` or
/// `include('<module>')`.
///
/// No whitespace is tolerated between the parentheses and the quotes.
/// Capture group 1 is the module string.
pub static GRADLE_INCLUDE: Lazy<Regex> = Lazy::new(|| {
    let pattern = r#"include\(["']([^"']+)["']\)"#;
    Regex::new(pattern).unwrap()
});

/// Maven `<module>name</module>` element; capture group 1 is the text
/// between the tags.
pub static MAVEN_MODULE: Lazy<Regex> = Lazy::new(|| {
    let pattern = r"<module>([^<]+)</module>";
    Regex::new(pattern).unwrap()
});
4 changes: 3 additions & 1 deletion src/watcher/notify_handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ pub struct AsyncFileWatcher {
impl AsyncFileWatcher {
pub async fn run(self) {
loop {
// recv_event() blocks until an event is available - no polling needed
if let Some(event) = self.inner.recv_event() {
for path in event.paths {
let change = FileChange {
Expand All @@ -97,7 +98,8 @@ impl AsyncFileWatcher {
let _ = self.tokio_tx.send(change).await;
}
}
tokio::time::sleep(Duration::from_millis(100)).await;
// No sleep here - recv_event() blocks properly via the notify library
// which uses OS-level file watching (inotify/FSEvents/kqueue)
}
}
}
Expand Down
Loading