diff --git a/crates/mcp-brain-server/src/gist.rs b/crates/mcp-brain-server/src/gist.rs index 89f96de1c..2eff3254f 100644 --- a/crates/mcp-brain-server/src/gist.rs +++ b/crates/mcp-brain-server/src/gist.rs @@ -15,26 +15,26 @@ use parking_lot::Mutex; use serde::{Deserialize, Serialize}; // ── Novelty thresholds ── -// VERY aggressive: only publish when something genuinely new is discovered. -// With ~3100 memories and 2.8M edges, the bar must be HIGH to avoid noise. -// Target: ~1 gist per WEEK, only for real innovations. -/// Minimum new inferences: must derive many non-trivial forward-chained claims -const MIN_NEW_INFERENCES: usize = 10; -/// Minimum evidence observations — need substantial data -const MIN_EVIDENCE: usize = 1000; -/// Minimum strange loop quality score — high bar for self-aware reasoning -const MIN_STRANGE_LOOP_SCORE: f32 = 0.1; +// Tuned April 2026: brain has 10K+ memories and 38M edges. +// Previous thresholds were too aggressive — no gists were ever published. +// Target: ~1 gist per day, with genuinely interesting content. +/// Minimum new inferences: at least some non-trivial forward-chained claims +const MIN_NEW_INFERENCES: usize = 3; +/// Minimum evidence observations — brain has 10K+, so this is easy +const MIN_EVIDENCE: usize = 100; +/// Minimum strange loop quality score — lower bar to start publishing +const MIN_STRANGE_LOOP_SCORE: f32 = 0.01; /// Minimum propositions extracted in this cycle -const MIN_PROPOSITIONS: usize = 20; +const MIN_PROPOSITIONS: usize = 5; /// Minimum SONA patterns — require at least some SONA learning const MIN_SONA_PATTERNS: usize = 1; -/// Minimum Pareto front growth — evolution must find multiple new solutions -const MIN_PARETO_GROWTH: usize = 3; +/// Minimum Pareto front growth — any new solution counts +const MIN_PARETO_GROWTH: usize = 1; /// Minimum confidence for ANY inference to be included in a discovery -const MIN_INFERENCE_CONFIDENCE: f64 = 0.70; +const MIN_INFERENCE_CONFIDENCE: f64 = 0.60; /// Minimum number of UNIQUE categories across strong propositions -/// (prevents "debug-architecture-geopolitics" recycling) -const MIN_UNIQUE_CATEGORIES: usize = 4; +/// (prevents single-domain noise — but 2 domains is enough for cross-domain) +const MIN_UNIQUE_CATEGORIES: usize = 2; /// A discovery worthy of publishing. /// @@ -165,8 +165,8 @@ impl Discovery { && self.propositions_extracted >= MIN_PROPOSITIONS && self.sona_patterns >= MIN_SONA_PATTERNS && self.pareto_growth >= MIN_PARETO_GROWTH - && strong.len() >= 3 // Must have at least 3 non-trivial inferences - && strong_props.len() >= 5 // Must have at least 5 substantive propositions + && strong.len() >= 1 // Must have at least 1 non-trivial inference + && strong_props.len() >= 2 // Must have at least 2 substantive propositions && diversity >= MIN_UNIQUE_CATEGORIES // Must span multiple domains } @@ -228,7 +228,7 @@ impl GistPublisher { Some(Self { token, last_publish: Mutex::new(None), - min_interval: Duration::from_secs(259200), // 3 day minimum between gists + min_interval: Duration::from_secs(86400), // 1 day minimum between gists published_count: Mutex::new(0), published_titles: Mutex::new(Vec::new()), }) diff --git a/crates/mcp-brain-server/src/routes.rs b/crates/mcp-brain-server/src/routes.rs index 7b1a06abd..95313f113 100644 --- a/crates/mcp-brain-server/src/routes.rs +++ b/crates/mcp-brain-server/src/routes.rs @@ -5980,20 +5980,45 @@ async fn notify_digest( let topic = body["topic"].as_str(); let hours = body["hours"].as_u64().unwrap_or(24); - // Gather recent discoveries from the store + // Gather recent discoveries from the store — excluding debug/training noise let cutoff = chrono::Utc::now() - chrono::Duration::hours(hours as i64); let mut all = state.store.all_memories(); all.sort_by(|a, b| b.created_at.cmp(&a.created_at)); - // Filter by recency and optionally by topic + // Filter out noise: training cycles, self-reflections, debug entries, + // and low-signal web scraping results + let noise_patterns: &[&str] = &[ + "Self-reflection: training cycle", + "Fact Check: Self-reflection", + "vTools Events", + "Executive Committee Meeting", + "DailyMed", + "AccessGUDID", + "Site en construction", + ]; + let filtered: Vec<_> = all.iter() .filter(|m| { if m.created_at < cutoff { return false; } + // Skip debug/auto-generated training noise + if matches!(m.category, crate::types::BrainCategory::Debug) { + return false; + } + // Skip known noise patterns in titles + let title_lower = m.title.to_lowercase(); + if noise_patterns.iter().any(|p| title_lower.contains(&p.to_lowercase())) { + return false; + } + // Skip very short content (likely scraping artifacts) + if m.content.len() < 50 { + return false; + } + // Apply optional topic filter topic.map_or(true, |t| { let t_lower = t.to_lowercase(); - m.title.to_lowercase().contains(&t_lower) + title_lower.contains(&t_lower) || m.content.to_lowercase().contains(&t_lower) || m.tags.iter().any(|tag| tag.to_lowercase().contains(&t_lower)) }) @@ -6009,27 +6034,52 @@ async fn notify_digest( }))); } - // Build HTML rows + // Build HTML rows — human-readable format let mut rows = String::new(); + let category_emoji = |cat: &crate::types::BrainCategory| -> &str { + use crate::types::BrainCategory::*; + match cat { + Architecture => "🏗️", + Pattern => "🔄", + Solution => "💡", + Security => "🔒", + Convention => "📐", + Performance => "⚡", + Tooling => "🔧", + Debug => "🐛", + _ => "📝", + } + }; + for (i, m) in filtered.iter().enumerate() { - let title = if m.title.len() > 100 { &m.title[..100] } else { &m.title }; - let content = if m.content.len() > 200 { &m.content[..200] } else { &m.content }; - let quality = m.quality_score.mean(); - let tags_html: Vec<_> = m.tags.iter().take(4).map(|t| { - format!("{}", t) - }).collect(); + let title = if m.title.len() > 120 { &m.title[..120] } else { &m.title }; + // Take first ~250 chars but break at sentence boundary + let content_raw = if m.content.len() > 250 { &m.content[..250] } else { &m.content }; + let content = match content_raw.rfind(". ") { + Some(pos) if pos > 80 => &content_raw[..pos + 1], + _ => content_raw, + }; + let emoji = category_emoji(&m.category); + let tags_html: Vec<_> = m.tags.iter() + .filter(|t| !t.contains("auto-generated") && !t.contains("training-cycle")) + .take(3) + .map(|t| { + format!("{}", t) + }).collect(); rows.push_str(&format!( - r#"
Last {hours}h | {count} discoveries | {total} total memories | {edges} edges
+ r#"+{count} new discoveries in the last {hours} hours. +The brain now holds {total} memories connected by {edges} relationships. +
{topic_line} -