From 871dd5bd227f47df6f63f6600a90713c1c192692 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Tue, 3 Mar 2026 17:44:55 +0000
Subject: [PATCH] Implement Cognitive Layer (Phi-3/Gemma Integration)

- Implemented `OrtAdapter` for ONNX LLM inference.
- Implemented `OrtCognitiveAdapter` for high-level thinking.
- Updated `ModelManager` to support Phi-3 ONNX models.
- Implemented "Thought Loop" orchestrating perception, ethical judgment, cognition, and simulation.
- Integrated `thought` command into `synapse-cli` for testing.
- Exposed new components in `synapse-infra` and `synapse-cognition`.

Co-authored-by: iberi22 <10615454+iberi22@users.noreply.github.com>
---
 crates/synapse-cli/src/commands.rs            |  62 +++++++
 crates/synapse-cli/src/main.rs                |  13 ++
 crates/synapse-cognition/src/lib.rs           |   1 +
 crates/synapse-cognition/src/thought_loop.rs  |  75 +++++++++
 crates/synapse-infra/src/adapters/mod.rs      |   6 +-
 .../src/adapters/model_manager.rs             |  27 +++
 .../synapse-infra/src/adapters/ort_adapter.rs | 157 ++++++++++++++++++
 .../src/adapters/ort_cognitive_adapter.rs     |  63 +++++++
 8 files changed, 402 insertions(+), 2 deletions(-)
 create mode 100644 crates/synapse-cognition/src/thought_loop.rs
 create mode 100644 crates/synapse-infra/src/adapters/ort_adapter.rs
 create mode 100644 crates/synapse-infra/src/adapters/ort_cognitive_adapter.rs

diff --git a/crates/synapse-cli/src/commands.rs b/crates/synapse-cli/src/commands.rs
index cb5caaf..3890ff3 100644
--- a/crates/synapse-cli/src/commands.rs
+++ b/crates/synapse-cli/src/commands.rs
@@ -97,6 +97,68 @@ pub async fn init(path: &str) -> Result<()> {
     Ok(())
 }
 
+/// Run a Thought Loop cycle.
+pub async fn thought(input: &str, use_ort: bool) -> Result<()> {
+    println!("💭 Synapse Thought Loop");
+    println!("──────────────────────");
+    println!("📝 Input: \"{}\"", input);
+
+    // 1. Initialize Adapters
+    println!("\n🧠 Loading Cognitive Components...");
+
+    let data_dir = dirs::data_dir()
+        .unwrap_or_else(|| std::path::PathBuf::from("."))
+        .join("synapse_data");
+
+    // Initialize Models
+    let manager = synapse_infra::adapters::model_manager::ModelManager::new(data_dir.join("models"))?;
+    let paths = manager.ensure_models_exist()?;
+
+    // LLM Selection
+    use synapse_core::ports::CognitivePort;
+    use synapse_cognition::thought_loop::ThoughtLoop;
+    use synapse_core::core::genesis::EnneadMatrix;
+    use synapse_core::perception::HolographicRetina;
+    use std::sync::Arc;
+
+    let cognitive: Arc<dyn CognitivePort> = if use_ort {
+        println!("  Using ORT (ONNX) Cognitive Adapter...");
+        let model_path = paths.phi3_onnx_path.context("Phi-3 ONNX model not found")?;
+        let tok_path = paths.phi3_tokenizer_path.context("Phi-3 tokenizer not found")?;
+
+        let ort_llm = Arc::new(synapse_infra::adapters::ort_adapter::OrtAdapter::new(model_path, tok_path)?);
+        Arc::new(synapse_infra::adapters::ort_cognitive_adapter::OrtCognitiveAdapter::new(ort_llm))
+    } else {
+        println!("  Using Candle (GGUF) Cognitive Adapter...");
+        let model_path = paths.llm_path.context("TinyLlama GGUF model not found")?;
+        let tok_path = paths.llm_tokenizer_path.context("TinyLlama tokenizer not found")?;
+
+        let candle_llm = Arc::new(tokio::sync::Mutex::new(synapse_infra::adapters::candle_adapter::CandleAdapter::new(
+            model_path.to_str().unwrap().to_string(),
+            Some(tok_path.to_str().unwrap().to_string())
+        )?));
+        Arc::new(synapse_cognition::CandleCognitiveAdapter::with_llm_adapter(candle_llm))
+    };
+
+    let ennead = EnneadMatrix::new();
+    let retina = Arc::new(HolographicRetina::new(ennead));
+
+    // 2. Initialize ThoughtLoop
+    let loop_orchestrator = ThoughtLoop::new(retina, cognitive);
+
+    // 3. Execute Cycle
+    println!("\n🔄 Running Thought Cycle...");
+    let result = loop_orchestrator.cycle(input).await?;
+
+    // 4. Print Results
+    println!("\n✨ Thought Generated:");
+    println!("  Content: {}", result.content.trim());
+    println!("  Confidence: {:.2}", result.confidence);
+    println!("  Entropy Reduction: {:.2}", result.entropy_reduction);
+
+    Ok(())
+}
+
 /// Manually dial a peer.
 pub async fn dial(addr: &str) -> Result<()> {
     info!("Sending dial command for peer: {}", addr);
diff --git a/crates/synapse-cli/src/main.rs b/crates/synapse-cli/src/main.rs
index 94c58a2..86a6231 100644
--- a/crates/synapse-cli/src/main.rs
+++ b/crates/synapse-cli/src/main.rs
@@ -90,6 +90,16 @@ enum Commands {
         peer: Option<String>,
     },
 
+    /// Run a Thought Loop cycle (Perception -> Judgment -> Cognition -> Simulation)
+    Thought {
+        /// Input to think about
+        input: String,
+
+        /// Use ORT (ONNX) instead of Candle (GGUF)
+        #[arg(short, long)]
+        ort: bool,
+    },
+
     /// Translate a message with emotional empathy (Modo Espejo)
     Translate {
         /// The message to translate
@@ -215,6 +225,9 @@ async fn main() -> anyhow::Result<()> {
         Commands::Transmit { message, peer } => {
             commands::transmit(&message, peer).await?;
         }
+        Commands::Thought { input, ort } => {
+            commands::thought(&input, ort).await?;
+        }
         Commands::Translate { message, force } => {
             commands::translate(&message, force).await?;
         }
diff --git a/crates/synapse-cognition/src/lib.rs b/crates/synapse-cognition/src/lib.rs
index 0eb5d0c..ba7a466 100644
--- a/crates/synapse-cognition/src/lib.rs
+++ b/crates/synapse-cognition/src/lib.rs
@@ -128,6 +128,7 @@ use tokio::sync::broadcast;
 
 pub mod bootstrap;
 pub mod commands;
+pub mod thought_loop;
 pub mod tokenomics_service;
 pub mod dojo_service;
 pub mod sovereign_service;
diff --git a/crates/synapse-cognition/src/thought_loop.rs b/crates/synapse-cognition/src/thought_loop.rs
new file mode 100644
index 0000000..a756744
--- /dev/null
+++ b/crates/synapse-cognition/src/thought_loop.rs
@@ -0,0 +1,75 @@
+//! Thought Loop Orchestration.
+//!
+//! Implements the cognitive cycle: Perception -> Judgment -> Cognition -> Simulation.
+
+use synapse_core::{
+    error::Result,
+    perception::{HolographicRetina, EntropyLevel},
+    CognitivePort, Thought,
+};
+use std::sync::Arc;
+use tracing::{info, warn, debug};
+
+/// The Thought Loop service.
+pub struct ThoughtLoop {
+    /// The retina for perception.
+    retina: Arc<HolographicRetina>,
+    /// The cognitive layer for thinking.
+    cognitive: Arc<dyn CognitivePort>,
+}
+
+impl ThoughtLoop {
+    /// Create a new ThoughtLoop.
+    pub fn new(retina: Arc<HolographicRetina>, cognitive: Arc<dyn CognitivePort>) -> Self {
+        Self { retina, cognitive }
+    }
+
+    /// Process a single "thought cycle".
+    pub async fn cycle(&self, input: &str) -> Result<Thought> {
+        info!("Starting Thought Loop cycle for input...");
+
+        // 1. Perception: Input -> Latent Vector
+        // (Simulating raw input as f32 vector from text for this POC)
+        let raw_input: Vec<f32> = input.as_bytes().iter().map(|&b| b as f32 / 255.0).collect();
+        let tensor = self.retina.perceive(&raw_input, "thought_loop_input", Some(input.to_string()))?;
+
+        // 2. Judgment: Analyze Entropy/Suffering (Genesis Matrix)
+        let (is_suffering, entropy_level) = self.retina.detect_suffering(tensor.latent())?;
+
+        let ethical_state = match entropy_level {
+            EntropyLevel::Harmonious => "The system is in a state of high moral harmony.",
+            EntropyLevel::Neutral => "The system is stable.",
+            EntropyLevel::Chaotic => "Warning: High entropy detected. Restore order.",
+            EntropyLevel::Suffering => "CRITICAL: Suffering detected. You MUST act to reduce entropy and preserve life.",
+        };
+
+        debug!("Ethical State: {}", ethical_state);
+
+        // 3. Cognition: Generate response based on dynamic system prompt
+        let system_prompt = format!(
+            "You are the Cognitive Cortex of a Synapse Node. \
+            Current Ethical State: {}. \
+            Acknowledge this state in your reasoning. \
+            Your goal is to reduce entropy and align with the Ennead Matrix.",
+            ethical_state
+        );
+
+        let mut thought = self.cognitive.think(input, &system_prompt).await?;
+
+        // 4. Simulation: Verify entropy reduction
+        let simulated_entropy = self.cognitive.analyze(&thought.content).await?;
+
+        info!("Thought Loop: Original Entropy Score: {}, Simulated Response Entropy: {}",
+            tensor.metadata().entropy_score, simulated_entropy);
+
+        if simulated_entropy > tensor.metadata().entropy_score && is_suffering {
+            warn!("Simulation detected entropy increase during suffering! Applying corrective guidance.");
+            // In a real implementation, we might re-run generation with stronger guidance.
+            thought.content = format!("(Guided) {}", thought.content);
+        }
+
+        thought.entropy_reduction = (tensor.metadata().entropy_score - simulated_entropy).max(0.0);
+
+        Ok(thought)
+    }
+}
diff --git a/crates/synapse-infra/src/adapters/mod.rs b/crates/synapse-infra/src/adapters/mod.rs
index 39e6dcd..bc446fe 100644
--- a/crates/synapse-infra/src/adapters/mod.rs
+++ b/crates/synapse-infra/src/adapters/mod.rs
@@ -3,7 +3,8 @@
 pub mod surrealdb_adapter;
 pub mod sled_adapter;
 pub mod sled_memory_adapter;
-// pub mod ort_adapter; // TODO: File missing, needs to be created or imported from feature branch
+pub mod ort_adapter;
+pub mod ort_cognitive_adapter;
 pub mod context_adapter;
 pub mod immune_adapter;
 pub mod mock_llm_adapter;
@@ -34,7 +35,8 @@ pub mod libp2p_sync_adapter;
 pub use surrealdb_adapter::*;
 pub use sled_adapter::*;
 pub use sled_memory_adapter::*;
-// pub use ort_adapter::*;
+pub use ort_adapter::*;
+pub use ort_cognitive_adapter::*;
 pub use mock_llm_adapter::*;
 pub use mock_embedding_adapter::*;
 pub use embedding_adapter::*;
diff --git a/crates/synapse-infra/src/adapters/model_manager.rs b/crates/synapse-infra/src/adapters/model_manager.rs
index 3aa74e1..9150d1a 100644
--- a/crates/synapse-infra/src/adapters/model_manager.rs
+++ b/crates/synapse-infra/src/adapters/model_manager.rs
@@ -14,6 +14,10 @@ pub const DEFAULT_LLM_REPO: &str = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF";
 pub const DEFAULT_LLM_TOKENIZER_REPO: &str = "TinyLlama/TinyLlama-1.1B-Chat-v1.0";
 pub const DEFAULT_LLM_FILE: &str = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf";
 
+pub const PHI3_ONNX_REPO: &str = "microsoft/Phi-3-mini-4k-instruct-onnx";
+pub const PHI3_ONNX_FILE: &str = "cpu_and_mobile/cpu-int4-rtn-block-32/phi3-mini-4k-instruct-cpu-int4-rtn-block-32.onnx";
+pub const PHI3_ONNX_TOKENIZER: &str = "cpu_and_mobile/cpu-int4-rtn-block-32/tokenizer.json";
+
 pub const DEFAULT_EMBEDDING_REPO: &str = "sentence-transformers/all-MiniLM-L6-v2";
 pub const DEFAULT_EMBEDDING_MODEL: &str = "model.safetensors";
 pub const DEFAULT_EMBEDDING_TOKENIZER: &str = "tokenizer.json";
@@ -39,6 +43,10 @@ pub struct ModelPaths {
     pub unet_path: Option<PathBuf>,
     /// Path to the Genesis Embedder
     pub genesis_embedder_path: Option<PathBuf>,
+    /// Path to Phi-3 ONNX model
+    pub phi3_onnx_path: Option<PathBuf>,
+    /// Path to Phi-3 ONNX tokenizer
+    pub phi3_tokenizer_path: Option<PathBuf>,
 }
 
 /// Information about a cached model
@@ -153,6 +161,23 @@ impl ModelManager {
             }
         };
 
+        // 4. Phi-3 ONNX
+        let phi3_onnx_path = match self.ensure_model(PHI3_ONNX_REPO, PHI3_ONNX_FILE) {
+            Ok(path) => Some(path),
+            Err(e) => {
+                warn!("Failed to download Phi-3 ONNX: {}", e);
+                None
+            }
+        };
+
+        let phi3_tokenizer_path = match self.ensure_model(PHI3_ONNX_REPO, PHI3_ONNX_TOKENIZER) {
+            Ok(path) => Some(path),
+            Err(e) => {
+                warn!("Failed to download Phi-3 Tokenizer: {}", e);
+                None
+            }
+        };
+
         Ok(ModelPaths {
             llm_path,
             llm_tokenizer_path,
@@ -160,6 +185,8 @@ impl ModelManager {
             model_path,
             tokenizer_path,
             unet_path,
             genesis_embedder_path,
+            phi3_onnx_path,
+            phi3_tokenizer_path,
         })
     }
diff --git a/crates/synapse-infra/src/adapters/ort_adapter.rs b/crates/synapse-infra/src/adapters/ort_adapter.rs
new file mode 100644
index 0000000..1811e89
--- /dev/null
+++ b/crates/synapse-infra/src/adapters/ort_adapter.rs
@@ -0,0 +1,157 @@
+//! ORT (ONNX Runtime) adapter for LLM inference.
+//!
+//! Provides a cross-platform implementation of LlmPort using ONNX models.
+
+use async_trait::async_trait;
+use ort::{
+    session::{builder::GraphOptimizationLevel, Session},
+    inputs,
+};
+use std::{path::PathBuf, sync::Arc};
+use synapse_core::{error::Error, LlmPort};
+use tokio::sync::Mutex;
+use tokenizers::Tokenizer;
+use tracing::{debug, info, warn};
+
+/// ORT adapter for LLMs (e.g., Phi-3, Gemma in ONNX format).
+pub struct OrtAdapter {
+    session: Arc<Mutex<Session>>,
+    tokenizer: Tokenizer,
+}
+
+impl OrtAdapter {
+    /// Create a new OrtAdapter.
+    pub fn new(model_path: PathBuf, tokenizer_path: PathBuf) -> Result<Self, Error> {
+        info!("Initializing ORT LLM adapter from {:?}", model_path);
+
+        if !model_path.exists() {
+            return Err(Error::System(format!("Model not found at {:?}", model_path)));
+        }
+
+        if !tokenizer_path.exists() {
+            return Err(Error::System(format!("Tokenizer not found at {:?}", tokenizer_path)));
+        }
+
+        let session = Session::builder()
+            .map_err(|e| Error::System(format!("ORT session builder failed: {e}")))?
+            .with_optimization_level(GraphOptimizationLevel::Level3)
+            .map_err(|e| Error::System(format!("ORT optimization level failed: {e}")))?
+            .with_intra_threads(4)
+            .map_err(|e| Error::System(format!("ORT thread setup failed: {e}")))?
+            .commit_from_file(&model_path)
+            .map_err(|e| Error::System(format!("Failed to load ONNX model: {e}")))?;
+
+        let tokenizer = Tokenizer::from_file(&tokenizer_path)
+            .map_err(|e| Error::System(format!("Failed to load tokenizer: {e}")))?;
+
+        Ok(Self {
+            session: Arc::new(Mutex::new(session)),
+            tokenizer,
+        })
+    }
+}
+
+#[async_trait]
+impl LlmPort for OrtAdapter {
+    async fn generate(&self, prompt: &str, max_tokens: usize) -> Result<String, Error> {
+        self.generate_with_params(prompt, max_tokens, 0.7, 0.9).await
+    }
+
+    async fn generate_with_params(
+        &self,
+        _prompt: &str,
+        max_tokens: usize,
+        _temperature: f32,
+        _top_p: f32,
+    ) -> Result<String, Error> {
+        debug!("Generating with ORT adapter for prompt...");
+
+        let encoding = self.tokenizer.encode(_prompt, true)
+            .map_err(|e| Error::System(format!("Tokenization failed: {}", e)))?;
+
+        let mut input_ids_vec = encoding.get_ids().iter().map(|&x| x as i64).collect::<Vec<i64>>();
+        let mut generated_tokens = Vec::new();
+
+        let mut session = self.session.lock().await;
+
+        for _ in 0..max_tokens {
+            let seq_len = input_ids_vec.len();
+
+            // Prepare inputs for Phi-3 ONNX (common format)
+            let shape = vec![1, seq_len];
+            let input_ids_val = ort::value::Value::from_array((shape.clone(), input_ids_vec.clone()))
+                .map_err(|e| Error::System(format!("ORT value error: {}", e)))?;
+
+            let attention_mask_vec = vec![1i64; seq_len];
+            let attention_mask_val = ort::value::Value::from_array((shape, attention_mask_vec))
+                .map_err(|e| Error::System(format!("ORT value error: {}", e)))?;
+
+            // Run inference
+            // Note: This is non-KV-cached version.
+            let outputs = {
+                let ins = inputs![
+                    "input_ids" => input_ids_val,
+                    "attention_mask" => attention_mask_val,
+                ];
+
+                let res = session.run(ins);
+                match res {
+                    Ok(out) => out,
+                    Err(e) => {
+                        warn!("ORT inference with attention_mask failed, retrying with only input_ids: {}", e);
+                        let shape = vec![1, seq_len];
+                        let input_ids_val_retry = ort::value::Value::from_array((shape, input_ids_vec.clone()))
+                            .map_err(|e| Error::System(format!("ORT value error: {}", e)))?;
+
+                        let ins_retry = inputs![
+                            "input_ids" => input_ids_val_retry,
+                        ];
+
+                        session.run(ins_retry).map_err(|e2| Error::System(format!("ORT inference failed: {}", e2)))?
+                    }
+                }
+            };
+
+            // Get logits
+            let logits_value = outputs.get("logits")
+                .or_else(|| outputs.get("output")) // Some models name it 'output'
+                .ok_or_else(|| Error::System("No logits found in model output".to_string()))?;
+
+            let (shape, logits_data) = logits_value.try_extract_tensor::<f32>()
+                .map_err(|e| Error::System(format!("Failed to extract logits: {}", e)))?;
+
+            // Logits shape: [batch, seq, vocab]
+            if shape.len() < 3 {
+                return Err(Error::System(format!("Unexpected logits shape: {:?}", shape)));
+            }
+
+            let vocab_size = shape[2] as usize;
+            let last_token_logits = &logits_data[(seq_len - 1) * vocab_size .. seq_len * vocab_size];
+
+            // Greedy selection for now
+            let next_token = last_token_logits
+                .iter()
+                .enumerate()
+                .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
+                .map(|(i, _)| i as i64)
+                .unwrap();
+
+            // Check for EOS
+            if next_token == 2 || next_token == 32000 || next_token == 32007 {
+                break;
+            }
+
+            generated_tokens.push(next_token as u32);
+            input_ids_vec.push(next_token);
+
+            if generated_tokens.len() >= max_tokens {
+                break;
+            }
+        }
+
+        let decoded = self.tokenizer.decode(&generated_tokens, true)
+            .map_err(|e| Error::System(format!("Decoding failed: {}", e)))?;
+
+        Ok(decoded)
+    }
+}
diff --git a/crates/synapse-infra/src/adapters/ort_cognitive_adapter.rs b/crates/synapse-infra/src/adapters/ort_cognitive_adapter.rs
new file mode 100644
index 0000000..1b1cc99
--- /dev/null
+++ b/crates/synapse-infra/src/adapters/ort_cognitive_adapter.rs
@@ -0,0 +1,63 @@
+//! ORT Cognitive Adapter - Cognitive Layer Implementation using ONNX Runtime.
+//!
+//! Provides the Cognitive Cortex functionality guided by ethical constraints.
+
+use async_trait::async_trait;
+use synapse_core::{
+    error::{Error, Result},
+    CognitivePort, Thought, LlmPort,
+};
+use std::sync::Arc;
+use tokio::sync::Mutex;
+use tracing::{debug, info};
+use crate::adapters::OrtAdapter;
+
+/// Cognitive adapter using ONNX Runtime for inference.
+pub struct OrtCognitiveAdapter {
+    /// Internal LLM adapter for text generation.
+    llm: Arc<OrtAdapter>,
+}
+
+impl OrtCognitiveAdapter {
+    /// Create a new OrtCognitiveAdapter.
+    pub fn new(llm: Arc<OrtAdapter>) -> Self {
+        Self { llm }
+    }
+}
+
+#[async_trait]
+impl CognitivePort for OrtCognitiveAdapter {
+    async fn think(&self, context: &str, system_prompt: &str) -> Result<Thought> {
+        info!("OrtCognitiveAdapter: Thinking with context length {}", context.len());
+
+        let full_prompt = format!(
+            "<|system|>\n{}<|end|>\n<|user|>\n{}<|end|>\n<|assistant|>",
+            system_prompt, context
+        );
+
+        let response = self.llm.generate(&full_prompt, 512).await?;
+
+        debug!("Generated thought response: {}", response);
+
+        // Simple heuristic for confidence and entropy reduction for now.
+        // In a full implementation, these would be derived from logits or moral analysis.
+        Ok(Thought {
+            content: response,
+            confidence: 0.9,
+            entropy_reduction: 0.4,
+        })
+    }
+
+    async fn analyze(&self, text: &str) -> Result<f32> {
+        // Use the LLM to analyze sentiment/entropy as a float [0, 1]
+        let prompt = format!(
+            "<|system|>\nAnalyze the following text and return ONLY a single floating point number between 0.0 and 1.0 representing the entropy/chaos level (0.0 is perfect order, 1.0 is total chaos/suffering).<|end|>\n<|user|>\n{}<|end|>\n<|assistant|>",
+            text
+        );
+
+        let response = self.llm.generate(&prompt, 10).await?;
+
+        let score: f32 = response.trim().parse().unwrap_or(0.5);
+        Ok(score.clamp(0.0, 1.0))
+    }
+}