From 871dd5bd227f47df6f63f6600a90713c1c192692 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Tue, 3 Mar 2026 17:44:55 +0000
Subject: [PATCH] Implement Cognitive Layer (Phi-3/Gemma Integration)

- Implemented `OrtAdapter` for ONNX LLM inference.
- Implemented `OrtCognitiveAdapter` for high-level thinking.
- Updated `ModelManager` to support Phi-3 ONNX models.
- Implemented "Thought Loop" orchestrating perception, ethical judgment, cognition, and simulation.
- Integrated `thought` command into `synapse-cli` for testing.
- Exposed new components in `synapse-infra` and `synapse-cognition`.

Co-authored-by: iberi22 <10615454+iberi22@users.noreply.github.com>
---
 crates/synapse-cli/src/commands.rs            |  62 +++++++
 crates/synapse-cli/src/main.rs                |  13 ++
 crates/synapse-cognition/src/lib.rs           |   1 +
 crates/synapse-cognition/src/thought_loop.rs  |  75 +++++++++
 crates/synapse-infra/src/adapters/mod.rs      |   6 +-
 .../src/adapters/model_manager.rs             |  27 +++
 .../synapse-infra/src/adapters/ort_adapter.rs | 157 ++++++++++++++++++
 .../src/adapters/ort_cognitive_adapter.rs     |  63 +++++++
 8 files changed, 402 insertions(+), 2 deletions(-)
 create mode 100644 crates/synapse-cognition/src/thought_loop.rs
 create mode 100644 crates/synapse-infra/src/adapters/ort_adapter.rs
 create mode 100644 crates/synapse-infra/src/adapters/ort_cognitive_adapter.rs

diff --git a/crates/synapse-cli/src/commands.rs b/crates/synapse-cli/src/commands.rs
index cb5caaf..3890ff3 100644
--- a/crates/synapse-cli/src/commands.rs
+++ b/crates/synapse-cli/src/commands.rs
@@ -97,6 +97,68 @@ pub async fn init(path: &str) -> Result<()> {
     Ok(())
 }
 
+/// Run a Thought Loop cycle.
+pub async fn thought(input: &str, use_ort: bool) -> Result<()> {
+    println!("💭 Synapse Thought Loop");
+    println!("──────────────────────");
+    println!("📝 Input: \"{}\"", input);
+
+    // 1. Initialize Adapters
+    println!("\n🧠 Loading Cognitive Components...");
+
+    let data_dir = dirs::data_dir()
+        .unwrap_or_else(|| std::path::PathBuf::from("."))
+        .join("synapse_data");
+
+    // Initialize Models
+    let manager = synapse_infra::adapters::model_manager::ModelManager::new(data_dir.join("models"))?;
+    let paths = manager.ensure_models_exist()?;
+
+    // LLM Selection
+    use synapse_core::ports::CognitivePort;
+    use synapse_cognition::thought_loop::ThoughtLoop;
+    use synapse_core::core::genesis::EnneadMatrix;
+    use synapse_core::perception::HolographicRetina;
+    use std::sync::Arc;
+
+    let cognitive: Arc<dyn CognitivePort> = if use_ort {
+        println!("  Using ORT (ONNX) Cognitive Adapter...");
+        let model_path = paths.phi3_onnx_path.context("Phi-3 ONNX model not found")?;
+        let tok_path = paths.phi3_tokenizer_path.context("Phi-3 tokenizer not found")?;
+
+        let ort_llm = Arc::new(synapse_infra::adapters::ort_adapter::OrtAdapter::new(model_path, tok_path)?);
+        Arc::new(synapse_infra::adapters::ort_cognitive_adapter::OrtCognitiveAdapter::new(ort_llm))
+    } else {
+        println!("  Using Candle (GGUF) Cognitive Adapter...");
+        let model_path = paths.llm_path.context("TinyLlama GGUF model not found")?;
+        let tok_path = paths.llm_tokenizer_path.context("TinyLlama tokenizer not found")?;
+
+        let candle_llm = Arc::new(tokio::sync::Mutex::new(synapse_infra::adapters::candle_adapter::CandleAdapter::new(
+            model_path.to_str().unwrap().to_string(),
+            Some(tok_path.to_str().unwrap().to_string())
+        )?));
+        Arc::new(synapse_cognition::CandleCognitiveAdapter::with_llm_adapter(candle_llm))
+    };
+
+    let ennead = EnneadMatrix::new();
+    let retina = Arc::new(HolographicRetina::new(ennead));
+
+    // 2. Initialize ThoughtLoop
+    let loop_orchestrator = ThoughtLoop::new(retina, cognitive);
+
+    // 3. Execute Cycle
+    println!("\n🔄 Running Thought Cycle...");
+    let result = loop_orchestrator.cycle(input).await?;
+
+    // 4. Print Results
+    println!("\n✨ Thought Generated:");
+    println!("  Content: {}", result.content.trim());
+    println!("  Confidence: {:.2}", result.confidence);
+    println!("  Entropy Reduction: {:.2}", result.entropy_reduction);
+
+    Ok(())
+}
+
 /// Manually dial a peer.
 pub async fn dial(addr: &str) -> Result<()> {
     info!("Sending dial command for peer: {}", addr);
diff --git a/crates/synapse-cli/src/main.rs b/crates/synapse-cli/src/main.rs
index 94c58a2..86a6231 100644
--- a/crates/synapse-cli/src/main.rs
+++ b/crates/synapse-cli/src/main.rs
@@ -90,6 +90,16 @@ enum Commands {
         peer: Option<String>,
     },
 
+    /// Run a Thought Loop cycle (Perception -> Judgment -> Cognition -> Simulation)
+    Thought {
+        /// Input to think about
+        input: String,
+
+        /// Use ORT (ONNX) instead of Candle (GGUF)
+        #[arg(short, long)]
+        ort: bool,
+    },
+
     /// Translate a message with emotional empathy (Modo Espejo)
     Translate {
         /// The message to translate
@@ -215,6 +225,9 @@ async fn main() -> anyhow::Result<()> {
         Commands::Transmit { message, peer } => {
             commands::transmit(&message, peer).await?;
         }
+        Commands::Thought { input, ort } => {
+            commands::thought(&input, ort).await?;
+        }
         Commands::Translate { message, force } => {
             commands::translate(&message, force).await?;
         }
diff --git a/crates/synapse-cognition/src/lib.rs b/crates/synapse-cognition/src/lib.rs
index 0eb5d0c..ba7a466 100644
--- a/crates/synapse-cognition/src/lib.rs
+++ b/crates/synapse-cognition/src/lib.rs
@@ -128,6 +128,7 @@ use tokio::sync::broadcast;
 
 pub mod bootstrap;
 pub mod commands;
+pub mod thought_loop;
 pub mod tokenomics_service;
 pub mod dojo_service;
 pub mod sovereign_service;
diff --git a/crates/synapse-cognition/src/thought_loop.rs b/crates/synapse-cognition/src/thought_loop.rs
new file mode 100644
index 0000000..a756744
--- /dev/null
+++ b/crates/synapse-cognition/src/thought_loop.rs
@@ -0,0 +1,75 @@
+//! Thought Loop Orchestration.
+//!
+//! Implements the cognitive cycle: Perception -> Judgment -> Cognition -> Simulation.
+
+use synapse_core::{
+    error::Result,
+    perception::{HolographicRetina, EntropyLevel},
+    CognitivePort, Thought,
+};
+use std::sync::Arc;
+use tracing::{info, warn, debug};
+
+/// The Thought Loop service.
+pub struct ThoughtLoop {
+    /// The retina for perception.
+    retina: Arc<HolographicRetina>,
+    /// The cognitive layer for thinking.
+    cognitive: Arc<dyn CognitivePort>,
+}
+
+impl ThoughtLoop {
+    /// Create a new ThoughtLoop.
+    pub fn new(retina: Arc<HolographicRetina>, cognitive: Arc<dyn CognitivePort>) -> Self {
+        Self { retina, cognitive }
+    }
+
+    /// Process a single "thought cycle".
+    pub async fn cycle(&self, input: &str) -> Result<Thought> {
+        info!("Starting Thought Loop cycle for input...");
+
+        // 1. Perception: Input -> Latent Vector
+        // (Simulating raw input as f32 vector from text for this POC)
+        let raw_input: Vec<f32> = input.as_bytes().iter().map(|&b| b as f32 / 255.0).collect();
+        let tensor = self.retina.perceive(&raw_input, "thought_loop_input", Some(input.to_string()))?;
+
+        // 2. Judgment: Analyze Entropy/Suffering (Genesis Matrix)
+        let (is_suffering, entropy_level) = self.retina.detect_suffering(tensor.latent())?;
+
+        let ethical_state = match entropy_level {
+            EntropyLevel::Harmonious => "The system is in a state of high moral harmony.",
+            EntropyLevel::Neutral => "The system is stable.",
+            EntropyLevel::Chaotic => "Warning: High entropy detected. Restore order.",
+            EntropyLevel::Suffering => "CRITICAL: Suffering detected. You MUST act to reduce entropy and preserve life.",
+        };
+
+        debug!("Ethical State: {}", ethical_state);
+
+        // 3. Cognition: Generate response based on dynamic system prompt
+        let system_prompt = format!(
+            "You are the Cognitive Cortex of a Synapse Node. \
+            Current Ethical State: {}. \
+            Acknowledge this state in your reasoning. \
+            Your goal is to reduce entropy and align with the Ennead Matrix.",
+            ethical_state
+        );
+
+        let mut thought = self.cognitive.think(input, &system_prompt).await?;
+
+        // 4. Simulation: Verify entropy reduction
+        let simulated_entropy = self.cognitive.analyze(&thought.content).await?;
+
+        info!("Thought Loop: Original Entropy Score: {}, Simulated Response Entropy: {}",
+            tensor.metadata().entropy_score, simulated_entropy);
+
+        if simulated_entropy > tensor.metadata().entropy_score && is_suffering {
+            warn!("Simulation detected entropy increase during suffering! Applying corrective guidance.");
+            // In a real implementation, we might re-run generation with stronger guidance.
+            thought.content = format!("(Guided) {}", thought.content);
+        }
+
+        thought.entropy_reduction = (tensor.metadata().entropy_score - simulated_entropy).max(0.0);
+
+        Ok(thought)
+    }
+}
diff --git a/crates/synapse-infra/src/adapters/mod.rs b/crates/synapse-infra/src/adapters/mod.rs
index 39e6dcd..bc446fe 100644
--- a/crates/synapse-infra/src/adapters/mod.rs
+++ b/crates/synapse-infra/src/adapters/mod.rs
@@ -3,7 +3,8 @@
 pub mod surrealdb_adapter;
 pub mod sled_adapter;
 pub mod sled_memory_adapter;
-// pub mod ort_adapter; // TODO: File missing, needs to be created or imported from feature branch
+pub mod ort_adapter;
+pub mod ort_cognitive_adapter;
 pub mod context_adapter;
 pub mod immune_adapter;
 pub mod mock_llm_adapter;
@@ -34,7 +35,8 @@ pub mod libp2p_sync_adapter;
 pub use surrealdb_adapter::*;
 pub use sled_adapter::*;
 pub use sled_memory_adapter::*;
-// pub use ort_adapter::*;
+pub use ort_adapter::*;
+pub use ort_cognitive_adapter::*;
 pub use mock_llm_adapter::*;
 pub use mock_embedding_adapter::*;
 pub use embedding_adapter::*;
diff --git a/crates/synapse-infra/src/adapters/model_manager.rs b/crates/synapse-infra/src/adapters/model_manager.rs
index 3aa74e1..9150d1a 100644
--- a/crates/synapse-infra/src/adapters/model_manager.rs
+++ b/crates/synapse-infra/src/adapters/model_manager.rs
@@ -14,6 +14,10 @@ pub const DEFAULT_LLM_REPO: &str = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF";
 pub const DEFAULT_LLM_TOKENIZER_REPO: &str = "TinyLlama/TinyLlama-1.1B-Chat-v1.0";
 pub const DEFAULT_LLM_FILE: &str = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf";
 
+pub const PHI3_ONNX_REPO: &str = "microsoft/Phi-3-mini-4k-instruct-onnx";
+pub const PHI3_ONNX_FILE: &str = "cpu_and_mobile/cpu-int4-rtn-block-32/phi3-mini-4k-instruct-cpu-int4-rtn-block-32.onnx";
+pub const PHI3_ONNX_TOKENIZER: &str = "cpu_and_mobile/cpu-int4-rtn-block-32/tokenizer.json";
+
 pub const DEFAULT_EMBEDDING_REPO: &str = "sentence-transformers/all-MiniLM-L6-v2";
 pub const DEFAULT_EMBEDDING_MODEL: &str = "model.safetensors";
 pub const DEFAULT_EMBEDDING_TOKENIZER: &str = "tokenizer.json";
@@ -39,6 +43,10 @@ pub struct ModelPaths {
     pub unet_path: Option<PathBuf>,
     /// Path to the Genesis Embedder
     pub genesis_embedder_path: Option<PathBuf>,
+    /// Path to Phi-3 ONNX model
+    pub phi3_onnx_path: Option<PathBuf>,
+    /// Path to Phi-3 ONNX tokenizer
+    pub phi3_tokenizer_path: Option<PathBuf>,
 }
 
 /// Information about a cached model
@@ -153,6 +161,23 @@ impl ModelManager {
             }
         };
 
+        // 4. Phi-3 ONNX
+        let phi3_onnx_path = match self.ensure_model(PHI3_ONNX_REPO, PHI3_ONNX_FILE) {
+            Ok(path) => Some(path),
+            Err(e) => {
+                warn!("Failed to download Phi-3 ONNX: {}", e);
+                None
+            }
+        };
+
+        let phi3_tokenizer_path = match self.ensure_model(PHI3_ONNX_REPO, PHI3_ONNX_TOKENIZER) {
+            Ok(path) => Some(path),
+            Err(e) => {
+                warn!("Failed to download Phi-3 Tokenizer: {}", e);
+                None
+            }
+        };
+
         Ok(ModelPaths {
             llm_path,
             llm_tokenizer_path,
@@ -160,6 +185,8 @@ impl ModelManager {
             model_path,
             tokenizer_path,
             unet_path,
             genesis_embedder_path,
+            phi3_onnx_path,
+            phi3_tokenizer_path,
         })
     }
diff --git a/crates/synapse-infra/src/adapters/ort_adapter.rs b/crates/synapse-infra/src/adapters/ort_adapter.rs
new file mode 100644
index 0000000..1811e89
--- /dev/null
+++ b/crates/synapse-infra/src/adapters/ort_adapter.rs
@@ -0,0 +1,157 @@
+//! ORT (ONNX Runtime) adapter for LLM inference.
+//!
+//! Provides a cross-platform implementation of LlmPort using ONNX models.
+
+use async_trait::async_trait;
+use ort::{
+    session::{builder::GraphOptimizationLevel, Session},
+    inputs,
+};
+use std::{path::PathBuf, sync::Arc};
+use synapse_core::{error::Error, LlmPort};
+use tokio::sync::Mutex;
+use tokenizers::Tokenizer;
+use tracing::{debug, info, warn};
+
+/// ORT adapter for LLMs (e.g., Phi-3, Gemma in ONNX format).
+pub struct OrtAdapter {
+    session: Arc<Mutex<Session>>,
+    tokenizer: Tokenizer,
+}
+
+impl OrtAdapter {
+    /// Create a new OrtAdapter.
+    pub fn new(model_path: PathBuf, tokenizer_path: PathBuf) -> Result<Self, Error> {
+        info!("Initializing ORT LLM adapter from {:?}", model_path);
+
+        if !model_path.exists() {
+            return Err(Error::System(format!("Model not found at {:?}", model_path)));
+        }
+
+        if !tokenizer_path.exists() {
+            return Err(Error::System(format!("Tokenizer not found at {:?}", tokenizer_path)));
+        }
+
+        let session = Session::builder()
+            .map_err(|e| Error::System(format!("ORT session builder failed: {e}")))?
+            .with_optimization_level(GraphOptimizationLevel::Level3)
+            .map_err(|e| Error::System(format!("ORT optimization level failed: {e}")))?
+            .with_intra_threads(4)
+            .map_err(|e| Error::System(format!("ORT thread setup failed: {e}")))?
+            .commit_from_file(&model_path)
+            .map_err(|e| Error::System(format!("Failed to load ONNX model: {e}")))?;
+
+        let tokenizer = Tokenizer::from_file(&tokenizer_path)
+            .map_err(|e| Error::System(format!("Failed to load tokenizer: {e}")))?;
+
+        Ok(Self {
+            session: Arc::new(Mutex::new(session)),
+            tokenizer,
+        })
+    }
+}
+
+#[async_trait]
+impl LlmPort for OrtAdapter {
+    async fn generate(&self, prompt: &str, max_tokens: usize) -> Result<String, Error> {
+        self.generate_with_params(prompt, max_tokens, 0.7, 0.9).await
+    }
+
+    async fn generate_with_params(
+        &self,
+        _prompt: &str,
+        max_tokens: usize,
+        _temperature: f32,
+        _top_p: f32,
+    ) -> Result<String, Error> {
+        debug!("Generating with ORT adapter for prompt...");
+
+        let encoding = self.tokenizer.encode(_prompt, true)
+            .map_err(|e| Error::System(format!("Tokenization failed: {}", e)))?;
+
+        let mut input_ids_vec = encoding.get_ids().iter().map(|&x| x as i64).collect::<Vec<i64>>();
+        let mut generated_tokens = Vec::new();
+
+        let mut session = self.session.lock().await;
+
+        for _ in 0..max_tokens {
+            let seq_len = input_ids_vec.len();
+
+            // Prepare inputs for Phi-3 ONNX (common format)
+            let shape = vec![1, seq_len];
+            let input_ids_val = ort::value::Value::from_array((shape.clone(), input_ids_vec.clone()))
+                .map_err(|e| Error::System(format!("ORT value error: {}", e)))?;
+
+            let attention_mask_vec = vec![1i64; seq_len];
+            let attention_mask_val = ort::value::Value::from_array((shape, attention_mask_vec))
+                .map_err(|e| Error::System(format!("ORT value error: {}", e)))?;
+
+            // Run inference
+            // Note: This is non-KV-cached version.
+            let outputs = {
+                let ins = inputs![
+                    "input_ids" => input_ids_val,
+                    "attention_mask" => attention_mask_val,
+                ];
+
+                let res = session.run(ins);
+                match res {
+                    Ok(out) => out,
+                    Err(e) => {
+                        warn!("ORT inference with attention_mask failed, retrying with only input_ids: {}", e);
+                        let shape = vec![1, seq_len];
+                        let input_ids_val_retry = ort::value::Value::from_array((shape, input_ids_vec.clone()))
+                            .map_err(|e| Error::System(format!("ORT value error: {}", e)))?;
+
+                        let ins_retry = inputs![
+                            "input_ids" => input_ids_val_retry,
+                        ];
+
+                        session.run(ins_retry).map_err(|e2| Error::System(format!("ORT inference failed: {}", e2)))?
+                    }
+                }
+            };
+
+            // Get logits
+            let logits_value = outputs.get("logits")
+                .or_else(|| outputs.get("output")) // Some models name it 'output'
+                .ok_or_else(|| Error::System("No logits found in model output".to_string()))?;
+
+            let (shape, logits_data) = logits_value.try_extract_tensor::<f32>()
+                .map_err(|e| Error::System(format!("Failed to extract logits: {}", e)))?;
+
+            // Logits shape: [batch, seq, vocab]
+            if shape.len() < 3 {
+                return Err(Error::System(format!("Unexpected logits shape: {:?}", shape)));
+            }
+
+            let vocab_size = shape[2] as usize;
+            let last_token_logits = &logits_data[(seq_len - 1) * vocab_size .. seq_len * vocab_size];
+
+            // Greedy selection for now
+            let next_token = last_token_logits
+                .iter()
+                .enumerate()
+                .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
+                .map(|(i, _)| i as i64)
+                .unwrap();
+
+            // Check for EOS
+            if next_token == 2 || next_token == 32000 || next_token == 32007 {
+                break;
+            }
+
+            generated_tokens.push(next_token as u32);
+            input_ids_vec.push(next_token);
+
+            if generated_tokens.len() >= max_tokens {
+                break;
+            }
+        }
+
+        let decoded = self.tokenizer.decode(&generated_tokens, true)
+            .map_err(|e| Error::System(format!("Decoding failed: {}", e)))?;
+
+        Ok(decoded)
+    }
+}
diff --git a/crates/synapse-infra/src/adapters/ort_cognitive_adapter.rs b/crates/synapse-infra/src/adapters/ort_cognitive_adapter.rs
new file mode 100644
index 0000000..1b1cc99
--- /dev/null
+++ b/crates/synapse-infra/src/adapters/ort_cognitive_adapter.rs
@@ -0,0 +1,63 @@
+//! ORT Cognitive Adapter - Cognitive Layer Implementation using ONNX Runtime.
+//!
+//! Provides the Cognitive Cortex functionality guided by ethical constraints.
+
+use async_trait::async_trait;
+use synapse_core::{
+    error::{Error, Result},
+    CognitivePort, Thought, LlmPort,
+};
+use std::sync::Arc;
+use tokio::sync::Mutex;
+use tracing::{debug, info};
+use crate::adapters::OrtAdapter;
+
+/// Cognitive adapter using ONNX Runtime for inference.
+pub struct OrtCognitiveAdapter {
+    /// Internal LLM adapter for text generation.
+    llm: Arc<OrtAdapter>,
+}
+
+impl OrtCognitiveAdapter {
+    /// Create a new OrtCognitiveAdapter.
+    pub fn new(llm: Arc<OrtAdapter>) -> Self {
+        Self { llm }
+    }
+}
+
+#[async_trait]
+impl CognitivePort for OrtCognitiveAdapter {
+    async fn think(&self, context: &str, system_prompt: &str) -> Result<Thought> {
+        info!("OrtCognitiveAdapter: Thinking with context length {}", context.len());
+
+        let full_prompt = format!(
+            "<|system|>\n{}<|end|>\n<|user|>\n{}<|end|>\n<|assistant|>",
+            system_prompt, context
+        );
+
+        let response = self.llm.generate(&full_prompt, 512).await?;
+
+        debug!("Generated thought response: {}", response);
+
+        // Simple heuristic for confidence and entropy reduction for now.
+        // In a full implementation, these would be derived from logits or moral analysis.
+        Ok(Thought {
+            content: response,
+            confidence: 0.9,
+            entropy_reduction: 0.4,
+        })
+    }
+
+    async fn analyze(&self, text: &str) -> Result<f32> {
+        // Use the LLM to analyze sentiment/entropy as a float [0, 1]
+        let prompt = format!(
+            "<|system|>\nAnalyze the following text and return ONLY a single floating point number between 0.0 and 1.0 representing the entropy/chaos level (0.0 is perfect order, 1.0 is total chaos/suffering).<|end|>\n<|user|>\n{}<|end|>\n<|assistant|>",
+            text
+        );
+
+        let response = self.llm.generate(&prompt, 10).await?;
+
+        let score: f32 = response.trim().parse().unwrap_or(0.5);
+        Ok(score.clamp(0.0, 1.0))
+    }
+}