Shreyas582 · Shreyas582 · Apr 7, 2026 · Apr 7, 2026 · Apr 7, 2026 · Apr 7, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -52,7 +52,7 @@ jobs:
   stdin-integration:
     name: CLI stdin integration (${{ matrix.os }})
     runs-on: ${{ matrix.os }}
-    timeout-minutes: 20
+    timeout-minutes: 30
     strategy:
       fail-fast: false
       matrix:
@@ -70,6 +70,9 @@ jobs:
           rustup default 1.92.0
           rustup show active-toolchain
 
+      - name: Cache cargo artifacts
+        uses: Swatinem/rust-cache@v2
+
       - name: Run stdin integration tests
         run: cargo test -p wraithrun --test stdin_integration
 
@@ -214,27 +217,31 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v6
 
-      - name: Install Rust toolchain (powershell)
+      - name: Install Rust toolchain
         shell: powershell
         run: |
           $cargobin = "$env:USERPROFILE\.cargo\bin"
-          if (-not ($env:Path -split ';' | Where-Object { $_ -eq $cargobin })) {
-            $env:Path = "$cargobin;$env:Path"
-            echo "$cargobin" >> $env:GITHUB_PATH
-          }
+          $env:Path = "$cargobin;$env:Path"
           if (-not (Get-Command rustup -ErrorAction SilentlyContinue)) {
             Write-Host "rustup not found, installing..."
             Invoke-WebRequest -Uri https://win.rustup.rs/x86_64 -OutFile rustup-init.exe
             .\rustup-init.exe -y --default-toolchain 1.92.0 --profile minimal
             Remove-Item .\rustup-init.exe
-            $env:Path = "$cargobin;$env:Path"
           }
           rustup toolchain install 1.92.0 --profile minimal
           rustup default 1.92.0
-          rustup show active-toolchain
+          # Resolve the real toolchain bin directory (cargo proxy may be absent)
+          $realBin = Split-Path (& rustup which cargo)
+          Write-Host "Toolchain bin: $realBin"
+          $env:Path = "$realBin;$env:Path"
+          cargo --version
+          $utf8 = New-Object System.Text.UTF8Encoding($false)
+          [IO.File]::AppendAllText($env:GITHUB_ENV, "CARGO_BIN=$realBin`n", $utf8)
+          [IO.File]::AppendAllText($env:GITHUB_PATH, "$realBin`n", $utf8)
 
       - name: Cache cargo artifacts
         uses: Swatinem/rust-cache@v2
+        continue-on-error: true
 
       - name: Validate live e2e fixture configuration
         shell: powershell
@@ -247,6 +254,9 @@ jobs:
       - name: Run live success e2e test (no fallback)
         shell: powershell
         run: |
+          $bin = if ($env:CARGO_BIN) { $env:CARGO_BIN } else { "$env:USERPROFILE\.cargo\bin" }
+          $env:Path = "$bin;$env:Path"
+          Write-Host "cargo at: $(Get-Command cargo -ErrorAction SilentlyContinue | Select-Object -ExpandProperty Source)"
           cargo test -p wraithrun --features inference_bridge/onnx --test stdin_integration live_mode_e2e_success_without_fallback_when_fixture_is_configured -- --exact --nocapture
 
       - name: Upload live success e2e artifacts

diff --git a/.gitignore b/.gitignore
@@ -21,6 +21,9 @@ Thumbs.db
 /launch-assets/generated/
 /launch-assets/reports/
 /launch-assets/*.json
+/test_outputs/*.db
+/test_outputs/*.db-shm
+/test_outputs/*.db-wal
 
 # Local GitHub Actions runner
 /actions-runner/
diff --git a/cli/Cargo.toml b/cli/Cargo.toml
@@ -5,7 +5,7 @@ edition.workspace = true
 license.workspace = true
 
 [features]
-default = []
+default = ["onnx"]
 onnx = ["inference_bridge/onnx"]
 vitis = ["inference_bridge/vitis"]
 directml = ["inference_bridge/directml"]

diff --git a/cli/src/main.rs b/cli/src/main.rs
@@ -294,6 +294,13 @@ fn validate_live_runtime_preflight(runtime: &RuntimeConfig) -> Result<()> {
         return Ok(());
     }
 
+    // Fail fast when the binary was compiled without inference support (#149).
+    #[cfg(not(feature = "onnx"))]
+    bail!(
+        "Live inference requested but this binary was built without inference support. \
+         Rebuild with `--features onnx` (or `--features vitis`/`--features directml`)."
+    );
+
     if !runtime.model.is_file() {
         bail!(
             "Live mode model file not found: {}. Run '--doctor --live --introspection-format json' (or '--doctor --live --fix') and provide a readable --model path.",

diff --git a/core_engine/src/agent.rs b/core_engine/src/agent.rs
@@ -351,9 +351,12 @@ impl<B: InferenceEngine> Agent<B> {
     fn check_tool_precondition(&self, tool_name: &str) -> bool {
         match tool_name {
             "read_syslog" => {
-                // Default path is ./agent.log — skip if it doesn't exist and
-                // the sandbox policy would deny access anyway.
-                let default_path = std::path::Path::new("./agent.log");
+                // Use a platform-appropriate default log path (#153).
+                let default_path = if cfg!(target_os = "windows") {
+                    std::path::Path::new("C:\\Windows\\System32\\winevt\\Logs\\System.evtx")
+                } else {
+                    std::path::Path::new("/var/log/syslog")
+                };
                 if !default_path.exists() {
                     return false;
                 }

diff --git a/core_engine/src/lib.rs b/core_engine/src/lib.rs
@@ -1,6 +1,6 @@
 pub mod agent;
 
-use std::collections::HashSet;
+use std::collections::{HashMap, HashSet};
 
 use inference_bridge::ModelCapabilityProbe;
 use serde::{Deserialize, Serialize, Serializer};
@@ -101,7 +101,9 @@ static BUILTIN_TEMPLATES: [InvestigationTemplate; 7] = [
 // ── Capability tiering thresholds (const, easy to tune) ──
 
 /// Models below this parameter count (billions) are classified as Basic.
-const PARAM_BASIC_CEILING_B: f32 = 2.0;
+/// Lowered from 2.0 → 1.0 so common 1B+ models (Qwen2.5-0.5B overestimates
+/// to ~1.4B, Llama-3.2-1B at ~1.12B) reach Moderate tier and use inference (#157).
+const PARAM_BASIC_CEILING_B: f32 = 1.0;
 /// Models above this parameter count (billions) are classified as Strong.
 const PARAM_STRONG_FLOOR_B: f32 = 10.0;
 /// Latency above this (ms/tok) demotes to Basic.
@@ -1179,45 +1181,117 @@ pub fn basic_tier_summary_for_task(findings: &[Finding], task: Option<&str>) ->
         return format!("{prefix}\nFINDINGS:\n(none)\nRISK: info\nACTIONS:\n(none)");
     }
 
-    let max_sev = findings
-        .iter()
+    // Sort findings: highest severity first, then by confidence descending (#155).
+    let mut sorted: Vec<&Finding> = findings.iter().collect();
+    sorted.sort_by(|a, b| {
+        b.severity.cmp(&a.severity).then_with(|| {
+            b.confidence
+                .partial_cmp(&a.confidence)
+                .unwrap_or(std::cmp::Ordering::Equal)
+        })
+    });
+
+    let max_sev = sorted
+        .first()
         .map(|f| f.severity)
-        .max()
         .unwrap_or(FindingSeverity::Info);
 
     let distinct_tools: HashSet<&str> = findings
         .iter()
         .filter_map(|f| f.evidence_pointer.tool.as_deref())
         .collect();
 
+    // -- Header --
     let mut out = match task {
         Some(t) => format!(
-            "SUMMARY: Task \"{t}\" produced {} findings across {} tool(s). Maximum severity: {}.\nFINDINGS:\n",
-            findings.len(),
-            distinct_tools.len(),
-            max_sev.token()
+            "INVESTIGATION SUMMARY — \"{t}\"\n\
+             {total} findings across {tools} tool(s). Maximum severity: {sev}.\n\n",
+            total = findings.len(),
+            tools = distinct_tools.len(),
+            sev = max_sev.token().to_uppercase()
         ),
         None => format!(
-            "SUMMARY: {} findings detected. Maximum severity: {}.\nFINDINGS:\n",
-            findings.len(),
-            max_sev.token()
+            "INVESTIGATION SUMMARY\n\
+             {total} findings detected. Maximum severity: {sev}.\n\n",
+            total = findings.len(),
+            sev = max_sev.token().to_uppercase()
         ),
     };
 
-    for (i, f) in findings.iter().enumerate() {
-        out.push_str(&format!(
-            "{}. {} [{}] — {}\n",
-            i + 1,
-            f.title,
-            f.severity.token(),
-            f.recommended_action
-        ));
+    // -- Group by severity (highest first) --
+    let severity_order = [
+        FindingSeverity::Critical,
+        FindingSeverity::High,
+        FindingSeverity::Medium,
+        FindingSeverity::Low,
+        FindingSeverity::Info,
+    ];
+
+    for &sev in &severity_order {
+        let group: Vec<&&Finding> = sorted.iter().filter(|f| f.severity == sev).collect();
+        if group.is_empty() {
+            continue;
+        }
+        let header = match sev {
+            FindingSeverity::Critical => "🔴 CRITICAL",
+            FindingSeverity::High => "🟠 HIGH",
+            FindingSeverity::Medium => "🟡 MEDIUM",
+            FindingSeverity::Low => "🔵 LOW",
+            FindingSeverity::Info => "ℹ️  INFO",
+        };
+        out.push_str(&format!("── {} ({}) ──\n", header, group.len()));
+        for f in &group {
+            let tool_tag = f
+                .evidence_pointer
+                .tool
+                .as_deref()
+                .map(|t| format!(" [{}]", t))
+                .unwrap_or_default();
+            out.push_str(&format!(
+                "  • {}{} — {}\n",
+                f.title, tool_tag, f.recommended_action
+            ));
+        }
+        out.push('\n');
     }
 
-    out.push_str(&format!("RISK: {}\nACTIONS:\n", max_sev.token()));
+    // -- Cross-references: find tools whose findings overlap --
+    if distinct_tools.len() > 1 {
+        // Collect tool→titles mapping for cross-reference hints.
+        let mut tool_titles: HashMap<&str, Vec<&str>> = HashMap::new();
+        for f in &sorted {
+            if let Some(tool) = f.evidence_pointer.tool.as_deref() {
+                tool_titles.entry(tool).or_default().push(&f.title);
+            }
+        }
+        if tool_titles.len() > 1 {
+            out.push_str("CROSS-REFERENCES:\n");
+            let tools_vec: Vec<&&str> = tool_titles.keys().collect();
+            out.push_str(&format!(
+                "  Data was collected from {} sources ({}). ",
+                tools_vec.len(),
+                tools_vec.iter().map(|t| **t).collect::<Vec<_>>().join(", ")
+            ));
+            out.push_str("Correlate findings across tools for a complete picture.\n\n");
+        }
+    }
 
-    for (i, f) in findings.iter().enumerate() {
-        out.push_str(&format!("{}. {}\n", i + 1, f.recommended_action));
+    // -- Risk assessment --
+    out.push_str(&format!(
+        "OVERALL RISK: {}\n\n",
+        max_sev.token().to_uppercase()
+    ));
+
+    // -- Prioritized actions (deduplicated, urgent first) --
+    out.push_str("RECOMMENDED ACTIONS (priority order):\n");
+    let mut seen_actions: HashSet<&str> = HashSet::new();
+    let mut action_idx = 0usize;
+    for f in &sorted {
+        let action = f.recommended_action.as_str();
+        if seen_actions.insert(action) {
+            action_idx += 1;
+            out.push_str(&format!("  {}. {}\n", action_idx, action));
+        }
     }
 
     // Remove trailing newline for clean output.
@@ -1733,15 +1807,28 @@ mod tests {
 
     #[test]
     fn classify_small_model_as_basic() {
+        // With PARAM_BASIC_CEILING_B = 1.0 (#157), a 0.5B model is Basic.
         let probe = ModelCapabilityProbe {
-            estimated_param_billions: 1.2,
+            estimated_param_billions: 0.5,
             execution_provider: "CPUExecutionProvider".to_string(),
             smoke_latency_ms: 80,
             vocab_size: 32000,
         };
         assert_eq!(classify_capability(&probe), ModelCapabilityTier::Basic);
     }
 
+    #[test]
+    fn classify_1b_model_as_moderate() {
+        // With PARAM_BASIC_CEILING_B = 1.0 (#157), a 1.2B model reaches Moderate.
+        let probe = ModelCapabilityProbe {
+            estimated_param_billions: 1.2,
+            execution_provider: "CPUExecutionProvider".to_string(),
+            smoke_latency_ms: 80,
+            vocab_size: 32000,
+        };
+        assert_eq!(classify_capability(&probe), ModelCapabilityTier::Moderate);
+    }
+
     #[test]
     fn classify_medium_model_moderate_latency_as_moderate() {
         let probe = ModelCapabilityProbe {
@@ -1859,12 +1946,15 @@ mod tests {
         ];
 
         let summary = basic_tier_summary(&findings);
-        assert!(summary.starts_with("SUMMARY: 2 findings detected. Maximum severity: high."));
-        assert!(summary.contains("FINDINGS:"));
-        assert!(summary.contains("1. Active listeners [high]"));
-        assert!(summary.contains("2. Suspicious persistence [medium]"));
-        assert!(summary.contains("RISK: high"));
-        assert!(summary.contains("ACTIONS:"));
+        // Updated format groups by severity and provides cross-references (#155).
+        assert!(summary.contains("INVESTIGATION SUMMARY"));
+        assert!(summary.contains("2 findings"));
+        assert!(summary.contains("HIGH"));
+        assert!(summary.contains("Active listeners"));
+        assert!(summary.contains("Suspicious persistence"));
+        assert!(summary.contains("OVERALL RISK: HIGH"));
+        assert!(summary.contains("RECOMMENDED ACTIONS"));
+        assert!(summary.contains("CROSS-REFERENCES"));
     }
 
     #[test]
@@ -1891,8 +1981,10 @@ mod tests {
             "observation.indicator_count",
         )];
         let summary = basic_tier_summary_for_task(&findings, Some("windows-triage"));
-        assert!(summary.contains("Task \"windows-triage\""));
+        // Updated format includes task name in header (#155).
+        assert!(summary.contains("windows-triage"));
         assert!(summary.contains("1 tool(s)"));
+        assert!(summary.contains("RECOMMENDED ACTIONS"));
     }
 
     // ── Discrete confidence label tests (#85) ──

diff --git a/cyber_tools/src/lib.rs b/cyber_tools/src/lib.rs
@@ -81,10 +81,12 @@ impl SandboxPolicy {
         }
 
         #[cfg(target_os = "windows")]
-        let command_allowlist: HashSet<String> = ["whoami", "netstat", "net", "tasklist", "reg"]
-            .into_iter()
-            .map(|c| c.to_string())
-            .collect();
+        let command_allowlist: HashSet<String> = [
+            "whoami", "netstat", "net", "tasklist", "reg", "sc", "wmic", "schtasks",
+        ]
+        .into_iter()
+        .map(|c| c.to_string())
+        .collect();
 
         #[cfg(not(target_os = "windows"))]
         let command_allowlist: HashSet<String> = ["id", "ss", "sudo"]

diff --git a/inference_bridge/src/lib.rs b/inference_bridge/src/lib.rs
@@ -495,8 +495,16 @@ impl OnnxVitisEngine {
                 format!(r#"<call>{{"tool":"hash_binary","args":{{"path":"{path}"}}}}</call>"#)
             }
             "read_syslog" => {
+                // Use a platform-appropriate default log path instead of a
+                // project file like README.md, which would produce bogus
+                // findings (#153).
+                let default_path = if cfg!(target_os = "windows") {
+                    "C:\\Windows\\System32\\winevt\\Logs\\System.evtx".to_string()
+                } else {
+                    "/var/log/syslog".to_string()
+                };
                 let path = Self::guess_path_from_task(task)
-                    .unwrap_or_else(|| "./README.md".to_string());
+                    .unwrap_or(default_path);
                 let path = Self::escape_json_string(&path);
                 let max_lines = Self::guess_line_count_from_task(task).unwrap_or(200);
                 format!(