Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 18 additions & 8 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ jobs:
stdin-integration:
name: CLI stdin integration (${{ matrix.os }})
runs-on: ${{ matrix.os }}
timeout-minutes: 20
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
Expand All @@ -70,6 +70,9 @@ jobs:
rustup default 1.92.0
rustup show active-toolchain

- name: Cache cargo artifacts
uses: Swatinem/rust-cache@v2

- name: Run stdin integration tests
run: cargo test -p wraithrun --test stdin_integration

Expand Down Expand Up @@ -214,27 +217,31 @@ jobs:
- name: Checkout
uses: actions/checkout@v6

- name: Install Rust toolchain (powershell)
- name: Install Rust toolchain
shell: powershell
run: |
$cargobin = "$env:USERPROFILE\.cargo\bin"
if (-not ($env:Path -split ';' | Where-Object { $_ -eq $cargobin })) {
$env:Path = "$cargobin;$env:Path"
echo "$cargobin" >> $env:GITHUB_PATH
}
$env:Path = "$cargobin;$env:Path"
if (-not (Get-Command rustup -ErrorAction SilentlyContinue)) {
Write-Host "rustup not found, installing..."
Invoke-WebRequest -Uri https://win.rustup.rs/x86_64 -OutFile rustup-init.exe
.\rustup-init.exe -y --default-toolchain 1.92.0 --profile minimal
Remove-Item .\rustup-init.exe
$env:Path = "$cargobin;$env:Path"
}
rustup toolchain install 1.92.0 --profile minimal
rustup default 1.92.0
rustup show active-toolchain
# Resolve the real toolchain bin directory (cargo proxy may be absent)
$realBin = Split-Path (& rustup which cargo)
Write-Host "Toolchain bin: $realBin"
$env:Path = "$realBin;$env:Path"
cargo --version
$utf8 = New-Object System.Text.UTF8Encoding($false)
[IO.File]::AppendAllText($env:GITHUB_ENV, "CARGO_BIN=$realBin`n", $utf8)
[IO.File]::AppendAllText($env:GITHUB_PATH, "$realBin`n", $utf8)

- name: Cache cargo artifacts
uses: Swatinem/rust-cache@v2
continue-on-error: true

- name: Validate live e2e fixture configuration
shell: powershell
Expand All @@ -247,6 +254,9 @@ jobs:
- name: Run live success e2e test (no fallback)
shell: powershell
run: |
$bin = if ($env:CARGO_BIN) { $env:CARGO_BIN } else { "$env:USERPROFILE\.cargo\bin" }
$env:Path = "$bin;$env:Path"
Write-Host "cargo at: $(Get-Command cargo -ErrorAction SilentlyContinue | Select-Object -ExpandProperty Source)"
cargo test -p wraithrun --features inference_bridge/onnx --test stdin_integration live_mode_e2e_success_without_fallback_when_fixture_is_configured -- --exact --nocapture

- name: Upload live success e2e artifacts
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ Thumbs.db
/launch-assets/generated/
/launch-assets/reports/
/launch-assets/*.json
/test_outputs/*.db
/test_outputs/*.db-shm
/test_outputs/*.db-wal

# Local GitHub Actions runner
/actions-runner/
2 changes: 1 addition & 1 deletion cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ edition.workspace = true
license.workspace = true

[features]
default = []
default = ["onnx"]
onnx = ["inference_bridge/onnx"]
vitis = ["inference_bridge/vitis"]
directml = ["inference_bridge/directml"]
Expand Down
7 changes: 7 additions & 0 deletions cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,13 @@ fn validate_live_runtime_preflight(runtime: &RuntimeConfig) -> Result<()> {
return Ok(());
}

// Fail fast when the binary was compiled without inference support (#149).
#[cfg(not(feature = "onnx"))]
bail!(
"Live inference requested but this binary was built without inference support. \
Rebuild with `--features onnx` (or `--features vitis`/`--features directml`)."
);

if !runtime.model.is_file() {
bail!(
"Live mode model file not found: {}. Run '--doctor --live --introspection-format json' (or '--doctor --live --fix') and provide a readable --model path.",
Expand Down
9 changes: 6 additions & 3 deletions core_engine/src/agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -351,9 +351,12 @@ impl<B: InferenceEngine> Agent<B> {
fn check_tool_precondition(&self, tool_name: &str) -> bool {
match tool_name {
"read_syslog" => {
// Default path is ./agent.log — skip if it doesn't exist and
// the sandbox policy would deny access anyway.
let default_path = std::path::Path::new("./agent.log");
// Use a platform-appropriate default log path (#153).
let default_path = if cfg!(target_os = "windows") {
std::path::Path::new("C:\\Windows\\System32\\winevt\\Logs\\System.evtx")
} else {
std::path::Path::new("/var/log/syslog")
};
if !default_path.exists() {
return false;
}
Expand Down
154 changes: 123 additions & 31 deletions core_engine/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
pub mod agent;

use std::collections::HashSet;
use std::collections::{HashMap, HashSet};

use inference_bridge::ModelCapabilityProbe;
use serde::{Deserialize, Serialize, Serializer};
Expand Down Expand Up @@ -101,7 +101,9 @@ static BUILTIN_TEMPLATES: [InvestigationTemplate; 7] = [
// ── Capability tiering thresholds (const, easy to tune) ──

/// Models below this parameter count (billions) are classified as Basic.
const PARAM_BASIC_CEILING_B: f32 = 2.0;
/// Lowered from 2.0 to 1.0 so that common models estimated at just over 1B
/// parameters reach the Moderate tier and use inference (#157): Llama-3.2-1B is
/// estimated at ~1.12B, and Qwen2.5-0.5B is overestimated at ~1.4B.
const PARAM_BASIC_CEILING_B: f32 = 1.0;
/// Models above this parameter count (billions) are classified as Strong.
const PARAM_STRONG_FLOOR_B: f32 = 10.0;
/// Latency above this (ms/tok) demotes to Basic.
Expand Down Expand Up @@ -1179,45 +1181,117 @@ pub fn basic_tier_summary_for_task(findings: &[Finding], task: Option<&str>) ->
return format!("{prefix}\nFINDINGS:\n(none)\nRISK: info\nACTIONS:\n(none)");
}

let max_sev = findings
.iter()
// Sort findings: highest severity first, then by confidence descending (#155).
let mut sorted: Vec<&Finding> = findings.iter().collect();
sorted.sort_by(|a, b| {
b.severity.cmp(&a.severity).then_with(|| {
b.confidence
.partial_cmp(&a.confidence)
.unwrap_or(std::cmp::Ordering::Equal)
})
});

let max_sev = sorted
.first()
.map(|f| f.severity)
.max()
.unwrap_or(FindingSeverity::Info);

let distinct_tools: HashSet<&str> = findings
.iter()
.filter_map(|f| f.evidence_pointer.tool.as_deref())
.collect();

// -- Header --
let mut out = match task {
Some(t) => format!(
"SUMMARY: Task \"{t}\" produced {} findings across {} tool(s). Maximum severity: {}.\nFINDINGS:\n",
findings.len(),
distinct_tools.len(),
max_sev.token()
"INVESTIGATION SUMMARY — \"{t}\"\n\
{total} findings across {tools} tool(s). Maximum severity: {sev}.\n\n",
total = findings.len(),
tools = distinct_tools.len(),
sev = max_sev.token().to_uppercase()
),
None => format!(
"SUMMARY: {} findings detected. Maximum severity: {}.\nFINDINGS:\n",
findings.len(),
max_sev.token()
"INVESTIGATION SUMMARY\n\
{total} findings detected. Maximum severity: {sev}.\n\n",
total = findings.len(),
sev = max_sev.token().to_uppercase()
),
};

for (i, f) in findings.iter().enumerate() {
out.push_str(&format!(
"{}. {} [{}] — {}\n",
i + 1,
f.title,
f.severity.token(),
f.recommended_action
));
// -- Group by severity (highest first) --
let severity_order = [
FindingSeverity::Critical,
FindingSeverity::High,
FindingSeverity::Medium,
FindingSeverity::Low,
FindingSeverity::Info,
];

for &sev in &severity_order {
let group: Vec<&&Finding> = sorted.iter().filter(|f| f.severity == sev).collect();
if group.is_empty() {
continue;
}
let header = match sev {
FindingSeverity::Critical => "🔴 CRITICAL",
FindingSeverity::High => "🟠 HIGH",
FindingSeverity::Medium => "🟡 MEDIUM",
FindingSeverity::Low => "🔵 LOW",
FindingSeverity::Info => "ℹ️ INFO",
};
out.push_str(&format!("── {} ({}) ──\n", header, group.len()));
for f in &group {
let tool_tag = f
.evidence_pointer
.tool
.as_deref()
.map(|t| format!(" [{}]", t))
.unwrap_or_default();
out.push_str(&format!(
" • {}{} — {}\n",
f.title, tool_tag, f.recommended_action
));
}
out.push('\n');
}

out.push_str(&format!("RISK: {}\nACTIONS:\n", max_sev.token()));
// -- Cross-references: when more than one tool contributed findings, list the tools and prompt the reader to correlate them --
if distinct_tools.len() > 1 {
// Collect tool→titles mapping for cross-reference hints.
let mut tool_titles: HashMap<&str, Vec<&str>> = HashMap::new();
for f in &sorted {
if let Some(tool) = f.evidence_pointer.tool.as_deref() {
tool_titles.entry(tool).or_default().push(&f.title);
}
}
if tool_titles.len() > 1 {
out.push_str("CROSS-REFERENCES:\n");
let tools_vec: Vec<&&str> = tool_titles.keys().collect();
out.push_str(&format!(
" Data was collected from {} sources ({}). ",
tools_vec.len(),
tools_vec.iter().map(|t| **t).collect::<Vec<_>>().join(", ")
));
out.push_str("Correlate findings across tools for a complete picture.\n\n");
}
}

for (i, f) in findings.iter().enumerate() {
out.push_str(&format!("{}. {}\n", i + 1, f.recommended_action));
// -- Risk assessment --
out.push_str(&format!(
"OVERALL RISK: {}\n\n",
max_sev.token().to_uppercase()
));

// -- Prioritized actions (deduplicated, urgent first) --
out.push_str("RECOMMENDED ACTIONS (priority order):\n");
let mut seen_actions: HashSet<&str> = HashSet::new();
let mut action_idx = 0usize;
for f in &sorted {
let action = f.recommended_action.as_str();
if seen_actions.insert(action) {
action_idx += 1;
out.push_str(&format!(" {}. {}\n", action_idx, action));
}
}

// Remove trailing newline for clean output.
Expand Down Expand Up @@ -1733,15 +1807,28 @@ mod tests {

#[test]
fn classify_small_model_as_basic() {
// With PARAM_BASIC_CEILING_B = 1.0 (#157), a 0.5B model is Basic.
let probe = ModelCapabilityProbe {
estimated_param_billions: 1.2,
estimated_param_billions: 0.5,
execution_provider: "CPUExecutionProvider".to_string(),
smoke_latency_ms: 80,
vocab_size: 32000,
};
assert_eq!(classify_capability(&probe), ModelCapabilityTier::Basic);
}

#[test]
fn classify_1b_model_as_moderate() {
    // A probe whose parameter estimate (1.2B) is above the 1.0B Basic ceiling
    // introduced for #157; the classifier is expected to report Moderate.
    let one_b_probe = ModelCapabilityProbe {
        estimated_param_billions: 1.2,
        execution_provider: "CPUExecutionProvider".to_string(),
        smoke_latency_ms: 80,
        vocab_size: 32000,
    };

    let tier = classify_capability(&one_b_probe);

    assert_eq!(tier, ModelCapabilityTier::Moderate);
}

#[test]
fn classify_medium_model_moderate_latency_as_moderate() {
let probe = ModelCapabilityProbe {
Expand Down Expand Up @@ -1859,12 +1946,15 @@ mod tests {
];

let summary = basic_tier_summary(&findings);
assert!(summary.starts_with("SUMMARY: 2 findings detected. Maximum severity: high."));
assert!(summary.contains("FINDINGS:"));
assert!(summary.contains("1. Active listeners [high]"));
assert!(summary.contains("2. Suspicious persistence [medium]"));
assert!(summary.contains("RISK: high"));
assert!(summary.contains("ACTIONS:"));
// Updated format groups by severity and provides cross-references (#155).
assert!(summary.contains("INVESTIGATION SUMMARY"));
assert!(summary.contains("2 findings"));
assert!(summary.contains("HIGH"));
assert!(summary.contains("Active listeners"));
assert!(summary.contains("Suspicious persistence"));
assert!(summary.contains("OVERALL RISK: HIGH"));
assert!(summary.contains("RECOMMENDED ACTIONS"));
assert!(summary.contains("CROSS-REFERENCES"));
}

#[test]
Expand All @@ -1891,8 +1981,10 @@ mod tests {
"observation.indicator_count",
)];
let summary = basic_tier_summary_for_task(&findings, Some("windows-triage"));
assert!(summary.contains("Task \"windows-triage\""));
// Updated format includes task name in header (#155).
assert!(summary.contains("windows-triage"));
assert!(summary.contains("1 tool(s)"));
assert!(summary.contains("RECOMMENDED ACTIONS"));
}

// ── Discrete confidence label tests (#85) ──
Expand Down
10 changes: 6 additions & 4 deletions cyber_tools/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,12 @@ impl SandboxPolicy {
}

#[cfg(target_os = "windows")]
let command_allowlist: HashSet<String> = ["whoami", "netstat", "net", "tasklist", "reg"]
.into_iter()
.map(|c| c.to_string())
.collect();
let command_allowlist: HashSet<String> = [
"whoami", "netstat", "net", "tasklist", "reg", "sc", "wmic", "schtasks",
]
.into_iter()
.map(|c| c.to_string())
.collect();

#[cfg(not(target_os = "windows"))]
let command_allowlist: HashSet<String> = ["id", "ss", "sudo"]
Expand Down
10 changes: 9 additions & 1 deletion inference_bridge/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -495,8 +495,16 @@ impl OnnxVitisEngine {
format!(r#"<call>{{"tool":"hash_binary","args":{{"path":"{path}"}}}}</call>"#)
}
"read_syslog" => {
// Use a platform-appropriate default log path instead of a
// project file like README.md, which would produce bogus
// findings (#153).
let default_path = if cfg!(target_os = "windows") {
"C:\\Windows\\System32\\winevt\\Logs\\System.evtx".to_string()
} else {
"/var/log/syslog".to_string()
};
let path = Self::guess_path_from_task(task)
.unwrap_or_else(|| "./README.md".to_string());
.unwrap_or(default_path);
let path = Self::escape_json_string(&path);
let max_lines = Self::guess_line_count_from_task(task).unwrap_or(200);
format!(
Expand Down
Loading
Loading