Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,30 @@ The format is inspired by Keep a Changelog and this project follows Semantic Ver

- (none yet)

## 0.13.0 - 2026-04-05

### Added

- Discrete confidence label model (`FindingConfidence` enum: Informational, Possible, Likely, Confirmed) derived from continuous confidence float using calibrated thresholds (#85).
- `confidence_label` field on all findings, auto-derived from confidence float via `confidence_to_label()`.
- `Finding::new()` constructor that auto-populates confidence label; `with_derived_label()` backfill method for existing findings.
- Declarative investigation templates replacing hardcoded keyword matching in `investigation_plan()` (#84).
- Six built-in investigation templates: broad-host-triage, ssh-key-investigation, persistence-analysis, network-exposure-audit, privilege-escalation-check, file-integrity-check.
- `resolve_investigation_template()` scores templates by keyword match count and falls back to broad-host-triage.
- Investigation templates shown in `--list-task-templates` output with tool lists.
- Task scope validation: detects out-of-scope tasks (cloud/AWS/Azure/GCP/Kubernetes/etc.) and emits an info finding instead of misleading host data (#83).
- `FindingRelevance` enum (Primary/Supplementary) for tagging finding relevance to the user's task (#86).
- `relevance` field on all findings; template-primary tools produce Primary findings, others produce Supplementary.
- `supplementary_findings` array in the run report, holding the findings that compact mode separates out of `findings`.
- Compact output mode now moves supplementary findings to `supplementary_findings` array; full mode shows all with relevance tags.
- 22 new tests covering confidence labels, template resolution, scope validation, and finding filtering.

### Changed

- `investigation_plan()` replaced by template-based `resolve_investigation_template()` — tool selection is now declarative and extensible.
- Run report JSON schema updated with `confidence_label`, `relevance`, and `supplementary_findings` fields.
- Integration tests updated to find tool turns by tool name rather than assuming fixed order.

## 0.12.0 - 2026-04-05

### Added
Expand Down
142 changes: 123 additions & 19 deletions cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -478,9 +478,9 @@ use anyhow::{anyhow, bail, Context, Result};
use clap::{Parser, ValueEnum};
use core_engine::agent::Agent;
use core_engine::{
classify_capability, CoverageBaseline, EvidencePointer, Finding, FindingSeverity,
LiveFailureReasonCount, LiveFallbackDecision, LiveRunMetrics, ModelCapabilityReport,
ModelCapabilityTier, RunReport,
builtin_investigation_templates, classify_capability, CoverageBaseline, EvidencePointer,
Finding, FindingSeverity, LiveFailureReasonCount, LiveFallbackDecision, LiveRunMetrics,
ModelCapabilityReport, ModelCapabilityTier, RunReport,
};
use cyber_tools::{ToolRegistry, ToolSpec};
use inference_bridge::onnx_vitis::{inspect_runtime_compatibility, RuntimeCompatibilitySeverity};
Expand Down Expand Up @@ -2083,7 +2083,7 @@ fn render_task_template_list() -> String {
let templates = task_template_descriptors();

let _ = writeln!(output, "WraithRun Task Templates");
for descriptor in templates {
for descriptor in &templates {
let _ = writeln!(output, "- {}: {}", descriptor.name, descriptor.prompt);
if descriptor.supports_template_target && descriptor.supports_template_lines {
let default_target = descriptor.default_target.unwrap_or("(none)");
Expand All @@ -2104,6 +2104,13 @@ fn render_task_template_list() -> String {
}
}

let _ = writeln!(output);
let _ = writeln!(output, "Investigation Templates");
for template in builtin_investigation_templates() {
let _ = writeln!(output, "- {}: {}", template.name, template.description);
let _ = writeln!(output, " tools: {}", template.tools.join(", "));
}

output.trim_end().to_string()
}

Expand Down Expand Up @@ -4377,6 +4384,34 @@ fn render_json_compact(report: &RunReport) -> Result<String> {

object.remove("turns");

// In compact mode, move supplementary findings to supplementary_findings (#86).
if let Some(findings_val) = object.remove("findings") {
if let Value::Array(all_findings) = findings_val {
let mut primary = Vec::new();
let mut supplementary = Vec::new();

for f in all_findings {
let is_supplementary = f
.get("relevance")
.and_then(Value::as_str)
== Some("supplementary");
if is_supplementary {
supplementary.push(f);
} else {
primary.push(f);
}
}

object.insert("findings".to_string(), Value::Array(primary));
if !supplementary.is_empty() {
object.insert(
"supplementary_findings".to_string(),
Value::Array(supplementary),
);
}
}
}

object.insert(
"contract_version".to_string(),
Value::String(JSON_CONTRACT_VERSION.to_string()),
Expand Down Expand Up @@ -5428,21 +5463,21 @@ fn append_live_fallback_finding(report: &mut RunReport, decision: &LiveFallbackD
return;
}

report.findings.push(Finding {
title: "Live mode fallback applied after inference failure".to_string(),
severity: FindingSeverity::Info,
confidence: 1.0,
evidence_pointer: EvidencePointer {
report.findings.push(Finding::new(
"Live mode fallback applied after inference failure".to_string(),
FindingSeverity::Info,
1.0,
EvidencePointer {
turn: None,
tool: None,
field: "live_fallback_decision.live_error".to_string(),
},
recommended_action: format!(
format!(
"Review live inference error details and model-pack readiness, then rerun live mode after fixing root cause. Fallback reason: {} (code: {}).",
decision.reason,
decision.reason_code
),
});
));
}

fn classify_live_error_reason_code(live_error: &str) -> &'static str {
Expand Down Expand Up @@ -5542,7 +5577,7 @@ mod tests {
use std::time::{SystemTime, UNIX_EPOCH};
use std::{env, fs};

use serde_json::json;
use serde_json::{json, Value};

use core_engine::{
AgentTurn, EvidencePointer, Finding, FindingSeverity, LiveFailureReasonCount,
Expand Down Expand Up @@ -5639,18 +5674,19 @@ mod tests {
observation: Some(json!({ "listener_count": 3, "listeners": [] })),
}],
final_answer: "Dry-run cycle complete.".to_string(),
findings: vec![Finding {
title: "Active listening sockets observed (3)".to_string(),
severity: FindingSeverity::Medium,
confidence: 0.68,
evidence_pointer: EvidencePointer {
findings: vec![Finding::new(
"Active listening sockets observed (3)".to_string(),
FindingSeverity::Medium,
0.68,
EvidencePointer {
turn: Some(1),
tool: Some("scan_network".to_string()),
field: "observation.listener_count".to_string(),
},
recommended_action: "Correlate listener PIDs and ports with expected services."
"Correlate listener PIDs and ports with expected services."
.to_string(),
}],
)],
supplementary_findings: Vec::new(),
}
}

Expand All @@ -5676,6 +5712,74 @@ mod tests {
assert!(!rendered.contains("\"turns\""));
}

/// Compact JSON rendering must keep only non-supplementary findings under
/// `findings` and relocate findings tagged `Supplementary` into the
/// `supplementary_findings` array.
#[test]
fn compact_mode_separates_supplementary_findings() {
    use core_engine::FindingRelevance;

    let mut report = sample_report();

    // Add one extra finding and explicitly tag it as supplementary so the
    // compact renderer has something to split out.
    let evidence = EvidencePointer {
        turn: Some(1),
        tool: Some("inspect_persistence_locations".to_string()),
        field: "observation.suspicious_entry_count".to_string(),
    };
    let mut noise = Finding::new(
        "Generic persistence noise".to_string(),
        FindingSeverity::Low,
        0.50,
        evidence,
        "Review entries.".to_string(),
    );
    noise.relevance = FindingRelevance::Supplementary;
    report.findings.push(noise);

    let rendered = render_report(&report, OutputFormat::Json, OutputMode::Compact, None)
        .expect("compact render should work");

    // Parse the rendered text back so the assertions inspect structure
    // rather than raw substrings.
    let parsed: Value = serde_json::from_str(&rendered).unwrap();
    let primary = parsed["findings"].as_array().unwrap();
    let moved = parsed["supplementary_findings"].as_array().unwrap();

    assert_eq!(primary.len(), 1, "only primary finding in findings");
    assert_eq!(
        moved.len(),
        1,
        "supplementary finding moved to supplementary_findings"
    );

    let moved_title = moved[0]["title"].as_str().unwrap();
    assert!(moved_title.contains("persistence"));
}

/// Full JSON rendering must leave every finding in the main `findings`
/// array, including those tagged `Supplementary`, and must emit the
/// `relevance` key in the output.
#[test]
fn full_mode_keeps_all_findings_with_relevance_tags() {
    use core_engine::FindingRelevance;

    let mut report = sample_report();

    // Add one extra finding tagged as supplementary; full mode is expected
    // to keep it alongside the existing one rather than splitting it out.
    let evidence = EvidencePointer {
        turn: Some(1),
        tool: Some("inspect_persistence_locations".to_string()),
        field: "observation.suspicious_entry_count".to_string(),
    };
    let mut noise = Finding::new(
        "Generic persistence noise".to_string(),
        FindingSeverity::Low,
        0.50,
        evidence,
        "Review entries.".to_string(),
    );
    noise.relevance = FindingRelevance::Supplementary;
    report.findings.push(noise);

    let rendered = render_report(&report, OutputFormat::Json, OutputMode::Full, None)
        .expect("full render should work");

    let parsed: Value = serde_json::from_str(&rendered).unwrap();
    let all_findings = parsed["findings"].as_array().unwrap();

    // Full mode keeps all findings in the main array with relevance tags.
    assert_eq!(all_findings.len(), 2);
    assert!(rendered.contains("\"relevance\""));
}

#[test]
fn renders_json_output_with_live_metrics() {
let mut report = sample_report();
Expand Down
30 changes: 21 additions & 9 deletions cli/tests/stdin_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1048,20 +1048,26 @@ fn baseline_bundle_import_populates_drift_tool_arguments() {
.get("turns")
.and_then(Value::as_array)
.expect("turns should be an array");
let first_args = turns
.first()
let audit_turn_args = turns
.iter()
.find(|turn| {
turn.get("tool_call")
.and_then(|call| call.get("tool"))
.and_then(Value::as_str)
== Some("audit_account_changes")
})
.and_then(|turn| turn.get("tool_call"))
.and_then(|call| call.get("args"))
.and_then(Value::as_object)
.expect("first tool call args should be an object");
.expect("audit_account_changes tool call args should be present");

let baseline_accounts = first_args
let baseline_accounts = audit_turn_args
.get("baseline_privileged_accounts")
.and_then(Value::as_array)
.expect("baseline_privileged_accounts should be present");
assert!(baseline_accounts.iter().any(|entry| entry == "svc-admin"));

let approved_accounts = first_args
let approved_accounts = audit_turn_args
.get("approved_privileged_accounts")
.and_then(Value::as_array)
.expect("approved_privileged_accounts should be present");
Expand Down Expand Up @@ -1121,14 +1127,20 @@ fn baseline_bundle_import_accepts_raw_file_path_with_spaces() {
.get("turns")
.and_then(Value::as_array)
.expect("turns should be an array");
let first_args = turns
.first()
let audit_turn_args = turns
.iter()
.find(|turn| {
turn.get("tool_call")
.and_then(|call| call.get("tool"))
.and_then(Value::as_str)
== Some("audit_account_changes")
})
.and_then(|turn| turn.get("tool_call"))
.and_then(|call| call.get("args"))
.and_then(Value::as_object)
.expect("first tool call args should be an object");
.expect("audit_account_changes tool call args should be present");

let baseline_accounts = first_args
let baseline_accounts = audit_turn_args
.get("baseline_privileged_accounts")
.and_then(Value::as_array)
.expect("baseline_privileged_accounts should be present");
Expand Down
Loading
Loading