Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions plexe/agents/model_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,9 @@ def run(
),
"baseline_performance": self.context.baseline_performance, # Validation set performance (for reference)
"baseline_predictor": self.context.baseline_predictor, # For re-evaluation on test set
"core_metrics_report": self.context.scratch.get(
"_core_metrics_report"
), # Model's test metrics from Phase 1
}
synthesis_args = {**additional_args, **baseline_context}

Expand Down Expand Up @@ -469,6 +472,11 @@ def _get_phase_synthesis_prompt(task: str, explainability_required: bool) -> str
f"2. Synthesize prioritized recommendations (HIGH/MEDIUM/LOW)\\n"
f"3. Write executive summary (2-3 sentences)\\n"
f"4. Determine deployment readiness\\n\\n"
f"Verdict rubric:\\n"
f"- FAIL: Model is broken, materially worse than heuristic baseline, or has catastrophic/high-severity issues.\\n"
f"- CONDITIONAL_PASS: Model is roughly on par with baseline, or is better than baseline but has material unresolved issues.\\n"
f"- PASS: Model is clearly better than baseline, has no catastrophic/high-severity issues, and is generally robust.\\n"
f"- Do not assign CONDITIONAL_PASS only because minor improvement opportunities exist.\\n\\n"
f"Register using:\\n"
f"register_final_evaluation_report(\\n"
f" verdict='PASS'|'CONDITIONAL_PASS'|'FAIL',\\n"
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "plexe"
-version = "1.4.1"
+version = "1.4.2"
description = "An agentic framework for building ML models from natural language"
authors = [
"Marcello De Bernardi <mdebernardi@plexe.ai>",
Expand Down