From 05c03ea850af93620298b461aa5341d5424a1110 Mon Sep 17 00:00:00 2001
From: Mouse
Date: Tue, 7 Apr 2026 13:32:17 -0700
Subject: [PATCH] feat: harden run configuration validation and format
 normalization

---
 README.md                       |  1 +
 promptlens/models/config.py     | 98 ++++++++++++++++++++++++++++++++-
 tests/test_config_validation.py | 41 ++++++++++++++
 3 files changed, 139 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_config_validation.py

diff --git a/README.md b/README.md
index 68d076e..784a6eb 100644
--- a/README.md
+++ b/README.md
@@ -18,6 +18,7 @@ PromptLens runs golden test sets against multiple models, scores outputs using L
 - **Beautiful Reports** - Interactive HTML reports with charts, comparisons, and detailed results
 - **Multiple Export Formats** - HTML, JSON, CSV, and Markdown outputs
 - **Parallel Execution** - Async execution with configurable concurrency and retry logic
+- **Strict Config Validation** - Early validation of model temperatures, token limits, retry settings, and output formats
 - **Portable & Local** - No cloud backend, all data stays on your machine
 - **Easy to Extend** - Plugin architecture for custom providers, judges, and exporters
 

diff --git a/promptlens/models/config.py b/promptlens/models/config.py
index 7c85798..c51d1cf 100644
--- a/promptlens/models/config.py
+++ b/promptlens/models/config.py
@@ -2,7 +2,7 @@
 
 from typing import Any, Dict, List, Optional
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, field_validator, model_validator
 
 
 class ProviderConfig(BaseModel):
@@ -28,6 +28,22 @@ class ProviderConfig(BaseModel):
     endpoint: Optional[str] = None
     additional_params: Dict[str, Any] = Field(default_factory=dict)
 
+    @field_validator("temperature")
+    @classmethod
+    def validate_temperature(cls, value: float) -> float:
+        """Ensure temperature stays within a sane range."""
+        if not 0 <= value <= 2:
+            raise ValueError("temperature must be between 0 and 2")
+        return value
+
+    @field_validator("max_tokens", "timeout")
+    @classmethod
+    def validate_positive_ints(cls, value: int) -> int:
+        """Ensure max_tokens/timeout are positive."""
+        if value <= 0:
+            raise ValueError("value must be greater than 0")
+        return value
+
 
 class ModelConfig(BaseModel):
     """Configuration for a model to test.
@@ -48,6 +64,22 @@ class ModelConfig(BaseModel):
     max_tokens: int = 1024
     additional_params: Dict[str, Any] = Field(default_factory=dict)
 
+    @field_validator("temperature")
+    @classmethod
+    def validate_temperature(cls, value: float) -> float:
+        """Ensure temperature stays within a sane range."""
+        if not 0 <= value <= 2:
+            raise ValueError("temperature must be between 0 and 2")
+        return value
+
+    @field_validator("max_tokens")
+    @classmethod
+    def validate_max_tokens(cls, value: int) -> int:
+        """Ensure token limits are positive."""
+        if value <= 0:
+            raise ValueError("max_tokens must be greater than 0")
+        return value
+
 
 class JudgeConfig(BaseModel):
     """Configuration for the judge.
@@ -66,6 +98,14 @@ class JudgeConfig(BaseModel):
     custom_prompt: Optional[str] = None
     criteria: List[str] = Field(default_factory=lambda: ["accuracy", "helpfulness"])
 
+    @field_validator("temperature")
+    @classmethod
+    def validate_temperature(cls, value: float) -> float:
+        """Ensure judge temperature stays within a sane range."""
+        if not 0 <= value <= 2:
+            raise ValueError("temperature must be between 0 and 2")
+        return value
+
 
 class ExecutionConfig(BaseModel):
     """Configuration for execution settings.
@@ -82,6 +122,30 @@ class ExecutionConfig(BaseModel):
     retry_delay_seconds: float = 1.0
     timeout_seconds: int = 60
 
+    @field_validator("parallel_requests", "timeout_seconds")
+    @classmethod
+    def validate_positive_ints(cls, value: int) -> int:
+        """Ensure values are positive."""
+        if value <= 0:
+            raise ValueError("value must be greater than 0")
+        return value
+
+    @field_validator("retry_attempts")
+    @classmethod
+    def validate_retry_attempts(cls, value: int) -> int:
+        """Allow zero retries, but disallow negative values."""
+        if value < 0:
+            raise ValueError("retry_attempts cannot be negative")
+        return value
+
+    @field_validator("retry_delay_seconds")
+    @classmethod
+    def validate_retry_delay(cls, value: float) -> float:
+        """Allow immediate retries, but disallow negative delay."""
+        if value < 0:
+            raise ValueError("retry_delay_seconds cannot be negative")
+        return value
+
 
 class OutputConfig(BaseModel):
     """Configuration for output settings.
@@ -96,6 +160,30 @@ class OutputConfig(BaseModel):
     formats: List[str] = Field(default_factory=lambda: ["html", "json"])
     run_name: Optional[str] = None
 
+    @field_validator("formats")
+    @classmethod
+    def validate_formats(cls, formats: List[str]) -> List[str]:
+        """Validate formats and normalize to lowercase unique entries."""
+        allowed_formats = {"html", "json", "csv", "md", "markdown"}
+        normalized: List[str] = []
+
+        for fmt in formats:
+            fmt_lower = fmt.lower().strip()
+            if fmt_lower == "markdown":
+                fmt_lower = "md"
+
+            if fmt_lower not in allowed_formats:
+                allowed = ", ".join(sorted(allowed_formats))
+                raise ValueError(f"Unsupported output format '{fmt}'. Allowed: {allowed}")
+
+            if fmt_lower not in normalized:
+                normalized.append(fmt_lower)
+
+        if not normalized:
+            raise ValueError("formats must include at least one value")
+
+        return normalized
+
 
 class RunConfig(BaseModel):
     """Complete run configuration.
@@ -114,8 +202,16 @@ class RunConfig(BaseModel):
     execution: ExecutionConfig = Field(default_factory=ExecutionConfig)
     output: OutputConfig = Field(default_factory=OutputConfig)
 
+    @model_validator(mode="after")
+    def validate_models_non_empty(self) -> "RunConfig":
+        """Ensure at least one model is configured."""
+        if not self.models:
+            raise ValueError("models must include at least one model configuration")
+        return self
+
     class Config:
         """Pydantic config."""
+
         json_schema_extra = {
             "example": {
                 "golden_set": "./examples/golden_sets/customer_support.yaml",
diff --git a/tests/test_config_validation.py b/tests/test_config_validation.py
new file mode 100644
index 0000000..26d28da
--- /dev/null
+++ b/tests/test_config_validation.py
@@ -0,0 +1,41 @@
+import pytest
+from pydantic import ValidationError
+
+from promptlens.models.config import ModelConfig, OutputConfig, RunConfig
+
+
+def test_model_config_rejects_invalid_temperature():
+    with pytest.raises(ValidationError):
+        ModelConfig(
+            name="bad-temp",
+            provider="openai",
+            model="gpt-4o",
+            temperature=2.5,
+            max_tokens=100,
+        )
+
+
+def test_model_config_rejects_non_positive_max_tokens():
+    with pytest.raises(ValidationError):
+        ModelConfig(
+            name="bad-tokens",
+            provider="openai",
+            model="gpt-4o",
+            temperature=0.5,
+            max_tokens=0,
+        )
+
+
+def test_output_config_normalizes_and_deduplicates_formats():
+    cfg = OutputConfig(formats=["HTML", "json", "markdown", "md", "JSON"])
+    assert cfg.formats == ["html", "json", "md"]
+
+
+def test_output_config_rejects_unsupported_format():
+    with pytest.raises(ValidationError):
+        OutputConfig(formats=["xml"])
+
+
+def test_run_config_requires_at_least_one_model():
+    with pytest.raises(ValidationError):
+        RunConfig(golden_set="./tests.yaml", models=[])
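
Usage sketch (reviewer note, not part of the patch): the snippet below shows
the new validation behavior end to end, assuming Pydantic v2 semantics and
the module path used in this patch. The provider and model names are
placeholder values for illustration only.

    from pydantic import ValidationError

    from promptlens.models.config import ExecutionConfig, ModelConfig, OutputConfig

    # Format aliases and duplicates collapse: casing is normalized,
    # "markdown" becomes "md", and repeats are dropped in input order.
    cfg = OutputConfig(formats=["HTML", "markdown", "MD"])
    print(cfg.formats)  # ['html', 'md']

    # Out-of-range values now fail at construction time instead of mid-run.
    try:
        ModelConfig(name="demo", provider="openai", model="gpt-4o", temperature=3.0)
    except ValidationError as exc:
        print(exc.errors()[0]["msg"])  # Value error, temperature must be between 0 and 2

    # Zero retries and zero delay are allowed; negative values are rejected.
    ExecutionConfig(retry_attempts=0, retry_delay_seconds=0.0)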