From 05c03ea850af93620298b461aa5341d5424a1110 Mon Sep 17 00:00:00 2001
From: Mouse
Date: Tue, 7 Apr 2026 13:32:17 -0700
Subject: [PATCH] feat: harden run configuration validation and format
 normalization

---
 README.md                       |  1 +
 promptlens/models/config.py     | 98 ++++++++++++++++++++++++++++++++-
 tests/test_config_validation.py | 41 ++++++++++++++
 3 files changed, 139 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_config_validation.py

diff --git a/README.md b/README.md
index 68d076e..784a6eb 100644
--- a/README.md
+++ b/README.md
@@ -18,6 +18,7 @@ PromptLens runs golden test sets against multiple models, scores outputs using L
 - **Beautiful Reports** - Interactive HTML reports with charts, comparisons, and detailed results
 - **Multiple Export Formats** - HTML, JSON, CSV, and Markdown outputs
 - **Parallel Execution** - Async execution with configurable concurrency and retry logic
+- **Strict Config Validation** - Early validation of model temperatures, token limits, retry settings, and output formats
 - **Portable & Local** - No cloud backend, all data stays on your machine
 - **Easy to Extend** - Plugin architecture for custom providers, judges, and exporters
 

diff --git a/promptlens/models/config.py b/promptlens/models/config.py
index 7c85798..c51d1cf 100644
--- a/promptlens/models/config.py
+++ b/promptlens/models/config.py
@@ -2,7 +2,7 @@
 
 from typing import Any, Dict, List, Optional
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, field_validator, model_validator
 
 
 class ProviderConfig(BaseModel):
@@ -28,6 +28,22 @@ class ProviderConfig(BaseModel):
     endpoint: Optional[str] = None
     additional_params: Dict[str, Any] = Field(default_factory=dict)
 
+    @field_validator("temperature")
+    @classmethod
+    def validate_temperature(cls, value: float) -> float:
+        """Ensure temperature stays within a sane range."""
+        if not 0 <= value <= 2:
+            raise ValueError("temperature must be between 0 and 2")
+        return value
+
+    @field_validator("max_tokens", "timeout")
+    @classmethod
+    def validate_positive_ints(cls, value: int) -> int:
+        """Ensure max_tokens/timeout are positive."""
+        if value <= 0:
+            raise ValueError("value must be greater than 0")
+        return value
+
 
 class ModelConfig(BaseModel):
     """Configuration for a model to test.
@@ -48,6 +64,22 @@ class ModelConfig(BaseModel):
     max_tokens: int = 1024
     additional_params: Dict[str, Any] = Field(default_factory=dict)
 
+    @field_validator("temperature")
+    @classmethod
+    def validate_temperature(cls, value: float) -> float:
+        """Ensure temperature stays within a sane range."""
+        if not 0 <= value <= 2:
+            raise ValueError("temperature must be between 0 and 2")
+        return value
+
+    @field_validator("max_tokens")
+    @classmethod
+    def validate_max_tokens(cls, value: int) -> int:
+        """Ensure token limits are positive."""
+        if value <= 0:
+            raise ValueError("max_tokens must be greater than 0")
+        return value
+
 
 class JudgeConfig(BaseModel):
     """Configuration for the judge.
@@ -66,6 +98,14 @@ class JudgeConfig(BaseModel):
     custom_prompt: Optional[str] = None
     criteria: List[str] = Field(default_factory=lambda: ["accuracy", "helpfulness"])
 
+    @field_validator("temperature")
+    @classmethod
+    def validate_temperature(cls, value: float) -> float:
+        """Ensure judge temperature stays within a sane range."""
+        if not 0 <= value <= 2:
+            raise ValueError("temperature must be between 0 and 2")
+        return value
+
 
 class ExecutionConfig(BaseModel):
     """Configuration for execution settings.
@@ -82,6 +122,30 @@ class ExecutionConfig(BaseModel):
     retry_delay_seconds: float = 1.0
     timeout_seconds: int = 60
 
+    @field_validator("parallel_requests", "timeout_seconds")
+    @classmethod
+    def validate_positive_ints(cls, value: int) -> int:
+        """Ensure values are positive."""
+        if value <= 0:
+            raise ValueError("value must be greater than 0")
+        return value
+
+    @field_validator("retry_attempts")
+    @classmethod
+    def validate_retry_attempts(cls, value: int) -> int:
+        """Allow zero retries, but disallow negative values."""
+        if value < 0:
+            raise ValueError("retry_attempts cannot be negative")
+        return value
+
+    @field_validator("retry_delay_seconds")
+    @classmethod
+    def validate_retry_delay(cls, value: float) -> float:
+        """Allow immediate retries, but disallow negative delay."""
+        if value < 0:
+            raise ValueError("retry_delay_seconds cannot be negative")
+        return value
+
 
 class OutputConfig(BaseModel):
     """Configuration for output settings.
@@ -96,6 +160,30 @@ class OutputConfig(BaseModel):
     formats: List[str] = Field(default_factory=lambda: ["html", "json"])
     run_name: Optional[str] = None
 
+    @field_validator("formats")
+    @classmethod
+    def validate_formats(cls, formats: List[str]) -> List[str]:
+        """Validate formats and normalize to lowercase unique entries."""
+        allowed_formats = {"html", "json", "csv", "md", "markdown"}
+        normalized: List[str] = []
+
+        for fmt in formats:
+            fmt_lower = fmt.lower().strip()
+            if fmt_lower == "markdown":
+                fmt_lower = "md"
+
+            if fmt_lower not in allowed_formats:
+                allowed = ", ".join(sorted(allowed_formats))
+                raise ValueError(f"Unsupported output format '{fmt}'. Allowed: {allowed}")
+
+            if fmt_lower not in normalized:
+                normalized.append(fmt_lower)
+
+        if not normalized:
+            raise ValueError("formats must include at least one value")
+
+        return normalized
+
 
 class RunConfig(BaseModel):
     """Complete run configuration.
@@ -114,8 +202,16 @@ class RunConfig(BaseModel):
     execution: ExecutionConfig = Field(default_factory=ExecutionConfig)
     output: OutputConfig = Field(default_factory=OutputConfig)
 
+    @model_validator(mode="after")
+    def validate_models_non_empty(self) -> "RunConfig":
+        """Ensure at least one model is configured."""
+        if not self.models:
+            raise ValueError("models must include at least one model configuration")
+        return self
+
     class Config:
         """Pydantic config."""
+
         json_schema_extra = {
             "example": {
                 "golden_set": "./examples/golden_sets/customer_support.yaml",
diff --git a/tests/test_config_validation.py b/tests/test_config_validation.py
new file mode 100644
index 0000000..26d28da
--- /dev/null
+++ b/tests/test_config_validation.py
@@ -0,0 +1,41 @@
+import pytest
+from pydantic import ValidationError
+
+from promptlens.models.config import ModelConfig, OutputConfig, RunConfig
+
+
+def test_model_config_rejects_invalid_temperature():
+    with pytest.raises(ValidationError):
+        ModelConfig(
+            name="bad-temp",
+            provider="openai",
+            model="gpt-4o",
+            temperature=2.5,
+            max_tokens=100,
+        )
+
+
+def test_model_config_rejects_non_positive_max_tokens():
+    with pytest.raises(ValidationError):
+        ModelConfig(
+            name="bad-tokens",
+            provider="openai",
+            model="gpt-4o",
+            temperature=0.5,
+            max_tokens=0,
+        )
+
+
+def test_output_config_normalizes_and_deduplicates_formats():
+    cfg = OutputConfig(formats=["HTML", "json", "markdown", "md", "JSON"])
+    assert cfg.formats == ["html", "json", "md"]
+
+
+def test_output_config_rejects_unsupported_format():
+    with pytest.raises(ValidationError):
+        OutputConfig(formats=["xml"])
+
+
+def test_run_config_requires_at_least_one_model():
+    with pytest.raises(ValidationError):
+        RunConfig(golden_set="./tests.yaml", models=[])
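
Usage sketch (reviewer note, not part of the patch): the snippet below shows
the new validation behavior end to end, assuming Pydantic v2 semantics and
the module path used in this patch. The provider and model names are
placeholder values for illustration only.

    from pydantic import ValidationError

    from promptlens.models.config import ExecutionConfig, ModelConfig, OutputConfig

    # Format aliases and duplicates collapse: casing is normalized,
    # "markdown" becomes "md", and repeats are dropped in input order.
    cfg = OutputConfig(formats=["HTML", "markdown", "MD"])
    print(cfg.formats)  # ['html', 'md']

    # Out-of-range values now fail at construction time instead of mid-run.
    try:
        ModelConfig(name="demo", provider="openai", model="gpt-4o", temperature=3.0)
    except ValidationError as exc:
        print(exc.errors()[0]["msg"])  # Value error, temperature must be between 0 and 2

    # Zero retries and zero delay are allowed; negative values are rejected.
    ExecutionConfig(retry_attempts=0, retry_delay_seconds=0.0)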