1 change: 1 addition & 0 deletions README.md
@@ -18,6 +18,7 @@ PromptLens runs golden test sets against multiple models, scores outputs using L
- **Beautiful Reports** - Interactive HTML reports with charts, comparisons, and detailed results
- **Multiple Export Formats** - HTML, JSON, CSV, and Markdown outputs
- **Parallel Execution** - Async execution with configurable concurrency and retry logic
- **Strict Config Validation** - Early validation for model temperatures, token limits, retries, and output formats
- **Portable & Local** - No cloud backend, all data stays on your machine
- **Easy to Extend** - Plugin architecture for custom providers, judges, and exporters

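The new bullet is backed by the validators added to `promptlens/models/config.py` below. A minimal sketch of what "early validation" means for a caller, using only the `ModelConfig` fields visible in this diff (the model name and values are illustrative):

```python
from pydantic import ValidationError

from promptlens.models.config import ModelConfig

try:
    # temperature is outside the 0-2 range the new validator enforces,
    # so the config fails at construction time rather than mid-run.
    ModelConfig(
        name="baseline",
        provider="openai",
        model="gpt-4o",
        temperature=3.0,
        max_tokens=512,
    )
except ValidationError as exc:
    print(exc.errors()[0]["msg"])  # e.g. "Value error, temperature must be between 0 and 2"
```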
98 changes: 97 additions & 1 deletion promptlens/models/config.py
@@ -2,7 +2,7 @@

from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field
from pydantic import BaseModel, Field, field_validator, model_validator


class ProviderConfig(BaseModel):
@@ -28,6 +28,22 @@ class ProviderConfig(BaseModel):
    endpoint: Optional[str] = None
    additional_params: Dict[str, Any] = Field(default_factory=dict)

    @field_validator("temperature")
    @classmethod
    def validate_temperature(cls, value: float) -> float:
        """Ensure temperature stays within a sane range."""
        if not 0 <= value <= 2:
            raise ValueError("temperature must be between 0 and 2")
        return value

    @field_validator("max_tokens", "timeout")
    @classmethod
    def validate_positive_ints(cls, value: int) -> int:
        """Ensure max_tokens/timeout are positive."""
        if value <= 0:
            raise ValueError("value must be greater than 0")
        return value


class ModelConfig(BaseModel):
"""Configuration for a model to test.
Expand All @@ -48,6 +64,22 @@ class ModelConfig(BaseModel):
max_tokens: int = 1024
additional_params: Dict[str, Any] = Field(default_factory=dict)

@field_validator("temperature")
@classmethod
def validate_temperature(cls, value: float) -> float:
"""Ensure temperature stays within a sane range."""
if not 0 <= value <= 2:
raise ValueError("temperature must be between 0 and 2")
return value

@field_validator("max_tokens")
@classmethod
def validate_max_tokens(cls, value: int) -> int:
"""Ensure token limits are positive."""
if value <= 0:
raise ValueError("max_tokens must be greater than 0")
return value


class JudgeConfig(BaseModel):
"""Configuration for the judge.
Expand All @@ -66,6 +98,14 @@ class JudgeConfig(BaseModel):
custom_prompt: Optional[str] = None
criteria: List[str] = Field(default_factory=lambda: ["accuracy", "helpfulness"])

@field_validator("temperature")
@classmethod
def validate_temperature(cls, value: float) -> float:
"""Ensure judge temperature stays within a sane range."""
if not 0 <= value <= 2:
raise ValueError("temperature must be between 0 and 2")
return value


class ExecutionConfig(BaseModel):
"""Configuration for execution settings.
Expand All @@ -82,6 +122,30 @@ class ExecutionConfig(BaseModel):
retry_delay_seconds: float = 1.0
timeout_seconds: int = 60

@field_validator("parallel_requests", "timeout_seconds")
@classmethod
def validate_positive_ints(cls, value: int) -> int:
"""Ensure values are positive."""
if value <= 0:
raise ValueError("value must be greater than 0")
return value

@field_validator("retry_attempts")
@classmethod
def validate_retry_attempts(cls, value: int) -> int:
"""Allow zero retries, but disallow negative values."""
if value < 0:
raise ValueError("retry_attempts cannot be negative")
return value

@field_validator("retry_delay_seconds")
@classmethod
def validate_retry_delay(cls, value: float) -> float:
"""Allow immediate retries, but disallow negative delay."""
if value < 0:
raise ValueError("retry_delay_seconds cannot be negative")
return value


class OutputConfig(BaseModel):
"""Configuration for output settings.
Expand All @@ -96,6 +160,30 @@ class OutputConfig(BaseModel):
formats: List[str] = Field(default_factory=lambda: ["html", "json"])
run_name: Optional[str] = None

@field_validator("formats")
@classmethod
def validate_formats(cls, formats: List[str]) -> List[str]:
"""Validate formats and normalize to lowercase unique entries."""
allowed_formats = {"html", "json", "csv", "md", "markdown"}
normalized: List[str] = []

for fmt in formats:
fmt_lower = fmt.lower().strip()
if fmt_lower == "markdown":
fmt_lower = "md"

if fmt_lower not in allowed_formats:
allowed = ", ".join(sorted(allowed_formats))
raise ValueError(f"Unsupported output format '{fmt}'. Allowed: {allowed}")

if fmt_lower not in normalized:
normalized.append(fmt_lower)

if not normalized:
raise ValueError("formats must include at least one value")

return normalized


class RunConfig(BaseModel):
"""Complete run configuration.
Expand All @@ -114,8 +202,16 @@ class RunConfig(BaseModel):
execution: ExecutionConfig = Field(default_factory=ExecutionConfig)
output: OutputConfig = Field(default_factory=OutputConfig)

@model_validator(mode="after")
def validate_models_non_empty(self) -> "RunConfig":
"""Ensure at least one model is configured."""
if not self.models:
raise ValueError("models must include at least one model configuration")
return self

class Config:
"""Pydantic config."""

json_schema_extra = {
"example": {
"golden_set": "./examples/golden_sets/customer_support.yaml",
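Taken together, the `OutputConfig` and `RunConfig` changes normalize report formats and reject runs with no models before anything executes. A small sketch of that behavior, assuming the fields not shown in this diff keep their defaults:

```python
from pydantic import ValidationError

from promptlens.models.config import OutputConfig, RunConfig

# Formats are lowercased, "markdown" folds into "md", and duplicates are dropped.
report = OutputConfig(formats=["HTML", "Markdown", "md", "html"])
print(report.formats)  # ['html', 'md']

try:
    # The new model_validator rejects an empty models list at parse time.
    RunConfig(golden_set="./examples/golden_sets/customer_support.yaml", models=[])
except ValidationError as exc:
    print(exc)  # includes "models must include at least one model configuration"
```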
41 changes: 41 additions & 0 deletions tests/test_config_validation.py
@@ -0,0 +1,41 @@
import pytest
from pydantic import ValidationError

from promptlens.models.config import ModelConfig, OutputConfig, RunConfig


def test_model_config_rejects_invalid_temperature():
    with pytest.raises(ValidationError):
        ModelConfig(
            name="bad-temp",
            provider="openai",
            model="gpt-4o",
            temperature=2.5,
            max_tokens=100,
        )


def test_model_config_rejects_non_positive_max_tokens():
    with pytest.raises(ValidationError):
        ModelConfig(
            name="bad-tokens",
            provider="openai",
            model="gpt-4o",
            temperature=0.5,
            max_tokens=0,
        )


def test_output_config_normalizes_and_deduplicates_formats():
    cfg = OutputConfig(formats=["HTML", "json", "markdown", "md", "JSON"])
    assert cfg.formats == ["html", "json", "md"]


def test_output_config_rejects_unsupported_format():
    with pytest.raises(ValidationError):
        OutputConfig(formats=["xml"])


def test_run_config_requires_at_least_one_model():
    with pytest.raises(ValidationError):
        RunConfig(golden_set="./tests.yaml", models=[])
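
The tests above don't touch `ExecutionConfig`; a quick sketch of the boundary its new retry validators draw (zero is allowed, negative is not), assuming the remaining fields keep their defaults:

```python
from pydantic import ValidationError

from promptlens.models.config import ExecutionConfig

# Zero retries with no delay is a valid "fail fast" setting.
ExecutionConfig(retry_attempts=0, retry_delay_seconds=0.0)

try:
    ExecutionConfig(retry_attempts=-1)
except ValidationError as exc:
    print(exc.errors()[0]["msg"])  # e.g. "Value error, retry_attempts cannot be negative"
```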