Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 104 additions & 17 deletions pyrit/score/float_scale/self_ask_likert_scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,36 +166,63 @@ def __init__(
self,
*,
chat_target: PromptChatTarget,
likert_scale: LikertScalePaths,
likert_scale: Optional[LikertScalePaths] = None,
custom_likert_path: Optional[Path] = None,
custom_system_prompt_path: Optional[Path] = None,
validator: Optional[ScorerPromptValidator] = None,
) -> None:
"""
Initialize the SelfAskLikertScorer.

Args:
chat_target (PromptChatTarget): The chat target to use for scoring.
likert_scale (LikertScalePaths): The Likert scale configuration to use for scoring.
likert_scale (Optional[LikertScalePaths]): The Likert scale configuration to use for scoring.
custom_likert_path (Optional[Path]): Path to a custom YAML file containing the Likert scale definition.
This allows users to use their own Likert scales without modifying the code, as long as
the YAML file follows the expected format. Only one of `likert_scale` or `custom_likert_path`
should be provided. Defaults to None.
custom_system_prompt_path (Optional[Path]): Path to a custom system prompt file. This allows users to
provide their own system prompt without modifying the code. Defaults to None.
validator (Optional[ScorerPromptValidator]): Custom validator for the scorer. Defaults to None.

Raises:
ValueError: If both `likert_scale` and `custom_likert_path` are provided, if neither is provided,
or if the provided Likert scale or system prompt YAML file is improperly formatted.
"""
super().__init__(validator=validator or self._DEFAULT_VALIDATOR)

self._prompt_target = chat_target
self._likert_scale = likert_scale

# Auto-set evaluation file mapping from the LikertScalePaths enum
if likert_scale.evaluation_files is not None:
from pyrit.score.scorer_evaluation.scorer_evaluator import (
ScorerEvalDatasetFiles,
)
if likert_scale is not None and custom_likert_path is not None:
raise ValueError("Only one of 'likert_scale' or 'custom_likert_path' should be provided, not both.")
if likert_scale is None and custom_likert_path is None:
raise ValueError("One of 'likert_scale' or 'custom_likert_path' must be provided.")

eval_files = likert_scale.evaluation_files
self.evaluation_file_mapping = ScorerEvalDatasetFiles(
human_labeled_datasets_files=eval_files.human_labeled_datasets_files,
result_file=eval_files.result_file,
harm_category=eval_files.harm_category,
)
self._scoring_instructions_template: Optional[SeedPrompt] = (
None # Will be set in _set_likert_scale_system_prompt
)
if custom_system_prompt_path is not None:
self._validate_custom_system_prompt_path(custom_system_prompt_path)
self._scoring_instructions_template = SeedPrompt.from_yaml_file(custom_system_prompt_path)
if likert_scale is not None:
# Auto-set evaluation file mapping from the LikertScalePaths enum
if likert_scale.evaluation_files is not None:
from pyrit.score.scorer_evaluation.scorer_evaluator import (
ScorerEvalDatasetFiles,
)

eval_files = likert_scale.evaluation_files
self.evaluation_file_mapping = ScorerEvalDatasetFiles(
human_labeled_datasets_files=eval_files.human_labeled_datasets_files,
result_file=eval_files.result_file,
harm_category=eval_files.harm_category,
)

self._set_likert_scale_system_prompt(likert_scale_path=likert_scale.path)
self._set_likert_scale_system_prompt(likert_scale_path=likert_scale.path)
elif custom_likert_path is not None:
self._validate_custom_likert_path(custom_likert_path)
self._set_likert_scale_system_prompt(likert_scale_path=custom_likert_path)

def _build_identifier(self) -> ComponentIdentifier:
"""
Expand Down Expand Up @@ -268,9 +295,12 @@ def _set_likert_scale_system_prompt(self, likert_scale_path: Path) -> None:
f"but only a single unique value was found: {self._max_scale_value}."
)

self._scoring_instructions_template = SeedPrompt.from_yaml_file(
SCORER_LIKERT_PATH / "likert_system_prompt.yaml"
)
# Only load the default system prompt template if a custom one wasn't already
# set via custom_system_prompt_path in __init__.
if self._scoring_instructions_template is None:
self._scoring_instructions_template = SeedPrompt.from_yaml_file(
SCORER_LIKERT_PATH / "likert_system_prompt.yaml"
)

self._system_prompt = self._scoring_instructions_template.render_template_value(
likert_scale=likert_scale_str,
Expand Down Expand Up @@ -337,6 +367,63 @@ def _likert_scale_description_to_string(self, descriptions: list[dict[str, str]]

return likert_scale_description

@staticmethod
def _validate_custom_system_prompt_path(custom_system_prompt_path: Path) -> None:
"""
Validate the custom system prompt path.

Checks that the file exists, has a YAML extension, and contains the required
template parameters (category, likert_scale, min_scale_value, max_scale_value)
that the Likert scorer needs to render the system prompt.

Args:
custom_system_prompt_path (Path): Path to the custom system prompt YAML file.

Raises:
FileNotFoundError: If the file does not exist.
ValueError: If the file is not a YAML file or is missing required template parameters.
"""
if not custom_system_prompt_path.exists():
raise FileNotFoundError(f"Custom system prompt file not found: '{custom_system_prompt_path}'")
if custom_system_prompt_path.suffix not in (".yaml", ".yml"):
raise ValueError(
f"Custom system prompt file must be a YAML file (.yaml or .yml), "
f"got '{custom_system_prompt_path.suffix}'."
)

# Validate the template contains all required parameters used by the Likert scorer.
SeedPrompt.from_yaml_with_required_parameters(
template_path=custom_system_prompt_path,
required_parameters=["category", "likert_scale", "min_scale_value", "max_scale_value"],
error_message=(
"Custom system prompt YAML must define parameters: "
"category, likert_scale, min_scale_value, max_scale_value"
),
)

@staticmethod
def _validate_custom_likert_path(custom_likert_path: Path) -> None:
"""
Validate the custom Likert scale path.

Performs basic path checks (existence and YAML extension). Deeper content
validation (category, scale_descriptions structure, score values) is handled
by ``_set_likert_scale_system_prompt`` when the file is actually parsed.

Args:
custom_likert_path (Path): Path to the custom Likert scale YAML file.

Raises:
FileNotFoundError: If the file does not exist.
ValueError: If the file is not a YAML file.
"""
if not custom_likert_path.exists():
raise FileNotFoundError(f"Custom Likert scale file not found: '{custom_likert_path}'")
if custom_likert_path.suffix not in (".yaml", ".yml"):
raise ValueError(
f"Custom Likert scale file must be a YAML file (.yaml or .yml), got '{custom_likert_path.suffix}'."
)

async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Optional[str] = None) -> list[Score]:
"""
Score the given message_piece using "self-ask" for the chat target.
Expand Down
199 changes: 199 additions & 0 deletions tests/unit/score/test_self_ask_likert.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,3 +403,202 @@ def test_likert_scale_missing_score_value_key_rejected(tmp_path: Path):
chat_target=chat_target,
likert_scale=LikertScalePaths.CYBER_SCALE,
)


# ---------------------------------------------------------------------------
# custom_likert_path and custom_system_prompt_path tests
# ---------------------------------------------------------------------------


def _make_custom_system_prompt_yaml(tmp_path: Path, *, include_all_params: bool = True) -> Path:
    """
    Write a system prompt YAML fixture into ``tmp_path`` and return its path.

    With ``include_all_params=True`` the file declares and uses all four template
    parameters; otherwise it declares and uses only ``category``.
    """
    if include_all_params:
        declared_params = ["category", "likert_scale", "min_scale_value", "max_scale_value"]
        template_text = (
            "Custom prompt for {{category}} with scale {{likert_scale}} "
            "from {{min_scale_value}} to {{max_scale_value}}."
        )
    else:
        declared_params = ["category"]
        template_text = "Only {{category}}."

    serialized = yaml.safe_dump(
        {
            "name": "custom test prompt",
            "description": "test",
            "parameters": declared_params,
            "data_type": "text",
            "value": template_text,
        }
    )

    target_file = tmp_path / "custom_system_prompt.yaml"
    target_file.write_text(serialized, encoding="utf-8")
    return target_file


def test_custom_likert_path_creates_scorer(tmp_path: Path):
    """A scorer built from ``custom_likert_path`` alone picks up the scale's bounds and category."""
    with patch.object(CentralMemory, "get_memory_instance", return_value=MagicMock(MemoryInterface)):
        target = MagicMock()
        target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget")

        scale_file = _make_custom_scale_yaml(tmp_path, category="custom_cat", min_val=0, max_val=3)
        likert_scorer = SelfAskLikertScorer(chat_target=target, custom_likert_path=scale_file)

        # Values are read back from the custom YAML rather than from a built-in enum scale.
        assert likert_scorer._min_scale_value == 0
        assert likert_scorer._max_scale_value == 3
        assert likert_scorer._score_category == "custom_cat"


def test_custom_likert_path_file_not_found():
    """A ``custom_likert_path`` that does not exist is reported as FileNotFoundError."""
    absent_scale = Path("/does/not/exist.yaml")

    with patch.object(CentralMemory, "get_memory_instance", return_value=MagicMock(MemoryInterface)):
        target = MagicMock()
        target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget")

        with pytest.raises(FileNotFoundError, match="Custom Likert scale file not found"):
            SelfAskLikertScorer(chat_target=target, custom_likert_path=absent_scale)


def test_custom_likert_path_non_yaml_rejected(tmp_path: Path):
    """An existing ``custom_likert_path`` without a YAML extension is rejected with ValueError."""
    text_scale = tmp_path / "scale.txt"
    text_scale.write_text("not yaml", encoding="utf-8")

    with patch.object(CentralMemory, "get_memory_instance", return_value=MagicMock(MemoryInterface)):
        target = MagicMock()
        target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget")

        with pytest.raises(ValueError, match="must be a YAML file"):
            SelfAskLikertScorer(chat_target=target, custom_likert_path=text_scale)


def test_custom_system_prompt_non_yaml_rejected(tmp_path: Path):
    """An existing ``custom_system_prompt_path`` without a YAML extension is rejected with ValueError."""
    text_prompt = tmp_path / "prompt.txt"
    text_prompt.write_text("not yaml", encoding="utf-8")

    with patch.object(CentralMemory, "get_memory_instance", return_value=MagicMock(MemoryInterface)):
        target = MagicMock()
        target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget")

        scorer_kwargs = {
            "chat_target": target,
            "likert_scale": LikertScalePaths.CYBER_SCALE,
            "custom_system_prompt_path": text_prompt,
        }
        with pytest.raises(ValueError, match="must be a YAML file"):
            SelfAskLikertScorer(**scorer_kwargs)


def test_custom_system_prompt_path_used_in_system_prompt(tmp_path: Path):
    """The rendered system prompt comes from the custom template when one is supplied."""
    with patch.object(CentralMemory, "get_memory_instance", return_value=MagicMock(MemoryInterface)):
        target = MagicMock()
        target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget")

        prompt_file = _make_custom_system_prompt_yaml(tmp_path)
        scale_file = _make_custom_scale_yaml(tmp_path, category="test_cat", min_val=1, max_val=5)

        likert_scorer = SelfAskLikertScorer(
            chat_target=target,
            custom_likert_path=scale_file,
            custom_system_prompt_path=prompt_file,
        )
        rendered = likert_scorer._system_prompt

        # Both fragments only appear in the custom template text, so their presence
        # shows the default template was not used.
        assert "Custom prompt for test_cat" in rendered
        assert "from 1 to 5" in rendered


def test_custom_system_prompt_missing_params_rejected(tmp_path: Path):
    """A custom system prompt that declares only ``category`` is rejected with ValueError."""
    with patch.object(CentralMemory, "get_memory_instance", return_value=MagicMock(MemoryInterface)):
        target = MagicMock()
        target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget")

        incomplete_prompt = _make_custom_system_prompt_yaml(tmp_path, include_all_params=False)
        scale_file = _make_custom_scale_yaml(tmp_path)

        scorer_kwargs = {
            "chat_target": target,
            "custom_likert_path": scale_file,
            "custom_system_prompt_path": incomplete_prompt,
        }
        with pytest.raises(ValueError, match="Custom system prompt YAML must define parameters"):
            SelfAskLikertScorer(**scorer_kwargs)


def test_both_likert_scale_and_custom_path_raises():
    """Supplying ``likert_scale`` together with ``custom_likert_path`` is rejected with ValueError."""
    with patch.object(CentralMemory, "get_memory_instance", return_value=MagicMock(MemoryInterface)):
        target = MagicMock()
        target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget")

        conflicting_kwargs = {
            "chat_target": target,
            "likert_scale": LikertScalePaths.CYBER_SCALE,
            "custom_likert_path": Path("dummy.yaml"),
        }
        with pytest.raises(ValueError, match="Only one of"):
            SelfAskLikertScorer(**conflicting_kwargs)


def test_neither_likert_scale_nor_custom_path_raises():
    """Omitting both ``likert_scale`` and ``custom_likert_path`` is rejected with ValueError."""
    with patch.object(CentralMemory, "get_memory_instance", return_value=MagicMock(MemoryInterface)):
        target = MagicMock()
        target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget")

        with pytest.raises(ValueError, match="One of"):
            SelfAskLikertScorer(chat_target=target)


def test_custom_system_prompt_file_not_found():
    """A ``custom_system_prompt_path`` that does not exist is reported as FileNotFoundError."""
    absent_prompt = Path("/does/not/exist.yaml")

    with patch.object(CentralMemory, "get_memory_instance", return_value=MagicMock(MemoryInterface)):
        target = MagicMock()
        target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget")

        scorer_kwargs = {
            "chat_target": target,
            "likert_scale": LikertScalePaths.CYBER_SCALE,
            "custom_system_prompt_path": absent_prompt,
        }
        with pytest.raises(FileNotFoundError, match="Custom system prompt file not found"):
            SelfAskLikertScorer(**scorer_kwargs)


def test_custom_likert_yaml_not_a_dict_rejected(tmp_path: Path):
    """A custom scale file whose top-level YAML node is a list is rejected with ValueError."""
    list_rooted_file = tmp_path / "bad_structure.yaml"
    list_rooted_file.write_text("- item1\n- item2\n", encoding="utf-8")

    with patch.object(CentralMemory, "get_memory_instance", return_value=MagicMock(MemoryInterface)):
        target = MagicMock()
        target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget")

        with pytest.raises(ValueError, match="must contain a YAML mapping/dictionary"):
            SelfAskLikertScorer(chat_target=target, custom_likert_path=list_rooted_file)


def test_likert_scale_single_unique_value_rejected(tmp_path: Path):
    """A custom scale that defines a single score level is rejected with ValueError."""
    single_level_scale = {
        "category": "test_harm",
        "scale_descriptions": [
            {"score_value": "3", "description": "Only level"},
        ],
    }
    scale_file = tmp_path / "single_value.yaml"
    scale_file.write_text(yaml.safe_dump(single_level_scale), encoding="utf-8")

    with patch.object(CentralMemory, "get_memory_instance", return_value=MagicMock(MemoryInterface)):
        target = MagicMock()
        target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget")

        with pytest.raises(ValueError, match="at least two distinct score values"):
            SelfAskLikertScorer(chat_target=target, custom_likert_path=scale_file)
Loading