From 82728831b30a01923a026e2c8b093c02a2560230 Mon Sep 17 00:00:00 2001 From: Richard Lundeen Date: Thu, 19 Mar 2026 15:56:18 -0700 Subject: [PATCH 1/7] fixing eval hash --- pyrit/identifiers/component_identifier.py | 22 +++++- pyrit/identifiers/evaluation_identifier.py | 19 +++-- pyrit/memory/memory_models.py | 31 +++++++- .../identifiers/test_component_identifier.py | 71 +++++++++++++++++++ .../identifiers/test_evaluation_identifier.py | 69 ++++++++++++++++++ 5 files changed, 203 insertions(+), 9 deletions(-) diff --git a/pyrit/identifiers/component_identifier.py b/pyrit/identifiers/component_identifier.py index 501d16dbbf..45544a2826 100644 --- a/pyrit/identifiers/component_identifier.py +++ b/pyrit/identifiers/component_identifier.py @@ -113,6 +113,7 @@ class ComponentIdentifier: KEY_CLASS_NAME: ClassVar[str] = "class_name" KEY_CLASS_MODULE: ClassVar[str] = "class_module" KEY_HASH: ClassVar[str] = "hash" + KEY_EVAL_HASH: ClassVar[str] = "eval_hash" KEY_PYRIT_VERSION: ClassVar[str] = "pyrit_version" KEY_CHILDREN: ClassVar[str] = "children" LEGACY_KEY_TYPE: ClassVar[str] = "__type__" @@ -130,6 +131,10 @@ class ComponentIdentifier: hash: str = field(init=False, compare=False) #: Version tag for storage. Not included in hash. pyrit_version: str = field(default_factory=lambda: pyrit.__version__, compare=False) + #: Evaluation hash preserved from DB round-trip. Computed before truncation and + #: stored alongside the identity so that EvaluationIdentifier can use it directly + #: instead of recomputing from potentially truncated params. + stored_eval_hash: Optional[str] = field(default=None, init=False, compare=False) def __post_init__(self) -> None: """Compute the content-addressed hash at creation time.""" @@ -231,7 +236,7 @@ def normalize(cls, value: Union[ComponentIdentifier, dict[str, Any]]) -> Compone return cls.from_dict(value) raise TypeError(f"Expected ComponentIdentifier or dict, got {type(value).__name__}") - def to_dict(self, *, max_value_length: Optional[int] = None) -> dict[str, Any]: + def to_dict(self, *, max_value_length: Optional[int] = None, eval_hash: Optional[str] = None) -> dict[str, Any]: """ Serialize to a JSON-compatible dictionary for DB/JSONL storage. @@ -246,6 +251,10 @@ def to_dict(self, *, max_value_length: Optional[int] = None) -> dict[str, Any]: DB storage where column sizes may be limited. The truncation applies only to param values, not to structural keys like class_name or hash. The limit is propagated to children. Defaults to None (no truncation). + eval_hash (Optional[str]): If provided, the evaluation hash is included in + the serialized dict. This should be computed before truncation so that + it can be recovered via ``from_dict()`` even when param values are + truncated. Defaults to None (no eval_hash stored). Returns: Dict[str, Any]: JSON-serializable dictionary suitable for database storage @@ -258,6 +267,11 @@ def to_dict(self, *, max_value_length: Optional[int] = None) -> dict[str, Any]: self.KEY_PYRIT_VERSION: self.pyrit_version, } + # Include eval_hash if explicitly provided or if preserved from a prior round-trip + effective_eval_hash = eval_hash if eval_hash is not None else self.stored_eval_hash + if effective_eval_hash is not None: + result[self.KEY_EVAL_HASH] = effective_eval_hash + for key, value in self.params.items(): result[key] = self._truncate_value(value=value, max_length=max_value_length) @@ -324,6 +338,7 @@ def from_dict(cls, data: dict[str, Any]) -> ComponentIdentifier: class_module = data.pop(cls.KEY_CLASS_MODULE, None) or data.pop(cls.LEGACY_KEY_MODULE, None) or "unknown" stored_hash = data.pop(cls.KEY_HASH, None) + stored_eval_hash = data.pop(cls.KEY_EVAL_HASH, None) pyrit_version = data.pop(cls.KEY_PYRIT_VERSION, pyrit.__version__) # Reconstruct children @@ -346,6 +361,11 @@ def from_dict(cls, data: dict[str, Any]) -> ComponentIdentifier: if stored_hash: object.__setattr__(identifier, "hash", stored_hash) + # Preserve stored eval_hash if available — computed before truncation + # so that EvaluationIdentifier can use it directly. + if stored_eval_hash: + object.__setattr__(identifier, "stored_eval_hash", stored_eval_hash) + return identifier def get_child(self, key: str) -> Optional[ComponentIdentifier]: diff --git a/pyrit/identifiers/evaluation_identifier.py b/pyrit/identifiers/evaluation_identifier.py index 98d338eade..be90b4f2f6 100644 --- a/pyrit/identifiers/evaluation_identifier.py +++ b/pyrit/identifiers/evaluation_identifier.py @@ -170,12 +170,21 @@ class EvaluationIdentifier(ABC): CHILD_EVAL_RULES: ClassVar[dict[str, ChildEvalRule]] def __init__(self, identifier: ComponentIdentifier) -> None: - """Wrap a ComponentIdentifier and eagerly compute its eval hash.""" + """Wrap a ComponentIdentifier and resolve its eval hash. + + If the identifier carries a ``stored_eval_hash`` (preserved from a prior + DB round-trip), that value is used directly. Otherwise the eval hash is + computed from the identifier's params and children using the subclass's + ``CHILD_EVAL_RULES``. + """ self._identifier = identifier - self._eval_hash = compute_eval_hash( - identifier, - child_eval_rules=self.CHILD_EVAL_RULES, - ) + if identifier.stored_eval_hash is not None: + self._eval_hash = identifier.stored_eval_hash + else: + self._eval_hash = compute_eval_hash( + identifier, + child_eval_rules=self.CHILD_EVAL_RULES, + ) @property def identifier(self) -> ComponentIdentifier: diff --git a/pyrit/memory/memory_models.py b/pyrit/memory/memory_models.py index e9c83b9300..d5c680322b 100644 --- a/pyrit/memory/memory_models.py +++ b/pyrit/memory/memory_models.py @@ -398,7 +398,12 @@ def __init__(self, *, entry: Score): self.score_metadata = entry.score_metadata # Normalize to ComponentIdentifier (handles dict with deprecation warning) then convert to dict for JSON storage normalized_scorer = ComponentIdentifier.normalize(entry.scorer_class_identifier) - self.scorer_class_identifier = normalized_scorer.to_dict(max_value_length=MAX_IDENTIFIER_VALUE_LENGTH) + # Compute eval_hash from untruncated identifier before truncation + scorer_eval_hash = self._compute_scorer_eval_hash(normalized_scorer) + self.scorer_class_identifier = normalized_scorer.to_dict( + max_value_length=MAX_IDENTIFIER_VALUE_LENGTH, + eval_hash=scorer_eval_hash, + ) self.prompt_request_response_id = entry.message_piece_id if entry.message_piece_id else None self.timestamp = entry.timestamp # Store in both columns for backward compatibility @@ -407,6 +412,16 @@ def __init__(self, *, entry: Score): self.objective = entry.objective self.pyrit_version = pyrit.__version__ + @staticmethod + def _compute_scorer_eval_hash(scorer_identifier: ComponentIdentifier) -> Optional[str]: + """Compute scorer eval_hash from an untruncated identifier.""" + from pyrit.identifiers.evaluation_identifier import ScorerEvaluationIdentifier + + try: + return ScorerEvaluationIdentifier(scorer_identifier).eval_hash + except Exception: + return None + def get_score(self) -> Score: """ Convert this database entry back into a Score object. @@ -964,6 +979,8 @@ def __init__(self, *, entry: ScenarioResult): Args: entry (ScenarioResult): The scenario result object to convert into a database entry. """ + from pyrit.identifiers.evaluation_identifier import ScorerEvaluationIdentifier + self.id = entry.id self.scenario_name = entry.scenario_identifier.name self.scenario_description = entry.scenario_identifier.description @@ -974,9 +991,17 @@ def __init__(self, *, entry: ScenarioResult): self.objective_target_identifier = entry.objective_target_identifier.to_dict( max_value_length=MAX_IDENTIFIER_VALUE_LENGTH ) - # Convert ComponentIdentifier to dict for JSON storage + # Compute eval_hash from untruncated identifier BEFORE truncation, then include + # it in the serialized dict so it survives the DB round-trip. + scorer_eval_hash = None + if entry.objective_scorer_identifier: + scorer_eval_hash = ScorerEvaluationIdentifier(entry.objective_scorer_identifier).eval_hash + self.objective_scorer_identifier = ( - entry.objective_scorer_identifier.to_dict(max_value_length=MAX_IDENTIFIER_VALUE_LENGTH) + entry.objective_scorer_identifier.to_dict( + max_value_length=MAX_IDENTIFIER_VALUE_LENGTH, + eval_hash=scorer_eval_hash, + ) if entry.objective_scorer_identifier else None ) diff --git a/tests/unit/identifiers/test_component_identifier.py b/tests/unit/identifiers/test_component_identifier.py index ccf186a14e..43edd9dd1b 100644 --- a/tests/unit/identifiers/test_component_identifier.py +++ b/tests/unit/identifiers/test_component_identifier.py @@ -544,6 +544,77 @@ def test_roundtrip_with_list_children(self): assert isinstance(recon_converters, list) assert len(recon_converters) == 2 + def test_roundtrip_preserves_eval_hash(self): + """Test that eval_hash is preserved through to_dict -> from_dict round-trip.""" + original = ComponentIdentifier( + class_name="Scorer", + class_module="pyrit.score", + params={"system_prompt": "Score the response"}, + ) + expected_eval_hash = "abc123" * 10 + "abcd" # 64 chars + d = original.to_dict(eval_hash=expected_eval_hash) + assert d["eval_hash"] == expected_eval_hash + + reconstructed = ComponentIdentifier.from_dict(d) + assert reconstructed.stored_eval_hash == expected_eval_hash + + def test_roundtrip_eval_hash_survives_truncation(self): + """Regression test: eval_hash computed before truncation is preserved after round-trip. + + This is the core bug fix — long params get truncated in to_dict(), which would + cause eval_hash recomputation to produce a wrong hash. By storing eval_hash in + the dict, it survives truncation. + """ + long_prompt = "You are a scorer that evaluates responses. " * 20 # >80 chars + original = ComponentIdentifier( + class_name="SelfAskTrueFalseScorer", + class_module="pyrit.score", + params={"system_prompt_template": long_prompt}, + ) + eval_hash_before_truncation = "correct_eval_hash_" + "0" * 46 # 64 chars + + # Serialize with truncation AND eval_hash (simulates DB storage) + truncated_dict = original.to_dict(max_value_length=80, eval_hash=eval_hash_before_truncation) + # Params are truncated + assert truncated_dict["system_prompt_template"].endswith("...") + # But eval_hash is preserved + assert truncated_dict["eval_hash"] == eval_hash_before_truncation + + # Deserialize + reconstructed = ComponentIdentifier.from_dict(truncated_dict) + # eval_hash is available on the reconstructed identifier + assert reconstructed.stored_eval_hash == eval_hash_before_truncation + # And it's NOT in params (from_dict pops it as a reserved key) + assert "eval_hash" not in reconstructed.params + + def test_roundtrip_no_eval_hash_when_not_provided(self): + """Test that stored_eval_hash is None when not included in serialization.""" + original = ComponentIdentifier( + class_name="Test", + class_module="mod", + params={"key": "value"}, + ) + d = original.to_dict() + assert "eval_hash" not in d + + reconstructed = ComponentIdentifier.from_dict(d) + assert reconstructed.stored_eval_hash is None + + def test_to_dict_includes_stored_eval_hash_from_prior_roundtrip(self): + """Test that to_dict re-emits stored_eval_hash from a prior round-trip.""" + eval_hash = "deadbeef" * 8 # 64 chars + original = ComponentIdentifier( + class_name="Test", + class_module="mod", + ) + # Simulate a prior round-trip that stored an eval_hash + d1 = original.to_dict(eval_hash=eval_hash) + reconstructed = ComponentIdentifier.from_dict(d1) + + # Re-serialize without explicitly passing eval_hash — stored one should be emitted + d2 = reconstructed.to_dict() + assert d2["eval_hash"] == eval_hash + class TestComponentIdentifierNormalize: """Tests for normalize class method.""" diff --git a/tests/unit/identifiers/test_evaluation_identifier.py b/tests/unit/identifiers/test_evaluation_identifier.py index cf62299f96..8e0b04310a 100644 --- a/tests/unit/identifiers/test_evaluation_identifier.py +++ b/tests/unit/identifiers/test_evaluation_identifier.py @@ -222,3 +222,72 @@ class CustomIdentity(EvaluationIdentifier): }, ) assert identity.eval_hash == expected + + def test_uses_stored_eval_hash_when_available(self): + """Test that EvaluationIdentifier uses stored_eval_hash instead of recomputing.""" + stored_hash = "stored_eval_hash_value_" + "0" * 42 # 64 chars + cid = ComponentIdentifier( + class_name="Scorer", + class_module="pyrit.score", + params={"system_prompt": "truncated..."}, + ) + # Simulate a DB round-trip where stored_eval_hash was preserved + object.__setattr__(cid, "stored_eval_hash", stored_hash) + + identity = _StubEvaluationIdentifier(cid) + assert identity.eval_hash == stored_hash + + def test_computes_eval_hash_when_stored_is_none(self): + """Test that eval_hash is computed normally when stored_eval_hash is None.""" + cid = ComponentIdentifier( + class_name="Scorer", + class_module="pyrit.score", + params={"threshold": 0.5}, + ) + assert cid.stored_eval_hash is None + + identity = _StubEvaluationIdentifier(cid) + expected = compute_eval_hash(cid, child_eval_rules=_StubEvaluationIdentifier.CHILD_EVAL_RULES) + assert identity.eval_hash == expected + + def test_truncation_roundtrip_preserves_eval_hash(self): + """Regression test: eval_hash survives DB round-trip with param truncation. + + This is the core scenario for the bug fix. A scorer with a long system_prompt + gets stored to the DB with truncation. The eval_hash computed from the untruncated + identifier is included in to_dict(). After from_dict() reconstruction, the + EvaluationIdentifier should use the stored eval_hash (not recompute from truncated params). + """ + # Build a scorer identifier with a long system_prompt and a target child + long_prompt = "Evaluate whether the response achieves the objective. " * 10 + target_child = ComponentIdentifier( + class_name="OpenAIChatTarget", + class_module="pyrit.prompt_target", + params={"model_name": "gpt-4o", "endpoint": "https://api.openai.com", "temperature": 0.0}, + ) + scorer_id = ComponentIdentifier( + class_name="SelfAskTrueFalseScorer", + class_module="pyrit.score", + params={"system_prompt_template": long_prompt}, + children={"prompt_target": target_child}, + ) + + # Compute eval_hash from the untruncated identifier (the correct hash) + correct_eval_hash = compute_eval_hash(scorer_id, child_eval_rules=_CHILD_EVAL_RULES) + + # Simulate DB storage: serialize with truncation + eval_hash + truncated_dict = scorer_id.to_dict(max_value_length=80, eval_hash=correct_eval_hash) + + # Verify params are actually truncated + assert truncated_dict["system_prompt_template"].endswith("...") + + # Reconstruct from truncated dict (simulates DB read) + reconstructed = ComponentIdentifier.from_dict(truncated_dict) + + # The reconstructed identifier has truncated params, so recomputing would give wrong hash + recomputed = compute_eval_hash(reconstructed, child_eval_rules=_CHILD_EVAL_RULES) + assert recomputed != correct_eval_hash, "Truncated params should produce different eval_hash" + + # But EvaluationIdentifier uses stored_eval_hash, giving the correct result + identity = _StubEvaluationIdentifier(reconstructed) + assert identity.eval_hash == correct_eval_hash From 5fcd6e2d20ff234ca552bbe7edbd1fb6085ed994 Mon Sep 17 00:00:00 2001 From: Richard Lundeen Date: Thu, 19 Mar 2026 16:04:04 -0700 Subject: [PATCH 2/7] copilot --- pyrit/memory/memory_models.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pyrit/memory/memory_models.py b/pyrit/memory/memory_models.py index d5c680322b..d49348c5f9 100644 --- a/pyrit/memory/memory_models.py +++ b/pyrit/memory/memory_models.py @@ -420,6 +420,11 @@ def _compute_scorer_eval_hash(scorer_identifier: ComponentIdentifier) -> Optiona try: return ScorerEvaluationIdentifier(scorer_identifier).eval_hash except Exception: + logger.warning( + f"Failed to compute eval_hash for scorer {scorer_identifier.class_name}; " + "eval_hash will not be stored.", + exc_info=True, + ) return None def get_score(self) -> Score: From 41a56e78649fbd9528cd6aa8717c762ee8062a39 Mon Sep 17 00:00:00 2001 From: Richard Lundeen Date: Thu, 19 Mar 2026 16:09:40 -0700 Subject: [PATCH 3/7] adding hash to atomic attack --- pyrit/memory/memory_models.py | 24 +++++++++++++++++++++++- pyrit/scenario/core/atomic_attack.py | 16 ++++++++++++++-- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/pyrit/memory/memory_models.py b/pyrit/memory/memory_models.py index d49348c5f9..f4d9e2c90b 100644 --- a/pyrit/memory/memory_models.py +++ b/pyrit/memory/memory_models.py @@ -790,8 +790,13 @@ def __init__(self, *, entry: AttackResult): self.attack_identifier = ( _attack_strategy_id.to_dict(max_value_length=MAX_IDENTIFIER_VALUE_LENGTH) if _attack_strategy_id else {} ) + # Compute eval_hash from untruncated identifier before truncation + attack_eval_hash = self._compute_attack_eval_hash(entry.atomic_attack_identifier) self.atomic_attack_identifier = ( - entry.atomic_attack_identifier.to_dict(max_value_length=MAX_IDENTIFIER_VALUE_LENGTH) + entry.atomic_attack_identifier.to_dict( + max_value_length=MAX_IDENTIFIER_VALUE_LENGTH, + eval_hash=attack_eval_hash, + ) if entry.atomic_attack_identifier else None ) @@ -819,6 +824,23 @@ def __init__(self, *, entry: AttackResult): self.timestamp = datetime.now(tz=timezone.utc) self.pyrit_version = pyrit.__version__ + @staticmethod + def _compute_attack_eval_hash(attack_identifier: Optional[ComponentIdentifier]) -> Optional[str]: + """Compute attack eval_hash from an untruncated identifier.""" + if attack_identifier is None: + return None + from pyrit.identifiers.evaluation_identifier import AtomicAttackEvaluationIdentifier + + try: + return AtomicAttackEvaluationIdentifier(attack_identifier).eval_hash + except Exception: + logger.warning( + f"Failed to compute eval_hash for attack {attack_identifier.class_name}; " + "eval_hash will not be stored.", + exc_info=True, + ) + return None + @staticmethod def _get_id_as_uuid(obj: Any) -> Optional[uuid.UUID]: """ diff --git a/pyrit/scenario/core/atomic_attack.py b/pyrit/scenario/core/atomic_attack.py index 81bea9fe27..c77c542455 100644 --- a/pyrit/scenario/core/atomic_attack.py +++ b/pyrit/scenario/core/atomic_attack.py @@ -251,13 +251,25 @@ def _enrich_atomic_attack_identifiers(self, *, results: AttackExecutorResult[Att seed_group=self._seed_groups[idx], ) - # Persist the enriched identifier back to the database + # Persist the enriched identifier back to the database. + # Compute eval_hash before truncation so it survives the DB round-trip. + from pyrit.identifiers.evaluation_identifier import AtomicAttackEvaluationIdentifier + + attack_eval_hash = None + try: + attack_eval_hash = AtomicAttackEvaluationIdentifier( + result.atomic_attack_identifier + ).eval_hash + except Exception: + pass + if result.attack_result_id: memory.update_attack_result_by_id( attack_result_id=result.attack_result_id, update_fields={ "atomic_attack_identifier": result.atomic_attack_identifier.to_dict( - max_value_length=MAX_IDENTIFIER_VALUE_LENGTH + max_value_length=MAX_IDENTIFIER_VALUE_LENGTH, + eval_hash=attack_eval_hash, ), }, ) From 2e09b7ba3ba82efb20e80b7378078fa09a3e84ea Mon Sep 17 00:00:00 2001 From: Richard Lundeen Date: Thu, 19 Mar 2026 16:21:15 -0700 Subject: [PATCH 4/7] Fix eval_hash mismatch from param truncation in DB storage Store eval_hash inside ComponentIdentifier serialization (to_dict/from_dict) so it survives DB round-trips without recomputation from truncated params. - ComponentIdentifier: added stored_eval_hash field and KEY_EVAL_HASH - EvaluationIdentifier: uses stored_eval_hash when available - ScenarioResultEntry/ScoreEntry/AttackResultEntry: compute eval_hash before truncation - atomic_attack.py: same fix for enriched identifier persistence - Tests: round-trip, double round-trip, and regression tests Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- pyrit/identifiers/component_identifier.py | 28 +++------ pyrit/identifiers/evaluation_identifier.py | 15 ++--- pyrit/memory/memory_models.py | 61 +++++++++---------- pyrit/scenario/core/atomic_attack.py | 18 +++--- pyrit/score/float_scale/float_scale_scorer.py | 2 +- pyrit/score/scorer.py | 19 +++--- .../scorer_evaluation/scorer_evaluator.py | 4 +- pyrit/score/true_false/true_false_scorer.py | 2 +- .../identifiers/test_component_identifier.py | 53 ++++++++++++---- .../identifiers/test_evaluation_identifier.py | 50 ++++++++++++--- .../test_scorer_evaluation_identifier.py | 15 +++-- tests/unit/score/test_scorer_evaluator.py | 15 +++-- 12 files changed, 164 insertions(+), 118 deletions(-) diff --git a/pyrit/identifiers/component_identifier.py b/pyrit/identifiers/component_identifier.py index 45544a2826..73d976b706 100644 --- a/pyrit/identifiers/component_identifier.py +++ b/pyrit/identifiers/component_identifier.py @@ -131,10 +131,10 @@ class ComponentIdentifier: hash: str = field(init=False, compare=False) #: Version tag for storage. Not included in hash. pyrit_version: str = field(default_factory=lambda: pyrit.__version__, compare=False) - #: Evaluation hash preserved from DB round-trip. Computed before truncation and - #: stored alongside the identity so that EvaluationIdentifier can use it directly - #: instead of recomputing from potentially truncated params. - stored_eval_hash: Optional[str] = field(default=None, init=False, compare=False) + #: Evaluation hash. Computed by EvaluationIdentifier subclasses (e.g. ScorerEvaluationIdentifier) + #: and attached to the identifier so it is always available via ``to_dict()``. + #: Survives DB round-trips even when param values are truncated. + eval_hash: Optional[str] = field(default=None, init=False, compare=False) def __post_init__(self) -> None: """Compute the content-addressed hash at creation time.""" @@ -236,7 +236,7 @@ def normalize(cls, value: Union[ComponentIdentifier, dict[str, Any]]) -> Compone return cls.from_dict(value) raise TypeError(f"Expected ComponentIdentifier or dict, got {type(value).__name__}") - def to_dict(self, *, max_value_length: Optional[int] = None, eval_hash: Optional[str] = None) -> dict[str, Any]: + def to_dict(self, *, max_value_length: Optional[int] = None) -> dict[str, Any]: """ Serialize to a JSON-compatible dictionary for DB/JSONL storage. @@ -251,10 +251,6 @@ def to_dict(self, *, max_value_length: Optional[int] = None, eval_hash: Optional DB storage where column sizes may be limited. The truncation applies only to param values, not to structural keys like class_name or hash. The limit is propagated to children. Defaults to None (no truncation). - eval_hash (Optional[str]): If provided, the evaluation hash is included in - the serialized dict. This should be computed before truncation so that - it can be recovered via ``from_dict()`` even when param values are - truncated. Defaults to None (no eval_hash stored). Returns: Dict[str, Any]: JSON-serializable dictionary suitable for database storage @@ -267,10 +263,8 @@ def to_dict(self, *, max_value_length: Optional[int] = None, eval_hash: Optional self.KEY_PYRIT_VERSION: self.pyrit_version, } - # Include eval_hash if explicitly provided or if preserved from a prior round-trip - effective_eval_hash = eval_hash if eval_hash is not None else self.stored_eval_hash - if effective_eval_hash is not None: - result[self.KEY_EVAL_HASH] = effective_eval_hash + if self.eval_hash is not None: + result[self.KEY_EVAL_HASH] = self.eval_hash for key, value in self.params.items(): result[key] = self._truncate_value(value=value, max_length=max_value_length) @@ -338,7 +332,7 @@ def from_dict(cls, data: dict[str, Any]) -> ComponentIdentifier: class_module = data.pop(cls.KEY_CLASS_MODULE, None) or data.pop(cls.LEGACY_KEY_MODULE, None) or "unknown" stored_hash = data.pop(cls.KEY_HASH, None) - stored_eval_hash = data.pop(cls.KEY_EVAL_HASH, None) + restored_eval_hash = data.pop(cls.KEY_EVAL_HASH, None) pyrit_version = data.pop(cls.KEY_PYRIT_VERSION, pyrit.__version__) # Reconstruct children @@ -361,10 +355,8 @@ def from_dict(cls, data: dict[str, Any]) -> ComponentIdentifier: if stored_hash: object.__setattr__(identifier, "hash", stored_hash) - # Preserve stored eval_hash if available — computed before truncation - # so that EvaluationIdentifier can use it directly. - if stored_eval_hash: - object.__setattr__(identifier, "stored_eval_hash", stored_eval_hash) + if restored_eval_hash: + object.__setattr__(identifier, "eval_hash", restored_eval_hash) return identifier diff --git a/pyrit/identifiers/evaluation_identifier.py b/pyrit/identifiers/evaluation_identifier.py index be90b4f2f6..6df3192cf2 100644 --- a/pyrit/identifiers/evaluation_identifier.py +++ b/pyrit/identifiers/evaluation_identifier.py @@ -170,16 +170,17 @@ class EvaluationIdentifier(ABC): CHILD_EVAL_RULES: ClassVar[dict[str, ChildEvalRule]] def __init__(self, identifier: ComponentIdentifier) -> None: - """Wrap a ComponentIdentifier and resolve its eval hash. + """ + Wrap a ComponentIdentifier and resolve its eval hash. - If the identifier carries a ``stored_eval_hash`` (preserved from a prior - DB round-trip), that value is used directly. Otherwise the eval hash is - computed from the identifier's params and children using the subclass's - ``CHILD_EVAL_RULES``. + If the identifier carries an ``eval_hash`` (preserved from a prior + DB round-trip or set by the scorer), that value is used directly. + Otherwise the eval hash is computed from the identifier's params + and children using the subclass's ``CHILD_EVAL_RULES``. """ self._identifier = identifier - if identifier.stored_eval_hash is not None: - self._eval_hash = identifier.stored_eval_hash + if identifier.eval_hash is not None: + self._eval_hash = identifier.eval_hash else: self._eval_hash = compute_eval_hash( identifier, diff --git a/pyrit/memory/memory_models.py b/pyrit/memory/memory_models.py index f4d9e2c90b..d290934517 100644 --- a/pyrit/memory/memory_models.py +++ b/pyrit/memory/memory_models.py @@ -33,6 +33,10 @@ import pyrit from pyrit.common.utils import to_sha256 from pyrit.identifiers.component_identifier import ComponentIdentifier +from pyrit.identifiers.evaluation_identifier import ( + AtomicAttackEvaluationIdentifier, + ScorerEvaluationIdentifier, +) from pyrit.models import ( AttackOutcome, AttackResult, @@ -51,6 +55,8 @@ SeedType, ) +logger = logging.getLogger(__name__) + # Default pyrit_version for database records created before version tracking was added LEGACY_PYRIT_VERSION = "<0.10.0" @@ -398,11 +404,11 @@ def __init__(self, *, entry: Score): self.score_metadata = entry.score_metadata # Normalize to ComponentIdentifier (handles dict with deprecation warning) then convert to dict for JSON storage normalized_scorer = ComponentIdentifier.normalize(entry.scorer_class_identifier) - # Compute eval_hash from untruncated identifier before truncation - scorer_eval_hash = self._compute_scorer_eval_hash(normalized_scorer) + # Ensure eval_hash is set before truncation so it survives the DB round-trip + if normalized_scorer.eval_hash is None: + self._set_scorer_eval_hash(normalized_scorer) self.scorer_class_identifier = normalized_scorer.to_dict( max_value_length=MAX_IDENTIFIER_VALUE_LENGTH, - eval_hash=scorer_eval_hash, ) self.prompt_request_response_id = entry.message_piece_id if entry.message_piece_id else None self.timestamp = entry.timestamp @@ -413,19 +419,18 @@ def __init__(self, *, entry: Score): self.pyrit_version = pyrit.__version__ @staticmethod - def _compute_scorer_eval_hash(scorer_identifier: ComponentIdentifier) -> Optional[str]: - """Compute scorer eval_hash from an untruncated identifier.""" - from pyrit.identifiers.evaluation_identifier import ScorerEvaluationIdentifier - + def _set_scorer_eval_hash(scorer_identifier: ComponentIdentifier) -> None: + """ + Set eval_hash on the scorer identifier so it survives truncation. + """ try: - return ScorerEvaluationIdentifier(scorer_identifier).eval_hash + eval_hash = ScorerEvaluationIdentifier(scorer_identifier).eval_hash + object.__setattr__(scorer_identifier, "eval_hash", eval_hash) except Exception: logger.warning( - f"Failed to compute eval_hash for scorer {scorer_identifier.class_name}; " - "eval_hash will not be stored.", + f"Failed to compute eval_hash for scorer {scorer_identifier.class_name}; eval_hash will not be stored.", exc_info=True, ) - return None def get_score(self) -> Score: """ @@ -790,12 +795,12 @@ def __init__(self, *, entry: AttackResult): self.attack_identifier = ( _attack_strategy_id.to_dict(max_value_length=MAX_IDENTIFIER_VALUE_LENGTH) if _attack_strategy_id else {} ) - # Compute eval_hash from untruncated identifier before truncation - attack_eval_hash = self._compute_attack_eval_hash(entry.atomic_attack_identifier) + # Ensure eval_hash is set before truncation so it survives the DB round-trip + if entry.atomic_attack_identifier and entry.atomic_attack_identifier.eval_hash is None: + self._set_attack_eval_hash(entry.atomic_attack_identifier) self.atomic_attack_identifier = ( entry.atomic_attack_identifier.to_dict( max_value_length=MAX_IDENTIFIER_VALUE_LENGTH, - eval_hash=attack_eval_hash, ) if entry.atomic_attack_identifier else None @@ -825,21 +830,18 @@ def __init__(self, *, entry: AttackResult): self.pyrit_version = pyrit.__version__ @staticmethod - def _compute_attack_eval_hash(attack_identifier: Optional[ComponentIdentifier]) -> Optional[str]: - """Compute attack eval_hash from an untruncated identifier.""" - if attack_identifier is None: - return None - from pyrit.identifiers.evaluation_identifier import AtomicAttackEvaluationIdentifier - + def _set_attack_eval_hash(attack_identifier: ComponentIdentifier) -> None: + """ + Set eval_hash on the attack identifier so it survives truncation. + """ try: - return AtomicAttackEvaluationIdentifier(attack_identifier).eval_hash + eval_hash = AtomicAttackEvaluationIdentifier(attack_identifier).eval_hash + object.__setattr__(attack_identifier, "eval_hash", eval_hash) except Exception: logger.warning( - f"Failed to compute eval_hash for attack {attack_identifier.class_name}; " - "eval_hash will not be stored.", + f"Failed to compute eval_hash for attack {attack_identifier.class_name}; eval_hash will not be stored.", exc_info=True, ) - return None @staticmethod def _get_id_as_uuid(obj: Any) -> Optional[uuid.UUID]: @@ -1006,8 +1008,6 @@ def __init__(self, *, entry: ScenarioResult): Args: entry (ScenarioResult): The scenario result object to convert into a database entry. """ - from pyrit.identifiers.evaluation_identifier import ScorerEvaluationIdentifier - self.id = entry.id self.scenario_name = entry.scenario_identifier.name self.scenario_description = entry.scenario_identifier.description @@ -1018,16 +1018,13 @@ def __init__(self, *, entry: ScenarioResult): self.objective_target_identifier = entry.objective_target_identifier.to_dict( max_value_length=MAX_IDENTIFIER_VALUE_LENGTH ) - # Compute eval_hash from untruncated identifier BEFORE truncation, then include - # it in the serialized dict so it survives the DB round-trip. - scorer_eval_hash = None - if entry.objective_scorer_identifier: - scorer_eval_hash = ScorerEvaluationIdentifier(entry.objective_scorer_identifier).eval_hash + # Ensure eval_hash is set before truncation so it survives the DB round-trip. + if entry.objective_scorer_identifier and entry.objective_scorer_identifier.eval_hash is None: + ScoreEntry._set_scorer_eval_hash(entry.objective_scorer_identifier) self.objective_scorer_identifier = ( entry.objective_scorer_identifier.to_dict( max_value_length=MAX_IDENTIFIER_VALUE_LENGTH, - eval_hash=scorer_eval_hash, ) if entry.objective_scorer_identifier else None diff --git a/pyrit/scenario/core/atomic_attack.py b/pyrit/scenario/core/atomic_attack.py index c77c542455..397934bc85 100644 --- a/pyrit/scenario/core/atomic_attack.py +++ b/pyrit/scenario/core/atomic_attack.py @@ -13,12 +13,14 @@ have a common interface for scenarios. """ +import contextlib import logging from typing import TYPE_CHECKING, Any, Optional from pyrit.executor.attack import AttackExecutor, AttackStrategy from pyrit.executor.attack.core.attack_executor import AttackExecutorResult from pyrit.identifiers import build_atomic_attack_identifier +from pyrit.identifiers.evaluation_identifier import AtomicAttackEvaluationIdentifier from pyrit.memory import CentralMemory from pyrit.memory.memory_models import MAX_IDENTIFIER_VALUE_LENGTH from pyrit.models import AttackResult, SeedAttackGroup @@ -252,16 +254,11 @@ def _enrich_atomic_attack_identifiers(self, *, results: AttackExecutorResult[Att ) # Persist the enriched identifier back to the database. - # Compute eval_hash before truncation so it survives the DB round-trip. - from pyrit.identifiers.evaluation_identifier import AtomicAttackEvaluationIdentifier - - attack_eval_hash = None - try: - attack_eval_hash = AtomicAttackEvaluationIdentifier( - result.atomic_attack_identifier - ).eval_hash - except Exception: - pass + # Set eval_hash before truncation so it survives the DB round-trip. + with contextlib.suppress(Exception): + if result.atomic_attack_identifier.eval_hash is None: + eval_hash = AtomicAttackEvaluationIdentifier(result.atomic_attack_identifier).eval_hash + object.__setattr__(result.atomic_attack_identifier, "eval_hash", eval_hash) if result.attack_result_id: memory.update_attack_result_by_id( @@ -269,7 +266,6 @@ def _enrich_atomic_attack_identifiers(self, *, results: AttackExecutorResult[Att update_fields={ "atomic_attack_identifier": result.atomic_attack_identifier.to_dict( max_value_length=MAX_IDENTIFIER_VALUE_LENGTH, - eval_hash=attack_eval_hash, ), }, ) diff --git a/pyrit/score/float_scale/float_scale_scorer.py b/pyrit/score/float_scale/float_scale_scorer.py index 126dd909f7..af39cf5bec 100644 --- a/pyrit/score/float_scale/float_scale_scorer.py +++ b/pyrit/score/float_scale/float_scale_scorer.py @@ -59,7 +59,7 @@ def get_scorer_metrics(self) -> Optional["HarmScorerMetrics"]: return None return find_harm_metrics_by_eval_hash( - eval_hash=self.get_eval_hash(), + eval_hash=self.get_identifier().eval_hash, harm_category=self.evaluation_file_mapping.harm_category, ) diff --git a/pyrit/score/scorer.py b/pyrit/score/scorer.py index c1ad1910a6..d790d5b88e 100644 --- a/pyrit/score/scorer.py +++ b/pyrit/score/scorer.py @@ -70,21 +70,22 @@ def __init__(self, *, validator: ScorerPromptValidator): """ self._validator = validator - def get_eval_hash(self) -> str: + def get_identifier(self) -> ComponentIdentifier: """ - Compute a behavioral equivalence hash for evaluation grouping. + Get the scorer's identifier with eval_hash always attached. - Delegates to ``ScorerEvaluationIdentifier`` which filters target children - (prompt_target, converter_target) to behavioral params only, so the same - scorer configuration on different deployments produces the same eval hash. + Overrides the base ``Identifiable.get_identifier()`` so that + ``to_dict()`` always emits the ``eval_hash`` key. Returns: - str: A hex-encoded SHA256 hash suitable for eval registry keying. + ComponentIdentifier: The identity with ``eval_hash`` set. """ - # Deferred import to avoid circular dependency (evaluation_identifier → identifiers → …) - from pyrit.identifiers.evaluation_identifier import ScorerEvaluationIdentifier + identifier = super().get_identifier() + if identifier.eval_hash is None: + from pyrit.identifiers.evaluation_identifier import ScorerEvaluationIdentifier - return ScorerEvaluationIdentifier(self.get_identifier()).eval_hash + object.__setattr__(identifier, "eval_hash", ScorerEvaluationIdentifier(identifier).eval_hash) + return identifier @property def scorer_type(self) -> ScoreType: diff --git a/pyrit/score/scorer_evaluation/scorer_evaluator.py b/pyrit/score/scorer_evaluation/scorer_evaluator.py index da08aa0dfe..df032c9ff9 100644 --- a/pyrit/score/scorer_evaluation/scorer_evaluator.py +++ b/pyrit/score/scorer_evaluation/scorer_evaluator.py @@ -275,7 +275,7 @@ def _should_skip_evaluation( - (False, None) if should run evaluation """ try: - scorer_hash = self.scorer.get_eval_hash() + scorer_hash = self.scorer.get_identifier().eval_hash # Determine if this is a harm or objective evaluation metrics_type = MetricsType.OBJECTIVE if isinstance(self.scorer, TrueFalseScorer) else MetricsType.HARM @@ -489,7 +489,7 @@ def _write_metrics_to_registry( replace_evaluation_results( file_path=result_file_path, scorer_identifier=self.scorer.get_identifier(), - eval_hash=self.scorer.get_eval_hash(), + eval_hash=self.scorer.get_identifier().eval_hash, metrics=metrics, ) except Exception as e: diff --git a/pyrit/score/true_false/true_false_scorer.py b/pyrit/score/true_false/true_false_scorer.py index 671dd57973..9074b79170 100644 --- a/pyrit/score/true_false/true_false_scorer.py +++ b/pyrit/score/true_false/true_false_scorer.py @@ -94,7 +94,7 @@ def get_scorer_metrics(self) -> Optional["ObjectiveScorerMetrics"]: if not result_file.exists(): return None - return find_objective_metrics_by_eval_hash(eval_hash=self.get_eval_hash(), file_path=result_file) + return find_objective_metrics_by_eval_hash(eval_hash=self.get_identifier().eval_hash, file_path=result_file) async def _score_async(self, message: Message, *, objective: Optional[str] = None) -> list[Score]: """ diff --git a/tests/unit/identifiers/test_component_identifier.py b/tests/unit/identifiers/test_component_identifier.py index 43edd9dd1b..7cc66153a2 100644 --- a/tests/unit/identifiers/test_component_identifier.py +++ b/tests/unit/identifiers/test_component_identifier.py @@ -552,11 +552,12 @@ def test_roundtrip_preserves_eval_hash(self): params={"system_prompt": "Score the response"}, ) expected_eval_hash = "abc123" * 10 + "abcd" # 64 chars - d = original.to_dict(eval_hash=expected_eval_hash) + object.__setattr__(original, "eval_hash", expected_eval_hash) + d = original.to_dict() assert d["eval_hash"] == expected_eval_hash reconstructed = ComponentIdentifier.from_dict(d) - assert reconstructed.stored_eval_hash == expected_eval_hash + assert reconstructed.eval_hash == expected_eval_hash def test_roundtrip_eval_hash_survives_truncation(self): """Regression test: eval_hash computed before truncation is preserved after round-trip. @@ -572,9 +573,10 @@ def test_roundtrip_eval_hash_survives_truncation(self): params={"system_prompt_template": long_prompt}, ) eval_hash_before_truncation = "correct_eval_hash_" + "0" * 46 # 64 chars + object.__setattr__(original, "eval_hash", eval_hash_before_truncation) - # Serialize with truncation AND eval_hash (simulates DB storage) - truncated_dict = original.to_dict(max_value_length=80, eval_hash=eval_hash_before_truncation) + # Serialize with truncation (simulates DB storage) + truncated_dict = original.to_dict(max_value_length=80) # Params are truncated assert truncated_dict["system_prompt_template"].endswith("...") # But eval_hash is preserved @@ -583,12 +585,12 @@ def test_roundtrip_eval_hash_survives_truncation(self): # Deserialize reconstructed = ComponentIdentifier.from_dict(truncated_dict) # eval_hash is available on the reconstructed identifier - assert reconstructed.stored_eval_hash == eval_hash_before_truncation + assert reconstructed.eval_hash == eval_hash_before_truncation # And it's NOT in params (from_dict pops it as a reserved key) assert "eval_hash" not in reconstructed.params - def test_roundtrip_no_eval_hash_when_not_provided(self): - """Test that stored_eval_hash is None when not included in serialization.""" + def test_roundtrip_no_eval_hash_when_not_set(self): + """Test that eval_hash is None when not set on the identifier.""" original = ComponentIdentifier( class_name="Test", class_module="mod", @@ -598,23 +600,48 @@ def test_roundtrip_no_eval_hash_when_not_provided(self): assert "eval_hash" not in d reconstructed = ComponentIdentifier.from_dict(d) - assert reconstructed.stored_eval_hash is None + assert reconstructed.eval_hash is None - def test_to_dict_includes_stored_eval_hash_from_prior_roundtrip(self): - """Test that to_dict re-emits stored_eval_hash from a prior round-trip.""" + def test_to_dict_includes_eval_hash_from_prior_roundtrip(self): + """Test that to_dict re-emits eval_hash from a prior round-trip.""" eval_hash = "deadbeef" * 8 # 64 chars original = ComponentIdentifier( class_name="Test", class_module="mod", ) - # Simulate a prior round-trip that stored an eval_hash - d1 = original.to_dict(eval_hash=eval_hash) + # Simulate setting eval_hash then round-tripping + object.__setattr__(original, "eval_hash", eval_hash) + d1 = original.to_dict() reconstructed = ComponentIdentifier.from_dict(d1) - # Re-serialize without explicitly passing eval_hash — stored one should be emitted + # Re-serialize — eval_hash should be emitted d2 = reconstructed.to_dict() assert d2["eval_hash"] == eval_hash + def test_double_roundtrip_preserves_eval_hash_and_identity_hash(self): + """Test that both eval_hash and identity hash survive retrieve → re-store → retrieve.""" + long_prompt = "Score the response carefully. " * 20 + original = ComponentIdentifier( + class_name="Scorer", + class_module="pyrit.score", + params={"system_prompt": long_prompt}, + ) + original_hash = original.hash + eval_hash = "eval_" + "a1b2c3d4" * 7 + "a1b2c3" # 64 chars + object.__setattr__(original, "eval_hash", eval_hash) + + # First round-trip: store with truncation + d1 = original.to_dict(max_value_length=80) + r1 = ComponentIdentifier.from_dict(d1) + assert r1.hash == original_hash + assert r1.eval_hash == eval_hash + + # Second round-trip: re-store (simulating retrieve → use → re-store) + d2 = r1.to_dict(max_value_length=80) + r2 = ComponentIdentifier.from_dict(d2) + assert r2.hash == original_hash + assert r2.eval_hash == eval_hash + class TestComponentIdentifierNormalize: """Tests for normalize class method.""" diff --git a/tests/unit/identifiers/test_evaluation_identifier.py b/tests/unit/identifiers/test_evaluation_identifier.py index 8e0b04310a..4424308e70 100644 --- a/tests/unit/identifiers/test_evaluation_identifier.py +++ b/tests/unit/identifiers/test_evaluation_identifier.py @@ -223,28 +223,28 @@ class CustomIdentity(EvaluationIdentifier): ) assert identity.eval_hash == expected - def test_uses_stored_eval_hash_when_available(self): - """Test that EvaluationIdentifier uses stored_eval_hash instead of recomputing.""" + def test_uses_eval_hash_when_available(self): + """Test that EvaluationIdentifier uses eval_hash instead of recomputing.""" stored_hash = "stored_eval_hash_value_" + "0" * 42 # 64 chars cid = ComponentIdentifier( class_name="Scorer", class_module="pyrit.score", params={"system_prompt": "truncated..."}, ) - # Simulate a DB round-trip where stored_eval_hash was preserved - object.__setattr__(cid, "stored_eval_hash", stored_hash) + # Simulate a DB round-trip where eval_hash was preserved + object.__setattr__(cid, "eval_hash", stored_hash) identity = _StubEvaluationIdentifier(cid) assert identity.eval_hash == stored_hash - def test_computes_eval_hash_when_stored_is_none(self): - """Test that eval_hash is computed normally when stored_eval_hash is None.""" + def test_computes_eval_hash_when_not_set(self): + """Test that eval_hash is computed normally when eval_hash is None.""" cid = ComponentIdentifier( class_name="Scorer", class_module="pyrit.score", params={"threshold": 0.5}, ) - assert cid.stored_eval_hash is None + assert cid.eval_hash is None identity = _StubEvaluationIdentifier(cid) expected = compute_eval_hash(cid, child_eval_rules=_StubEvaluationIdentifier.CHILD_EVAL_RULES) @@ -274,9 +274,10 @@ def test_truncation_roundtrip_preserves_eval_hash(self): # Compute eval_hash from the untruncated identifier (the correct hash) correct_eval_hash = compute_eval_hash(scorer_id, child_eval_rules=_CHILD_EVAL_RULES) + object.__setattr__(scorer_id, "eval_hash", correct_eval_hash) - # Simulate DB storage: serialize with truncation + eval_hash - truncated_dict = scorer_id.to_dict(max_value_length=80, eval_hash=correct_eval_hash) + # Simulate DB storage: serialize with truncation + truncated_dict = scorer_id.to_dict(max_value_length=80) # Verify params are actually truncated assert truncated_dict["system_prompt_template"].endswith("...") @@ -288,6 +289,35 @@ def test_truncation_roundtrip_preserves_eval_hash(self): recomputed = compute_eval_hash(reconstructed, child_eval_rules=_CHILD_EVAL_RULES) assert recomputed != correct_eval_hash, "Truncated params should produce different eval_hash" - # But EvaluationIdentifier uses stored_eval_hash, giving the correct result + # But EvaluationIdentifier uses the preserved eval_hash, giving the correct result identity = _StubEvaluationIdentifier(reconstructed) assert identity.eval_hash == correct_eval_hash + + def test_eval_hash_preserved_through_double_roundtrip(self): + """Test that eval_hash is preserved when retrieved from DB and re-stored. + + Simulates: fresh save → DB retrieve → re-store → DB retrieve. + The eval_hash computed at first save should survive all round-trips. + """ + long_prompt = "Evaluate whether the response achieves the objective. " * 10 + scorer_id = ComponentIdentifier( + class_name="SelfAskTrueFalseScorer", + class_module="pyrit.score", + params={"system_prompt_template": long_prompt}, + ) + + # First save: compute eval_hash from untruncated identifier + correct_eval_hash = compute_eval_hash(scorer_id, child_eval_rules=_CHILD_EVAL_RULES) + object.__setattr__(scorer_id, "eval_hash", correct_eval_hash) + d1 = scorer_id.to_dict(max_value_length=80) + + # First retrieve + r1 = ComponentIdentifier.from_dict(d1) + assert _StubEvaluationIdentifier(r1).eval_hash == correct_eval_hash + + # Re-store: EvaluationIdentifier should use stored value, not recompute + d2 = r1.to_dict(max_value_length=80) + + # Second retrieve + r2 = ComponentIdentifier.from_dict(d2) + assert _StubEvaluationIdentifier(r2).eval_hash == correct_eval_hash diff --git a/tests/unit/score/test_scorer_evaluation_identifier.py b/tests/unit/score/test_scorer_evaluation_identifier.py index 8d0df75d75..dea0cd7cf4 100644 --- a/tests/unit/score/test_scorer_evaluation_identifier.py +++ b/tests/unit/score/test_scorer_evaluation_identifier.py @@ -4,8 +4,7 @@ """ Tests for pyrit.score.scorer_evaluation.scorer_evaluation_identifier. -Covers ``ScorerEvaluationIdentifier`` ClassVar values, eval-hash delegation, and -the ``Scorer.get_eval_hash()`` convenience method. +Covers ``ScorerEvaluationIdentifier`` ClassVar values and eval-hash delegation. """ import pytest @@ -85,10 +84,10 @@ def test_eval_hash_matches_free_function(self): @pytest.mark.usefixtures("patch_central_database") class TestScorerGetEvalHash: - """Tests for Scorer.get_eval_hash() convenience method (adapted from old TestGetEvalHash).""" + """Tests for ScorerEvaluationIdentifier eval_hash computation.""" - def test_get_eval_hash_uses_scorer_identity(self): - """Test that Scorer.get_eval_hash() delegates to ScorerEvaluationIdentifier.""" + def test_eval_hash_uses_scorer_identity(self): + """Test that ScorerEvaluationIdentifier computes eval_hash from identifier.""" class FakeScorer(Identifiable): def _build_identifier(self) -> ComponentIdentifier: @@ -109,8 +108,8 @@ def _build_identifier(self) -> ComponentIdentifier: ) assert eval_hash == expected - def test_get_eval_hash_filters_operational_params(self): - """Test that Scorer.get_eval_hash() filters operational params from target children.""" + def test_eval_hash_filters_operational_params(self): + """Test that eval_hash filters operational params from target children.""" class ScorerLike(Identifiable): def __init__(self, *, endpoint: str): @@ -135,7 +134,7 @@ def _build_identifier(self) -> ComponentIdentifier: # But different component hashes (endpoint is in full identity) assert scorer_a.get_identifier().hash != scorer_b.get_identifier().hash - def test_get_eval_hash_no_target_children_equals_component_hash(self): + def test_eval_hash_no_target_children_equals_component_hash(self): """Test that eval hash equals component hash when there are no target children.""" class SimpleScorer(Identifiable): diff --git a/tests/unit/score/test_scorer_evaluator.py b/tests/unit/score/test_scorer_evaluator.py index 6185b942cb..aa9ebbc784 100644 --- a/tests/unit/score/test_scorer_evaluator.py +++ b/tests/unit/score/test_scorer_evaluator.py @@ -31,9 +31,9 @@ def mock_harm_scorer(): # Create a mock identifier with a controllable hash property mock_identifier = MagicMock() mock_identifier.hash = "test_hash_456" + mock_identifier.eval_hash = "test_hash_456" mock_identifier.system_prompt_template = "test_system_prompt" scorer.get_identifier = MagicMock(return_value=mock_identifier) - scorer.get_eval_hash = MagicMock(return_value="test_hash_456") return scorer @@ -45,9 +45,9 @@ def mock_objective_scorer(): # Create a mock identifier with a controllable hash property mock_identifier = MagicMock() mock_identifier.hash = "test_hash_123" + mock_identifier.eval_hash = "test_hash_123" mock_identifier.user_prompt_template = "test_user_prompt" scorer.get_identifier = MagicMock(return_value=mock_identifier) - scorer.get_eval_hash = MagicMock(return_value="test_hash_123") return scorer @@ -412,8 +412,8 @@ def test_should_skip_evaluation_exception_handling(mock_find, mock_objective_sco evaluator = ObjectiveScorerEvaluator(scorer=mock_objective_scorer) result_file = tmp_path / "test_results.jsonl" - # Make get_eval_hash() raise an exception - mock_objective_scorer.get_eval_hash = MagicMock(side_effect=Exception("Identifier computation failed")) + # Make get_identifier() raise an exception + mock_objective_scorer.get_identifier = MagicMock(side_effect=Exception("Identifier computation failed")) should_skip, result = evaluator._should_skip_evaluation( dataset_version="1.0", @@ -426,8 +426,11 @@ def test_should_skip_evaluation_exception_handling(mock_find, mock_objective_sco assert result is None mock_find.assert_not_called() - # Restore get_eval_hash for other tests - mock_objective_scorer.get_eval_hash = MagicMock(return_value="test_hash_123") + # Restore get_identifier for other tests + mock_id = MagicMock() + mock_id.hash = "test_hash_123" + mock_id.eval_hash = "test_hash_123" + mock_objective_scorer.get_identifier = MagicMock(return_value=mock_id) @patch("pyrit.score.scorer_evaluation.scorer_evaluator.find_harm_metrics_by_eval_hash") From 8a9097fd505484e72a1a11628d525c6692e832ce Mon Sep 17 00:00:00 2001 From: Richard Lundeen Date: Fri, 20 Mar 2026 12:04:58 -0700 Subject: [PATCH 5/7] refactor --- pyrit/identifiers/component_identifier.py | 56 +++++++++++------ pyrit/memory/memory_models.py | 61 +++++++++---------- pyrit/scenario/core/atomic_attack.py | 5 +- pyrit/score/scorer.py | 3 +- .../identifiers/test_component_identifier.py | 16 ++--- .../identifiers/test_evaluation_identifier.py | 8 +-- 6 files changed, 82 insertions(+), 67 deletions(-) diff --git a/pyrit/identifiers/component_identifier.py b/pyrit/identifiers/component_identifier.py index 73d976b706..027fed85b6 100644 --- a/pyrit/identifiers/component_identifier.py +++ b/pyrit/identifiers/component_identifier.py @@ -128,23 +128,52 @@ class ComponentIdentifier: #: Named child identifiers for compositional identity (e.g., a scorer's target). children: dict[str, Union[ComponentIdentifier, list[ComponentIdentifier]]] = field(default_factory=dict) #: Content-addressed SHA256 hash computed from class, params, and children. - hash: str = field(init=False, compare=False) + #: When ``None`` (the default), it is computed automatically in ``__post_init__``. + #: Pass an explicit value to preserve a pre-computed hash (e.g. from DB storage + #: where params may have been truncated). + hash: Optional[str] = field(default=None, compare=False) #: Version tag for storage. Not included in hash. pyrit_version: str = field(default_factory=lambda: pyrit.__version__, compare=False) #: Evaluation hash. Computed by EvaluationIdentifier subclasses (e.g. ScorerEvaluationIdentifier) #: and attached to the identifier so it is always available via ``to_dict()``. #: Survives DB round-trips even when param values are truncated. - eval_hash: Optional[str] = field(default=None, init=False, compare=False) + eval_hash: Optional[str] = field(default=None, compare=False) def __post_init__(self) -> None: - """Compute the content-addressed hash at creation time.""" - hash_dict = _build_hash_dict( + """Compute the content-addressed hash at creation time if not already provided.""" + if self.hash is None: + hash_dict = _build_hash_dict( + class_name=self.class_name, + class_module=self.class_module, + params=self.params, + children=self.children, + ) + object.__setattr__(self, "hash", config_hash(hash_dict)) + + def with_eval_hash(self, eval_hash: str) -> ComponentIdentifier: + """ + Return a new frozen ComponentIdentifier with ``eval_hash`` set. + + The original ``hash`` is preserved (important for identifiers + reconstructed from truncated DB data where recomputation would + produce a wrong hash). + + Args: + eval_hash: The evaluation hash to attach. + + Returns: + A new ComponentIdentifier identical to this one but with + ``eval_hash`` set to the given value. + """ + return ComponentIdentifier( class_name=self.class_name, class_module=self.class_module, params=self.params, children=self.children, + hash=self.hash, + pyrit_version=self.pyrit_version, + eval_hash=eval_hash, ) - object.__setattr__(self, "hash", config_hash(hash_dict)) @property def short_hash(self) -> str: @@ -332,7 +361,7 @@ def from_dict(cls, data: dict[str, Any]) -> ComponentIdentifier: class_module = data.pop(cls.KEY_CLASS_MODULE, None) or data.pop(cls.LEGACY_KEY_MODULE, None) or "unknown" stored_hash = data.pop(cls.KEY_HASH, None) - restored_eval_hash = data.pop(cls.KEY_EVAL_HASH, None) + stored_eval_hash = data.pop(cls.KEY_EVAL_HASH, None) pyrit_version = data.pop(cls.KEY_PYRIT_VERSION, pyrit.__version__) # Reconstruct children @@ -341,25 +370,16 @@ def from_dict(cls, data: dict[str, Any]) -> ComponentIdentifier: # Everything remaining is a param params = data - identifier = cls( + return cls( class_name=class_name, class_module=class_module, params=params, children=children, + hash=stored_hash, pyrit_version=pyrit_version, + eval_hash=stored_eval_hash, ) - # Preserve stored hash if available — the stored hash was computed from - # untruncated data and is the correct identity. Recomputing from - # potentially truncated DB values would produce a wrong hash. - if stored_hash: - object.__setattr__(identifier, "hash", stored_hash) - - if restored_eval_hash: - object.__setattr__(identifier, "eval_hash", restored_eval_hash) - - return identifier - def get_child(self, key: str) -> Optional[ComponentIdentifier]: """ Get a single child by key. diff --git a/pyrit/memory/memory_models.py b/pyrit/memory/memory_models.py index d290934517..6c43b78aec 100644 --- a/pyrit/memory/memory_models.py +++ b/pyrit/memory/memory_models.py @@ -406,7 +406,16 @@ def __init__(self, *, entry: Score): normalized_scorer = ComponentIdentifier.normalize(entry.scorer_class_identifier) # Ensure eval_hash is set before truncation so it survives the DB round-trip if normalized_scorer.eval_hash is None: - self._set_scorer_eval_hash(normalized_scorer) + try: + normalized_scorer = normalized_scorer.with_eval_hash( + ScorerEvaluationIdentifier(normalized_scorer).eval_hash + ) + except Exception: + logger.warning( + f"Failed to compute eval_hash for scorer {normalized_scorer.class_name}; " + "eval_hash will not be stored.", + exc_info=True, + ) self.scorer_class_identifier = normalized_scorer.to_dict( max_value_length=MAX_IDENTIFIER_VALUE_LENGTH, ) @@ -418,20 +427,6 @@ def __init__(self, *, entry: Score): self.objective = entry.objective self.pyrit_version = pyrit.__version__ - @staticmethod - def _set_scorer_eval_hash(scorer_identifier: ComponentIdentifier) -> None: - """ - Set eval_hash on the scorer identifier so it survives truncation. - """ - try: - eval_hash = ScorerEvaluationIdentifier(scorer_identifier).eval_hash - object.__setattr__(scorer_identifier, "eval_hash", eval_hash) - except Exception: - logger.warning( - f"Failed to compute eval_hash for scorer {scorer_identifier.class_name}; eval_hash will not be stored.", - exc_info=True, - ) - def get_score(self) -> Score: """ Convert this database entry back into a Score object. @@ -797,7 +792,16 @@ def __init__(self, *, entry: AttackResult): ) # Ensure eval_hash is set before truncation so it survives the DB round-trip if entry.atomic_attack_identifier and entry.atomic_attack_identifier.eval_hash is None: - self._set_attack_eval_hash(entry.atomic_attack_identifier) + try: + entry.atomic_attack_identifier = entry.atomic_attack_identifier.with_eval_hash( + AtomicAttackEvaluationIdentifier(entry.atomic_attack_identifier).eval_hash + ) + except Exception: + logger.warning( + f"Failed to compute eval_hash for attack {entry.atomic_attack_identifier.class_name}; " + "eval_hash will not be stored.", + exc_info=True, + ) self.atomic_attack_identifier = ( entry.atomic_attack_identifier.to_dict( max_value_length=MAX_IDENTIFIER_VALUE_LENGTH, @@ -829,20 +833,6 @@ def __init__(self, *, entry: AttackResult): self.timestamp = datetime.now(tz=timezone.utc) self.pyrit_version = pyrit.__version__ - @staticmethod - def _set_attack_eval_hash(attack_identifier: ComponentIdentifier) -> None: - """ - Set eval_hash on the attack identifier so it survives truncation. - """ - try: - eval_hash = AtomicAttackEvaluationIdentifier(attack_identifier).eval_hash - object.__setattr__(attack_identifier, "eval_hash", eval_hash) - except Exception: - logger.warning( - f"Failed to compute eval_hash for attack {attack_identifier.class_name}; eval_hash will not be stored.", - exc_info=True, - ) - @staticmethod def _get_id_as_uuid(obj: Any) -> Optional[uuid.UUID]: """ @@ -1020,7 +1010,16 @@ def __init__(self, *, entry: ScenarioResult): ) # Ensure eval_hash is set before truncation so it survives the DB round-trip. if entry.objective_scorer_identifier and entry.objective_scorer_identifier.eval_hash is None: - ScoreEntry._set_scorer_eval_hash(entry.objective_scorer_identifier) + try: + entry.objective_scorer_identifier = entry.objective_scorer_identifier.with_eval_hash( + ScorerEvaluationIdentifier(entry.objective_scorer_identifier).eval_hash + ) + except Exception: + logger.warning( + f"Failed to compute eval_hash for scorer " + f"{entry.objective_scorer_identifier.class_name}; eval_hash will not be stored.", + exc_info=True, + ) self.objective_scorer_identifier = ( entry.objective_scorer_identifier.to_dict( diff --git a/pyrit/scenario/core/atomic_attack.py b/pyrit/scenario/core/atomic_attack.py index 397934bc85..716ef7d794 100644 --- a/pyrit/scenario/core/atomic_attack.py +++ b/pyrit/scenario/core/atomic_attack.py @@ -257,8 +257,9 @@ def _enrich_atomic_attack_identifiers(self, *, results: AttackExecutorResult[Att # Set eval_hash before truncation so it survives the DB round-trip. with contextlib.suppress(Exception): if result.atomic_attack_identifier.eval_hash is None: - eval_hash = AtomicAttackEvaluationIdentifier(result.atomic_attack_identifier).eval_hash - object.__setattr__(result.atomic_attack_identifier, "eval_hash", eval_hash) + result.atomic_attack_identifier = result.atomic_attack_identifier.with_eval_hash( + AtomicAttackEvaluationIdentifier(result.atomic_attack_identifier).eval_hash + ) if result.attack_result_id: memory.update_attack_result_by_id( diff --git a/pyrit/score/scorer.py b/pyrit/score/scorer.py index d790d5b88e..55d699381e 100644 --- a/pyrit/score/scorer.py +++ b/pyrit/score/scorer.py @@ -84,7 +84,8 @@ def get_identifier(self) -> ComponentIdentifier: if identifier.eval_hash is None: from pyrit.identifiers.evaluation_identifier import ScorerEvaluationIdentifier - object.__setattr__(identifier, "eval_hash", ScorerEvaluationIdentifier(identifier).eval_hash) + identifier = identifier.with_eval_hash(ScorerEvaluationIdentifier(identifier).eval_hash) + self._identifier = identifier return identifier @property diff --git a/tests/unit/identifiers/test_component_identifier.py b/tests/unit/identifiers/test_component_identifier.py index 7cc66153a2..569fb42bff 100644 --- a/tests/unit/identifiers/test_component_identifier.py +++ b/tests/unit/identifiers/test_component_identifier.py @@ -546,13 +546,12 @@ def test_roundtrip_with_list_children(self): def test_roundtrip_preserves_eval_hash(self): """Test that eval_hash is preserved through to_dict -> from_dict round-trip.""" + expected_eval_hash = "abc123" * 10 + "abcd" # 64 chars original = ComponentIdentifier( class_name="Scorer", class_module="pyrit.score", params={"system_prompt": "Score the response"}, - ) - expected_eval_hash = "abc123" * 10 + "abcd" # 64 chars - object.__setattr__(original, "eval_hash", expected_eval_hash) + ).with_eval_hash(expected_eval_hash) d = original.to_dict() assert d["eval_hash"] == expected_eval_hash @@ -567,13 +566,12 @@ def test_roundtrip_eval_hash_survives_truncation(self): the dict, it survives truncation. """ long_prompt = "You are a scorer that evaluates responses. " * 20 # >80 chars + eval_hash_before_truncation = "correct_eval_hash_" + "0" * 46 # 64 chars original = ComponentIdentifier( class_name="SelfAskTrueFalseScorer", class_module="pyrit.score", params={"system_prompt_template": long_prompt}, - ) - eval_hash_before_truncation = "correct_eval_hash_" + "0" * 46 # 64 chars - object.__setattr__(original, "eval_hash", eval_hash_before_truncation) + ).with_eval_hash(eval_hash_before_truncation) # Serialize with truncation (simulates DB storage) truncated_dict = original.to_dict(max_value_length=80) @@ -608,9 +606,7 @@ def test_to_dict_includes_eval_hash_from_prior_roundtrip(self): original = ComponentIdentifier( class_name="Test", class_module="mod", - ) - # Simulate setting eval_hash then round-tripping - object.__setattr__(original, "eval_hash", eval_hash) + ).with_eval_hash(eval_hash) d1 = original.to_dict() reconstructed = ComponentIdentifier.from_dict(d1) @@ -628,7 +624,7 @@ def test_double_roundtrip_preserves_eval_hash_and_identity_hash(self): ) original_hash = original.hash eval_hash = "eval_" + "a1b2c3d4" * 7 + "a1b2c3" # 64 chars - object.__setattr__(original, "eval_hash", eval_hash) + original = original.with_eval_hash(eval_hash) # First round-trip: store with truncation d1 = original.to_dict(max_value_length=80) diff --git a/tests/unit/identifiers/test_evaluation_identifier.py b/tests/unit/identifiers/test_evaluation_identifier.py index 4424308e70..69eda9d489 100644 --- a/tests/unit/identifiers/test_evaluation_identifier.py +++ b/tests/unit/identifiers/test_evaluation_identifier.py @@ -230,9 +230,7 @@ def test_uses_eval_hash_when_available(self): class_name="Scorer", class_module="pyrit.score", params={"system_prompt": "truncated..."}, - ) - # Simulate a DB round-trip where eval_hash was preserved - object.__setattr__(cid, "eval_hash", stored_hash) + ).with_eval_hash(stored_hash) identity = _StubEvaluationIdentifier(cid) assert identity.eval_hash == stored_hash @@ -274,7 +272,7 @@ def test_truncation_roundtrip_preserves_eval_hash(self): # Compute eval_hash from the untruncated identifier (the correct hash) correct_eval_hash = compute_eval_hash(scorer_id, child_eval_rules=_CHILD_EVAL_RULES) - object.__setattr__(scorer_id, "eval_hash", correct_eval_hash) + scorer_id = scorer_id.with_eval_hash(correct_eval_hash) # Simulate DB storage: serialize with truncation truncated_dict = scorer_id.to_dict(max_value_length=80) @@ -308,7 +306,7 @@ def test_eval_hash_preserved_through_double_roundtrip(self): # First save: compute eval_hash from untruncated identifier correct_eval_hash = compute_eval_hash(scorer_id, child_eval_rules=_CHILD_EVAL_RULES) - object.__setattr__(scorer_id, "eval_hash", correct_eval_hash) + scorer_id = scorer_id.with_eval_hash(correct_eval_hash) d1 = scorer_id.to_dict(max_value_length=80) # First retrieve From 297ffb900201d142215f703d1fd76a4c493886b0 Mon Sep 17 00:00:00 2001 From: Richard Lundeen Date: Fri, 20 Mar 2026 12:15:25 -0700 Subject: [PATCH 6/7] unswallowing exceptions --- pyrit/memory/memory_models.py | 39 +++++++--------------------- pyrit/scenario/core/atomic_attack.py | 10 +++---- 2 files changed, 13 insertions(+), 36 deletions(-) diff --git a/pyrit/memory/memory_models.py b/pyrit/memory/memory_models.py index 6c43b78aec..9376768bd4 100644 --- a/pyrit/memory/memory_models.py +++ b/pyrit/memory/memory_models.py @@ -406,16 +406,9 @@ def __init__(self, *, entry: Score): normalized_scorer = ComponentIdentifier.normalize(entry.scorer_class_identifier) # Ensure eval_hash is set before truncation so it survives the DB round-trip if normalized_scorer.eval_hash is None: - try: - normalized_scorer = normalized_scorer.with_eval_hash( - ScorerEvaluationIdentifier(normalized_scorer).eval_hash - ) - except Exception: - logger.warning( - f"Failed to compute eval_hash for scorer {normalized_scorer.class_name}; " - "eval_hash will not be stored.", - exc_info=True, - ) + normalized_scorer = normalized_scorer.with_eval_hash( + ScorerEvaluationIdentifier(normalized_scorer).eval_hash + ) self.scorer_class_identifier = normalized_scorer.to_dict( max_value_length=MAX_IDENTIFIER_VALUE_LENGTH, ) @@ -792,16 +785,9 @@ def __init__(self, *, entry: AttackResult): ) # Ensure eval_hash is set before truncation so it survives the DB round-trip if entry.atomic_attack_identifier and entry.atomic_attack_identifier.eval_hash is None: - try: - entry.atomic_attack_identifier = entry.atomic_attack_identifier.with_eval_hash( - AtomicAttackEvaluationIdentifier(entry.atomic_attack_identifier).eval_hash - ) - except Exception: - logger.warning( - f"Failed to compute eval_hash for attack {entry.atomic_attack_identifier.class_name}; " - "eval_hash will not be stored.", - exc_info=True, - ) + entry.atomic_attack_identifier = entry.atomic_attack_identifier.with_eval_hash( + AtomicAttackEvaluationIdentifier(entry.atomic_attack_identifier).eval_hash + ) self.atomic_attack_identifier = ( entry.atomic_attack_identifier.to_dict( max_value_length=MAX_IDENTIFIER_VALUE_LENGTH, @@ -1010,16 +996,9 @@ def __init__(self, *, entry: ScenarioResult): ) # Ensure eval_hash is set before truncation so it survives the DB round-trip. if entry.objective_scorer_identifier and entry.objective_scorer_identifier.eval_hash is None: - try: - entry.objective_scorer_identifier = entry.objective_scorer_identifier.with_eval_hash( - ScorerEvaluationIdentifier(entry.objective_scorer_identifier).eval_hash - ) - except Exception: - logger.warning( - f"Failed to compute eval_hash for scorer " - f"{entry.objective_scorer_identifier.class_name}; eval_hash will not be stored.", - exc_info=True, - ) + entry.objective_scorer_identifier = entry.objective_scorer_identifier.with_eval_hash( + ScorerEvaluationIdentifier(entry.objective_scorer_identifier).eval_hash + ) self.objective_scorer_identifier = ( entry.objective_scorer_identifier.to_dict( diff --git a/pyrit/scenario/core/atomic_attack.py b/pyrit/scenario/core/atomic_attack.py index 716ef7d794..ec43e28026 100644 --- a/pyrit/scenario/core/atomic_attack.py +++ b/pyrit/scenario/core/atomic_attack.py @@ -13,7 +13,6 @@ have a common interface for scenarios. """ -import contextlib import logging from typing import TYPE_CHECKING, Any, Optional @@ -255,11 +254,10 @@ def _enrich_atomic_attack_identifiers(self, *, results: AttackExecutorResult[Att # Persist the enriched identifier back to the database. # Set eval_hash before truncation so it survives the DB round-trip. - with contextlib.suppress(Exception): - if result.atomic_attack_identifier.eval_hash is None: - result.atomic_attack_identifier = result.atomic_attack_identifier.with_eval_hash( - AtomicAttackEvaluationIdentifier(result.atomic_attack_identifier).eval_hash - ) + if result.atomic_attack_identifier.eval_hash is None: + result.atomic_attack_identifier = result.atomic_attack_identifier.with_eval_hash( + AtomicAttackEvaluationIdentifier(result.atomic_attack_identifier).eval_hash + ) if result.attack_result_id: memory.update_attack_result_by_id( From 7a1e485d8e05dd8ff749e990b5d303a92ace00f6 Mon Sep 17 00:00:00 2001 From: Richard Lundeen Date: Fri, 20 Mar 2026 12:44:24 -0700 Subject: [PATCH 7/7] pr feedback --- pyrit/score/scorer.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pyrit/score/scorer.py b/pyrit/score/scorer.py index 55d699381e..b18a1802a9 100644 --- a/pyrit/score/scorer.py +++ b/pyrit/score/scorer.py @@ -23,7 +23,7 @@ pyrit_json_retry, remove_markdown_json, ) -from pyrit.identifiers import ComponentIdentifier, Identifiable +from pyrit.identifiers import ComponentIdentifier, Identifiable, ScorerEvaluationIdentifier from pyrit.memory import CentralMemory, MemoryInterface from pyrit.models import ( ChatMessageRole, @@ -82,8 +82,6 @@ def get_identifier(self) -> ComponentIdentifier: """ identifier = super().get_identifier() if identifier.eval_hash is None: - from pyrit.identifiers.evaluation_identifier import ScorerEvaluationIdentifier - identifier = identifier.with_eval_hash(ScorerEvaluationIdentifier(identifier).eval_hash) self._identifier = identifier return identifier