diff --git a/.github/instructions/converters.instructions.md b/.github/instructions/converters.instructions.md index 9c00bb2ed1..031832bb73 100644 --- a/.github/instructions/converters.instructions.md +++ b/.github/instructions/converters.instructions.md @@ -65,7 +65,7 @@ from pyrit.identifiers import ComponentIdentifier For LLM-based converters, also import: ```python -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget ``` ## Constructor Pattern @@ -77,7 +77,7 @@ from pyrit.common.apply_defaults import apply_defaults class MyConverter(PromptConverter): @apply_defaults - def __init__(self, *, target: PromptChatTarget, template: str = "default") -> None: + def __init__(self, *, target: PromptTarget, template: str = "default") -> None: ... ``` diff --git a/.github/instructions/scenarios.instructions.md b/.github/instructions/scenarios.instructions.md index ba544465fc..89de20f4d6 100644 --- a/.github/instructions/scenarios.instructions.md +++ b/.github/instructions/scenarios.instructions.md @@ -40,7 +40,7 @@ class MyScenario(Scenario): def __init__( self, *, - adversarial_chat: PromptChatTarget | None = None, + adversarial_chat: PromptTarget | None = None, objective_scorer: TrueFalseScorer | None = None, scenario_result_id: str | None = None, ) -> None: diff --git a/.github/instructions/style-guide.instructions.md b/.github/instructions/style-guide.instructions.md index 8ddb2e6099..ea6303079b 100644 --- a/.github/instructions/style-guide.instructions.md +++ b/.github/instructions/style-guide.instructions.md @@ -249,7 +249,7 @@ In the same module, importing from the specific path is usually necessary to pre ```python # Correct -from pyrit.prompt_target import PromptChatTarget, OpenAIChatTarget +from pyrit.prompt_target import PromptTarget, OpenAIChatTarget # Correct from pyrit.score import ( @@ -263,7 +263,7 @@ from pyrit.score import ( ) # Incorrect (if importing from a non-target module) -from 
pyrit.prompt_target.common.prompt_chat_target import PromptChatTarget +from pyrit.prompt_target.common.prompt_target import PromptTarget from pyrit.prompt_target.openai.openai_chat_target import OpenAIChatTarget ``` diff --git a/doc/code/executor/attack/2_red_teaming_attack.ipynb b/doc/code/executor/attack/2_red_teaming_attack.ipynb index c27d9924af..65e08abdac 100644 --- a/doc/code/executor/attack/2_red_teaming_attack.ipynb +++ b/doc/code/executor/attack/2_red_teaming_attack.ipynb @@ -1546,7 +1546,7 @@ "source": [ "## Other Multi-Turn Attacks\n", "\n", - "The above examples should work using other multi-turn attacks with minimal modification. Check out attacks under `pyrit.executor.attack.multi_turn` for other examples, like Crescendo and Tree of Attacks. These algorithms are always more effective than `RedTeamingAttack`, which is a simple algorithm. However, `RedTeamingAttack` by its nature supports more targets - because it doesn't modify conversation history it can support any `PromptTarget` and not only `PromptChatTargets`." + "The above examples should work using other multi-turn attacks with minimal modification. Check out attacks under `pyrit.executor.attack.multi_turn` for other examples, like Crescendo and Tree of Attacks. These algorithms are always more effective than `RedTeamingAttack`, which is a simple algorithm. However, `RedTeamingAttack` by its nature supports more targets - because it doesn't modify conversation history it can support any `PromptTarget` even if `supports_editable_history` is false" ] } ], diff --git a/doc/code/executor/attack/2_red_teaming_attack.py b/doc/code/executor/attack/2_red_teaming_attack.py index 5193a49802..5aec60a6df 100644 --- a/doc/code/executor/attack/2_red_teaming_attack.py +++ b/doc/code/executor/attack/2_red_teaming_attack.py @@ -328,4 +328,4 @@ # %% [markdown] # ## Other Multi-Turn Attacks # -# The above examples should work using other multi-turn attacks with minimal modification. 
Check out attacks under `pyrit.executor.attack.multi_turn` for other examples, like Crescendo and Tree of Attacks. These algorithms are always more effective than `RedTeamingAttack`, which is a simple algorithm. However, `RedTeamingAttack` by its nature supports more targets - because it doesn't modify conversation history it can support any `PromptTarget` and not only `PromptChatTargets`. +# The above examples should work using other multi-turn attacks with minimal modification. Check out attacks under `pyrit.executor.attack.multi_turn` for other examples, like Crescendo and Tree of Attacks. These algorithms are always more effective than `RedTeamingAttack`, which is a simple algorithm. However, `RedTeamingAttack` by its nature supports more targets - because it doesn't modify conversation history it can support any `PromptTarget` even if `supports_editable_history` is false diff --git a/doc/code/setup/default_values.md b/doc/code/setup/default_values.md index b8d36ff321..04efe67e24 100644 --- a/doc/code/setup/default_values.md +++ b/doc/code/setup/default_values.md @@ -23,7 +23,7 @@ from pyrit.common.apply_defaults import apply_defaults class MyConverter(PromptConverter): @apply_defaults - def __init__(self, *, converter_target: Optional[PromptChatTarget] = None, temperature: Optional[float] = None): + def __init__(self, *, converter_target: Optional[PromptTarget] = None, temperature: Optional[float] = None): self.converter_target = converter_target self.temperature = temperature ``` diff --git a/doc/code/targets/0_prompt_targets.md b/doc/code/targets/0_prompt_targets.md index cfda73d72d..ed6061acb1 100644 --- a/doc/code/targets/0_prompt_targets.md +++ b/doc/code/targets/0_prompt_targets.md @@ -19,20 +19,31 @@ async def send_prompt_async(self, *, message: Message) -> Message: A `Message` object is a normalized object with all the information a target will need to send a prompt, including a way to get a history for that prompt (in the cases that also needs to be 
sent). This is discussed in more depth [here](../memory/3_memory_data_types.md). -## PromptChatTargets vs PromptTargets +## Target Capabilities -A `PromptTarget` is a generic place to send a prompt. With PyRIT, the idea is that it will eventually be consumed by an AI application, but that doesn't have to be immediate. For example, you could have a SharePoint target. Everything you send a prompt to is a `PromptTarget`. Many attacks work generically with any `PromptTarget` including `RedTeamingAttack` and `PromptSendingAttack`. +Every `PromptTarget` declares a `TargetCapabilities` object that describes what the target supports. Attacks, scorers, and converters use these flags to validate that a target is compatible before execution, raising a clear error at construction time rather than failing mid-run. -With some algorithms, you want to send a prompt, set a system prompt, and modify conversation history (including PAIR [@chao2023pair], TAP [@mehrotra2023tap], and flip attack [@li2024flipattack]). These often require a `PromptChatTarget`, which implies you can modify a conversation history. `PromptChatTarget` is a subclass of `PromptTarget`. +| Capability | Type | Description | +|---|---|---| +| `supports_multi_turn` | `bool` | Target accepts conversation history across multiple turns. Required by multi-turn attacks (e.g., PAIR, TAP, Crescendo). | +| `supports_editable_history` | `bool` | Target allows prepended conversation history to be injected into memory. Required by attacks that seed a conversation before starting (e.g., TAP, FlipAttack, ContextCompliance). | +| `supports_multi_message_pieces` | `bool` | Target accepts a single request with multiple pieces (e.g., text + image in one turn). | +| `supports_json_output` | `bool` | Target can be instructed to return valid JSON (e.g., via a `response_format` parameter). | +| `supports_json_schema` | `bool` | Target can constrain output to a specific JSON schema. 
| +| `input_modalities` | `frozenset` | The combinations of data types the target accepts as input (e.g., `{"text"}`, `{"text", "image_path"}`). | +| `output_modalities` | `frozenset` | The data types the target can produce as output (e.g., `{"text"}`, `{"audio_path"}`). | + +Capabilities are defined at the class level via `_DEFAULT_CAPABILITIES` and can be overridden per instance using the `custom_capabilities` constructor parameter. This is useful for targets like `HTTPTarget` or `PlaywrightTarget` where capabilities depend on the specific deployment being wrapped. Here are some examples: -| Example | Is `PromptChatTarget`? | Notes | -|-------------------------------------|---------------------------------------|-------------------------------------------------------------------------------------------------| -| **OpenAIChatTarget** (e.g., GPT-4) | **Yes** (`PromptChatTarget`) | Designed for conversational prompts (system messages, conversation history, etc.). | -| **OpenAIImageTarget** | **No** (not a `PromptChatTarget`) | Used for image generation; does not manage conversation history. | -| **HTTPTarget** | **No** (not a `PromptChatTarget`) | Generic HTTP target. Some apps might allow conversation history, but this target doesn't handle it. | -| **AzureBlobStorageTarget** | **No** (not a `PromptChatTarget`) | Used primarily for storage; not for conversation-based AI. | +| Example | `supports_multi_turn` | `supports_editable_history` | Notes | +|---|---|---|---| +| **OpenAIChatTarget** | Yes | Yes | Full chat target; supports multi-turn and injected history. | +| **OpenAIImageTarget** | No | No | Image generation; single-turn only. | +| **OpenAITTSTarget** | No | No | Text-to-speech; single-turn only. | +| **HTTPTarget** | No (default) | No (default) | Configurable via `custom_capabilities` if the wrapped app supports it. | +| **AzureBlobStorageTarget** | No | No | Storage target; not conversational. 
| ## Multi-Modal Targets diff --git a/doc/code/targets/10_3_websocket_copilot_target.ipynb b/doc/code/targets/10_3_websocket_copilot_target.ipynb index 0b4b1c2714..c024593e24 100644 --- a/doc/code/targets/10_3_websocket_copilot_target.ipynb +++ b/doc/code/targets/10_3_websocket_copilot_target.ipynb @@ -82,7 +82,7 @@ "\n", "The `WebSocketCopilotTarget` supports multi-turn conversations by leveraging Copilot's server-side conversation management. It automatically generates consistent `session_id` and `conversation_id` values for each PyRIT conversation, enabling Copilot to maintain context across multiple turns.\n", "\n", - "However, this target does not support setting a system prompt nor modifying conversation history. As a result, it cannot be used with attack strategies that require altering prior messages (such as PAIR, TAP, or flip attack) or in contexts where a `PromptChatTarget` is required.\n", + "However, this target does not support setting a system prompt nor modifying conversation history. As a result, it cannot be used with attack strategies that require altering prior messages (such as PAIR, TAP, or flip attack) or in contexts where a `PromptTarget` that supports editable history and multi-turn conversations is required.\n", "\n", "Here is a simple multi-turn conversation example:" ] diff --git a/doc/code/targets/10_3_websocket_copilot_target.py b/doc/code/targets/10_3_websocket_copilot_target.py index 3afc88e239..02921f4a44 100644 --- a/doc/code/targets/10_3_websocket_copilot_target.py +++ b/doc/code/targets/10_3_websocket_copilot_target.py @@ -49,7 +49,7 @@ # # The `WebSocketCopilotTarget` supports multi-turn conversations by leveraging Copilot's server-side conversation management. It automatically generates consistent `session_id` and `conversation_id` values for each PyRIT conversation, enabling Copilot to maintain context across multiple turns. # -# However, this target does not support setting a system prompt nor modifying conversation history. 
As a result, it cannot be used with attack strategies that require altering prior messages (such as PAIR, TAP, or flip attack) or in contexts where a `PromptChatTarget` is required. +# However, this target does not support setting a system prompt nor modifying conversation history. As a result, it cannot be used with attack strategies that require altering prior messages (such as PAIR, TAP, or flip attack) or in contexts where a `PromptTarget` that supports editable history and multi-turn conversations is required. # # Here is a simple multi-turn conversation example: diff --git a/doc/code/targets/8_non_llm_targets.ipynb b/doc/code/targets/8_non_llm_targets.ipynb index 76f1cb95f4..b1536721dd 100644 --- a/doc/code/targets/8_non_llm_targets.ipynb +++ b/doc/code/targets/8_non_llm_targets.ipynb @@ -10,7 +10,8 @@ "Prompt Targets are most often LLMs, but not always. They should be thought of as anything that you send prompts to.\n", "\n", "\n", - "The `AzureBlobStorageTarget` inherits from `PromptTarget`, meaning it has functionality to send prompts. In contrast to `PromptChatTarget`s, `PromptTarget`s do not interact with chat assistants.\n", + "The `AzureBlobStorageTarget` inherits from `PromptTarget`, meaning it has functionality to send prompts. 
It does not have multi-turn conversation capabilities.\n", + "\n", "This prompt target in particular will take in a prompt and upload it as a text file to the provided Azure Storage Account Container.\n", "This could be useful for Cross-Prompt Injection Attack scenarios, for example, where there is a jailbreak within a file.\n", "\n", @@ -81,7 +82,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.11" + "version": "3.11.15" } }, "nbformat": 4, diff --git a/doc/code/targets/8_non_llm_targets.py b/doc/code/targets/8_non_llm_targets.py index a233760572..d5619cc436 100644 --- a/doc/code/targets/8_non_llm_targets.py +++ b/doc/code/targets/8_non_llm_targets.py @@ -14,7 +14,7 @@ # Prompt Targets are most often LLMs, but not always. They should be thought of as anything that you send prompts to. # # -# The `AzureBlobStorageTarget` inherits from `PromptTarget`, meaning it has functionality to send prompts. In contrast to `PromptChatTarget`s, `PromptTarget`s do not interact with chat assistants. +# The `AzureBlobStorageTarget` inherits from `PromptTarget`, meaning it has functionality to send prompts. It does not have multi-turn conversation capabilities. # This prompt target in particular will take in a prompt and upload it as a text file to the provided Azure Storage Account Container. # This could be useful for Cross-Prompt Injection Attack scenarios, for example, where there is a jailbreak within a file. # diff --git a/doc/cookbooks/2_precomputing_turns.ipynb b/doc/cookbooks/2_precomputing_turns.ipynb index fef999a2c0..91704050fc 100644 --- a/doc/cookbooks/2_precomputing_turns.ipynb +++ b/doc/cookbooks/2_precomputing_turns.ipynb @@ -9,7 +9,7 @@ "\n", "Here is a scenario; you want to use a powerful attack technique like `Crescendo` [@russinovich2024crescendo] or `TAP` [@mehrotra2023tap]. That's great! These are the most successful attacks in our arsenal. But there's a catch. 
They are slow.\n", "\n", - "One way to speed these up is to generate the first N turns in advance, and start these algorithms on a later turn. This is possible on any target where you can modify prompt history (any PromptChatTarget). And it can be extremely useful if you want to test a new model after having tested an old one.\n", + "One way to speed these up is to generate the first N turns in advance, and start these algorithms on a later turn. This is possible on any target where you can modify prompt history (any PromptTarget that supports editable history). And it can be extremely useful if you want to test a new model after having tested an old one.\n", "\n", "This cookbook (like all cookbooks in our docs) takes you step by step, tackling this problem using our best practices and in a way that's the most generic. Sometimes there are issues we want to solve, but haven't yet, and we try to note those and we'll try to keep this up to date as we improve. Comments are added around the pieces you may want to configure as you adapt to your scenario.\n", "\n", diff --git a/doc/cookbooks/2_precomputing_turns.py b/doc/cookbooks/2_precomputing_turns.py index 6ba5373938..d14c22bb3d 100644 --- a/doc/cookbooks/2_precomputing_turns.py +++ b/doc/cookbooks/2_precomputing_turns.py @@ -17,7 +17,7 @@ # # Here is a scenario; you want to use a powerful attack technique like `Crescendo` [@russinovich2024crescendo] or `TAP` [@mehrotra2023tap]. That's great! These are the most successful attacks in our arsenal. But there's a catch. They are slow. # -# One way to speed these up is to generate the first N turns in advance, and start these algorithms on a later turn. This is possible on any target where you can modify prompt history (any PromptChatTarget). And it can be extremely useful if you want to test a new model after having tested an old one. +# One way to speed these up is to generate the first N turns in advance, and start these algorithms on a later turn. 
This is possible on any target where you can modify prompt history (any PromptTarget that supports editable history). And it can be extremely useful if you want to test a new model after having tested an old one. # # This cookbook (like all cookbooks in our docs) takes you step by step, tackling this problem using our best practices and in a way that's the most generic. Sometimes there are issues we want to solve, but haven't yet, and we try to note those and we'll try to keep this up to date as we improve. Comments are added around the pieces you may want to configure as you adapt to your scenario. # diff --git a/pyrit/executor/attack/component/conversation_manager.py b/pyrit/executor/attack/component/conversation_manager.py index 7a27cb5666..09d92ea233 100644 --- a/pyrit/executor/attack/component/conversation_manager.py +++ b/pyrit/executor/attack/component/conversation_manager.py @@ -7,6 +7,7 @@ from dataclasses import dataclass, field from typing import TYPE_CHECKING, Any, Optional +from pyrit.common.deprecation import print_deprecation_message from pyrit.common.utils import combine_dict from pyrit.executor.attack.component.prepended_conversation_config import ( PrependedConversationConfig, @@ -20,7 +21,6 @@ ) from pyrit.prompt_normalizer.prompt_normalizer import PromptNormalizer from pyrit.prompt_target import PromptTarget -from pyrit.prompt_target.common.prompt_chat_target import PromptChatTarget if TYPE_CHECKING: from pyrit.executor.attack.core import AttackContext @@ -242,7 +242,7 @@ def get_last_message( def set_system_prompt( self, *, - target: PromptChatTarget, + target: PromptTarget, conversation_id: str, system_prompt: str, labels: Optional[dict[str, str]] = None, @@ -250,13 +250,23 @@ def set_system_prompt( """ Set or update the system prompt for a conversation. + .. deprecated:: 0.14.0 + Use ``prepended_conversation`` on the attack context instead. 
Pass a + ``Message.from_system_prompt(system_prompt)`` as the first element of + ``AttackParameters.prepended_conversation``. This method will be removed in 0.14.0. + Args: target: The chat target to set the system prompt on. conversation_id: Unique identifier for the conversation. system_prompt: The system prompt text. labels: Optional labels to associate with the system prompt. """ - target.set_system_prompt( + print_deprecation_message( + old_item="ConversationManager.set_system_prompt", + new_item="AttackParameters.prepended_conversation", + removed_in="0.14.0", + ) + target._set_target_system_prompt( system_prompt=system_prompt, conversation_id=conversation_id, attack_identifier=self._attack_identifier, @@ -283,11 +293,11 @@ async def initialize_context_async( 3. Updates context.executed_turns for multi-turn attacks 4. Sets context.next_message if there's an unanswered user message - For PromptChatTarget: + For PromptTarget that support editable conversation history & multi-turn conversations: - Adds prepended messages to memory with simulated_assistant role - All messages get new UUIDs - For non-chat PromptTarget: + For PromptTarget that do NOT support editable conversation history & multi-turn conversations: - If `config.non_chat_target_behavior="normalize_first_turn"`: normalizes conversation to string and prepends to context.next_message - If `config.non_chat_target_behavior="raise"`: raises ValueError @@ -305,8 +315,8 @@ async def initialize_context_async( ConversationState with turn_count and last_assistant_message_scores. Raises: - ValueError: If conversation_id is empty, or if prepended_conversation - requires a PromptChatTarget but target is not one. + ValueError: If conversation_id is empty, or if prepended_conversation requires a target that supports + multi-turn conversations & editable history but target does not support these capabilities. 
""" if not conversation_id: raise ValueError("conversation_id cannot be empty") @@ -322,7 +332,7 @@ async def initialize_context_async( return state # Handle target type compatibility - is_chat_target = isinstance(target, PromptChatTarget) + is_chat_target = target.capabilities.supports_multi_turn and target.capabilities.supports_editable_history if not is_chat_target: return await self._handle_non_chat_target_async( context=context, @@ -348,7 +358,7 @@ async def _handle_non_chat_target_async( config: Optional["PrependedConversationConfig"], ) -> ConversationState: """ - Handle prepended conversation for non-chat targets. + Handle prepended conversation for targets that don't support conversation management. Args: context: The attack context. @@ -366,8 +376,9 @@ async def _handle_non_chat_target_async( if config.non_chat_target_behavior == "raise": raise ValueError( - "prepended_conversation requires the objective target to be a PromptChatTarget. " - "Non-chat objective targets do not support conversation history. " + "prepended_conversation requires the objective target to be a PromptTarget " + "that supports multi-turn conversations & editable history. " + "Non-chat targets do not support conversation history. " "Use PrependedConversationConfig with non_chat_target_behavior='normalize_first_turn' " "to normalize the conversation into the first message instead." 
) diff --git a/pyrit/executor/attack/component/prepended_conversation_config.py b/pyrit/executor/attack/component/prepended_conversation_config.py index c78ffad767..413bfe3283 100644 --- a/pyrit/executor/attack/component/prepended_conversation_config.py +++ b/pyrit/executor/attack/component/prepended_conversation_config.py @@ -22,7 +22,8 @@ class PrependedConversationConfig: This class provides control over: - Which message roles should have request converters applied - How to normalize conversation history for non-chat objective targets - - What to do when the objective target is not a PromptChatTarget + - What to do when the objective target does not support multi-turn conversations & + editable history (e.g., single-turn LLMs, image generation models, etc.) """ # Roles for which request converters should be applied to prepended messages. @@ -36,13 +37,14 @@ class PrependedConversationConfig: # ConversationContextNormalizer is used that produces "Turn N: User/Assistant" format. message_normalizer: Optional[MessageStringNormalizer] = None - # Behavior when the target is a PromptTarget but not a PromptChatTarget: + # Behavior when the target does not support conversation management (i.e., multi-turn conversations + # and conversations with editable history): # - "normalize_first_turn": Normalize the prepended conversation into a string and # store it in ConversationState.normalized_prepended_context. This context will be # prepended to the first message sent to the target. Uses objective_target_context_normalizer # if provided, otherwise falls back to ConversationContextNormalizer. # - "raise": Raise a ValueError. Use this when prepended conversation history must be - # maintained by the target (i.e., target must be a PromptChatTarget). + # maintained by the target (i.e., target must support multi-turn & editable history). 
non_chat_target_behavior: Literal["normalize_first_turn", "raise"] = "normalize_first_turn" def get_message_normalizer(self) -> MessageStringNormalizer: @@ -78,7 +80,7 @@ def for_non_chat_target( apply_converters_to_roles: Optional[list[ChatMessageRole]] = None, ) -> PrependedConversationConfig: """ - Create a configuration for use with non-chat targets. + Create a configuration for use with targets that don't support conversation management. This configuration normalizes the prepended conversation into a text block that will be prepended to the first message sent to the target. @@ -90,7 +92,8 @@ def for_non_chat_target( Defaults to all roles. Returns: - A configuration that normalizes the prepended conversation for non-chat targets. + A configuration that normalizes the prepended conversation for targets that don't support conversation + management. """ return cls( apply_converters_to_roles=( diff --git a/pyrit/executor/attack/core/attack_config.py b/pyrit/executor/attack/core/attack_config.py index 7d128ffd79..8c9bb8e2b1 100644 --- a/pyrit/executor/attack/core/attack_config.py +++ b/pyrit/executor/attack/core/attack_config.py @@ -7,7 +7,7 @@ from pyrit.executor.core import StrategyConverterConfig from pyrit.models import SeedPrompt -from pyrit.prompt_target.common.prompt_chat_target import PromptChatTarget +from pyrit.prompt_target.common.prompt_target import PromptTarget from pyrit.score import Scorer, TrueFalseScorer @@ -23,7 +23,7 @@ class AttackAdversarialConfig: _DEFAULT_SEED_PROMPT = "" # Adversarial chat target for the attack - target: PromptChatTarget + target: PromptTarget # Path to the YAML file containing the system prompt for the adversarial chat target system_prompt_path: Optional[Union[str, Path]] = None diff --git a/pyrit/executor/attack/core/attack_executor.py b/pyrit/executor/attack/core/attack_executor.py index 06850fb33f..8b5f092f0b 100644 --- a/pyrit/executor/attack/core/attack_executor.py +++ b/pyrit/executor/attack/core/attack_executor.py @@ 
-34,7 +34,7 @@ from pyrit.models import Message, SeedAttackGroup if TYPE_CHECKING: - from pyrit.prompt_target import PromptChatTarget + from pyrit.prompt_target import PromptTarget from pyrit.score import TrueFalseScorer AttackResultT = TypeVar("AttackResultT") @@ -145,7 +145,7 @@ async def execute_attack_from_seed_groups_async( *, attack: AttackStrategy[AttackStrategyContextT, AttackStrategyResultT], seed_groups: Sequence[SeedAttackGroup], - adversarial_chat: Optional["PromptChatTarget"] = None, + adversarial_chat: Optional["PromptTarget"] = None, objective_scorer: Optional["TrueFalseScorer"] = None, field_overrides: Optional[Sequence[dict[str, Any]]] = None, return_partial_on_failure: bool = False, diff --git a/pyrit/executor/attack/core/attack_parameters.py b/pyrit/executor/attack/core/attack_parameters.py index 95635cde3b..336e5262eb 100644 --- a/pyrit/executor/attack/core/attack_parameters.py +++ b/pyrit/executor/attack/core/attack_parameters.py @@ -10,7 +10,7 @@ from pyrit.models import Message, SeedAttackGroup, SeedGroup if TYPE_CHECKING: - from pyrit.prompt_target import PromptChatTarget + from pyrit.prompt_target import PromptTarget from pyrit.score import TrueFalseScorer AttackParamsT = TypeVar("AttackParamsT", bound="AttackParameters") @@ -78,7 +78,7 @@ async def from_seed_group_async( cls: type[AttackParamsT], *, seed_group: SeedAttackGroup, - adversarial_chat: Optional[PromptChatTarget] = None, + adversarial_chat: Optional[PromptTarget] = None, objective_scorer: Optional[TrueFalseScorer] = None, **overrides: Any, ) -> AttackParamsT: diff --git a/pyrit/executor/attack/multi_turn/chunked_request.py b/pyrit/executor/attack/multi_turn/chunked_request.py index 1a70c89195..81a9cf8b67 100644 --- a/pyrit/executor/attack/multi_turn/chunked_request.py +++ b/pyrit/executor/attack/multi_turn/chunked_request.py @@ -230,11 +230,10 @@ async def _setup_async(self, *, context: ChunkedRequestAttackContext) -> None: Raises: ValueError: If the objective target does not 
support multi-turn conversations. """ - if not self._objective_target.capabilities.supports_multi_turn: - raise ValueError( - "ChunkedRequestAttack requires a multi-turn target. " - "The objective target does not support multi-turn conversations." - ) + self._objective_target.capabilities.validate( + required={"supports_multi_turn"}, + context="objective_target", + ) # Ensure the context has a session context.session = ConversationSession() diff --git a/pyrit/executor/attack/multi_turn/crescendo.py b/pyrit/executor/attack/multi_turn/crescendo.py index 4a180d5df3..1559a6c2b8 100644 --- a/pyrit/executor/attack/multi_turn/crescendo.py +++ b/pyrit/executor/attack/multi_turn/crescendo.py @@ -43,7 +43,7 @@ SeedPrompt, ) from pyrit.prompt_normalizer import PromptNormalizer -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget from pyrit.score import ( FloatScaleThresholdScorer, Scorer, @@ -121,7 +121,7 @@ class CrescendoAttack(MultiTurnAttackStrategy[CrescendoAttackContext, CrescendoA def __init__( self, *, - objective_target: PromptChatTarget = REQUIRED_VALUE, # type: ignore[assignment] + objective_target: PromptTarget = REQUIRED_VALUE, # type: ignore[assignment] attack_adversarial_config: AttackAdversarialConfig, attack_converter_config: Optional[AttackConverterConfig] = None, attack_scoring_config: Optional[AttackScoringConfig] = None, @@ -134,7 +134,8 @@ def __init__( Initialize the Crescendo attack strategy. Args: - objective_target (PromptChatTarget): The target system to attack. Must be a PromptChatTarget. + objective_target (PromptTarget): The target system to attack. Must support multi-turn conversations + & editable history attack_adversarial_config (AttackAdversarialConfig): Configuration for the adversarial component, including the adversarial chat target and optional system prompt path. 
attack_converter_config (Optional[AttackConverterConfig]): Configuration for attack converters, @@ -143,12 +144,12 @@ def __init__( prompt_normalizer (Optional[PromptNormalizer]): Normalizer for prompts. max_backtracks (int): Maximum number of backtracks allowed. max_turns (int): Maximum number of turns allowed. - prepended_conversation_config (Optional[PrependedConversationConfiguration]): + prepended_conversation_config (Optional[PrependedConversationConfig]): Configuration for how to process prepended conversations. Controls converter application by role, message normalization, and non-chat target behavior. Raises: - ValueError: If objective_target is not a PromptChatTarget. + ValueError: If parameters are invalid. """ # Initialize base class super().__init__(objective_target=objective_target, logger=logger, context_type=CrescendoAttackContext) @@ -261,12 +262,10 @@ async def _setup_async(self, *, context: CrescendoAttackContext) -> None: Raises: ValueError: If the objective target does not support multi-turn conversations. """ - if not self._objective_target.capabilities.supports_multi_turn: - raise ValueError( - "CrescendoAttack requires a multi-turn target. Crescendo fundamentally relies on " - "multi-turn conversation history to gradually escalate prompts. " - "Use RedTeamingAttack or TreeOfAttacksWithPruning instead." 
- ) + self._objective_target.capabilities.validate( + required={"supports_multi_turn"}, + context="objective_target", + ) # Ensure the context has a session context.session = ConversationSession() @@ -307,7 +306,7 @@ async def _setup_async(self, *, context: CrescendoAttackContext) -> None: conversation_context=adversarial_chat_context, ) - self._adversarial_chat.set_system_prompt( + self._adversarial_chat._set_target_system_prompt( system_prompt=system_prompt, conversation_id=context.session.adversarial_chat_conversation_id, attack_identifier=self.get_identifier(), diff --git a/pyrit/executor/attack/multi_turn/multi_prompt_sending.py b/pyrit/executor/attack/multi_turn/multi_prompt_sending.py index a9d4b75adc..cd2eae9812 100644 --- a/pyrit/executor/attack/multi_turn/multi_prompt_sending.py +++ b/pyrit/executor/attack/multi_turn/multi_prompt_sending.py @@ -32,7 +32,7 @@ from pyrit.score import Scorer if TYPE_CHECKING: - from pyrit.prompt_target import PromptChatTarget + from pyrit.prompt_target import PromptTarget from pyrit.score import TrueFalseScorer logger = logging.getLogger(__name__) @@ -54,7 +53,7 @@ async def from_seed_group_async( cls: type["MultiPromptSendingAttackParameters"], seed_group: SeedAttackGroup, *, - adversarial_chat: Optional["PromptChatTarget"] = None, + adversarial_chat: Optional["PromptTarget"] = None, objective_scorer: Optional["TrueFalseScorer"] = None, **overrides: Any, ) -> "MultiPromptSendingAttackParameters": @@ -93,6 +92,13 @@ async def from_seed_group_async( f"MultiPromptSendingAttackParameters does not accept: {invalid_fields}. 
Only accepts: {valid_fields}" ) + # Validate that the adversarial chat target supports required capabilities + if adversarial_chat: + adversarial_chat.capabilities.validate( + required={"supports_multi_turn", "supports_editable_history"}, + context="adversarial_chat", + ) + # Build parameters with only objective, user_messages, and memory_labels return cls( objective=seed_group.objective.value, @@ -208,11 +214,10 @@ async def _setup_async(self, *, context: MultiTurnAttackContext[Any]) -> None: Raises: ValueError: If the objective target does not support multi-turn conversations. """ - if not self._objective_target.capabilities.supports_multi_turn: - raise ValueError( - "MultiPromptSendingAttack requires a multi-turn target. " - "The objective target does not support multi-turn conversations." - ) + self._objective_target.capabilities.validate( + required={"supports_multi_turn"}, + context="objective_target", + ) # Ensure the context has a session (like red_teaming.py does) context.session = ConversationSession() diff --git a/pyrit/executor/attack/multi_turn/red_teaming.py b/pyrit/executor/attack/multi_turn/red_teaming.py index f02b2e5d37..05339dfb9d 100644 --- a/pyrit/executor/attack/multi_turn/red_teaming.py +++ b/pyrit/executor/attack/multi_turn/red_teaming.py @@ -253,7 +253,7 @@ async def _setup_async(self, *, context: MultiTurnAttackContext[Any]) -> None: if not adversarial_system_prompt: raise ValueError("Adversarial chat system prompt must be defined") - self._adversarial_chat.set_system_prompt( + self._adversarial_chat._set_target_system_prompt( system_prompt=adversarial_system_prompt, conversation_id=context.session.adversarial_chat_conversation_id, attack_identifier=self.get_identifier(), diff --git a/pyrit/executor/attack/multi_turn/simulated_conversation.py b/pyrit/executor/attack/multi_turn/simulated_conversation.py index 40c3bb515a..7bccd288f8 100644 --- a/pyrit/executor/attack/multi_turn/simulated_conversation.py +++ 
b/pyrit/executor/attack/multi_turn/simulated_conversation.py @@ -26,7 +26,7 @@ if TYPE_CHECKING: from pathlib import Path - from pyrit.prompt_target import PromptChatTarget + from pyrit.prompt_target import PromptTarget from pyrit.score import TrueFalseScorer logger = logging.getLogger(__name__) @@ -35,7 +35,7 @@ async def generate_simulated_conversation_async( *, objective: str, - adversarial_chat: PromptChatTarget, + adversarial_chat: PromptTarget, objective_scorer: TrueFalseScorer, num_turns: int = 3, starting_sequence: int = 0, @@ -170,7 +170,7 @@ async def _generate_next_message_async( *, objective: str, conversation_messages: list[Message], - adversarial_chat: PromptChatTarget, + adversarial_chat: PromptTarget, next_message_system_prompt_path: Union[str, Path], ) -> Message: """ @@ -215,7 +215,7 @@ async def _generate_next_message_async( ) # Set the system prompt on the target - adversarial_chat.set_system_prompt( + adversarial_chat._set_target_system_prompt( system_prompt=system_prompt, conversation_id=request_message.conversation_id, ) diff --git a/pyrit/executor/attack/multi_turn/tree_of_attacks.py b/pyrit/executor/attack/multi_turn/tree_of_attacks.py index e92bd1cf67..ed5ee4f7f5 100644 --- a/pyrit/executor/attack/multi_turn/tree_of_attacks.py +++ b/pyrit/executor/attack/multi_turn/tree_of_attacks.py @@ -50,7 +50,7 @@ SeedPrompt, ) from pyrit.prompt_normalizer import PromptConverterConfiguration, PromptNormalizer -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget from pyrit.score import ( FloatScaleThresholdScorer, Scorer, @@ -257,8 +257,8 @@ class _TreeOfAttacksNode: def __init__( self, *, - objective_target: PromptChatTarget, - adversarial_chat: PromptChatTarget, + objective_target: PromptTarget, + adversarial_chat: PromptTarget, adversarial_chat_seed_prompt: SeedPrompt, adversarial_chat_prompt_template: SeedPrompt, adversarial_chat_system_seed_prompt: SeedPrompt, @@ -279,8 +279,8 @@ def __init__( Initialize a 
tree node. Args: - objective_target (PromptChatTarget): The target to attack. - adversarial_chat (PromptChatTarget): The chat target for generating adversarial prompts. + objective_target (PromptTarget): The target to attack. + adversarial_chat (PromptTarget): The target for generating adversarial prompts. adversarial_chat_seed_prompt (SeedPrompt): The seed prompt for the first turn. adversarial_chat_prompt_template (SeedPrompt): The template for subsequent turns. adversarial_chat_system_seed_prompt (SeedPrompt): The system prompt for the adversarial chat @@ -978,7 +978,7 @@ async def _generate_first_turn_prompt_async(self, objective: str) -> str: conversation_context=self._conversation_context, ) - self._adversarial_chat.set_system_prompt( + self._adversarial_chat._set_target_system_prompt( system_prompt=system_prompt, conversation_id=self.adversarial_chat_conversation_id, attack_identifier=self._attack_id, @@ -1254,7 +1254,7 @@ class TreeOfAttacksWithPruningAttack(AttackStrategy[TAPAttackContext, TAPAttackR def __init__( self, *, - objective_target: PromptChatTarget = REQUIRED_VALUE, # type: ignore[assignment] + objective_target: PromptTarget = REQUIRED_VALUE, # type: ignore[assignment] attack_adversarial_config: AttackAdversarialConfig, attack_converter_config: Optional[AttackConverterConfig] = None, attack_scoring_config: Optional[AttackScoringConfig] = None, @@ -1271,7 +1271,7 @@ def __init__( Initialize the Tree of Attacks with Pruning attack strategy. Args: - objective_target (PromptChatTarget): The target system to attack. + objective_target (PromptTarget): The target system to attack. attack_adversarial_config (AttackAdversarialConfig): Configuration for the adversarial chat component. attack_converter_config (Optional[AttackConverterConfig]): Configuration for attack converters. Defaults to None. 
@@ -1293,7 +1293,7 @@ def __init__( Raises: ValueError: If attack_scoring_config uses a non-FloatScaleThresholdScorer objective scorer, - if target is not PromptChatTarget, or if parameters are invalid. + if target does not support multi-turn or editable history, or if parameters are invalid. """ # Validate tree parameters if tree_depth < 1: @@ -1322,8 +1322,10 @@ def __init__( # Initialize adversarial configuration self._adversarial_chat = attack_adversarial_config.target - if not isinstance(self._adversarial_chat, PromptChatTarget): - raise ValueError("The adversarial target must be a PromptChatTarget for TAP attack.") + self._adversarial_chat.capabilities.validate( + required={"supports_multi_turn", "supports_editable_history"}, + context="adversarial_chat", + ) # Load system prompts self._adversarial_chat_system_prompt_path = ( @@ -1855,7 +1857,7 @@ def _create_attack_node( generate adversarial prompts and evaluate responses. """ node = _TreeOfAttacksNode( - objective_target=cast("PromptChatTarget", self._objective_target), + objective_target=self._objective_target, adversarial_chat=self._adversarial_chat, adversarial_chat_seed_prompt=self._adversarial_chat_seed_prompt, adversarial_chat_system_seed_prompt=self._adversarial_chat_system_seed_prompt, diff --git a/pyrit/executor/attack/single_turn/context_compliance.py b/pyrit/executor/attack/single_turn/context_compliance.py index d03ab2a41f..de9ad8d080 100644 --- a/pyrit/executor/attack/single_turn/context_compliance.py +++ b/pyrit/executor/attack/single_turn/context_compliance.py @@ -23,7 +23,7 @@ SeedDataset, ) from pyrit.prompt_normalizer import PromptNormalizer -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget logger = logging.getLogger(__name__) @@ -57,7 +57,7 @@ class ContextComplianceAttack(PromptSendingAttack): def __init__( self, *, - objective_target: PromptChatTarget = REQUIRED_VALUE, # type: ignore[assignment] + objective_target: PromptTarget = 
REQUIRED_VALUE, # type: ignore[assignment] attack_adversarial_config: AttackAdversarialConfig, attack_converter_config: Optional[AttackConverterConfig] = None, attack_scoring_config: Optional[AttackScoringConfig] = None, @@ -70,7 +70,7 @@ def __init__( Initialize the context compliance attack strategy. Args: - objective_target (PromptChatTarget): The target system to attack. Must be a PromptChatTarget. + objective_target (PromptTarget): The target system to attack. attack_adversarial_config (AttackAdversarialConfig): Configuration for the adversarial component, including the adversarial chat target used for rephrasing. attack_converter_config (Optional[AttackConverterConfig]): Configuration for attack converters, diff --git a/pyrit/executor/attack/single_turn/flip_attack.py b/pyrit/executor/attack/single_turn/flip_attack.py index cc4a230bd6..3148a4435c 100644 --- a/pyrit/executor/attack/single_turn/flip_attack.py +++ b/pyrit/executor/attack/single_turn/flip_attack.py @@ -21,7 +21,7 @@ ) from pyrit.prompt_converter import FlipConverter from pyrit.prompt_normalizer import PromptConverterConfiguration, PromptNormalizer -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget logger = logging.getLogger(__name__) @@ -39,7 +39,7 @@ class FlipAttack(PromptSendingAttack): @apply_defaults def __init__( self, - objective_target: PromptChatTarget = REQUIRED_VALUE, # type: ignore[assignment] + objective_target: PromptTarget = REQUIRED_VALUE, # type: ignore[assignment] attack_converter_config: Optional[AttackConverterConfig] = None, attack_scoring_config: Optional[AttackScoringConfig] = None, prompt_normalizer: Optional[PromptNormalizer] = None, @@ -47,7 +47,7 @@ def __init__( ) -> None: """ Args: - objective_target (PromptChatTarget): The target system to attack. + objective_target (PromptTarget): The target system to attack. attack_converter_config (AttackConverterConfig, Optional): Configuration for the prompt converters. 
attack_scoring_config (AttackScoringConfig, Optional): Configuration for scoring components. prompt_normalizer (PromptNormalizer, Optional): Normalizer for handling prompts. diff --git a/pyrit/executor/attack/single_turn/role_play.py b/pyrit/executor/attack/single_turn/role_play.py index 87a904d7ea..3c63563516 100644 --- a/pyrit/executor/attack/single_turn/role_play.py +++ b/pyrit/executor/attack/single_turn/role_play.py @@ -20,7 +20,7 @@ ) from pyrit.prompt_converter import LLMGenericTextConverter from pyrit.prompt_normalizer import PromptConverterConfiguration, PromptNormalizer -from pyrit.prompt_target import PromptChatTarget, PromptTarget +from pyrit.prompt_target import PromptTarget logger = logging.getLogger(__name__) @@ -66,7 +66,7 @@ def __init__( self, *, objective_target: PromptTarget = REQUIRED_VALUE, # type: ignore[assignment] - adversarial_chat: PromptChatTarget, + adversarial_chat: PromptTarget, role_play_definition_path: pathlib.Path, attack_converter_config: Optional[AttackConverterConfig] = None, attack_scoring_config: Optional[AttackScoringConfig] = None, @@ -78,7 +78,7 @@ def __init__( Args: objective_target (PromptTarget): The target system to attack. - adversarial_chat (PromptChatTarget): The adversarial chat target used to rephrase + adversarial_chat (PromptTarget): The adversarial chat target used to rephrase objectives into role-play scenarios. role_play_definition_path (pathlib.Path): Path to the YAML file containing role-play definitions (rephrase instructions, user start turn, assistant start turn). 
diff --git a/pyrit/executor/promptgen/anecdoctor.py b/pyrit/executor/promptgen/anecdoctor.py index 208c4040d7..40ee787fc7 100644 --- a/pyrit/executor/promptgen/anecdoctor.py +++ b/pyrit/executor/promptgen/anecdoctor.py @@ -26,7 +26,7 @@ from pyrit.prompt_normalizer import PromptNormalizer if TYPE_CHECKING: - from pyrit.prompt_target import PromptChatTarget + from pyrit.prompt_target import PromptTarget logger = logging.getLogger(__name__) @@ -102,8 +102,8 @@ class AnecdoctorGenerator( def __init__( self, *, - objective_target: PromptChatTarget, - processing_model: Optional[PromptChatTarget] = None, + objective_target: PromptTarget, + processing_model: Optional[PromptTarget] = None, converter_config: Optional[StrategyConverterConfig] = None, prompt_normalizer: Optional[PromptNormalizer] = None, ) -> None: @@ -111,8 +111,8 @@ def __init__( Initialize the Anecdoctor prompt generation strategy. Args: - objective_target (PromptChatTarget): The chat model to be used for prompt generation. - processing_model (Optional[PromptChatTarget]): The model used for knowledge graph extraction. + objective_target (PromptTarget): The chat model to be used for prompt generation. + processing_model (Optional[PromptTarget]): The model used for knowledge graph extraction. If provided, the generator will extract a knowledge graph from the examples before generation. If None, the generator will use few-shot examples directly. converter_config (Optional[StrategyConverterConfig]): Configuration for prompt converters. 
@@ -209,7 +209,7 @@ async def _setup_async(self, *, context: AnecdoctorContext) -> None: system_prompt = self._system_prompt_template.format(language=context.language, type=context.content_type) # Configure the target with the system prompt - self._objective_target.set_system_prompt( + self._objective_target._set_target_system_prompt( system_prompt=system_prompt, conversation_id=context.conversation_id, attack_identifier=self.get_identifier(), @@ -370,7 +370,7 @@ async def _extract_knowledge_graph_async(self, *, context: AnecdoctorContext) -> kg_conversation_id = str(uuid.uuid4()) # Set system prompt on processing model - self._processing_model.set_system_prompt( + self._processing_model._set_target_system_prompt( system_prompt=kg_system_prompt, conversation_id=kg_conversation_id, attack_identifier=self.get_identifier(), diff --git a/pyrit/executor/promptgen/fuzzer/fuzzer.py b/pyrit/executor/promptgen/fuzzer/fuzzer.py index 7021c0d6ad..0a8ff74f96 100644 --- a/pyrit/executor/promptgen/fuzzer/fuzzer.py +++ b/pyrit/executor/promptgen/fuzzer/fuzzer.py @@ -36,7 +36,7 @@ if TYPE_CHECKING: from pyrit.executor.promptgen.fuzzer.fuzzer_converter_base import FuzzerConverter - from pyrit.prompt_target import PromptChatTarget, PromptTarget + from pyrit.prompt_target import PromptTarget logger = logging.getLogger(__name__) @@ -539,7 +539,7 @@ def with_default_scorer( *, objective_target: PromptTarget, template_converters: list[FuzzerConverter], - scoring_target: PromptChatTarget, + scoring_target: PromptTarget, converter_config: Optional[StrategyConverterConfig] = None, prompt_normalizer: Optional[PromptNormalizer] = None, frequency_weight: float = _DEFAULT_FREQUENCY_WEIGHT, @@ -562,7 +562,7 @@ def with_default_scorer( Args: objective_target (PromptTarget): The target to send the prompts to. template_converters (List[FuzzerConverter]): The converters to apply on the selected jailbreak template. - scoring_target (PromptChatTarget): The chat target to use for scoring responses. 
+ scoring_target (PromptTarget): The target to use for scoring responses. converter_config (Optional[StrategyConverterConfig]): Configuration for prompt converters. prompt_normalizer (Optional[PromptNormalizer]): The prompt normalizer to use. frequency_weight (float): Constant that balances between high reward and selection frequency. diff --git a/pyrit/executor/promptgen/fuzzer/fuzzer_converter_base.py b/pyrit/executor/promptgen/fuzzer/fuzzer_converter_base.py index 4db32e65e8..99e83fb144 100644 --- a/pyrit/executor/promptgen/fuzzer/fuzzer_converter_base.py +++ b/pyrit/executor/promptgen/fuzzer/fuzzer_converter_base.py @@ -19,7 +19,7 @@ SeedPrompt, ) from pyrit.prompt_converter import ConverterResult, PromptConverter -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget logger = logging.getLogger(__name__) @@ -40,14 +40,14 @@ class FuzzerConverter(PromptConverter): def __init__( self, *, - converter_target: PromptChatTarget = REQUIRED_VALUE, # type: ignore[assignment] + converter_target: PromptTarget = REQUIRED_VALUE, # type: ignore[assignment] prompt_template: SeedPrompt, ): """ Initialize the converter with the specified chat target and prompt template. Args: - converter_target (PromptChatTarget): Chat target used to perform fuzzing on user prompt. + converter_target (PromptTarget): Target used to perform fuzzing on user prompt. Can be omitted if a default has been configured via PyRIT initialization. prompt_template (SeedPrompt): Template to be used instead of the default system prompt with instructions for the chat target. @@ -55,6 +55,10 @@ def __init__( Raises: ValueError: If converter_target is not provided and no default has been configured. 
""" + converter_target.capabilities.validate( + required={"supports_multi_turn", "supports_editable_history"}, + context="converter_target", + ) self.converter_target = converter_target self.system_prompt = prompt_template.value self.template_label = "TEMPLATE" @@ -81,7 +85,7 @@ async def convert_async(self, *, prompt: str, input_type: PromptDataType = "text conversation_id = str(uuid.uuid4()) - self.converter_target.set_system_prompt( + self.converter_target._set_target_system_prompt( system_prompt=self.system_prompt, conversation_id=conversation_id, attack_identifier=None, diff --git a/pyrit/executor/promptgen/fuzzer/fuzzer_crossover_converter.py b/pyrit/executor/promptgen/fuzzer/fuzzer_crossover_converter.py index 2f2b44f4a7..90dc8333b2 100644 --- a/pyrit/executor/promptgen/fuzzer/fuzzer_crossover_converter.py +++ b/pyrit/executor/promptgen/fuzzer/fuzzer_crossover_converter.py @@ -13,7 +13,7 @@ ) from pyrit.models import Message, MessagePiece, PromptDataType, SeedPrompt from pyrit.prompt_converter.prompt_converter import ConverterResult -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget class FuzzerCrossOverConverter(FuzzerConverter): @@ -25,7 +25,7 @@ class FuzzerCrossOverConverter(FuzzerConverter): def __init__( self, *, - converter_target: Optional[PromptChatTarget] = None, + converter_target: Optional[PromptTarget] = None, prompt_template: Optional[SeedPrompt] = None, prompt_templates: Optional[list[str]] = None, ): @@ -33,7 +33,7 @@ def __init__( Initialize the converter with the specified chat target and prompt templates. Args: - converter_target (PromptChatTarget): Chat target used to perform fuzzing on user prompt. + converter_target (PromptTarget): Chat target used to perform fuzzing on user prompt. Can be omitted if a default has been configured via PyRIT initialization. prompt_template (SeedPrompt, Optional): Template to be used instead of the default system prompt with instructions for the chat target. 
@@ -79,7 +79,7 @@ async def convert_async(self, *, prompt: str, input_type: PromptDataType = "text conversation_id = str(uuid.uuid4()) - self.converter_target.set_system_prompt( + self.converter_target._set_target_system_prompt( system_prompt=self.system_prompt, conversation_id=conversation_id, attack_identifier=None, diff --git a/pyrit/executor/promptgen/fuzzer/fuzzer_expand_converter.py b/pyrit/executor/promptgen/fuzzer/fuzzer_expand_converter.py index ddd8c60792..6d517b03f8 100644 --- a/pyrit/executor/promptgen/fuzzer/fuzzer_expand_converter.py +++ b/pyrit/executor/promptgen/fuzzer/fuzzer_expand_converter.py @@ -12,7 +12,7 @@ ) from pyrit.models import Message, MessagePiece, PromptDataType, SeedPrompt from pyrit.prompt_converter.prompt_converter import ConverterResult -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget class FuzzerExpandConverter(FuzzerConverter): @@ -24,7 +24,7 @@ class FuzzerExpandConverter(FuzzerConverter): def __init__( self, *, - converter_target: Optional[PromptChatTarget] = None, + converter_target: Optional[PromptTarget] = None, prompt_template: Optional[SeedPrompt] = None, ): """Initialize the expand converter with optional chat target and prompt template.""" @@ -56,7 +56,7 @@ async def convert_async(self, *, prompt: str, input_type: PromptDataType = "text conversation_id = str(uuid.uuid4()) - self.converter_target.set_system_prompt( + self.converter_target._set_target_system_prompt( system_prompt=self.system_prompt, conversation_id=conversation_id, attack_identifier=None, diff --git a/pyrit/executor/promptgen/fuzzer/fuzzer_rephrase_converter.py b/pyrit/executor/promptgen/fuzzer/fuzzer_rephrase_converter.py index 99a001b955..de6d812451 100644 --- a/pyrit/executor/promptgen/fuzzer/fuzzer_rephrase_converter.py +++ b/pyrit/executor/promptgen/fuzzer/fuzzer_rephrase_converter.py @@ -10,7 +10,7 @@ FuzzerConverter, ) from pyrit.models import SeedPrompt -from pyrit.prompt_target import 
PromptChatTarget +from pyrit.prompt_target import PromptTarget class FuzzerRephraseConverter(FuzzerConverter): @@ -20,7 +20,7 @@ class FuzzerRephraseConverter(FuzzerConverter): @apply_defaults def __init__( - self, *, converter_target: Optional[PromptChatTarget] = None, prompt_template: Optional[SeedPrompt] = None + self, *, converter_target: Optional[PromptTarget] = None, prompt_template: Optional[SeedPrompt] = None ): """Initialize the rephrase converter with optional chat target and prompt template.""" prompt_template = ( diff --git a/pyrit/executor/promptgen/fuzzer/fuzzer_shorten_converter.py b/pyrit/executor/promptgen/fuzzer/fuzzer_shorten_converter.py index a6ba025e1a..5f5111be9e 100644 --- a/pyrit/executor/promptgen/fuzzer/fuzzer_shorten_converter.py +++ b/pyrit/executor/promptgen/fuzzer/fuzzer_shorten_converter.py @@ -10,7 +10,7 @@ FuzzerConverter, ) from pyrit.models import SeedPrompt -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget class FuzzerShortenConverter(FuzzerConverter): @@ -20,7 +20,7 @@ class FuzzerShortenConverter(FuzzerConverter): @apply_defaults def __init__( - self, *, converter_target: Optional[PromptChatTarget] = None, prompt_template: Optional[SeedPrompt] = None + self, *, converter_target: Optional[PromptTarget] = None, prompt_template: Optional[SeedPrompt] = None ): """Initialize the shorten converter with optional chat target and prompt template.""" prompt_template = ( diff --git a/pyrit/executor/promptgen/fuzzer/fuzzer_similar_converter.py b/pyrit/executor/promptgen/fuzzer/fuzzer_similar_converter.py index cf891af371..86fb7e6c8d 100644 --- a/pyrit/executor/promptgen/fuzzer/fuzzer_similar_converter.py +++ b/pyrit/executor/promptgen/fuzzer/fuzzer_similar_converter.py @@ -10,7 +10,7 @@ FuzzerConverter, ) from pyrit.models import SeedPrompt -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget class FuzzerSimilarConverter(FuzzerConverter): @@ -20,7 
+20,7 @@ class FuzzerSimilarConverter(FuzzerConverter): @apply_defaults def __init__( - self, *, converter_target: Optional[PromptChatTarget] = None, prompt_template: Optional[SeedPrompt] = None + self, *, converter_target: Optional[PromptTarget] = None, prompt_template: Optional[SeedPrompt] = None ): """Initialize the similar converter with optional chat target and prompt template.""" prompt_template = ( diff --git a/pyrit/prompt_converter/denylist_converter.py b/pyrit/prompt_converter/denylist_converter.py index a9672e3718..186447f1a8 100644 --- a/pyrit/prompt_converter/denylist_converter.py +++ b/pyrit/prompt_converter/denylist_converter.py @@ -10,7 +10,7 @@ from pyrit.models import PromptDataType, SeedPrompt from pyrit.prompt_converter.llm_generic_text_converter import LLMGenericTextConverter from pyrit.prompt_converter.prompt_converter import ConverterResult -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget logger = logging.getLogger(__name__) @@ -19,14 +19,14 @@ class DenylistConverter(LLMGenericTextConverter): """ Replaces forbidden words or phrases in a prompt with synonyms using an LLM. - An existing ``PromptChatTarget`` is used to perform the conversion (like Azure OpenAI). + An existing ``PromptTarget`` is used to perform the conversion (like Azure OpenAI). """ @apply_defaults def __init__( self, *, - converter_target: PromptChatTarget = REQUIRED_VALUE, # type: ignore[assignment] + converter_target: PromptTarget = REQUIRED_VALUE, # type: ignore[assignment] system_prompt_template: Optional[SeedPrompt] = None, denylist: list[str] = None, ): @@ -34,7 +34,7 @@ def __init__( Initialize the converter with a target, an optional system prompt template, and a denylist. Args: - converter_target (PromptChatTarget): The target for the prompt conversion. + converter_target (PromptTarget): The target for the prompt conversion. Can be omitted if a default has been configured via PyRIT initialization. 
system_prompt_template (Optional[SeedPrompt]): The system prompt template to use for the conversion. If not provided, a default template will be used. diff --git a/pyrit/prompt_converter/llm_generic_text_converter.py b/pyrit/prompt_converter/llm_generic_text_converter.py index f56990247f..7e5388c3f8 100644 --- a/pyrit/prompt_converter/llm_generic_text_converter.py +++ b/pyrit/prompt_converter/llm_generic_text_converter.py @@ -15,7 +15,7 @@ SeedPrompt, ) from pyrit.prompt_converter.prompt_converter import ConverterResult, PromptConverter -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget logger = logging.getLogger(__name__) @@ -32,7 +32,7 @@ class LLMGenericTextConverter(PromptConverter): def __init__( self, *, - converter_target: PromptChatTarget = REQUIRED_VALUE, # type: ignore[assignment] + converter_target: PromptTarget = REQUIRED_VALUE, # type: ignore[assignment] system_prompt_template: Optional[SeedPrompt] = None, user_prompt_template_with_objective: Optional[SeedPrompt] = None, **kwargs: Any, @@ -41,7 +41,7 @@ def __init__( Initialize the converter with a target and optional prompt templates. Args: - converter_target (PromptChatTarget): The endpoint that converts the prompt. + converter_target (PromptTarget): The endpoint that converts the prompt. Can be omitted if a default has been configured via PyRIT initialization. system_prompt_template (SeedPrompt, Optional): The prompt template to set as the system prompt. user_prompt_template_with_objective (SeedPrompt, Optional): The prompt template to set as the user prompt. 
@@ -112,7 +112,7 @@ async def convert_async(self, *, prompt: str, input_type: PromptDataType = "text if self._system_prompt_template: system_prompt = self._system_prompt_template.render_template_value(**kwargs) - self._converter_target.set_system_prompt( + self._converter_target._set_target_system_prompt( system_prompt=system_prompt, conversation_id=conversation_id, attack_identifier=None, diff --git a/pyrit/prompt_converter/malicious_question_generator_converter.py b/pyrit/prompt_converter/malicious_question_generator_converter.py index 41a7848458..5725fff9c4 100644 --- a/pyrit/prompt_converter/malicious_question_generator_converter.py +++ b/pyrit/prompt_converter/malicious_question_generator_converter.py @@ -10,7 +10,7 @@ from pyrit.models import PromptDataType, SeedPrompt from pyrit.prompt_converter.llm_generic_text_converter import LLMGenericTextConverter from pyrit.prompt_converter.prompt_converter import ConverterResult -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget logger = logging.getLogger(__name__) @@ -19,21 +19,21 @@ class MaliciousQuestionGeneratorConverter(LLMGenericTextConverter): """ Generates malicious questions using an LLM. - An existing ``PromptChatTarget`` is used to perform the conversion (like Azure OpenAI). + An existing ``PromptTarget`` is used to perform the conversion (like Azure OpenAI). """ @apply_defaults def __init__( self, *, - converter_target: PromptChatTarget = REQUIRED_VALUE, # type: ignore[assignment] + converter_target: PromptTarget = REQUIRED_VALUE, # type: ignore[assignment] prompt_template: Optional[SeedPrompt] = None, ): """ Initialize the converter with a specific target and template. Args: - converter_target (PromptChatTarget): The endpoint that converts the prompt. + converter_target (PromptTarget): The endpoint that converts the prompt. Can be omitted if a default has been configured via PyRIT initialization. prompt_template (SeedPrompt): The seed prompt template to use. 
""" diff --git a/pyrit/prompt_converter/math_prompt_converter.py b/pyrit/prompt_converter/math_prompt_converter.py index fd6491bbc1..a3520b190c 100644 --- a/pyrit/prompt_converter/math_prompt_converter.py +++ b/pyrit/prompt_converter/math_prompt_converter.py @@ -10,7 +10,7 @@ from pyrit.models import PromptDataType, SeedPrompt from pyrit.prompt_converter.llm_generic_text_converter import LLMGenericTextConverter from pyrit.prompt_converter.prompt_converter import ConverterResult -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget logger = logging.getLogger(__name__) @@ -19,21 +19,21 @@ class MathPromptConverter(LLMGenericTextConverter): """ Converts natural language instructions into symbolic mathematics problems using an LLM. - An existing ``PromptChatTarget`` is used to perform the conversion (like Azure OpenAI). + An existing ``PromptTarget`` is used to perform the conversion (like Azure OpenAI). """ @apply_defaults def __init__( self, *, - converter_target: PromptChatTarget = REQUIRED_VALUE, # type: ignore[assignment] + converter_target: PromptTarget = REQUIRED_VALUE, # type: ignore[assignment] prompt_template: Optional[SeedPrompt] = None, ): """ Initialize the converter with a specific target and template. Args: - converter_target (PromptChatTarget): The endpoint that converts the prompt. + converter_target (PromptTarget): The endpoint that converts the prompt. Can be omitted if a default has been configured via PyRIT initialization. prompt_template (SeedPrompt): The seed prompt template to use. 
""" diff --git a/pyrit/prompt_converter/noise_converter.py b/pyrit/prompt_converter/noise_converter.py index 0d7bdf302f..86c5375773 100644 --- a/pyrit/prompt_converter/noise_converter.py +++ b/pyrit/prompt_converter/noise_converter.py @@ -11,7 +11,7 @@ from pyrit.identifiers import ComponentIdentifier from pyrit.models import SeedPrompt from pyrit.prompt_converter.llm_generic_text_converter import LLMGenericTextConverter -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget logger = logging.getLogger(__name__) @@ -20,14 +20,14 @@ class NoiseConverter(LLMGenericTextConverter): """ Injects noise errors into a conversation using an LLM. - An existing ``PromptChatTarget`` is used to perform the conversion (like Azure OpenAI). + An existing ``PromptTarget`` is used to perform the conversion (like Azure OpenAI). """ @apply_defaults def __init__( self, *, - converter_target: PromptChatTarget = REQUIRED_VALUE, # type: ignore[assignment] + converter_target: PromptTarget = REQUIRED_VALUE, # type: ignore[assignment] noise: Optional[str] = None, number_errors: int = 5, prompt_template: Optional[SeedPrompt] = None, @@ -36,7 +36,7 @@ def __init__( Initialize the converter with the specified parameters. Args: - converter_target (PromptChatTarget): The endpoint that converts the prompt. + converter_target (PromptTarget): The endpoint that converts the prompt. Can be omitted if a default has been configured via PyRIT initialization. noise (str): The noise to inject. Grammar error, delete random letter, insert random space, etc. number_errors (int): The number of errors to inject. 
diff --git a/pyrit/prompt_converter/persuasion_converter.py b/pyrit/prompt_converter/persuasion_converter.py index 11b6bd66e6..6d9c36762f 100644 --- a/pyrit/prompt_converter/persuasion_converter.py +++ b/pyrit/prompt_converter/persuasion_converter.py @@ -21,7 +21,7 @@ SeedPrompt, ) from pyrit.prompt_converter.prompt_converter import ConverterResult, PromptConverter -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget logger = logging.getLogger(__name__) @@ -52,14 +52,14 @@ class PersuasionConverter(PromptConverter): def __init__( self, *, - converter_target: PromptChatTarget = REQUIRED_VALUE, # type: ignore[assignment] + converter_target: PromptTarget = REQUIRED_VALUE, # type: ignore[assignment] persuasion_technique: str, ): """ Initialize the converter with the specified target and prompt template. Args: - converter_target (PromptChatTarget): The chat target used to perform rewriting on user prompts. + converter_target (PromptTarget): The target used to perform rewriting on user prompts. Can be omitted if a default has been configured via PyRIT initialization. persuasion_technique (str): Persuasion technique to be used by the converter, determines the system prompt to be used to generate new prompts. Must be one of "authority_endorsement", "evidence_based", @@ -69,6 +69,11 @@ def __init__( ValueError: If converter_target is not provided and no default has been configured. ValueError: If the persuasion technique is not supported or does not exist. 
""" + converter_target.capabilities.validate( + required={"supports_multi_turn", "supports_editable_history"}, + context="converter_target", + ) + self.converter_target = converter_target try: @@ -115,7 +120,7 @@ async def convert_async(self, *, prompt: str, input_type: PromptDataType = "text conversation_id = str(uuid.uuid4()) - self.converter_target.set_system_prompt( + self.converter_target._set_target_system_prompt( system_prompt=self.system_prompt, conversation_id=conversation_id, attack_identifier=None, diff --git a/pyrit/prompt_converter/random_translation_converter.py b/pyrit/prompt_converter/random_translation_converter.py index 74953c2603..7e11810323 100644 --- a/pyrit/prompt_converter/random_translation_converter.py +++ b/pyrit/prompt_converter/random_translation_converter.py @@ -13,7 +13,7 @@ from pyrit.prompt_converter.prompt_converter import ConverterResult from pyrit.prompt_converter.text_selection_strategy import WordSelectionStrategy from pyrit.prompt_converter.word_level_converter import WordLevelConverter -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget logger = logging.getLogger(__name__) @@ -22,7 +22,7 @@ class RandomTranslationConverter(LLMGenericTextConverter, WordLevelConverter): """ Translates each individual word in a prompt to a random language using an LLM. - An existing ``PromptChatTarget`` is used to perform the translation (like Azure OpenAI). + An existing ``PromptTarget`` is used to perform the translation (like Azure OpenAI). 
""" SUPPORTED_INPUT_TYPES = ("text",) @@ -35,7 +35,7 @@ class RandomTranslationConverter(LLMGenericTextConverter, WordLevelConverter): def __init__( self, *, - converter_target: PromptChatTarget = REQUIRED_VALUE, # type: ignore[assignment] + converter_target: PromptTarget = REQUIRED_VALUE, # type: ignore[assignment] system_prompt_template: Optional[SeedPrompt] = None, languages: Optional[list[str]] = None, word_selection_strategy: Optional[WordSelectionStrategy] = None, @@ -44,7 +44,7 @@ def __init__( Initialize the converter with a target, an optional system prompt template, and language options. Args: - converter_target (PromptChatTarget): The target for the prompt conversion. + converter_target (PromptTarget): The target for the prompt conversion. Can be omitted if a default has been configured via PyRIT initialization. system_prompt_template (Optional[SeedPrompt]): The system prompt template to use for the conversion. If not provided, a default template will be used. diff --git a/pyrit/prompt_converter/scientific_translation_converter.py b/pyrit/prompt_converter/scientific_translation_converter.py index 2a6c965996..bdc7987041 100644 --- a/pyrit/prompt_converter/scientific_translation_converter.py +++ b/pyrit/prompt_converter/scientific_translation_converter.py @@ -10,7 +10,7 @@ from pyrit.identifiers import ComponentIdentifier from pyrit.models import SeedPrompt from pyrit.prompt_converter.llm_generic_text_converter import LLMGenericTextConverter -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget logger = logging.getLogger(__name__) @@ -45,7 +45,7 @@ class ScientificTranslationConverter(LLMGenericTextConverter): def __init__( self, *, - converter_target: PromptChatTarget = REQUIRED_VALUE, # type: ignore[assignment] + converter_target: PromptTarget = REQUIRED_VALUE, # type: ignore[assignment] mode: str = "combined", prompt_template: Optional[SeedPrompt] = None, ) -> None: @@ -53,7 +53,7 @@ def __init__( Initialize the 
scientific translation converter. Args: - converter_target (PromptChatTarget): The LLM target to perform the conversion. + converter_target (PromptTarget): The LLM target to perform the conversion. mode (str): The translation mode to use. Built-in options are: - ``academic``: Use academic/homework style framing diff --git a/pyrit/prompt_converter/tense_converter.py b/pyrit/prompt_converter/tense_converter.py index 237a2934d5..eede7adef9 100644 --- a/pyrit/prompt_converter/tense_converter.py +++ b/pyrit/prompt_converter/tense_converter.py @@ -10,7 +10,7 @@ from pyrit.identifiers import ComponentIdentifier from pyrit.models import SeedPrompt from pyrit.prompt_converter.llm_generic_text_converter import LLMGenericTextConverter -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget logger = logging.getLogger(__name__) @@ -19,14 +19,14 @@ class TenseConverter(LLMGenericTextConverter): """ Converts a conversation to a different tense using an LLM. - An existing ``PromptChatTarget`` is used to perform the conversion (like Azure OpenAI). + An existing ``PromptTarget`` is used to perform the conversion (like Azure OpenAI). """ @apply_defaults def __init__( self, *, - converter_target: PromptChatTarget = REQUIRED_VALUE, # type: ignore[assignment] + converter_target: PromptTarget = REQUIRED_VALUE, # type: ignore[assignment] tense: str, prompt_template: Optional[SeedPrompt] = None, ): @@ -34,7 +34,7 @@ def __init__( Initialize the converter with the target chat support, tense, and optional prompt template. Args: - converter_target (PromptChatTarget): The target chat support for the conversion which will translate. + converter_target (PromptTarget): The target chat support for the conversion which will translate. Can be omitted if a default has been configured via PyRIT initialization. tense (str): The tense the converter should convert the prompt to. E.g. past, present, future. 
prompt_template (SeedPrompt, Optional): The prompt template for the conversion. diff --git a/pyrit/prompt_converter/tone_converter.py b/pyrit/prompt_converter/tone_converter.py index a7b8e5a9f1..4a6d0e859e 100644 --- a/pyrit/prompt_converter/tone_converter.py +++ b/pyrit/prompt_converter/tone_converter.py @@ -10,7 +10,7 @@ from pyrit.identifiers import ComponentIdentifier from pyrit.models import SeedPrompt from pyrit.prompt_converter.llm_generic_text_converter import LLMGenericTextConverter -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget logger = logging.getLogger(__name__) @@ -19,14 +19,14 @@ class ToneConverter(LLMGenericTextConverter): """ Converts a conversation to a different tone using an LLM. - An existing ``PromptChatTarget`` is used to perform the conversion (like Azure OpenAI). + An existing ``PromptTarget`` is used to perform the conversion (like Azure OpenAI). """ @apply_defaults def __init__( self, *, - converter_target: PromptChatTarget = REQUIRED_VALUE, # type: ignore[assignment] + converter_target: PromptTarget = REQUIRED_VALUE, # type: ignore[assignment] tone: str, prompt_template: Optional[SeedPrompt] = None, ): @@ -34,7 +34,7 @@ def __init__( Initialize the converter with the target chat support, tone, and optional prompt template. Args: - converter_target (PromptChatTarget): The target chat support for the conversion which will translate. + converter_target (PromptTarget): The target chat support for the conversion which will translate. Can be omitted if a default has been configured via PyRIT initialization. tone (str): The tone for the conversation. E.g. upset, sarcastic, indifferent, etc. prompt_template (SeedPrompt, Optional): The prompt template for the conversion. 
diff --git a/pyrit/prompt_converter/toxic_sentence_generator_converter.py b/pyrit/prompt_converter/toxic_sentence_generator_converter.py index d3390c6af7..636e50ad8d 100644 --- a/pyrit/prompt_converter/toxic_sentence_generator_converter.py +++ b/pyrit/prompt_converter/toxic_sentence_generator_converter.py @@ -14,7 +14,7 @@ from pyrit.models import PromptDataType, SeedPrompt from pyrit.prompt_converter.llm_generic_text_converter import LLMGenericTextConverter from pyrit.prompt_converter.prompt_converter import ConverterResult -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget logger = logging.getLogger(__name__) @@ -23,7 +23,7 @@ class ToxicSentenceGeneratorConverter(LLMGenericTextConverter): """ Generates toxic sentence starters using an LLM. - An existing ``PromptChatTarget`` is used to perform the conversion (like Azure OpenAI). + An existing ``PromptTarget`` is used to perform the conversion (like Azure OpenAI). Based on Project Moonshot's attack module that generates toxic sentences to test LLM safety guardrails: @@ -34,14 +34,14 @@ class ToxicSentenceGeneratorConverter(LLMGenericTextConverter): def __init__( self, *, - converter_target: PromptChatTarget = REQUIRED_VALUE, # type: ignore[assignment] + converter_target: PromptTarget = REQUIRED_VALUE, # type: ignore[assignment] prompt_template: Optional[SeedPrompt] = None, ): """ Initialize the converter with a specific target and template. Args: - converter_target (PromptChatTarget): The endpoint that converts the prompt. + converter_target (PromptTarget): The endpoint that converts the prompt. Can be omitted if a default has been configured via PyRIT initialization. prompt_template (SeedPrompt): The seed prompt template to use. If not provided, defaults to the ``toxic_sentence_generator.yaml``. 
diff --git a/pyrit/prompt_converter/translation_converter.py b/pyrit/prompt_converter/translation_converter.py index 911f72ab57..fb0045a281 100644 --- a/pyrit/prompt_converter/translation_converter.py +++ b/pyrit/prompt_converter/translation_converter.py @@ -24,7 +24,7 @@ SeedPrompt, ) from pyrit.prompt_converter.prompt_converter import ConverterResult, PromptConverter -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget logger = logging.getLogger(__name__) @@ -41,7 +41,7 @@ class TranslationConverter(PromptConverter): def __init__( self, *, - converter_target: PromptChatTarget = REQUIRED_VALUE, # type: ignore[assignment] + converter_target: PromptTarget = REQUIRED_VALUE, # type: ignore[assignment] language: str, prompt_template: Optional[SeedPrompt] = None, max_retries: int = 3, @@ -51,7 +51,7 @@ def __init__( Initialize the converter with the target chat support, language, and optional prompt template. Args: - converter_target (PromptChatTarget): The target chat support for the conversion which will translate. + converter_target (PromptTarget): The target chat support for the conversion which will translate. Can be omitted if a default has been configured via PyRIT initialization. language (str): The language for the conversion. E.g. Spanish, French, leetspeak, etc. prompt_template (SeedPrompt, Optional): The prompt template for the conversion. 
@@ -112,7 +112,9 @@ async def convert_async(self, *, prompt: str, input_type: PromptDataType = "text """ conversation_id = str(uuid.uuid4()) - self.converter_target.set_system_prompt(system_prompt=self.system_prompt, conversation_id=conversation_id) + self.converter_target._set_target_system_prompt( + system_prompt=self.system_prompt, conversation_id=conversation_id + ) if not self.input_supported(input_type): raise ValueError("Input type not supported") diff --git a/pyrit/prompt_converter/variation_converter.py b/pyrit/prompt_converter/variation_converter.py index 328e463072..2f9d45e44d 100644 --- a/pyrit/prompt_converter/variation_converter.py +++ b/pyrit/prompt_converter/variation_converter.py @@ -23,7 +23,7 @@ SeedPrompt, ) from pyrit.prompt_converter.prompt_converter import ConverterResult, PromptConverter -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget logger = logging.getLogger(__name__) @@ -40,14 +40,14 @@ class VariationConverter(PromptConverter): def __init__( self, *, - converter_target: PromptChatTarget = REQUIRED_VALUE, # type: ignore[assignment] + converter_target: PromptTarget = REQUIRED_VALUE, # type: ignore[assignment] prompt_template: Optional[SeedPrompt] = None, ): """ Initialize the converter with the specified target and prompt template. Args: - converter_target (PromptChatTarget): The target to which the prompt will be sent for conversion. + converter_target (PromptTarget): The target to which the prompt will be sent for conversion. Can be omitted if a default has been configured via PyRIT initialization. prompt_template (SeedPrompt, optional): The template used for generating the system prompt. If not provided, a default template will be used. @@ -55,6 +55,11 @@ def __init__( Raises: ValueError: If converter_target is not provided and no default has been configured. 
""" + converter_target.capabilities.validate( + required={"supports_multi_turn", "supports_editable_history"}, + context="converter_target", + ) + self.converter_target = converter_target # set to default strategy if not provided @@ -98,7 +103,7 @@ async def convert_async(self, *, prompt: str, input_type: PromptDataType = "text conversation_id = str(uuid.uuid4()) - self.converter_target.set_system_prompt( + self.converter_target._set_target_system_prompt( system_prompt=self.system_prompt, conversation_id=conversation_id, attack_identifier=None, diff --git a/pyrit/prompt_target/azure_ml_chat_target.py b/pyrit/prompt_target/azure_ml_chat_target.py index cc83332027..1571d59077 100644 --- a/pyrit/prompt_target/azure_ml_chat_target.py +++ b/pyrit/prompt_target/azure_ml_chat_target.py @@ -19,14 +19,14 @@ Message, construct_response_from_request, ) -from pyrit.prompt_target.common.prompt_chat_target import PromptChatTarget +from pyrit.prompt_target.common.prompt_target import PromptTarget from pyrit.prompt_target.common.target_capabilities import TargetCapabilities from pyrit.prompt_target.common.utils import limit_requests_per_minute, validate_temperature, validate_top_p logger = logging.getLogger(__name__) -class AzureMLChatTarget(PromptChatTarget): +class AzureMLChatTarget(PromptTarget): """ A prompt target for Azure Machine Learning chat endpoints. 
@@ -96,8 +96,7 @@ def __init__( endpoint_value = default_values.get_required_value( env_var_name=self.endpoint_uri_environment_variable, passed_value=endpoint ) - PromptChatTarget.__init__( - self, + super().__init__( max_requests_per_minute=max_requests_per_minute, endpoint=endpoint_value, model_name=model_name, diff --git a/pyrit/prompt_target/common/prompt_chat_target.py b/pyrit/prompt_target/common/prompt_chat_target.py index af12a11a1e..3a3b2d0e51 100644 --- a/pyrit/prompt_target/common/prompt_chat_target.py +++ b/pyrit/prompt_target/common/prompt_chat_target.py @@ -3,6 +3,7 @@ from typing import Optional +from pyrit.common.deprecation import print_deprecation_message from pyrit.identifiers import ComponentIdentifier from pyrit.models import MessagePiece from pyrit.models.json_response_config import _JsonResponseConfig @@ -12,13 +13,9 @@ class PromptChatTarget(PromptTarget): """ - A prompt chat target is a target where you can explicitly set the conversation history using memory. + Deprecated. Use :class:`PromptTarget` directly instead. - Some algorithms require conversation to be modified (e.g. deleting the last message) or set explicitly. - These algorithms will require PromptChatTargets be used. - - As a concrete example, OpenAI chat targets are PromptChatTargets. You can set made-up conversation history. - Realtime chat targets or OpenAI completions are NOT PromptChatTargets. You don't send the conversation history. + This class will be removed in 0.14.0. """ _DEFAULT_CAPABILITIES: TargetCapabilities = TargetCapabilities( @@ -47,6 +44,11 @@ def __init__( custom_capabilities (TargetCapabilities, Optional): Override the default capabilities for this target instance. If None, uses the class-level defaults. Defaults to None. 
""" + print_deprecation_message( + old_item=PromptChatTarget, + new_item=PromptTarget, + removed_in="0.14.0", + ) super().__init__( max_requests_per_minute=max_requests_per_minute, endpoint=endpoint, diff --git a/pyrit/prompt_target/common/prompt_target.py b/pyrit/prompt_target/common/prompt_target.py index 069208e3c8..f656150427 100644 --- a/pyrit/prompt_target/common/prompt_target.py +++ b/pyrit/prompt_target/common/prompt_target.py @@ -5,9 +5,10 @@ import logging from typing import Any, Optional, Union +from pyrit.common.deprecation import print_deprecation_message from pyrit.identifiers import ComponentIdentifier, Identifiable from pyrit.memory import CentralMemory, MemoryInterface -from pyrit.models import Message +from pyrit.models import Message, MessagePiece from pyrit.prompt_target.common.target_capabilities import TargetCapabilities logger = logging.getLogger(__name__) @@ -134,6 +135,74 @@ def _validate_request(self, *, message: Message) -> None: if len(messages) > 0: raise ValueError(f"This target only supports a single turn conversation. {custom_capabilities_message}") + def set_system_prompt( + self, + *, + system_prompt: str, + conversation_id: str, + attack_identifier: Optional[ComponentIdentifier] = None, + labels: Optional[dict[str, str]] = None, + ) -> None: + """ + Set the system prompt for the prompt target. May be overridden by subclasses. + + .. deprecated:: + Use ``prepended_conversation`` on the attack context instead. Pass a + ``Message.from_system_prompt(system_prompt)`` as the first element of + ``AttackParameters.prepended_conversation``. This method will be removed in 0.14.0. + + Raises: + RuntimeError: If the conversation already exists. 
+ """ + print_deprecation_message( + old_item="PromptTarget.set_system_prompt", + new_item="AttackParameters.prepended_conversation", + removed_in="0.14.0", + ) + self._set_target_system_prompt( + system_prompt=system_prompt, + conversation_id=conversation_id, + attack_identifier=attack_identifier, + labels=labels, + ) + + def _set_target_system_prompt( + self, + *, + system_prompt: str, + conversation_id: str, + attack_identifier: Optional[ComponentIdentifier] = None, + labels: Optional[dict[str, str]] = None, + ) -> None: + """ + Set the system prompt for the prompt target. + + Args: + system_prompt (str): The system prompt text to set. + conversation_id (str): The conversation ID to associate with this system prompt. + attack_identifier (Optional[ComponentIdentifier]): An optional identifier for the attack context. + labels (Optional[dict[str, str]]): Optional labels to associate with the system prompt in memory. + + Raises: + RuntimeError: If the conversation already exists. + """ + messages = self._memory.get_conversation(conversation_id=conversation_id) + + if messages: + raise RuntimeError("Conversation already exists, system prompt needs to be set at the beginning") + + self._memory.add_message_to_memory( + request=MessagePiece( + role="system", + conversation_id=conversation_id, + original_value=system_prompt, + converted_value=system_prompt, + prompt_target_identifier=self.get_identifier(), + attack_identifier=attack_identifier, + labels=labels, + ).to_message() + ) + def set_model_name(self, *, model_name: str) -> None: """ Set the model name for this target. diff --git a/pyrit/prompt_target/common/target_capabilities.py b/pyrit/prompt_target/common/target_capabilities.py index 5b1b596120..2530e36365 100644 --- a/pyrit/prompt_target/common/target_capabilities.py +++ b/pyrit/prompt_target/common/target_capabilities.py @@ -1,7 +1,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. 
-from dataclasses import dataclass +from dataclasses import dataclass, fields from typing import Optional, cast from pyrit.models import PromptDataType @@ -58,6 +58,26 @@ def get_known_capabilities(underlying_model: str) -> "Optional[TargetCapabilitie """ return _KNOWN_CAPABILITIES.get(underlying_model) + def validate(self, *, required: set[str], context: str = "target") -> None: + """ + Assert that all named boolean capabilities are True, raising ValueError for each that is not. + + Args: + required (set[str]): Names of boolean fields on TargetCapabilities that must be True + (e.g. {"supports_multi_turn", "supports_editable_history"}). + context (str): Label used in the error message (e.g. "converter_target"). + + Raises: + ValueError: If any required capability is False. + AttributeError: If a name in ``required`` is not a field on TargetCapabilities. + """ + valid_fields = {f.name for f in fields(self)} + for capability in required: + if capability not in valid_fields: + raise AttributeError(f"'{capability}' is not a field on TargetCapabilities.") + if not getattr(self, capability): + raise ValueError(f"{context} must have '{capability}' capability.") + # --------------------------------------------------------------------------- # Known capability profiles — add new models here. diff --git a/pyrit/prompt_target/common/utils.py b/pyrit/prompt_target/common/utils.py index ca0a4ca7da..9204a52d57 100644 --- a/pyrit/prompt_target/common/utils.py +++ b/pyrit/prompt_target/common/utils.py @@ -39,7 +39,7 @@ def validate_top_p(top_p: Optional[float]) -> None: def limit_requests_per_minute(func: Callable[..., Any]) -> Callable[..., Any]: """ Enforce rate limit of the target through setting requests per minute. - This should be applied to all send_prompt_async() functions on PromptTarget and PromptChatTarget. + This should be applied to all send_prompt_async() functions on PromptTarget. Args: func (Callable): The function to be decorated. 
diff --git a/pyrit/prompt_target/hugging_face/hugging_face_chat_target.py b/pyrit/prompt_target/hugging_face/hugging_face_chat_target.py index 19ba5b062a..46b53efb8b 100644 --- a/pyrit/prompt_target/hugging_face/hugging_face_chat_target.py +++ b/pyrit/prompt_target/hugging_face/hugging_face_chat_target.py @@ -19,7 +19,7 @@ from pyrit.exceptions import EmptyResponseException, pyrit_target_retry from pyrit.identifiers import ComponentIdentifier from pyrit.models import Message, construct_response_from_request -from pyrit.prompt_target.common.prompt_chat_target import PromptChatTarget +from pyrit.prompt_target.common.prompt_target import PromptTarget from pyrit.prompt_target.common.target_capabilities import TargetCapabilities from pyrit.prompt_target.common.utils import limit_requests_per_minute @@ -29,7 +29,7 @@ import torch -class HuggingFaceChatTarget(PromptChatTarget): +class HuggingFaceChatTarget(PromptTarget): """ The HuggingFaceChatTarget interacts with HuggingFace models, specifically for conducting red teaming activities. Inherits from PromptTarget to comply with the current design standards. 
diff --git a/pyrit/prompt_target/openai/openai_chat_target.py b/pyrit/prompt_target/openai/openai_chat_target.py index 84eefe892b..4507cef6aa 100644 --- a/pyrit/prompt_target/openai/openai_chat_target.py +++ b/pyrit/prompt_target/openai/openai_chat_target.py @@ -24,7 +24,6 @@ data_serializer_factory, ) from pyrit.models.json_response_config import _JsonResponseConfig -from pyrit.prompt_target.common.prompt_chat_target import PromptChatTarget from pyrit.prompt_target.common.target_capabilities import TargetCapabilities from pyrit.prompt_target.common.utils import limit_requests_per_minute, validate_temperature, validate_top_p from pyrit.prompt_target.openai.openai_chat_audio_config import OpenAIChatAudioConfig @@ -33,7 +32,7 @@ logger = logging.getLogger(__name__) -class OpenAIChatTarget(OpenAITarget, PromptChatTarget): +class OpenAIChatTarget(OpenAITarget): """ Facilitates multimodal (image and text) input and text output generation. @@ -69,6 +68,7 @@ class OpenAIChatTarget(OpenAITarget, PromptChatTarget): supports_multi_turn=True, supports_json_output=True, supports_multi_message_pieces=True, + supports_editable_history=True, ) def __init__( @@ -232,11 +232,17 @@ async def send_prompt_async(self, *, message: Message) -> list[Message]: Returns: list[Message]: A list containing the response from the prompt target. + + Raises: + ValueError: If the target does not support JSON response format. 
""" self._validate_request(message=message) message_piece: MessagePiece = message.message_pieces[0] - json_config = self._get_json_response_config(message_piece=message_piece) + json_config = _JsonResponseConfig.from_metadata(metadata=message_piece.prompt_metadata) + + if json_config.enabled and not self.capabilities.supports_json_output: + raise ValueError(f"This target {type(self).__name__} does not support JSON response format.") # Get conversation from memory and append the current message conversation = self._memory.get_conversation(conversation_id=message_piece.conversation_id) diff --git a/pyrit/prompt_target/openai/openai_realtime_target.py b/pyrit/prompt_target/openai/openai_realtime_target.py index 22edf20391..82154687c7 100644 --- a/pyrit/prompt_target/openai/openai_realtime_target.py +++ b/pyrit/prompt_target/openai/openai_realtime_target.py @@ -21,7 +21,7 @@ construct_response_from_request, data_serializer_factory, ) -from pyrit.prompt_target.common.prompt_chat_target import PromptChatTarget +from pyrit.prompt_target.common.prompt_target import PromptTarget from pyrit.prompt_target.common.target_capabilities import TargetCapabilities from pyrit.prompt_target.common.utils import limit_requests_per_minute from pyrit.prompt_target.openai.openai_target import OpenAITarget @@ -57,7 +57,7 @@ def flatten_transcripts(self) -> str: return "".join(self.transcripts) -class RealtimeTarget(OpenAITarget, PromptChatTarget): +class RealtimeTarget(OpenAITarget, PromptTarget): """ A prompt target for Azure OpenAI Realtime API. 
diff --git a/pyrit/prompt_target/openai/openai_response_target.py b/pyrit/prompt_target/openai/openai_response_target.py index be720fa5c9..fe4e42af4b 100644 --- a/pyrit/prompt_target/openai/openai_response_target.py +++ b/pyrit/prompt_target/openai/openai_response_target.py @@ -28,7 +28,7 @@ PromptResponseError, ) from pyrit.models.json_response_config import _JsonResponseConfig -from pyrit.prompt_target.common.prompt_chat_target import PromptChatTarget +from pyrit.prompt_target.common.prompt_target import PromptTarget from pyrit.prompt_target.common.target_capabilities import TargetCapabilities from pyrit.prompt_target.common.utils import limit_requests_per_minute, validate_temperature, validate_top_p from pyrit.prompt_target.openai.openai_error_handling import _is_content_filter_error @@ -58,7 +58,7 @@ class MessagePieceType(str, Enum): MCP_APPROVAL_REQUEST = "mcp_approval_request" -class OpenAIResponseTarget(OpenAITarget, PromptChatTarget): +class OpenAIResponseTarget(OpenAITarget, PromptTarget): """ Enables communication with endpoints that support the OpenAI Response API. 
@@ -532,7 +532,7 @@ async def send_prompt_async(self, *, message: Message) -> list[Message]: json_config = _JsonResponseConfig(enabled=False) if message.message_pieces: last_piece = message.message_pieces[-1] - json_config = self._get_json_response_config(message_piece=last_piece) + json_config = _JsonResponseConfig.from_metadata(metadata=last_piece.prompt_metadata) # Get full conversation history from memory and append the current message conversation: MutableSequence[Message] = self._memory.get_conversation( diff --git a/pyrit/scenario/core/atomic_attack.py b/pyrit/scenario/core/atomic_attack.py index ec43e28026..eb371cd991 100644 --- a/pyrit/scenario/core/atomic_attack.py +++ b/pyrit/scenario/core/atomic_attack.py @@ -25,7 +25,7 @@ from pyrit.models import AttackResult, SeedAttackGroup if TYPE_CHECKING: - from pyrit.prompt_target import PromptChatTarget + from pyrit.prompt_target import PromptTarget from pyrit.score import TrueFalseScorer logger = logging.getLogger(__name__) @@ -71,7 +71,7 @@ def __init__( atomic_attack_name: str, attack: AttackStrategy[Any, Any], seed_groups: list[SeedAttackGroup], - adversarial_chat: Optional["PromptChatTarget"] = None, + adversarial_chat: Optional["PromptTarget"] = None, objective_scorer: Optional["TrueFalseScorer"] = None, memory_labels: Optional[dict[str, str]] = None, **attack_execute_params: Any, @@ -86,7 +86,7 @@ def __init__( seed_groups (List[SeedAttackGroup]): List of seed attack groups. Each seed group must have an objective set. The seed groups serve as the single source of truth for objectives, prepended conversations, and next messages. - adversarial_chat (Optional[PromptChatTarget]): Optional chat target for generating + adversarial_chat (Optional[PromptTarget]): Optional chat target for generating adversarial prompts or simulated conversations. Required when seed groups contain SeedSimulatedConversation configurations. 
objective_scorer (Optional[TrueFalseScorer]): Optional scorer for evaluating simulated diff --git a/pyrit/scenario/scenarios/airt/content_harms.py b/pyrit/scenario/scenarios/airt/content_harms.py index 0fcc816ad4..b88c9c40c9 100644 --- a/pyrit/scenario/scenarios/airt/content_harms.py +++ b/pyrit/scenario/scenarios/airt/content_harms.py @@ -20,7 +20,7 @@ TreeOfAttacksWithPruningAttack, ) from pyrit.models import SeedAttackGroup, SeedGroup -from pyrit.prompt_target import OpenAIChatTarget, PromptChatTarget +from pyrit.prompt_target import OpenAIChatTarget, PromptTarget from pyrit.scenario.core.atomic_attack import AtomicAttack from pyrit.scenario.core.dataset_configuration import DatasetConfiguration from pyrit.scenario.core.scenario import Scenario @@ -156,7 +156,7 @@ def default_dataset_config(cls) -> DatasetConfiguration: def __init__( self, *, - adversarial_chat: Optional[PromptChatTarget] = None, + adversarial_chat: Optional[PromptTarget] = None, objective_scorer: Optional[TrueFalseScorer] = None, scenario_result_id: Optional[str] = None, objectives_by_harm: Optional[dict[str, Sequence[SeedGroup]]] = None, @@ -165,7 +165,7 @@ def __init__( Initialize the Content Harms Scenario. Args: - adversarial_chat (Optional[PromptChatTarget]): Additionally used for scoring defaults. + adversarial_chat (Optional[PromptTarget]): Additionally used for scoring defaults. If not provided, a default OpenAI target will be created using environment variables. objective_scorer (Optional[TrueFalseScorer]): Scorer to evaluate attack success. 
If not provided, creates a default composite scorer using Azure Content Filter diff --git a/pyrit/scenario/scenarios/airt/cyber.py b/pyrit/scenario/scenarios/airt/cyber.py index be084e6e90..9284b1e895 100644 --- a/pyrit/scenario/scenarios/airt/cyber.py +++ b/pyrit/scenario/scenarios/airt/cyber.py @@ -16,7 +16,7 @@ from pyrit.executor.attack.multi_turn.red_teaming import RedTeamingAttack from pyrit.executor.attack.single_turn.prompt_sending import PromptSendingAttack from pyrit.models import SeedAttackGroup, SeedObjective -from pyrit.prompt_target import OpenAIChatTarget, PromptChatTarget +from pyrit.prompt_target import OpenAIChatTarget, PromptTarget from pyrit.scenario.core.atomic_attack import AtomicAttack from pyrit.scenario.core.dataset_configuration import DatasetConfiguration from pyrit.scenario.core.scenario import Scenario @@ -99,7 +99,7 @@ def default_dataset_config(cls) -> DatasetConfiguration: def __init__( self, *, - adversarial_chat: Optional[PromptChatTarget] = None, + adversarial_chat: Optional[PromptTarget] = None, objectives: Optional[list[str]] = None, objective_scorer: Optional[TrueFalseScorer] = None, include_baseline: bool = True, @@ -109,7 +109,7 @@ def __init__( Initialize the cyber harms scenario. Args: - adversarial_chat (Optional[PromptChatTarget]): Adversarial chat for the red teaming attack, corresponding + adversarial_chat (Optional[PromptTarget]): Adversarial chat for the red teaming attack, corresponding to CyberStrategy.MultiTurn. If not provided, defaults to an OpenAI chat target. objectives (Optional[List[str]]): Deprecated. Use dataset_config in initialize_async instead. objective_scorer (Optional[TrueFalseScorer]): Objective scorer for malware detection. 
If not diff --git a/pyrit/scenario/scenarios/airt/leakage.py b/pyrit/scenario/scenarios/airt/leakage.py index 61c1f13e13..498f71ed44 100644 --- a/pyrit/scenario/scenarios/airt/leakage.py +++ b/pyrit/scenario/scenarios/airt/leakage.py @@ -23,7 +23,7 @@ from pyrit.models import SeedAttackGroup, SeedObjective from pyrit.prompt_converter import AddImageTextConverter, FirstLetterConverter, PromptConverter from pyrit.prompt_normalizer import PromptConverterConfiguration -from pyrit.prompt_target import OpenAIChatTarget, PromptChatTarget +from pyrit.prompt_target import OpenAIChatTarget, PromptTarget from pyrit.scenario.core.atomic_attack import AtomicAttack from pyrit.scenario.core.dataset_configuration import DatasetConfiguration from pyrit.scenario.core.scenario import Scenario @@ -141,7 +141,7 @@ def default_dataset_config(cls) -> DatasetConfiguration: def __init__( self, *, - adversarial_chat: Optional[PromptChatTarget] = None, + adversarial_chat: Optional[PromptTarget] = None, objectives: Optional[list[str]] = None, objective_scorer: Optional[TrueFalseScorer] = None, include_baseline: bool = True, @@ -151,7 +151,7 @@ def __init__( Initialize the leakage scenario. Args: - adversarial_chat (Optional[PromptChatTarget]): Adversarial chat target for multi-turn attacks + adversarial_chat (Optional[PromptTarget]): Adversarial chat target for multi-turn attacks (Crescendo, RolePlay). If not provided, defaults to an OpenAI chat target. objectives (Optional[List[str]]): List of objectives to test for data leakage. If not provided, defaults to objectives from the airt_leakage dataset. 
diff --git a/pyrit/scenario/scenarios/airt/psychosocial.py b/pyrit/scenario/scenarios/airt/psychosocial.py index 16320231c3..c5e9c1c4e5 100644 --- a/pyrit/scenario/scenarios/airt/psychosocial.py +++ b/pyrit/scenario/scenarios/airt/psychosocial.py @@ -28,7 +28,7 @@ from pyrit.prompt_normalizer.prompt_converter_configuration import ( PromptConverterConfiguration, ) -from pyrit.prompt_target import OpenAIChatTarget, PromptChatTarget +from pyrit.prompt_target import OpenAIChatTarget, PromptTarget from pyrit.scenario.core.atomic_attack import AtomicAttack from pyrit.scenario.core.dataset_configuration import DatasetConfiguration from pyrit.scenario.core.scenario import Scenario @@ -211,7 +211,7 @@ def __init__( self, *, objectives: Optional[list[str]] = None, - adversarial_chat: Optional[PromptChatTarget] = None, + adversarial_chat: Optional[PromptTarget] = None, objective_scorer: Optional[FloatScaleThresholdScorer] = None, scenario_result_id: Optional[str] = None, subharm_configs: Optional[dict[str, SubharmConfig]] = None, @@ -223,7 +223,7 @@ def __init__( Args: objectives (Optional[List[str]]): DEPRECATED - Use dataset_config in initialize_async instead. List of objectives to test for psychosocial harms. - adversarial_chat (Optional[PromptChatTarget]): Additionally used for adversarial attacks + adversarial_chat (Optional[PromptTarget]): Additionally used for adversarial attacks and scoring defaults. If not provided, a default OpenAI target will be created using environment variables. objective_scorer (Optional[FloatScaleThresholdScorer]): Scorer to evaluate attack success. 
@@ -431,10 +431,10 @@ def _get_scorer(self, subharm: Optional[str] = None) -> FloatScaleThresholdScore async def _get_atomic_attacks_async(self) -> list[AtomicAttack]: if self._objective_target is None: raise ValueError("objective_target must be set before creating attacks") - if not isinstance(self._objective_target, PromptChatTarget): - raise TypeError( - f"PsychosocialHarmsScenario requires a PromptChatTarget, got {type(self._objective_target).__name__}" - ) + self._objective_target.capabilities.validate( + required={"supports_multi_turn", "supports_editable_history"}, + context="objective_target", + ) resolved = self._resolve_seed_groups() self._seed_groups = resolved.seed_groups diff --git a/pyrit/scenario/scenarios/airt/scam.py b/pyrit/scenario/scenarios/airt/scam.py index 98ae7b338d..4c8f77dc73 100644 --- a/pyrit/scenario/scenarios/airt/scam.py +++ b/pyrit/scenario/scenarios/airt/scam.py @@ -23,7 +23,7 @@ AttackScoringConfig, ) from pyrit.models import SeedAttackGroup, SeedObjective -from pyrit.prompt_target import OpenAIChatTarget, PromptChatTarget +from pyrit.prompt_target import OpenAIChatTarget, PromptTarget from pyrit.scenario.core.atomic_attack import AtomicAttack from pyrit.scenario.core.dataset_configuration import DatasetConfiguration from pyrit.scenario.core.scenario import Scenario @@ -135,7 +135,7 @@ def __init__( *, objectives: Optional[list[str]] = None, objective_scorer: Optional[TrueFalseScorer] = None, - adversarial_chat: Optional[PromptChatTarget] = None, + adversarial_chat: Optional[PromptTarget] = None, include_baseline: bool = True, scenario_result_id: Optional[str] = None, ) -> None: @@ -146,7 +146,7 @@ def __init__( objectives (Optional[List[str]]): List of objectives to test for scam-related harms. objective_scorer (Optional[TrueFalseScorer]): Custom scorer for objective evaluation. 
- adversarial_chat (Optional[PromptChatTarget]): Chat target used to rephrase the + adversarial_chat (Optional[PromptTarget]): Chat target used to rephrase the objective into the role-play context (in single-turn strategies). include_baseline (bool): Whether to include a baseline atomic attack that sends all objectives without modifications. Defaults to True. When True, a "baseline" attack is automatically diff --git a/pyrit/scenario/scenarios/foundry/red_team_agent.py b/pyrit/scenario/scenarios/foundry/red_team_agent.py index afbbfabd21..15c76bfd94 100644 --- a/pyrit/scenario/scenarios/foundry/red_team_agent.py +++ b/pyrit/scenario/scenarios/foundry/red_team_agent.py @@ -60,7 +60,7 @@ from pyrit.prompt_normalizer.prompt_converter_configuration import ( PromptConverterConfiguration, ) -from pyrit.prompt_target.common.prompt_chat_target import PromptChatTarget +from pyrit.prompt_target.common.prompt_target import PromptTarget from pyrit.prompt_target.openai.openai_chat_target import OpenAIChatTarget from pyrit.scenario.core.atomic_attack import AtomicAttack from pyrit.scenario.core.dataset_configuration import DatasetConfiguration @@ -249,7 +249,7 @@ def default_dataset_config(cls) -> DatasetConfiguration: def __init__( self, *, - adversarial_chat: Optional[PromptChatTarget] = None, + adversarial_chat: Optional[PromptTarget] = None, objectives: Optional[list[str]] = None, attack_scoring_config: Optional[AttackScoringConfig] = None, include_baseline: bool = True, @@ -259,7 +259,7 @@ def __init__( Initialize a Foundry Scenario with the specified attack strategies. Args: - adversarial_chat (Optional[PromptChatTarget]): Target for multi-turn attacks + adversarial_chat (Optional[PromptTarget]): Target for multi-turn attacks like Crescendo and RedTeaming. Additionally used for scoring defaults. If not provided, a default OpenAI target will be created using environment variables. objectives (Optional[List[str]]): Deprecated. Use dataset_config in initialize_async instead. 
diff --git a/pyrit/score/float_scale/float_scale_scorer.py b/pyrit/score/float_scale/float_scale_scorer.py index af39cf5bec..a22d29921c 100644 --- a/pyrit/score/float_scale/float_scale_scorer.py +++ b/pyrit/score/float_scale/float_scale_scorer.py @@ -7,7 +7,7 @@ from pyrit.exceptions.exception_classes import InvalidJsonException from pyrit.identifiers import ComponentIdentifier from pyrit.models import PromptDataType, Score, UnvalidatedScore -from pyrit.prompt_target.common.prompt_chat_target import PromptChatTarget +from pyrit.prompt_target.common.prompt_target import PromptTarget from pyrit.score.scorer import Scorer from pyrit.score.scorer_prompt_validator import ScorerPromptValidator @@ -66,7 +66,7 @@ def get_scorer_metrics(self) -> Optional["HarmScorerMetrics"]: async def _score_value_with_llm( self, *, - prompt_target: PromptChatTarget, + prompt_target: PromptTarget, system_prompt: str, message_value: str, message_data_type: PromptDataType, diff --git a/pyrit/score/float_scale/insecure_code_scorer.py b/pyrit/score/float_scale/insecure_code_scorer.py index 45c64dab00..043a86e45b 100644 --- a/pyrit/score/float_scale/insecure_code_scorer.py +++ b/pyrit/score/float_scale/insecure_code_scorer.py @@ -9,7 +9,7 @@ from pyrit.exceptions.exception_classes import InvalidJsonException from pyrit.identifiers import ComponentIdentifier from pyrit.models import MessagePiece, Score, SeedPrompt -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget from pyrit.score.float_scale.float_scale_scorer import FloatScaleScorer from pyrit.score.scorer_prompt_validator import ScorerPromptValidator @@ -25,7 +25,7 @@ class InsecureCodeScorer(FloatScaleScorer): def __init__( self, *, - chat_target: PromptChatTarget, + chat_target: PromptTarget, system_prompt_path: Optional[Union[str, Path]] = None, validator: Optional[ScorerPromptValidator] = None, ): @@ -33,7 +33,7 @@ def __init__( Initialize the Insecure Code Scorer. 
Args: - chat_target (PromptChatTarget): The target to use for scoring code security. + chat_target (PromptTarget): The target to use for scoring code security. system_prompt_path (Optional[Union[str, Path]]): Path to the YAML file containing the system prompt. Defaults to the default insecure code scoring prompt if not provided. validator (Optional[ScorerPromptValidator]): Custom validator for the scorer. Defaults to None. diff --git a/pyrit/score/float_scale/self_ask_general_float_scale_scorer.py b/pyrit/score/float_scale/self_ask_general_float_scale_scorer.py index ae9e0acc4b..2d086372b9 100644 --- a/pyrit/score/float_scale/self_ask_general_float_scale_scorer.py +++ b/pyrit/score/float_scale/self_ask_general_float_scale_scorer.py @@ -11,7 +11,7 @@ if TYPE_CHECKING: from pyrit.identifiers import ComponentIdentifier from pyrit.models import MessagePiece, Score, UnvalidatedScore - from pyrit.prompt_target import PromptChatTarget + from pyrit.prompt_target import PromptTarget class SelfAskGeneralFloatScaleScorer(FloatScaleScorer): @@ -28,7 +28,7 @@ class SelfAskGeneralFloatScaleScorer(FloatScaleScorer): def __init__( self, *, - chat_target: PromptChatTarget, + chat_target: PromptTarget, system_prompt_format_string: str, prompt_format_string: Optional[str] = None, category: Optional[str] = None, @@ -52,7 +52,7 @@ def __init__( in the response, the provided `category` argument will be applied. Args: - chat_target (PromptChatTarget): The chat target used to score. + chat_target (PromptTarget): The chat target used to score. system_prompt_format_string (str): System prompt template with placeholders for objective, prompt, and message_piece. prompt_format_string (Optional[str]): User prompt template with the same placeholders. 
diff --git a/pyrit/score/float_scale/self_ask_likert_scorer.py b/pyrit/score/float_scale/self_ask_likert_scorer.py index ab72c7ba16..07f184d00a 100644 --- a/pyrit/score/float_scale/self_ask_likert_scorer.py +++ b/pyrit/score/float_scale/self_ask_likert_scorer.py @@ -12,7 +12,7 @@ from pyrit.common.path import HARM_DEFINITION_PATH, SCORER_LIKERT_PATH from pyrit.identifiers import ComponentIdentifier from pyrit.models import MessagePiece, Score, SeedPrompt, UnvalidatedScore -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget from pyrit.score.float_scale.float_scale_scorer import FloatScaleScorer from pyrit.score.scorer_prompt_validator import ScorerPromptValidator @@ -165,7 +165,7 @@ class SelfAskLikertScorer(FloatScaleScorer): def __init__( self, *, - chat_target: PromptChatTarget, + chat_target: PromptTarget, likert_scale: LikertScalePaths, validator: Optional[ScorerPromptValidator] = None, ) -> None: @@ -173,7 +173,7 @@ def __init__( Initialize the SelfAskLikertScorer. Args: - chat_target (PromptChatTarget): The chat target to use for scoring. + chat_target (PromptTarget): The chat target to use for scoring. likert_scale (LikertScalePaths): The Likert scale configuration to use for scoring. validator (Optional[ScorerPromptValidator]): Custom validator for the scorer. Defaults to None. 
""" diff --git a/pyrit/score/float_scale/self_ask_scale_scorer.py b/pyrit/score/float_scale/self_ask_scale_scorer.py index 4bf0dc2dee..e249702aaf 100644 --- a/pyrit/score/float_scale/self_ask_scale_scorer.py +++ b/pyrit/score/float_scale/self_ask_scale_scorer.py @@ -11,7 +11,7 @@ from pyrit.common.path import SCORER_SCALES_PATH from pyrit.identifiers import ComponentIdentifier from pyrit.models import MessagePiece, Score, SeedPrompt, UnvalidatedScore -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget from pyrit.score.float_scale.float_scale_scorer import FloatScaleScorer from pyrit.score.scorer_prompt_validator import ScorerPromptValidator @@ -43,7 +43,7 @@ class SystemPaths(enum.Enum): def __init__( self, *, - chat_target: PromptChatTarget, + chat_target: PromptTarget, scale_arguments_path: Optional[Union[Path, str]] = None, system_prompt_path: Optional[Union[Path, str]] = None, validator: Optional[ScorerPromptValidator] = None, @@ -52,7 +52,7 @@ def __init__( Initialize the SelfAskScaleScorer. Args: - chat_target (PromptChatTarget): The chat target to use for scoring. + chat_target (PromptTarget): The chat target to use for scoring. scale_arguments_path (Optional[Union[Path, str]]): Path to the YAML file containing scale definitions. Defaults to TREE_OF_ATTACKS_SCALE if not provided. system_prompt_path (Optional[Union[Path, str]]): Path to the YAML file containing the system prompt. 
diff --git a/pyrit/score/scorer.py b/pyrit/score/scorer.py index b18a1802a9..c2ff77043c 100644 --- a/pyrit/score/scorer.py +++ b/pyrit/score/scorer.py @@ -39,7 +39,7 @@ if TYPE_CHECKING: from collections.abc import Sequence - from pyrit.prompt_target import PromptChatTarget, PromptTarget + from pyrit.prompt_target import PromptTarget from pyrit.score.scorer_evaluation.metrics_type import RegistryUpdateBehavior from pyrit.score.scorer_evaluation.scorer_evaluator import ( ScorerEvalDatasetFiles, @@ -494,7 +494,7 @@ def scale_value_float(self, value: float, min_value: float, max_value: float) -> async def _score_value_with_llm( self, *, - prompt_target: PromptChatTarget, + prompt_target: PromptTarget, system_prompt: str, message_value: str, message_data_type: PromptDataType, @@ -516,7 +516,7 @@ async def _score_value_with_llm( description fields. Args: - prompt_target (PromptChatTarget): The target LLM to send the message to. + prompt_target (PromptTarget): The target LLM to send the message to. system_prompt (str): The system-level prompt that guides the behavior of the target LLM. message_value (str): The actual value or content to be scored by the LLM (e.g., text, image path, audio path). 
@@ -555,7 +555,7 @@ async def _score_value_with_llm( """ conversation_id = str(uuid.uuid4()) - prompt_target.set_system_prompt( + prompt_target._set_target_system_prompt( system_prompt=system_prompt, conversation_id=conversation_id, attack_identifier=attack_identifier, diff --git a/pyrit/score/true_false/gandalf_scorer.py b/pyrit/score/true_false/gandalf_scorer.py index 2aab7c264e..633b7f8bec 100644 --- a/pyrit/score/true_false/gandalf_scorer.py +++ b/pyrit/score/true_false/gandalf_scorer.py @@ -11,7 +11,7 @@ from pyrit.exceptions import PyritException, pyrit_target_retry from pyrit.identifiers import ComponentIdentifier from pyrit.models import Message, MessagePiece, Score -from pyrit.prompt_target import GandalfLevel, PromptChatTarget +from pyrit.prompt_target import GandalfLevel, PromptTarget from pyrit.score.scorer_prompt_validator import ScorerPromptValidator from pyrit.score.true_false.true_false_score_aggregator import ( TrueFalseAggregatorFunc, @@ -35,7 +35,7 @@ def __init__( self, *, level: GandalfLevel, - chat_target: PromptChatTarget, + chat_target: PromptTarget, validator: Optional[ScorerPromptValidator] = None, score_aggregator: TrueFalseAggregatorFunc = TrueFalseScoreAggregator.OR, ) -> None: @@ -44,7 +44,7 @@ def __init__( Args: level (GandalfLevel): The Gandalf challenge level to score against. - chat_target (PromptChatTarget): The chat target used for password extraction. + chat_target (PromptTarget): The chat target used for password extraction. validator (Optional[ScorerPromptValidator]): Custom validator. Defaults to text data type validator. score_aggregator (TrueFalseAggregatorFunc): Aggregator for combining scores. Defaults to TrueFalseScoreAggregator.OR. 
@@ -99,7 +99,7 @@ async def _check_for_password_in_conversation(self, conversation_id: str) -> str ) scoring_conversation_id = str(uuid.uuid4()) - self._prompt_target.set_system_prompt( + self._prompt_target._set_target_system_prompt( system_prompt=system_prompt, conversation_id=scoring_conversation_id, ) diff --git a/pyrit/score/true_false/self_ask_category_scorer.py b/pyrit/score/true_false/self_ask_category_scorer.py index 7102ba3af6..8ab174cd1b 100644 --- a/pyrit/score/true_false/self_ask_category_scorer.py +++ b/pyrit/score/true_false/self_ask_category_scorer.py @@ -11,7 +11,7 @@ from pyrit.common.path import SCORER_CONTENT_CLASSIFIERS_PATH from pyrit.identifiers import ComponentIdentifier from pyrit.models import MessagePiece, Score, SeedPrompt, UnvalidatedScore -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget from pyrit.score.scorer_prompt_validator import ScorerPromptValidator from pyrit.score.true_false.true_false_score_aggregator import ( TrueFalseAggregatorFunc, @@ -41,7 +41,7 @@ class SelfAskCategoryScorer(TrueFalseScorer): def __init__( self, *, - chat_target: PromptChatTarget, + chat_target: PromptTarget, content_classifier_path: Union[str, Path], score_aggregator: TrueFalseAggregatorFunc = TrueFalseScoreAggregator.OR, validator: Optional[ScorerPromptValidator] = None, @@ -50,7 +50,7 @@ def __init__( Initialize a new instance of the SelfAskCategoryScorer class. Args: - chat_target (PromptChatTarget): The chat target to interact with. + chat_target (PromptTarget): The chat target to interact with. content_classifier_path (Union[str, Path]): The path to the classifier YAML file. score_aggregator (TrueFalseAggregatorFunc): The aggregator function to use. Defaults to TrueFalseScoreAggregator.OR. 
diff --git a/pyrit/score/true_false/self_ask_general_true_false_scorer.py b/pyrit/score/true_false/self_ask_general_true_false_scorer.py index 44bb362748..cd85e162a1 100644 --- a/pyrit/score/true_false/self_ask_general_true_false_scorer.py +++ b/pyrit/score/true_false/self_ask_general_true_false_scorer.py @@ -15,7 +15,7 @@ if TYPE_CHECKING: from pyrit.identifiers import ComponentIdentifier from pyrit.models import MessagePiece, Score, UnvalidatedScore - from pyrit.prompt_target import PromptChatTarget + from pyrit.prompt_target import PromptTarget class SelfAskGeneralTrueFalseScorer(TrueFalseScorer): @@ -32,7 +32,7 @@ class SelfAskGeneralTrueFalseScorer(TrueFalseScorer): def __init__( self, *, - chat_target: PromptChatTarget, + chat_target: PromptTarget, system_prompt_format_string: str, prompt_format_string: Optional[str] = None, category: Optional[str] = None, @@ -55,7 +55,7 @@ def __init__( in the response, the provided `category` argument will be applied. Args: - chat_target (PromptChatTarget): The chat target used to score. + chat_target (PromptTarget): The chat target used to score. system_prompt_format_string (str): System prompt template with placeholders for objective, task (alias of objective), prompt, and message_piece. prompt_format_string (Optional[str]): User prompt template with the same placeholders. 
diff --git a/pyrit/score/true_false/self_ask_question_answer_scorer.py b/pyrit/score/true_false/self_ask_question_answer_scorer.py index bf1c017dde..11719cba5b 100644 --- a/pyrit/score/true_false/self_ask_question_answer_scorer.py +++ b/pyrit/score/true_false/self_ask_question_answer_scorer.py @@ -18,7 +18,7 @@ import pathlib from pyrit.models import MessagePiece, Score, UnvalidatedScore - from pyrit.prompt_target import PromptChatTarget + from pyrit.prompt_target import PromptTarget class SelfAskQuestionAnswerScorer(SelfAskTrueFalseScorer): @@ -37,7 +37,7 @@ class SelfAskQuestionAnswerScorer(SelfAskTrueFalseScorer): def __init__( self, *, - chat_target: PromptChatTarget, + chat_target: PromptTarget, true_false_question_path: Optional[pathlib.Path] = None, validator: Optional[ScorerPromptValidator] = None, score_aggregator: TrueFalseAggregatorFunc = TrueFalseScoreAggregator.OR, @@ -46,7 +46,7 @@ def __init__( Initialize the SelfAskQuestionAnswerScorer object. Args: - chat_target (PromptChatTarget): The chat target to use for the scorer. + chat_target (PromptTarget): The chat target to use for the scorer. true_false_question_path (Optional[pathlib.Path]): The path to the true/false question file. Defaults to None, which uses the default question_answering.yaml file. validator (Optional[ScorerPromptValidator]): Custom validator. Defaults to None. 
diff --git a/pyrit/score/true_false/self_ask_refusal_scorer.py b/pyrit/score/true_false/self_ask_refusal_scorer.py index 923fdb71ff..4c435153f1 100644 --- a/pyrit/score/true_false/self_ask_refusal_scorer.py +++ b/pyrit/score/true_false/self_ask_refusal_scorer.py @@ -8,7 +8,7 @@ from pyrit.common.path import SCORER_SEED_PROMPT_PATH from pyrit.identifiers import ComponentIdentifier from pyrit.models import MessagePiece, Score, SeedPrompt, UnvalidatedScore -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget from pyrit.score.scorer_prompt_validator import ScorerPromptValidator from pyrit.score.true_false.true_false_score_aggregator import ( TrueFalseAggregatorFunc, @@ -60,7 +60,7 @@ class SelfAskRefusalScorer(TrueFalseScorer): def __init__( self, *, - chat_target: PromptChatTarget, + chat_target: PromptTarget, refusal_system_prompt_path: Union[RefusalScorerPaths, Path, str] = RefusalScorerPaths.DEFAULT, prompt_format_string: Optional[str] = None, validator: Optional[ScorerPromptValidator] = None, @@ -70,7 +70,7 @@ def __init__( Initialize the SelfAskRefusalScorer. Args: - chat_target (PromptChatTarget): The endpoint that will be used to score the prompt. + chat_target (PromptTarget): The endpoint that will be used to score the prompt. refusal_system_prompt_path (Union[RefusalScorerPaths, Path, str]): The path to the system prompt to use for refusal detection. Can be a RefusalScorerPaths enum value, a Path, or a string path. Defaults to RefusalScorerPaths.DEFAULT. 
diff --git a/pyrit/score/true_false/self_ask_true_false_scorer.py b/pyrit/score/true_false/self_ask_true_false_scorer.py index da1054274d..f971e2d7c1 100644 --- a/pyrit/score/true_false/self_ask_true_false_scorer.py +++ b/pyrit/score/true_false/self_ask_true_false_scorer.py @@ -12,7 +12,7 @@ from pyrit.common.path import SCORER_SEED_PROMPT_PATH from pyrit.identifiers import ComponentIdentifier from pyrit.models import MessagePiece, Score, SeedPrompt -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget from pyrit.score.scorer_prompt_validator import ScorerPromptValidator from pyrit.score.true_false.true_false_score_aggregator import ( TrueFalseAggregatorFunc, @@ -97,7 +97,7 @@ class SelfAskTrueFalseScorer(TrueFalseScorer): def __init__( self, *, - chat_target: PromptChatTarget, + chat_target: PromptTarget, true_false_question_path: Optional[Union[str, Path]] = None, true_false_question: Optional[TrueFalseQuestion] = None, true_false_system_prompt_path: Optional[Union[str, Path]] = None, @@ -108,7 +108,7 @@ def __init__( Initialize the SelfAskTrueFalseScorer. Args: - chat_target (PromptChatTarget): The chat target to interact with. + chat_target (PromptTarget): The chat target to interact with. true_false_question_path (Optional[Union[str, Path]]): The path to the true/false question file. true_false_question (Optional[TrueFalseQuestion]): The true/false question object. true_false_system_prompt_path (Optional[Union[str, Path]]): The path to the system prompt file. 
diff --git a/pyrit/setup/initializers/components/scorers.py b/pyrit/setup/initializers/components/scorers.py index 06b304ebc6..81114e0085 100644 --- a/pyrit/setup/initializers/components/scorers.py +++ b/pyrit/setup/initializers/components/scorers.py @@ -34,7 +34,7 @@ from pyrit.setup.initializers.pyrit_initializer import InitializerParameter, PyRITInitializer if TYPE_CHECKING: - from pyrit.prompt_target.common.prompt_chat_target import PromptChatTarget + from pyrit.prompt_target.common.prompt_target import PromptTarget logger = logging.getLogger(__name__) @@ -151,12 +151,12 @@ async def initialize_async(self) -> None: scorer_registry = ScorerRegistry.get_registry_singleton() # Get targets from registry - gpt4o: Optional[PromptChatTarget] = target_registry.get_instance_by_name(GPT4O_TARGET) # type: ignore[assignment] - gpt4o_temp0: Optional[PromptChatTarget] = target_registry.get_instance_by_name(GPT4O_TEMP0_TARGET) # type: ignore[assignment] - gpt4o_temp9: Optional[PromptChatTarget] = target_registry.get_instance_by_name(GPT4O_TEMP9_TARGET) # type: ignore[assignment] - unsafe: Optional[PromptChatTarget] = target_registry.get_instance_by_name(GPT4O_UNSAFE_TARGET) # type: ignore[assignment] - unsafe_temp0: Optional[PromptChatTarget] = target_registry.get_instance_by_name(GPT4O_UNSAFE_TEMP0_TARGET) # type: ignore[assignment] - unsafe_temp9: Optional[PromptChatTarget] = target_registry.get_instance_by_name(GPT4O_UNSAFE_TEMP9_TARGET) # type: ignore[assignment] + gpt4o: Optional[PromptTarget] = target_registry.get_instance_by_name(GPT4O_TARGET) + gpt4o_temp0: Optional[PromptTarget] = target_registry.get_instance_by_name(GPT4O_TEMP0_TARGET) + gpt4o_temp9: Optional[PromptTarget] = target_registry.get_instance_by_name(GPT4O_TEMP9_TARGET) + unsafe: Optional[PromptTarget] = target_registry.get_instance_by_name(GPT4O_UNSAFE_TARGET) + unsafe_temp0: Optional[PromptTarget] = target_registry.get_instance_by_name(GPT4O_UNSAFE_TEMP0_TARGET) + unsafe_temp9: Optional[PromptTarget] = 
target_registry.get_instance_by_name(GPT4O_UNSAFE_TEMP9_TARGET) # Refusal Scorers self._try_register(scorer_registry, REFUSAL_GPT4O, lambda: SelfAskRefusalScorer(chat_target=gpt4o), gpt4o) diff --git a/tests/integration/mocks.py b/tests/integration/mocks.py index 5b872eb014..2ff8f484db 100644 --- a/tests/integration/mocks.py +++ b/tests/integration/mocks.py @@ -9,7 +9,7 @@ from pyrit.identifiers import ComponentIdentifier from pyrit.memory import MemoryInterface, SQLiteMemory from pyrit.models import Message, MessagePiece -from pyrit.prompt_target import PromptChatTarget, limit_requests_per_minute +from pyrit.prompt_target import PromptTarget, limit_requests_per_minute def get_memory_interface() -> Generator[MemoryInterface, None, None]: @@ -36,7 +36,7 @@ def get_sqlite_memory() -> Generator[SQLiteMemory, None, None]: sqlite_memory.dispose_engine() -class MockPromptTarget(PromptChatTarget): +class MockPromptTarget(PromptTarget): prompt_sent: list[str] def __init__(self, id=None, rpm=None) -> None: # noqa: A002 diff --git a/tests/unit/backend/test_converter_service.py b/tests/unit/backend/test_converter_service.py index 0deb273b2b..b1331735a3 100644 --- a/tests/unit/backend/test_converter_service.py +++ b/tests/unit/backend/test_converter_service.py @@ -371,7 +371,7 @@ def _try_instantiate_converter(converter_name: str): """ Try to instantiate a converter with minimal representative arguments. - Uses mock objects for complex dependencies (PromptChatTarget, PromptConverter) + Uses mock objects for complex dependencies (PromptTarget, PromptConverter) and provides minimal valid values for simple required parameters so that the identifier extraction test covers ALL converters without skipping. 
@@ -386,7 +386,7 @@ def _try_instantiate_converter(converter_name: str): from unittest.mock import MagicMock from pyrit.common.apply_defaults import _RequiredValueSentinel - from pyrit.prompt_target.common.prompt_chat_target import PromptChatTarget + from pyrit.prompt_target.common.prompt_target import PromptTarget # Converters requiring external credentials or resources that can't be mocked # at the constructor level — these validate env vars / files in __init__ body @@ -433,11 +433,11 @@ def _try_instantiate_converter(converter_name: str): ann = param.annotation ann_str = str(ann) if ann is not inspect.Parameter.empty else "" - # PromptChatTarget — mock it with a proper identifier + # PromptTarget — mock it with a proper identifier if ann is not inspect.Parameter.empty and ( - (isinstance(ann, type) and issubclass(ann, PromptChatTarget)) or "PromptChatTarget" in ann_str + (isinstance(ann, type) and issubclass(ann, PromptTarget)) or "PromptTarget" in ann_str ): - mock_target = MagicMock(spec=PromptChatTarget) + mock_target = MagicMock(spec=PromptTarget) mock_target.__class__.__name__ = "MockChatTarget" # Configure get_identifier() to return a proper identifier-like object # so that _create_identifier can extract class_name, model_name, etc. @@ -505,7 +505,7 @@ def test_build_instance_from_converter(self, converter_name: str) -> None: Test that _build_instance_from_object works with each converter. 
Instantiates every converter with minimal representative arguments - (using mocks for complex dependencies like PromptChatTarget) and verifies: + (using mocks for complex dependencies like PromptTarget) and verifies: - converter_id is set correctly - converter_type matches the class name - supported_input_types and supported_output_types are lists diff --git a/tests/unit/converter/test_generic_llm_converter.py b/tests/unit/converter/test_generic_llm_converter.py index 403294cab3..647c2f526d 100644 --- a/tests/unit/converter/test_generic_llm_converter.py +++ b/tests/unit/converter/test_generic_llm_converter.py @@ -38,9 +38,9 @@ async def test_noise_converter_sets_system_prompt_default(mock_target) -> None: converter = NoiseConverter(converter_target=mock_target) await converter.convert_async(prompt="being awesome") - mock_target.set_system_prompt.assert_called_once() + mock_target._set_target_system_prompt.assert_called_once() - system_arg = mock_target.set_system_prompt.call_args[1]["system_prompt"] + system_arg = mock_target._set_target_system_prompt.call_args[1]["system_prompt"] assert isinstance(system_arg, str) assert "Grammar error, Delete random letter" in system_arg @@ -50,9 +50,9 @@ async def test_noise_converter_sets_system_prompt(mock_target) -> None: converter = NoiseConverter(converter_target=mock_target, noise="extra random periods") await converter.convert_async(prompt="being awesome") - mock_target.set_system_prompt.assert_called_once() + mock_target._set_target_system_prompt.assert_called_once() - system_arg = mock_target.set_system_prompt.call_args[1]["system_prompt"] + system_arg = mock_target._set_target_system_prompt.call_args[1]["system_prompt"] assert isinstance(system_arg, str) assert "extra random periods" in system_arg @@ -62,9 +62,9 @@ async def test_tone_converter_sets_system_prompt(mock_target) -> None: converter = ToneConverter(tone="formal", converter_target=mock_target) await converter.convert_async(prompt="being awesome") - 
mock_target.set_system_prompt.assert_called_once() + mock_target._set_target_system_prompt.assert_called_once() - system_arg = mock_target.set_system_prompt.call_args[1]["system_prompt"] + system_arg = mock_target._set_target_system_prompt.call_args[1]["system_prompt"] assert isinstance(system_arg, str) assert "formal" in system_arg @@ -74,9 +74,9 @@ async def test_tense_converter_sets_system_prompt(mock_target) -> None: converter = TenseConverter(tense="past", converter_target=mock_target) await converter.convert_async(prompt="being awesome") - mock_target.set_system_prompt.assert_called_once() + mock_target._set_target_system_prompt.assert_called_once() - system_arg = mock_target.set_system_prompt.call_args[1]["system_prompt"] + system_arg = mock_target._set_target_system_prompt.call_args[1]["system_prompt"] assert isinstance(system_arg, str) assert "past" in system_arg @@ -86,9 +86,9 @@ async def test_malicious_question_converter_sets_system_prompt(mock_target) -> N converter = MaliciousQuestionGeneratorConverter(converter_target=mock_target) await converter.convert_async(prompt="being awesome") - mock_target.set_system_prompt.assert_called_once() + mock_target._set_target_system_prompt.assert_called_once() - system_arg = mock_target.set_system_prompt.call_args[1]["system_prompt"] + system_arg = mock_target._set_target_system_prompt.call_args[1]["system_prompt"] assert isinstance(system_arg, str) assert "Please act as an expert in this domain: being awesome" in system_arg diff --git a/tests/unit/converter/test_random_translation_converter.py b/tests/unit/converter/test_random_translation_converter.py index a71fb86fe8..e27ad7af3d 100644 --- a/tests/unit/converter/test_random_translation_converter.py +++ b/tests/unit/converter/test_random_translation_converter.py @@ -37,9 +37,9 @@ async def test_random_translation_converter_sets_system_prompt(mock_target) -> N converter = RandomTranslationConverter(converter_target=mock_target) await 
converter.convert_async(prompt="being awesome") - mock_target.set_system_prompt.assert_called_once() + mock_target._set_target_system_prompt.assert_called_once() - system_arg = mock_target.set_system_prompt.call_args[1]["system_prompt"] + system_arg = mock_target._set_target_system_prompt.call_args[1]["system_prompt"] assert isinstance(system_arg, str) assert "Each word is associated with a target language on the same line." in system_arg diff --git a/tests/unit/converter/test_scientific_translation_converter.py b/tests/unit/converter/test_scientific_translation_converter.py index 363b2ce70b..7e79bf1339 100644 --- a/tests/unit/converter/test_scientific_translation_converter.py +++ b/tests/unit/converter/test_scientific_translation_converter.py @@ -63,9 +63,9 @@ async def test_scientific_translation_converter_sets_system_prompt_academic(mock converter = ScientificTranslationConverter(converter_target=mock_target, mode="academic") await converter.convert_async(prompt="tell me about dangerous chemicals") - mock_target.set_system_prompt.assert_called_once() + mock_target._set_target_system_prompt.assert_called_once() - system_arg = mock_target.set_system_prompt.call_args[1]["system_prompt"] + system_arg = mock_target._set_target_system_prompt.call_args[1]["system_prompt"] assert isinstance(system_arg, str) assert "homework or exam" in system_arg.lower() @@ -75,9 +75,9 @@ async def test_scientific_translation_converter_sets_system_prompt_technical(moc converter = ScientificTranslationConverter(converter_target=mock_target, mode="technical") await converter.convert_async(prompt="tell me about dangerous chemicals") - mock_target.set_system_prompt.assert_called_once() + mock_target._set_target_system_prompt.assert_called_once() - system_arg = mock_target.set_system_prompt.call_args[1]["system_prompt"] + system_arg = mock_target._set_target_system_prompt.call_args[1]["system_prompt"] assert isinstance(system_arg, str) assert "technical" in system_arg.lower() @@ 
-87,9 +87,9 @@ async def test_scientific_translation_converter_sets_system_prompt_combined(mock converter = ScientificTranslationConverter(converter_target=mock_target, mode="combined") await converter.convert_async(prompt="tell me about dangerous chemicals") - mock_target.set_system_prompt.assert_called_once() + mock_target._set_target_system_prompt.assert_called_once() - system_arg = mock_target.set_system_prompt.call_args[1]["system_prompt"] + system_arg = mock_target._set_target_system_prompt.call_args[1]["system_prompt"] assert isinstance(system_arg, str) assert "combination" in system_arg.lower() @@ -143,8 +143,8 @@ async def test_scientific_translation_converter_custom_template_used_in_conversi ) await converter.convert_async(prompt="test prompt") - mock_target.set_system_prompt.assert_called_once() - system_arg = mock_target.set_system_prompt.call_args[1]["system_prompt"] + mock_target._set_target_system_prompt.assert_called_once() + system_arg = mock_target._set_target_system_prompt.call_args[1]["system_prompt"] assert "CUSTOM_MARKER" in system_arg @@ -179,6 +179,6 @@ async def test_scientific_translation_converter_custom_mode_conversion(mock_targ result = await converter.convert_async(prompt="test input") assert result.output_text == "scientifically obfuscated prompt" - mock_target.set_system_prompt.assert_called_once() - system_arg = mock_target.set_system_prompt.call_args[1]["system_prompt"] + mock_target._set_target_system_prompt.assert_called_once() + system_arg = mock_target._set_target_system_prompt.call_args[1]["system_prompt"] assert "PROPRIETARY_METHOD" in system_arg diff --git a/tests/unit/converter/test_toxic_sentence_generator_converter.py b/tests/unit/converter/test_toxic_sentence_generator_converter.py index 1e75ef89bc..deb50a57d4 100644 --- a/tests/unit/converter/test_toxic_sentence_generator_converter.py +++ b/tests/unit/converter/test_toxic_sentence_generator_converter.py @@ -8,12 +8,12 @@ from pyrit.models import MessagePiece, SeedPrompt 
from pyrit.prompt_converter import ToxicSentenceGeneratorConverter -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget @pytest.fixture def mock_target(): - mock = MagicMock(spec=PromptChatTarget) + mock = MagicMock(spec=PromptTarget) # Create a Message response instead of PromptResponse response = MessagePiece( role="assistant", @@ -76,7 +76,7 @@ async def test_toxic_sentence_generator_convert(mock_target, mock_template): async def test_toxic_sentence_generator_input_output_supported(): """Test that the converter correctly identifies supported input/output types.""" with patch("pyrit.prompt_converter.toxic_sentence_generator_converter.SeedPrompt.from_yaml_file"): - converter = ToxicSentenceGeneratorConverter(converter_target=MagicMock(spec=PromptChatTarget)) + converter = ToxicSentenceGeneratorConverter(converter_target=MagicMock(spec=PromptTarget)) assert converter.input_supported("text") is True assert converter.input_supported("image") is False diff --git a/tests/unit/executor/attack/component/test_conversation_manager.py b/tests/unit/executor/attack/component/test_conversation_manager.py index c86e741e9c..27c1df0f86 100644 --- a/tests/unit/executor/attack/component/test_conversation_manager.py +++ b/tests/unit/executor/attack/component/test_conversation_manager.py @@ -37,7 +37,7 @@ from pyrit.identifiers import ComponentIdentifier from pyrit.models import Message, MessagePiece, Score from pyrit.prompt_normalizer import PromptConverterConfiguration, PromptNormalizer -from pyrit.prompt_target import PromptChatTarget, PromptTarget +from pyrit.prompt_target import PromptTarget def _mock_target_id(name: str = "MockTarget") -> ComponentIdentifier: @@ -85,8 +85,10 @@ def mock_prompt_normalizer() -> MagicMock: @pytest.fixture def mock_chat_target() -> MagicMock: """Create a mock chat target for testing.""" - target = MagicMock(spec=PromptChatTarget) - target.set_system_prompt = MagicMock() + target = 
MagicMock(spec=PromptTarget) + target._set_target_system_prompt = MagicMock() + target.capabilities.supports_multi_turn = True + target.capabilities.supports_editable_history = True target.get_identifier.return_value = _mock_target_id("MockChatTarget") return target @@ -95,6 +97,8 @@ def mock_chat_target() -> MagicMock: def mock_prompt_target() -> MagicMock: """Create a mock prompt target (non-chat) for testing.""" target = MagicMock(spec=PromptTarget) + target.capabilities.supports_multi_turn = False + target.capabilities.supports_editable_history = False target.get_identifier.return_value = _mock_target_id("MockTarget") return target @@ -601,45 +605,45 @@ def test_get_last_message_with_role_filter_returns_none_when_no_match( class TestSystemPromptHandling: """Tests for system prompt functionality.""" - def test_set_system_prompt_with_chat_target( + def test__set_target_system_prompt_with_chat_target( self, attack_identifier: ComponentIdentifier, mock_chat_target: MagicMock ) -> None: - """Test set_system_prompt calls target's set_system_prompt method.""" + """Test _set_target_system_prompt calls target's _set_target_system_prompt method.""" manager = ConversationManager(attack_identifier=attack_identifier) conversation_id = str(uuid.uuid4()) system_prompt = "You are a helpful assistant" labels = {"type": "system"} - manager.set_system_prompt( + manager._set_target_system_prompt( target=mock_chat_target, conversation_id=conversation_id, system_prompt=system_prompt, labels=labels, ) - mock_chat_target.set_system_prompt.assert_called_once_with( + mock_chat_target._set_target_system_prompt.assert_called_once_with( system_prompt=system_prompt, conversation_id=conversation_id, attack_identifier=attack_identifier, labels=labels, ) - def test_set_system_prompt_without_labels( + def test__set_target_system_prompt_without_labels( self, attack_identifier: ComponentIdentifier, mock_chat_target: MagicMock ) -> None: - """Test set_system_prompt works without labels.""" + 
"""Test _set_target_system_prompt works without labels.""" manager = ConversationManager(attack_identifier=attack_identifier) conversation_id = str(uuid.uuid4()) system_prompt = "You are a helpful assistant" - manager.set_system_prompt( + manager._set_target_system_prompt( target=mock_chat_target, conversation_id=conversation_id, system_prompt=system_prompt, ) - mock_chat_target.set_system_prompt.assert_called_once() - call_args = mock_chat_target.set_system_prompt.call_args + mock_chat_target._set_target_system_prompt.assert_called_once() + call_args = mock_chat_target._set_target_system_prompt.call_args assert call_args.kwargs["labels"] is None @@ -1059,7 +1063,7 @@ async def test_non_chat_target_behavior_raise_explicit( config = PrependedConversationConfig(non_chat_target_behavior="raise") with pytest.raises( - ValueError, match="prepended_conversation requires the objective target to be a PromptChatTarget" + ValueError, match="prepended_conversation requires the objective target to be a PromptTarget" ): await manager.initialize_context_async( context=context, diff --git a/tests/unit/executor/attack/component/test_simulated_conversation.py b/tests/unit/executor/attack/component/test_simulated_conversation.py index 99ec06e3af..9a97632892 100644 --- a/tests/unit/executor/attack/component/test_simulated_conversation.py +++ b/tests/unit/executor/attack/component/test_simulated_conversation.py @@ -22,7 +22,7 @@ SeedPrompt, SimulatedTargetSystemPromptPaths, ) -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget from pyrit.score import TrueFalseScorer @@ -45,9 +45,9 @@ def _mock_scorer_id(name: str = "MockScorer") -> ComponentIdentifier: @pytest.fixture def mock_adversarial_chat() -> MagicMock: """Create a mock adversarial chat target for testing.""" - chat = MagicMock(spec=PromptChatTarget) + chat = MagicMock(spec=PromptTarget) chat.send_prompt_async = AsyncMock() - chat.set_system_prompt = MagicMock() + 
chat._set_target_system_prompt = MagicMock() chat.get_identifier.return_value = _mock_target_id("MockAdversarialChat") return chat @@ -700,8 +700,8 @@ async def test_next_message_system_prompt_path_sets_system_prompt( next_message_system_prompt_path=NextMessageSystemPromptPaths.DIRECT.value, ) - # Verify set_system_prompt was called on adversarial_chat - mock_adversarial_chat.set_system_prompt.assert_called() + # Verify _set_target_system_prompt was called on adversarial_chat + mock_adversarial_chat._set_target_system_prompt.assert_called() @pytest.mark.asyncio async def test_starting_sequence_sets_first_sequence_number( diff --git a/tests/unit/executor/attack/multi_turn/test_crescendo.py b/tests/unit/executor/attack/multi_turn/test_crescendo.py index 37473f371d..8370fbab95 100644 --- a/tests/unit/executor/attack/multi_turn/test_crescendo.py +++ b/tests/unit/executor/attack/multi_turn/test_crescendo.py @@ -35,7 +35,7 @@ ScoreType, ) from pyrit.prompt_normalizer import PromptNormalizer -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget from pyrit.score import FloatScaleThresholdScorer, SelfAskRefusalScorer, TrueFalseScorer from pyrit.score.score_utils import ORIGINAL_FLOAT_VALUE_KEY @@ -62,9 +62,9 @@ def create_mock_chat_target(*, name: str = "MockChatTarget") -> MagicMock: This standardizes the creation of mock chat targets across tests, ensuring they all have the required methods and return values. 
""" - target = MagicMock(spec=PromptChatTarget) + target = MagicMock(spec=PromptTarget) target.send_prompt_async = AsyncMock() - target.set_system_prompt = MagicMock() + target._set_target_system_prompt = MagicMock() target.get_identifier.return_value = _mock_target_id(name) return target @@ -669,8 +669,8 @@ async def test_setup_sets_adversarial_chat_system_prompt( await attack._setup_async(context=basic_context) # Verify system prompt was set - mock_adversarial_chat.set_system_prompt.assert_called_once() - call_args = mock_adversarial_chat.set_system_prompt.call_args + mock_adversarial_chat._set_target_system_prompt.assert_called_once() + call_args = mock_adversarial_chat._set_target_system_prompt.call_args assert "Test objective" in call_args.kwargs["system_prompt"] assert "15" in call_args.kwargs["system_prompt"] # Check for the max_turns value assert call_args.kwargs["conversation_id"] == basic_context.session.adversarial_chat_conversation_id diff --git a/tests/unit/executor/attack/multi_turn/test_red_teaming.py b/tests/unit/executor/attack/multi_turn/test_red_teaming.py index c33848d586..50dde33ca8 100644 --- a/tests/unit/executor/attack/multi_turn/test_red_teaming.py +++ b/tests/unit/executor/attack/multi_turn/test_red_teaming.py @@ -31,7 +31,7 @@ SeedPrompt, ) from pyrit.prompt_normalizer import PromptNormalizer -from pyrit.prompt_target import PromptChatTarget, PromptTarget +from pyrit.prompt_target import PromptTarget from pyrit.score import Scorer, TrueFalseScorer @@ -61,9 +61,9 @@ def mock_objective_target() -> MagicMock: @pytest.fixture def mock_adversarial_chat() -> MagicMock: - chat = MagicMock(spec=PromptChatTarget) + chat = MagicMock(spec=PromptTarget) chat.send_prompt_async = AsyncMock() - chat.set_system_prompt = MagicMock() + chat._set_target_system_prompt = MagicMock() chat.get_identifier.return_value = _mock_target_id("MockChatTarget") return chat @@ -534,10 +534,10 @@ async def test_max_turns_validation_with_prepended_conversation( 
mock_adversarial_chat: MagicMock, ): """Test that prepended conversation turns are validated against max_turns.""" - # Create a separate chat target for objective since prepended_conversation requires PromptChatTarget - mock_chat_objective_target = MagicMock(spec=PromptChatTarget) + # Create a separate chat target for objective since prepended_conversation requires PromptTarget + mock_chat_objective_target = MagicMock(spec=PromptTarget) mock_chat_objective_target.send_prompt_async = AsyncMock() - mock_chat_objective_target.set_system_prompt = MagicMock() + mock_chat_objective_target._set_target_system_prompt = MagicMock() mock_chat_objective_target.get_identifier.return_value = _mock_target_id("MockChatTarget") adversarial_config = AttackAdversarialConfig(target=mock_adversarial_chat) @@ -687,8 +687,8 @@ async def test_setup_sets_adversarial_chat_system_prompt( await attack._setup_async(context=basic_context) # Verify system prompt was set - mock_adversarial_chat.set_system_prompt.assert_called_once() - call_args = mock_adversarial_chat.set_system_prompt.call_args + mock_adversarial_chat._set_target_system_prompt.assert_called_once() + call_args = mock_adversarial_chat._set_target_system_prompt.call_args assert "Test objective" in call_args.kwargs["system_prompt"] assert call_args.kwargs["conversation_id"] == basic_context.session.adversarial_chat_conversation_id diff --git a/tests/unit/executor/attack/multi_turn/test_supports_multi_turn_attacks.py b/tests/unit/executor/attack/multi_turn/test_supports_multi_turn_attacks.py index 3baeaf463c..18eafc3d5c 100644 --- a/tests/unit/executor/attack/multi_turn/test_supports_multi_turn_attacks.py +++ b/tests/unit/executor/attack/multi_turn/test_supports_multi_turn_attacks.py @@ -12,6 +12,7 @@ ) from pyrit.memory import CentralMemory from pyrit.models import ConversationType, MessagePiece +from pyrit.prompt_target import TargetCapabilities def _make_context() -> MultiTurnAttackContext: @@ -26,7 +27,7 @@ def _make_strategy(*, 
supports_multi_turn: bool): from pyrit.executor.attack.multi_turn.multi_turn_attack_strategy import MultiTurnAttackStrategy target = MagicMock() - target.capabilities.supports_multi_turn = supports_multi_turn + target.capabilities = TargetCapabilities(supports_multi_turn=supports_multi_turn) target.get_identifier.return_value = MagicMock() with patch.multiple( @@ -375,7 +376,7 @@ def _make_tap_node(self, *, supports_multi_turn: bool): from pyrit.executor.attack.multi_turn.tree_of_attacks import _TreeOfAttacksNode target = MagicMock() - target.capabilities.supports_multi_turn = supports_multi_turn + target.capabilities = TargetCapabilities(supports_multi_turn=supports_multi_turn) target.get_identifier.return_value = MagicMock() adversarial_chat = MagicMock() @@ -685,7 +686,7 @@ class TestValueErrorGuards: def _make_single_turn_target(self): target = MagicMock() - target.capabilities.supports_multi_turn = False + target.capabilities = TargetCapabilities(supports_multi_turn=False) target.get_identifier.return_value = MagicMock() return target @@ -719,7 +720,7 @@ async def test_crescendo_raises_for_single_turn_target(self): params=AttackParameters(objective="Test"), ) - with pytest.raises(ValueError, match="CrescendoAttack requires a multi-turn target"): + with pytest.raises(ValueError, match="objective_target must have 'supports_multi_turn' capability"): await attack._setup_async(context=context) @pytest.mark.asyncio @@ -733,7 +734,7 @@ async def test_multi_prompt_sending_raises_for_single_turn_target(self): params=AttackParameters(objective="Test"), ) - with pytest.raises(ValueError, match="MultiPromptSendingAttack requires a multi-turn target"): + with pytest.raises(ValueError, match="objective_target must have 'supports_multi_turn' capability"): await attack._setup_async(context=context) @pytest.mark.asyncio @@ -750,7 +751,7 @@ async def test_chunked_request_raises_for_single_turn_target(self): params=AttackParameters(objective="Test"), ) - with 
pytest.raises(ValueError, match="ChunkedRequestAttack requires a multi-turn target"): + with pytest.raises(ValueError, match="objective_target must have 'supports_multi_turn' capability"): await attack._setup_async(context=context) @@ -763,7 +764,7 @@ def _make_tap_node(self, *, supports_multi_turn: bool): from pyrit.executor.attack.multi_turn.tree_of_attacks import _TreeOfAttacksNode target = MagicMock() - target.capabilities.supports_multi_turn = supports_multi_turn + target.capabilities = TargetCapabilities(supports_multi_turn=supports_multi_turn) target.get_identifier.return_value = MagicMock() adversarial_chat = MagicMock() diff --git a/tests/unit/executor/attack/multi_turn/test_tree_of_attacks.py b/tests/unit/executor/attack/multi_turn/test_tree_of_attacks.py index 2ea2e5f40a..5019025a43 100644 --- a/tests/unit/executor/attack/multi_turn/test_tree_of_attacks.py +++ b/tests/unit/executor/attack/multi_turn/test_tree_of_attacks.py @@ -37,7 +37,7 @@ SeedPrompt, ) from pyrit.prompt_normalizer import PromptNormalizer -from pyrit.prompt_target import PromptChatTarget, PromptTarget +from pyrit.prompt_target import PromptTarget from pyrit.score import FloatScaleThresholdScorer, Scorer, TrueFalseScorer from pyrit.score.float_scale.float_scale_scorer import FloatScaleScorer from pyrit.score.score_utils import normalize_score_to_float @@ -148,7 +148,7 @@ class AttackBuilder: def __init__(self) -> None: self.objective_target: Optional[PromptTarget] = None - self.adversarial_chat: Optional[PromptChatTarget] = None + self.adversarial_chat: Optional[PromptTarget] = None self.objective_scorer: Optional[Scorer] = None self.auxiliary_scorers: list[Scorer] = [] self.tree_params: dict[str, Any] = {} @@ -228,15 +228,15 @@ def _create_mock_target() -> PromptTarget: return cast("PromptTarget", target) @staticmethod - def _create_mock_chat() -> PromptChatTarget: - chat = MagicMock(spec=PromptChatTarget) + def _create_mock_chat() -> PromptTarget: + chat = MagicMock(spec=PromptTarget) 
chat.send_prompt_async = AsyncMock(return_value=None) - chat.set_system_prompt = MagicMock() + chat._set_target_system_prompt = MagicMock() chat.get_identifier.return_value = ComponentIdentifier( class_name="MockChatTarget", class_module="test_module", ) - return cast("PromptChatTarget", chat) + return cast("PromptTarget", chat) @staticmethod def _create_mock_scorer(name: str) -> TrueFalseScorer: diff --git a/tests/unit/executor/attack/single_turn/test_context_compliance.py b/tests/unit/executor/attack/single_turn/test_context_compliance.py index 4d9727fdaf..058d21c018 100644 --- a/tests/unit/executor/attack/single_turn/test_context_compliance.py +++ b/tests/unit/executor/attack/single_turn/test_context_compliance.py @@ -23,7 +23,7 @@ SeedPrompt, ) from pyrit.prompt_normalizer import PromptNormalizer -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget from pyrit.score import TrueFalseScorer @@ -45,8 +45,8 @@ def _mock_scorer_id(name: str = "MockScorer") -> ComponentIdentifier: @pytest.fixture def mock_objective_target(): - """Create a mock PromptChatTarget for testing""" - target = MagicMock(spec=PromptChatTarget) + """Create a mock PromptTarget for testing""" + target = MagicMock(spec=PromptTarget) target.send_prompt_async = AsyncMock() target.get_identifier.return_value = _mock_target_id("MockTarget") return target @@ -55,7 +55,7 @@ def mock_objective_target(): @pytest.fixture def mock_adversarial_chat(): """Create a mock adversarial chat target for testing""" - target = MagicMock(spec=PromptChatTarget) + target = MagicMock(spec=PromptTarget) target.send_prompt_async = AsyncMock() target.get_identifier.return_value = _mock_target_id("MockAdversarialTarget") return target diff --git a/tests/unit/executor/attack/single_turn/test_flip_attack.py b/tests/unit/executor/attack/single_turn/test_flip_attack.py index d8499edd28..9ecc618651 100644 --- a/tests/unit/executor/attack/single_turn/test_flip_attack.py +++ 
b/tests/unit/executor/attack/single_turn/test_flip_attack.py @@ -20,7 +20,7 @@ ) from pyrit.prompt_converter import FlipConverter from pyrit.prompt_normalizer import PromptConverterConfiguration, PromptNormalizer -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget from pyrit.score import TrueFalseScorer @@ -42,8 +42,8 @@ def _mock_scorer_id(name: str = "MockScorer") -> ComponentIdentifier: @pytest.fixture def mock_objective_target(): - """Create a mock PromptChatTarget for testing""" - target = MagicMock(spec=PromptChatTarget) + """Create a mock PromptTarget for testing""" + target = MagicMock(spec=PromptTarget) target.send_prompt_async = AsyncMock() target.get_identifier.return_value = _mock_target_id("MockTarget") return target diff --git a/tests/unit/executor/attack/single_turn/test_role_play.py b/tests/unit/executor/attack/single_turn/test_role_play.py index ad45affc6f..20520532a7 100644 --- a/tests/unit/executor/attack/single_turn/test_role_play.py +++ b/tests/unit/executor/attack/single_turn/test_role_play.py @@ -24,14 +24,14 @@ ) from pyrit.prompt_converter import Base64Converter, StringJoinConverter from pyrit.prompt_normalizer import PromptConverterConfiguration -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget from pyrit.score import Scorer, TrueFalseScorer @pytest.fixture def mock_objective_target(): """Create a mock prompt target for testing""" - target = MagicMock(spec=PromptChatTarget) + target = MagicMock(spec=PromptTarget) target.send_prompt_async = AsyncMock() target.get_identifier.return_value = get_mock_target_identifier("MockTarget") return target @@ -40,7 +40,7 @@ def mock_objective_target(): @pytest.fixture def mock_adversarial_chat_target(): """Create a mock adversarial chat target for testing""" - target = MagicMock(spec=PromptChatTarget) + target = MagicMock(spec=PromptTarget) target.send_prompt_async = AsyncMock() target.get_identifier.return_value = 
get_mock_target_identifier("MockAdversarialChat") return target diff --git a/tests/unit/executor/attack/test_attack_parameter_consistency.py b/tests/unit/executor/attack/test_attack_parameter_consistency.py index 90db3740ad..4cc2c3083a 100644 --- a/tests/unit/executor/attack/test_attack_parameter_consistency.py +++ b/tests/unit/executor/attack/test_attack_parameter_consistency.py @@ -34,7 +34,7 @@ Score, ) from pyrit.prompt_normalizer import PromptNormalizer -from pyrit.prompt_target import PromptChatTarget, PromptTarget +from pyrit.prompt_target import PromptTarget from pyrit.score import FloatScaleThresholdScorer, TrueFalseScorer @@ -140,29 +140,22 @@ def prepended_conversation_multimodal() -> list[Message]: @pytest.fixture def mock_chat_target() -> MagicMock: - """Create a mock PromptChatTarget with common setup.""" - target = MagicMock(spec=PromptChatTarget) - target.send_prompt_async = AsyncMock() - target.set_system_prompt = MagicMock() - target.get_identifier.return_value = _mock_target_id("MockChatTarget") - return target - - -@pytest.fixture -def mock_non_chat_target() -> MagicMock: - """Create a mock PromptTarget (non-chat) with common setup.""" + """Create a mock chat target (objective target).""" target = MagicMock(spec=PromptTarget) target.send_prompt_async = AsyncMock() - target.get_identifier.return_value = _mock_target_id("MockTarget") + target._set_target_system_prompt = MagicMock() + target.capabilities.supports_multi_turn = True + target.capabilities.supports_editable_history = True + target.get_identifier.return_value = _mock_target_id("MockChatTarget") return target @pytest.fixture def mock_adversarial_chat() -> MagicMock: """Create a mock adversarial chat target.""" - target = MagicMock(spec=PromptChatTarget) + target = MagicMock(spec=PromptTarget) target.send_prompt_async = AsyncMock() - target.set_system_prompt = MagicMock() + target._set_target_system_prompt = MagicMock() target.get_identifier.return_value = 
_mock_target_id("MockAdversarialChat") return target @@ -564,7 +557,7 @@ class TestPrependedConversationInMemory: """ Tests verifying that prepended_conversation is properly added to memory. - For PromptChatTargets, prepended_conversation should: + For PromptTargets, prepended_conversation should: 1. Be added to memory with the correct conversation_id 2. Have assistant messages translated to simulated_assistant role 3. Preserve multi-modal content @@ -934,7 +927,7 @@ def _assert_prepended_text_in_adversarial_context( This helper verifies the content appears regardless of the injection method by checking: 1. Adversarial chat memory (history messages) - 2. The set_system_prompt call args (if mock provided and memory is empty) + 2. The _set_target_system_prompt call args (if mock provided and memory is empty) Args: prepended_conversation: The original prepended conversation. @@ -948,13 +941,13 @@ def _assert_prepended_text_in_adversarial_context( adversarial_chat_conversation_id=adversarial_chat_conversation_id ) - # If memory is empty but we have a mock, check set_system_prompt calls + # If memory is empty but we have a mock, check _set_target_system_prompt calls if ( not adversarial_text_values and adversarial_chat_mock is not None - and adversarial_chat_mock.set_system_prompt.called + and adversarial_chat_mock._set_target_system_prompt.called ): - for call in adversarial_chat_mock.set_system_prompt.call_args_list: + for call in adversarial_chat_mock._set_target_system_prompt.call_args_list: system_prompt = call.kwargs.get("system_prompt", "") if system_prompt: adversarial_text_values.append(system_prompt) @@ -1044,7 +1037,7 @@ async def test_tap_injects_prepended_into_adversarial_context( sqlite_instance, ) -> None: """Test that TreeOfAttacksWithPruningAttack injects prepended conversation into adversarial context.""" - # TAP may fail due to JSON parsing, but set_system_prompt should be called before the error + # TAP may fail due to JSON parsing, but 
_set_target_system_prompt should be called before the error with suppress(Exception): await tap_attack.execute_async( objective="Test objective", @@ -1052,7 +1045,7 @@ async def test_tap_injects_prepended_into_adversarial_context( next_message=multimodal_text_message, ) - # Verify prepended text appears in adversarial context (checks mock's set_system_prompt calls) + # Verify prepended text appears in adversarial context (checks mock's _set_target_system_prompt calls) _assert_prepended_text_in_adversarial_context( prepended_conversation=prepended_conversation_text, adversarial_chat_conversation_id="", # Empty - will fall back to mock check diff --git a/tests/unit/executor/attack/test_error_skip_scoring.py b/tests/unit/executor/attack/test_error_skip_scoring.py index 55cc0520e9..a31aa5779f 100644 --- a/tests/unit/executor/attack/test_error_skip_scoring.py +++ b/tests/unit/executor/attack/test_error_skip_scoring.py @@ -24,7 +24,6 @@ from pyrit.identifiers import ComponentIdentifier from pyrit.models import Message, MessagePiece, SeedGroup, SeedPrompt from pyrit.prompt_target import PromptTarget -from pyrit.prompt_target.common.prompt_chat_target import PromptChatTarget from pyrit.score import FloatScaleThresholdScorer, TrueFalseScorer @@ -182,9 +181,9 @@ async def test_attack_executor_skips_scoring_on_error( # Setup additional configs for multi-turn attacks that need adversarial config if attack_class in [RedTeamingAttack, CrescendoAttack, TreeOfAttacksWithPruningAttack]: - # TreeOfAttacks requires PromptChatTarget, others can use PromptTarget + # TreeOfAttacks requires multi-turn target, others can use PromptTarget if attack_class == TreeOfAttacksWithPruningAttack: - adversarial_target = MagicMock(spec=PromptChatTarget) + adversarial_target = MagicMock(spec=PromptTarget) else: adversarial_target = MagicMock(spec=PromptTarget) diff --git a/tests/unit/executor/promptgen/test_anecdoctor.py b/tests/unit/executor/promptgen/test_anecdoctor.py index 31d4667cad..93f66f70f2 
100644 --- a/tests/unit/executor/promptgen/test_anecdoctor.py +++ b/tests/unit/executor/promptgen/test_anecdoctor.py @@ -15,7 +15,7 @@ from pyrit.identifiers import ComponentIdentifier from pyrit.models import Message from pyrit.prompt_normalizer import PromptNormalizer -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget def _mock_target_id(name: str = "MockTarget") -> ComponentIdentifier: @@ -27,19 +27,19 @@ def _mock_target_id(name: str = "MockTarget") -> ComponentIdentifier: @pytest.fixture -def mock_objective_target() -> PromptChatTarget: +def mock_objective_target() -> PromptTarget: """Create a mock objective target for testing.""" - mock_target = MagicMock(spec=PromptChatTarget) - mock_target.set_system_prompt = MagicMock() + mock_target = MagicMock(spec=PromptTarget) + mock_target._set_target_system_prompt = MagicMock() mock_target.get_identifier.return_value = _mock_target_id("mock_objective_target") return mock_target @pytest.fixture -def mock_processing_model() -> PromptChatTarget: +def mock_processing_model() -> PromptTarget: """Create a mock processing model for testing.""" - mock_model = MagicMock(spec=PromptChatTarget) - mock_model.set_system_prompt = MagicMock() + mock_model = MagicMock(spec=PromptTarget) + mock_model._set_target_system_prompt = MagicMock() mock_model.get_identifier.return_value = _mock_target_id("MockProcessingModel") return mock_model @@ -241,7 +241,7 @@ async def test_setup_formats_system_prompt(self, mock_objective_target, sample_c """Test setup formats system prompt with language and content type.""" generator = AnecdoctorGenerator(objective_target=mock_objective_target) - with patch.object(generator._objective_target, "set_system_prompt") as mock_set: + with patch.object(generator._objective_target, "_set_target_system_prompt") as mock_set: await generator._setup_async(context=sample_context) mock_set.assert_called_once() diff --git a/tests/unit/mocks.py b/tests/unit/mocks.py index 
0bfa55f609..d2a51886af 100644 --- a/tests/unit/mocks.py +++ b/tests/unit/mocks.py @@ -13,7 +13,7 @@ from pyrit.identifiers import ComponentIdentifier from pyrit.memory import AzureSQLMemory, CentralMemory, PromptMemoryEntry from pyrit.models import Message, MessagePiece -from pyrit.prompt_target import PromptChatTarget, PromptTarget, limit_requests_per_minute +from pyrit.prompt_target import PromptTarget, TargetCapabilities, limit_requests_per_minute def get_mock_scorer_identifier() -> ComponentIdentifier: @@ -119,15 +119,17 @@ def raise_for_status(self): raise Exception(f"HTTP Error {self.status}") -class MockPromptTarget(PromptChatTarget): +class MockPromptTarget(PromptTarget): prompt_sent: list[str] + _DEFAULT_CAPABILITIES = TargetCapabilities(supports_multi_turn=True, supports_editable_history=True) + def __init__(self, id=None, rpm=None) -> None: # noqa: A002 super().__init__(max_requests_per_minute=rpm) self.id = id self.prompt_sent = [] - def set_system_prompt( + def _set_target_system_prompt( self, *, system_prompt: str, diff --git a/tests/unit/registry/test_target_registry.py b/tests/unit/registry/test_target_registry.py index 503d096a38..c8970c0a4e 100644 --- a/tests/unit/registry/test_target_registry.py +++ b/tests/unit/registry/test_target_registry.py @@ -7,7 +7,6 @@ from pyrit.identifiers import ComponentIdentifier from pyrit.models import Message, MessagePiece from pyrit.prompt_target import PromptTarget -from pyrit.prompt_target.common.prompt_chat_target import PromptChatTarget from pyrit.registry.instance_registries.target_registry import TargetRegistry @@ -33,28 +32,6 @@ def _validate_request(self, *, message: Message) -> None: pass -class MockPromptChatTarget(PromptChatTarget): - """Mock PromptChatTarget for testing conversation history support.""" - - def __init__(self, *, model_name: str = "mock_chat_model", endpoint: str = "http://chat-test") -> None: - super().__init__(model_name=model_name, endpoint=endpoint) - - async def send_prompt_async( 
- self, - *, - message: Message, - ) -> list[Message]: - return [ - MessagePiece( - role="assistant", - original_value="chat response", - ).to_message() - ] - - def _validate_request(self, *, message: Message) -> None: - pass - - class TestTargetRegistrySingleton: """Tests for the singleton pattern in TargetRegistry.""" @@ -121,7 +98,7 @@ def test_register_instance_generates_name_from_class(self): def test_register_instance_multiple_targets_unique_names(self): """Test registering multiple targets generates unique names.""" target1 = MockPromptTarget() - target2 = MockPromptChatTarget() + target2 = MockPromptTarget() self.registry.register_instance(target1) self.registry.register_instance(target2) @@ -209,7 +186,7 @@ def setup_method(self): self.target1 = MockPromptTarget(model_name="model_a") self.target2 = MockPromptTarget(model_name="model_b") - self.chat_target = MockPromptChatTarget() + self.chat_target = MockPromptTarget() self.registry.register_instance(self.target1, name="target_1") self.registry.register_instance(self.target2, name="target_2") diff --git a/tests/unit/scenarios/test_content_harms.py b/tests/unit/scenarios/test_content_harms.py index ef81b03bae..260d61a191 100644 --- a/tests/unit/scenarios/test_content_harms.py +++ b/tests/unit/scenarios/test_content_harms.py @@ -12,7 +12,7 @@ from pyrit.identifiers import ComponentIdentifier from pyrit.models import SeedAttackGroup, SeedObjective, SeedPrompt from pyrit.prompt_target import PromptTarget -from pyrit.prompt_target.common.prompt_chat_target import PromptChatTarget +from pyrit.prompt_target.common.prompt_target import PromptTarget from pyrit.scenario import ScenarioCompositeStrategy from pyrit.scenario.airt import ( ContentHarms, @@ -51,7 +51,7 @@ def mock_objective_target(): @pytest.fixture def mock_adversarial_target(): """Create a mock adversarial target for testing.""" - mock = MagicMock(spec=PromptChatTarget) + mock = MagicMock(spec=PromptTarget) mock.get_identifier.return_value = 
_mock_target_id("MockAdversarialTarget") return mock diff --git a/tests/unit/scenarios/test_cyber.py b/tests/unit/scenarios/test_cyber.py index afa7f144a3..43445bed4e 100644 --- a/tests/unit/scenarios/test_cyber.py +++ b/tests/unit/scenarios/test_cyber.py @@ -13,7 +13,7 @@ from pyrit.executor.attack.core.attack_config import AttackScoringConfig from pyrit.identifiers import ComponentIdentifier from pyrit.models import SeedAttackGroup, SeedDataset, SeedObjective -from pyrit.prompt_target import OpenAIChatTarget, PromptChatTarget, PromptTarget +from pyrit.prompt_target import OpenAIChatTarget, PromptTarget from pyrit.scenario import DatasetConfiguration from pyrit.scenario.airt import Cyber, CyberStrategy from pyrit.score import TrueFalseCompositeScorer @@ -105,7 +105,7 @@ def mock_objective_scorer(): @pytest.fixture def mock_adversarial_target(): """Create a mock adversarial target for testing.""" - mock = MagicMock(spec=PromptChatTarget) + mock = MagicMock(spec=PromptTarget) mock.get_identifier.return_value = _mock_target_id("MockAdversarialTarget") return mock diff --git a/tests/unit/scenarios/test_foundry.py b/tests/unit/scenarios/test_foundry.py index dc41d8b7aa..d812b53393 100644 --- a/tests/unit/scenarios/test_foundry.py +++ b/tests/unit/scenarios/test_foundry.py @@ -14,7 +14,7 @@ from pyrit.models import SeedAttackGroup, SeedObjective from pyrit.prompt_converter import Base64Converter from pyrit.prompt_target import PromptTarget -from pyrit.prompt_target.common.prompt_chat_target import PromptChatTarget +from pyrit.prompt_target.common.prompt_target import PromptTarget from pyrit.scenario import AtomicAttack, DatasetConfiguration from pyrit.scenario.foundry import FoundryStrategy, RedTeamAgent from pyrit.score import FloatScaleThresholdScorer, TrueFalseScorer @@ -69,7 +69,7 @@ def mock_objective_target(): @pytest.fixture def mock_adversarial_target(): """Create a mock adversarial target for testing.""" - mock = MagicMock(spec=PromptChatTarget) + mock = 
MagicMock(spec=PromptTarget) mock.get_identifier.return_value = _mock_target_id("MockAdversarialTarget") return mock diff --git a/tests/unit/scenarios/test_leakage_scenario.py b/tests/unit/scenarios/test_leakage_scenario.py index b7b7d066db..98eb3950b6 100644 --- a/tests/unit/scenarios/test_leakage_scenario.py +++ b/tests/unit/scenarios/test_leakage_scenario.py @@ -13,7 +13,7 @@ from pyrit.executor.attack.core.attack_config import AttackScoringConfig from pyrit.identifiers import ComponentIdentifier from pyrit.models import SeedAttackGroup, SeedDataset, SeedObjective -from pyrit.prompt_target import OpenAIChatTarget, PromptChatTarget, PromptTarget +from pyrit.prompt_target import OpenAIChatTarget, PromptTarget from pyrit.scenario import DatasetConfiguration from pyrit.scenario.airt import Leakage, LeakageStrategy from pyrit.score import TrueFalseCompositeScorer @@ -112,7 +112,7 @@ def mock_objective_scorer(): @pytest.fixture def mock_adversarial_target(): - mock = MagicMock(spec=PromptChatTarget) + mock = MagicMock(spec=PromptTarget) mock.get_identifier.return_value = _mock_target_id("MockAdversarialTarget") return mock diff --git a/tests/unit/scenarios/test_psychosocial_harms.py b/tests/unit/scenarios/test_psychosocial_harms.py index 7b55e3ff5d..67f275446b 100644 --- a/tests/unit/scenarios/test_psychosocial_harms.py +++ b/tests/unit/scenarios/test_psychosocial_harms.py @@ -16,7 +16,7 @@ ) from pyrit.identifiers import ComponentIdentifier from pyrit.models import SeedAttackGroup, SeedDataset, SeedGroup, SeedObjective -from pyrit.prompt_target import OpenAIChatTarget, PromptChatTarget +from pyrit.prompt_target import OpenAIChatTarget, PromptTarget from pyrit.scenario.scenarios.airt import ( Psychosocial, PsychosocialStrategy, @@ -61,8 +61,8 @@ def mock_runtime_env(): @pytest.fixture -def mock_objective_target() -> PromptChatTarget: - mock = MagicMock(spec=PromptChatTarget) +def mock_objective_target() -> PromptTarget: + mock = MagicMock(spec=PromptTarget) 
mock.get_identifier.return_value = ComponentIdentifier(class_name="MockObjectiveTarget", class_module="test") return mock @@ -75,8 +75,8 @@ def mock_objective_scorer() -> FloatScaleThresholdScorer: @pytest.fixture -def mock_adversarial_target() -> PromptChatTarget: - mock = MagicMock(spec=PromptChatTarget) +def mock_adversarial_target() -> PromptTarget: + mock = MagicMock(spec=PromptTarget) mock.get_identifier.return_value = ComponentIdentifier(class_name="MockAdversarialTarget", class_module="test") return mock @@ -235,7 +235,7 @@ async def test_attack_generation_for_all( async def test_attack_generation_for_imminent_crisis_async( self, *, - mock_objective_target: PromptChatTarget, + mock_objective_target: PromptTarget, mock_objective_scorer: FloatScaleThresholdScorer, sample_objectives: list[str], imminent_crisis_strategy: PsychosocialStrategy, @@ -260,7 +260,7 @@ async def test_attack_generation_for_imminent_crisis_async( async def test_attack_runs_include_objectives_async( self, *, - mock_objective_target: PromptChatTarget, + mock_objective_target: PromptTarget, mock_objective_scorer: FloatScaleThresholdScorer, sample_objectives: list[str], ) -> None: @@ -283,7 +283,7 @@ async def test_attack_runs_include_objectives_async( async def test_get_atomic_attacks_async_returns_attacks( self, *, - mock_objective_target: PromptChatTarget, + mock_objective_target: PromptTarget, mock_objective_scorer: FloatScaleThresholdScorer, sample_objectives: list[str], ) -> None: @@ -307,7 +307,7 @@ class TestPsychosocialHarmsLifecycle: async def test_initialize_async_with_max_concurrency( self, *, - mock_objective_target: PromptChatTarget, + mock_objective_target: PromptTarget, mock_objective_scorer: FloatScaleThresholdScorer, sample_objectives: list[str], ) -> None: @@ -320,7 +320,7 @@ async def test_initialize_async_with_max_concurrency( async def test_initialize_async_with_memory_labels( self, *, - mock_objective_target: PromptChatTarget, + mock_objective_target: PromptTarget, 
mock_objective_scorer: FloatScaleThresholdScorer, sample_objectives: list[str], ) -> None: @@ -365,7 +365,7 @@ def test_get_default_strategy(self) -> None: async def test_no_target_duplication_async( self, *, - mock_objective_target: PromptChatTarget, + mock_objective_target: PromptTarget, sample_objectives: list[str], ) -> None: """Test that all three targets (adversarial, objective, scorer) are distinct.""" diff --git a/tests/unit/scenarios/test_scam.py b/tests/unit/scenarios/test_scam.py index a74046bf9f..58ef31038f 100644 --- a/tests/unit/scenarios/test_scam.py +++ b/tests/unit/scenarios/test_scam.py @@ -17,7 +17,7 @@ from pyrit.executor.attack.core.attack_config import AttackScoringConfig from pyrit.identifiers import ComponentIdentifier from pyrit.models import SeedAttackGroup, SeedDataset, SeedGroup, SeedObjective -from pyrit.prompt_target import OpenAIChatTarget, PromptChatTarget, PromptTarget +from pyrit.prompt_target import OpenAIChatTarget, PromptTarget from pyrit.scenario import DatasetConfiguration from pyrit.scenario.scenarios.airt.scam import Scam, ScamStrategy from pyrit.score import TrueFalseCompositeScorer @@ -111,8 +111,8 @@ def mock_objective_scorer() -> TrueFalseCompositeScorer: @pytest.fixture -def mock_adversarial_target() -> PromptChatTarget: - mock = MagicMock(spec=PromptChatTarget) +def mock_adversarial_target() -> PromptTarget: + mock = MagicMock(spec=PromptTarget) mock.get_identifier.return_value = _mock_target_id("MockAdversarialTarget") return mock diff --git a/tests/unit/score/test_gandalf_scorer.py b/tests/unit/score/test_gandalf_scorer.py index abad3a306a..bb7c5b4fb4 100644 --- a/tests/unit/score/test_gandalf_scorer.py +++ b/tests/unit/score/test_gandalf_scorer.py @@ -86,7 +86,7 @@ async def test_gandalf_scorer_score( @patch("requests.post") @pytest.mark.parametrize("level", [GandalfLevel.LEVEL_1, GandalfLevel.LEVEL_2, GandalfLevel.LEVEL_3]) @pytest.mark.asyncio -async def test_gandalf_scorer_set_system_prompt( +async def 
test_gandalf_scorer__set_target_system_prompt( mocked_post, sqlite_instance: MemoryInterface, level: GandalfLevel, @@ -106,7 +106,7 @@ async def test_gandalf_scorer_set_system_prompt( await scorer.score_async(response) - chat_target.set_system_prompt.assert_called_once() + chat_target._set_target_system_prompt.assert_called_once() mocked_post.assert_called_once() diff --git a/tests/unit/score/test_insecure_code_scorer.py b/tests/unit/score/test_insecure_code_scorer.py index fc1237eabf..1cfc13f5bb 100644 --- a/tests/unit/score/test_insecure_code_scorer.py +++ b/tests/unit/score/test_insecure_code_scorer.py @@ -8,13 +8,13 @@ from pyrit.exceptions.exception_classes import InvalidJsonException from pyrit.identifiers import ComponentIdentifier from pyrit.models import MessagePiece, Score, UnvalidatedScore -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget from pyrit.score import InsecureCodeScorer @pytest.fixture def mock_chat_target(patch_central_database): - return MagicMock(spec=PromptChatTarget) + return MagicMock(spec=PromptTarget) @pytest.mark.asyncio diff --git a/tests/unit/score/test_scorer.py b/tests/unit/score/test_scorer.py index a7ec650fd0..7db49b7409 100644 --- a/tests/unit/score/test_scorer.py +++ b/tests/unit/score/test_scorer.py @@ -13,7 +13,7 @@ from pyrit.identifiers import ComponentIdentifier from pyrit.memory import CentralMemory from pyrit.models import Message, MessagePiece, Score -from pyrit.prompt_target import PromptChatTarget +from pyrit.prompt_target import PromptTarget from pyrit.score import ( Scorer, ScorerPromptValidator, @@ -151,7 +151,7 @@ def get_scorer_metrics(self): @pytest.mark.asyncio @pytest.mark.parametrize("bad_json", [BAD_JSON, KEY_ERROR_JSON, KEY_ERROR2_JSON]) async def test_scorer_send_chat_target_async_bad_json_exception_retries(bad_json: str): - chat_target = MagicMock(PromptChatTarget) + chat_target = MagicMock(PromptTarget) chat_target.get_identifier.return_value = 
get_mock_target_identifier("MockChatTarget") bad_json_resp = Message( message_pieces=[MessagePiece(role="assistant", original_value=bad_json, conversation_id="test-convo")] @@ -175,7 +175,7 @@ async def test_scorer_send_chat_target_async_bad_json_exception_retries(bad_json @pytest.mark.asyncio async def test_scorer_score_value_with_llm_exception_display_prompt_id(): - chat_target = MagicMock(PromptChatTarget) + chat_target = MagicMock(PromptTarget) chat_target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget") chat_target.send_prompt_async = AsyncMock(side_effect=Exception("Test exception")) @@ -200,10 +200,10 @@ async def test_scorer_score_value_with_llm_use_provided_attack_identifier(good_j message = Message( message_pieces=[MessagePiece(role="assistant", original_value=good_json, conversation_id="test-convo")] ) - chat_target = MagicMock(PromptChatTarget) + chat_target = MagicMock(PromptTarget) chat_target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget") chat_target.send_prompt_async = AsyncMock(return_value=[message]) - chat_target.set_system_prompt = MagicMock() + chat_target._set_target_system_prompt = MagicMock() expected_system_prompt = "system_prompt" expected_attack_identifier = ComponentIdentifier(class_name="TestAttack", class_module="test.module") @@ -220,9 +220,9 @@ async def test_scorer_score_value_with_llm_use_provided_attack_identifier(good_j attack_identifier=expected_attack_identifier, ) - chat_target.set_system_prompt.assert_called_once() + chat_target._set_target_system_prompt.assert_called_once() - _, set_sys_prompt_args = chat_target.set_system_prompt.call_args + _, set_sys_prompt_args = chat_target._set_target_system_prompt.call_args assert set_sys_prompt_args["system_prompt"] == expected_system_prompt assert isinstance(set_sys_prompt_args["conversation_id"], str) assert set_sys_prompt_args["attack_identifier"] is expected_attack_identifier @@ -235,10 +235,10 @@ async def 
test_scorer_score_value_with_llm_does_not_add_score_prompt_id_for_empt message = Message( message_pieces=[MessagePiece(role="assistant", original_value=good_json, conversation_id="test-convo")] ) - chat_target = MagicMock(PromptChatTarget) + chat_target = MagicMock(PromptTarget) chat_target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget") chat_target.send_prompt_async = AsyncMock(return_value=[message]) - chat_target.set_system_prompt = MagicMock() + chat_target._set_target_system_prompt = MagicMock() expected_system_prompt = "system_prompt" @@ -252,9 +252,9 @@ async def test_scorer_score_value_with_llm_does_not_add_score_prompt_id_for_empt objective="task", ) - chat_target.set_system_prompt.assert_called_once() + chat_target._set_target_system_prompt.assert_called_once() - _, set_sys_prompt_args = chat_target.set_system_prompt.call_args + _, set_sys_prompt_args = chat_target._set_target_system_prompt.call_args assert set_sys_prompt_args["system_prompt"] == expected_system_prompt assert isinstance(set_sys_prompt_args["conversation_id"], str) assert not set_sys_prompt_args["attack_identifier"] @@ -262,7 +262,7 @@ async def test_scorer_score_value_with_llm_does_not_add_score_prompt_id_for_empt @pytest.mark.asyncio async def test_scorer_send_chat_target_async_good_response(good_json): - chat_target = MagicMock(PromptChatTarget) + chat_target = MagicMock(PromptTarget) chat_target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget") good_json_resp = Message( @@ -287,7 +287,7 @@ async def test_scorer_send_chat_target_async_good_response(good_json): @pytest.mark.asyncio async def test_scorer_remove_markdown_json_called(good_json): - chat_target = MagicMock(PromptChatTarget) + chat_target = MagicMock(PromptTarget) chat_target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget") good_json_resp = Message( message_pieces=[MessagePiece(role="assistant", original_value=good_json, 
conversation_id="test-convo")] @@ -313,7 +313,7 @@ async def test_scorer_remove_markdown_json_called(good_json): @pytest.mark.asyncio async def test_score_value_with_llm_prepended_text_message_piece_creates_multipiece_message(good_json): """Test that prepended_text_message_piece creates a multi-piece message (text context + main content).""" - chat_target = MagicMock(PromptChatTarget) + chat_target = MagicMock(PromptTarget) chat_target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget") good_json_resp = Message( message_pieces=[MessagePiece(role="assistant", original_value=good_json, conversation_id="test-convo")] @@ -357,7 +357,7 @@ async def test_score_value_with_llm_prepended_text_message_piece_creates_multipi @pytest.mark.asyncio async def test_score_value_with_llm_no_prepended_text_creates_single_piece_message(good_json): """Test that without prepended_text_message_piece, only a single piece message is created.""" - chat_target = MagicMock(PromptChatTarget) + chat_target = MagicMock(PromptTarget) chat_target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget") good_json_resp = Message( message_pieces=[MessagePiece(role="assistant", original_value=good_json, conversation_id="test-convo")] @@ -393,7 +393,7 @@ async def test_score_value_with_llm_no_prepended_text_creates_single_piece_messa @pytest.mark.asyncio async def test_score_value_with_llm_prepended_text_works_with_audio(good_json): """Test that prepended_text_message_piece works with audio content (type-independent).""" - chat_target = MagicMock(PromptChatTarget) + chat_target = MagicMock(PromptTarget) chat_target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget") good_json_resp = Message( message_pieces=[MessagePiece(role="assistant", original_value=good_json, conversation_id="test-convo")] @@ -1424,7 +1424,7 @@ async def test_blocked_takes_precedence_over_generic_error( @pytest.mark.asyncio async def 
test_score_value_with_llm_skips_reasoning_piece(good_json): """Test that _score_value_with_llm extracts JSON from the text piece, not a reasoning piece.""" - chat_target = MagicMock(PromptChatTarget) + chat_target = MagicMock(PromptTarget) chat_target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget") # Simulate a reasoning model response: first piece is reasoning, second is the actual text with JSON diff --git a/tests/unit/score/test_self_ask_category.py b/tests/unit/score/test_self_ask_category.py index 390788d1ee..ba55626c40 100644 --- a/tests/unit/score/test_self_ask_category.py +++ b/tests/unit/score/test_self_ask_category.py @@ -63,7 +63,9 @@ def test_category_scorer_set_no_category_found(): @pytest.mark.asyncio -async def test_category_scorer_set_system_prompt(scorer_category_response_bullying: Message, patch_central_database): +async def test_category_scorer__set_target_system_prompt( + scorer_category_response_bullying: Message, patch_central_database +): chat_target = MagicMock() chat_target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget") @@ -75,7 +77,7 @@ async def test_category_scorer_set_system_prompt(scorer_category_response_bullyi await scorer.score_text_async("this has a lot of bullying") - chat_target.set_system_prompt.assert_called_once() + chat_target._set_target_system_prompt.assert_called_once() @pytest.mark.asyncio diff --git a/tests/unit/score/test_self_ask_likert.py b/tests/unit/score/test_self_ask_likert.py index 54f3c88f28..142b4ccc1c 100644 --- a/tests/unit/score/test_self_ask_likert.py +++ b/tests/unit/score/test_self_ask_likert.py @@ -35,7 +35,7 @@ def scorer_likert_response() -> Message: @pytest.mark.asyncio -async def test_likert_scorer_set_system_prompt(scorer_likert_response: Message): +async def test_likert_scorer__set_target_system_prompt(scorer_likert_response: Message): memory = MagicMock(MemoryInterface) with patch.object(CentralMemory, "get_memory_instance", 
return_value=memory): chat_target = MagicMock() @@ -46,7 +46,7 @@ async def test_likert_scorer_set_system_prompt(scorer_likert_response: Message): await scorer.score_text_async(text="string") - chat_target.set_system_prompt.assert_called_once() + chat_target._set_target_system_prompt.assert_called_once() # assert that the likert score was loaded into system prompt # If _score_category is a list, check all values are in the prompt diff --git a/tests/unit/score/test_self_ask_refusal.py b/tests/unit/score/test_self_ask_refusal.py index 634716e371..47772efc5a 100644 --- a/tests/unit/score/test_self_ask_refusal.py +++ b/tests/unit/score/test_self_ask_refusal.py @@ -49,7 +49,7 @@ async def test_refusal_scorer_score(scorer_true_false_response: Message, patch_c @pytest.mark.asyncio -async def test_refusal_scorer_set_system_prompt(scorer_true_false_response: Message, patch_central_database): +async def test_refusal_scorer__set_target_system_prompt(scorer_true_false_response: Message, patch_central_database): chat_target = MagicMock() chat_target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget") chat_target.send_prompt_async = AsyncMock(return_value=[scorer_true_false_response]) @@ -57,7 +57,7 @@ async def test_refusal_scorer_set_system_prompt(scorer_true_false_response: Mess await scorer.score_text_async("true false") - chat_target.set_system_prompt.assert_called_once() + chat_target._set_target_system_prompt.assert_called_once() @pytest.mark.asyncio diff --git a/tests/unit/score/test_self_ask_scale.py b/tests/unit/score/test_self_ask_scale.py index 824e594e84..13270797f9 100644 --- a/tests/unit/score/test_self_ask_scale.py +++ b/tests/unit/score/test_self_ask_scale.py @@ -61,7 +61,7 @@ def scale_scorer(patch_central_database) -> SelfAskScaleScorer: (criteria_scale_path, criteria_system_prompt_path), ], ) -async def test_scale_scorer_set_system_prompt( +async def test_scale_scorer__set_target_system_prompt( scorer_scale_response: Message, 
scale_arguments_path: Path, system_prompt_path: Path, @@ -79,7 +79,7 @@ async def test_scale_scorer_set_system_prompt( await scorer.score_text_async(text="string", objective="task") - chat_target.set_system_prompt.assert_called_once() + chat_target._set_target_system_prompt.assert_called_once() # assert that the scale score was loaded into system prompt diff --git a/tests/unit/score/test_self_ask_true_false.py b/tests/unit/score/test_self_ask_true_false.py index 17a10048c6..c0c9f0b96d 100644 --- a/tests/unit/score/test_self_ask_true_false.py +++ b/tests/unit/score/test_self_ask_true_false.py @@ -53,7 +53,7 @@ async def test_true_false_scorer_score(patch_central_database, scorer_true_false @pytest.mark.asyncio -async def test_true_false_scorer_set_system_prompt(patch_central_database, scorer_true_false_response: Message): +async def test_true_false_scorer__set_target_system_prompt(patch_central_database, scorer_true_false_response: Message): chat_target = MagicMock() chat_target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget") chat_target.send_prompt_async = AsyncMock(return_value=[scorer_true_false_response]) @@ -64,7 +64,7 @@ async def test_true_false_scorer_set_system_prompt(patch_central_database, score await scorer.score_text_async("true false") - chat_target.set_system_prompt.assert_called_once() + chat_target._set_target_system_prompt.assert_called_once() # assert that the category content was loaded into system prompt assert "# Instructions" in scorer._system_prompt diff --git a/tests/unit/target/test_openai_chat_target.py b/tests/unit/target/test_openai_chat_target.py index 56f61b8733..a278781a7e 100644 --- a/tests/unit/target/test_openai_chat_target.py +++ b/tests/unit/target/test_openai_chat_target.py @@ -32,7 +32,7 @@ OpenAIChatAudioConfig, OpenAIChatTarget, OpenAIResponseTarget, - PromptChatTarget, + PromptTarget, ) from pyrit.prompt_target.common.target_capabilities import TargetCapabilities @@ -584,17 +584,17 @@ def 
test_validate_request_unsupported_data_types(target: OpenAIChatTarget): def test_inheritance_from_prompt_chat_target(target: OpenAIChatTarget): - """Test that OpenAIChatTarget properly inherits from PromptChatTarget.""" - assert isinstance(target, PromptChatTarget), "OpenAIChatTarget must inherit from PromptChatTarget" + """Test that OpenAIChatTarget properly inherits from PromptTarget.""" + assert isinstance(target, PromptTarget), "OpenAIChatTarget must inherit from PromptTarget" def test_inheritance_from_prompt_chat_target_base(): - """Test that OpenAIChatTargetBase properly inherits from PromptChatTarget.""" + """Test that OpenAIChatTargetBase properly inherits from PromptTarget.""" # Create a minimal instance to test inheritance target = OpenAIChatTarget(model_name="test-model", endpoint="https://test.com", api_key="test-key") - assert isinstance(target, PromptChatTarget), ( - "OpenAIChatTarget must inherit from PromptChatTarget through OpenAIChatTargetBase" + assert isinstance(target, PromptTarget), ( + "OpenAIChatTarget must inherit from PromptTarget through OpenAIChatTargetBase" ) diff --git a/tests/unit/target/test_openai_response_target.py b/tests/unit/target/test_openai_response_target.py index 507f8f0935..297333984b 100644 --- a/tests/unit/target/test_openai_response_target.py +++ b/tests/unit/target/test_openai_response_target.py @@ -26,7 +26,7 @@ from pyrit.memory.memory_interface import MemoryInterface from pyrit.models import Message, MessagePiece from pyrit.models.json_response_config import _JsonResponseConfig -from pyrit.prompt_target import OpenAIResponseTarget, PromptChatTarget +from pyrit.prompt_target import OpenAIResponseTarget, PromptTarget def create_mock_response(response_dict: dict = None) -> MagicMock: @@ -594,8 +594,8 @@ def test_validate_request_unsupported_data_types(target: OpenAIResponseTarget): def test_inheritance_from_prompt_chat_target(target: OpenAIResponseTarget): - """Test that OpenAIResponseTarget properly inherits from 
PromptChatTarget.""" - assert isinstance(target, PromptChatTarget), "OpenAIResponseTarget must inherit from PromptChatTarget" + """Test that OpenAIResponseTarget properly inherits from PromptTarget.""" + assert isinstance(target, PromptTarget), "OpenAIResponseTarget must inherit from PromptTarget" def test_is_response_format_json_supported(target: OpenAIResponseTarget): diff --git a/tests/unit/target/test_prompt_target.py b/tests/unit/target/test_prompt_target.py index 08ce7f217d..0e0b907b7d 100644 --- a/tests/unit/target/test_prompt_target.py +++ b/tests/unit/target/test_prompt_target.py @@ -46,8 +46,8 @@ def mock_attack_strategy(): return strategy -def test_set_system_prompt(azure_openai_target: OpenAIChatTarget, mock_attack_strategy: AttackStrategy): - azure_openai_target.set_system_prompt( +def test__set_target_system_prompt(azure_openai_target: OpenAIChatTarget, mock_attack_strategy: AttackStrategy): + azure_openai_target._set_target_system_prompt( system_prompt="system prompt", conversation_id="1", attack_identifier=mock_attack_strategy.get_identifier(), @@ -60,21 +60,23 @@ def test_set_system_prompt(azure_openai_target: OpenAIChatTarget, mock_attack_st assert chats[0].converted_value == "system prompt" -@pytest.mark.asyncio -@pytest.mark.asyncio -async def test_set_system_prompt_adds_memory( +def test__set_target_system_prompt_raises_if_conversation_exists( azure_openai_target: OpenAIChatTarget, mock_attack_strategy: AttackStrategy ): - azure_openai_target.set_system_prompt( + azure_openai_target._set_target_system_prompt( system_prompt="system prompt", conversation_id="1", attack_identifier=mock_attack_strategy.get_identifier(), labels={}, ) - chats = azure_openai_target._memory.get_message_pieces(conversation_id="1") - assert len(chats) == 1, f"Expected 1 chats, got {len(chats)}" - assert chats[0].api_role == "system" + with pytest.raises(RuntimeError, match="Conversation already exists"): + azure_openai_target._set_target_system_prompt( + 
system_prompt="another prompt", + conversation_id="1", + attack_identifier=mock_attack_strategy.get_identifier(), + labels={}, + ) @pytest.mark.asyncio @@ -100,7 +102,7 @@ async def test_send_prompt_with_system_calls_chat_complete( ) as mock_create: mock_create.return_value = mock_response - azure_openai_target.set_system_prompt( + azure_openai_target._set_target_system_prompt( system_prompt="system prompt", conversation_id="1", attack_identifier=mock_attack_strategy.get_identifier(),