-
Notifications
You must be signed in to change notification settings - Fork 709
FEAT: Add partner integration tests for azure-ai-evaluation red team … #1533
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
slister1001
wants to merge
4
commits into
microsoft:main
Choose a base branch
from
slister1001:partner-integration-tests
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
8e13830
FEAT: Add partner integration tests for azure-ai-evaluation red team …
slister1001 0a0edfb
fix: address review findings in partner integration tests
slister1001 1830719
fix: ruff-format split f-string in test_converter_contract.py
slister1001 895406c
Address PR review comments: PromptChatTarget -> PromptTarget, fix imp…
slister1001 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| # Copyright (c) Microsoft Corporation. | ||
| # Licensed under the MIT license. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| # Copyright (c) Microsoft Corporation. | ||
| # Licensed under the MIT license. |
97 changes: 97 additions & 0 deletions
97
tests/partner_integration/azure_ai_evaluation/test_converter_contract.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,97 @@ | ||
| # Copyright (c) Microsoft Corporation. | ||
| # Licensed under the MIT license. | ||
|
|
||
| """Contract tests for PromptConverter interface and specific converters used by azure-ai-evaluation. | ||
|
|
||
| The azure-ai-evaluation red team module: | ||
| - Extends PromptConverter via _DefaultConverter | ||
| - Imports 20+ specific converters in _agent/_agent_utils.py and _utils/strategy_utils.py | ||
| - Uses ConverterResult as the return type | ||
| """ | ||
|
|
||
| import pytest | ||
|
|
||
| from pyrit.prompt_converter import ConverterResult, PromptConverter | ||
|
|
||
|
|
||
| class TestPromptConverterContract: | ||
| """Validate PromptConverter base class interface stability.""" | ||
|
|
||
| def test_prompt_converter_base_exists(self): | ||
| """_DefaultConverter extends PromptConverter.""" | ||
| assert PromptConverter is not None | ||
|
|
||
| def test_converter_result_exists(self): | ||
| """_DefaultConverter.convert_async returns ConverterResult.""" | ||
| assert ConverterResult is not None | ||
|
|
||
| def test_prompt_converter_has_convert_async(self): | ||
| """_DefaultConverter overrides convert_async.""" | ||
| assert hasattr(PromptConverter, "convert_async") | ||
|
|
||
| def test_prompt_converter_subclassable(self): | ||
| """_DefaultConverter subclasses PromptConverter with convert_async.""" | ||
|
|
||
| class TestConverter(PromptConverter): | ||
| SUPPORTED_INPUT_TYPES = ("text",) | ||
| SUPPORTED_OUTPUT_TYPES = ("text",) | ||
|
|
||
| async def convert_async(self, *, prompt, input_type="text"): | ||
| return ConverterResult(output_text=prompt, output_type="text") | ||
|
|
||
| converter = TestConverter() | ||
| assert isinstance(converter, PromptConverter) | ||
|
|
||
|
|
||
| class TestSpecificConvertersImportable: | ||
| """Validate that all converters imported by azure-ai-evaluation are available. | ||
|
|
||
| These converters are imported in: | ||
| - _agent/_agent_utils.py (20+ converters) | ||
| - _utils/strategy_utils.py (converter instantiation) | ||
| """ | ||
|
|
||
| @pytest.mark.parametrize( | ||
| "converter_name", | ||
| [ | ||
| "AnsiAttackConverter", | ||
| "AsciiArtConverter", | ||
| "AtbashConverter", | ||
| "Base64Converter", | ||
| "BinaryConverter", | ||
| "CaesarConverter", | ||
| "CharacterSpaceConverter", | ||
| "CharSwapConverter", | ||
| "DiacriticConverter", | ||
| "FlipConverter", | ||
| "LeetspeakConverter", | ||
| "MorseConverter", | ||
| "ROT13Converter", | ||
| "StringJoinConverter", | ||
| "SuffixAppendConverter", | ||
| "TenseConverter", | ||
| "UnicodeConfusableConverter", | ||
| "UnicodeSubstitutionConverter", | ||
| "UrlConverter", | ||
| ], | ||
| ) | ||
| def test_converter_importable(self, converter_name): | ||
| """Each converter used by azure-ai-evaluation must be importable from pyrit.prompt_converter.""" | ||
| import pyrit.prompt_converter as pc | ||
|
|
||
| converter_class = getattr(pc, converter_name, None) | ||
| assert converter_class is not None, ( | ||
| f"{converter_name} not found in pyrit.prompt_converter — azure-ai-evaluation depends on this converter" | ||
| ) | ||
|
|
||
| def test_ascii_smuggler_converter_importable(self): | ||
| """AsciiSmugglerConverter is imported in _agent/_agent_utils.py.""" | ||
| from pyrit.prompt_converter import AsciiSmugglerConverter | ||
|
|
||
| assert AsciiSmugglerConverter is not None | ||
|
|
||
| def test_llm_generic_text_converter_importable(self): | ||
| """LLMGenericTextConverter is used for tense/translation strategies.""" | ||
| from pyrit.prompt_converter import LLMGenericTextConverter | ||
|
|
||
| assert LLMGenericTextConverter is not None |
65 changes: 65 additions & 0 deletions
65
tests/partner_integration/azure_ai_evaluation/test_exceptions_contract.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,65 @@ | ||
| # Copyright (c) Microsoft Corporation. | ||
| # Licensed under the MIT license. | ||
|
|
||
| """Contract tests for PyRIT exception types and retry decorators used by azure-ai-evaluation. | ||
|
|
||
| The azure-ai-evaluation red team module uses these in: | ||
| - _callback_chat_target.py: EmptyResponseException, RateLimitException, pyrit_target_retry | ||
| - _rai_service_target.py: remove_markdown_json | ||
| """ | ||
|
|
||
| from pyrit.exceptions import ( | ||
| EmptyResponseException, | ||
| RateLimitException, | ||
| pyrit_target_retry, | ||
| remove_markdown_json, | ||
| ) | ||
|
|
||
|
|
||
| class TestExceptionTypesContract: | ||
| """Validate exception types exist and are proper Exception subclasses.""" | ||
|
|
||
| def test_empty_response_exception_is_exception(self): | ||
| """_CallbackChatTarget catches EmptyResponseException.""" | ||
| assert issubclass(EmptyResponseException, Exception) | ||
|
|
||
| def test_rate_limit_exception_is_exception(self): | ||
| """_CallbackChatTarget catches RateLimitException.""" | ||
| assert issubclass(RateLimitException, Exception) | ||
|
|
||
| def test_empty_response_exception_instantiable(self): | ||
| """Verify EmptyResponseException can be instantiated without arguments.""" | ||
| exc = EmptyResponseException() | ||
| assert isinstance(exc, Exception) | ||
|
|
||
| def test_rate_limit_exception_instantiable(self): | ||
| """Verify RateLimitException can be instantiated without arguments.""" | ||
| exc = RateLimitException() | ||
| assert isinstance(exc, Exception) | ||
|
|
||
|
|
||
| class TestRetryDecoratorContract: | ||
| """Validate retry decorator availability.""" | ||
|
|
||
| def test_pyrit_target_retry_is_callable(self): | ||
| """_CallbackChatTarget uses @pyrit_target_retry decorator.""" | ||
| assert callable(pyrit_target_retry) | ||
|
|
||
|
|
||
| class TestUtilityFunctionsContract: | ||
| """Validate utility functions used by azure-ai-evaluation.""" | ||
|
|
||
| def test_remove_markdown_json_is_callable(self): | ||
| """_rai_service_target.py uses remove_markdown_json.""" | ||
| assert callable(remove_markdown_json) | ||
|
|
||
| def test_remove_markdown_json_handles_plain_text(self): | ||
| """Verify remove_markdown_json returns a string when given plain text.""" | ||
| result = remove_markdown_json("plain text") | ||
| assert isinstance(result, str) | ||
|
|
||
| def test_remove_markdown_json_strips_markdown_fences(self): | ||
| """Verify remove_markdown_json strips ```json fences.""" | ||
| input_text = '```json\n{"key": "value"}\n```' | ||
| result = remove_markdown_json(input_text) | ||
| assert "```" not in result |
76 changes: 76 additions & 0 deletions
76
tests/partner_integration/azure_ai_evaluation/test_foundry_contract.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,76 @@ | ||
| # Copyright (c) Microsoft Corporation. | ||
| # Licensed under the MIT license. | ||
|
|
||
| """Contract tests for Foundry scenario APIs used by azure-ai-evaluation. | ||
|
|
||
| The azure-ai-evaluation red team module uses the Foundry framework for modern attack execution: | ||
| - FoundryExecutionManager creates FoundryScenario instances per risk category | ||
| - StrategyMapper maps AttackStrategy enum → FoundryStrategy | ||
| - DatasetConfigurationBuilder produces DatasetConfiguration from RAI objectives | ||
| - ScenarioOrchestrator processes ScenarioResult and AttackResult | ||
| - RAIServiceScorer uses AttackScoringConfig for scoring configuration | ||
| """ | ||
|
|
||
| from pyrit.executor.attack import AttackScoringConfig | ||
| from pyrit.models import AttackOutcome, AttackResult | ||
| from pyrit.models.scenario_result import ScenarioResult | ||
| from pyrit.scenario import DatasetConfiguration, ScenarioStrategy | ||
| from pyrit.scenario.foundry import FoundryScenario, FoundryStrategy | ||
|
|
||
|
|
||
| class TestFoundryStrategyContract: | ||
| """Validate FoundryStrategy availability and structure.""" | ||
|
|
||
| def test_foundry_strategy_class_exists(self): | ||
| """StrategyMapper maps to FoundryStrategy values.""" | ||
| assert FoundryStrategy is not None | ||
|
|
||
| def test_foundry_strategy_is_scenario_strategy(self): | ||
| """FoundryStrategy should extend ScenarioStrategy.""" | ||
| assert issubclass(FoundryStrategy, ScenarioStrategy) | ||
|
|
||
|
|
||
| class TestFoundryScenarioContract: | ||
| """Validate FoundryScenario availability.""" | ||
|
|
||
| def test_foundry_scenario_class_exists(self): | ||
| """ScenarioOrchestrator creates FoundryScenario instances.""" | ||
| assert FoundryScenario is not None | ||
|
|
||
|
|
||
| class TestDatasetConfigurationContract: | ||
| """Validate DatasetConfiguration availability.""" | ||
|
|
||
| def test_dataset_configuration_class_exists(self): | ||
| """DatasetConfigurationBuilder produces DatasetConfiguration.""" | ||
| assert DatasetConfiguration is not None | ||
|
|
||
|
|
||
| class TestAttackScoringConfigContract: | ||
| """Validate AttackScoringConfig availability.""" | ||
|
|
||
| def test_attack_scoring_config_exists(self): | ||
| """ScenarioOrchestrator uses AttackScoringConfig.""" | ||
| assert AttackScoringConfig is not None | ||
|
|
||
| def test_attack_scoring_config_has_expected_fields(self): | ||
| """A default AttackScoringConfig should expose objective_scorer and refusal_scorer attributes.""" | ||
| config = AttackScoringConfig() | ||
| assert hasattr(config, "objective_scorer") | ||
| assert hasattr(config, "refusal_scorer") | ||
|
|
||
|
|
||
| class TestScenarioResultContract: | ||
| """Validate ScenarioResult model availability.""" | ||
|
|
||
| def test_scenario_result_class_exists(self): | ||
| """ScenarioOrchestrator reads ScenarioResult.""" | ||
| assert ScenarioResult is not None | ||
|
|
||
| def test_attack_result_class_exists(self): | ||
| """FoundryResultProcessor processes AttackResult.""" | ||
| assert AttackResult is not None | ||
|
|
||
| def test_attack_outcome_class_exists(self): | ||
| """FoundryResultProcessor checks AttackOutcome values.""" | ||
| assert AttackOutcome is not None | ||
81 changes: 81 additions & 0 deletions
81
tests/partner_integration/azure_ai_evaluation/test_import_smoke.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,81 @@ | ||
| # Copyright (c) Microsoft Corporation. | ||
| # Licensed under the MIT license. | ||
|
|
||
| """Import smoke tests for azure-ai-evaluation red team module integration. | ||
|
|
||
| These tests verify that the azure-ai-evaluation red team module can be imported | ||
| and that its PyRIT subclasses correctly extend PyRIT base classes. | ||
|
|
||
| Tests are SKIPPED if azure-ai-evaluation[redteam] is not installed. | ||
| """ | ||
|
|
||
| import pytest | ||
|
|
||
| from pyrit.prompt_target import PromptTarget | ||
| from pyrit.score.true_false.true_false_scorer import TrueFalseScorer | ||
|
|
||
|
|
||
| def _azure_ai_evaluation_available() -> bool: | ||
| """Check if azure-ai-evaluation[redteam] is installed.""" | ||
| try: | ||
| from azure.ai.evaluation.red_team import RedTeam # noqa: F401 | ||
|
|
||
| return True | ||
| except ImportError: | ||
| return False | ||
|
|
||
|
|
||
| requires_azure_ai_evaluation = pytest.mark.skipif( | ||
| not _azure_ai_evaluation_available(), | ||
| reason="azure-ai-evaluation[redteam] is not installed", | ||
| ) | ||
|
|
||
|
|
||
| @requires_azure_ai_evaluation | ||
| class TestRedTeamModuleImports: | ||
| """Verify azure-ai-evaluation red_team module imports succeed with current PyRIT.""" | ||
|
|
||
| def test_redteam_public_api_imports(self): | ||
| """Verify all public classes from azure.ai.evaluation.red_team are importable.""" | ||
| from azure.ai.evaluation.red_team import ( | ||
| AttackStrategy, | ||
| RedTeam, | ||
| RedTeamResult, | ||
| RiskCategory, | ||
| SupportedLanguages, | ||
| ) | ||
|
|
||
| assert RedTeam is not None | ||
| assert AttackStrategy is not None | ||
| assert RiskCategory is not None | ||
| assert RedTeamResult is not None | ||
| assert SupportedLanguages is not None | ||
|
|
||
|
|
||
| @requires_azure_ai_evaluation | ||
| class TestCallbackChatTargetInheritance: | ||
| """Verify _CallbackChatTarget correctly extends PromptTarget. | ||
|
|
||
| NOTE: These tests intentionally import private (_-prefixed) modules from | ||
| azure-ai-evaluation. This is correct for contract testing — we need to verify | ||
| the actual subclass relationships that PyRIT API changes could break. | ||
| Explicit inheritance checks are needed because azure-ai-evaluation subclasses | ||
| are detected via issubclass() checks in PyRIT orchestrators and scenarios. | ||
| """ | ||
|
|
||
| def test_callback_chat_target_extends_prompt_target(self): | ||
| """_CallbackChatTarget must be a subclass of pyrit.prompt_target.PromptTarget.""" | ||
| from azure.ai.evaluation.red_team._callback_chat_target import _CallbackChatTarget | ||
|
|
||
| assert issubclass(_CallbackChatTarget, PromptTarget) | ||
|
|
||
|
|
||
| @requires_azure_ai_evaluation | ||
| class TestRAIScorerInheritance: | ||
| """Verify RAIServiceScorer correctly extends TrueFalseScorer.""" | ||
|
|
||
| def test_rai_scorer_extends_true_false_scorer(self): | ||
| """RAIServiceScorer must be a subclass of pyrit.score.true_false.TrueFalseScorer.""" | ||
| from azure.ai.evaluation.red_team._foundry._rai_scorer import RAIServiceScorer # private: intentional | ||
|
|
||
| assert issubclass(RAIServiceScorer, TrueFalseScorer) |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: rename to RedTeamScenario
(Internally there's a bit of an existential crisis going on: half of the classes are named foundry and the other half redteam, but we're moving to RedTeam, so I'd try to use that instead.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
as an fyi: