From a1f03a576d3430fd8fcc8d9aa2a70bd9b0869dd7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?El=C3=ADas=20Snorrason?=
Date: Tue, 11 Feb 2025 22:34:12 +0000
Subject: [PATCH 1/3] move TLM protocol definition into types directory

---
 src/cleanlab_codex/response_validation.py | 22 +--------------------
 src/cleanlab_codex/types/tlm.py           | 23 +++++++++++++++++++++++
 2 files changed, 24 insertions(+), 21 deletions(-)
 create mode 100644 src/cleanlab_codex/types/tlm.py

diff --git a/src/cleanlab_codex/response_validation.py b/src/cleanlab_codex/response_validation.py
index dcc15d5..4ec0a85 100644
--- a/src/cleanlab_codex/response_validation.py
+++ b/src/cleanlab_codex/response_validation.py
@@ -9,36 +9,16 @@
     Callable,
     Dict,
     Optional,
-    Protocol,
-    Sequence,
     Union,
     cast,
-    runtime_checkable,
 )
 
 from pydantic import BaseModel, ConfigDict, Field
 
+from cleanlab_codex.types.tlm import TLM
 from cleanlab_codex.utils.errors import MissingDependencyError
 from cleanlab_codex.utils.prompt import default_format_prompt
 
-
-@runtime_checkable
-class TLM(Protocol):
-    def get_trustworthiness_score(
-        self,
-        prompt: Union[str, Sequence[str]],
-        response: Union[str, Sequence[str]],
-        **kwargs: Any,
-    ) -> Dict[str, Any]: ...
-
-    def prompt(
-        self,
-        prompt: Union[str, Sequence[str]],
-        /,
-        **kwargs: Any,
-    ) -> Dict[str, Any]: ...
-
-
 DEFAULT_FALLBACK_ANSWER: str = (
     "Based on the available information, I cannot provide a complete answer to this question."
 )
diff --git a/src/cleanlab_codex/types/tlm.py b/src/cleanlab_codex/types/tlm.py
new file mode 100644
index 0000000..17c5c54
--- /dev/null
+++ b/src/cleanlab_codex/types/tlm.py
@@ -0,0 +1,23 @@
+"""Protocol for a Trustworthy Language Model."""
+
+
+from __future__ import annotations
+
+from typing import Any, Dict, Protocol, Sequence, Union, runtime_checkable
+
+
+@runtime_checkable
+class TLM(Protocol):
+    def get_trustworthiness_score(
+        self,
+        prompt: Union[str, Sequence[str]],
+        response: Union[str, Sequence[str]],
+        **kwargs: Any,
+    ) -> Dict[str, Any]: ...
+
+    def prompt(
+        self,
+        prompt: Union[str, Sequence[str]],
+        /,
+        **kwargs: Any,
+    ) -> Dict[str, Any]: ...

From 05f7a0936ae05ade9876c2b614c7b6fdad652277 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?El=C3=ADas=20Snorrason?=
Date: Tue, 11 Feb 2025 22:48:59 +0000
Subject: [PATCH 2/3] add CodexBackup class

---
 src/cleanlab_codex/__init__.py     |   3 +-
 src/cleanlab_codex/codex_backup.py | 113 +++++++++++++++++++++++++++++
 src/cleanlab_codex/types/backup.py |  30 ++++++++
 tests/test_codex_backup.py         |  70 ++++++++++++++++++
 4 files changed, 215 insertions(+), 1 deletion(-)
 create mode 100644 src/cleanlab_codex/codex_backup.py
 create mode 100644 src/cleanlab_codex/types/backup.py
 create mode 100644 tests/test_codex_backup.py

diff --git a/src/cleanlab_codex/__init__.py b/src/cleanlab_codex/__init__.py
index d1b8ef6..c5f25dc 100644
--- a/src/cleanlab_codex/__init__.py
+++ b/src/cleanlab_codex/__init__.py
@@ -3,4 +3,5 @@
+from cleanlab_codex.codex_backup import CodexBackup
 from cleanlab_codex.codex_tool import CodexTool
 from cleanlab_codex.project import Project
 
-__all__ = ["Client", "CodexTool", "Project"]
+__all__ = ["Client", "CodexTool", "CodexBackup", "Project"]
diff --git a/src/cleanlab_codex/codex_backup.py b/src/cleanlab_codex/codex_backup.py
new file mode 100644
index 0000000..e01df25
--- /dev/null
+++ b/src/cleanlab_codex/codex_backup.py
@@ -0,0 +1,113 @@
+"""Enables connecting RAG applications to Codex as a Backup system.
+
+This module provides functionality to use Codex as a fallback when a primary
+RAG (Retrieval-Augmented Generation) system fails to provide adequate responses.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Optional
+
+from cleanlab_codex.response_validation import BadResponseDetectionConfig, is_bad_response
+
+if TYPE_CHECKING:
+    from cleanlab_codex.project import Project
+    from cleanlab_codex.types.backup import BackupHandler
+    from cleanlab_codex.types.tlm import TLM
+
+
+def handle_backup_default(codex_response: str, primary_system: Any) -> None:  # noqa: ARG001
+    """Default implementation is a no-op."""
+    return None
+
+
+class CodexBackup:
+    """A backup decorator that connects to a Codex project to answer questions that
+    cannot be adequately answered by the existing agent.
+
+    Args:
+        project: The Codex project to use for backup responses
+        fallback_answer: The fallback answer to use if the primary system fails to provide an adequate response
+        backup_handler: A callback function that processes Codex's response and updates the primary RAG system. This handler is called whenever Codex provides a backup response after the primary system fails. By default, the backup handler is a no-op.
+        primary_system: The existing RAG system that needs to be backed up by Codex
+        tlm: The client for the Trustworthy Language Model, which evaluates the quality of responses from the primary system
+        is_bad_response_kwargs: Additional keyword arguments to pass to the is_bad_response function, for detecting inadequate responses from the primary system
+    """
+
+    DEFAULT_FALLBACK_ANSWER = "Based on the available information, I cannot provide a complete answer to this question."
+
+    def __init__(
+        self,
+        *,
+        project: Project,
+        fallback_answer: str = DEFAULT_FALLBACK_ANSWER,
+        backup_handler: BackupHandler = handle_backup_default,
+        primary_system: Optional[Any] = None,
+        tlm: Optional[TLM] = None,
+        is_bad_response_kwargs: Optional[dict[str, Any]] = None,
+    ):
+        self._project = project
+        self._fallback_answer = fallback_answer
+        self._backup_handler = backup_handler
+        self._primary_system: Optional[Any] = primary_system
+        self._tlm = tlm
+        self._is_bad_response_kwargs = is_bad_response_kwargs or {}
+
+    @classmethod
+    def from_project(cls, project: Project, **kwargs: Any) -> CodexBackup:
+        return cls(project=project, **kwargs)
+
+    @property
+    def primary_system(self) -> Any:
+        if self._primary_system is None:
+            error_message = "Primary system not set. Please set one by assigning to the `primary_system` property."
+            raise ValueError(error_message)
+        return self._primary_system
+
+    @primary_system.setter
+    def primary_system(self, primary_system: Any) -> None:
+        """Set the primary RAG system that will be used to generate responses."""
+        self._primary_system = primary_system
+
+    def run(
+        self,
+        response: str,
+        query: str,
+        context: Optional[str] = None,
+    ) -> str:
+        """Check if a response is adequate and provide a backup from Codex if needed.
+
+        Args:
+            response: The response to evaluate
+            query: The original query that generated the response
+            context: Optional context used to generate the response
+
+        Returns:
+            str: Either the original response if adequate, or a backup response from Codex
+        """
+
+        is_bad = is_bad_response(
+            response,
+            query=query,
+            context=context,
+            config=BadResponseDetectionConfig.model_validate(
+                {
+                    "tlm": self._tlm,
+                    "fallback_answer": self._fallback_answer,
+                    **self._is_bad_response_kwargs,
+                },
+            ),
+        )
+        if not is_bad:
+            return response
+
+        codex_response = self._project.query(query, fallback_answer=self._fallback_answer)[0]
+        if not codex_response:
+            return response
+
+        if self._primary_system is not None:
+            self._backup_handler(
+                codex_response=codex_response,
+                primary_system=self._primary_system,
+            )
+        return codex_response
diff --git a/src/cleanlab_codex/types/backup.py b/src/cleanlab_codex/types/backup.py
new file mode 100644
index 0000000..0369369
--- /dev/null
+++ b/src/cleanlab_codex/types/backup.py
@@ -0,0 +1,30 @@
+"""Types for Codex Backup."""
+
+from __future__ import annotations
+
+from typing import Any, Protocol
+
+
+class BackupHandler(Protocol):
+    """Protocol defining how to handle backup responses from Codex.
+
+    This protocol defines a callable interface for processing Codex responses that are
+    retrieved when the primary response system (e.g., a RAG system) fails to provide
+    an adequate answer. Implementations of this protocol can be used to:
+
+    - Update the primary system's context or knowledge base
+    - Log Codex responses for analysis
+    - Trigger system improvements or retraining
+    - Perform any other necessary side effects
+
+    Args:
+        codex_response (str): The response received from Codex
+        primary_system (Any): The instance of the primary RAG system that
+            generated the inadequate response. This allows the handler to
+            update or modify the primary system if needed.
+
+    Returns:
+        None: The handler performs side effects but doesn't return a value
+    """
+
+    def __call__(self, codex_response: str, primary_system: Any) -> None: ...
diff --git a/tests/test_codex_backup.py b/tests/test_codex_backup.py
new file mode 100644
index 0000000..b62ff6e
--- /dev/null
+++ b/tests/test_codex_backup.py
@@ -0,0 +1,70 @@
+from unittest.mock import MagicMock
+
+import pytest
+
+from cleanlab_codex.codex_backup import CodexBackup
+
+MOCK_BACKUP_RESPONSE = "This is a test response"
+FALLBACK_MESSAGE = "Based on the available information, I cannot provide a complete answer to this question."
+TEST_MESSAGE = "Hello, world!"
+
+
+class MockApp:
+    def chat(self, user_message: str) -> str:
+        # Just echo the user message
+        return user_message
+
+@pytest.fixture
+def mock_app() -> MockApp:
+    return MockApp()
+
+
+def test_codex_backup(mock_app: MockApp) -> None:
+    # Create a mock project directly
+    mock_project = MagicMock()
+    mock_project.query.return_value = (MOCK_BACKUP_RESPONSE,)
+
+    # Echo works well
+    query = TEST_MESSAGE
+    response = mock_app.chat(query)
+    assert response == query
+
+    # Backup works well for fallback responses
+    codex_backup = CodexBackup.from_project(mock_project)
+    query = FALLBACK_MESSAGE
+    response = mock_app.chat(query)
+    assert response == query
+    response = codex_backup.run(response, query=query)
+    assert response == MOCK_BACKUP_RESPONSE, f"Response was {response}"
+
+
+def test_backup_handler(mock_app: MockApp) -> None:
+    mock_project = MagicMock()
+    mock_project.query.return_value = (MOCK_BACKUP_RESPONSE,)
+
+    mock_handler = MagicMock()
+    mock_handler.return_value = None
+
+    codex_backup = CodexBackup.from_project(mock_project, primary_system=mock_app, backup_handler=mock_handler)
+
+    query = TEST_MESSAGE
+    response = mock_app.chat(query)
+    assert response == query
+
+    response = codex_backup.run(response, query=query)
+    assert response == query, f"Response was {response}"
+
+    # Handler should not be called for good responses
+    assert mock_handler.call_count == 0
+
+    query = FALLBACK_MESSAGE
+    response = mock_app.chat(query)
+    assert response == query
+    response = codex_backup.run(response, query=query)
+    assert response == MOCK_BACKUP_RESPONSE, f"Response was {response}"
+
+    # Handler should be called for bad responses
+    assert mock_handler.call_count == 1
+    # The MockApp is the second argument to the handler, i.e. it has the necessary context
+    # to handle the new response
+    assert mock_handler.call_args.kwargs["primary_system"] == mock_app

From bdf8872f7b6bf42b4b687b10bfdb77020736f676 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?El=C3=ADas=20Snorrason?=
Date: Tue, 11 Feb 2025 22:56:29 +0000
Subject: [PATCH 3/3] formatting

---
 src/cleanlab_codex/types/tlm.py | 1 -
 tests/test_codex_backup.py      | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/cleanlab_codex/types/tlm.py b/src/cleanlab_codex/types/tlm.py
index 17c5c54..773c49c 100644
--- a/src/cleanlab_codex/types/tlm.py
+++ b/src/cleanlab_codex/types/tlm.py
@@ -1,6 +1,5 @@
 """Protocol for a Trustworthy Language Model."""
 
-
 from __future__ import annotations
 
 from typing import Any, Dict, Protocol, Sequence, Union, runtime_checkable
diff --git a/tests/test_codex_backup.py b/tests/test_codex_backup.py
index b62ff6e..d5b52ad 100644
--- a/tests/test_codex_backup.py
+++ b/tests/test_codex_backup.py
@@ -14,6 +14,7 @@ def chat(self, user_message: str) -> str:
         # Just echo the user message
         return user_message
 
+
 @pytest.fixture
 def mock_app() -> MockApp:
     return MockApp()