From 711751872871ec0fa41c477916c0b00e56c36a17 Mon Sep 17 00:00:00 2001 From: Siddarth Date: Fri, 7 Feb 2025 12:38:48 -0500 Subject: [PATCH 1/2] testing conditional retreival --- rag_logs.md | 206 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/rag.py | 125 ++++++++++++++++++++++++++----- 2 files changed, 313 insertions(+), 18 deletions(-) create mode 100644 rag_logs.md diff --git a/rag_logs.md b/rag_logs.md new file mode 100644 index 0000000..fee2905 --- /dev/null +++ b/rag_logs.md @@ -0,0 +1,206 @@ + +### 2025-02-07 12:31:49 +🔍 No conversation history, not a clarification. + +### 2025-02-07 12:31:50 + +#### Context Usage +- 🔎 Retrieved new documents for query +- 📚 Number of new documents: 20 +- 📄 Document paths: + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/rag_playbook.rst + - docs/source/tutorials/rag_playbook.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/evaluation.rst + - docs/source/tutorials/rag_playbook.rst + - docs/source/tutorials/evaluation.rst + - tutorials/rag/rag.py + - adalflow/adalflow/core/retriever.py + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/index.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/rag_playbook.rst + - docs/source/tutorials/db.rst + + +### 2025-02-07 12:32:21 + +#### Query Analysis +- 🤔 Query: 'can you give me example usage?' +- 📝 Result: This is a clarification + + +### 2025-02-07 12:32:21 + +#### Context Usage +- ♻️ Reusing previous context for clarification query +- 📚 Number of reused documents: 20 +- 📄 Document paths: + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/rag_playbook.rst + - docs/source/tutorials/rag_playbook.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/evaluation.rst + - docs/source/tutorials/rag_playbook.rst + - docs/source/tutorials/evaluation.rst + - tutorials/rag/rag.py + - adalflow/adalflow/core/retriever.py + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/index.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/rag_playbook.rst + - docs/source/tutorials/db.rst + + +### 2025-02-07 12:33:21 + +#### Query Analysis +- 🤔 Query: 'can you use groq client instead of openai for the generation' +- 📝 Result: This is a clarification + + +### 2025-02-07 12:33:21 + +#### Context Usage +- ♻️ Reusing previous context for clarification query +- 📚 Number of reused documents: 20 +- 📄 Document paths: + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/rag_playbook.rst + - docs/source/tutorials/rag_playbook.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/evaluation.rst + - docs/source/tutorials/rag_playbook.rst + - docs/source/tutorials/evaluation.rst + - tutorials/rag/rag.py + - adalflow/adalflow/core/retriever.py + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/index.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/rag_playbook.rst + - docs/source/tutorials/db.rst + + +### 2025-02-07 12:33:55 + +#### Query Analysis +- 🤔 Query: 'provide me with the whole example' +- 📝 Result: This is a clarification + + +### 2025-02-07 12:33:55 + +#### Context Usage +- ♻️ Reusing previous context for clarification query +- 📚 Number of reused documents: 20 +- 📄 Document paths: + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/rag_playbook.rst + - docs/source/tutorials/rag_playbook.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/evaluation.rst + - docs/source/tutorials/rag_playbook.rst + - docs/source/tutorials/evaluation.rst + - tutorials/rag/rag.py + - adalflow/adalflow/core/retriever.py + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/index.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/rag_playbook.rst + - docs/source/tutorials/db.rst + + +### 2025-02-07 12:34:35 + +#### Query Analysis +- 🤔 Query: 'provide me with the whole example but use the groq client' +- 📝 Result: This is a clarification + + +### 2025-02-07 12:34:35 + +#### Context Usage +- ♻️ Reusing previous context for clarification query +- 📚 Number of reused documents: 20 +- 📄 Document paths: + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/rag_playbook.rst + - docs/source/tutorials/rag_playbook.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/evaluation.rst + - docs/source/tutorials/rag_playbook.rst + - docs/source/tutorials/evaluation.rst + - tutorials/rag/rag.py + - adalflow/adalflow/core/retriever.py + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/index.rst + - docs/source/tutorials/retriever.rst + - docs/source/tutorials/rag_playbook.rst + - docs/source/tutorials/db.rst + + +### 2025-02-07 12:35:16 + +#### Query Analysis +- 🤔 Query: 'can you explain me about agent compoenent.' +- 📝 Result: This is a new question + + +### 2025-02-07 12:35:17 + +#### Context Usage +- 🔎 Retrieved new documents for query +- 📚 Number of new documents: 20 +- 📄 Document paths: + - adalflow/adalflow/components/agent/README.md + - docs/source/tutorials/agent.rst + - docs/source/tutorials/component.rst + - docs/source/tutorials/index.rst + - docs/source/tutorials/index.rst + - docs/source/tutorials/agent.rst + - docs/source/apis/components/index.rst + - docs/source/tutorials/index.rst + - adalflow/tests/test_react_agent.py + - docs/source/tutorials/component.rst + - use_cases/unsorted/simple_qa_groq.py + - docs/source/apis/core/index.rst + - docs/source/tutorials/auto_text_grad.rst + - adalflow/tests/test_component.py + - docs/source/tutorials/adalcomponent.rst + - use_cases/unsorted/simple_qa_memory.py + - docs/source/tutorials/auto_text_grad.rst + - docs/source/tutorials/index.rst + - use_cases/unsorted/simple_qa_trainable.py + - docs/source/tutorials/component.rst + diff --git a/src/rag.py b/src/rag.py index 0f36934..77a7dd0 100644 --- a/src/rag.py +++ b/src/rag.py @@ -1,5 +1,7 @@ -from typing import Any, List +from typing import Any, List, Tuple, Optional from uuid import uuid4 +import os +from datetime import datetime import adalflow as adal from adalflow.core.types import ( @@ -16,6 +18,7 @@ from config import configs from src.data_pipeline import DatabaseManager from adalflow.utils import printc +from dataclasses import dataclass, field class Memory(DataComponent): @@ -41,6 +44,14 @@ def add_dialog_turn(self, user_query: str, assistant_response: str): self.current_conversation.append_dialog_turn(dialog_turn) +@dataclass +class RAGAnswer(adal.DataClass): + rationale: str = field(default="", metadata={"desc": "Rationale for the answer."}) + answer: str = field(default="", metadata={"desc": "Answer to the user query."}) + + __output_fields__ = ["rationale", "answer"] + + system_prompt = r""" You are a code assistant which answer's user question on a Github Repo. You will receive user query, relevant context, and past conversation history. @@ -75,16 +86,6 @@ def add_dialog_turn(self, user_query: str, assistant_response: str): """ -from dataclasses import dataclass, field - - -@dataclass -class RAGAnswer(adal.DataClass): - rationale: str = field(default="", metadata={"desc": "Rationale for the answer."}) - answer: str = field(default="", metadata={"desc": "Answer to the user query."}) - - __output_fields__ = ["rationale", "answer"] - class RAG(adal.Component): __doc__ = """RAG with one repo. @@ -119,6 +120,13 @@ def __init__(self): model_kwargs=configs["generator"]["model_kwargs"], output_processors=data_parser, ) + self.previous_retrieved_documents = None + + # Initialize log file + self.log_file = "rag_logs.md" + if os.path.exists(self.log_file): + # Clear previous logs when starting new session + open(self.log_file, 'w').close() def initialize_db_manager(self): self.db_manager = DatabaseManager() @@ -136,15 +144,89 @@ def prepare_retriever(self, repo_url_or_path: str): document_map_func=lambda doc: doc.vector, ) - def call(self, query: str) -> Any: + def log_to_file(self, message: str): + """Write log messages to file with timestamp""" + timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + with open(self.log_file, 'a') as f: + f.write(f"\n### {timestamp}\n{message}\n") + + def is_clarification_query(self, query: str) -> bool: + """ + Determines if the current query is a clarification of a previous query. + """ + if not self.memory(): + self.log_to_file("🔍 No conversation history, not a clarification.") + return False + + clarification_prompt = f""" + You are a clarification detector. Analyze if the query is a follow-up or clarification of the previous conversation. + Your response should include: + - A rationale explaining your reasoning + - A clear True/False answer + + Output your response in this format: + {{ + "rationale": "Your step-by-step reasoning here", + "answer": "True or False" + }} + + Conversation History: + {self.memory()} + + Query: + {query} + """ + response = self.generator( + prompt_kwargs={ + "conversation_history": self.memory(), + "system_prompt": clarification_prompt, + }, + ) + + is_clarification = "true" in response.data.answer.lower() + log_message = f""" +#### Query Analysis +- 🤔 Query: '{query}' +- 📝 Result: {'This is a clarification' if is_clarification else 'This is a new question'} +""" + self.log_to_file(log_message) + return is_clarification + + def call(self, query: str) -> Tuple[Any, Any]: + previous_context = ( + self.previous_retrieved_documents[0].documents + if self.previous_retrieved_documents + else None + ) - retrieved_documents = self.retriever(query) + is_clarification = self.is_clarification_query(query) - # fill in the document - retrieved_documents[0].documents = [ - self.transformed_docs[doc_index] - for doc_index in retrieved_documents[0].doc_indices - ] + if is_clarification and self.previous_retrieved_documents: + retrieved_documents = self.previous_retrieved_documents + log_message = f""" +#### Context Usage +- ♻️ Reusing previous context for clarification query +- 📚 Number of reused documents: {len(retrieved_documents[0].documents)} +- 📄 Document paths: + {self._format_doc_paths(retrieved_documents[0].documents)} +""" + self.log_to_file(log_message) + else: + retrieved_documents = self.retriever(query) + retrieved_documents[0].documents = [ + self.transformed_docs[doc_index] + for doc_index in retrieved_documents[0].doc_indices + ] + self.previous_retrieved_documents = retrieved_documents + + log_message = f""" +#### Context Usage +- 🔎 Retrieved new documents for query +- 📚 Number of new documents: {len(retrieved_documents[0].documents)} +- 📄 Document paths: + {self._format_doc_paths(retrieved_documents[0].documents)} +""" + self.log_to_file(log_message) printc(f"retrieved_documents: {retrieved_documents[0].documents}") printc(f"memory: {self.memory()}") @@ -168,6 +250,13 @@ def call(self, query: str) -> Any: return final_response, retrieved_documents + def _format_doc_paths(self, documents: List[Any]) -> str: + """Helper to format document paths for logging""" + return "\n ".join( + f"- {doc.meta_data.get('file_path', 'unknown')}" + for doc in documents + ) + if __name__ == "__main__": from adalflow.utils import get_logger From 0a4a9208c697ccb371a49eb48d62c8beca933fce Mon Sep 17 00:00:00 2001 From: Siddarth Date: Fri, 7 Feb 2025 12:39:21 -0500 Subject: [PATCH 2/2] Update .gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 1a903d7..004e230 100644 --- a/.gitignore +++ b/.gitignore @@ -45,3 +45,6 @@ __pycache__/ # ignore adalflow cache /adalflow + +# Ignore test files +rag_logs.md \ No newline at end of file