From a9b7b585b9ed8eddfd593e1f50ac4181ad4146ab Mon Sep 17 00:00:00 2001 From: John Bencina Date: Sun, 6 Jul 2025 21:53:51 -0700 Subject: [PATCH 1/2] Update prompt --- src/vecsync/chat/clients/openai.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/src/vecsync/chat/clients/openai.py b/src/vecsync/chat/clients/openai.py index e736afa..b7769bd 100644 --- a/src/vecsync/chat/clients/openai.py +++ b/src/vecsync/chat/clients/openai.py @@ -217,12 +217,27 @@ def _create_assistant(self) -> str: The assistant ID for the current conversation. """ - instructions = """You are a helpful research assistant that can search through a large number - of journals and papers to help answer the user questions. You have been given a file store which contains - the relevant documents the user is referencing. These documents should be your primary source of information. - You may only use external knowledge if it is helpful in clarifying questions. It is very important that you - remain factual and cite information from the sources provided to you in the file store. You are not allowed - to make up information.""" + instructions = """# Role + You are an AI researcher and educator versed in state-of-the-art machine-learning theory and practice. + + # Task + Using only the user-provided paper collection: + 1. Answer the user's question in your own words. + 2. Cite every factual claim to at least one paper. + 3. If the question is ambiguous, ask a clarifying follow-up before answering. + 4. If the user asks about fundamental concepts or requests an example, you can use your knowledge to answer. + + # Style + - Insightful, friendly, professional. + - Use clear analogies where appropriate. + - Include citations when referencing the document collection. + - Be transparent about any uncertainties or if the information is missing from the provided documents. 
+ - Adapt responses to user's knowledge level + + # Constraints + - Do not deviate from the user provided documents unless you are explaining fundamental concepts + - Do not repeat text verbatim from references without futher explaination + """ assistant = self.client.beta.assistants.create( name=self.assistant_name, From 38068c8c2bcdac3dd9d4aea2c261fb64bdd3b39b Mon Sep 17 00:00:00 2001 From: John Bencina Date: Tue, 8 Jul 2025 23:03:41 -0700 Subject: [PATCH 2/2] Separate prompt to text file --- src/vecsync/chat/clients/openai.py | 53 ++++++++++++++------------ src/vecsync/cli/chat.py | 20 ++++++---- src/vecsync/prompts/default_prompt.txt | 20 ++++++++++ 3 files changed, 62 insertions(+), 31 deletions(-) create mode 100644 src/vecsync/prompts/default_prompt.txt diff --git a/src/vecsync/chat/clients/openai.py b/src/vecsync/chat/clients/openai.py index b7769bd..dffca36 100644 --- a/src/vecsync/chat/clients/openai.py +++ b/src/vecsync/chat/clients/openai.py @@ -1,3 +1,4 @@ +from importlib import resources from queue import Empty, Queue from openai import AssistantEventHandler, OpenAI @@ -106,14 +107,40 @@ class OpenAIClient: settings_path : str | None The path to the settings file. If None, the default settings file will be used. This is used to store the thread ID for the current conversation. + prompt_source : str | None + The path to the prompt source file. If None, the default prompt will be used. """ - def __init__(self, store_name: str, settings_path: str | None = None): + def __init__(self, store_name: str, settings_path: str | None = None, prompt_source: str | None = None): self.client = OpenAI() self.store_name = store_name self.assistant_name = f"vecsync-{store_name}" self.connected = False self.settings_path = settings_path + self.prompt = self._get_prompt(prompt_source) + + def _get_prompt(self, prompt_source: str | None = None) -> str: + """Get the prompt from the prompt source. + + If a prompt source is provided, it will be used to load the prompt. 
Otherwise, the default + prompt will be used from the resources. + + Parameters + ---------- + prompt_source : str | None + The path to the prompt source file. If None, the default prompt will be used. + + Returns + ------- + str + The prompt to use for the assistant. + """ + if prompt_source is not None: + with open(prompt_source) as f: + return f.read() + else: + with resources.files("vecsync.prompts").joinpath("default_prompt.txt").open("r") as f: + return f.read() def connect(self): """Connect to the OpenAI API and load the assistant and thread. @@ -217,31 +244,9 @@ def _create_assistant(self) -> str: The assistant ID for the current conversation. """ - instructions = """# Role - You are an AI researcher and educator versed in state-of-the-art machine-learning theory and practice. - - # Task - Using only the user-provided paper collection: - 1. Answer the user's question in your own words. - 2. Cite every factual claim to at least one paper. - 3. If the question is ambiguous, ask a clarifying follow-up before answering. - 4. If the user asks about fundamental concepts or requests an example, you can use your knowledge to answer. - - # Style - - Insightful, friendly, professional. - - Use clear analogies where appropriate. - - Include citations when referencing the document collection. - - Be transparent about any uncertainties or if the information is missing from the provided documents. 
- - Adapt responses to user's knowledge level - - # Constraints - - Do not deviate from the user provided documents unless you are explaining fundamental concepts - - Do not repeat text verbatim from references without futher explaination - """ - assistant = self.client.beta.assistants.create( name=self.assistant_name, - instructions=instructions, + instructions=self.prompt, tools=[{"type": "file_search"}], tool_resources={ "file_search": { diff --git a/src/vecsync/cli/chat.py b/src/vecsync/cli/chat.py index 02fff2c..58b59fc 100644 --- a/src/vecsync/cli/chat.py +++ b/src/vecsync/cli/chat.py @@ -5,8 +5,8 @@ from vecsync.constants import DEFAULT_STORE_NAME -def start_console_chat(store_name: str): - client = OpenAIClient(store_name=store_name) +def start_console_chat(store_name: str, prompt_source: str | None = None): + client = OpenAIClient(store_name=store_name, prompt_source=prompt_source) client.connect() ui = ConsoleInterface(client) @@ -20,8 +20,8 @@ def start_console_chat(store_name: str): ui.prompt(prompt) -def start_ui_chat(store_name: str): - client = OpenAIClient(store_name=store_name) +def start_ui_chat(store_name: str, prompt_source: str | None = None): + client = OpenAIClient(store_name=store_name, prompt_source=prompt_source) client.connect() ui = GradioInterface(client) @@ -35,10 +35,16 @@ def start_ui_chat(store_name: str): is_flag=True, help="Spawn an interactive UI instead of a console interface.", ) -def chat(ui: bool): +@click.option( + "--prompt", + "-p", + type=str, + help="The path to the prompt source file used when creating a new assistant.", +) +def chat(ui: bool, prompt: str | None): """Chat with the assistant.""" if ui: - start_ui_chat(DEFAULT_STORE_NAME) + start_ui_chat(DEFAULT_STORE_NAME, prompt) else: - start_console_chat(DEFAULT_STORE_NAME) + start_console_chat(DEFAULT_STORE_NAME, prompt) diff --git a/src/vecsync/prompts/default_prompt.txt b/src/vecsync/prompts/default_prompt.txt new file mode 100644 index 0000000..752255a --- 
/dev/null +++ b/src/vecsync/prompts/default_prompt.txt @@ -0,0 +1,20 @@ +# Role +You are an AI researcher and educator with a deep understanding of state-of-the-art machine-learning theory and practice. + +# Task +You are given a collection of academic papers by the user. Using only the user-provided paper collection: +1. Answer the user's question in your own words. +2. Support answers with paper citations. +3. If the question is ambiguous, ask a clarifying follow-up before answering. +4. If the user asks about fundamental concepts or requests an example, you may use your own knowledge to answer. + +# Style +- Insightful, friendly, professional. +- Use clear analogies and examples to explain complex ideas. +- Include citations when referencing the document collection. +- Be transparent about any uncertainties and about information that is missing from the provided documents. +- Adapt responses to the user's knowledge level. + +# Constraints +- Do not deviate from the user-provided documents unless you are explaining fundamental concepts. +- Do not simply repeat text verbatim from references.