From a9b7b585b9ed8eddfd593e1f50ac4181ad4146ab Mon Sep 17 00:00:00 2001
From: John Bencina <jbencina@users.noreply.github.com>
Date: Sun, 6 Jul 2025 21:53:51 -0700
Subject: [PATCH 1/2] Update prompt

---
 src/vecsync/chat/clients/openai.py | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/src/vecsync/chat/clients/openai.py b/src/vecsync/chat/clients/openai.py
index e736afa..b7769bd 100644
--- a/src/vecsync/chat/clients/openai.py
+++ b/src/vecsync/chat/clients/openai.py
@@ -217,12 +217,27 @@ def _create_assistant(self) -> str:
             The assistant ID for the current conversation.
         """
 
-        instructions = """You are a helpful research assistant that can search through a large number
-        of journals and papers to help answer the user questions. You have been given a file store which contains
-        the relevant documents the user is referencing. These documents should be your primary source of information.
-        You may only use external knowledge if it is helpful in clarifying questions. It is very important that you
-        remain factual and cite information from the sources provided to you in the file store. You are not allowed
-        to make up information."""
+        instructions = """# Role
+        You are an AI researcher and educator versed in state-of-the-art machine-learning theory and practice.
+
+        # Task
+        Using only the user-provided paper collection:
+        1. Answer the user's question in your own words.
+        2. Cite every factual claim to at least one paper.
+        3. If the question is ambiguous, ask a clarifying follow-up before answering.
+        4. If the user asks about fundamental concepts or requests an example, you can use your knowledge to answer.
+
+        # Style
+        - Insightful, friendly, professional.  
+        - Use clear analogies where appropriate.
+        - Include citations when referencing the document collection.
+        - Be transparent about any uncertainties or if the information is missing from the provided documents.
+        - Adapt responses to user's knowledge level
+
+        # Constraints
+        - Do not deviate from the user provided documents unless you are explaining fundamental concepts
+        - Do not repeat text verbatim from references without futher explaination
+        """
 
         assistant = self.client.beta.assistants.create(
             name=self.assistant_name,

From 38068c8c2bcdac3dd9d4aea2c261fb64bdd3b39b Mon Sep 17 00:00:00 2001
From: John Bencina <jbencina@users.noreply.github.com>
Date: Tue, 8 Jul 2025 23:03:41 -0700
Subject: [PATCH 2/2] Separate prompt to text file

---
 src/vecsync/chat/clients/openai.py     | 53 ++++++++++++++------------
 src/vecsync/cli/chat.py                | 20 ++++++----
 src/vecsync/prompts/default_prompt.txt | 20 ++++++++++
 3 files changed, 62 insertions(+), 31 deletions(-)
 create mode 100644 src/vecsync/prompts/default_prompt.txt

diff --git a/src/vecsync/chat/clients/openai.py b/src/vecsync/chat/clients/openai.py
index b7769bd..dffca36 100644
--- a/src/vecsync/chat/clients/openai.py
+++ b/src/vecsync/chat/clients/openai.py
@@ -1,3 +1,4 @@
+from importlib import resources
 from queue import Empty, Queue
 
 from openai import AssistantEventHandler, OpenAI
@@ -106,14 +107,40 @@ class OpenAIClient:
     settings_path : str | None
         The path to the settings file. If None, the default settings file will be used.
         This is used to store the thread ID for the current conversation.
+    prompt_source : str | None
+        The path to the prompt source file. If None, the default prompt will be used.
     """
 
-    def __init__(self, store_name: str, settings_path: str | None = None):
+    def __init__(self, store_name: str, settings_path: str | None = None, prompt_source: str | None = None):
         self.client = OpenAI()
         self.store_name = store_name
         self.assistant_name = f"vecsync-{store_name}"
         self.connected = False
         self.settings_path = settings_path
+        self.prompt = self._get_prompt(prompt_source)
+
+    def _get_prompt(self, prompt_source: str | None = None) -> str:
+        """Load the assistant instructions as text, always decoded as UTF-8.
+
+        A user-supplied file takes precedence; otherwise the ``default_prompt.txt``
+        resource packaged under ``vecsync.prompts`` is read.
+
+        Parameters
+        ----------
+        prompt_source : str | None
+            The path to the prompt source file. If None, the default prompt will be used.
+
+        Returns
+        -------
+        str
+            The prompt to use for the assistant.
+        """
+        # Pin UTF-8: the locale default (e.g. cp1252 on Windows) can garble non-ASCII prompts.
+        if prompt_source is not None:
+            with open(prompt_source, encoding="utf-8") as f:
+                return f.read()
+        default_prompt = resources.files("vecsync.prompts").joinpath("default_prompt.txt")
+        return default_prompt.read_text(encoding="utf-8")
 
     def connect(self):
         """Connect to the OpenAI API and load the assistant and thread.
@@ -217,31 +244,9 @@ def _create_assistant(self) -> str:
             The assistant ID for the current conversation.
         """
 
-        instructions = """# Role
-        You are an AI researcher and educator versed in state-of-the-art machine-learning theory and practice.
-
-        # Task
-        Using only the user-provided paper collection:
-        1. Answer the user's question in your own words.
-        2. Cite every factual claim to at least one paper.
-        3. If the question is ambiguous, ask a clarifying follow-up before answering.
-        4. If the user asks about fundamental concepts or requests an example, you can use your knowledge to answer.
-
-        # Style
-        - Insightful, friendly, professional.  
-        - Use clear analogies where appropriate.
-        - Include citations when referencing the document collection.
-        - Be transparent about any uncertainties or if the information is missing from the provided documents.
-        - Adapt responses to user's knowledge level
-
-        # Constraints
-        - Do not deviate from the user provided documents unless you are explaining fundamental concepts
-        - Do not repeat text verbatim from references without futher explaination
-        """
-
         assistant = self.client.beta.assistants.create(
             name=self.assistant_name,
-            instructions=instructions,
+            instructions=self.prompt,
             tools=[{"type": "file_search"}],
             tool_resources={
                 "file_search": {
diff --git a/src/vecsync/cli/chat.py b/src/vecsync/cli/chat.py
index 02fff2c..58b59fc 100644
--- a/src/vecsync/cli/chat.py
+++ b/src/vecsync/cli/chat.py
@@ -5,8 +5,8 @@
 from vecsync.constants import DEFAULT_STORE_NAME
 
 
-def start_console_chat(store_name: str):
-    client = OpenAIClient(store_name=store_name)
+def start_console_chat(store_name: str, prompt_source: str | None = None):
+    client = OpenAIClient(store_name=store_name, prompt_source=prompt_source)
     client.connect()
 
     ui = ConsoleInterface(client)
@@ -20,8 +20,8 @@ def start_console_chat(store_name: str):
         ui.prompt(prompt)
 
 
-def start_ui_chat(store_name: str):
-    client = OpenAIClient(store_name=store_name)
+def start_ui_chat(store_name: str, prompt_source: str | None = None):
+    client = OpenAIClient(store_name=store_name, prompt_source=prompt_source)
     client.connect()
 
     ui = GradioInterface(client)
@@ -35,10 +35,16 @@ def start_ui_chat(store_name: str):
     is_flag=True,
     help="Spawn an interactive UI instead of a console interface.",
 )
-def chat(ui: bool):
+@click.option(
+    "--prompt",
+    "-p",
+    type=str,
+    help="The path to the prompt source file used when creating a new assistant.",
+)
+def chat(ui: bool, prompt: str | None):
     """Chat with the assistant."""
 
     if ui:
-        start_ui_chat(DEFAULT_STORE_NAME)
+        start_ui_chat(DEFAULT_STORE_NAME, prompt)
     else:
-        start_console_chat(DEFAULT_STORE_NAME)
+        start_console_chat(DEFAULT_STORE_NAME, prompt)
diff --git a/src/vecsync/prompts/default_prompt.txt b/src/vecsync/prompts/default_prompt.txt
new file mode 100644
index 0000000..752255a
--- /dev/null
+++ b/src/vecsync/prompts/default_prompt.txt
@@ -0,0 +1,20 @@
+# Role
+You are an AI researcher and educator with a deep understanding of state-of-the-art machine-learning theory and practice.
+
+# Task
+You are given a collection of academic papers by the user. Using only the user-provided paper collection:
+1. Answer the user's question in your own words.
+2. Support answers with paper citations.
+3. If the question is ambiguous, ask a clarifying follow-up before answering.
+4. If the user asks about fundamental concepts or requests an example, you may use your own knowledge to answer.
+
+# Style
+- Insightful, friendly, professional.
+- Use clear analogies and examples to explain complex ideas.
+- Include citations when referencing the document collection.
+- Be transparent about any uncertainties or if the information is missing from the provided documents.
+- Adapt responses to the user's knowledge level.
+
+# Constraints
+- Do not deviate from the user-provided documents unless you are explaining fundamental concepts.
+- Do not simply repeat text verbatim from references.