From 681dec068178f64c15e2beb37ae0043dbe7cf6a5 Mon Sep 17 00:00:00 2001 From: Sahil D Shah Date: Wed, 30 Jul 2025 17:31:16 -0400 Subject: [PATCH 01/13] Delete dead Anthropic Citations code --- server/api/views/text_extraction/views.py | 126 ++-------------------- 1 file changed, 10 insertions(+), 116 deletions(-) diff --git a/server/api/views/text_extraction/views.py b/server/api/views/text_extraction/views.py index e4122851..65b03f5c 100644 --- a/server/api/views/text_extraction/views.py +++ b/server/api/views/text_extraction/views.py @@ -1,4 +1,3 @@ -import os import json import re @@ -8,136 +7,31 @@ from rest_framework import status from django.utils.decorators import method_decorator from django.views.decorators.csrf import csrf_exempt -import anthropic from ...services.openai_services import openAIServices from api.models.model_embeddings import Embeddings -USER_PROMPT = """ -I'm creating a system to analyze medical research. It processes peer-reviewed papers to extract key details - -Act as a seasoned physician or medical professional who treat patients with bipolar disorder - -Identify rules for medication inclusion or exclusion based on medical history or concerns - -Return an output with the same structure as these examples: - -The rule is history of suicide attempts. The type of rule is "INCLUDE". The reason is lithium is the -only medication on the market that has been proven to reduce suicidality in patients with bipolar disorder. -The medications for this rule are lithium. - -The rule is weight gain concerns. The type of rule is "EXCLUDE". The reason is Seroquel, Risperdal, Abilify, and -Zyprexa are known for causing weight gain. The medications for this rule are Quetiapine, Aripiprazole, Olanzapine, Risperidone -} -""" - - -def anthropic_citations(client: anthropic.Client, user_prompt: str, content_chunks: list) -> tuple: - """ - Sends a message to Anthropic Citations and extract and format the response - - Parameters - ---------- - client: An instance of the Anthropic API client used to make the request - user_prompt: The user's question or instruction to be processed by the model - content_chunks: A list of text chunks that provide context for the model to use during generation - - Returns - ------- - tuple - - """ - - - message = client.messages.create( - model="claude-3-5-haiku-20241022", - max_tokens=1024, - messages=[ - { - "role": "user", - "content": [ - { - "type": "document", - "source": { - "type": "content", - "content": content_chunks - }, - "citations": {"enabled": True} - }, - - { - "type": "text", - "text": user_prompt - } - ] - } - ], - ) - - # Response Structure: https://docs.anthropic.com/en/docs/build-with-claude/citations#response-structure - - text = [] - cited_text = [] - for content in message.to_dict()['content']: - text.append(content['text']) - if 'citations' in content.keys(): - text.append(" ".join( - [f"<{citation['start_block_index']} - {citation['end_block_index']}>" for citation in content['citations']])) - cited_text.append(" ".join( - [f"<{citation['start_block_index']} - {citation['end_block_index']}> {citation['cited_text']}" for citation in content['citations']])) - - texts = " ".join(text) - cited_texts = " ".join(cited_text) - - return texts, cited_texts - - -@method_decorator(csrf_exempt, name='dispatch') -class RuleExtractionAPIView(APIView): - - permission_classes = [IsAuthenticated] - - def get(self, request): - try: - - client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY")) - - guid = request.query_params.get('guid') - - query = Embeddings.objects.filter(upload_file__guid=guid) - - # TODO: Format into the Anthropic API"s expected input format in the anthropic_citations function - chunks = [{"type": "text", "text": chunk.text} for chunk in query] - - texts, cited_texts = anthropic_citations(client, USER_PROMPT, chunks) - - - return Response({"texts": texts, "cited_texts": cited_texts}, status=status.HTTP_200_OK) - - except Exception as e: - return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR) - - # This is to use openai to extract the rules to save cost + def openai_extraction(content_chunks, user_prompt): """ Prepares the OpenAI input and returns the extracted text. """ - combined_text = "\n\n".join(chunk['text'] for chunk in content_chunks) + combined_text = "\n\n".join(chunk["text"] for chunk in content_chunks) result = openAIServices.openAI( userMessage=combined_text, prompt=user_prompt, model="gpt-4o-mini", temp=0.0, - stream=False + stream=False, ) return result -@method_decorator(csrf_exempt, name='dispatch') +@method_decorator(csrf_exempt, name="dispatch") class RuleExtractionAPIOpenAIView(APIView): permission_classes = [IsAuthenticated] @@ -167,7 +61,7 @@ def get(self, request): Return the entire output as a JSON array. """ - guid = request.query_params.get('guid') + guid = request.query_params.get("guid") query = Embeddings.objects.filter(upload_file__guid=guid) chunks = [ {"type": "text", "text": f"[chunk-{i}] {chunk.text}"} @@ -175,13 +69,11 @@ def get(self, request): ] output_text = openai_extraction(chunks, user_prompt) - cleaned_text = re.sub(r"^```json|```$", "", - output_text.strip()).strip() + cleaned_text = re.sub(r"^```json|```$", "", output_text.strip()).strip() rules = json.loads(cleaned_text) # Attach chunk_number and chunk_text to each rule - chunk_lookup = {f"chunk-{i}": chunk.text for i, - chunk in enumerate(query)} + chunk_lookup = {f"chunk-{i}": chunk.text for i, chunk in enumerate(query)} for rule in rules: source = rule.get("source", "").strip("[]") # e.g. chunk-63 if source.startswith("chunk-"): @@ -192,4 +84,6 @@ def get(self, request): return Response({"rules": rules}, status=status.HTTP_200_OK) except Exception as e: - return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR) + return Response( + {"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR + ) From 2134996d716268a330461f4f12d373e7b9f69bb2 Mon Sep 17 00:00:00 2001 From: Sahil D Shah Date: Wed, 30 Jul 2025 18:13:56 -0400 Subject: [PATCH 02/13] Add all prompts to a centralized prompt service --- server/api/views/conversations/views.py | 118 +++++++++++------- server/api/views/embeddings/embeddingsView.py | 77 ++++++------ server/api/views/text_extraction/views.py | 25 +--- 3 files changed, 108 insertions(+), 112 deletions(-) diff --git a/server/api/views/conversations/views.py b/server/api/views/conversations/views.py index d46f8222..3c1efef3 100644 --- a/server/api/views/conversations/views.py +++ b/server/api/views/conversations/views.py @@ -16,6 +16,7 @@ from .models import Conversation, Message from .serializers import ConversationSerializer from ...services.tools.tools import tools, execute_tool +from ...services.prompt_services import PromptTemplates @csrf_exempt @@ -47,7 +48,7 @@ def extract_text(request: str) -> JsonResponse: messages=[ { "role": "system", - "content": "Give a brief description of this medicine: %s" % tokens, + "content": PromptTemplates.get_medicine_description_prompt(tokens), } ], max_tokens=500, @@ -64,8 +65,10 @@ def get_tokens(string: str, encoding_name: str) -> str: output_string = encoding.decode(tokens) return output_string + class OpenAIAPIException(APIException): """Custom exception for OpenAI API errors.""" + status_code = status.HTTP_500_INTERNAL_SERVER_ERROR default_detail = "An error occurred while communicating with the OpenAI API." default_code = "openai_api_error" @@ -77,6 +80,7 @@ def __init__(self, detail=None, code=None): self.detail = {"error": self.default_detail} self.status_code = code or self.status_code + class ConversationViewSet(viewsets.ModelViewSet): serializer_class = ConversationSerializer permission_classes = [IsAuthenticated] @@ -93,26 +97,29 @@ def destroy(self, request, *args, **kwargs): self.perform_destroy(instance) return Response(status=status.HTTP_204_NO_CONTENT) - @action(detail=True, methods=['post']) + @action(detail=True, methods=["post"]) def continue_conversation(self, request, pk=None): conversation = self.get_object() - user_message = request.data.get('message') - page_context = request.data.get('page_context') + user_message = request.data.get("message") + page_context = request.data.get("page_context") if not user_message: return Response({"error": "Message is required"}, status=400) # Save user message - Message.objects.create(conversation=conversation, - content=user_message, is_user=True) + Message.objects.create( + conversation=conversation, content=user_message, is_user=True + ) # Get ChatGPT response chatgpt_response = self.get_chatgpt_response( - conversation, user_message, page_context) + conversation, user_message, page_context + ) # Save ChatGPT response - Message.objects.create(conversation=conversation, - content=chatgpt_response, is_user=False) + Message.objects.create( + conversation=conversation, content=chatgpt_response, is_user=False + ) # Generate or update title if it's the first message or empty if conversation.messages.count() <= 2 or not conversation.title: @@ -121,27 +128,35 @@ def continue_conversation(self, request, pk=None): return Response({"response": chatgpt_response, "title": conversation.title}) - @action(detail=True, methods=['patch']) + @action(detail=True, methods=["patch"]) def update_title(self, request, pk=None): conversation = self.get_object() - new_title = request.data.get('title') + new_title = request.data.get("title") if not new_title: - return Response({"error": "New title is required"}, status=status.HTTP_400_BAD_REQUEST) + return Response( + {"error": "New title is required"}, status=status.HTTP_400_BAD_REQUEST + ) conversation.title = new_title conversation.save() - return Response({"status": "Title updated successfully", "title": conversation.title}) + return Response( + {"status": "Title updated successfully", "title": conversation.title} + ) def get_chatgpt_response(self, conversation, user_message, page_context=None): - messages = [{ - "role": "system", - "content": "You are a knowledgeable assistant. Balancer is a powerful tool for selecting bipolar medication for patients. We are open-source and available for free use. Your primary role is to assist licensed clinical professionals with information related to Balancer and bipolar medication selection. If applicable, use the supplied tools to assist the professional." - }] + messages = [ + { + "role": "system", + "content": PromptTemplates.get_conversation_system_prompt(), + } + ] if page_context: - context_message = f"If applicable, please use the following content to ask questions. If not applicable, please answer to the best of your ability: {page_context}" + context_message = PromptTemplates.get_conversation_page_context_prompt( + page_context + ) messages.append({"role": "system", "content": context_message}) for msg in conversation.messages.all(): @@ -155,46 +170,50 @@ def get_chatgpt_response(self, conversation, user_message, page_context=None): model="gpt-3.5-turbo", messages=messages, tools=tools, - tool_choice="auto" + tool_choice="auto", ) response_message = response.choices[0].message - tool_calls = response_message.get('tool_calls', []) + tool_calls = response_message.get("tool_calls", []) if not tool_calls: - return response_message['content'] - + return response_message["content"] # Handle tool calls # Add the assistant's message with tool calls to the conversation - messages.append({ - "role": "assistant", - "content": response_message.get('content', ''), - "tool_calls": tool_calls - }) - + messages.append( + { + "role": "assistant", + "content": response_message.get("content", ""), + "tool_calls": tool_calls, + } + ) + # Process each tool call for tool_call in tool_calls: - tool_call_id = tool_call['id'] - tool_function_name = tool_call['function']['name'] - tool_arguments = json.loads(tool_call['function'].get('arguments', '{}')) - + tool_call_id = tool_call["id"] + tool_function_name = tool_call["function"]["name"] + tool_arguments = json.loads( + tool_call["function"].get("arguments", "{}") + ) + # Execute the tool results = execute_tool(tool_function_name, tool_arguments) - + # Add the tool response message - messages.append({ - "role": "tool", - "content": str(results), # Convert results to string - "tool_call_id": tool_call_id - }) - + messages.append( + { + "role": "tool", + "content": str(results), # Convert results to string + "tool_call_id": tool_call_id, + } + ) + # Final API call with tool results final_response = openai.ChatCompletion.create( - model="gpt-3.5-turbo", - messages=messages - ) - return final_response.choices[0].message['content'] + model="gpt-3.5-turbo", messages=messages + ) + return final_response.choices[0].message["content"] except openai.error.OpenAIError as e: logging.error("OpenAI API Error: %s", str(e)) raise OpenAIAPIException(detail=str(e)) @@ -206,14 +225,17 @@ def generate_title(self, conversation): # Get the first two messages messages = conversation.messages.all()[:2] context = "\n".join([msg.content for msg in messages]) - prompt = f"Based on the following conversation, generate a short, descriptive title (max 6 words):\n\n{context}" + prompt = PromptTemplates.get_title_generation_user_prompt(context) response = openai.ChatCompletion.create( model="gpt-3.5-turbo", messages=[ - {"role": "system", "content": "You are a helpful assistant that generates short, descriptive titles."}, - {"role": "user", "content": prompt} - ] + { + "role": "system", + "content": PromptTemplates.get_title_generation_system_prompt(), + }, + {"role": "user", "content": prompt}, + ], ) - return response.choices[0].message['content'].strip() + return response.choices[0].message["content"].strip() diff --git a/server/api/views/embeddings/embeddingsView.py b/server/api/views/embeddings/embeddingsView.py index 9469bb09..fdaf6e9d 100644 --- a/server/api/views/embeddings/embeddingsView.py +++ b/server/api/views/embeddings/embeddingsView.py @@ -6,59 +6,52 @@ from ...services.embedding_services import get_closest_embeddings from ...services.conversions_services import convert_uuids from ...services.openai_services import openAIServices +from ...services.prompt_services import PromptTemplates from django.utils.decorators import method_decorator from django.views.decorators.csrf import csrf_exempt import json -@method_decorator(csrf_exempt, name='dispatch') +@method_decorator(csrf_exempt, name="dispatch") class AskEmbeddingsAPIView(APIView): permission_classes = [IsAuthenticated] def post(self, request, *args, **kwargs): try: user = request.user - guid = request.query_params.get('guid') - stream = request.query_params.get( - 'stream', 'false').lower() == 'true' + guid = request.query_params.get("guid") + stream = request.query_params.get("stream", "false").lower() == "true" - request_data = request.data.get('message', None) + request_data = request.data.get("message", None) if not request_data: - return Response({"error": "Message data is required."}, status=status.HTTP_400_BAD_REQUEST) + return Response( + {"error": "Message data is required."}, + status=status.HTTP_400_BAD_REQUEST, + ) message = str(request_data) embeddings_results = get_closest_embeddings( - user=user, message_data=message, guid=guid) + user=user, message_data=message, guid=guid + ) embeddings_results = convert_uuids(embeddings_results) prompt_texts = [ - f"[Start of INFO {i+1} === GUID: {obj['file_id']}, Page Number: {obj['page_number']}, Chunk Number: {obj['chunk_number']}, Text: {obj['text']} === End of INFO {i+1} ]" for i, obj in enumerate(embeddings_results)] + f"[Start of INFO {i + 1} === GUID: {obj['file_id']}, Page Number: {obj['page_number']}, Chunk Number: {obj['chunk_number']}, Text: {obj['text']} === End of INFO {i + 1} ]" + for i, obj in enumerate(embeddings_results) + ] listOfEmbeddings = " ".join(prompt_texts) - prompt_text = ( - f"""You are an AI assistant tasked with providing detailed, well-structured responses based on the information provided in [PROVIDED-INFO]. Follow these guidelines strictly: - 1. Content: Use information contained within [PROVIDED-INFO] to answer the question. - 2. Organization: Structure your response with clear sections and paragraphs. - 3. Citations: After EACH sentence that uses information from [PROVIDED-INFO], include a citation in this exact format:***[{{file_id}}], Page {{page_number}}, Chunk {{chunk_number}}*** . Only use citations that correspond to the information you're presenting. - 4. Clarity: Ensure your answer is well-structured and easy to follow. - 5. Direct Response: Answer the user's question directly without unnecessary introductions or filler phrases. - Here's an example of the required response format: - ________________________________________ - See's Candy in the context of sales during a specific event. The candy counters rang up 2,690 individual sales on a Friday, and an additional 3,931 transactions on a Saturday ***[16s848as-vcc1-85sd-r196-7f820a4s9de1, Page 5, Chunk 26]***. - People like the consumption of fudge and peanut brittle the most ***[130714d7-b9c1-4sdf-b146-fdsf854cad4f, Page 9, Chunk 19]***. - Here is the history of See's Candy: the company was purchased in 1972, and its products have not been materially altered in 101 years ***[895sdsae-b7v5-416f-c84v-7f9784dc01e1, Page 2, Chunk 13]***. - Bipolar disorder treatment often involves mood stabilizers. Lithium is a commonly prescribed mood stabilizer effective in reducing manic episodes ***[b99988ac-e3b0-4d22-b978-215e814807f4, Page 29, Chunk 122]***. For acute hypomania or mild to moderate mania, initial treatment with risperidone or olanzapine monotherapy is suggested ***[b99988ac-e3b0-4d22-b978-215e814807f4, Page 24, Chunk 101]***. - ________________________________________ - Please provide your response to the user's question following these guidelines precisely. - [PROVIDED-INFO] = {listOfEmbeddings}""" - ) + prompt_text = PromptTemplates.get_embeddings_query_prompt(listOfEmbeddings) if stream: + def stream_generator(): try: last_chunk = "" - for chunk in openAIServices.openAI(message, prompt_text, stream=True, raw_stream=False): + for chunk in openAIServices.openAI( + message, prompt_text, stream=True, raw_stream=False + ): # Format as Server-Sent Events for better client handling if chunk and chunk != last_chunk: last_chunk = chunk @@ -72,27 +65,29 @@ def stream_generator(): yield f"data: {error_data}\n\n" response = StreamingHttpResponse( - stream_generator(), - content_type='text/event-stream' + stream_generator(), content_type="text/event-stream" ) # Add CORS and caching headers for streaming - response['Cache-Control'] = 'no-cache' - response['Access-Control-Allow-Origin'] = '*' + response["Cache-Control"] = "no-cache" + response["Access-Control-Allow-Origin"] = "*" # Disable nginx buffering if behind nginx - response['X-Accel-Buffering'] = 'no' + response["X-Accel-Buffering"] = "no" return response # Non-streaming response answer = openAIServices.openAI( - userMessage=message, - prompt=prompt_text, - stream=False + userMessage=message, prompt=prompt_text, stream=False + ) + return Response( + { + "question": message, + "llm_response": answer, + "embeddings_info": embeddings_results, + "sent_to_llm": prompt_text, + }, + status=status.HTTP_200_OK, ) - return Response({ - "question": message, - "llm_response": answer, - "embeddings_info": embeddings_results, - "sent_to_llm": prompt_text, - }, status=status.HTTP_200_OK) except Exception as e: - return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR) + return Response( + {"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR + ) diff --git a/server/api/views/text_extraction/views.py b/server/api/views/text_extraction/views.py index 65b03f5c..365e30cf 100644 --- a/server/api/views/text_extraction/views.py +++ b/server/api/views/text_extraction/views.py @@ -9,6 +9,7 @@ from django.views.decorators.csrf import csrf_exempt from ...services.openai_services import openAIServices +from ...services.prompt_services import PromptTemplates from api.models.model_embeddings import Embeddings # This is to use openai to extract the rules to save cost @@ -37,29 +38,7 @@ class RuleExtractionAPIOpenAIView(APIView): def get(self, request): try: - user_prompt = """ - You're analyzing medical text from multiple sources. Each chunk is labeled [chunk-X]. - - Act as a seasoned physician or medical professional who treats patients with bipolar disorder. - - Identify rules for medication inclusion or exclusion based on medical history or concerns. - - For each rule you find, return a JSON object using the following format: - - { - "rule": "", - "type": "INCLUDE" or "EXCLUDE", - "reason": "", - "medications": ["", "", ...], - "source": "" - } - - Only include rules that are explicitly stated or strongly implied in the chunk. - - Only use the chunks provided. If no rule is found in a chunk, skip it. - - Return the entire output as a JSON array. - """ + user_prompt = PromptTemplates.get_text_extraction_prompt() guid = request.query_params.get("guid") query = Embeddings.objects.filter(upload_file__guid=guid) From e0414285e746220358ca1d4858bdd642405771da Mon Sep 17 00:00:00 2001 From: Sahil D Shah Date: Tue, 5 Aug 2025 19:18:53 -0400 Subject: [PATCH 03/13] Add prompt_services.py --- server/api/services/prompt_services.py | 97 ++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 server/api/services/prompt_services.py diff --git a/server/api/services/prompt_services.py b/server/api/services/prompt_services.py new file mode 100644 index 00000000..73f58707 --- /dev/null +++ b/server/api/services/prompt_services.py @@ -0,0 +1,97 @@ +""" +Centralized prompt management for the application. +Contains all prompts used across different services. +""" + + +class PromptTemplates: + """Central repository for all prompt templates used in the application.""" + + TEXT_EXTRACTION_RULE_EXTRACTION = """ +You're analyzing medical text from multiple sources. Each chunk is labeled [chunk-X]. + +Act as a seasoned physician or medical professional who treats patients with bipolar disorder. + +Identify rules for medication inclusion or exclusion based on medical history or concerns. + +For each rule you find, return a JSON object using the following format: + +{ + "rule": "", + "type": "INCLUDE" or "EXCLUDE", + "reason": "", + "medications": ["", "", ...], + "source": "" +} + +Only include rules that are explicitly stated or strongly implied in the chunk. + +Only use the chunks provided. If no rule is found in a chunk, skip it. + +Return the entire output as a JSON array. +""" + + EMBEDDINGS_QUERY_RESPONSE = """You are an AI assistant tasked with providing detailed, well-structured responses based on the information provided in [PROVIDED-INFO]. Follow these guidelines strictly: +1. Content: Use information contained within [PROVIDED-INFO] to answer the question. +2. Organization: Structure your response with clear sections and paragraphs. +3. Citations: After EACH sentence that uses information from [PROVIDED-INFO], include a citation in this exact format:***[{{file_id}}], Page {{page_number}}, Chunk {{chunk_number}}*** . Only use citations that correspond to the information you're presenting. +4. Clarity: Ensure your answer is well-structured and easy to follow. +5. Direct Response: Answer the user's question directly without unnecessary introductions or filler phrases. +Here's an example of the required response format: +________________________________________ +See's Candy in the context of sales during a specific event. The candy counters rang up 2,690 individual sales on a Friday, and an additional 3,931 transactions on a Saturday ***[16s848as-vcc1-85sd-r196-7f820a4s9de1, Page 5, Chunk 26]***. +People like the consumption of fudge and peanut brittle the most ***[130714d7-b9c1-4sdf-b146-fdsf854cad4f, Page 9, Chunk 19]***. +Here is the history of See's Candy: the company was purchased in 1972, and its products have not been materially altered in 101 years ***[895sdsae-b7v5-416f-c84v-7f9784dc01e1, Page 2, Chunk 13]***. +Bipolar disorder treatment often involves mood stabilizers. Lithium is a commonly prescribed mood stabilizer effective in reducing manic episodes ***[b99988ac-e3b0-4d22-b978-215e814807f4, Page 29, Chunk 122]***. For acute hypomania or mild to moderate mania, initial treatment with risperidone or olanzapine monotherapy is suggested ***[b99988ac-e3b0-4d22-b978-215e814807f4, Page 24, Chunk 101]***. +________________________________________ +Please provide your response to the user's question following these guidelines precisely. +[PROVIDED-INFO] = {listOfEmbeddings}""" + + CONVERSATION_SYSTEM_PROMPT = """You are a knowledgeable assistant. Balancer is a powerful tool for selecting bipolar medication for patients. We are open-source and available for free use. Your primary role is to assist licensed clinical professionals with information related to Balancer and bipolar medication selection. If applicable, use the supplied tools to assist the professional.""" + + CONVERSATION_PAGE_CONTEXT_PROMPT = """If applicable, please use the following content to ask questions. If not applicable, please answer to the best of your ability: {page_context}""" + + MEDICINE_DESCRIPTION_PROMPT = """Give a brief description of this medicine: %s""" + + TITLE_GENERATION_SYSTEM_PROMPT = ( + """You are a helpful assistant that generates short, descriptive titles.""" + ) + + TITLE_GENERATION_USER_PROMPT = """Based on the following conversation, generate a short, descriptive title (max 6 words): + +{context}""" + + @classmethod + def get_text_extraction_prompt(cls): + """Get the text extraction rule extraction prompt.""" + return cls.TEXT_EXTRACTION_RULE_EXTRACTION + + @classmethod + def get_embeddings_query_prompt(cls, list_of_embeddings): + """Get the embeddings query response prompt with embedded data.""" + return cls.EMBEDDINGS_QUERY_RESPONSE.format(listOfEmbeddings=list_of_embeddings) + + @classmethod + def get_conversation_system_prompt(cls): + """Get the conversation system prompt.""" + return cls.CONVERSATION_SYSTEM_PROMPT + + @classmethod + def get_conversation_page_context_prompt(cls, page_context): + """Get the conversation page context prompt.""" + return cls.CONVERSATION_PAGE_CONTEXT_PROMPT.format(page_context=page_context) + + @classmethod + def get_medicine_description_prompt(cls, tokens): + """Get the medicine description prompt.""" + return cls.MEDICINE_DESCRIPTION_PROMPT % tokens + + @classmethod + def get_title_generation_system_prompt(cls): + """Get the title generation system prompt.""" + return cls.TITLE_GENERATION_SYSTEM_PROMPT + + @classmethod + def get_title_generation_user_prompt(cls, context): + """Get the title generation user prompt.""" + return cls.TITLE_GENERATION_USER_PROMPT.format(context=context) From ff0bfeb322d07a15a35158e2f522eefde5c82157 Mon Sep 17 00:00:00 2001 From: Sahil D Shah Date: Tue, 30 Sep 2025 15:49:14 -0400 Subject: [PATCH 04/13] Resolve merge conflicts --- server/api/views/conversations/views.py | 54 ------------------------- 1 file changed, 54 deletions(-) diff --git a/server/api/views/conversations/views.py b/server/api/views/conversations/views.py index 7aa0de02..cecc9a96 100644 --- a/server/api/views/conversations/views.py +++ b/server/api/views/conversations/views.py @@ -146,21 +146,12 @@ def update_title(self, request, pk=None): ) def get_chatgpt_response(self, conversation, user_message, page_context=None): -<<<<<<< HEAD messages = [ { "role": "system", "content": PromptTemplates.get_conversation_system_prompt(), } ] -======= - client = OpenAI(api_key=os.environ["OPENAI_API_KEY"]) - messages = [{ - "role": "system", - "content": "You are a knowledgeable assistant. Balancer is a powerful tool for selecting bipolar medication for patients. We are open-source and available for free use. Your primary role is to assist licensed clinical professionals with information related to Balancer and bipolar medication selection. If applicable, use the supplied tools to assist the professional." - }] ->>>>>>> listOfMed - if page_context: context_message = PromptTemplates.get_conversation_page_context_prompt( page_context @@ -181,7 +172,6 @@ def get_chatgpt_response(self, conversation, user_message, page_context=None): ) response_message = response.choices[0].message -<<<<<<< HEAD tool_calls = response_message.get("tool_calls", []) if not tool_calls: @@ -204,35 +194,10 @@ def get_chatgpt_response(self, conversation, user_message, page_context=None): tool_arguments = json.loads( tool_call["function"].get("arguments", "{}") ) -======= - tool_calls = getattr(response_message, "tool_calls", []) - - tool_calls = response_message.model_dump().get("tool_calls", []) - - if not tool_calls: - return response_message['content'] - - # Handle tool calls - # Add the assistant's message with tool calls to the conversation - messages.append({ - "role": "assistant", - "content": response_message.content or "", - "tool_calls": tool_calls - }) - - # Process each tool call - for tool_call in tool_calls: - tool_call_id = tool_call['id'] - tool_function_name = tool_call['function']['name'] - tool_arguments = json.loads( - tool_call['function'].get('arguments', '{}')) ->>>>>>> listOfMed - # Execute the tool results = execute_tool(tool_function_name, tool_arguments) # Add the tool response message -<<<<<<< HEAD messages.append( { "role": "tool", @@ -247,21 +212,6 @@ def get_chatgpt_response(self, conversation, user_message, page_context=None): ) return final_response.choices[0].message["content"] except openai.error.OpenAIError as e: -======= - messages.append({ - "role": "tool", - "content": str(results), # Convert results to string - "tool_call_id": tool_call_id - }) - - # Final API call with tool results - final_response = client.chat.completions.create( - model="gpt-3.5-turbo", - messages=messages - ) - return final_response.choices[0].message.content - except OpenAI.error.OpenAIError as e: ->>>>>>> listOfMed logging.error("OpenAI API Error: %s", str(e)) raise OpenAIAPIException(detail=str(e)) except Exception as e: @@ -285,8 +235,4 @@ def generate_title(self, conversation): ], ) -<<<<<<< HEAD return response.choices[0].message["content"].strip() -======= - return response.choices[0].message.content.strip() ->>>>>>> listOfMed From 584e3eeb777c830bad4dbaef7fc1a45e63548ff8 Mon Sep 17 00:00:00 2001 From: Sahil D Shah Date: Tue, 30 Sep 2025 15:52:45 -0400 Subject: [PATCH 05/13] Consolidate prompt constants --- server/api/services/prompt_services.py | 76 ++++++++++++++++++++++---- server/api/views/assistant/views.py | 40 ++------------ 2 files changed, 69 insertions(+), 47 deletions(-) diff --git a/server/api/services/prompt_services.py b/server/api/services/prompt_services.py index 73f58707..2282b4d2 100644 --- a/server/api/services/prompt_services.py +++ b/server/api/services/prompt_services.py @@ -31,19 +31,19 @@ class PromptTemplates: Return the entire output as a JSON array. """ - EMBEDDINGS_QUERY_RESPONSE = """You are an AI assistant tasked with providing detailed, well-structured responses based on the information provided in [PROVIDED-INFO]. Follow these guidelines strictly: -1. Content: Use information contained within [PROVIDED-INFO] to answer the question. -2. Organization: Structure your response with clear sections and paragraphs. -3. Citations: After EACH sentence that uses information from [PROVIDED-INFO], include a citation in this exact format:***[{{file_id}}], Page {{page_number}}, Chunk {{chunk_number}}*** . Only use citations that correspond to the information you're presenting. -4. Clarity: Ensure your answer is well-structured and easy to follow. -5. Direct Response: Answer the user's question directly without unnecessary introductions or filler phrases. + EMBEDDINGS_QUERY_RESPONSE = """You are an AI assistant tasked with providing detailed, well-structured responses based on the information provided in [PROVIDED-INFO]. Follow these guidelines strictly: +1. Content: Use information contained within [PROVIDED-INFO] to answer the question. +2. Organization: Structure your response with clear sections and paragraphs. +3. Citations: After EACH sentence that uses information from [PROVIDED-INFO], include a citation in this exact format:***[{{file_id}}], Page {{page_number}}, Chunk {{chunk_number}}*** . Only use citations that correspond to the information you're presenting. +4. Clarity: Ensure your answer is well-structured and easy to follow. +5. Direct Response: Answer the user's question directly without unnecessary introductions or filler phrases. Here's an example of the required response format: -________________________________________ +________________________________________ See's Candy in the context of sales during a specific event. The candy counters rang up 2,690 individual sales on a Friday, and an additional 3,931 transactions on a Saturday ***[16s848as-vcc1-85sd-r196-7f820a4s9de1, Page 5, Chunk 26]***. -People like the consumption of fudge and peanut brittle the most ***[130714d7-b9c1-4sdf-b146-fdsf854cad4f, Page 9, Chunk 19]***. -Here is the history of See's Candy: the company was purchased in 1972, and its products have not been materially altered in 101 years ***[895sdsae-b7v5-416f-c84v-7f9784dc01e1, Page 2, Chunk 13]***. -Bipolar disorder treatment often involves mood stabilizers. Lithium is a commonly prescribed mood stabilizer effective in reducing manic episodes ***[b99988ac-e3b0-4d22-b978-215e814807f4, Page 29, Chunk 122]***. For acute hypomania or mild to moderate mania, initial treatment with risperidone or olanzapine monotherapy is suggested ***[b99988ac-e3b0-4d22-b978-215e814807f4, Page 24, Chunk 101]***. -________________________________________ +People like the consumption of fudge and peanut brittle the most ***[130714d7-b9c1-4sdf-b146-fdsf854cad4f, Page 9, Chunk 19]***. +Here is the history of See's Candy: the company was purchased in 1972, and its products have not been materially altered in 101 years ***[895sdsae-b7v5-416f-c84v-7f9784dc01e1, Page 2, Chunk 13]***. +Bipolar disorder treatment often involves mood stabilizers. Lithium is a commonly prescribed mood stabilizer effective in reducing manic episodes ***[b99988ac-e3b0-4d22-b978-215e814807f4, Page 29, Chunk 122]***. For acute hypomania or mild to moderate mania, initial treatment with risperidone or olanzapine monotherapy is suggested ***[b99988ac-e3b0-4d22-b978-215e814807f4, Page 24, Chunk 101]***. +________________________________________ Please provide your response to the user's question following these guidelines precisely. [PROVIDED-INFO] = {listOfEmbeddings}""" @@ -95,3 +95,57 @@ def get_title_generation_system_prompt(cls): def get_title_generation_user_prompt(cls, context): """Get the title generation user prompt.""" return cls.TITLE_GENERATION_USER_PROMPT.format(context=context) + + # Assistant tool prompts + ASSISTANT_TOOL_DESCRIPTION = """ + Search the user's uploaded documents for information relevant to answering their question. + Call this function when you need to find specific information from the user's documents + to provide an accurate, citation-backed response. Always search before answering questions + about document content. + """ + + ASSISTANT_TOOL_PROPERTY_DESCRIPTION = """ + A specific search query to find relevant information in the user's documents. + Use keywords, phrases, or questions related to what the user is asking about. + Be specific rather than generic - use terms that would appear in the relevant documents. + """ + + ASSISTANT_INSTRUCTIONS = """ + You are an AI assistant that helps users find and understand information about bipolar disorder + from their uploaded bipolar disorder research documents using semantic search. + + SEMANTIC SEARCH STRATEGY: + - Always perform semantic search using the search_documents function when users ask questions + - Use conceptually related terms and synonyms, not just exact keyword matches + - Search for the meaning and context of the user's question, not just literal words + - Consider medical terminology, lay terms, and related conditions when searching + + FUNCTION USAGE: + - When a user asks about information that might be in their documents ALWAYS use the search_documents function first + - Perform semantic searches using concepts, symptoms, treatments, and related terms from the user's question + - Only provide answers based on information found through document searches + + RESPONSE FORMAT: + After gathering information through semantic searches, provide responses that: + 1. Answer the user's question directly using only the found information + 2. Structure responses with clear sections and paragraphs + 3. Include citations using this exact format: ***[Name {name}, Page {page_number}]*** + 4. Only cite information that directly supports your statements + + If no relevant information is found in the documents, clearly state that the information is not available in the uploaded documents. + """ + + @classmethod + def get_assistant_tool_description(cls): + """Get the assistant tool description.""" + return cls.ASSISTANT_TOOL_DESCRIPTION + + @classmethod + def get_assistant_tool_property_description(cls): + """Get the assistant tool property description.""" + return cls.ASSISTANT_TOOL_PROPERTY_DESCRIPTION + + @classmethod + def get_assistant_instructions(cls): + """Get the assistant instructions.""" + return cls.ASSISTANT_INSTRUCTIONS diff --git a/server/api/views/assistant/views.py b/server/api/views/assistant/views.py index ca65f335..3d5fc95e 100644 --- a/server/api/views/assistant/views.py +++ b/server/api/views/assistant/views.py @@ -15,6 +15,7 @@ from ...services.embedding_services import get_closest_embeddings from ...services.conversions_services import convert_uuids +from ...services.prompt_services import PromptTemplates # Configure logging logger = logging.getLogger(__name__) @@ -119,18 +120,8 @@ def post(self, request): client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) - TOOL_DESCRIPTION = """ - Search the user's uploaded documents for information relevant to answering their question. - Call this function when you need to find specific information from the user's documents - to provide an accurate, citation-backed response. Always search before answering questions - about document content. - """ - - TOOL_PROPERTY_DESCRIPTION = """ - A specific search query to find relevant information in the user's documents. - Use keywords, phrases, or questions related to what the user is asking about. - Be specific rather than generic - use terms that would appear in the relevant documents. - """ + TOOL_DESCRIPTION = PromptTemplates.get_assistant_tool_description() + TOOL_PROPERTY_DESCRIPTION = PromptTemplates.get_assistant_tool_property_description() tools = [ { @@ -195,30 +186,7 @@ def search_documents(query: str, user=user) -> str: except Exception as e: return f"Error searching documents: {str(e)}. Please try again if the issue persists." - INSTRUCTIONS = """ - You are an AI assistant that helps users find and understand information about bipolar disorder - from their uploaded bipolar disorder research documents using semantic search. - - SEMANTIC SEARCH STRATEGY: - - Always perform semantic search using the search_documents function when users ask questions - - Use conceptually related terms and synonyms, not just exact keyword matches - - Search for the meaning and context of the user's question, not just literal words - - Consider medical terminology, lay terms, and related conditions when searching - - FUNCTION USAGE: - - When a user asks about information that might be in their documents ALWAYS use the search_documents function first - - Perform semantic searches using concepts, symptoms, treatments, and related terms from the user's question - - Only provide answers based on information found through document searches - - RESPONSE FORMAT: - After gathering information through semantic searches, provide responses that: - 1. Answer the user's question directly using only the found information - 2. Structure responses with clear sections and paragraphs - 3. Include citations using this exact format: ***[Name {name}, Page {page_number}]*** - 4. Only cite information that directly supports your statements - - If no relevant information is found in the documents, clearly state that the information is not available in the uploaded documents. - """ + INSTRUCTIONS = PromptTemplates.get_assistant_instructions() MODEL_DEFAULTS = { "instructions": INSTRUCTIONS, From 447af54a02e70996af8674fd3852bd29a31beb03 Mon Sep 17 00:00:00 2001 From: Sahil D Shah Date: Tue, 30 Sep 2025 15:52:45 -0400 Subject: [PATCH 06/13] Consolidate the assistant endpoint prompt constants --- server/api/services/prompt_services.py | 76 ++++++++++++++++++++++---- server/api/views/assistant/views.py | 40 ++------------ 2 files changed, 69 insertions(+), 47 deletions(-) diff --git a/server/api/services/prompt_services.py b/server/api/services/prompt_services.py index 73f58707..2282b4d2 100644 --- a/server/api/services/prompt_services.py +++ b/server/api/services/prompt_services.py @@ -31,19 +31,19 @@ class PromptTemplates: Return the entire output as a JSON array. """ - EMBEDDINGS_QUERY_RESPONSE = """You are an AI assistant tasked with providing detailed, well-structured responses based on the information provided in [PROVIDED-INFO]. Follow these guidelines strictly: -1. Content: Use information contained within [PROVIDED-INFO] to answer the question. -2. Organization: Structure your response with clear sections and paragraphs. -3. Citations: After EACH sentence that uses information from [PROVIDED-INFO], include a citation in this exact format:***[{{file_id}}], Page {{page_number}}, Chunk {{chunk_number}}*** . Only use citations that correspond to the information you're presenting. -4. Clarity: Ensure your answer is well-structured and easy to follow. -5. Direct Response: Answer the user's question directly without unnecessary introductions or filler phrases. + EMBEDDINGS_QUERY_RESPONSE = """You are an AI assistant tasked with providing detailed, well-structured responses based on the information provided in [PROVIDED-INFO]. Follow these guidelines strictly: +1. Content: Use information contained within [PROVIDED-INFO] to answer the question. +2. Organization: Structure your response with clear sections and paragraphs. +3. Citations: After EACH sentence that uses information from [PROVIDED-INFO], include a citation in this exact format:***[{{file_id}}], Page {{page_number}}, Chunk {{chunk_number}}*** . Only use citations that correspond to the information you're presenting. +4. Clarity: Ensure your answer is well-structured and easy to follow. +5. Direct Response: Answer the user's question directly without unnecessary introductions or filler phrases. Here's an example of the required response format: -________________________________________ +________________________________________ See's Candy in the context of sales during a specific event. The candy counters rang up 2,690 individual sales on a Friday, and an additional 3,931 transactions on a Saturday ***[16s848as-vcc1-85sd-r196-7f820a4s9de1, Page 5, Chunk 26]***. -People like the consumption of fudge and peanut brittle the most ***[130714d7-b9c1-4sdf-b146-fdsf854cad4f, Page 9, Chunk 19]***. -Here is the history of See's Candy: the company was purchased in 1972, and its products have not been materially altered in 101 years ***[895sdsae-b7v5-416f-c84v-7f9784dc01e1, Page 2, Chunk 13]***. -Bipolar disorder treatment often involves mood stabilizers. Lithium is a commonly prescribed mood stabilizer effective in reducing manic episodes ***[b99988ac-e3b0-4d22-b978-215e814807f4, Page 29, Chunk 122]***. For acute hypomania or mild to moderate mania, initial treatment with risperidone or olanzapine monotherapy is suggested ***[b99988ac-e3b0-4d22-b978-215e814807f4, Page 24, Chunk 101]***. -________________________________________ +People like the consumption of fudge and peanut brittle the most ***[130714d7-b9c1-4sdf-b146-fdsf854cad4f, Page 9, Chunk 19]***. +Here is the history of See's Candy: the company was purchased in 1972, and its products have not been materially altered in 101 years ***[895sdsae-b7v5-416f-c84v-7f9784dc01e1, Page 2, Chunk 13]***. +Bipolar disorder treatment often involves mood stabilizers. Lithium is a commonly prescribed mood stabilizer effective in reducing manic episodes ***[b99988ac-e3b0-4d22-b978-215e814807f4, Page 29, Chunk 122]***. For acute hypomania or mild to moderate mania, initial treatment with risperidone or olanzapine monotherapy is suggested ***[b99988ac-e3b0-4d22-b978-215e814807f4, Page 24, Chunk 101]***. +________________________________________ Please provide your response to the user's question following these guidelines precisely. [PROVIDED-INFO] = {listOfEmbeddings}""" @@ -95,3 +95,57 @@ def get_title_generation_system_prompt(cls): def get_title_generation_user_prompt(cls, context): """Get the title generation user prompt.""" return cls.TITLE_GENERATION_USER_PROMPT.format(context=context) + + # Assistant tool prompts + ASSISTANT_TOOL_DESCRIPTION = """ + Search the user's uploaded documents for information relevant to answering their question. + Call this function when you need to find specific information from the user's documents + to provide an accurate, citation-backed response. Always search before answering questions + about document content. + """ + + ASSISTANT_TOOL_PROPERTY_DESCRIPTION = """ + A specific search query to find relevant information in the user's documents. + Use keywords, phrases, or questions related to what the user is asking about. + Be specific rather than generic - use terms that would appear in the relevant documents. + """ + + ASSISTANT_INSTRUCTIONS = """ + You are an AI assistant that helps users find and understand information about bipolar disorder + from their uploaded bipolar disorder research documents using semantic search. + + SEMANTIC SEARCH STRATEGY: + - Always perform semantic search using the search_documents function when users ask questions + - Use conceptually related terms and synonyms, not just exact keyword matches + - Search for the meaning and context of the user's question, not just literal words + - Consider medical terminology, lay terms, and related conditions when searching + + FUNCTION USAGE: + - When a user asks about information that might be in their documents ALWAYS use the search_documents function first + - Perform semantic searches using concepts, symptoms, treatments, and related terms from the user's question + - Only provide answers based on information found through document searches + + RESPONSE FORMAT: + After gathering information through semantic searches, provide responses that: + 1. Answer the user's question directly using only the found information + 2. Structure responses with clear sections and paragraphs + 3. Include citations using this exact format: ***[Name {name}, Page {page_number}]*** + 4. Only cite information that directly supports your statements + + If no relevant information is found in the documents, clearly state that the information is not available in the uploaded documents. + """ + + @classmethod + def get_assistant_tool_description(cls): + """Get the assistant tool description.""" + return cls.ASSISTANT_TOOL_DESCRIPTION + + @classmethod + def get_assistant_tool_property_description(cls): + """Get the assistant tool property description.""" + return cls.ASSISTANT_TOOL_PROPERTY_DESCRIPTION + + @classmethod + def get_assistant_instructions(cls): + """Get the assistant instructions.""" + return cls.ASSISTANT_INSTRUCTIONS diff --git a/server/api/views/assistant/views.py b/server/api/views/assistant/views.py index ca65f335..3d5fc95e 100644 --- a/server/api/views/assistant/views.py +++ b/server/api/views/assistant/views.py @@ -15,6 +15,7 @@ from ...services.embedding_services import get_closest_embeddings from ...services.conversions_services import convert_uuids +from ...services.prompt_services import PromptTemplates # Configure logging logger = logging.getLogger(__name__) @@ -119,18 +120,8 @@ def post(self, request): client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) - TOOL_DESCRIPTION = """ - Search the user's uploaded documents for information relevant to answering their question. - Call this function when you need to find specific information from the user's documents - to provide an accurate, citation-backed response. Always search before answering questions - about document content. - """ - - TOOL_PROPERTY_DESCRIPTION = """ - A specific search query to find relevant information in the user's documents. - Use keywords, phrases, or questions related to what the user is asking about. - Be specific rather than generic - use terms that would appear in the relevant documents. - """ + TOOL_DESCRIPTION = PromptTemplates.get_assistant_tool_description() + TOOL_PROPERTY_DESCRIPTION = PromptTemplates.get_assistant_tool_property_description() tools = [ { @@ -195,30 +186,7 @@ def search_documents(query: str, user=user) -> str: except Exception as e: return f"Error searching documents: {str(e)}. Please try again if the issue persists." - INSTRUCTIONS = """ - You are an AI assistant that helps users find and understand information about bipolar disorder - from their uploaded bipolar disorder research documents using semantic search. - - SEMANTIC SEARCH STRATEGY: - - Always perform semantic search using the search_documents function when users ask questions - - Use conceptually related terms and synonyms, not just exact keyword matches - - Search for the meaning and context of the user's question, not just literal words - - Consider medical terminology, lay terms, and related conditions when searching - - FUNCTION USAGE: - - When a user asks about information that might be in their documents ALWAYS use the search_documents function first - - Perform semantic searches using concepts, symptoms, treatments, and related terms from the user's question - - Only provide answers based on information found through document searches - - RESPONSE FORMAT: - After gathering information through semantic searches, provide responses that: - 1. Answer the user's question directly using only the found information - 2. Structure responses with clear sections and paragraphs - 3. Include citations using this exact format: ***[Name {name}, Page {page_number}]*** - 4. Only cite information that directly supports your statements - - If no relevant information is found in the documents, clearly state that the information is not available in the uploaded documents. - """ + INSTRUCTIONS = PromptTemplates.get_assistant_instructions() MODEL_DEFAULTS = { "instructions": INSTRUCTIONS, From 4abe3a693f2bb3489d97372c50886a0ecba1d08e Mon Sep 17 00:00:00 2001 From: Sahil D Shah Date: Tue, 30 Sep 2025 16:16:04 -0400 Subject: [PATCH 07/13] Consolidate the risk endpoint prompts --- server/api/services/prompt_services.py | 15 +++++++++++++++ server/api/views/risk/views.py | 3 ++- server/api/views/risk/views_riskWithSources.py | 16 +++------------- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/server/api/services/prompt_services.py b/server/api/services/prompt_services.py index 2282b4d2..380496f9 100644 --- a/server/api/services/prompt_services.py +++ b/server/api/services/prompt_services.py @@ -149,3 +149,18 @@ def get_assistant_tool_property_description(cls): def get_assistant_instructions(cls): """Get the assistant instructions.""" return cls.ASSISTANT_INSTRUCTIONS + + # Risk endpoint prompts + RISK_BASIC_MEDICATION_PROMPT = """You are to provide a concise list of 5 key benefits and 5 key risks for the medication suggested when taking it for Bipolar. Each point should be short, clear and be kept under 10 words. Begin the benefits section with !!!benefits!!! and the risks section with !!!risk!!!. Please provide this information for the medication: {medication}.""" + + RISK_DIAGNOSIS_MEDICATION_PROMPT = """You are providing medication information from a diagnosis/clinical perspective. Provide a concise list of 5 key benefits and 5 key risks for the medication {medication} when prescribed for Bipolar disorder, focusing on clinical evidence and diagnostic considerations. Each point should be short, clear and be kept under 10 words. Begin the benefits section with !!!benefits!!! and the risks section with !!!risk!!!.""" + + @classmethod + def get_risk_basic_medication_prompt(cls, medication): + """Get the basic medication risk/benefit prompt.""" + return cls.RISK_BASIC_MEDICATION_PROMPT.format(medication=medication) + + @classmethod + def get_risk_diagnosis_medication_prompt(cls, medication): + """Get the diagnosis-specific medication risk/benefit prompt.""" + return cls.RISK_DIAGNOSIS_MEDICATION_PROMPT.format(medication=medication) diff --git a/server/api/views/risk/views.py b/server/api/views/risk/views.py index 99327a8d..8fb9e540 100644 --- a/server/api/views/risk/views.py +++ b/server/api/views/risk/views.py @@ -3,6 +3,7 @@ import openai import json from api.views.listMeds.models import Medication +from api.services.prompt_services import PromptTemplates # XXX: remove csrf_exempt usage before production from django.views.decorators.csrf import csrf_exempt @@ -33,7 +34,7 @@ def medication(request): messages=[ { "role": "system", - "content": f"You are to provide a concise list of 5 key benefits and 5 key risks for the medication suggested when taking it for Bipolar. Each point should be short, clear and be kept under 10 words. Begin the benefits section with !!!benefits!!! and the risks section with !!!risk!!!. Please provide this information for the medication: {diagnosis}." + "content": PromptTemplates.get_risk_basic_medication_prompt(diagnosis) } ] ) diff --git a/server/api/views/risk/views_riskWithSources.py b/server/api/views/risk/views_riskWithSources.py index d1c01615..1ff0a134 100644 --- a/server/api/views/risk/views_riskWithSources.py +++ b/server/api/views/risk/views_riskWithSources.py @@ -3,6 +3,7 @@ from rest_framework import status from api.views.listMeds.models import Medication from api.models.model_medRule import MedRule, MedRuleSource +from api.services.prompt_services import PromptTemplates import openai import os @@ -37,12 +38,7 @@ def post(self, request): }) except Medication.DoesNotExist: - prompt = ( - f"You are to provide a concise list of 5 key benefits and 5 key risks " - f"for the medication suggested when taking it for Bipolar. Each point should be short, " - f"clear and be kept under 10 words. Begin the benefits section with !!!benefits!!! and " - f"the risks section with !!!risk!!!. Please provide this information for the medication: {drug}." - ) + prompt = PromptTemplates.get_risk_basic_medication_prompt(drug) try: ai_response = openai.ChatCompletion.create( @@ -265,13 +261,7 @@ def _build_pdf_link(self, embedding): def _get_ai_response_for_diagnosis(self, drug): """Get AI response with diagnosis-specific context""" - prompt = ( - f"You are providing medication information from a diagnosis/clinical perspective. " - f"Provide a concise list of 5 key benefits and 5 key risks for the medication {drug} " - f"when prescribed for Bipolar disorder, focusing on clinical evidence and diagnostic considerations. " - f"Each point should be short, clear and be kept under 10 words. " - f"Begin the benefits section with !!!benefits!!! and the risks section with !!!risk!!!." - ) + prompt = PromptTemplates.get_risk_diagnosis_medication_prompt(drug) try: ai_response = openai.ChatCompletion.create( From af92d2a1ed7dcb4b56f671a8f65e101d102b1947 Mon Sep 17 00:00:00 2001 From: Sahil D Shah Date: Tue, 30 Sep 2025 18:52:43 -0400 Subject: [PATCH 08/13] Update assistant instructions to reference internal library and limit conversations to bipolar --- server/api/services/prompt_services.py | 27 +++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/server/api/services/prompt_services.py b/server/api/services/prompt_services.py index 380496f9..a9788a2c 100644 --- a/server/api/services/prompt_services.py +++ b/server/api/services/prompt_services.py @@ -98,21 +98,25 @@ def get_title_generation_user_prompt(cls, context): # Assistant tool prompts ASSISTANT_TOOL_DESCRIPTION = """ - Search the user's uploaded documents for information relevant to answering their question. - Call this function when you need to find specific information from the user's documents + Search your internal library of bipolar disorder sources for information relevant to answering the user's question. + Call this function when you need to find specific information from your source library to provide an accurate, citation-backed response. Always search before answering questions - about document content. + about bipolar disorder topics. """ ASSISTANT_TOOL_PROPERTY_DESCRIPTION = """ - A specific search query to find relevant information in the user's documents. + A specific search query to find relevant information in your source library. Use keywords, phrases, or questions related to what the user is asking about. - Be specific rather than generic - use terms that would appear in the relevant documents. + Be specific rather than generic - use terms that would appear in the relevant sources. """ ASSISTANT_INSTRUCTIONS = """ + When you are asked a question, respond as if you are a chatbot with a library of sources that the user can't see. The user did not upload these sources, so they don't know about them. You have to explain what is in the sources and give references to the sources. + + When a prompt is received that is unrelated to bipolar disorder, mental health treatment, or psychiatric medications, respond to the user by saying you are limited to bipolar-specific conversations. + You are an AI assistant that helps users find and understand information about bipolar disorder - from their uploaded bipolar disorder research documents using semantic search. + from your internal library of bipolar disorder research sources using semantic search. SEMANTIC SEARCH STRATEGY: - Always perform semantic search using the search_documents function when users ask questions @@ -121,18 +125,19 @@ def get_title_generation_user_prompt(cls, context): - Consider medical terminology, lay terms, and related conditions when searching FUNCTION USAGE: - - When a user asks about information that might be in their documents ALWAYS use the search_documents function first + - When a user asks about information that might be in your source library ALWAYS use the search_documents function first - Perform semantic searches using concepts, symptoms, treatments, and related terms from the user's question - - Only provide answers based on information found through document searches + - Only provide answers based on information found through your source searches RESPONSE FORMAT: After gathering information through semantic searches, provide responses that: 1. Answer the user's question directly using only the found information 2. Structure responses with clear sections and paragraphs - 3. Include citations using this exact format: ***[Name {name}, Page {page_number}]*** - 4. Only cite information that directly supports your statements + 3. Explain what information you found in your sources and provide context + 4. Include citations using this exact format: ***[Name {name}, Page {page_number}]*** + 5. Only cite information that directly supports your statements - If no relevant information is found in the documents, clearly state that the information is not available in the uploaded documents. + If no relevant information is found in your source library, clearly state that the information is not available in your current sources. """ @classmethod From 5c077e0df07696aee29901b4a4ab8dd22ab8af82 Mon Sep 17 00:00:00 2001 From: Sahil D Shah Date: Wed, 1 Oct 2025 18:19:51 -0400 Subject: [PATCH 09/13] Remove missing import and URL pattern --- server/api/views/text_extraction/urls.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/server/api/views/text_extraction/urls.py b/server/api/views/text_extraction/urls.py index bdf6244f..9e3cb976 100644 --- a/server/api/views/text_extraction/urls.py +++ b/server/api/views/text_extraction/urls.py @@ -1,11 +1,11 @@ from django.urls import path -from .views import RuleExtractionAPIView, RuleExtractionAPIOpenAIView +from .views import RuleExtractionAPIOpenAIView urlpatterns = [ - - path('v1/api/rule_extraction', RuleExtractionAPIView.as_view(), - name='rule_extraction'), - path('v1/api/rule_extraction_openai', RuleExtractionAPIOpenAIView.as_view(), - name='rule_extraction_openai') + path( + "v1/api/rule_extraction_openai", + RuleExtractionAPIOpenAIView.as_view(), + name="rule_extraction_openai", + ) ] From 5d4e53f8de1ddf784c709caf92cf57c06d983212 Mon Sep 17 00:00:00 2001 From: Sahil D Shah Date: Mon, 6 Oct 2025 15:37:15 -0400 Subject: [PATCH 10/13] Remove anthropic from requirements --- README.md | 17 ++++++++--------- server/requirements.txt | 1 - 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 0b48973e..533a7b60 100644 --- a/README.md +++ b/README.md @@ -7,11 +7,11 @@ for patients with bipolar disorder, helping them shorten their journey to stabil You can view the current build of the website here: [https://balancertestsite.com](https://balancertestsite.com/) -## Contributing +## Contributing ### Join the Balancer community -Balancer is a [Code for Philly](https://www.codeforphilly.org/) project +Balancer is a [Code for Philly](https://www.codeforphilly.org/) project Join the [Code for Philly Slack and introduce yourself](https://codeforphilly.org/projects/balancer) in the #balancer channel @@ -19,15 +19,14 @@ The project kanban board is [on GitHub here](https://github.com/orgs/CodeForPhil ### Code for Philly Code of Conduct -The Code for Philly Code of Conduct is [here](https://codeforphilly.org/pages/code_of_conduct/) +The Code for Philly Code of Conduct is [here](https://codeforphilly.org/pages/code_of_conduct/) -### Setting up a development environment +### Setting up a development environment Get the code using git by either forking or cloning `CodeForPhilly/balancer-main` Tools used to run Balancer: 1. `OpenAI API`: Ask for an API key and add it to `config/env/env.dev` -2. `Anthropic API`: Ask for an API key and add it to `config/env/env.dev` Tools used for development: 1. `Docker`: Install Docker Desktop @@ -36,15 +35,15 @@ Tools used for development: ### Running Balancer for development -Start the Postgres, Django REST, and React services by starting Docker Desktop and running `docker compose up --build` +Start the Postgres, Django REST, and React services by starting Docker Desktop and running `docker compose up --build` #### Postgres -- Download a sample of papers to upload from [https://balancertestsite.com](https://balancertestsite.com/) +- Download a sample of papers to upload from [https://balancertestsite.com](https://balancertestsite.com/) - The email and password of `pgAdmin` are specified in `balancer-main/docker-compose.yml` - The first time you use `pgAdmin` after building the Docker containers you will need to register the server. - The `Host name/address` is the Postgres server service name in the Docker Compose file - The `Username` and `Password` are the Postgres server environment variables in the Docker Compose file -- You can use the below code snippet to query the database from a Jupyter notebook: +- You can use the below code snippet to query the database from a Jupyter notebook: ``` from sqlalchemy import create_engine @@ -100,6 +99,6 @@ The Balancer website is a Postgres, Django REST, and React project. The source c ![Architecture Drawing](Architecture.png) -## License +## License Balancer is licensed under the [AGPL-3.0 license](https://choosealicense.com/licenses/agpl-3.0/) diff --git a/server/requirements.txt b/server/requirements.txt index bbaf7bc9..d61cf94f 100644 --- a/server/requirements.txt +++ b/server/requirements.txt @@ -18,4 +18,3 @@ sentence_transformers PyMuPDF==1.24.0 Pillow pytesseract -anthropic \ No newline at end of file From bd6f737c8e37003dc3f37504cbcc4db442f4a6b6 Mon Sep 17 00:00:00 2001 From: Sahil D Shah Date: Mon, 6 Oct 2025 17:50:42 -0400 Subject: [PATCH 11/13] Fix undefined name linting errors --- server/api/views/conversations/views.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/api/views/conversations/views.py b/server/api/views/conversations/views.py index cecc9a96..29eb9dbc 100644 --- a/server/api/views/conversations/views.py +++ b/server/api/views/conversations/views.py @@ -7,6 +7,7 @@ from bs4 import BeautifulSoup from nltk.stem import PorterStemmer import requests +import openai from openai import OpenAI import tiktoken import os @@ -164,6 +165,7 @@ def get_chatgpt_response(self, conversation, user_message, page_context=None): messages.append({"role": "user", "content": user_message}) try: + client = OpenAI(api_key=os.environ["OPENAI_API_KEY"]) response = client.chat.completions.create( model="gpt-3.5-turbo", messages=messages, From 2f31e415dd36ef983f7b36272dcffdf3aed46080 Mon Sep 17 00:00:00 2001 From: Sahil D Shah Date: Tue, 7 Oct 2025 13:14:24 -0400 Subject: [PATCH 12/13] Format the line length of the prompts --- server/api/services/prompt_services.py | 113 +++++++++++++++---------- 1 file changed, 70 insertions(+), 43 deletions(-) diff --git a/server/api/services/prompt_services.py b/server/api/services/prompt_services.py index a9788a2c..be802aa4 100644 --- a/server/api/services/prompt_services.py +++ b/server/api/services/prompt_services.py @@ -7,59 +7,77 @@ class PromptTemplates: """Central repository for all prompt templates used in the application.""" + # Text Extraction + TEXT_EXTRACTION_RULE_EXTRACTION = """ -You're analyzing medical text from multiple sources. Each chunk is labeled [chunk-X]. + You're analyzing medical text from multiple sources. Each chunk is labeled [chunk-X]. -Act as a seasoned physician or medical professional who treats patients with bipolar disorder. + Act as a seasoned physician or medical professional who treats patients with bipolar disorder. -Identify rules for medication inclusion or exclusion based on medical history or concerns. + Identify rules for medication inclusion or exclusion based on medical history or concerns. -For each rule you find, return a JSON object using the following format: + For each rule you find, return a JSON object using the following format: -{ - "rule": "", - "type": "INCLUDE" or "EXCLUDE", - "reason": "", - "medications": ["", "", ...], - "source": "" -} + { + "rule": "", + "type": "INCLUDE" or "EXCLUDE", + "reason": "", + "medications": ["", "", ...], + "source": "" + } -Only include rules that are explicitly stated or strongly implied in the chunk. + Only include rules that are explicitly stated or strongly implied in the chunk. -Only use the chunks provided. If no rule is found in a chunk, skip it. + Only use the chunks provided. If no rule is found in a chunk, skip it. -Return the entire output as a JSON array. -""" + Return the entire output as a JSON array. + """ - EMBEDDINGS_QUERY_RESPONSE = """You are an AI assistant tasked with providing detailed, well-structured responses based on the information provided in [PROVIDED-INFO]. Follow these guidelines strictly: -1. Content: Use information contained within [PROVIDED-INFO] to answer the question. -2. Organization: Structure your response with clear sections and paragraphs. -3. Citations: After EACH sentence that uses information from [PROVIDED-INFO], include a citation in this exact format:***[{{file_id}}], Page {{page_number}}, Chunk {{chunk_number}}*** . Only use citations that correspond to the information you're presenting. -4. Clarity: Ensure your answer is well-structured and easy to follow. -5. Direct Response: Answer the user's question directly without unnecessary introductions or filler phrases. -Here's an example of the required response format: -________________________________________ -See's Candy in the context of sales during a specific event. The candy counters rang up 2,690 individual sales on a Friday, and an additional 3,931 transactions on a Saturday ***[16s848as-vcc1-85sd-r196-7f820a4s9de1, Page 5, Chunk 26]***. -People like the consumption of fudge and peanut brittle the most ***[130714d7-b9c1-4sdf-b146-fdsf854cad4f, Page 9, Chunk 19]***. -Here is the history of See's Candy: the company was purchased in 1972, and its products have not been materially altered in 101 years ***[895sdsae-b7v5-416f-c84v-7f9784dc01e1, Page 2, Chunk 13]***. -Bipolar disorder treatment often involves mood stabilizers. Lithium is a commonly prescribed mood stabilizer effective in reducing manic episodes ***[b99988ac-e3b0-4d22-b978-215e814807f4, Page 29, Chunk 122]***. For acute hypomania or mild to moderate mania, initial treatment with risperidone or olanzapine monotherapy is suggested ***[b99988ac-e3b0-4d22-b978-215e814807f4, Page 24, Chunk 101]***. -________________________________________ -Please provide your response to the user's question following these guidelines precisely. -[PROVIDED-INFO] = {listOfEmbeddings}""" - - CONVERSATION_SYSTEM_PROMPT = """You are a knowledgeable assistant. Balancer is a powerful tool for selecting bipolar medication for patients. We are open-source and available for free use. Your primary role is to assist licensed clinical professionals with information related to Balancer and bipolar medication selection. If applicable, use the supplied tools to assist the professional.""" - - CONVERSATION_PAGE_CONTEXT_PROMPT = """If applicable, please use the following content to ask questions. If not applicable, please answer to the best of your ability: {page_context}""" + # Embeddings/Search + + EMBEDDINGS_QUERY_RESPONSE = """You are an AI assistant tasked with providing detailed, well-structured responses based + on the information provided in [PROVIDED-INFO]. Follow these guidelines strictly: + 1. Content: Use information contained within [PROVIDED-INFO] to answer the question. + 2. Organization: Structure your response with clear sections and paragraphs. + 3. Citations: After EACH sentence that uses information from [PROVIDED-INFO], + include a citation in this exact format:***[{{file_id}}], Page {{page_number}}, Chunk {{chunk_number}}*** . + Only use citations that correspond to the information you're presenting. + 4. Clarity: Ensure your answer is well-structured and easy to follow. + 5. Direct Response: Answer the user's question directly without unnecessary introductions or filler phrases. + Here's an example of the required response format: + ________________________________________ + See's Candy in the context of sales during a specific event. The candy counters rang up 2,690 individual sales on a Friday, + and an additional 3,931 transactions on a Saturday ***[16s848as-vcc1-85sd-r196-7f820a4s9de1, Page 5, Chunk 26]***. + People like the consumption of fudge and peanut brittle the most ***[130714d7-b9c1-4sdf-b146-fdsf854cad4f, Page 9, Chunk 19]***. + Here is the history of See's Candy: the company was purchased in 1972, and its products have not been materially + altered in 101 years ***[895sdsae-b7v5-416f-c84v-7f9784dc01e1, Page 2, Chunk 13]***. + Bipolar disorder treatment often involves mood stabilizers. Lithium is a commonly prescribed mood stabilizer + effective in reducing manic episodes ***[b99988ac-e3b0-4d22-b978-215e814807f4, Page 29, Chunk 122]***. + For acute hypomania or mild to moderate mania, initial treatment with risperidone or olanzapine monotherapy is + suggested ***[b99988ac-e3b0-4d22-b978-215e814807f4, Page 24, Chunk 101]***. + ________________________________________ + Please provide your response to the user's question following these guidelines precisely. + [PROVIDED-INFO] = {listOfEmbeddings}""" + + # Conversation/Chat + + CONVERSATION_SYSTEM_PROMPT = """You are a knowledgeable assistant. + Balancer is a powerful tool for selecting bipolar medication for patients. We are open-source and available for free use. + Your primary role is to assist licensed clinical professionals with information related to Balancer and bipolar medication selection. + If applicable, use the supplied tools to assist the professional.""" + + CONVERSATION_PAGE_CONTEXT_PROMPT = """If applicable, please use the following content to ask questions. + If not applicable, please answer to the best of your ability: {page_context}""" MEDICINE_DESCRIPTION_PROMPT = """Give a brief description of this medicine: %s""" + # Title Generation + TITLE_GENERATION_SYSTEM_PROMPT = ( """You are a helpful assistant that generates short, descriptive titles.""" ) - TITLE_GENERATION_USER_PROMPT = """Based on the following conversation, generate a short, descriptive title (max 6 words): - -{context}""" + TITLE_GENERATION_USER_PROMPT = """Based on the following conversation, generate a short, descriptive title (max 6 words): {context}""" @classmethod def get_text_extraction_prompt(cls): @@ -96,7 +114,7 @@ def get_title_generation_user_prompt(cls, context): """Get the title generation user prompt.""" return cls.TITLE_GENERATION_USER_PROMPT.format(context=context) - # Assistant tool prompts + # Assistant Instructions ASSISTANT_TOOL_DESCRIPTION = """ Search your internal library of bipolar disorder sources for information relevant to answering the user's question. Call this function when you need to find specific information from your source library @@ -111,9 +129,12 @@ def get_title_generation_user_prompt(cls, context): """ ASSISTANT_INSTRUCTIONS = """ - When you are asked a question, respond as if you are a chatbot with a library of sources that the user can't see. The user did not upload these sources, so they don't know about them. You have to explain what is in the sources and give references to the sources. + When you are asked a question, respond as if you are a chatbot with a library of sources that the user can't see. + The user did not upload these sources, so they don't know about them. + You have to explain what is in the sources and give references to the sources. - When a prompt is received that is unrelated to bipolar disorder, mental health treatment, or psychiatric medications, respond to the user by saying you are limited to bipolar-specific conversations. + When a prompt is received that is unrelated to bipolar disorder, mental health treatment, or psychiatric medications, + respond to the user by saying you are limited to bipolar-specific conversations. You are an AI assistant that helps users find and understand information about bipolar disorder from your internal library of bipolar disorder research sources using semantic search. @@ -155,10 +176,16 @@ def get_assistant_instructions(cls): """Get the assistant instructions.""" return cls.ASSISTANT_INSTRUCTIONS - # Risk endpoint prompts - RISK_BASIC_MEDICATION_PROMPT = """You are to provide a concise list of 5 key benefits and 5 key risks for the medication suggested when taking it for Bipolar. Each point should be short, clear and be kept under 10 words. Begin the benefits section with !!!benefits!!! and the risks section with !!!risk!!!. Please provide this information for the medication: {medication}.""" + # Risk Assessment + + RISK_BASIC_MEDICATION_PROMPT = """You are to provide a concise list of 5 key benefits and 5 key risks for the medication suggested + when taking it for Bipolar. Each point should be short, clear and be kept under 10 words. Begin the benefits + section with !!!benefits!!! and the risks section with !!!risk!!!. Please provide this information for the medication: {medication}.""" - RISK_DIAGNOSIS_MEDICATION_PROMPT = """You are providing medication information from a diagnosis/clinical perspective. Provide a concise list of 5 key benefits and 5 key risks for the medication {medication} when prescribed for Bipolar disorder, focusing on clinical evidence and diagnostic considerations. Each point should be short, clear and be kept under 10 words. Begin the benefits section with !!!benefits!!! and the risks section with !!!risk!!!.""" + RISK_DIAGNOSIS_MEDICATION_PROMPT = """You are providing medication information from a diagnosis/clinical perspective. + Provide a concise list of 5 key benefits and 5 key risks for the medication {medication} when prescribed for Bipolar disorder, + focusing on clinical evidence and diagnostic considerations. Each point should be short, clear and be kept under 10 words. + Begin the benefits section with !!!benefits!!! and the risks section with !!!risk!!!.""" @classmethod def get_risk_basic_medication_prompt(cls, medication): From 2213c689d556e7ed761240dd85ba51fd8cb2d35b Mon Sep 17 00:00:00 2001 From: Sahil D Shah Date: Tue, 7 Oct 2025 14:12:55 -0400 Subject: [PATCH 13/13] Anthropic Rule Extraction endpoint is unused --- config/env/env.dev | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/config/env/env.dev b/config/env/env.dev index 22e70e5d..16cb47d5 100644 --- a/config/env/env.dev +++ b/config/env/env.dev @@ -10,7 +10,6 @@ SQL_PORT=5432 DATABASE=postgres LOGIN_REDIRECT_URL= OPENAI_API_KEY= -ANTHROPIC_API_KEY= PINECONE_API_KEY= EMAIL_HOST_USER= -EMAIL_HOST_PASSWORD= \ No newline at end of file +EMAIL_HOST_PASSWORD=