From 55fa0f3cd131951d448d814cd48c7fa908e7d979 Mon Sep 17 00:00:00 2001 From: ayaanamir <139801250+ayaanamir@users.noreply.github.com> Date: Mon, 13 Apr 2026 13:59:17 -0400 Subject: [PATCH 1/2] feat(lexai-functions): OpenAI translation around RunPod for non-English Add openai_translate for non-English chat around RunPod; bind API keys as secrets, expose RunPod endpoint / model as params, and lazy-init Pinecone so deploy discovery does not need keys at import. --- lexai-functions/.env.example | 23 +++++ lexai-functions/.gitignore | 1 + lexai-functions/main.py | 95 +++++++++++++----- lexai-functions/openai_translate.py | 146 ++++++++++++++++++++++++++++ lexai-functions/requirements.txt | 3 +- 5 files changed, 243 insertions(+), 25 deletions(-) create mode 100644 lexai-functions/.env.example create mode 100644 lexai-functions/openai_translate.py diff --git a/lexai-functions/.env.example b/lexai-functions/.env.example new file mode 100644 index 0000000..93e7e0a --- /dev/null +++ b/lexai-functions/.env.example @@ -0,0 +1,23 @@ +# Copy to `.env` for local runs and for `firebase deploy` (Firebase loads this folder’s `.env` +# when resolving params during deploy). + +# --- Google Cloud Secret Manager (production runtime) --- +# Create or rotate secrets (you will be prompted for the value; paste once, it is not echoed): +# cd .. # repo root (where firebase.json lives) +# firebase functions:secrets:set OPENAI_API_KEY +# firebase functions:secrets:set PINECONE_API_KEY +# firebase functions:secrets:set RUNPOD_API_KEY +# +# First deploy after adding secrets: if the CLI asks, allow it to grant the function’s +# service account access to read these secrets. +# +# For local development only, you may also put the same keys in this file so imports and +# the emulator match production variable names: +OPENAI_API_KEY= +PINECONE_API_KEY= +RUNPOD_API_KEY= + +# --- Plain environment / deploy params (not Secret Manager) --- +# Supplied via this `.env` on deploy, or via interactive prompts if unset. +RUNPOD_ENDPOINT_ID= +OPENAI_TRANSLATION_MODEL=gpt-5.1 diff --git a/lexai-functions/.gitignore b/lexai-functions/.gitignore index 1609bab..4d8ee00 100644 --- a/lexai-functions/.gitignore +++ b/lexai-functions/.gitignore @@ -4,3 +4,4 @@ __pycache__/ # Python virtual environment venv/ *.local +.env diff --git a/lexai-functions/main.py b/lexai-functions/main.py index 1b64714..bca3a52 100644 --- a/lexai-functions/main.py +++ b/lexai-functions/main.py @@ -1,29 +1,64 @@ -import json import os import time +from typing import Any + import requests from firebase_functions import https_fn +from firebase_functions.params import SecretParam, StringParam from firebase_admin import initialize_app from pinecone import Pinecone from dotenv import load_dotenv +from openai_translate import ( + normalize_to_english, + require_openai_if_translating, + translate_english_to_ui_language, +) + load_dotenv() -# --- Config --- -PINECONE_API_KEY = os.getenv("PINECONE_API_KEY") -RUNPOD_API_KEY = os.getenv("RUNPOD_API_KEY") -RUNPOD_ENDPOINT_ID = os.getenv("RUNPOD_ENDPOINT_ID") +# --- Secret Manager (bind in @https_fn.on_call); runtime values appear in os.environ --- +OPENAI_API_KEY = SecretParam("OPENAI_API_KEY") +PINECONE_API_KEY = SecretParam("PINECONE_API_KEY") +RUNPOD_API_KEY = SecretParam("RUNPOD_API_KEY") + +# --- Deploy-time / runtime params (non-secret); also read from lexai-functions/.env on deploy --- +RUNPOD_ENDPOINT_ID = StringParam( + "RUNPOD_ENDPOINT_ID", + label="RunPod endpoint ID", + description="Serverless endpoint ID from the RunPod console (v2 API path segment).", +) +OPENAI_TRANSLATION_MODEL = StringParam( + "OPENAI_TRANSLATION_MODEL", + default="gpt-5.1", + label="OpenAI translation model", + description="Model id for translating non-English UI around RunPod.", +) + INDEX_NAME = "michigan-legislation" EMBED_MODEL = "multilingual-e5-large" initialize_app() -# Initialize Pinecone -pc = Pinecone(api_key=PINECONE_API_KEY) -index = pc.Index(INDEX_NAME) +_PINECONE_CLIENT: Pinecone | None = None +_PINECONE_INDEX: Any = None + + +def _get_pinecone() -> tuple[Pinecone, Any]: + """Lazy Pinecone client so deploy-time discovery does not require API keys at import.""" + global _PINECONE_CLIENT, _PINECONE_INDEX + if _PINECONE_CLIENT is not None and _PINECONE_INDEX is not None: + return _PINECONE_CLIENT, _PINECONE_INDEX + api_key = (os.environ.get("PINECONE_API_KEY") or "").strip() + if not api_key: + raise RuntimeError("PINECONE_API_KEY is not set") + _PINECONE_CLIENT = Pinecone(api_key=api_key) + _PINECONE_INDEX = _PINECONE_CLIENT.Index(INDEX_NAME) + return _PINECONE_CLIENT, _PINECONE_INDEX def embed_query(query_text: str) -> list: + pc, _ = _get_pinecone() embeddings_response = pc.inference.embed( model=EMBED_MODEL, inputs=[query_text], @@ -33,6 +68,7 @@ def embed_query(query_text: str) -> list: def query_pinecone(query_embedding: list, top_k: int = 5) -> list: + _, index = _get_pinecone() results = index.query( vector=query_embedding, top_k=top_k, @@ -42,10 +78,17 @@ def query_pinecone(query_embedding: list, top_k: int = 5) -> list: def call_runpod(messages: list) -> str: + endpoint_id = (os.environ.get("RUNPOD_ENDPOINT_ID") or "").strip() + runpod_key = (os.environ.get("RUNPOD_API_KEY") or "").strip() + if not endpoint_id: + raise RuntimeError("RUNPOD_ENDPOINT_ID is not set") + if not runpod_key: + raise RuntimeError("RUNPOD_API_KEY is not set") + run_response = requests.post( - f"https://api.runpod.ai/v2/{RUNPOD_ENDPOINT_ID}/run", + f"https://api.runpod.ai/v2/{endpoint_id}/run", headers={ - "Authorization": f"Bearer {RUNPOD_API_KEY}", + "Authorization": f"Bearer {runpod_key}", "Content-Type": "application/json", }, json={ @@ -66,8 +109,8 @@ def call_runpod(messages: list) -> str: # Poll for completion, max 5 minutes for _ in range(150): status_response = requests.get( - f"https://api.runpod.ai/v2/{RUNPOD_ENDPOINT_ID}/status/{job_id}", - headers={"Authorization": f"Bearer {RUNPOD_API_KEY}"}, + f"https://api.runpod.ai/v2/{endpoint_id}/status/{job_id}", + headers={"Authorization": f"Bearer {runpod_key}"}, ) status_response.raise_for_status() result = status_response.json() @@ -96,7 +139,11 @@ def call_runpod(messages: list) -> str: return "Error: Request timed out." -@https_fn.on_call(enforce_app_check=False, timeout_sec=300) +@https_fn.on_call( + enforce_app_check=False, + timeout_sec=300, + secrets=[OPENAI_API_KEY, PINECONE_API_KEY, RUNPOD_API_KEY], +) def chat(req: https_fn.CallableRequest) -> dict: try: prompt = req.data.get("prompt", "") @@ -106,33 +153,33 @@ def chat(req: https_fn.CallableRequest) -> dict: if not prompt: return {"error": "No prompt provided"} + require_openai_if_translating(language) - query_embedding = embed_query(prompt) + history_en, prompt_en = normalize_to_english(chat_history, prompt, language) + query_embedding = embed_query(prompt_en) relevant_chunks = query_pinecone(query_embedding, top_k=5) - context = "\n\n---\n\n".join(relevant_chunks) - system_message = { "role": "system", "content": ( - f"You are LexAI, a legal assistant specializing in Michigan legislation. " - f"Answer the user's question based on the following legislation excerpts. " - f"If the excerpts don't contain relevant information, say so honestly. " - f"Respond in {language}.\n\n" + "You are LexAI, a legal assistant specializing in Michigan legislation. " + "Answer the user's question based on the following legislation excerpts. " + "If the excerpts don't contain relevant information, say so honestly. " + "Write your entire answer in clear English.\n\n" f"RELEVANT LEGISLATION:\n{context}" ), } - messages = [system_message] + chat_history + [{"role": "user", "content": prompt}] - + messages = [system_message] + history_en + [{"role": "user", "content": prompt_en}] - response_text = call_runpod(messages) + response_en = call_runpod(messages) + response_text = translate_english_to_ui_language(response_en, language) return {"response": response_text} except Exception as e: - return {"error": str(e)} \ No newline at end of file + return {"error": str(e)} diff --git a/lexai-functions/openai_translate.py b/lexai-functions/openai_translate.py new file mode 100644 index 0000000..776cd34 --- /dev/null +++ b/lexai-functions/openai_translate.py @@ -0,0 +1,146 @@ +"""OpenAI translation: chat → English for RunPod; RunPod reply → UI language.""" + +from __future__ import annotations + +import json +import os +from typing import Any + +from openai import OpenAI + +_BATCH_SYSTEM = """You translate legal app chat fragments to English for a downstream English-only legal model. + +Rules: +- Output ONLY one JSON object. No markdown code fences. No commentary before or after JSON. +- Exact shape: {"items":[{"i":,"t":""}]} +- The same integer keys `i`, same count, and same order as in the input. +- Preserve statute identifiers, MCL / Michigan references, section numbers, docket-style numbers, and party names. +- Do not add legal analysis or advice — translation only. +""" + +_OUT_SYSTEM_TEMPLATE = """Translate the assistant message from English into {target_language}. + +Rules: +- Preserve citations, statute and section numbers, lists, and paragraph breaks where possible. +- Output only the translated text — no preamble, no quotes, no markdown wrapper.""" + + +def is_ui_english(language: str | None) -> bool: + s = (language or "").strip().lower() + return s in ("english", "en", "") + + +def _client() -> OpenAI: + api_key = os.environ.get("OPENAI_API_KEY") + if not api_key: + raise RuntimeError("OPENAI_API_KEY is not set") + return OpenAI(api_key=api_key) + + +def translation_model() -> str: + return os.environ.get("OPENAI_TRANSLATION_MODEL", "gpt-5.1").strip() + + +def require_openai_if_translating(ui_language: str) -> None: + if is_ui_english(ui_language): + return + if not os.environ.get("OPENAI_API_KEY"): + raise RuntimeError("OPENAI_API_KEY is required when the UI language is not English") + + +def _chat_complete( + client: OpenAI, + *, + messages: list[dict[str, str]], + json_object: bool, +) -> str: + model = translation_model() + kwargs: dict[str, Any] = { + "model": model, + "messages": messages, + "temperature": 0.2, + } + if json_object: + kwargs["response_format"] = {"type": "json_object"} + try: + resp = client.chat.completions.create(**kwargs, max_completion_tokens=8192) + except TypeError: + resp = client.chat.completions.create(**kwargs, max_tokens=8192) + content = resp.choices[0].message.content + return (content or "").strip() + + +def normalize_to_english( + chat_history: list[Any], + current_prompt: str, + ui_language: str, +) -> tuple[list[dict[str, str]], str]: + """Return (chat_history with English contents, English current user prompt).""" + history: list[dict[str, str]] = [] + for m in chat_history: + if not isinstance(m, dict): + continue + role = str(m.get("role", "user")) + content = str(m.get("content", "")) + history.append({"role": role, "content": content}) + + prompt = str(current_prompt) + + if is_ui_english(ui_language): + return history, prompt + + client = _client() + payloads: list[dict[str, Any]] = [] + idx = 0 + for m in history: + payloads.append({"i": idx, "role": m["role"], "t": m["content"]}) + idx += 1 + payloads.append({"i": idx, "role": "user", "t": prompt}) + last_i = idx + + user_payload = json.dumps({"items": payloads}, ensure_ascii=False) + raw = _chat_complete( + client, + messages=[ + {"role": "system", "content": _BATCH_SYSTEM}, + {"role": "user", "content": user_payload}, + ], + json_object=True, + ) + data = json.loads(raw or "{}") + items = data.get("items") + if not isinstance(items, list) or len(items) != len(payloads): + raise RuntimeError("OpenAI translation returned invalid items length") + + by_i: dict[int, str] = {} + for x in items: + if not isinstance(x, dict) or "i" not in x: + continue + by_i[int(x["i"])] = str(x.get("t", "")) + + if len(by_i) != len(payloads): + raise RuntimeError("OpenAI translation missing segment keys") + + history_en: list[dict[str, str]] = [] + for p in payloads: + if p["i"] == last_i: + break + history_en.append({"role": str(p["role"]), "content": by_i[p["i"]]}) + prompt_en = by_i[last_i] + return history_en, prompt_en + + +def translate_english_to_ui_language(text: str, ui_language: str) -> str: + if is_ui_english(ui_language): + return text + client = _client() + target = (ui_language or "English").strip() + system = _OUT_SYSTEM_TEMPLATE.format(target_language=target) + return _chat_complete( + client, + messages=[ + {"role": "system", "content": system}, + {"role": "user", "content": text}, + ], + json_object=False, + ) diff --git a/lexai-functions/requirements.txt b/lexai-functions/requirements.txt index 18df91f..ef27de4 100644 --- a/lexai-functions/requirements.txt +++ b/lexai-functions/requirements.txt @@ -2,4 +2,5 @@ firebase-functions>=0.1.0 firebase-admin>=6.0.0 requests>=2.31.0 python-dotenv -pinecone>=5.0.0 \ No newline at end of file +pinecone>=5.0.0 +openai>=1.40.0 \ No newline at end of file From 851846a7a36a552d39d7862007cb377a31af8854 Mon Sep 17 00:00:00 2001 From: ayaanamir <139801250+ayaanamir@users.noreply.github.com> Date: Mon, 13 Apr 2026 15:34:41 -0400 Subject: [PATCH 2/2] Added comments and documentation --- lexai-functions/main.py | 15 +++++++++++++++ lexai-functions/openai_translate.py | 25 +++++++++++++++++++++++-- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/lexai-functions/main.py b/lexai-functions/main.py index bca3a52..bc2ad84 100644 --- a/lexai-functions/main.py +++ b/lexai-functions/main.py @@ -1,3 +1,13 @@ +"""Firebase callable entrypoint for LexAI chat. + +Pipeline: optional translation of user + history into English, embed the English prompt, +retrieve Michigan legislation chunks from Pinecone, call the English-only RunPod legal +model, then translate the assistant reply back to the client's UI language when needed. + +Pinecone is initialized lazily so deploy-time import/discovery does not require +``PINECONE_API_KEY`` until a request actually runs. +""" + import os import time from typing import Any @@ -78,6 +88,7 @@ def query_pinecone(query_embedding: list, top_k: int = 5) -> list: def call_runpod(messages: list) -> str: + """POST to RunPod serverless ``/run``, then poll ``/status`` until COMPLETED or timeout.""" endpoint_id = (os.environ.get("RUNPOD_ENDPOINT_ID") or "").strip() runpod_key = (os.environ.get("RUNPOD_API_KEY") or "").strip() if not endpoint_id: @@ -145,6 +156,10 @@ def call_runpod(messages: list) -> str: secrets=[OPENAI_API_KEY, PINECONE_API_KEY, RUNPOD_API_KEY], ) def chat(req: https_fn.CallableRequest) -> dict: + """HTTPS callable: ``req.data`` may include ``prompt``, ``chat_history``, ``language`` (default ``en``). + + Returns ``{"response": str}`` on success or ``{"error": str}`` on validation/runtime failure. + """ try: prompt = req.data.get("prompt", "") chat_history = req.data.get("chat_history", []) diff --git a/lexai-functions/openai_translate.py b/lexai-functions/openai_translate.py index 776cd34..b7f86d8 100644 --- a/lexai-functions/openai_translate.py +++ b/lexai-functions/openai_translate.py @@ -1,4 +1,13 @@ -"""OpenAI translation: chat → English for RunPod; RunPod reply → UI language.""" +"""OpenAI helpers wrapping a non-English UI around an English-only RunPod model. + +``normalize_to_english`` batches prior turns plus the current user message into one +structured JSON translation call (stable indices, translation-only system prompt). +``translate_english_to_ui_language`` maps the assistant's English reply back to the +client's display language. English UI skips both calls. + +Model id: ``OPENAI_TRANSLATION_MODEL`` (default ``gpt-5.1``). ``_chat_complete`` tries +``max_completion_tokens`` first, then falls back to ``max_tokens`` for older SDK shapes. +""" from __future__ import annotations @@ -8,6 +17,7 @@ from openai import OpenAI +# System prompts: batch in (_BATCH_SYSTEM) vs single assistant string out (_OUT_SYSTEM_TEMPLATE). _BATCH_SYSTEM = """You translate legal app chat fragments to English for a downstream English-only legal model. Rules: @@ -26,11 +36,13 @@ def is_ui_english(language: str | None) -> bool: + """True when the client is using English; skips translation in/out.""" s = (language or "").strip().lower() return s in ("english", "en", "") def _client() -> OpenAI: + """Configured OpenAI client; requires ``OPENAI_API_KEY`` in the environment.""" api_key = os.environ.get("OPENAI_API_KEY") if not api_key: raise RuntimeError("OPENAI_API_KEY is not set") @@ -38,10 +50,12 @@ def _client() -> OpenAI: def translation_model() -> str: + """Model id for translation calls (not the RunPod legal model).""" return os.environ.get("OPENAI_TRANSLATION_MODEL", "gpt-5.1").strip() def require_openai_if_translating(ui_language: str) -> None: + """Fail fast before RAG/RunPod when non-English UI is selected but no API key is bound.""" if is_ui_english(ui_language): return if not os.environ.get("OPENAI_API_KEY"): @@ -54,6 +68,7 @@ def _chat_complete( messages: list[dict[str, str]], json_object: bool, ) -> str: + """One chat.completions call; ``json_object=True`` for batch translate payloads.""" model = translation_model() kwargs: dict[str, Any] = { "model": model, @@ -63,6 +78,7 @@ def _chat_complete( if json_object: kwargs["response_format"] = {"type": "json_object"} try: + # Newer OpenAI Python SDK uses max_completion_tokens. resp = client.chat.completions.create(**kwargs, max_completion_tokens=8192) except TypeError: resp = client.chat.completions.create(**kwargs, max_tokens=8192) @@ -75,7 +91,11 @@ def normalize_to_english( current_prompt: str, ui_language: str, ) -> tuple[list[dict[str, str]], str]: - """Return (chat_history with English contents, English current user prompt).""" + """Return ``(history_en, prompt_en)`` for RunPod. + + Non-English: one batched JSON object over all segments so roles stay aligned with + translated text. Validates that every input index ``i`` is returned exactly once. + """ history: list[dict[str, str]] = [] for m in chat_history: if not isinstance(m, dict): @@ -131,6 +151,7 @@ def normalize_to_english( def translate_english_to_ui_language(text: str, ui_language: str) -> str: + """Map RunPod's English assistant string to the UI language; no-op for English.""" if is_ui_english(ui_language): return text client = _client()