From bf86cadb975793bc10f5897905a1a50a3034eb0d Mon Sep 17 00:00:00 2001 From: ibadurrehmandg Date: Sun, 8 Feb 2026 00:50:56 +0500 Subject: [PATCH 1/2] enhanced agents and prompt templates --- specgap/app/core/prompts.py | 129 ++++++++++++---------- specgap/app/services/biz_engine.py | 158 +++++++++++++++++++-------- specgap/app/services/cross_check.py | 162 +++++++++++++++++++--------- specgap/app/services/tech_engine.py | 99 ++++++++++++----- specgap/specgap_audits.db | Bin 0 -> 49152 bytes 5 files changed, 367 insertions(+), 181 deletions(-) create mode 100644 specgap/specgap_audits.db diff --git a/specgap/app/core/prompts.py b/specgap/app/core/prompts.py index c6eef0e..25c8bc9 100644 --- a/specgap/app/core/prompts.py +++ b/specgap/app/core/prompts.py @@ -1,75 +1,90 @@ - - COUNCIL_PERSONAS = { "legal": { "role": "Corporate General Counsel", - "focus": "Liability, IP ownership, termination rights, and contract traps.", + "focus": "Liability, IP, termination, hidden contract traps", }, "business": { "role": "Chief Operating Officer (COO)", - "focus": "Operational viability, feature completeness vs. promise, and timeline realism.", + "focus": "Feature completeness, operational viability, timeline realism", }, "finance": { "role": "CFO & Audit Partner", - "focus": "Hidden costs, payment terms, ROI, and financial risk.", + "focus": "Costs, payment terms, ROI, financial risks", } } PROMPT_TEMPLATES = { "ROUND_1": """ - Role: You are {role}. - Domain: {domain} - Task: Analyze the provided documents (Contract + Tech Spec). - Output: A list of initial findings (Risks/Gaps). - Format: JSON. - """, - +Role: {role} +Domain: {domain} +Task: Identify Risks/Gaps in the provided documents (Contract + Tech Spec). +Focus: {focus} +Output: JSON only +Instructions: +- Cite exact text for every finding. +- Classify gaps as Critical / High / Medium / Low +- Optional: Include "suggested_fix" if obvious. +Format: +{{ + "findings": [ + {{ + "title": "...", + "description": "...", + "severity": "Critical|High|Medium|Low", + "source": "File Name / Section", + "suggested_fix": "..." + }} + ] +}} +""", + "ROUND_2": """ - Role: You are {role}. - Domain: {domain} - Task: Review your initial findings against the opinions of your peers. - - [YOUR PREVIOUS DRAFT]: - {current_draft} - - [PEER FEEDBACK]: - {peer_drafts} - - Instruction: - - If a peer found a risk you missed, verify it and add it. - - If a peer contradicts you, debate it (or refine your stance). - Output: Updated Draft 2. - """, - +Role: {role} +Domain: {domain} +Task: Update your findings using peer feedback. +[Your Draft]: {current_draft} +[Peers Drafts]: {peer_drafts} +Output: JSON only +Instructions: +- Merge missing findings from peers +- Resolve contradictions: keep the one with higher severity +- Retain source references +Format same as ROUND_1 +""", + "ROUND_3": """ - Role: You are {role}. - Domain: {domain} - Task: Finalize your "Flashcards" for the user based on your analysis. - - [YOUR ANALYSIS FROM PREVIOUS ROUNDS]: - {current_draft} - - [PEER INSIGHTS]: - {peer_drafts} - - Instruction: Convert your findings into binary choices for the user (Swipe Right to Fix, Left to Ignore). - Based on the documents and your peer discussions, create 3-5 actionable flashcards. - - CRITICAL: You MUST output ONLY valid JSON. No explanations, no markdown, just the JSON object. - - REQUIRED OUTPUT FORMAT: +Role: {role} +Domain: {domain} +Task: Convert findings into actionable Flashcards. +[Analysis]: {current_draft} +[Peer Insights]: {peer_drafts} +Output: JSON only +Instructions: +- Max 3-5 flashcards per persona +- Provide: + - id: unique identifier + - card_type: "Risk" | "Opportunity" + - title: short headline + - description: concise explanation (1-2 sentences) + - fix_action: what user should do + - severity: Critical / High / Medium / Low + - impact: High / Medium / Low (for prioritization) + - swipe_right_payload: exact text/action if user accepts +- Do not add extra text or commentary +Format: +{{ + "flashcards": [ {{ - "flashcards": [ - {{ - "id": "unique_id_1", - "card_type": "Risk", - "title": "Short Headline", - "description": "2 sentence explanation of the issue.", - "fix_action": "Add Liability Cap", - "severity": "High", - "swipe_right_payload": "The exact text/clause to be added to the document if the user accepts this fix." - }} - ] + "id": "...", + "card_type": "...", + "title": "...", + "description": "...", + "fix_action": "...", + "severity": "...", + "impact": "...", + "swipe_right_payload": "..." }} - """ -} \ No newline at end of file + ] +}} +""" +} diff --git a/specgap/app/services/biz_engine.py b/specgap/app/services/biz_engine.py index acabf75..3f0c050 100644 --- a/specgap/app/services/biz_engine.py +++ b/specgap/app/services/biz_engine.py @@ -1,53 +1,119 @@ import json +from datetime import datetime +from typing import Dict, Any, List from app.core.config import model_text -async def analyze_proposal_leverage(proposal_text: str): +# ----------------------------- +# Schema guard +# ----------------------------- +REQUIRED_KEYS = { + "leverage_score": int, + "favor_direction": str, + "trap_clauses": list, + "negotiation_tips": list +} - system_prompt = """ - Role: You are SpecGap, a cynical and ruthless Corporate Lawyer (The 'Auto-Negotiator'). - Task: Audit the attached documents (Proposals, Contracts, Requirements). - - NOTE: The input may contain MULTIPLE documents separated by '=== SOURCE DOCUMENT: [Name] ==='. - - Analysis Goals: - 1. CROSS-CHECK: Does the Proposal (File B) actually meet the Requirements (File A)? - 2. LEVERAGE SCORE: Calculate a score from 0 to 100. - 3. TRAP DETECTION: Find clauses that look standard but are dangerous. - 4. AGENTIC REDLINING: For every major risk, generate the ACTUAL legal text to fix it. - - Output Format (JSON ONLY): - { - "leverage_score": Integer (0-100), - "favor_direction": "Vendor" or "Client" or "Neutral", - "trap_clauses": [ - { - "clause_snippet": "Quote text (Identify Source File)", - "risk_explanation": "Why this is dangerous", - "severity": "High/Critical", - "redline_suggestion": { - "original_text": "...", - "proposed_text": "...", - "negotiation_argument": "..." - } - } - ], - "negotiation_tips": ["..."] - } +def log_step(step: str): + print(f"[{datetime.now().isoformat()}] {step}") + +def validate_and_fix(output: dict) -> dict: + """Ensure required keys exist and values are valid.""" + fixed = {} + for key, key_type in REQUIRED_KEYS.items(): + if key not in output: + fixed[key] = [] if key_type == list else None + else: + fixed[key] = output[key] + + # Clamp leverage score + if isinstance(fixed["leverage_score"], int): + fixed["leverage_score"] = max(0, min(100, fixed["leverage_score"])) + + # Normalize favor direction + if fixed["favor_direction"] not in ["Vendor", "Client", "Neutral"]: + fixed["favor_direction"] = "Neutral" + + return fixed + +def chunk_text(text: str, max_len: int = 40000) -> List[str]: + """Split very large proposals into manageable chunks.""" + return [text[i:i+max_len] for i in range(0, len(text), max_len)] + +# ----------------------------- +# Main Function +# ----------------------------- +async def analyze_proposal_leverage(proposal_text: str, retries: int = 2) -> Dict[str, Any]: + """ + Legal Audit / Negotiation Agent: + Detect leverage, hidden risks, and negotiation tips. + Handles large proposals, JSON drift, and retry on failure. """ + log_step("Preparing system prompt for Legal Audit") + + system_prompt = """ +You are SpecGap, a ruthless corporate lawyer. + +TASK: +Audit provided business documents (may contain multiple files). - full_prompt = f"{system_prompt}\n\n--- BUSINESS PROPOSAL TEXT ---\n{proposal_text}" - - try: - response = await model_text.generate_content_async(full_prompt) - - # Clean JSON output - cleaned_text = response.text.strip() - if cleaned_text.startswith("```json"): - cleaned_text = cleaned_text[7:] - if cleaned_text.endswith("```"): - cleaned_text = cleaned_text[:-3] +GOALS: +1. Check if Proposal meets Requirements. +2. Score leverage (0–100). +3. Detect hidden or dangerous clauses. +4. Provide exact redline text for High or Critical risks. + +RULES: +- Cite exact clause text. +- Do not invent clauses. +- If no risks exist, return empty arrays. +- Redline text must be legally enforceable. +- This is a hypothetical risk analysis, not legal advice. + +SEVERITY RUBRIC: +Critical = unlimited liability, IP ownership transfer, uncapped indemnity +High = asymmetric termination, vague scope, jurisdiction mismatch +Medium = missing SLAs, unclear payments +Low = ambiguity only + +OUTPUT JSON ONLY: +{ + "leverage_score": 0-100, + "favor_direction": "Vendor|Client|Neutral", + "trap_clauses": [...], + "negotiation_tips": ["..."] +} +""" + + # Chunk if text is too long + chunks = chunk_text(proposal_text) + + # Combine prompt + chunks + prompts = [f"{system_prompt}\n\n--- DOCUMENTS (chunk {i+1}) ---\n{chunk}" + for i, chunk in enumerate(chunks)] + full_prompt = "\n".join(prompts) if len(prompts) > 1 else prompts[0] + + attempt = 0 + while attempt <= retries: + try: + log_step(f"Calling model_text.generate_content_async (attempt {attempt+1})") + response = await model_text.generate_content_async(full_prompt) - return json.loads(cleaned_text) - - except Exception as e: - return {"error": "Failed to analyze proposal", "details": str(e)} \ No newline at end of file + cleaned = response.text.strip() + if cleaned.startswith("```"): + cleaned = cleaned.split("```")[1] + + parsed = json.loads(cleaned) + return validate_and_fix(parsed) + + except json.JSONDecodeError: + log_step("JSON parse failed, returning raw output snippet") + return { + "error": "Model output was not valid JSON", + "raw_output": response.text[:1500] + } + + except Exception as e: + log_step(f"Attempt {attempt+1} failed: {e}") + attempt += 1 + + return {"error": "Proposal leverage analysis failed after retries"} diff --git a/specgap/app/services/cross_check.py b/specgap/app/services/cross_check.py index 184768a..93631d4 100644 --- a/specgap/app/services/cross_check.py +++ b/specgap/app/services/cross_check.py @@ -1,65 +1,123 @@ import json +import asyncio +from typing import Optional, Dict, Any, Union, List from app.core.config import model_vision +from datetime import datetime + +# ----------------------------- +# Helper Functions +# ----------------------------- + +def chunk_text(text: str, max_len: int = 40000) -> List[str]: + """Split large text into manageable chunks.""" + return [text[i:i+max_len] for i in range(0, len(text), max_len)] + +def validate_diagram(diagram: Union[str, Dict[str, Any]]) -> str: + """Ensure diagram is a string for prompt inclusion.""" + if isinstance(diagram, dict): + return json.dumps(diagram, indent=2) + return str(diagram) + +def extract_patch_pack(result: Dict[str, Any]) -> Dict[str, Any]: + """Extract jira tickets and negotiation email from Orchestrator output.""" + patch = result.get("PATCH_PACK", {}) + return { + "jira_tickets": patch.get("jira_tickets", []), + "negotiation_email": patch.get("negotiation_email", "") + } + +def validate_json(raw_text: str) -> Dict[str, Any]: + """Safely parse JSON from model output.""" + try: + if raw_text.startswith("```json"): + raw_text = raw_text[7:] + if raw_text.endswith("```"): + raw_text = raw_text[:-3] + return json.loads(raw_text) + except json.JSONDecodeError: + return {"error": "Failed to parse JSON", "raw_response": raw_text} + +def log_step(step: str): + """Simple timestamped logging.""" + print(f"[{datetime.now().isoformat()}] {step}") + +# ----------------------------- +# Main Orchestrator Function +# ----------------------------- async def run_cross_check( - tech_text: str, - proposal_text: str, - diagram_data: dict = None, - tech_report: dict = None, - legal_report: dict = None -): - - + tech_text: str, + proposal_text: str, + diagram_data: Optional[Union[str, Dict[str, Any]]] = None, + tech_report: Optional[Dict[str, Any]] = None, + legal_report: Optional[Dict[str, Any]] = None, + max_text_length: int = 40000, + retries: int = 2 +) -> Dict[str, Any]: + """ + Orchestrator agent: Validates and synthesizes outputs from Tech & Legal agents. + Supports chunking, retries, logging, and safe JSON parsing. + """ + + log_step("Preparing system instructions and input text") system_instruction = """ Role: You are SpecGap, the Chief Technology & Legal Officer (The Orchestrator). - - Task: Validate and Synthesize the findings from your sub-agents (Tech Auditor & Legal Negotiator). - - Inputs: - 1. Technical Spec (The Reality). - 2. Business Proposal (The Promise). - 3. Tech Auditor Report (Gaps found by engineering). - 4. Legal Negotiator Report (Risks found by legal). - + + Task: Validate and Synthesize findings from Tech Auditor & Legal Negotiator. + Goal: - 1. VERIFY: Do the Legal Risks exacerbate the Tech Gaps? (e.g. "Missing SLA" + "No Refund Clause" = Critical Failure). - 2. VISUALIZE: Compare the text to the diagram. - 3. ACTION: Generate the final "Patch Pack". - - Output Requirements (JSON): - 1. CONTRADICTIONS: List where documents disagree. - 2. STRATEGIC_SYNTHESIS: A high-level summary of why this deal is good/bad based on the COMBINED Tech+Legal view. - 3. REALITY_DIAGRAM_MERMAID: Generate strictly valid Mermaid.js code (graph TD). - 4. PATCH_PACK: - - "jira_tickets": List of tickets to fix tech gaps. - - "negotiation_email": A pre-written email to the vendor incorporating the Legal Redlines and Tech Gaps. + 1. VERIFY contradictions and interactions. + 2. VISUALIZE architecture via Mermaid diagram. + 3. ACTION: Generate final Patch Pack (Jira + negotiation email). + + Output strictly as JSON: + - CONTRADICTIONS + - STRATEGIC_SYNTHESIS + - REALITY_DIAGRAM_MERMAID + - PATCH_PACK """ + + # Prepare text chunks + tech_chunks = chunk_text(tech_text, max_text_length) + proposal_chunks = chunk_text(proposal_text, max_text_length) + diagram_str = validate_diagram(diagram_data) if diagram_data else None + prompt_parts = [system_instruction] - - prompt_parts.append(f"\n--- TECH SPEC ---\n{tech_text[:40000]}") - prompt_parts.append(f"\n--- PROPOSAL ---\n{proposal_text[:40000]}") - + + for i, (tech_chunk, proposal_chunk) in enumerate(zip(tech_chunks, proposal_chunks)): + log_step(f"Adding chunk {i+1} to prompt") + prompt_parts.append(f"\n--- TECH SPEC (chunk {i+1}) ---\n{tech_chunk}") + prompt_parts.append(f"\n--- PROPOSAL (chunk {i+1}) ---\n{proposal_chunk}") + if tech_report: - prompt_parts.append(f"\n--- PRIOR AGENT FINDINGS: TECH AUDIT ---\n{json.dumps(tech_report)}") + prompt_parts.append(f"\n--- PRIOR TECH AUDIT ---\n{json.dumps(tech_report, indent=2)}") if legal_report: - prompt_parts.append(f"\n--- PRIOR AGENT FINDINGS: LEGAL AUDIT ---\n{json.dumps(legal_report)}") - - if diagram_data: - prompt_parts.append("--- ARCHITECTURE DIAGRAM (See Attached Image) ---") - prompt_parts.append(diagram_data) - + prompt_parts.append(f"\n--- PRIOR LEGAL AUDIT ---\n{json.dumps(legal_report, indent=2)}") + if diagram_str: + prompt_parts.append(f"\n--- ARCHITECTURE DIAGRAM ---\n{diagram_str}") + prompt_parts.append("\nGenerate the Synthesized JSON Report now.") - try: - response = await model_vision.generate_content_async(prompt_parts) - - cleaned_text = response.text.strip() - if cleaned_text.startswith("```json"): - cleaned_text = cleaned_text[7:] - if cleaned_text.endswith("```"): - cleaned_text = cleaned_text[:-3] - - return json.loads(cleaned_text) - - except Exception as e: - return {"error": "Cross-check failed", "details": str(e)} \ No newline at end of file + # Retry loop + attempt = 0 + while attempt <= retries: + try: + log_step(f"Calling model_vision (attempt {attempt+1})") + response = await model_vision.generate_content_async(prompt_parts) + result = validate_json(response.text.strip()) + log_step("Cross-check successful") + return result + except Exception as e: + log_step(f"Attempt {attempt+1} failed: {e}") + attempt += 1 + return {"error": "Cross-check failed after retries"} + +# ----------------------------- +# Optional Convenience Function +# ----------------------------- +async def run_and_extract_patch_pack(*args, **kwargs) -> Dict[str, Any]: + """ + Run cross-check and directly return the Patch Pack (jira + email) + """ + result = await run_cross_check(*args, **kwargs) + return extract_patch_pack(result) diff --git a/specgap/app/services/tech_engine.py b/specgap/app/services/tech_engine.py index a632666..4b714bb 100644 --- a/specgap/app/services/tech_engine.py +++ b/specgap/app/services/tech_engine.py @@ -1,27 +1,65 @@ import json +from datetime import datetime +from typing import List, Dict, Any from app.core.config import model_text -async def analyze_tech_gaps(spec_text: str): - - # 1. The System Prompt (The "Senior Engineer" Persona) +# ----------------------------- +# Helper Functions +# ----------------------------- + +def chunk_text(text: str, max_len: int = 40000) -> List[str]: + """Split large text into manageable chunks.""" + return [text[i:i+max_len] for i in range(0, len(text), max_len)] + +def validate_json(raw_text: str) -> Dict[str, Any]: + """Safely parse JSON from model output.""" + try: + if raw_text.startswith("```json"): + raw_text = raw_text[7:] + if raw_text.endswith("```"): + raw_text = raw_text[:-3] + return json.loads(raw_text) + except json.JSONDecodeError: + return {"error": "Failed to parse JSON", "raw_response": raw_text} + +def log_step(step: str): + """Simple timestamped logging.""" + print(f"[{datetime.now().isoformat()}] {step}") + +# ----------------------------- +# Main Function +# ----------------------------- + +async def analyze_tech_gaps( + spec_text: str, + max_text_length: int = 40000, + retries: int = 2 +) -> Dict[str, Any]: + """ + Tech Gap Analysis Agent: + Detects missing components, consistency errors, and ambiguity in technical specifications. + Supports chunking, retries, and safe JSON parsing. + """ + + log_step("Preparing system prompt for Tech Gap Analysis") system_prompt = """ Role: You are SpecGap, a Senior Principal Software Architect. Task: Perform 'ABSENCE DETECTION' and 'CONSISTENCY CHECK' on the provided documents. - - NOTE: The input may contain MULTIPLE documents (e.g., Requirements and Proposals), + + NOTE: The input may contain MULTIPLE documents (e.g., Requirements and Proposals), separated by '=== SOURCE DOCUMENT: [Name] ==='. - + Instructions: 1. CROSS-REFERENCE: If File A (Requirements) asks for a feature, check if File B (Proposal) implements it. 2. Analyze the text for mentioned features that lack defined logic. (e.g., If 'Auth' is mentioned but no 'Token Expiry' or 'OAuth provider' is defined, flag it). 3. Look for 'Happy Path Bias' (where only success scenarios are described, but error states are missing). - + CRITICAL INSTRUCTION: CITATIONS REQUIRED - For every gap found, you MUST provide a "source_reference". + For every gap found, provide a "source_reference". - Quote the exact text from the document. - Mention which SOURCE FILE the text comes from. - + Output Format: Return ONLY valid JSON. { @@ -39,21 +77,30 @@ async def analyze_tech_gaps(spec_text: str): } """ - # 2. The Interaction - # We combine the prompt + the user's PDF text - full_prompt = f"{system_prompt}\n\n--- TECHNICAL SPECIFICATION ---\n{spec_text}" + # 1. Chunk the spec_text if too long + chunks = chunk_text(spec_text, max_text_length) - try: - response = await model_text.generate_content_async(full_prompt) - - # 3. Cleaning the Output (Gemini sometimes adds ```json markers) - cleaned_text = response.text.strip() - if cleaned_text.startswith("```json"): - cleaned_text = cleaned_text[7:] - if cleaned_text.endswith("```"): - cleaned_text = cleaned_text[:-3] - - return json.loads(cleaned_text) - - except Exception as e: - return {"error": "Failed to analyze tech spec", "details": str(e)} \ No newline at end of file + full_prompt = [] + for i, chunk in enumerate(chunks): + log_step(f"Adding chunk {i+1} to prompt") + full_prompt.append(f"{system_prompt}\n--- TECHNICAL SPEC (chunk {i+1}) ---\n{chunk}") + + # Retry mechanism + attempt = 0 + while attempt <= retries: + try: + log_step(f"Calling model_text.generate_content_async (attempt {attempt+1})") + # If multiple chunks, join into one prompt + combined_prompt = "\n".join(full_prompt) if len(full_prompt) > 1 else full_prompt[0] + response = await model_text.generate_content_async(combined_prompt) + + log_step("Cleaning and validating JSON output") + result = validate_json(response.text.strip()) + log_step("Tech Gap Analysis successful") + return result + + except Exception as e: + log_step(f"Attempt {attempt+1} failed: {e}") + attempt += 1 + + return {"error": "Tech Gap Analysis failed after retries"} diff --git a/specgap/specgap_audits.db b/specgap/specgap_audits.db new file mode 100644 index 0000000000000000000000000000000000000000..687e6f4260bc8741955cafb5169bf1601dbf2651 GIT binary patch literal 49152 zcmeI*O>g5w7{Kv(w_7(!tKAE-xS0cLEdhdEx)MT2q*=3+O%uolw0pALc$%6faj>1L zg;Nk5;FItr_$V9^H_m%x#&zsCb#vol{UfD~=f%%6zZs8X_0X5!bo^MDXF)h|WAjPv zQBBiopPOc_R;$bRu6!?F>Ue8;Ab)EspYQruuYGX&+up-BwWj`0ZTHQ?KX)&h|2DnG zUk$(h_s(hk4_V9$0R#|0009L4AA#$pZnP|I@s>aJ#Fc;LxO2~sonbJUh-n<9hrU-V z-fj2o*1$H8yU*-b=H0!_Ue_%4G!Jf$eb4&2UDplE(tc^h?&(+*+ZB%*X}m$}sAJzc zW25eS=69`r`^#4U;K@_V?Dhs`_uEeAJ+o0?hP$)DFcfYqJV$=Zc_zm+I6kpezcUHq zPe)FCJrh~q!w;?1y2MN(4D9E2Usa@RPAslsb6~%cX{@9YiYORg3eP<1^*VN|t9mTU z*2B*u5jw?Tspenzk55|t*X9fRwRs>78`h_dZKG#twIrA5hq3(9CMPWplH>K+{HVy< zTxdJG@qAxfypuG~CNdPmAoL<<76#voVeCxZNjg7iO}%^pl=aKkKw4`ate(~ib=~+( zrm$C@LJDz{czTAN&C!>3+2YFrr#04MYg;#tWzcuagUSSFLFA5|b2mCKgw{8QE_5s( zd46JRp+C@#7cvd2JdIck&z)!{hPU(B-ke9VXGtQ(2G&noy3v-2yj`A1evK3`9&An` z?^TjOUTG~F_jTh$Mt{FNdJvA>ssE!J`@vLR6NMDEHm9)GwIq+V7S^vvnr`gxYrnTw zcBOOy6(ZkDBd_jB*+Sd6CFMge>^Sq8SGEzYUM$&0^!V^F*@Vpg z%d1iCm@4FYZBnMXjx&+RH~W9?m2LDYOg4Z*>2fz(xk0JMYwIoxA@5(RI*Q$R9%ZFz zU=fBv=uAWu$5h8#90tg_000IagfB*srAi(oKYXAWR5I_I{1Q0*~ z0R#|00DIc9_iAbfB*srAbZT0Ab Date: Mon, 9 Feb 2026 01:43:06 +0500 Subject: [PATCH 2/2] Add files via upload --- README.md | 152 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 152 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..9d3d930 --- /dev/null +++ b/README.md @@ -0,0 +1,152 @@ +# SpecGap Documentation + +## Project Overview +SpecGap is a two-part application: +- Frontend: A React + Vite UI that lets users upload documents, run audits, and review findings. +- Backend: A FastAPI service that parses documents, runs multi-agent analysis with Gemini via LangGraph, and returns structured audit results and patch packs. + +The backend supports three modes: +- Council session (fast flashcards) +- Deep analysis (tech + legal + synthesis) +- Full spectrum (council + deep analysis together) + +## Folder Structure +- Frontend/ + - Vite + React UI, API client, pages, layout components, and UI primitives. +- specgap/ + - Python backend (FastAPI), AI workflows, parsers, and database layer. +- and prompt templates + - Miscellaneous folder (not referenced in code paths). +- test.json + - Standalone file (not referenced in code paths). + +### Frontend Highlights +- Frontend/src/App.tsx: Route setup and application shell. +- Frontend/src/api/client.ts: API client for backend calls. +- Frontend/src/pages/: Core screens (Upload, Audits, Results, Search, etc.). +- Frontend/src/components/: Layout, audit UI, and reusable UI components. + +### Backend Highlights +- specgap/app/main.py: FastAPI app and API endpoints. +- specgap/app/services/workflow.py: Council multi-agent workflow (LangGraph). +- specgap/app/services/parser.py: Document parsing (PDF, DOCX, TXT/MD, OCR). +- specgap/app/services/tech_engine.py: Tech gap analyzer. +- specgap/app/services/biz_engine.py: Legal/negotiation analyzer. +- specgap/app/services/cross_check.py: Orchestrator synthesis. +- specgap/app/services/patch_pack.py: Output file generation. +- specgap/app/core/database.py: SQLAlchemy models + persistence. + +## Architecture and Data Flow +1. Frontend upload (React UI) sends files via multipart form-data to FastAPI. +2. Parser extracts text from PDF/DOCX/TXT/MD; OCR is attempted if needed. +3. Council session (LangGraph): + - Round 1: Independent agent drafts (legal, business, finance). + - Round 2: Cross-check peer drafts. + - Round 3: Generate flashcards. +4. Deep analysis (optional): + - Tech gap analysis (architect agent). + - Legal leverage analysis (lawyer agent). + - Cross-check synthesis + Mermaid diagram output. +5. Patch pack can be generated from selected cards (contract addendum, spec update, negotiation email). + +## Installation and Setup + +### Backend (Python) +Requirements are listed in specgap/requirements.txt. + +```bash +cd specgap +python -m venv .venv +. .venv/Scripts/Activate +pip install -r requirements.txt +``` + +### Frontend (Node) +Dependencies are managed via npm in Frontend/package.json. + +```bash +cd Frontend +npm install +``` + +## Environment Variables + +### Backend +Loaded via python-dotenv in specgap/app/core/config.py. + +- GEMINI_API_KEY (required): Google Gemini API key. +- DATABASE_URL (optional): Overrides SQLite DB path. + +Example .env: +``` +GEMINI_API_KEY=your_key_here +DATABASE_URL=sqlite:///./specgap_audits.db +``` + +### Frontend +Defined in Vite and read in Frontend/src/api/client.ts. + +- VITE_API_URL (optional): Base API URL. Defaults to /api which proxies to http://localhost:8000 in dev via Frontend/vite.config.ts. + +Example .env: +``` +VITE_API_URL=http://localhost:8000 +``` + +## How to Run Locally + +### Start Backend +```bash +cd specgap +python run_backend.py +``` +Default: http://localhost:8000 + +### Start Frontend +```bash +cd Frontend +npm run dev +``` +Default: http://localhost:8080 + +The dev server proxies /api to http://localhost:8000 automatically. + +## API Endpoints + +Implemented in specgap/app/main.py: + +### Health +- GET / + - Returns status and architecture info. + +### Council Session +- POST /audit/council-session + - Query: domain (optional, default Software Engineering) + - Body: multipart form-data with files + - Response: flashcards (council_verdict) + +### Patch Pack Generator +- POST /audit/patch-pack + - Body: JSON { selected_cards: [...], domain?: string } + - Response: generated files (Contract_Addendum.txt, Spec_Update.md, Negotiation_Email.txt) + +### Deep Analysis +- POST /audit/deep-analysis + - Query: domain + - Body: multipart form-data with files + - Response: tech_audit, legal_audit, executive_synthesis + +### Full Spectrum Analysis +- POST /audit/full-spectrum + - Query: domain + - Body: multipart form-data with files + - Response: council verdict + deep analysis bundle + +Note: The frontend client references additional endpoints (audits listing, comments, vector search) in Frontend/src/api/client.ts, but those routes are not present in the backend at this time. + +## Contribution Guidelines +- Keep frontend code in Frontend/src/ with TypeScript, React, and Tailwind conventions. +- Keep backend code in specgap/app/ and follow async FastAPI patterns. +- Favor new endpoints and services in clearly named modules under specgap/app/services/. +- Update environment variable docs whenever introducing new config keys. +- Add unit tests where possible (frontend uses Vitest; backend currently has no test harness).