aunraza19 · techieworld2 · Feb 8, 2026 · Copilot · Feb 8, 2026 · Copilot
diff --git a/specgap/.env.example b/specgap/.env.example
diff --git a/specgap/app/main.py b/specgap/app/main.py
@@ -26,7 +26,8 @@
 from app.services.patch_pack import build_patch_pack_files
 from app.services.tech_engine import analyze_tech_gaps
 from app.services.biz_engine import analyze_proposal_leverage
-from app.services.cross_check import run_cross_check
+from app.services.cross_check import run_cross_check, run_smart_comparison
-from app.services.cross_check import run_cross_check, run_smart_comparison
+from app.services.cross_check import run_smart_comparison
-from app.services.cross_check import run_cross_check, run_smart_comparison
+from app.services.cross_check import run_smart_comparison
+from app.services.chunker import condense_large_document
 
 
 # ============== LOGGING SETUP ==============
@@ -133,6 +134,14 @@ async def run_council_session(
 
     logger.info(f"Council session started for: {file_names}")
 
+    # Condense large documents for council efficiency (Test Case 1: 200-page PDFs)
+    if len(combined_text) > settings.MAX_CONTEXT_CHARS:
+        logger.info(
+            f"Large document detected ({len(combined_text):,} chars), "
+            f"condensing for council (limit: {settings.MAX_CONTEXT_CHARS:,})..."
+        )
+        combined_text = await condense_large_document(combined_text)
+
     initial_state = {
         "combined_context": combined_text,
         "domain": domain,
@@ -238,13 +247,15 @@ async def run_deep_analysis(
     """
     combined_text = ""
     file_names = []
-
+    file_texts = {}  
+
     for f in files:
         await f.seek(0)
         text, _ = await extract_text_from_file(f)
         combined_text += f"\n=== SOURCE DOCUMENT: {f.filename} ===\n{text}"
         file_names.append(f.filename)
-
+        file_texts[f.filename] = text
+
     logger.info(f"Deep analysis started for: {file_names}")
 
     try:
@@ -255,14 +266,20 @@ async def run_deep_analysis(
         # Run Biz Engine
         logger.info("[Deep Audit] Running Legal Leverage Analysis...")
         legal_report = await analyze_proposal_leverage(combined_text)
-
-        # Run Cross-Check
+
+        tech_valid = not tech_report.get("error") or bool(tech_report.get("critical_gaps"))
+        legal_valid = not legal_report.get("error") or bool(legal_report.get("trap_clauses"))
+
+        if not tech_valid:
+            logger.warning("Tech engine returned error, cross-check will run without tech context")
+        if not legal_valid:
+            logger.warning("Legal engine returned error, cross-check will run without legal context")
+
         logger.info("[Deep Audit] Running Cross-Check Synthesis...")
-        synthesis = await run_cross_check(
-            tech_text=combined_text,
-            proposal_text=combined_text,
-            tech_report=tech_report,
-            legal_report=legal_report
+        synthesis = await run_smart_comparison(
+            file_texts=file_texts,
+            tech_report=tech_report if tech_valid else None,
+            legal_report=legal_report if legal_valid else None
         )
 
         logger.info("Deep analysis completed successfully")
@@ -298,7 +315,6 @@ async def run_deep_analysis_legacy(
     return await run_deep_analysis(files, domain)
 
 
-# ============== FULL SPECTRUM ENDPOINT ==============
 
 @app.post("/api/v1/audit/full-spectrum", tags=["Audit"])
 async def run_full_spectrum_analysis(
@@ -316,17 +332,27 @@ async def run_full_spectrum_analysis(
     """
     combined_text = ""
     file_names = []
+    file_texts = {} 
 
     for f in files:
         await f.seek(0)
         text, _ = await extract_text_from_file(f)
         combined_text += f"\n=== SOURCE DOCUMENT: {f.filename} ===\n{text}"
         file_names.append(f.filename)
-
+        file_texts[f.filename] = text
+
     logger.info(f"Full spectrum analysis started for: {file_names}")
 
+    council_text = combined_text
+    if len(combined_text) > settings.MAX_CONTEXT_CHARS:
+        logger.info(
+            f"Large document detected ({len(combined_text):,} chars), "
+            f"condensing for council..."
+        )
+        council_text = await condense_large_document(combined_text)
+
     council_state = {
-        "combined_context": combined_text,
+        "combined_context": council_text,
         "domain": domain,
         "round_1_drafts": {},
         "round_2_drafts": {},
@@ -344,11 +370,14 @@ async def run_full_spectrum_analysis(
         logger.info("[Full Spectrum] Running Deep Analysis...")
         tech_report = await analyze_tech_gaps(combined_text)
         legal_report = await analyze_proposal_leverage(combined_text)
-        synthesis = await run_cross_check(
-            tech_text=combined_text,
-            proposal_text=combined_text,
-            tech_report=tech_report,
-            legal_report=legal_report
+
+        tech_valid = not tech_report.get("error") or bool(tech_report.get("critical_gaps"))
+        legal_valid = not legal_report.get("error") or bool(legal_report.get("trap_clauses"))
+
+        synthesis = await run_smart_comparison(
+            file_texts=file_texts,
+            tech_report=tech_report if tech_valid else None,
+            legal_report=legal_report if legal_valid else None
         )
 
         logger.info("Full spectrum analysis completed successfully")
@@ -393,12 +422,7 @@ async def run_full_spectrum_legacy(
 async def classify_uploaded_document(
     file: UploadFile = File(..., description="Document to classify")
 ):
-    """
-    Classify a document to determine recommended analysis agents.
-
-    Useful for understanding what type of document you're uploading
-    before running a full analysis.
-    """
+
     await file.seek(0)
     text, metadata = await extract_text_from_file(file)
     classification = await classify_document(text, file.filename)
@@ -415,11 +439,7 @@ async def classify_uploaded_document(
 async def extract_document_text(
     file: UploadFile = File(..., description="Document to extract text from")
 ):
-    """
-    Extract text from a document without analysis.
-
-    Useful for previewing what the AI will see.
-    """
+
     await file.seek(0)
     content = await file.read()
     file_hash = compute_file_hash(content)
@@ -437,7 +457,6 @@ async def extract_document_text(
     }
 
 
-# ============== AUDIT HISTORY ==============
 
 @app.get("/api/v1/audits", tags=["History"])
 async def list_audits(
@@ -446,9 +465,7 @@ async def list_audits(
     audit_type: str = Query(None, description="Filter by audit type"),
     risk_level: str = Query(None, description="Filter by risk level")
 ):
-    """
-    List saved audit records with optional filtering.
-    """
+
-    
+    """
+    List stored audits with optional filtering and pagination.
+
+    This endpoint returns a paginated list of audit records, which can be
+    filtered by audit type and risk level for easier browsing of history.
+    """
-    
+    """
+    List stored audits with optional filtering and pagination.
+
+    This endpoint returns a paginated list of audit records, which can be
+    filtered by audit type and risk level for easier browsing of history.
+    """
     from app.core.database import get_db_session
 
     with get_db_session() as db:
@@ -481,9 +498,7 @@ async def list_audits(
 
 @app.get("/api/v1/audits/statistics", tags=["History"])
 async def get_audit_statistics():
-    """
-    Get aggregate statistics for dashboard.
-    """
+
     from app.core.database import get_db_session
 
     with get_db_session() as db:
@@ -497,9 +512,7 @@ async def get_audit_statistics():
 
 @app.get("/api/v1/audits/{audit_id}", tags=["History"])
 async def get_audit_detail(audit_id: str):
-    """
-    Get detailed audit record by ID.
-    """
+
     from app.core.database import get_db_session
 
     with get_db_session() as db:

diff --git a/specgap/app/services/__init__.py b/specgap/app/services/__init__.py
@@ -6,8 +6,11 @@
 from .workflow import council_app, CouncilState
 from .tech_engine import analyze_tech_gaps
 from .biz_engine import analyze_proposal_leverage
-from .cross_check import run_cross_check
+from .cross_check import run_cross_check, run_smart_comparison, run_single_doc_audit
 from .patch_pack import build_patch_pack_files
+from .safe_parse import safe_parse_llm_response, extract_json
+from .sanitizer import sanitize_document_text, wrap_as_document_context
+from .chunker import chunk_document, condense_large_document
 from .parser import (
     extract_text_from_file,
     extract_text_from_pdf,
@@ -28,8 +31,18 @@
     "analyze_tech_gaps",
     "analyze_proposal_leverage",
     "run_cross_check",
+    "run_smart_comparison",
+    "run_single_doc_audit",
     "build_patch_pack_files",
 
+    # Utilities: LLM-response parsing, document-text sanitization, and chunking helpers
+    "safe_parse_llm_response",
+    "extract_json",
+    "sanitize_document_text",
+    "wrap_as_document_context",
+    "chunk_document",
+    "condense_large_document",
+
     # Parser
     "extract_text_from_file",
     "extract_text_from_pdf",

diff --git a/specgap/app/services/biz_engine.py b/specgap/app/services/biz_engine.py
@@ -10,6 +10,8 @@
 from app.core.config import model_text, settings
 from app.core.logging import get_logger
 from app.core.exceptions import AIModelError, AIResponseParseError
+from app.services.safe_parse import safe_parse_llm_response
+from app.services.sanitizer import wrap_as_document_context
 
 logger = get_logger("biz_engine")
 
@@ -98,7 +100,7 @@ async def analyze_proposal_leverage(
         logger.warning(f"Truncating input from {len(proposal_text):,} to {max_chars:,} chars")
         proposal_text = proposal_text[:max_chars] + "\n\n[...content truncated...]"
 
-    full_prompt = f"{LEGAL_SYSTEM_PROMPT}\n\n--- BUSINESS PROPOSAL TEXT ---\n{proposal_text}"
+    full_prompt = f"{LEGAL_SYSTEM_PROMPT}\n\n{wrap_as_document_context(proposal_text, label='BUSINESS PROPOSAL')}"
 
     last_error = None
     for attempt in range(max_retries):
@@ -115,8 +117,15 @@ async def analyze_proposal_leverage(
                     details="Empty response"
                 )
 
-            cleaned = _clean_json_response(response.text)
-            result = json.loads(cleaned)
+            result = safe_parse_llm_response(
+                response.text,
+                expected_keys=["leverage_score", "trap_clauses"]
+            )
+
+            if result.get("parse_error"):
+                last_error = AIResponseParseError(agent="biz_engine", raw_response=response.text)
+                logger.warning(f"JSON parse error on attempt {attempt + 1}: {result.get('error_message')}")
+                continue  # Retry with next attempt
 
             # Validate and set defaults
             if "leverage_score" not in result:
@@ -136,10 +145,6 @@ async def analyze_proposal_leverage(
 
             return result
 
-        except json.JSONDecodeError as e:
-            last_error = AIResponseParseError(agent="biz_engine", raw_response=response.text if response else None)
-            logger.warning(f"JSON parse error on attempt {attempt + 1}: {e}")
-
         except Exception as e:
             last_error = e
             logger.warning(f"Legal analysis attempt {attempt + 1} failed: {e}")