Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 63 additions & 6 deletions rag-service/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ class AskRequest(BaseModel):

class SummarizeRequest(BaseModel):
    """Request payload accepted by the summarize endpoint."""

    # Sessions whose documents should be summarized; empty when omitted.
    # A literal list default is fine here: pydantic copies field defaults
    # per instance, so the list is not shared between requests.
    session_ids: list = []
    # Desired summary size. The handler special-cases "short" and "long";
    # every other value falls through to the medium settings.
    length: str = "medium"


class CompareRequest(BaseModel):
Expand Down Expand Up @@ -221,7 +222,7 @@ def ask_question(request: Request, data: AskRequest):


# ===============================
# SUMMARIZE
# SUMMARIZE (Improved Version)
# ===============================
@app.post("/summarize")
@limiter.limit("15/15 minutes")
Expand All @@ -240,14 +241,70 @@ def summarize_pdf(request: Request, data: SummarizeRequest):
if not vectorstores:
return {"summary": "No documents found."}

# ---------------------------------------------------------
# 1️⃣ Increase Retrieval Depth (k=12 instead of 6)
# ---------------------------------------------------------
docs = []
for vs in vectorstores:
docs.extend(vs.similarity_search("Summarize the document", k=6))
docs.extend(
vs.similarity_search(
"Provide a complete structured summary of the document.",
k=12
)
)

context = "\n\n".join([d.page_content for d in docs])
if not docs:
return {"summary": "No document context available."}

# ---------------------------------------------------------
# 2️⃣ Extractive Stage (Key Points First)
# ---------------------------------------------------------
context_chunks = [d.page_content for d in docs]

extractive_prompt = (
"Extract the 10 most important factual points from the document below.\n"
"List them as short bullet points.\n"
"Do NOT summarize yet.\n\n"
+ "\n\n".join(context_chunks)
)

key_points = generate_response(extractive_prompt, 300)

# ---------------------------------------------------------
# 3️⃣ Length Control
# ---------------------------------------------------------
if data.length == "short":
instruction = "Provide 3-5 concise bullet points."
max_tokens = 200
elif data.length == "long":
instruction = "Provide a detailed structured summary with 10-15 bullet points."
max_tokens = 500
else:
instruction = "Provide 5-8 well-structured bullet points."
max_tokens = 350

# ---------------------------------------------------------
# 4️⃣ Abstractive Stage (Final Structured Summary)
# ---------------------------------------------------------
final_prompt = (
"You are an expert document summarizer.\n"
"Using the extracted key points below, generate a structured summary.\n"
"Organize your response into:\n"
"- Executive Summary\n"
"- Key Findings\n"
"- Conclusion\n\n"
f"{instruction}\n\n"
f"Extracted Key Points:\n{key_points}\n\n"
"Final Summary:"
)

summary = generate_response(final_prompt, max_tokens)

prompt = f"Summarize this document:\n\n{context}\n\nSummary:"
summary = generate_response(prompt, 250)
# ---------------------------------------------------------
# 5️⃣ Basic Validation
# ---------------------------------------------------------
if len(summary.split()) < 40:
summary += "\n\n(Note: The document may contain limited summarizable content.)"

return {"summary": summary}

Expand Down Expand Up @@ -293,4 +350,4 @@ def health():


if __name__ == "__main__":
    # Launch the ASGI app exactly once. uvicorn.run() blocks until the
    # server stops, so a repeated call would only relaunch it after
    # shutdown instead of doing anything useful.
    uvicorn.run("main:app", host="0.0.0.0", port=5000)