From a149819b896a63f7e59677ae28f0001820d6fe91 Mon Sep 17 00:00:00 2001 From: Neha Pandey Date: Thu, 5 Mar 2026 17:31:57 +0530 Subject: [PATCH 1/2] feat: add PDF research agent example using bindufy --- examples/beginner/pdf_research_agent.py | 127 ++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 examples/beginner/pdf_research_agent.py diff --git a/examples/beginner/pdf_research_agent.py b/examples/beginner/pdf_research_agent.py new file mode 100644 index 00000000..1ce77190 --- /dev/null +++ b/examples/beginner/pdf_research_agent.py @@ -0,0 +1,127 @@ +""" +PDF Research Agent Example for Bindu + +This example agent accepts either a PDF file path or raw text and +returns a structured summary. It demonstrates how to wrap a simple +document-processing workflow using `bindufy()` so the agent becomes +a live service. + + +Prerequisites +------------- + uv add bindu agno openai pypdf + +Usage +----- + export OPENAI_API_KEY="sk-..." + python pdf_research_agent.py + +The agent will be live at http://localhost:3775 +Send it a message like: + {"role": "user", "content": "/path/to/paper.pdf"} +or paste raw text directly as the message content. +""" + +import os + +from agno.agent import Agent +from agno.models.ollama import Ollama + +from bindu.penguin.bindufy import bindufy + +# --------------------------------------------------------------------------- +# 1. 
# ---------------------------------------------------------------------------
# 1. Helper — extract text from a PDF path or pass raw text straight through
# ---------------------------------------------------------------------------

def _read_content(source: str) -> str:
    """Return plain text extracted from *source*.

    If *source* (stripped) is a path to an existing ``.pdf`` file, every page
    is extracted with pypdf and the pages are joined with blank lines.
    Otherwise the string itself is treated as raw document text and returned
    unchanged.

    Never raises: a missing pypdf dependency or an unreadable/corrupt PDF
    yields a bracketed diagnostic string so the agent can report the problem
    instead of crashing the request.
    """
    candidate = source.strip()  # hoisted — original recomputed .strip() four times
    if not (candidate.endswith(".pdf") and os.path.isfile(candidate)):
        return source  # treat as raw document text

    try:
        from pypdf import PdfReader  # optional dependency
    except ImportError:
        return (
            f"[pypdf not installed — cannot read '{candidate}'. "
            "Run: uv add pypdf]"
        )

    try:
        reader = PdfReader(candidate)
        pages = [page.extract_text() or "" for page in reader.pages]
        return "\n\n".join(pages)
    except Exception as exc:  # corrupt/encrypted PDF must not crash the service
        return f"[could not read '{candidate}': {exc}]"


# ---------------------------------------------------------------------------
# 2. Agent definition
# ---------------------------------------------------------------------------

# Research-assistant agent: given document text, it identifies the thesis,
# lists 3-5 key findings, and notes conclusions/recommendations.
agent = Agent(
    instructions=(
        "You are a research assistant that reads documents and produces clear, "
        "concise summaries. When given document text:\n"
        " 1. Identify the main topic or thesis.\n"
        " 2. List the key findings or arguments (3-5 bullet points).\n"
        " 3. Note any important conclusions or recommendations.\n"
        "Be factual and brief. If the text is too short or unclear, say so."
    ),
    model=Ollama(id="llama3"),  # local Ollama model; patch 2/2 swaps in OpenRouter
)


# ---------------------------------------------------------------------------
# 3. Bindu configuration
# ---------------------------------------------------------------------------

config = {
    "author": "your.email@example.com",
    "name": "pdf_research_agent",
    "description": "Summarises PDF files and document text using an LLM.",
    "version": "1.0.0",
    "capabilities": {},
    "auth": {"enabled": False},        # example agent: no authentication
    "storage": {"type": "memory"},     # in-memory; state lost on restart
    "scheduler": {"type": "memory"},
    "deployment": {
        "url": "http://localhost:3775",
        "expose": True,
    },
}
# ---------------------------------------------------------------------------
# 4. Handler — the bridge between Bindu messages and the agent
# ---------------------------------------------------------------------------

def handler(messages: list[dict[str, str]]):
    """Bridge between Bindu's A2A message list and the summarisation agent.

    Scans the conversation history for the most recent user message, resolves
    its content (PDF path or raw text) via ``_read_content``, and asks the
    agent for a structured summary.

    Args:
        messages: Standard A2A message list, e.g.
            [{"role": "user", "content": "/path/to/doc.pdf"}]

    Returns:
        Agent response with the document summary, or a plain-text hint when
        no user message is present.
    """
    # Walk the history newest-first and stop at the first user turn.
    latest_user = next(
        (msg for msg in reversed(messages) if msg.get("role") == "user"),
        None,
    )
    if latest_user is None:
        return "No user message found. Please send a PDF path or document text."

    document_text = _read_content(latest_user.get("content", ""))

    # Embed the full document text in a single summarisation prompt.
    prompt = (
        "Summarize the following document and highlight the key insights:"
        f"\n\n{document_text}"
    )
    return agent.run(input=[{"role": "user", "content": prompt}])
Bindu-fy the agent — one call turns it into a live microservice +# --------------------------------------------------------------------------- + +if __name__ == "__main__": + print("PDF Research Agent running at http://localhost:3775") + bindufy(config, handler) \ No newline at end of file From 7267aa604d3d42ce74b0856b897f48c23674c222 Mon Sep 17 00:00:00 2001 From: Neha Pandey Date: Mon, 23 Mar 2026 21:37:10 +0530 Subject: [PATCH 2/2] fix: restructure agent into standalone folder with skill.yaml, README, env example --- examples/pdf_research_agent/.env.example | 43 ++++++++++ examples/pdf_research_agent/README.md | 0 .../pdf_research_agent.py | 22 ++--- .../skills/pdf-research-skill/skill.yaml | 80 +++++++++++++++++++ 4 files changed, 135 insertions(+), 10 deletions(-) create mode 100644 examples/pdf_research_agent/.env.example create mode 100644 examples/pdf_research_agent/README.md rename examples/{beginner => pdf_research_agent}/pdf_research_agent.py (93%) create mode 100644 examples/pdf_research_agent/skills/pdf-research-skill/skill.yaml diff --git a/examples/pdf_research_agent/.env.example b/examples/pdf_research_agent/.env.example new file mode 100644 index 00000000..c1f3322f --- /dev/null +++ b/examples/pdf_research_agent/.env.example @@ -0,0 +1,43 @@ +# ───────────────────────────────────────────────────────────────────────────── +# PDF Research Agent — Environment Variables +# Copy this file to .env and fill in your values before running the agent. +# +# cp .env.example .env +# ───────────────────────────────────────────────────────────────────────────── + + +# ── LLM Provider ───────────────────────────────────────────────────────────── +# Required. Get your free key at https://openrouter.ai +OPENROUTER_API_KEY=your_openrouter_api_key_here + + +# ── Bindu Deployment ───────────────────────────────────────────────────────── +# URL the agent server binds to. Change port if 3775 is already in use. 
+BINDU_DEPLOYMENT_URL=http://localhost:3775 + + +# ── Storage Backend ────────────────────────────────────────────────────────── +# "memory" — default, no external dependency, data lost on restart +# "postgres" — persistent storage, requires DATABASE_URL below +STORAGE_TYPE=memory + +# Required only when STORAGE_TYPE=postgres +# Example: postgresql+asyncpg://user:password@localhost:5432/bindu +DATABASE_URL= + + +# ── Scheduler Backend ──────────────────────────────────────────────────────── +# "memory" — default, in-process scheduler +# "redis" — distributed scheduler, requires REDIS_URL below +SCHEDULER_TYPE=memory + +# Required only when SCHEDULER_TYPE=redis +# Example: redis://localhost:6379/0 +REDIS_URL= + + +# ── Observability / Error Tracking (optional) ──────────────────────────────── +# Leave blank to disable Sentry error tracking. +SENTRY_DSN= +SENTRY_ENVIRONMENT=development +SENTRY_TRACES_SAMPLE_RATE=0.1 \ No newline at end of file diff --git a/examples/pdf_research_agent/README.md b/examples/pdf_research_agent/README.md new file mode 100644 index 00000000..e69de29b diff --git a/examples/beginner/pdf_research_agent.py b/examples/pdf_research_agent/pdf_research_agent.py similarity index 93% rename from examples/beginner/pdf_research_agent.py rename to examples/pdf_research_agent/pdf_research_agent.py index 1ce77190..99290252 100644 --- a/examples/beginner/pdf_research_agent.py +++ b/examples/pdf_research_agent/pdf_research_agent.py @@ -9,11 +9,11 @@ Prerequisites ------------- - uv add bindu agno openai pypdf + uv add bindu agno pypdf python-dotenv Usage ----- - export OPENAI_API_KEY="sk-..." + export OPENROUTER_API_KEY="your_api_key_here" python pdf_research_agent.py The agent will be live at http://localhost:3775 @@ -21,13 +21,12 @@ {"role": "user", "content": "/path/to/paper.pdf"} or paste raw text directly as the message content. 
""" - -import os - -from agno.agent import Agent -from agno.models.ollama import Ollama - from bindu.penguin.bindufy import bindufy +from agno.agent import Agent +from agno.models.openrouter import OpenRouter +from dotenv import load_dotenv +import os +load_dotenv() # --------------------------------------------------------------------------- # 1. Helper — extract text from a PDF path or pass raw text straight through @@ -62,7 +61,10 @@ def _read_content(source: str) -> str: " 3. Note any important conclusions or recommendations.\n" "Be factual and brief. If the text is too short or unclear, say so." ), - model=Ollama(id="llama3"), + model=OpenRouter( + id="openai/gpt-4o-mini", + api_key=os.getenv("OPENROUTER_API_KEY") + ), ) @@ -73,7 +75,7 @@ def _read_content(source: str) -> str: config = { "author": "your.email@example.com", "name": "pdf_research_agent", - "description": "Summarises PDF files and document text using an LLM.", + "description": "Summarises PDF files and document text using OpenRouter.", "version": "1.0.0", "capabilities": {}, "auth": {"enabled": False}, diff --git a/examples/pdf_research_agent/skills/pdf-research-skill/skill.yaml b/examples/pdf_research_agent/skills/pdf-research-skill/skill.yaml new file mode 100644 index 00000000..65f108ba --- /dev/null +++ b/examples/pdf_research_agent/skills/pdf-research-skill/skill.yaml @@ -0,0 +1,80 @@ +id: pdf-research-skill +name: PDF Research Skill +version: 1.0.0 +author: your.email@example.com +description: > + Processes PDF files and raw document text to produce structured, LLM-generated + summaries. Identifies the main thesis, extracts key findings, and surfaces + conclusions or recommendations. Powered by OpenRouter and exposed as a live + A2A-compliant Bindu microservice. 
+ +features: + - Accept a local PDF file path or raw document text as input + - Extract full text from multi-page PDFs using pypdf + - Generate structured summaries with thesis, key findings, and conclusions + - Graceful fallback to raw-text mode when pypdf is unavailable + - Deployed as a live Bindu microservice via bindufy() + - A2A / AP2 / X402 protocol-compliant out of the box + +tags: + - pdf + - research + - summarisation + - document-analysis + - openrouter + - nlp + +input_modes: + - text/plain # Raw document text pasted directly + - application/pdf # Local PDF file path resolved server-side + +output_modes: + - text/plain # Structured summary returned as plain text + +examples: + - input: "/home/user/papers/attention_is_all_you_need.pdf" + output: | + **Topic:** Transformer architecture for sequence-to-sequence tasks. + **Key Findings:** + - Self-attention replaces recurrence and convolution entirely. + - Multi-head attention allows the model to attend to different positions jointly. + - The model achieves state-of-the-art BLEU on WMT 2014 EN-DE and EN-FR. + - Training time is significantly reduced compared to RNN-based models. + - Positional encodings preserve sequence order without recurrence. + **Conclusions:** The Transformer generalises well to other tasks beyond MT and + enables highly parallelisable training at scale. + + - input: | + Climate change is the long-term shift in global temperatures and weather + patterns. While natural factors play a role, human activity since the 1800s + has been the dominant driver through greenhouse gas emissions... + output: | + **Topic:** Causes and impacts of climate change. + **Key Findings:** + - Human activity is the primary driver since the industrial revolution. + - Greenhouse gas emissions are the main mechanism of warming. + - Weather patterns and sea levels are measurably shifting. + **Conclusions:** Urgent systemic action is required to limit warming below 1.5°C. 
+ +capabilities_detail: + pdf_extraction: + description: Reads multi-page PDFs page-by-page using pypdf and joins extracted text. + fallback: If pypdf is not installed, returns a clear installation hint to the caller. + max_pages: unlimited # all pages extracted; very large PDFs may increase latency + + summarisation: + model: openrouter/openai/gpt-4o-mini + prompt_strategy: | + Instruction-tuned prompt asks the model to: + 1. Identify the main topic or thesis. + 2. List 3–5 key findings or arguments. + 3. Surface important conclusions or recommendations. + fallback_behaviour: If input text is too short or unclear the model says so explicitly. + + transport: + protocol: A2A (JSON-RPC over HTTP) + endpoint: POST / + port: 3775 + auth: disabled (configurable via config.auth.enabled) + storage: in-memory (configurable — see .env.example for Postgres/Redis options) + scheduler: in-memory (configurable — see .env.example for Redis options) \ No newline at end of file