diff --git a/samples/personalized_learning/.env.template b/samples/personalized_learning/.env.template index 46a00a3f1..1361bc711 100644 --- a/samples/personalized_learning/.env.template +++ b/samples/personalized_learning/.env.template @@ -43,3 +43,53 @@ AGENT_ENGINE_RESOURCE_ID= # GCS_CONTEXT_BUCKET=your-bucket-name # GCS_CONTEXT_PREFIX=learner_context/ + +# ============================================================================= +# IMPORTANT - Access Control (you MUST configure this!) +# ============================================================================= +# +# By default, access is restricted to @google.com accounts, which means you +# won't be able to access your own deployed application unless you work at Google! +# +# ⚠️ BEFORE DEPLOYING: Update these settings to allow YOUR email/domain. +# +# The server is the single source of truth for authorization. Both options below +# are checked - a user is allowed if they match EITHER the domain OR the email list. +# +# ----------------------------------------------------------------------------- + +# Option 1: Restrict to a specific email domain +# Examples: +# VITE_ALLOWED_DOMAIN=yourcompany.com (allows anyone@yourcompany.com) +# VITE_ALLOWED_DOMAIN=gmail.com (allows any Gmail user - use with caution!) +# VITE_ALLOWED_DOMAIN= (disable domain restriction, use email list only) +# +VITE_ALLOWED_DOMAIN=google.com + +# Option 2: Whitelist specific email addresses (comma-separated) +# These users are allowed regardless of domain setting above. 
+# Examples: +# VITE_ALLOWED_EMAILS=you@gmail.com +# VITE_ALLOWED_EMAILS=alice@example.com,bob@partner.org,charlie@university.edu +# +# VITE_ALLOWED_EMAILS= + +# ----------------------------------------------------------------------------- +# Quick setup examples: +# +# Allow only yourself: +# VITE_ALLOWED_DOMAIN= +# VITE_ALLOWED_EMAILS=your.email@gmail.com +# +# Allow your whole company: +# VITE_ALLOWED_DOMAIN=yourcompany.com +# VITE_ALLOWED_EMAILS= +# +# Allow your company + a few external collaborators: +# VITE_ALLOWED_DOMAIN=yourcompany.com +# VITE_ALLOWED_EMAILS=external.collaborator@gmail.com,partner@othercorp.com +# +# Allow anyone with a Google account (public demo): +# VITE_ALLOWED_DOMAIN= +# VITE_ALLOWED_EMAILS= +# ----------------------------------------------------------------------------- diff --git a/samples/personalized_learning/.gitignore b/samples/personalized_learning/.gitignore index b69a9a798..2b3b58158 100644 --- a/samples/personalized_learning/.gitignore +++ b/samples/personalized_learning/.gitignore @@ -27,7 +27,7 @@ demo-message-log.json .DS_Store Thumbs.db -# Large media assets (generate via NotebookLM - see NOTEBOOKLM_GUIDE.md) +# Large media assets (generate via NotebookLM - see Quickstart.ipynb) public/assets/*.m4a public/assets/*.mp4 @@ -42,3 +42,9 @@ public/assets/*.mp4 # Temporary A2UI copy for Cloud Run deployment a2ui-web-lib/ + +# Tests (not part of the demo distribution) +tests/ + +# Linter cache +.ruff_cache/ diff --git a/samples/personalized_learning/Quickstart.ipynb b/samples/personalized_learning/Quickstart.ipynb index 849b30822..56aa8c38f 100644 --- a/samples/personalized_learning/Quickstart.ipynb +++ b/samples/personalized_learning/Quickstart.ipynb @@ -523,30 +523,39 @@ "source": [ "### Access Control\n", "\n", - "By default, the demo restricts access to `@google.com` email addresses. 
This is configured in [`src/firebase-auth.ts`](src/firebase-auth.ts).\n", + "⚠️ **IMPORTANT: You must configure access control to use your deployed app!**\n", "\n", - "**To change the allowed domain:**\n", - "```typescript\n", - "// In src/firebase-auth.ts\n", - "const ALLOWED_DOMAIN = \"yourcompany.com\"; // Change to your domain\n", - "```\n", + "By default, access is restricted to `@google.com` accounts. If you don't work at Google, you'll be locked out of your own deployment.\n", "\n", - "**To allow specific external collaborators (whitelist):**\n", - "```typescript\n", - "// In src/firebase-auth.ts\n", - "const ALLOWED_EMAILS: string[] = [\n", - " \"alice@example.com\",\n", - " \"bob@partner.org\",\n", - " \"charlie@university.edu\",\n", - "];\n", - "```\n", + "**Before deploying**, add ONE of the following configurations to your `.env` file (the options are alternatives, not cumulative):\n", + "\n", + "```bash\n", + "# Option 1: Allow a specific domain (your company)\n", + "VITE_ALLOWED_DOMAIN=yourcompany.com\n", + "\n", + "# Option 2: Allow specific email addresses (yourself + collaborators)\n", + "VITE_ALLOWED_DOMAIN=\n", + "VITE_ALLOWED_EMAILS=your.email@gmail.com,collaborator@example.com\n", "\n", - "**To allow anyone with a Google account:**\n", - "```typescript\n", - "const ALLOWED_DOMAIN = \"\"; // Disable domain restriction\n", - "const ALLOWED_EMAILS: string[] = []; // Empty whitelist = allow all\n", + "# Option 3: Allow anyone with a Google account (public demo)\n", + "VITE_ALLOWED_DOMAIN=\n", + "VITE_ALLOWED_EMAILS=\n", "```\n", "\n", + "**How it works:**\n", + "\n", + "The server is the single source of truth for authorization. When a user signs in:\n", + "1. Firebase authenticates them (Google OAuth)\n", + "2. The client calls `/api/check-access` with the user's token\n", + "3. The server checks if their email matches `VITE_ALLOWED_DOMAIN` or `VITE_ALLOWED_EMAILS`\n", + "4. 
If not authorized, they're signed out and shown an error\n", + "\n", + "| Configuration | Who can access |\n", + "|--------------|----------------|\n", + "| `VITE_ALLOWED_DOMAIN=yourcompany.com` | Anyone with an @yourcompany.com account |\n", + "| `VITE_ALLOWED_DOMAIN=` (empty) with `VITE_ALLOWED_EMAILS=you@gmail.com` | Only your email |\n", + "| Both empty | Anyone with a Google account |\n", + "\n", + "> **Note:** After changing access control, rebuild and redeploy: `python deploy_hosting.py --project YOUR_PROJECT_ID`" ] }, diff --git a/samples/personalized_learning/README.md b/samples/personalized_learning/README.md index 55b373088..e3ff0201b 100644 --- a/samples/personalized_learning/README.md +++ b/samples/personalized_learning/README.md @@ -1,688 +1,165 @@ # Personalized Learning Demo -A full-stack sample demonstrating A2UI's capabilities for building AI-powered educational applications with remote agents, dynamic content generation, and custom UI components. +A full-stack sample demonstrating A2UI's capabilities for AI-powered educational applications. **Contributed by Google Public Sector's Rapid Innovation Team.** -![Personalized Learning Demo](assets/hero.jpg) +[![Watch the demo](https://img.youtube.com/vi/fgkiwyHj9g8/maxresdefault.jpg)](https://www.youtube.com/watch?v=fgkiwyHj9g8) ---- - -## Overview - -This demo showcases how A2UI enables agents to generate rich, interactive user interfaces dynamically. 
It demonstrates: - -| Concept | Implementation | -|---------|----------------| -| **Remote Agent Deployment** | ADK agent deployed to Vertex AI Agent Engine, running independently from the UI | -| **A2A Protocol** | Agent-to-Agent protocol for frontend-to-agent communication | -| **Custom UI Components** | Flashcard and QuizCard components extending the A2UI component library | -| **Dynamic Content Generation** | Personalized A2UI JSON generated on-the-fly based on user requests | -| **Dynamic Context from GCS** | Learner profiles loaded from Cloud Storage at runtime | -| **Intelligent Content Matching** | LLM-powered topic-to-textbook matching across 167 OpenStax chapters | - -### What Makes This Demo Unique - -Unlike traditional chat applications where the UI is fixed and only text flows between client and server, this demo shows how **agents can generate entire UI experiences**. When a student asks for flashcards on photosynthesis, the agent: - -1. Matches the topic to relevant OpenStax textbook content -2. Generates personalized study materials using an LLM -3. Returns A2UI JSON describing an interactive flashcard interface -4. The frontend renders the flashcards as flippable, interactive cards - -The same request from different students (with different learner profiles) produces different content tailored to their learning style and misconceptions. +_This video demonstrates two use cases: personalized learning, which is the focus of this sample, plus a workforce development application built on the same A2UI framework—included to show how these patterns adapt to other domains._ --- -## Architecture - -![Architecture Diagram](assets/architecture.jpg) +## tl;dr -### Component Overview +This sample shows how agents within a chat can use A2UI to go beyond text responses and generate dynamic UI elements. 
When a student asks for flashcards on photosynthesis, the agent matches the topic to OpenStax textbook content, generates personalized study materials, and returns A2UI JSON that the frontend renders as interactive, flippable cards. -``` -┌─────────────────────────────────────────────────────────────────────────────┐ -│ FRONTEND (Browser) │ -│ ┌─────────────────────────────────────────────────────────────────────┐ │ -│ │ Vite + TypeScript │ │ -│ │ ├── A2UI Lit Renderer (@a2ui/web-lib) │ │ -│ │ ├── Custom Components (Flashcard, QuizCard) │ │ -│ │ ├── Chat Orchestrator (intent routing, response handling) │ │ -│ │ └── A2A Client (Agent Engine communication) │ │ -│ └─────────────────────────────────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────────────────┐ -│ API SERVER (Node.js) │ -│ ┌─────────────────────────────────────────────────────────────────────┐ │ -│ │ api-server.ts │ │ -│ │ ├── /api/chat-with-intent → Gemini (intent + response + keywords) │ │ -│ │ ├── /a2ui-agent/a2a/query → Agent Engine proxy │ │ -│ │ └── Intent detection, keyword extraction, response generation │ │ -│ └─────────────────────────────────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────────────────┐ -│ VERTEX AI AGENT ENGINE (Remote Agent) │ -│ ┌─────────────────────────────────────────────────────────────────────┐ │ -│ │ deploy.py → ADK Agent │ │ -│ │ ├── generate_flashcards(topic) → A2UI JSON │ │ -│ │ ├── generate_quiz(topic) → A2UI JSON │ │ -│ │ ├── get_textbook_content(topic) → OpenStax content │ │ -│ │ ├── get_audio_content() → AudioPlayer A2UI │ │ -│ │ └── get_video_content() → Video A2UI │ │ -│ └─────────────────────────────────────────────────────────────────────┘ │ 
-└─────────────────────────────────────────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────────────────┐ -│ EXTERNAL RESOURCES │ -│ ├── OpenStax GitHub (raw.githubusercontent.com) → CNXML textbook content │ -│ ├── GCS Bucket ({project}-learner-context) → Learner profiles │ -│ └── GCS Bucket ({project}-openstax) → Optional content cache │ -└─────────────────────────────────────────────────────────────────────────────┘ -``` - -### Key Files +Here are the concepts we're demonstrating: -| File | Purpose | -|------|---------| -| [deploy.py](deploy.py) | Deployment script with embedded agent code for Agent Engine | -| [agent/agent.py](agent/agent.py) | Modular agent code for local development | -| [api-server.ts](api-server.ts) | Node.js server handling intent detection and Agent Engine proxy | -| [src/chat-orchestrator.ts](src/chat-orchestrator.ts) | Frontend orchestration: routes intents to appropriate handlers | -| [src/a2a-client.ts](src/a2a-client.ts) | A2A protocol client with fallback content | -| [src/a2ui-renderer.ts](src/a2ui-renderer.ts) | Renders A2UI JSON using the Lit renderer | -| [src/flashcard.ts](src/flashcard.ts) | Custom Flashcard component (Lit web component) | -| [src/quiz-card.ts](src/quiz-card.ts) | Custom QuizCard component (Lit web component) | +- **Custom A2UI Components** — Flashcard and QuizCard extend the standard A2UI UI component library +- **Remote Agent** — ADK agent deployed to Vertex AI Agent Engine, decoupled from the UI +- **A2A Protocol** — Frontend-to-agent communication via Agent-to-Agent protocol +- **Dynamic Context** — Learner profiles loaded from GCS at runtime (no redeployment needed) +- **Content Retrieval** — LLM-powered information retrieval across 167 OpenStax Biology chapters +- **Server-side Auth** — API endpoints verify Firebase ID tokens and enforce domain/email allowlists --- -## Data Flow - -### Complete Request Lifecycle - -Here's what happens when a 
user asks "Quiz me on photosynthesis": - -#### 1. User Message → API Server - -The frontend sends the message to `/api/chat-with-intent`: - -```typescript -// src/chat-orchestrator.ts:205-229 -const response = await fetch("/api/chat-with-intent", { - method: "POST", - body: JSON.stringify({ - systemPrompt: this.systemPrompt, - messages: this.conversationHistory.slice(-10), - userMessage: message, - recentContext: recentContext, - }), -}); -``` - -#### 2. Intent Detection + Keyword Extraction (Single LLM Call) - -The API server uses Gemini to detect intent AND extract keywords in one call: - -```typescript -// api-server.ts:639-681 -const combinedSystemPrompt = `${systemPrompt} - -## INTENT CLASSIFICATION -- flashcards: user wants study cards -- quiz: user wants to be tested -- podcast: user wants audio content -... - -## KEYWORDS (for flashcards, podcast, video, quiz only) -When the intent is content-generating, include a "keywords" field with: -1. The CORRECTED topic (fix any spelling mistakes) -2. Related biology terms for content retrieval -3. Specific subtopics within that subject area -`; -``` - -**Response:** -```json -{ - "intent": "quiz", - "text": "Let's test your knowledge on photosynthesis!", - "keywords": "photosynthesis, chloroplast, chlorophyll, light reaction, calvin cycle, ATP" -} -``` - -#### 3. Frontend Routes to Agent Engine - -Based on the detected intent, the orchestrator calls the A2A client: - -```typescript -// src/chat-orchestrator.ts:161-165 -const a2uiResult = await this.a2aClient.generateContent( - intent, // "quiz" - topicContext // keywords from Gemini -); -``` - -#### 4. 
Agent Engine Query - -The API server proxies the request to Agent Engine using `:streamQuery`: - -```typescript -// api-server.ts:241 -const url = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectNumber}/locations/${location}/reasoningEngines/${resourceId}:streamQuery`; - -const requestPayload = { - class_method: "stream_query", - input: { - user_id: "demo-user", - message: "Generate quiz for: photosynthesis, chloroplast, chlorophyll...", - }, -}; -``` - -#### 5. Agent Tool Execution - -The ADK agent receives the request and executes the appropriate tool: - -```python -# deploy.py:484-565 (generate_quiz function) -async def generate_quiz(tool_context: ToolContext, topic: str) -> str: - # Fetch OpenStax content for context - openstax_data = fetch_openstax_content(topic) - textbook_context = openstax_data.get("content", "") - sources = openstax_data.get("sources", []) - - # Generate quiz using Gemini with structured output - response = client.models.generate_content( - model=model_id, - contents=prompt, - config=types.GenerateContentConfig( - response_mime_type="application/json", - response_schema=quiz_schema, - ), - ) -``` - -#### 6. 
Content Matching (Keyword → Chapter → Module → GitHub) - -The agent uses a tiered matching system to find relevant content: - -```python -# deploy.py:750-757 - Word boundary matching for keywords -for keyword, slugs in KEYWORD_HINTS.items(): - pattern = r'\b' + re.escape(keyword) + r'\b' - if re.search(pattern, topic_lower): - matched_slugs.update(slugs) -``` - -If no keyword match: -```python -# deploy.py:759-763 - LLM fallback -if not matched_slugs: - llm_slugs = llm_match_topic_to_chapters(topic) - matched_slugs.update(llm_slugs) -``` - -Then fetch content: -```python -# deploy.py:788-797 - GitHub fetch -github_url = f"https://raw.githubusercontent.com/openstax/osbooks-biology-bundle/main/modules/{module_id}/index.cnxml" -with urllib.request.urlopen(github_url, timeout=10) as response: - cnxml = response.read().decode('utf-8') - text = parse_cnxml_to_text(cnxml) -``` - -#### 7. LLM Content Generation with Structured Output - -The agent uses Gemini's structured output to generate quiz content: - -```python -# deploy.py:981-1004 -quiz_schema = { - "type": "array", - "items": { - "type": "object", - "properties": { - "question": {"type": "string"}, - "options": { - "type": "array", - "items": { - "properties": { - "label": {"type": "string"}, - "value": {"type": "string"}, - "isCorrect": {"type": "boolean"}, - }, - }, - }, - "explanation": {"type": "string"}, - "category": {"type": "string"}, - }, - }, -} -``` - -#### 8. A2UI JSON Response +## Quick Start -The agent builds and returns A2UI JSON: +Complete Steps 1–6 in [Quickstart.ipynb](Quickstart.ipynb) first to set up GCP, deploy the agent, and configure environment variables. 
Then: -```json -{ - "format": "quiz", - "surfaceId": "learningContent", - "a2ui": [ - {"beginRendering": {"surfaceId": "learningContent", "root": "mainColumn"}}, - { - "surfaceUpdate": { - "surfaceId": "learningContent", - "components": [ - {"id": "mainColumn", "component": {"Column": {...}}}, - {"id": "header", "component": {"Text": {"text": {"literalString": "Quick Quiz: Photosynthesis"}}}}, - {"id": "q1", "component": {"QuizCard": { - "question": {"literalString": "Where do the light reactions occur?"}, - "options": [...], - "explanation": {...} - }}} - ] - } - } - ], - "source": { - "title": "Overview of Photosynthesis", - "url": "https://openstax.org/books/biology-ap-courses/pages/8-1-overview-of-photosynthesis", - "provider": "OpenStax Biology for AP Courses" - } -} +```bash +cd samples/personalized_learning +npm install +npm run dev ``` -#### 9. Frontend Rendering +Open the URL shown in your terminal (typically http://localhost:5174, but the port may vary) and try prompts like: +- "Help me understand ATP" +- "Quiz me on meiosis" +- "Flashcards for photosynthesis" -The A2UI renderer processes the JSON and renders components: - -```typescript -// src/a2ui-renderer.ts:64-78 -const processor = v0_8.Data.createSignalA2uiMessageProcessor(); -processor.processMessages(a2uiMessages); - -const surfaces = processor.getSurfaces(); -for (const [surfaceId, surface] of surfaces.entries()) { - this.renderSurface(container, surfaceId, surface, processor); -} -``` +The demo works without a deployed agent too—it falls back to sample content in [src/a2a-client.ts](src/a2a-client.ts). --- -## Content Retrieval System - -The agent uses a sophisticated system to map user topics to relevant textbook content. 
- -### Tier 1: Keyword Matching (Fast Path) - -The `KEYWORD_HINTS` dictionary maps ~100 biology keywords to chapter slugs: - -```python -# deploy.py:476-627 -KEYWORD_HINTS = { - # Energy & Metabolism - "atp": ["6-4-atp-adenosine-triphosphate", "6-1-energy-and-metabolism"], - "photosynthesis": ["8-1-overview-of-photosynthesis", "8-2-the-light-dependent-reaction-of-photosynthesis"], - "meiosis": ["11-1-the-process-of-meiosis"], - - # Nervous System - "neuron": ["26-1-neurons-and-glial-cells", "26-2-how-neurons-communicate"], - "vision": ["27-5-vision"], - - # ... ~100 more keywords -} -``` - -**Word Boundary Matching**: The system uses regex word boundaries to prevent false positives: - -```python -# deploy.py:752-756 -pattern = r'\b' + re.escape(keyword) + r'\b' -if re.search(pattern, topic_lower): - matched_slugs.update(slugs) -``` - -This ensures "vision" matches "teach me about vision" but NOT "explain cell division" (which contains "vision" as a substring). - -### Tier 2: LLM Fallback (When Keywords Miss) - -For unrecognized topics, the agent uses Gemini to match: - -```python -# deploy.py:677-740 -def llm_match_topic_to_chapters(topic: str, max_chapters: int = 2) -> list: - prompt = f"""Match the user's topic to the MOST relevant chapters. - -User's topic: "{topic}" - -Available chapters from OpenStax Biology for AP Courses: -{chapter_list} +## Architecture -INSTRUCTIONS: -1. Return EXACTLY {max_chapters} chapter slugs -2. Order by relevance - MOST relevant first -3. For biology topics (even misspelled like "meitosis"), ALWAYS find matches -4. 
Return empty [] ONLY for non-biology topics -""" ``` - -This handles: -- **Misspellings**: "meitosis" → meiosis chapters -- **Alternate terms**: "cell energy" → ATP chapters -- **Complex queries**: "how do plants make food" → photosynthesis chapters - -### Chapter → Module → Content Mapping - -Each chapter slug maps to one or more module IDs: - -```python -# deploy.py:305-473 -CHAPTER_TO_MODULES = { - "8-1-overview-of-photosynthesis": ["m62794"], - "11-1-the-process-of-meiosis": ["m62810"], - # ... 167 chapters -} +Browser → API Server → Agent Engine → OpenStax → A2UI Response + (intent) (content) (fetch) (render) ``` -Module IDs correspond to CNXML files in the OpenStax GitHub repository: +**Frontend (Browser):** Vite + TypeScript app using the A2UI Lit renderer with custom Flashcard and QuizCard components. The chat orchestrator detects user intent and routes requests appropriately. -``` -https://raw.githubusercontent.com/openstax/osbooks-biology-bundle/main/modules/m62794/index.cnxml -``` +**API Server (Node.js):** Handles intent detection via Gemini and proxies requests to Agent Engine. Verifies Firebase ID tokens on all API endpoints. Lives in [api-server.ts](api-server.ts). -### Content Source +**Agent Engine (Vertex AI):** ADK agent with tools for generating flashcards, quizzes, and fetching textbook content. Deployed via [deploy.py](deploy.py). -All educational content comes from [OpenStax Biology for AP Courses](https://openstax.org/details/books/biology-ap-courses), a free, peer-reviewed college textbook licensed under CC BY 4.0. +**Content Pipeline:** When a user asks about "ATP hydrolysis," the agent maps the topic to relevant textbook chapters using a simple keyword matching system (we use Gemini as a fallback to help if there are no good keyword matches). 
The agent then fetches the actual CNXML content from [OpenStax's GitHub repo](https://github.com/openstax/osbooks-biology-bundle) and uses that source material—combined with the learner's profile—to generate grounded, personalized A2UI responses. This ensures flashcards and quizzes are rooted in peer-reviewed textbook content, not just the LLM's training data. --- -## Dynamic Personalization - -### Learner Context System - -Learner profiles are stored in GCS and loaded at runtime: - -``` -gs://{PROJECT_ID}-learner-context/learner_context/ -├── 01_maria_learner_profile.txt -├── 02_chemistry_bond_energy.txt -├── 03_chemistry_thermodynamics.txt -├── 04_biology_atp_cellular_respiration.txt -├── 05_misconception_resolution.txt -└── 06_mcat_practice_concepts.txt -``` - -### The Demo Learner: Maria - -The demo includes a pre-configured learner profile ([learner_context/01_maria_learner_profile.txt](learner_context/01_maria_learner_profile.txt)): - -- **Demographics**: Pre-med student at Cymbal University, preparing for MCAT -- **Learning Style**: Visual-kinesthetic, responds to sports/gym analogies -- **Strengths**: AP Biology (92% proficiency) -- **Gaps**: Chemistry bond energy (65% proficiency) -- **Key Misconception**: Believes "energy is stored in ATP bonds" (incorrect) - -### Switching Students - -To personalize for a different student: +## Key Files -```bash -# Edit the learner profile -nano learner_context/01_maria_learner_profile.txt - -# Upload to GCS (agent picks up changes on next request) -gsutil cp learner_context/*.txt gs://{PROJECT_ID}-learner-context/learner_context/ -``` - -No redeployment required—the agent loads context dynamically. 
+| File | Purpose | +|------|---------| +| [Quickstart.ipynb](Quickstart.ipynb) | Step-by-step setup notebook | +| [deploy.py](deploy.py) | Agent deployment with embedded agent code | +| [api-server.ts](api-server.ts) | Intent detection and Agent Engine proxy | +| [src/chat-orchestrator.ts](src/chat-orchestrator.ts) | Frontend routing logic | +| [src/flashcard.ts](src/flashcard.ts) | Custom Flashcard component | +| [src/quiz-card.ts](src/quiz-card.ts) | Custom QuizCard component | +| [learner_context/](learner_context/) | Sample learner profiles | --- -## Custom UI Components - -This demo extends A2UI with two custom Lit components. - -### Flashcard Component - -A flippable card showing question (front) and answer (back): - -```typescript -// src/flashcard.ts:34-269 -@customElement("a2ui-flashcard") -export class Flashcard extends LitElement { - @property({ attribute: false }) front: StringValue | null = null; - @property({ attribute: false }) back: StringValue | null = null; - @property({ attribute: false }) category: StringValue | null = null; +## Custom Components - @state() private _flipped = false; +This demo extends A2UI with two Lit web components that agents can generate at runtime. - private handleClick() { - this._flipped = !this._flipped; - } -} -``` +**Flashcard** — A flippable card with front (question) and back (answer). Click to flip. 
-**A2UI JSON format:** ```json -{ - "id": "card1", - "component": { - "Flashcard": { - "front": {"literalString": "Why does ATP hydrolysis release energy?"}, - "back": {"literalString": "Because the products (ADP + Pi) are MORE STABLE..."}, - "category": {"literalString": "Biochemistry"} - } - } -} +{"Flashcard": {"front": {"literalString": "What is ATP?"}, "back": {"literalString": "Adenosine triphosphate..."}}} ``` -### QuizCard Component - -An interactive multiple-choice quiz with immediate feedback: - -```typescript -// src/quiz-card.ts:35-348 -@customElement("a2ui-quizcard") -export class QuizCard extends LitElement { - @property({ attribute: false }) question: StringValue | null = null; - @property({ attribute: false }) options: QuizOption[] = []; - @property({ attribute: false }) explanation: StringValue | null = null; - - @state() private selectedValue: string | null = null; - @state() private submitted = false; -} -``` +**QuizCard** — Multiple-choice question with immediate feedback and explanation. -**A2UI JSON format:** ```json -{ - "id": "quiz1", - "component": { - "QuizCard": { - "question": {"literalString": "Where do the light reactions occur?"}, - "options": [ - {"label": {"literalString": "Thylakoid membrane"}, "value": "a", "isCorrect": true}, - {"label": {"literalString": "Stroma"}, "value": "b", "isCorrect": false} - ], - "explanation": {"literalString": "Light reactions occur in the thylakoid..."}, - "category": {"literalString": "Photosynthesis"} - } - } -} -``` - ---- - -## Local Development - -### Quick Start - -> **Prerequisites:** Complete Steps 1-6 in [Quickstart.ipynb](Quickstart.ipynb) first to set up GCP, deploy the agent, and configure environment variables. 
- -```bash -cd samples/personalized_learning -npm install -npm run dev +{"QuizCard": {"question": {"literalString": "Where do light reactions occur?"}, "options": [...], "explanation": {...}}} ``` -Open http://localhost:5174 +Both components are registered in [src/main.ts](src/main.ts) and rendered by the standard A2UI Lit renderer. -### With vs Without a Deployed Agent +--- -The demo works in two modes: +## Personalization -| Mode | How it works | When to use | -|------|--------------|-------------| -| **With deployed agent** | Requests go to Agent Engine, which fetches live OpenStax content | Production, full demo | -| **Without deployed agent** | Falls back to pre-built sample content in [a2a-client.ts](src/a2a-client.ts) | Quick local testing | +Learner profiles live in GCS at `gs://{PROJECT_ID}-learner-context/learner_context/`. The demo includes a sample student "Maria" — a pre-med student preparing for the MCAT who responds well to sports analogies and has a common misconception about ATP bond energy. -The code automatically falls back to sample content if the agent is unreachable—no configuration change needed. +To personalize for a different student, edit the files in [learner_context/](learner_context/) and upload to GCS. The agent picks up changes on the next request—no redeployment required. --- ## Production Deployment -### Cloud Run + Firebase Hosting - -The demo can be deployed to Cloud Run with Firebase Hosting for a shareable URL: +For a shareable URL via Cloud Run + Firebase Hosting: ```bash python deploy_hosting.py --project YOUR_PROJECT_ID ``` -This deploys: -- **Frontend + API Server** → Cloud Run -- **Firebase Hosting** → CDN + custom domain - -See [Quickstart.ipynb](Quickstart.ipynb) Step 7 for detailed instructions. 
- ---- - -## Known Limitations & Future Improvements - -### Latency - -| Issue | Current State | Potential Improvement | -|-------|---------------|----------------------| -| LLM fallback adds 2-5 seconds | Tier 2 matching requires an LLM call when keywords miss | Expand `KEYWORD_HINTS` to cover more common terms, or use semantic search with embeddings | -| Cold start on Agent Engine | First request after idle period is slow | Keep agent warm with periodic health checks | - -### Information Retrieval - -| Issue | Current State | Potential Improvement | -|-------|---------------|----------------------| -| Keyword-based matching | Simple word boundary regex | Use vector embeddings for semantic similarity | -| Single-topic queries only | Multi-topic requests may return wrong content | Implement query decomposition | -| Limited to exact matches | Synonyms not handled | Add synonym expansion or use LLM for all matching | - -### Content Coverage - -| Issue | Current State | Potential Improvement | -|-------|---------------|----------------------| -| Biology only | Only OpenStax Biology for AP Courses | Extend to other OpenStax textbooks (chemistry, physics, etc.) | -| English only | No internationalization | Add multi-language support | - -### UI Limitations - -| Issue | Current State | Potential Improvement | -|-------|---------------|----------------------| -| Sidebar non-functional | Navigation and settings are placeholder | Implement course navigation, settings panel | -| No progress tracking | Sessions are ephemeral | Add persistent learner progress | - -### Media Generation - -| Issue | Current State | Potential Improvement | -|-------|---------------|----------------------| -| Pre-generated audio/video | Podcast and video are static files generated with NotebookLM | Integrate dynamic TTS or video generation APIs | +See Step 7 in [Quickstart.ipynb](Quickstart.ipynb) for Firebase setup details. 
--- -## Troubleshooting - -### "No Content Available" for Valid Biology Topics - -**Symptom**: Agent returns "I couldn't find any OpenStax Biology content related to [topic]" +## Access Control -**Cause**: Topic didn't match any keywords and LLM fallback found no relevant chapters - -**Solutions**: -1. Try more specific biology terminology -2. Check if the topic is covered in AP Biology curriculum -3. Add the keyword to `KEYWORD_HINTS` in [deploy.py:476-627](deploy.py) - -### Slow Responses (5+ seconds) - -**Symptom**: Long delay before content appears - -**Cause**: LLM fallback is being triggered (no keyword match) - -**Solutions**: -1. Add common user terms to `KEYWORD_HINTS` -2. Pre-warm the agent with a health check -3. Use the optional GCS content cache to avoid GitHub fetches - -### Stale Content After Agent Update - -**Symptom**: Agent returns outdated content after redeployment - -**Cause**: Agent Engine caches the previous deployment - -**Solutions**: -1. Wait 1-2 minutes for cache to clear -2. Deploy with a new resource ID -3. Clear browser session storage - -### Quiz Returns Flashcards - -**Symptom**: Requested a quiz but got flashcards - -**Cause**: Agent Engine returned flashcards; the API server's local quiz generation may have failed - -**Solutions**: -1. Check API server logs for errors -2. Verify Gemini API access -3. The [api-server.ts:822-836](api-server.ts) has fallback logic that should generate quizzes locally - -### Checking Agent Engine Logs - -To debug content fetching issues: +**Important:** By default, access is restricted to `@google.com` accounts. That's just because the authors of this sample... work at Google. 
You must configure your own domain and/or specific email addresses in `.env` to access your deployment: ```bash -gcloud logging read 'resource.type="aiplatform.googleapis.com/ReasoningEngine"' \ - --limit=50 --project=YOUR_PROJECT \ - --format="table(timestamp,textPayload)" +# Allow your domain +VITE_ALLOWED_DOMAIN=yourcompany.com + +# Or whitelist specific emails +VITE_ALLOWED_DOMAIN= +VITE_ALLOWED_EMAILS=you@gmail.com,collaborator@example.com ``` +The server is the single source of truth—authorization is enforced via the `/api/check-access` endpoint. See the Access Control section in [Quickstart.ipynb](Quickstart.ipynb) for details. + --- -## Content Attribution +## Design Notes -Educational content is sourced from [OpenStax](https://openstax.org/), licensed under [CC BY 4.0](https://creativecommons.org/licenses/by/4.0/). +**Intent-based response routing:** This demo uses a hybrid response pattern where "general" intents return plain text while UI-specific intents (flashcards, quiz, etc.) return A2UI components. This mirrors how [gemini.google.com](https://gemini.google.com) handles rich content—users see conversational text for explanations and interactive UI for artifacts. The orchestrator in [src/chat-orchestrator.ts](src/chat-orchestrator.ts) handles this routing. -Specifically: [Biology for AP Courses](https://openstax.org/details/books/biology-ap-courses) — OpenStax, Rice University +**CORS in enterprise environments:** The included [api-server.ts](api-server.ts) proxies requests to Agent Engine, which sidesteps browser CORS restrictions. If deploying behind stricter policies (e.g., Domain Restricted Sharing), you may need to add token caching or adjust the proxy to handle additional auth flows. --- -## Security Notice +## Known Limitations -> **Warning:** When building production applications, treat any agent outside your control as potentially untrusted. This demo connects to Agent Engine within your own GCP project. 
Always review agent code before deploying. +- **Keyword matching**: Topic-to-chapter mapping uses a simple keyword dictionary with LLM fallback. This is intentionally naive—a production system would use embeddings or a proper search index. Content retrieval isn't the focus of this A2UI demo. +- **Source citation accuracy**: When the agent expands a topic (e.g., "telomeres" → "telomeres, DNA, chromosome, replication, cell division"), keyword matching may cite a less relevant source. The LLM fallback only triggers when zero keywords match, not when wrong keywords match. A production system would use semantic search or LLM-based reranking to select the most relevant source. +- **Latency**: LLM fallback for topic matching adds 2–5 seconds when keywords don't match +- **Single topics only**: Multi-topic requests may return wrong content +- **Audio/video**: Pre-generated files only, not dynamic +- **Sidebar**: Placeholder UI; only the chat is functional +- **Deployment path**: `deploy_hosting.py` assumes `renderers/lit` is at `../../renderers/lit`; update if repo structure changes --- -## Related Documentation +## Content Attribution -- [A2UI Specification](../../docs/) — Canonical A2UI format documentation -- [A2UI Lit Renderer](../../renderers/lit/) — The web component renderer used by this demo -- [Quickstart.ipynb](Quickstart.ipynb) — Step-by-step setup notebook -- [Main A2UI README](../../README.md) — Project overview and philosophy +Educational content from [OpenStax](https://openstax.org/), licensed under [CC BY 4.0](https://creativecommons.org/licenses/by/4.0/). --- -## License +## Related -Apache 2.0 — See the repository root for details. 
+- [A2UI Specification](../../docs/) +- [A2UI Lit Renderer](../../renderers/lit/) +- [Main A2UI README](../../README.md) diff --git a/samples/personalized_learning/agent/.env.template b/samples/personalized_learning/agent/.env.template deleted file mode 100644 index da5bc32a2..000000000 --- a/samples/personalized_learning/agent/.env.template +++ /dev/null @@ -1,15 +0,0 @@ -# Personalized Learning Agent Configuration - -# Google Cloud Project (required) -GOOGLE_CLOUD_PROJECT=your-project-id - -# Optional: GCS bucket for learner context data -# If not set, loads from local learner_context/ directory -# GCS_CONTEXT_BUCKET=your-bucket-name -# GCS_CONTEXT_PREFIX=learner_context/ - -# Model configuration -LITELLM_MODEL=gemini-2.5-flash - -# Server configuration (for local development) -PORT=8081 diff --git a/samples/personalized_learning/agent/Dockerfile b/samples/personalized_learning/agent/Dockerfile deleted file mode 100644 index 6a082ddaf..000000000 --- a/samples/personalized_learning/agent/Dockerfile +++ /dev/null @@ -1,24 +0,0 @@ -FROM python:3.11-slim - -WORKDIR /app - -# Install dependencies -COPY requirements.txt . 
-RUN pip install --no-cache-dir -r requirements.txt - -# Copy agent code -COPY *.py ./ - -# Create learner context directory -RUN mkdir -p /app/learner_context - -# Set environment variables -ENV PORT=8080 -# GOOGLE_CLOUD_PROJECT must be set at runtime -ENV GOOGLE_CLOUD_LOCATION=global - -# Expose port -EXPOSE 8080 - -# Run the server -CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8080"] diff --git a/samples/personalized_learning/agent/agent.py b/samples/personalized_learning/agent/agent.py index 4a3040499..cdf1568dc 100644 --- a/samples/personalized_learning/agent/agent.py +++ b/samples/personalized_learning/agent/agent.py @@ -39,12 +39,7 @@ from google.adk.agents import Agent from google.adk.tools import ToolContext -# ============================================================================ -# MODULE-LEVEL CONFIGURATION -# These variables are captured by cloudpickle during deployment. -# They are set at import time from environment variables, ensuring they -# persist in the deployed agent even though os.environ is not pickled. 
-# ============================================================================ +# Captured at import time for cloudpickle serialization during deployment _CONFIG_PROJECT = os.getenv("GOOGLE_CLOUD_PROJECT") _CONFIG_LOCATION = os.getenv("GOOGLE_CLOUD_LOCATION", "us-central1") @@ -53,8 +48,7 @@ try: from .context_loader import get_combined_context, load_context_file from .a2ui_templates import get_system_prompt, SURFACE_ID as _IMPORTED_SURFACE_ID - from .openstax_content import fetch_content_for_topic, fetch_chapter_content - from .openstax_chapters import OPENSTAX_CHAPTERS, KEYWORD_HINTS, get_openstax_url_for_chapter + from .openstax_content import fetch_content_for_topic _HAS_EXTERNAL_MODULES = True _HAS_OPENSTAX = True except Exception as e: @@ -65,19 +59,18 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -# Log warnings for degraded functionality +# Log errors for missing modules (these are required, not optional) if not _HAS_EXTERNAL_MODULES: - logger.warning( - "External modules (context_loader, a2ui_templates) not available. " - "Using embedded fallback content. Import error: %s", + logger.error( + "Required modules (context_loader, a2ui_templates) not available. " + "Import error: %s", _IMPORT_ERROR if '_IMPORT_ERROR' in globals() else "unknown" ) if not _HAS_OPENSTAX: - logger.warning( - "OpenStax content modules not available. Flashcards and quizzes will use " - "embedded content only, without textbook source material. " - "This may result in less accurate educational content." + logger.error( + "OpenStax content modules not available. " + "Flashcards and quizzes will not have textbook source material." 
) # Model configuration - use Gemini 2.5 Flash (available in us-central1) @@ -89,217 +82,9 @@ # Surface ID for A2UI rendering (use imported value if available, else fallback) SURFACE_ID = _IMPORTED_SURFACE_ID if _HAS_EXTERNAL_MODULES else "learningContent" -# ============================================================================ -# GCS CONTEXT LOADING (for Agent Engine - loads dynamic context from GCS) -# ============================================================================ -# GCS configuration - set via environment variables -GCS_CONTEXT_BUCKET = os.getenv("GCS_CONTEXT_BUCKET", "a2ui-demo-context") -GCS_CONTEXT_PREFIX = os.getenv("GCS_CONTEXT_PREFIX", "learner_context/") -# Context files to load -CONTEXT_FILES = [ - "01_maria_learner_profile.txt", - "02_chemistry_bond_energy.txt", - "03_chemistry_thermodynamics.txt", - "04_biology_atp_cellular_respiration.txt", - "05_misconception_resolution.txt", - "06_mcat_practice_concepts.txt", -] - - -def _load_from_gcs(filename: str) -> Optional[str]: - """Load a context file from GCS bucket.""" - try: - from google.cloud import storage - - client = storage.Client() - bucket = client.bucket(GCS_CONTEXT_BUCKET) - blob = bucket.blob(f"{GCS_CONTEXT_PREFIX}{filename}") - - if blob.exists(): - content = blob.download_as_text() - logger.info(f"Loaded {filename} from GCS bucket {GCS_CONTEXT_BUCKET}") - return content - else: - logger.warning(f"File {filename} not found in GCS bucket {GCS_CONTEXT_BUCKET}") - return None - - except Exception as e: - logger.warning(f"Failed to load from GCS: {e}") - return None - - -def _load_all_context_from_gcs() -> dict[str, str]: - """Load all context files from GCS.""" - context = {} - for filename in CONTEXT_FILES: - content = _load_from_gcs(filename) - if content: - context[filename] = content - logger.info(f"Loaded {len(context)} context files from GCS") - return context - - -def _get_combined_context_from_gcs() -> str: - """Get all context combined from GCS.""" - all_context = 
_load_all_context_from_gcs() - - if all_context: - combined = [] - for filename, content in sorted(all_context.items()): - combined.append(f"=== {filename} ===\n{content}\n") - return "\n".join(combined) - - # Return empty string if GCS load failed - will trigger fallback - return "" - - -# ============================================================================ -# EMBEDDED CONTEXT DATA (fallback when GCS is unavailable) -# ============================================================================ - -EMBEDDED_LEARNER_PROFILE = """ -## Learner Profile: Maria Santos - -**Background:** -- Pre-med sophomore majoring in Biochemistry -- Preparing for MCAT in 8 months -- Works part-time as a pharmacy technician (20 hrs/week) - -**Learning Style:** -- Visual-kinesthetic learner -- Prefers analogies connecting to real-world applications -- Responds well to gym/fitness metaphors (exercises regularly) -- Benefits from spaced repetition for memorization - -**Current Progress:** -- Completed: Cell structure, basic chemistry -- In progress: Cellular energetics (ATP, metabolism) -- Struggling with: Thermodynamics concepts, especially Gibbs free energy - -**Known Misconceptions:** -- Believes "energy is stored in bonds" (common misconception) -- Needs clarification that bond BREAKING releases energy in ATP hydrolysis -""" - -EMBEDDED_CURRICULUM_CONTEXT = """ -## Current Topic: ATP and Cellular Energy - -**Learning Objectives:** -1. Explain why ATP is considered the "energy currency" of cells -2. Describe the structure of ATP and how it stores potential energy -3. Understand that energy is released during hydrolysis due to product stability, not bond breaking -4. 
Connect ATP usage to cellular processes like muscle contraction - -**Key Concepts:** -- Adenosine triphosphate structure (adenine + ribose + 3 phosphate groups) -- Phosphoanhydride bonds and electrostatic repulsion -- Hydrolysis reaction: ATP + H2O → ADP + Pi + Energy -- Gibbs free energy change (ΔG = -30.5 kJ/mol) -- Coupled reactions in cellular metabolism - -**Common Misconceptions to Address:** -- "Energy stored in bonds" - Actually, breaking bonds REQUIRES energy; - the energy released comes from forming more stable products (ADP + Pi) -- ATP is not a long-term energy storage molecule (that's glycogen/fat) -""" - -EMBEDDED_MISCONCEPTION_CONTEXT = """ -## Misconception Resolution: "Energy Stored in Bonds" - -**The Misconception:** -Many students believe ATP releases energy because "energy is stored in the phosphate bonds." - -**The Reality:** -- Breaking ANY chemical bond REQUIRES energy input (endothermic) -- Energy is released when NEW, more stable bonds FORM (exothermic) -- ATP hydrolysis releases energy because the products (ADP + Pi) are MORE STABLE than ATP - -**Why ATP is "High Energy":** -- The three phosphate groups are negatively charged and repel each other -- This electrostatic repulsion creates molecular strain (like a compressed spring) -- When the terminal phosphate is removed, the products achieve better stability -- The energy comes from relieving this strain, not from "stored bond energy" - -**Gym Analogy for Maria:** -Think of ATP like holding a heavy plank position: -- Holding the plank (ATP) requires constant energy expenditure to maintain -- Dropping to rest (ADP + Pi) releases that tension -- The "energy" wasn't stored in your muscles - it was the relief of an unstable state -""" - - -def _get_combined_context_fallback() -> str: - """Get combined context using embedded data when files aren't available.""" - return f""" -{EMBEDDED_LEARNER_PROFILE} - -{EMBEDDED_CURRICULUM_CONTEXT} - -{EMBEDDED_MISCONCEPTION_CONTEXT} -""" - - -def 
_get_system_prompt_fallback(format_type: str, context: str) -> str: - """Generate system prompt for A2UI generation (fallback for Agent Engine).""" - if format_type.lower() == "flashcards": - return f"""You are creating MCAT study flashcards for Maria, a pre-med student. - -## Maria's Profile -{context} - -## Your Task -Create 4-5 high-quality flashcards about ATP and bond energy that: -1. Directly address her misconception that "energy is stored in bonds" -2. Use sports/gym analogies she loves (compressed springs, holding planks, etc.) -3. Are MCAT exam-focused with precise scientific language -4. Have COMPLETE, THOUGHTFUL answers - not placeholders - -## A2UI JSON Format -Output a JSON array starting with beginRendering, then surfaceUpdate with components. -Use Flashcard components with front, back, and category fields. -Use surfaceId: "{SURFACE_ID}" - -Generate the flashcards JSON (output ONLY valid JSON, no markdown):""" - - if format_type.lower() == "quiz": - return f"""You are creating MCAT practice quiz questions for Maria, a pre-med student. - -## Maria's Profile -{context} - -## Your Task -Create 2-3 interactive quiz questions about ATP and bond energy that: -1. Test her understanding of WHY ATP hydrolysis releases energy -2. Include plausible wrong answers reflecting common misconceptions -3. Provide detailed explanations using sports/gym analogies -4. Are MCAT exam-style with precise scientific language - -## A2UI JSON Format -Output a JSON array with QuizCard components. Each QuizCard has: -- question: The question text -- options: Array of 4 choices with label, value (a/b/c/d), isCorrect -- explanation: Detailed explanation shown after answering -- category: Topic category -Use surfaceId: "{SURFACE_ID}" - -Generate the quiz JSON (output ONLY valid JSON, no markdown):""" - - return f"""Generate A2UI JSON for {format_type} content. 
- -## Context -{context} - -Use surfaceId: "{SURFACE_ID}" -Output ONLY valid JSON, no markdown.""" - - -# ============================================================================ -# CACHING FOR PERFORMANCE -# ============================================================================ - -# Context cache with TTL +# Context cache with TTL for performance _CONTEXT_CACHE: dict[str, Tuple[str, float]] = {} _CONTEXT_CACHE_TTL = 300 # 5 minutes @@ -334,77 +119,54 @@ def clear_context_cache() -> None: logger.info("Context cache cleared") -# Wrapper functions with priority: local files -> GCS -> embedded fallback def _safe_get_combined_context() -> str: """ - Get combined context with fallback chain: - 1. Local files (via external modules) - for local development - 2. GCS bucket - for Agent Engine with dynamic context - 3. Embedded data - final fallback + Get combined learner context. Uses context_loader which handles + local files (for development) and GCS fallback (for Agent Engine). """ - # Try local files first (for local development with adk web) - if _HAS_EXTERNAL_MODULES: - try: - context = get_combined_context() - if context: - logger.info("Loaded context from local files") - return context - except Exception as e: - logger.warning(f"Failed to load context from local files: {e}") + if not _HAS_EXTERNAL_MODULES: + raise RuntimeError( + "context_loader module not available. Cannot load learner context." + ) - # Try GCS (for Agent Engine deployment) - gcs_context = _get_combined_context_from_gcs() - if gcs_context: - logger.info("Loaded context from GCS") - return gcs_context + try: + context = get_combined_context() + if context: + return context + except Exception as e: + logger.error(f"Failed to load learner context: {e}") + raise RuntimeError(f"Could not load learner context: {e}") - # Fall back to embedded data - logger.info("Using embedded fallback context") - return _get_combined_context_fallback() + raise RuntimeError( + "No learner context found. 
Ensure context files exist in " + "learner_context/ or GCS bucket is configured." + ) def _safe_load_context_file(filename: str) -> Optional[str]: """ - Load context file with fallback chain: - 1. Local files (via external modules) - 2. GCS bucket - 3. Embedded data + Load a single context file. Uses context_loader which handles + local files and GCS fallback. """ - # Try local files first - if _HAS_EXTERNAL_MODULES: - try: - content = load_context_file(filename) - if content: - return content - except Exception as e: - logger.debug(f"Failed to load context file {filename} from local: {e}") - - # Try GCS - gcs_content = _load_from_gcs(filename) - if gcs_content: - return gcs_content + if not _HAS_EXTERNAL_MODULES: + logger.warning(f"context_loader not available, cannot load {filename}") + return None - # Fall back to embedded data based on filename - if "learner_profile" in filename: - return EMBEDDED_LEARNER_PROFILE - if "misconception" in filename: - return EMBEDDED_MISCONCEPTION_CONTEXT - return None + try: + return load_context_file(filename) + except Exception as e: + logger.warning(f"Failed to load {filename}: {e}") + return None def _safe_get_system_prompt(format_type: str, context: str) -> str: - """Get system prompt, using fallback if external modules unavailable.""" - if _HAS_EXTERNAL_MODULES: - try: - return get_system_prompt(format_type, context) - except Exception as e: - logger.warning(f"Failed to get system prompt: {e}, using fallback") - return _get_system_prompt_fallback(format_type, context) - - -# ============================================================================ -# TOOL FUNCTIONS -# ============================================================================ + """Get system prompt from a2ui_templates module.""" + if not _HAS_EXTERNAL_MODULES: + raise RuntimeError( + "a2ui_templates module not available. " + "Cannot generate system prompts without it." 
+ ) + return get_system_prompt(format_type, context) async def generate_flashcards( @@ -425,10 +187,7 @@ async def generate_flashcards( Returns: A2UI JSON for flashcard components that can be rendered in the chat """ - logger.info("=" * 60) - logger.info("GENERATE_FLASHCARDS CALLED") - logger.info(f"Topic received: {topic or '(none)'}") - logger.info("=" * 60) + logger.info(f"Generating flashcards for topic: {topic or '(none)'}") # Get learner context (profile, preferences, misconceptions) - uses cache learner_context = _get_cached_context() @@ -443,10 +202,7 @@ async def generate_flashcards( openstax_content = content_result.get("combined_content", "") sources = content_result.get("sources", []) matched_chapters = content_result.get("matched_chapters", []) - logger.info(f"OpenStax fetch result:") - logger.info(f" - Matched chapters: {matched_chapters}") - logger.info(f" - Sources: {sources}") - logger.info(f" - Content length: {len(openstax_content)} chars") + logger.info(f"OpenStax: matched {len(matched_chapters)} chapters, {len(openstax_content)} chars") if not openstax_content: logger.warning("NO CONTENT RETURNED from OpenStax fetch!") except Exception as e: @@ -802,27 +558,15 @@ async def get_textbook_content( } -# ============================================================================ -# HELPER FUNCTIONS -# ============================================================================ - - async def _generate_a2ui_content( format_type: str, context: str, tool_context: ToolContext, ) -> dict[str, Any]: - """ - Generate A2UI content using the Gemini model. - - This is an internal helper that calls the LLM to generate A2UI JSON. 
- """ + """Generate A2UI content using the Gemini model.""" from google import genai from google.genai import types - # Initialize client with VertexAI - use us-central1 for consistency with Agent Engine - # Use module-level config variables (captured by cloudpickle) with - # environment variable fallback for local development project = _CONFIG_PROJECT or os.getenv("GOOGLE_CLOUD_PROJECT") location = _CONFIG_LOCATION or os.getenv("GOOGLE_CLOUD_LOCATION", "us-central1") @@ -870,14 +614,7 @@ async def _generate_a2ui_content( return {"error": str(e)} -# ============================================================================ -# AGENT DEFINITION -# ============================================================================ - -# System prompt for tool selection and agent behavior. -# Note: Maria's profile also appears in src/chat-orchestrator.ts (for chat responses) -# and learner_context/ files (for dynamic personalization). This duplication is -# intentional—the frontend and agent operate independently. +# System prompt for tool selection and agent behavior SYSTEM_PROMPT = """# Personalized Learning Agent You are a personalized learning assistant that helps students study biology more effectively. @@ -949,20 +686,8 @@ async def _generate_a2ui_content( and explain the content to the learner. """ -# ============================================================================ -# AGENT FACTORY FOR AGENT ENGINE DEPLOYMENT -# ============================================================================ -# Agent Engine requires a class that creates the agent on the SERVER, -# not a pre-instantiated agent object. This avoids serialization issues -# with live objects (connections, locks, etc). - - def create_agent() -> Agent: - """Factory function to create the ADK agent. - - This is called on the server side after deployment, avoiding - serialization of live objects. 
- """ + """Create the ADK agent with all tools.""" return Agent( name="personalized_learning_agent", model=MODEL_ID, @@ -978,154 +703,7 @@ def create_agent() -> Agent: ) -# For local development with `adk web`, we still need a module-level agent -# This is only instantiated when running locally, not during deployment +# Module-level agent for local development with `adk web` root_agent = create_agent() -# ============================================================================ -# SERVER-SIDE AGENT WRAPPER FOR AGENT ENGINE DEPLOYMENT -# ============================================================================ -# This wrapper class enables lazy initialization - the agent is created -# on the server side after deployment, avoiding serialization of live objects. - - -class ServerSideAgent: - """ - Wrapper class for Agent Engine deployment using ReasoningEngine pattern. - - This class is COMPLETELY SELF-CONTAINED - it does not import from the - 'agent' package to avoid module resolution issues during unpickling. - All agent creation logic is inlined here. - - Usage: - reasoning_engines.ReasoningEngine.create( - ServerSideAgent, # Pass the CLASS, not an instance - requirements=[...], - ) - """ - - def __init__(self): - """Initialize the agent on the server side.""" - # ALL imports happen inside __init__ to avoid capture during pickling - import os - from google.adk.agents import Agent - from vertexai.agent_engines import AdkApp - - # Model configuration - model_id = os.getenv("GENAI_MODEL", "gemini-2.5-flash") - - # Create a simple agent with basic instruction - # Tools would need to be defined inline here too to avoid imports - self.agent = Agent( - name="personalized_learning_agent", - model=model_id, - instruction="""You are a personalized learning assistant that helps students study biology. - -You can help students understand concepts like ATP, cellular respiration, and bond energy. -Use sports and gym analogies when explaining concepts. 
- -When asked for flashcards or quizzes, explain that this feature requires the full agent deployment. -For now, you can have a helpful conversation about biology topics.""", - tools=[], # No tools for now - keep it simple - ) - - # Wrap in AdkApp for session management and tracing - self.app = AdkApp(agent=self.agent, enable_tracing=True) - - def query(self, *, user_id: str, message: str, **kwargs): - """ - Handle a query from the user. - - This method signature matches what ReasoningEngine expects. - """ - return self.app.query(user_id=user_id, message=message, **kwargs) - - async def aquery(self, *, user_id: str, message: str, **kwargs): - """ - Handle an async query from the user. - """ - return await self.app.aquery(user_id=user_id, message=message, **kwargs) - - def stream_query(self, *, user_id: str, message: str, **kwargs): - """ - Handle a streaming query from the user. - """ - return self.app.stream_query(user_id=user_id, message=message, **kwargs) - - -# ============================================================================ -# LEGACY COMPATIBILITY (for server.py) -# ============================================================================ - -class LearningMaterialAgent: - """ - Legacy wrapper for backwards compatibility with server.py. - - This class wraps the ADK agent's tools to maintain the same interface - that server.py expects. 
- """ - - SUPPORTED_FORMATS = SUPPORTED_FORMATS - - def __init__(self, init_client: bool = True): - self._init_client = init_client - - async def generate_content( - self, - format_type: str, - additional_context: str = "", - ) -> dict[str, Any]: - """Generate content using the appropriate tool.""" - # Create a minimal tool context (duck-typed to match ToolContext interface) - class MinimalToolContext: - def __init__(self): - self.state = {} - - ctx = MinimalToolContext() - - format_lower = format_type.lower() - - if format_lower == "flashcards": - return await generate_flashcards(ctx, additional_context or None) - elif format_lower == "quiz": - return await generate_quiz(ctx, additional_context or None) - elif format_lower in ["audio", "podcast"]: - return await get_audio_content(ctx) - elif format_lower == "video": - return await get_video_content(ctx) - else: - return { - "error": f"Unsupported format: {format_type}", - "supported_formats": SUPPORTED_FORMATS, - } - - async def stream(self, request: str, session_id: str = "default"): - """Stream response for A2A compatibility.""" - parts = request.split(":", 1) - format_type = parts[0].strip().lower() - additional_context = parts[1].strip() if len(parts) > 1 else "" - - yield { - "is_task_complete": False, - "updates": f"Generating {format_type}...", - } - - result = await self.generate_content(format_type, additional_context) - - yield { - "is_task_complete": True, - "content": result, - } - - -# Singleton for backwards compatibility -_agent_instance = None - - -def get_agent() -> LearningMaterialAgent: - """Get or create the legacy agent wrapper singleton.""" - global _agent_instance - if _agent_instance is None: - _agent_instance = LearningMaterialAgent() - return _agent_instance diff --git a/samples/personalized_learning/agent/openstax_chapters.py b/samples/personalized_learning/agent/openstax_chapters.py index a87d308bc..07f53c157 100644 --- a/samples/personalized_learning/agent/openstax_chapters.py +++ 
b/samples/personalized_learning/agent/openstax_chapters.py @@ -2,12 +2,21 @@ Complete OpenStax Biology AP Courses Chapter Index This module provides a comprehensive mapping of all chapters in the OpenStax -Biology for AP Courses textbook, along with intelligent topic matching. +Biology for AP Courses textbook, along with topic-to-chapter matching. Content is sourced from the OpenStax GitHub repository: https://github.com/openstax/osbooks-biology-bundle The module IDs (e.g., m62767) correspond to CNXML files in the modules/ directory. + +NOTE ON KEYWORD MATCHING APPROACH: +----------------------------------- +The KEYWORD_HINTS dictionary below uses a simple keyword-based approach to map +user topics to textbook chapters. This is admittedly naive and could be improved +with more sophisticated techniques (semantic search, embeddings, etc.). However, +content retrieval is not the main focus of this A2UI demo - we're demonstrating +the agent-to-UI rendering pipeline, not building a production content system. +For a real application, consider using vector embeddings or a proper search index. 
""" # GitHub raw content base URL for fetching module content @@ -404,197 +413,240 @@ def get_chapter_list_for_llm() -> str: "cloning": ["17-1-biotechnology"], "genome": ["17-2-mapping-genomes", "17-3-whole-genome-sequencing"], "genomics": ["17-4-applying-genomics", "17-5-genomics-and-proteomics"], + + # Additional keywords for better coverage (plurals and common phrases) + # Energy & Metabolism + "light reactions": ["8-2-the-light-dependent-reaction-of-photosynthesis"], + "energy metabolism": ["6-1-energy-and-metabolism", "7-1-energy-in-living-systems"], + "metabolism": ["6-1-energy-and-metabolism", "7-1-energy-in-living-systems"], + "enzymes": ["6-5-enzymes"], + "proteins": ["3-4-proteins", "15-5-ribosomes-and-protein-synthesis"], + "nucleic acids": ["3-5-nucleic-acids"], + "ribosomes": ["15-5-ribosomes-and-protein-synthesis", "4-3-eukaryotic-cells"], + + # Body Systems (full names) + "respiratory system": ["30-1-systems-of-gas-exchange", "30-3-breathing"], + "digestive system": ["25-1-digestive-systems", "25-3-digestive-system-processes"], + "skeletal system": ["29-1-types-of-skeletal-systems", "29-2-bone"], + "muscular system": ["29-4-muscle-contraction-and-locomotion"], + "circulatory system": ["31-1-overview-of-the-circulatory-system"], + "immune system": ["33-1-innate-immune-response", "33-2-adaptive-immune-response"], + + # Plurals + "hormones": ["28-1-types-of-hormones", "28-2-how-hormones-work"], + "neurons": ["26-1-neurons-and-glial-cells", "26-2-how-neurons-communicate"], + "lungs": ["30-1-systems-of-gas-exchange"], + "kidneys": ["32-2-the-kidneys-and-osmoregulatory-organs"], + "antibodies": ["33-3-antibodies"], + "mutations": ["14-6-dna-repair"], + "ecosystems": ["37-1-ecology-for-ecosystems", "37-2-energy-flow-through-ecosystems"], + "biomes": ["35-3-terrestrial-biomes", "35-4-aquatic-biomes"], + "viruses": ["21-1-viral-evolution-morphology-and-classification", "21-2-virus-infection-and-hosts"], + "prokaryotes": ["4-2-prokaryotic-cells", 
"22-1-prokaryotic-diversity"], + "eukaryotes": ["4-3-eukaryotic-cells"], + "chromosomes": ["13-1-chromosomal-theory-and-genetic-linkages", "13-2-chromosomal-basis-of-inherited-disorders"], + + # Genetics + "homeostasis": ["24-3-homeostasis"], + "chromosome": ["13-1-chromosomal-theory-and-genetic-linkages", "13-2-chromosomal-basis-of-inherited-disorders"], + "allele": ["12-2-characteristics-and-traits", "12-3-laws-of-inheritance"], + "alleles": ["12-2-characteristics-and-traits", "12-3-laws-of-inheritance"], + "dominant": ["12-2-characteristics-and-traits", "12-3-laws-of-inheritance"], + "recessive": ["12-2-characteristics-and-traits", "12-3-laws-of-inheritance"], + "dominant traits": ["12-2-characteristics-and-traits", "12-3-laws-of-inheritance"], + "recessive traits": ["12-2-characteristics-and-traits", "12-3-laws-of-inheritance"], + "genetic disorders": ["13-2-chromosomal-basis-of-inherited-disorders"], + "genetic disorder": ["13-2-chromosomal-basis-of-inherited-disorders"], + + # Evolution + "adaptation": ["19-3-adaptive-evolution"], + "phylogenetic": ["20-2-determining-evolutionary-relationships", "20-3-perspectives-on-the-phylogenetic-tree"], + "phylogenetics": ["20-2-determining-evolutionary-relationships", "20-3-perspectives-on-the-phylogenetic-tree"], + "fossil": ["18-1-understanding-evolution"], + "fossils": ["18-1-understanding-evolution"], + "common ancestor": ["20-2-determining-evolutionary-relationships"], + "ancestors": ["20-2-determining-evolutionary-relationships"], + + # Ecology + "energy flow": ["37-2-energy-flow-through-ecosystems"], + "trophic": ["37-2-energy-flow-through-ecosystems"], } # ============================================================================= # CHAPTER TO MODULE ID MAPPING # Maps chapter slugs to their corresponding module IDs from the OpenStax GitHub +# These IDs were verified against the actual OpenStax osbooks-biology-bundle repo # ============================================================================= 
CHAPTER_TO_MODULES: dict[str, list[str]] = { - # Unit 1: The Chemistry of Life - "1-1-the-science-of-biology": ["m62716"], - "1-2-themes-and-concepts-of-biology": ["m62717", "m62718"], - "2-1-atoms-isotopes-ions-and-molecules-the-building-blocks": ["m62719"], - "2-2-water": ["m62720"], - "2-3-carbon": ["m62721", "m62722"], - "3-1-synthesis-of-biological-macromolecules": ["m62723"], - "3-2-carbohydrates": ["m62724"], - "3-3-lipids": ["m62726"], - "3-4-proteins": ["m62730"], - "3-5-nucleic-acids": ["m62733", "m62735"], - - # Unit 2: The Cell - "4-1-studying-cells": ["m62736"], - "4-2-prokaryotic-cells": ["m62738"], - "4-3-eukaryotic-cells": ["m62740"], - "4-4-the-endomembrane-system-and-proteins": ["m62742", "m62743"], + "1-1-the-science-of-biology": ["m62717"], + "1-2-themes-and-concepts-of-biology": ["m62718"], + "2-1-atoms-isotopes-ions-and-molecules-the-building-blocks": ["m62720"], + "2-2-water": ["m62721"], + "2-3-carbon": ["m62722"], + "3-1-synthesis-of-biological-macromolecules": ["m62724"], + "3-2-carbohydrates": ["m62726"], + "3-3-lipids": ["m62730"], + "3-4-proteins": ["m62733"], + "3-5-nucleic-acids": ["m62735"], + "4-1-studying-cells": ["m62738"], + "4-2-prokaryotic-cells": ["m62740"], + "4-3-eukaryotic-cells": ["m62742"], + "4-4-the-endomembrane-system-and-proteins": ["m62743"], "4-5-cytoskeleton": ["m62744"], "4-6-connections-between-cells-and-cellular-activities": ["m62746"], - "5-1-components-and-structure": ["m62780"], - "5-2-passive-transport": ["m62773"], - "5-3-active-transport": ["m62753"], - "5-4-bulk-transport": ["m62770", "m62772"], - "6-1-energy-and-metabolism": ["m62761"], - "6-2-potential-kinetic-free-and-activation-energy": ["m62763"], - "6-3-the-laws-of-thermodynamics": ["m62764"], - "6-4-atp-adenosine-triphosphate": ["m62767"], - "6-5-enzymes": ["m62768", "m62778"], - "7-1-energy-in-living-systems": ["m62784"], - "7-2-glycolysis": ["m62785"], - "7-3-oxidation-of-pyruvate-and-the-citric-acid-cycle": ["m62786"], - 
"7-4-oxidative-phosphorylation": ["m62787"], - "7-5-metabolism-without-oxygen": ["m62788"], - "7-6-connections-of-carbohydrate-protein-and-lipid-metabolic-pathways": ["m62789"], - "7-7-regulation-of-cellular-respiration": ["m62790", "m62791", "m62792"], - "8-1-overview-of-photosynthesis": ["m62793"], - "8-2-the-light-dependent-reaction-of-photosynthesis": ["m62794"], - "8-3-using-light-to-make-organic-molecules": ["m62795", "m62796"], - "9-1-signaling-molecules-and-cellular-receptors": ["m62797"], - "9-2-propagation-of-the-signal": ["m62798"], - "9-3-response-to-the-signal": ["m62799"], - "9-4-signaling-in-single-celled-organisms": ["m62800", "m62801"], - "10-1-cell-division": ["m62802"], - "10-2-the-cell-cycle": ["m62803"], - "10-3-control-of-the-cell-cycle": ["m62804"], - "10-4-cancer-and-the-cell-cycle": ["m62805"], - "10-5-prokaryotic-cell-division": ["m62806", "m62808"], - - # Unit 3: Genetics - "11-1-the-process-of-meiosis": ["m62809"], - "11-2-sexual-reproduction": ["m62810", "m62811"], - "12-1-mendels-experiments-and-the-laws-of-probability": ["m62812", "m62813"], + "5-1-components-and-structure": ["m62773"], + "5-2-passive-transport": ["m62753"], + "5-3-active-transport": ["m62770"], + "5-4-bulk-transport": ["m62772"], + "6-1-energy-and-metabolism": ["m62763"], + "6-2-potential-kinetic-free-and-activation-energy": ["m62764"], + "6-3-the-laws-of-thermodynamics": ["m62767"], + "6-4-atp-adenosine-triphosphate": ["m62768"], + "6-5-enzymes": ["m62778"], + "7-1-energy-in-living-systems": ["m62786"], + "7-2-glycolysis": ["m62787"], + "7-3-oxidation-of-pyruvate-and-the-citric-acid-cycle": ["m62788"], + "7-4-oxidative-phosphorylation": ["m62789"], + "7-5-metabolism-without-oxygen": ["m62790"], + "7-6-connections-of-carbohydrate-protein-and-lipid-metabolic-pathways": ["m62791"], + "7-7-regulation-of-cellular-respiration": ["m62792"], + "8-1-overview-of-photosynthesis": ["m62794"], + "8-2-the-light-dependent-reaction-of-photosynthesis": ["m62795"], + 
"8-3-using-light-to-make-organic-molecules": ["m62796"], + "9-1-signaling-molecules-and-cellular-receptors": ["m62798"], + "9-2-propagation-of-the-signal": ["m62799"], + "9-3-response-to-the-signal": ["m62800"], + "9-4-signaling-in-single-celled-organisms": ["m62801"], + "10-1-cell-division": ["m62803"], + "10-2-the-cell-cycle": ["m62804"], + "10-3-control-of-the-cell-cycle": ["m62805"], + "10-4-cancer-and-the-cell-cycle": ["m62806"], + "10-5-prokaryotic-cell-division": ["m62808"], + "11-1-the-process-of-meiosis": ["m62810"], + "11-2-sexual-reproduction": ["m62811"], + "12-1-mendels-experiments-and-the-laws-of-probability": ["m62813"], "12-2-characteristics-and-traits": ["m62817"], "12-3-laws-of-inheritance": ["m62819"], - "13-1-chromosomal-theory-and-genetic-linkages": ["m62820"], - "13-2-chromosomal-basis-of-inherited-disorders": ["m62821", "m62822"], - "14-1-historical-basis-of-modern-understanding": ["m62823"], - "14-2-dna-structure-and-sequencing": ["m62824"], - "14-3-basics-of-dna-replication": ["m62825"], - "14-4-dna-replication-in-prokaryotes": ["m62826"], - "14-5-dna-replication-in-eukaryotes": ["m62827", "m62828"], - "14-6-dna-repair": ["m62829", "m62830"], - "15-1-the-genetic-code": ["m62833"], - "15-2-prokaryotic-transcription": ["m62837"], - "15-3-eukaryotic-transcription": ["m62838"], - "15-4-rna-processing-in-eukaryotes": ["m62840"], - "15-5-ribosomes-and-protein-synthesis": ["m62842", "m62843"], - "16-1-regulation-of-gene-expression": ["m62844"], - "16-2-prokaryotic-gene-regulation": ["m62845"], - "16-3-eukaryotic-epigenetic-gene-regulation": ["m62846"], - "16-4-eukaryotic-transcriptional-gene-regulation": ["m62847"], - "16-5-eukaryotic-post-transcriptional-gene-regulation": ["m62848"], - "16-6-eukaryotic-translational-and-post-translational-gene-regulation": ["m62849"], - "16-7-cancer-and-gene-regulation": ["m62850", "m62851"], - "17-1-biotechnology": ["m62852"], - "17-2-mapping-genomes": ["m62853"], - "17-3-whole-genome-sequencing": ["m62855"], - 
"17-4-applying-genomics": ["m62857"], - "17-5-genomics-and-proteomics": ["m62860", "m62861"], - - # Unit 4: Evolutionary Processes - "18-1-understanding-evolution": ["m62862"], - "18-2-formation-of-new-species": ["m62863"], - "18-3-reconnection-and-rates-of-speciation": ["m62864", "m62865"], - "19-1-population-evolution": ["m62866"], - "19-2-population-genetics": ["m62867"], - "19-3-adaptive-evolution": ["m62868", "m62869"], - "20-1-organizing-life-on-earth": ["m62870"], - "20-2-determining-evolutionary-relationships": ["m62871"], - "20-3-perspectives-on-the-phylogenetic-tree": ["m62872", "m62873"], - - # Unit 5: Biological Diversity - "21-1-viral-evolution-morphology-and-classification": ["m62874"], - "21-2-virus-infection-and-hosts": ["m62875"], - "21-3-prevention-and-treatment-of-viral-infections": ["m62876"], - "21-4-other-acellular-entities-prions-and-viroids": ["m62877", "m62878"], - "22-1-prokaryotic-diversity": ["m62879"], - "22-2-structure-of-prokaryotes": ["m62880"], - "22-3-prokaryotic-metabolism": ["m62881"], - "22-4-bacterial-diseases-in-humans": ["m62882"], - "22-5-beneficial-prokaryotes": ["m62883", "m62884"], - - # Unit 6: Plant Structure and Function - "23-1-the-plant-body": ["m62885"], - "23-2-stems": ["m62886"], - "23-3-roots": ["m62887"], - "23-4-leaves": ["m62888"], - "23-5-transport-of-water-and-solutes-in-plants": ["m62889"], - "23-6-plant-sensory-systems-and-responses": ["m62890", "m62891"], - - # Unit 7: Animal Structure and Function - "24-1-animal-form-and-function": ["m62892"], - "24-2-animal-primary-tissues": ["m62893"], - "24-3-homeostasis": ["m62894", "m62895"], - "25-1-digestive-systems": ["m62896"], - "25-2-nutrition-and-energy-production": ["m62897"], - "25-3-digestive-system-processes": ["m62898"], - "25-4-digestive-system-regulation": ["m62899", "m62900"], - "26-1-neurons-and-glial-cells": ["m62901"], - "26-2-how-neurons-communicate": ["m62902"], - "26-3-the-central-nervous-system": ["m62903"], - 
"26-4-the-peripheral-nervous-system": ["m62904"], - "26-5-nervous-system-disorders": ["m62905", "m62906"], - "27-1-sensory-processes": ["m62907"], - "27-2-somatosensation": ["m62908"], - "27-3-taste-and-smell": ["m62909"], - "27-4-hearing-and-vestibular-sensation": ["m62910"], - "27-5-vision": ["m62911", "m62912"], - "28-1-types-of-hormones": ["m62913"], - "28-2-how-hormones-work": ["m62914"], - "28-3-regulation-of-body-processes": ["m62915"], - "28-4-regulation-of-hormone-production": ["m62916"], - "28-5-endocrine-glands": ["m62917", "m62918"], - "29-1-types-of-skeletal-systems": ["m62919"], - "29-2-bone": ["m62920"], - "29-3-joints-and-skeletal-movement": ["m62921"], - "29-4-muscle-contraction-and-locomotion": ["m62922", "m62923"], - "30-1-systems-of-gas-exchange": ["m62924"], - "30-2-gas-exchange-across-respiratory-surfaces": ["m62925"], - "30-3-breathing": ["m62926"], - "30-4-transport-of-gases-in-human-bodily-fluids": ["m62927", "m62928"], - "31-1-overview-of-the-circulatory-system": ["m62929"], - "31-2-components-of-the-blood": ["m62930"], - "31-3-mammalian-heart-and-blood-vessels": ["m62931"], - "31-4-blood-flow-and-blood-pressure-regulation": ["m62932", "m62933"], - "32-1-osmoregulation-and-osmotic-balance": ["m62934"], - "32-2-the-kidneys-and-osmoregulatory-organs": ["m62935"], - "32-3-excretion-systems": ["m62936"], - "32-4-nitrogenous-wastes": ["m62937"], - "32-5-hormonal-control-of-osmoregulatory-functions": ["m62938", "m62939"], - "33-1-innate-immune-response": ["m62940"], - "33-2-adaptive-immune-response": ["m62941"], - "33-3-antibodies": ["m62942"], - "33-4-disruptions-in-the-immune-system": ["m62943", "m62944"], - "34-1-reproduction-methods": ["m62945"], - "34-2-fertilization": ["m62946"], - "34-3-human-reproductive-anatomy-and-gametogenesis": ["m62947"], - "34-4-hormonal-control-of-human-reproduction": ["m62948"], - "34-5-fertilization-and-early-embryonic-development": ["m62949"], - "34-6-organogenesis-and-vertebrate-axis-formation": ["m62950"], - 
"34-7-human-pregnancy-and-birth": ["m62951", "m62952"], - - # Unit 8: Ecology - "35-1-the-scope-of-ecology": ["m62953"], - "35-2-biogeography": ["m62954"], - "35-3-terrestrial-biomes": ["m62955"], - "35-4-aquatic-biomes": ["m62956"], - "35-5-climate-and-the-effects-of-global-climate-change": ["m62957", "m62958"], - "36-1-population-demography": ["m62959"], - "36-2-life-histories-and-natural-selection": ["m62960"], - "36-3-environmental-limits-to-population-growth": ["m62961"], - "36-4-population-dynamics-and-regulation": ["m62962"], - "36-5-human-population-growth": ["m62963"], - "36-6-community-ecology": ["m62964"], - "36-7-behavioral-biology-proximate-and-ultimate-causes-of-behavior": ["m62965", "m62966"], - "37-1-ecology-for-ecosystems": ["m62967"], - "37-2-energy-flow-through-ecosystems": ["m62968"], - "37-3-biogeochemical-cycles": ["m62969", "m62970"], - "38-1-the-biodiversity-crisis": ["m62971"], - "38-2-the-importance-of-biodiversity-to-human-life": ["m62972"], - "38-3-threats-to-biodiversity": ["m62973"], - "38-4-preserving-biodiversity": ["m62974", "m62975"], + "13-1-chromosomal-theory-and-genetic-linkages": ["m62821"], + "13-2-chromosomal-basis-of-inherited-disorders": ["m62822"], + "14-1-historical-basis-of-modern-understanding": ["m62824"], + "14-2-dna-structure-and-sequencing": ["m62825"], + "14-3-basics-of-dna-replication": ["m62826"], + "14-4-dna-replication-in-prokaryotes": ["m62828"], + "14-5-dna-replication-in-eukaryotes": ["m62829"], + "14-6-dna-repair": ["m62830"], + "15-1-the-genetic-code": ["m62837"], + "15-2-prokaryotic-transcription": ["m62838"], + "15-3-eukaryotic-transcription": ["m62840"], + "15-4-rna-processing-in-eukaryotes": ["m62842"], + "15-5-ribosomes-and-protein-synthesis": ["m62843"], + "16-1-regulation-of-gene-expression": ["m62845"], + "16-2-prokaryotic-gene-regulation": ["m62846"], + "16-3-eukaryotic-epigenetic-gene-regulation": ["m62847"], + "16-4-eukaryotic-transcriptional-gene-regulation": ["m62848"], + 
"16-5-eukaryotic-post-transcriptional-gene-regulation": ["m62849"], + "16-6-eukaryotic-translational-and-post-translational-gene-regulation": ["m62850"], + "16-7-cancer-and-gene-regulation": ["m62851"], + "17-1-biotechnology": ["m62853"], + "17-2-mapping-genomes": ["m62855"], + "17-3-whole-genome-sequencing": ["m62857"], + "17-4-applying-genomics": ["m62860"], + "17-5-genomics-and-proteomics": ["m62861"], + "18-1-understanding-evolution": ["m62863"], + "18-2-formation-of-new-species": ["m62864"], + "18-3-reconnection-and-rates-of-speciation": ["m62865"], + "19-1-population-evolution": ["m62868"], + "19-2-population-genetics": ["m62870"], + "19-3-adaptive-evolution": ["m62871"], + "20-1-organizing-life-on-earth": ["m62874"], + "20-2-determining-evolutionary-relationships": ["m62903"], + "20-3-perspectives-on-the-phylogenetic-tree": ["m62876"], + "21-1-viral-evolution-morphology-and-classification": ["m62881"], + "21-2-virus-infection-and-hosts": ["m62882"], + "21-3-prevention-and-treatment-of-viral-infections": ["m62904"], + "21-4-other-acellular-entities-prions-and-viroids": ["m62887"], + "22-1-prokaryotic-diversity": ["m62891"], + "22-2-structure-of-prokaryotes": ["m62893"], + "22-3-prokaryotic-metabolism": ["m62894"], + "22-4-bacterial-diseases-in-humans": ["m62896"], + "22-5-beneficial-prokaryotes": ["m62897"], + "23-1-the-plant-body": ["m62951"], + "23-2-stems": ["m62905"], + "23-3-roots": ["m62906"], + "23-4-leaves": ["m62908"], + "23-5-transport-of-water-and-solutes-in-plants": ["m62969"], + "23-6-plant-sensory-systems-and-responses": ["m62930"], + "24-1-animal-form-and-function": ["m62916"], + "24-2-animal-primary-tissues": ["m62918"], + "24-3-homeostasis": ["m62931"], + "25-1-digestive-systems": ["m62919"], + "25-2-nutrition-and-energy-production": ["m62920"], + "25-3-digestive-system-processes": ["m62921"], + "25-4-digestive-system-regulation": ["m62922"], + "26-1-neurons-and-glial-cells": ["m62924"], + "26-2-how-neurons-communicate": ["m62925"], + 
"26-3-the-central-nervous-system": ["m62926"], + "26-4-the-peripheral-nervous-system": ["m62928"], + "26-5-nervous-system-disorders": ["m62929"], + "27-1-sensory-processes": ["m62994"], + "27-2-somatosensation": ["m62946"], + "27-3-taste-and-smell": ["m62947"], + "27-4-hearing-and-vestibular-sensation": ["m62954"], + "27-5-vision": ["m62957"], + "28-1-types-of-hormones": ["m62961"], + "28-2-how-hormones-work": ["m62963"], + "28-3-regulation-of-body-processes": ["m62996"], + "28-4-regulation-of-hormone-production": ["m62971"], + "28-5-endocrine-glands": ["m62995"], + "29-1-types-of-skeletal-systems": ["m62977"], + "29-2-bone": ["m62978"], + "29-3-joints-and-skeletal-movement": ["m62979"], + "29-4-muscle-contraction-and-locomotion": ["m62980"], + "30-1-systems-of-gas-exchange": ["m62982"], + "30-2-gas-exchange-across-respiratory-surfaces": ["m62998"], + "30-3-breathing": ["m62987"], + "30-4-transport-of-gases-in-human-bodily-fluids": ["m62988"], + "31-1-overview-of-the-circulatory-system": ["m62990"], + "31-2-components-of-the-blood": ["m62991"], + "31-3-mammalian-heart-and-blood-vessels": ["m62992"], + "31-4-blood-flow-and-blood-pressure-regulation": ["m62993"], + "32-1-osmoregulation-and-osmotic-balance": ["m63000"], + "32-2-the-kidneys-and-osmoregulatory-organs": ["m63001"], + "32-3-excretion-systems": ["m63002"], + "32-4-nitrogenous-wastes": ["m63003"], + "32-5-hormonal-control-of-osmoregulatory-functions": ["m63004"], + "33-1-innate-immune-response": ["m63006"], + "33-2-adaptive-immune-response": ["m63007"], + "33-3-antibodies": ["m63008"], + "33-4-disruptions-in-the-immune-system": ["m63009"], + "34-1-reproduction-methods": ["m63011"], + "34-2-fertilization": ["m63012"], + "34-3-human-reproductive-anatomy-and-gametogenesis": ["m63013"], + "34-4-hormonal-control-of-human-reproduction": ["m63014"], + "34-5-fertilization-and-early-embryonic-development": ["m63016"], + "34-6-organogenesis-and-vertebrate-axis-formation": ["m63043"], + 
"34-7-human-pregnancy-and-birth": ["m63018"], + "35-1-the-scope-of-ecology": ["m63021"], + "35-2-biogeography": ["m63023"], + "35-3-terrestrial-biomes": ["m63024"], + "35-4-aquatic-biomes": ["m63025"], + "35-5-climate-and-the-effects-of-global-climate-change": ["m63026"], + "36-1-population-demography": ["m63028"], + "36-2-life-histories-and-natural-selection": ["m63029"], + "36-3-environmental-limits-to-population-growth": ["m63030"], + "36-4-population-dynamics-and-regulation": ["m63031"], + "36-5-human-population-growth": ["m63032"], + "36-6-community-ecology": ["m63033"], + "36-7-behavioral-biology-proximate-and-ultimate-causes-of-behavior": ["m63034"], + "37-1-ecology-for-ecosystems": ["m63036"], + "37-2-energy-flow-through-ecosystems": ["m63037"], + "37-3-biogeochemical-cycles": ["m63040"], + "38-1-the-biodiversity-crisis": ["m63048"], + "38-2-the-importance-of-biodiversity-to-human-life": ["m63049"], + "38-3-threats-to-biodiversity": ["m63050"], + "38-4-preserving-biodiversity": ["m63051"], } diff --git a/samples/personalized_learning/agent/openstax_content.py b/samples/personalized_learning/agent/openstax_content.py index 2bb73ffa3..c6befa612 100644 --- a/samples/personalized_learning/agent/openstax_content.py +++ b/samples/personalized_learning/agent/openstax_content.py @@ -13,6 +13,7 @@ import logging import os import re +import ssl import time import xml.etree.ElementTree as ET from concurrent.futures import ThreadPoolExecutor @@ -20,6 +21,16 @@ logger = logging.getLogger(__name__) +# SSL context for GitHub fetches - uses certifi CA bundle if available +def _get_ssl_context() -> ssl.SSLContext: + """Get SSL context with proper CA certificates.""" + try: + import certifi + return ssl.create_default_context(cafile=certifi.where()) + except ImportError: + # certifi not available, use system defaults + return ssl.create_default_context() + # GCS configuration GCS_OPENSTAX_BUCKET = os.getenv("GCS_OPENSTAX_BUCKET", "") GCS_OPENSTAX_PREFIX = 
os.getenv("GCS_OPENSTAX_PREFIX", "openstax_modules/") @@ -35,6 +46,7 @@ # ============================================================================ # Module cache with TTL - caches parsed content to avoid re-fetching +# Note: Cache grows unbounded. For production, consider adding LRU eviction. _MODULE_CACHE: dict[str, Tuple[str, float]] = {} _MODULE_CACHE_TTL = 3600 # 1 hour (content rarely changes) @@ -193,7 +205,7 @@ def fetch_module_from_github(module_id: str) -> Optional[str]: url = f"{GITHUB_RAW_BASE}/{module_id}/index.cnxml" try: - with urllib.request.urlopen(url, timeout=10) as response: + with urllib.request.urlopen(url, timeout=10, context=_get_ssl_context()) as response: content = response.read().decode('utf-8') logger.info(f"Fetched module {module_id} from GitHub") return content diff --git a/samples/personalized_learning/agent/openstax_modules.py b/samples/personalized_learning/agent/openstax_modules.py index dd5e09989..f7d9d876c 100644 --- a/samples/personalized_learning/agent/openstax_modules.py +++ b/samples/personalized_learning/agent/openstax_modules.py @@ -687,13 +687,16 @@ # CHAPTER 7: CELLULAR RESPIRATION # ========================================================================== "cellular respiration": ["m62786", "m62787", "m62788", "m62789"], - "respiration": ["m62786", "m62982", "m62987"], + "respiration": ["m62786", "m62787", "m62788", "m62789"], "aerobic respiration": ["m62786", "m62789"], "glycolysis": ["m62787"], "pyruvate": ["m62787", "m62788"], "citric acid cycle": ["m62788"], + "citric acid": ["m62788"], "krebs cycle": ["m62788"], + "krebs": ["m62788"], "tca cycle": ["m62788"], + "tca": ["m62788"], "acetyl coa": ["m62788"], "nadh": ["m62788", "m62789"], "fadh2": ["m62788", "m62789"], diff --git a/samples/personalized_learning/agent/pyproject.toml b/samples/personalized_learning/agent/pyproject.toml index 1f34087b6..d9cadd260 100644 --- a/samples/personalized_learning/agent/pyproject.toml +++ 
b/samples/personalized_learning/agent/pyproject.toml @@ -8,9 +8,7 @@ dependencies = [ "google-genai>=1.0.0", "google-cloud-storage>=2.10.0", "python-dotenv>=1.0.0", - "uvicorn>=0.24.0", - "fastapi>=0.104.0", - "pydantic>=2.5.0", + "certifi>=2023.0.0", ] [project.optional-dependencies] diff --git a/samples/personalized_learning/agent/requirements.txt b/samples/personalized_learning/agent/requirements.txt index 70959914e..7e623419c 100644 --- a/samples/personalized_learning/agent/requirements.txt +++ b/samples/personalized_learning/agent/requirements.txt @@ -1,9 +1,5 @@ google-adk>=0.3.0 google-genai>=1.0.0 google-cloud-storage>=2.10.0 -a2a-sdk>=0.2.0 +certifi>=2023.0.0 python-dotenv>=1.0.0 -uvicorn>=0.24.0 -fastapi>=0.104.0 -pydantic>=2.5.0 -litellm>=1.0.0 diff --git a/samples/personalized_learning/agent/server.py b/samples/personalized_learning/agent/server.py deleted file mode 100644 index b7c71f20d..000000000 --- a/samples/personalized_learning/agent/server.py +++ /dev/null @@ -1,152 +0,0 @@ -""" -FastAPI Server for Personalized Learning Agent - -Provides HTTP endpoints for the A2A agent that generates A2UI learning materials. -This can run locally or be deployed to Agent Engine. 
-""" - -import json -import logging -import os -from typing import Any - -from fastapi import FastAPI, HTTPException -from fastapi.middleware.cors import CORSMiddleware -from fastapi.responses import StreamingResponse -from pydantic import BaseModel - -from agent import get_agent, LearningMaterialAgent - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - -app = FastAPI( - title="Personalized Learning Agent", - description="A2A agent for generating personalized A2UI learning materials", - version="0.1.0", -) - -# CORS for local development -app.add_middleware( - CORSMiddleware, - allow_origins=["*"], - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], -) - - -class GenerateRequest(BaseModel): - """Request model for content generation.""" - - format: str - context: str = "" - session_id: str = "default" - - -class A2ARequest(BaseModel): - """A2A protocol request model.""" - - message: str - session_id: str = "default" - extensions: list[str] = [] - - -@app.get("/health") -async def health_check(): - """Health check endpoint.""" - return {"status": "healthy", "agent": "personalized-learning-agent"} - - -@app.get("/capabilities") -async def get_capabilities(): - """Return agent capabilities for A2A discovery.""" - return { - "name": "Personalized Learning Agent", - "description": "Generates personalized A2UI learning materials", - "supported_formats": LearningMaterialAgent.SUPPORTED_FORMATS, - "extensions": [ - { - "uri": "https://a2ui.org/a2a-extension/a2ui/v0.8", - "description": "Provides agent driven UI using the A2UI JSON format.", - } - ], - } - - -@app.post("/generate") -async def generate_content(request: GenerateRequest): - """ - Generate A2UI content for the specified format. 
- - Args: - request: Generation request with format and optional context - - Returns: - A2UI JSON response - """ - logger.info(f"Generate request: format={request.format}, context={request.context[:50]}...") - - agent = get_agent() - result = await agent.generate_content(request.format, request.context) - - if "error" in result: - raise HTTPException(status_code=400, detail=result["error"]) - - return result - - -@app.post("/a2a/stream") -async def a2a_stream(request: A2ARequest): - """ - A2A-compatible streaming endpoint. - - Args: - request: A2A request with message - - Returns: - Streaming response with A2UI JSON - """ - logger.info(f"A2A stream request: {request.message}") - - agent = get_agent() - - async def generate(): - async for chunk in agent.stream(request.message, request.session_id): - yield f"data: {json.dumps(chunk)}\n\n" - - return StreamingResponse( - generate(), - media_type="text/event-stream", - ) - - -@app.post("/a2a/query") -async def a2a_query(request: A2ARequest): - """ - A2A-compatible non-streaming endpoint. 
- - Args: - request: A2A request with message in format "type:context" - - Returns: - A2UI JSON response - """ - logger.info(f"A2A query request: {request.message}") - - # Parse message (format: "type:context" or just "type") - parts = request.message.split(":", 1) - format_type = parts[0].strip() - context = parts[1].strip() if len(parts) > 1 else "" - - agent = get_agent() - result = await agent.generate_content(format_type, context) - - return result - - -if __name__ == "__main__": - import uvicorn - - port = int(os.getenv("PORT", "8081")) - uvicorn.run(app, host="0.0.0.0", port=port) diff --git a/samples/personalized_learning/agent/tests/test_agent.py b/samples/personalized_learning/agent/tests/test_agent.py deleted file mode 100644 index 67bfa3441..000000000 --- a/samples/personalized_learning/agent/tests/test_agent.py +++ /dev/null @@ -1,363 +0,0 @@ -""" -Unit and Integration Tests for Personalized Learning Agent - -Tests the context loader, A2UI templates, and agent functionality. 
-""" - -import json -import os -import sys -import asyncio -from pathlib import Path - -# Add parent directory to path for imports -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from context_loader import ( - load_context_file, - load_all_context, - get_learner_profile, - get_misconception_context, - get_combined_context, -) -from a2ui_templates import ( - get_system_prompt, - FLASHCARD_EXAMPLE, - AUDIO_EXAMPLE, - VIDEO_EXAMPLE, - SURFACE_ID, -) - -# ============================================================================= -# Test Results Tracking -# ============================================================================= - -passed = 0 -failed = 0 - - -def test(name): - """Decorator for test functions.""" - def decorator(fn): - global passed, failed - try: - result = fn() - if asyncio.iscoroutine(result): - asyncio.run(result) - print(f"✓ {name}") - passed += 1 - except AssertionError as e: - print(f"✗ {name}") - print(f" Error: {e}") - failed += 1 - except Exception as e: - print(f"✗ {name}") - print(f" Exception: {type(e).__name__}: {e}") - failed += 1 - return fn - return decorator - - -# ============================================================================= -# Context Loader Tests -# ============================================================================= - -print("=" * 60) -print("Personalized Learning Agent - Python Tests") -print("=" * 60) -print("\n--- Context Loader Tests ---\n") - - -@test("load_context_file loads maria profile") -def test_load_maria_profile(): - content = load_context_file("01_maria_learner_profile.txt") - assert content is not None, "Content should not be None" - assert "Maria" in content, "Content should contain 'Maria'" - assert "MCAT" in content, "Content should contain 'MCAT'" - - -@test("load_context_file loads misconception resolution") -def test_load_misconception(): - content = load_context_file("05_misconception_resolution.txt") - assert content is not None, "Content should not be None" - assert 
"ATP" in content, "Content should contain 'ATP'" - assert "bond" in content.lower(), "Content should mention bonds" - - -@test("load_context_file returns None for missing file") -def test_load_missing_file(): - content = load_context_file("nonexistent_file.txt") - assert content is None, "Should return None for missing file" - - -@test("load_all_context loads multiple files") -def test_load_all_context(): - context = load_all_context() - assert isinstance(context, dict), "Should return a dict" - assert len(context) >= 1, "Should load at least one file" - # Check that keys are filenames - for key in context.keys(): - assert key.endswith(".txt"), f"Key {key} should be a .txt filename" - - -@test("get_learner_profile returns Maria's profile") -def test_get_learner_profile(): - profile = get_learner_profile() - assert profile is not None, "Profile should not be None" - assert "Maria" in profile, "Profile should contain Maria" - assert "Cymbal" in profile, "Profile should mention Cymbal University" - - -@test("get_misconception_context returns resolution content") -def test_get_misconception_context(): - content = get_misconception_context() - assert content is not None, "Content should not be None" - assert "misconception" in content.lower(), "Should discuss misconception" - - -@test("get_combined_context combines all files") -def test_get_combined_context(): - combined = get_combined_context() - assert isinstance(combined, str), "Should return a string" - assert len(combined) > 1000, "Combined context should be substantial" - # Should contain section markers - assert "===" in combined, "Should contain section markers" - - -# ============================================================================= -# A2UI Templates Tests -# ============================================================================= - -print("\n--- A2UI Templates Tests ---\n") - - -@test("SURFACE_ID is set correctly") -def test_surface_id(): - assert SURFACE_ID == "learningContent", f"SURFACE_ID 
should be 'learningContent', got {SURFACE_ID}" - - -@test("FLASHCARD_EXAMPLE contains valid A2UI structure") -def test_flashcard_example(): - assert "beginRendering" in FLASHCARD_EXAMPLE - assert "surfaceUpdate" in FLASHCARD_EXAMPLE - assert "Flashcard" in FLASHCARD_EXAMPLE - assert SURFACE_ID in FLASHCARD_EXAMPLE - - -@test("AUDIO_EXAMPLE contains valid A2UI structure") -def test_audio_example(): - assert "beginRendering" in AUDIO_EXAMPLE - assert "surfaceUpdate" in AUDIO_EXAMPLE - assert "Audio" in AUDIO_EXAMPLE - assert "/assets/podcast.m4a" in AUDIO_EXAMPLE - - -@test("VIDEO_EXAMPLE contains valid A2UI structure") -def test_video_example(): - assert "beginRendering" in VIDEO_EXAMPLE - assert "surfaceUpdate" in VIDEO_EXAMPLE - assert "Video" in VIDEO_EXAMPLE - assert "/assets/video.mp4" in VIDEO_EXAMPLE - - -@test("get_system_prompt generates flashcards prompt") -def test_system_prompt_flashcards(): - context = "Test context for Maria" - prompt = get_system_prompt("flashcards", context) - assert "flashcards" in prompt.lower() - assert context in prompt - assert SURFACE_ID in prompt - assert "Flashcard" in prompt - - -@test("get_system_prompt generates audio prompt") -def test_system_prompt_audio(): - context = "Test context" - prompt = get_system_prompt("audio", context) - assert "audio" in prompt.lower() or "Audio" in prompt - assert context in prompt - - -@test("get_system_prompt includes learner context") -def test_system_prompt_includes_context(): - context = "Maria is a pre-med student with ATP misconception" - prompt = get_system_prompt("flashcards", context) - assert "Maria" in prompt - assert "ATP" in prompt - - -# ============================================================================= -# Agent Tests -# ============================================================================= - -print("\n--- Agent Tests ---\n") - -# Import agent after context tests to ensure dependencies work -try: - from agent import LearningMaterialAgent, get_agent - 
AGENT_AVAILABLE = True -except ImportError as e: - print(f" (Skipping agent tests: {e})") - AGENT_AVAILABLE = False - - -if AGENT_AVAILABLE: - # Create a test agent without initializing the Gemini client - # This allows testing static methods without credentials - _test_agent = LearningMaterialAgent(init_client=False) - - @test("LearningMaterialAgent has correct supported formats") - def test_agent_formats(): - assert "flashcards" in LearningMaterialAgent.SUPPORTED_FORMATS - assert "audio" in LearningMaterialAgent.SUPPORTED_FORMATS - assert "podcast" in LearningMaterialAgent.SUPPORTED_FORMATS - assert "video" in LearningMaterialAgent.SUPPORTED_FORMATS - assert "quiz" in LearningMaterialAgent.SUPPORTED_FORMATS - - - @test("agent._get_audio_reference returns valid A2UI") - def test_audio_reference(): - result = _test_agent._get_audio_reference() - assert result["format"] == "audio" - assert result["surfaceId"] == SURFACE_ID - assert isinstance(result["a2ui"], list) - assert len(result["a2ui"]) == 2 - assert "beginRendering" in result["a2ui"][0] - assert "surfaceUpdate" in result["a2ui"][1] - - - @test("agent._get_video_reference returns valid A2UI") - def test_video_reference(): - result = _test_agent._get_video_reference() - assert result["format"] == "video" - assert result["surfaceId"] == SURFACE_ID - assert isinstance(result["a2ui"], list) - assert len(result["a2ui"]) == 2 - - - @test("audio A2UI has all required components") - def test_audio_components(): - result = _test_agent._get_audio_reference() - components = result["a2ui"][1]["surfaceUpdate"]["components"] - component_ids = {c["id"] for c in components} - - # Check all required components exist - required = {"audioCard", "audioContent", "audioHeader", "audioIcon", - "audioTitle", "audioPlayer", "audioDescription"} - missing = required - component_ids - assert not missing, f"Missing components: {missing}" - - - @test("video A2UI has all required components") - def test_video_components(): - result = 
_test_agent._get_video_reference() - components = result["a2ui"][1]["surfaceUpdate"]["components"] - component_ids = {c["id"] for c in components} - - required = {"videoCard", "videoContent", "videoTitle", "videoPlayer", "videoDescription"} - missing = required - component_ids - assert not missing, f"Missing components: {missing}" - - -# ============================================================================= -# A2UI JSON Validation Tests -# ============================================================================= - -print("\n--- A2UI JSON Validation Tests ---\n") - - -def validate_a2ui_message(message): - """Validate a single A2UI message structure.""" - valid_keys = {"beginRendering", "surfaceUpdate", "dataModelUpdate", "deleteSurface"} - message_keys = set(message.keys()) - action_keys = message_keys & valid_keys - - if len(action_keys) != 1: - return False, f"Expected exactly one action key, got {len(action_keys)}" - - action = list(action_keys)[0] - - if action == "beginRendering": - br = message["beginRendering"] - if "surfaceId" not in br or "root" not in br: - return False, "beginRendering missing surfaceId or root" - - elif action == "surfaceUpdate": - su = message["surfaceUpdate"] - if "surfaceId" not in su: - return False, "surfaceUpdate missing surfaceId" - if "components" not in su or not isinstance(su["components"], list): - return False, "surfaceUpdate missing components array" - for comp in su["components"]: - if "id" not in comp or "component" not in comp: - return False, f"Component missing id or component: {comp}" - - return True, "OK" - - -def validate_a2ui_payload(messages): - """Validate a complete A2UI payload.""" - if not isinstance(messages, list): - return False, "Payload must be a list" - if len(messages) == 0: - return False, "Payload cannot be empty" - if "beginRendering" not in messages[0]: - return False, "First message must be beginRendering" - - for i, msg in enumerate(messages): - valid, error = validate_a2ui_message(msg) 
- if not valid: - return False, f"Message {i}: {error}" - - # Validate component references - all_ids = set() - references = [] - - for msg in messages: - if "surfaceUpdate" in msg: - for comp in msg["surfaceUpdate"]["components"]: - all_ids.add(comp["id"]) - comp_def = comp["component"] - comp_type = list(comp_def.keys())[0] - props = comp_def[comp_type] - - if isinstance(props, dict): - if "child" in props and isinstance(props["child"], str): - references.append((comp["id"], props["child"])) - if "children" in props and isinstance(props["children"], dict): - if "explicitList" in props["children"]: - for child_id in props["children"]["explicitList"]: - references.append((comp["id"], child_id)) - - for parent_id, child_id in references: - if child_id not in all_ids: - return False, f"Component {parent_id} references non-existent child: {child_id}" - - return True, "OK" - - -if AGENT_AVAILABLE: - @test("audio reference passes A2UI validation") - def test_validate_audio(): - result = _test_agent._get_audio_reference() - valid, error = validate_a2ui_payload(result["a2ui"]) - assert valid, f"Audio A2UI validation failed: {error}" - - - @test("video reference passes A2UI validation") - def test_validate_video(): - result = _test_agent._get_video_reference() - valid, error = validate_a2ui_payload(result["a2ui"]) - assert valid, f"Video A2UI validation failed: {error}" - - -# ============================================================================= -# Summary -# ============================================================================= - -print("\n" + "=" * 60) -print(f"Python Tests Complete: {passed} passed, {failed} failed") -print("=" * 60) - -if failed > 0: - sys.exit(1) diff --git a/samples/personalized_learning/agent/tests/test_caching.py b/samples/personalized_learning/agent/tests/test_caching.py deleted file mode 100644 index 147c74d43..000000000 --- a/samples/personalized_learning/agent/tests/test_caching.py +++ /dev/null @@ -1,200 +0,0 @@ -""" -Unit tests 
for caching functionality in the personalized learning agent. - -Tests: -- Learner context caching (TTL-based) -- OpenStax module content caching (TTL-based) -""" - -import time -import unittest -from unittest.mock import patch, MagicMock - -# Import the modules we're testing -import sys -import os - -# Add parent directories to path for imports -parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -sys.path.insert(0, parent_dir) - -# Direct imports of the module files -import importlib.util - -# Load agent.py as a module -agent_path = os.path.join(parent_dir, 'agent.py') -spec = importlib.util.spec_from_file_location("agent_module", agent_path) -agent_module = importlib.util.module_from_spec(spec) -spec.loader.exec_module(agent_module) - -# Import openstax_content -import openstax_content - - -class TestContextCaching(unittest.TestCase): - """Tests for learner context caching in agent.py""" - - def setUp(self): - """Reset the cache before each test.""" - agent_module.clear_context_cache() - - def test_context_cache_returns_cached_value(self): - """Verify second call returns cached content without reloading.""" - # First call should load context - with patch.object(agent_module, '_safe_get_combined_context') as mock_get: - mock_get.return_value = "Test context content" - - result1 = agent_module._get_cached_context() - self.assertEqual(result1, "Test context content") - self.assertEqual(mock_get.call_count, 1) - - # Second call should use cache (mock not called again) - result2 = agent_module._get_cached_context() - self.assertEqual(result2, "Test context content") - self.assertEqual(mock_get.call_count, 1) # Still 1, not 2 - - def test_context_cache_expires_after_ttl(self): - """Verify cache expires and refetches after TTL.""" - ttl = agent_module._CONTEXT_CACHE_TTL - - with patch.object(agent_module, '_safe_get_combined_context') as mock_get: - with patch.object(agent_module.time, 'time') as mock_time: - # First call at time 0 - 
mock_time.return_value = 0 - mock_get.return_value = "Original content" - - result1 = agent_module._get_cached_context() - self.assertEqual(result1, "Original content") - self.assertEqual(mock_get.call_count, 1) - - # Second call still within TTL - mock_time.return_value = ttl - 1 - result2 = agent_module._get_cached_context() - self.assertEqual(mock_get.call_count, 1) # Cache hit - - # Third call after TTL expires - mock_time.return_value = ttl + 1 - mock_get.return_value = "Updated content" - - result3 = agent_module._get_cached_context() - self.assertEqual(result3, "Updated content") - self.assertEqual(mock_get.call_count, 2) # Cache miss, refetched - - def test_clear_context_cache(self): - """Verify clear_context_cache empties the cache.""" - with patch.object(agent_module, '_safe_get_combined_context') as mock_get: - mock_get.return_value = "Test content" - - # Load into cache - agent_module._get_cached_context() - self.assertEqual(mock_get.call_count, 1) - - # Clear cache - agent_module.clear_context_cache() - - # Next call should reload - agent_module._get_cached_context() - self.assertEqual(mock_get.call_count, 2) - - -class TestModuleCaching(unittest.TestCase): - """Tests for OpenStax module content caching in openstax_content.py""" - - def setUp(self): - """Reset the module cache before each test.""" - openstax_content.clear_module_cache() - - def test_module_cache_hit(self): - """Verify cached module content is returned.""" - with patch.object(openstax_content, 'fetch_module_content') as mock_fetch: - mock_fetch.return_value = "Module content for m12345" - - # First call - result1 = openstax_content.fetch_module_content_cached("m12345") - self.assertEqual(result1, "Module content for m12345") - self.assertEqual(mock_fetch.call_count, 1) - - # Second call should use cache - result2 = openstax_content.fetch_module_content_cached("m12345") - self.assertEqual(result2, "Module content for m12345") - self.assertEqual(mock_fetch.call_count, 1) # Still 1 - - def 
test_module_cache_miss_fetches_fresh(self): - """Verify cache miss triggers fresh fetch.""" - with patch.object(openstax_content, 'fetch_module_content') as mock_fetch: - mock_fetch.return_value = "Content A" - - # Fetch module A - result_a = openstax_content.fetch_module_content_cached("moduleA") - self.assertEqual(result_a, "Content A") - - # Fetch different module B (cache miss) - mock_fetch.return_value = "Content B" - result_b = openstax_content.fetch_module_content_cached("moduleB") - self.assertEqual(result_b, "Content B") - - # Both fetches should have occurred - self.assertEqual(mock_fetch.call_count, 2) - - def test_module_cache_ttl_expiry(self): - """Verify module cache expires correctly.""" - ttl = openstax_content._MODULE_CACHE_TTL - - with patch.object(openstax_content, 'fetch_module_content') as mock_fetch: - with patch.object(openstax_content.time, 'time') as mock_time: - mock_time.return_value = 0 - mock_fetch.return_value = "Old content" - - # First fetch - result1 = openstax_content.fetch_module_content_cached("m99999") - self.assertEqual(result1, "Old content") - self.assertEqual(mock_fetch.call_count, 1) - - # Within TTL - should use cache - mock_time.return_value = ttl - 1 - result2 = openstax_content.fetch_module_content_cached("m99999") - self.assertEqual(mock_fetch.call_count, 1) - - # After TTL expires - mock_time.return_value = ttl + 1 - mock_fetch.return_value = "New content" - - result3 = openstax_content.fetch_module_content_cached("m99999") - self.assertEqual(result3, "New content") - self.assertEqual(mock_fetch.call_count, 2) - - def test_module_cache_handles_parse_flag(self): - """Verify parse flag creates separate cache entries.""" - with patch.object(openstax_content, 'fetch_module_content') as mock_fetch: - # Fetch with parse=True - mock_fetch.return_value = "Parsed content" - result1 = openstax_content.fetch_module_content_cached("m11111", parse=True) - self.assertEqual(result1, "Parsed content") - - # Fetch same module with 
parse=False (should be cache miss) - mock_fetch.return_value = "Raw content" - result2 = openstax_content.fetch_module_content_cached("m11111", parse=False) - self.assertEqual(result2, "Raw content") - - # Both should have been fetched (different cache keys) - self.assertEqual(mock_fetch.call_count, 2) - - def test_module_cache_handles_none_content(self): - """Verify None content is not cached.""" - with patch.object(openstax_content, 'fetch_module_content') as mock_fetch: - mock_fetch.return_value = None - - # First call returns None - result1 = openstax_content.fetch_module_content_cached("missing_module") - self.assertIsNone(result1) - - # Second call should try again (not cached) - result2 = openstax_content.fetch_module_content_cached("missing_module") - self.assertIsNone(result2) - - # Both calls should have tried to fetch - self.assertEqual(mock_fetch.call_count, 2) - - -if __name__ == "__main__": - unittest.main() diff --git a/samples/personalized_learning/agent/tests/test_keyword_hints.py b/samples/personalized_learning/agent/tests/test_keyword_hints.py deleted file mode 100644 index 3220702bc..000000000 --- a/samples/personalized_learning/agent/tests/test_keyword_hints.py +++ /dev/null @@ -1,221 +0,0 @@ -""" -Unit tests for KEYWORD_HINTS in openstax_chapters.py. 
- -Tests: -- New keywords map correctly to expected chapters -- Keyword matching is case insensitive -- Expanded keywords reduce LLM fallback scenarios -""" - -import unittest -from unittest.mock import patch -import sys -import os - -# Add parent directories to path for imports -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - - -class TestKeywordHints(unittest.TestCase): - """Tests for KEYWORD_HINTS dictionary.""" - - def test_atp_keywords_map_correctly(self): - """Verify ATP-related keywords map to correct chapters.""" - from openstax_chapters import KEYWORD_HINTS - - atp_keywords = [ - "atp", - "adp", - "adenosine triphosphate", - "adenosine diphosphate", - "cellular energy", - "cell energy", - "high energy bond", - "phosphate bond", - "energy currency", - "atp hydrolysis", - "hydrolysis", - ] - - for keyword in atp_keywords: - self.assertIn(keyword, KEYWORD_HINTS, - f"Keyword '{keyword}' should be in KEYWORD_HINTS") - chapters = KEYWORD_HINTS[keyword] - self.assertTrue( - any("atp" in ch or "energy" in ch for ch in chapters), - f"Keyword '{keyword}' should map to ATP or energy chapters, got {chapters}" - ) - - def test_thermodynamics_keywords_map_correctly(self): - """Verify thermodynamics keywords map to correct chapters.""" - from openstax_chapters import KEYWORD_HINTS - - thermo_keywords = [ - "thermodynamics", - "exergonic", - "endergonic", - "gibbs free energy", - "entropy", - ] - - expected_chapters = [ - "6-3-the-laws-of-thermodynamics", - "6-2-potential-kinetic-free-and-activation-energy", - ] - - for keyword in thermo_keywords: - self.assertIn(keyword, KEYWORD_HINTS, - f"Keyword '{keyword}' should be in KEYWORD_HINTS") - chapters = KEYWORD_HINTS[keyword] - self.assertTrue( - any(ch in expected_chapters for ch in chapters), - f"Keyword '{keyword}' should map to thermodynamics chapters, got {chapters}" - ) - - def test_photosynthesis_keywords_map_correctly(self): - """Verify photosynthesis keywords map to correct 
chapters.""" - from openstax_chapters import KEYWORD_HINTS - - photo_keywords = [ - "photosynthesis", - "chloroplast", - "chlorophyll", - "calvin cycle", - "light reaction", - ] - - for keyword in photo_keywords: - self.assertIn(keyword, KEYWORD_HINTS, - f"Keyword '{keyword}' should be in KEYWORD_HINTS") - chapters = KEYWORD_HINTS[keyword] - self.assertTrue( - any("8-" in ch or "photosynthesis" in ch for ch in chapters), - f"Keyword '{keyword}' should map to photosynthesis chapters (8-*), got {chapters}" - ) - - def test_keyword_matching_case_insensitive(self): - """Verify keyword matching works regardless of case.""" - from openstax_chapters import KEYWORD_HINTS - - # All keywords should be lowercase in the dictionary - for keyword in KEYWORD_HINTS.keys(): - self.assertEqual(keyword, keyword.lower(), - f"Keyword '{keyword}' should be lowercase") - - def test_new_expanded_keywords_exist(self): - """Verify newly added keywords are present.""" - from openstax_chapters import KEYWORD_HINTS - - # These are keywords that were added in the latency optimization - new_keywords = [ - "adp", - "cellular energy", - "cell energy", - "high energy bond", - "phosphate bond", - "phosphate group", - "energy currency", - "energy transfer", - "bond breaking", - "bond energy", - "atp hydrolysis", - "exergonic", - "endergonic", - "gibbs free energy", - "thermodynamics", - "first law", - "second law", - "entropy", - ] - - for keyword in new_keywords: - self.assertIn(keyword, KEYWORD_HINTS, - f"New keyword '{keyword}' should be in KEYWORD_HINTS") - - def test_keyword_chapters_are_valid(self): - """Verify all keyword mappings point to valid chapters.""" - from openstax_chapters import KEYWORD_HINTS, OPENSTAX_CHAPTERS - - for keyword, chapters in KEYWORD_HINTS.items(): - self.assertIsInstance(chapters, list, - f"Chapters for '{keyword}' should be a list") - self.assertGreater(len(chapters), 0, - f"Chapters for '{keyword}' should not be empty") - - for chapter_slug in chapters: - 
self.assertIn(chapter_slug, OPENSTAX_CHAPTERS, - f"Chapter '{chapter_slug}' for keyword '{keyword}' " - "should be in OPENSTAX_CHAPTERS") - - def test_common_topics_have_keywords(self): - """Verify common biology topics have keyword coverage.""" - from openstax_chapters import KEYWORD_HINTS - - common_topics = [ - "atp", - "dna", - "rna", - "protein", - "cell", - "enzyme", - "photosynthesis", - "respiration", - "mitosis", - "meiosis", - "evolution", - "genetics", - "nervous", - "immune", - "heart", - "lung", - ] - - covered = 0 - for topic in common_topics: - if topic in KEYWORD_HINTS: - covered += 1 - - coverage_pct = covered / len(common_topics) * 100 - self.assertGreater(coverage_pct, 80, - f"Should have >80% keyword coverage for common topics, " - f"got {coverage_pct:.1f}%") - - -class TestKeywordMatching(unittest.TestCase): - """Tests for keyword matching logic.""" - - def test_keyword_match_finds_chapters(self): - """Verify keyword matching finds the right chapters for common topics.""" - from openstax_chapters import KEYWORD_HINTS - - # Test topics that SHOULD match keywords - test_cases = [ - ("atp", ["6-4-atp-adenosine-triphosphate", "6-1-energy-and-metabolism"]), - ("photosynthesis", ["8-1-overview-of-photosynthesis", "8-2-the-light-dependent-reaction-of-photosynthesis"]), - ("dna", ["14-2-dna-structure-and-sequencing", "14-3-basics-of-dna-replication"]), - ] - - for keyword, expected_chapters in test_cases: - self.assertIn(keyword, KEYWORD_HINTS, - f"Keyword '{keyword}' should be in KEYWORD_HINTS") - actual_chapters = KEYWORD_HINTS[keyword] - for expected in expected_chapters: - self.assertIn(expected, actual_chapters, - f"Expected chapter '{expected}' for keyword '{keyword}'") - - def test_keyword_match_returns_list(self): - """Verify all keyword mappings return lists of chapters.""" - from openstax_chapters import KEYWORD_HINTS - - for keyword, chapters in KEYWORD_HINTS.items(): - self.assertIsInstance(chapters, list, - f"Chapters for '{keyword}' 
should be a list") - self.assertGreater(len(chapters), 0, - f"Chapters list for '{keyword}' should not be empty") - for chapter in chapters: - self.assertIsInstance(chapter, str, - f"Each chapter slug should be a string") - - -if __name__ == "__main__": - unittest.main() diff --git a/samples/personalized_learning/agent/tests/test_parallel_fetch.py b/samples/personalized_learning/agent/tests/test_parallel_fetch.py deleted file mode 100644 index 212c27f11..000000000 --- a/samples/personalized_learning/agent/tests/test_parallel_fetch.py +++ /dev/null @@ -1,223 +0,0 @@ -""" -Unit tests for parallel fetching functionality in openstax_content.py. - -Tests: -- Parallel chapter fetching returns all content -- Partial failures don't break entire fetch -- Parallel is actually faster than sequential (with mocked delays) -""" - -import time -import unittest -from unittest.mock import patch, MagicMock -from concurrent.futures import ThreadPoolExecutor - -import sys -import os - -# Add parent directories to path for imports -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - - -class TestParallelChapterFetch(unittest.TestCase): - """Tests for parallel chapter fetching in openstax_content.py""" - - def setUp(self): - """Reset caches before each test.""" - from openstax_content import clear_module_cache - clear_module_cache() - - def test_parallel_chapter_fetch_returns_all_content(self): - """Verify parallel fetch returns same content as sequential would.""" - from openstax_content import fetch_multiple_chapters - - with patch('openstax_content.fetch_chapter_content') as mock_fetch: - # Set up mock to return different content for each chapter - def side_effect(slug): - return { - "chapter_slug": slug, - "title": f"Title for {slug}", - "url": f"https://example.com/{slug}", - "module_ids": [f"m{hash(slug) % 10000}"], - "content": f"Content for {slug}", - } - - mock_fetch.side_effect = side_effect - - # Fetch multiple chapters - chapters = ["6-4-atp", 
"7-2-glycolysis", "8-1-photosynthesis"] - results = fetch_multiple_chapters(chapters) - - # Verify all chapters were fetched - self.assertEqual(len(results), 3) - - # Verify content is correct - slugs = [r["chapter_slug"] for r in results] - self.assertIn("6-4-atp", slugs) - self.assertIn("7-2-glycolysis", slugs) - self.assertIn("8-1-photosynthesis", slugs) - - def test_parallel_fetch_handles_partial_failures(self): - """Verify partial failures don't break entire fetch.""" - from openstax_content import fetch_multiple_chapters - - with patch('openstax_content.fetch_chapter_content') as mock_fetch: - # Set up mock where one chapter fails - def side_effect(slug): - if slug == "failing-chapter": - raise Exception("Simulated failure") - return { - "chapter_slug": slug, - "title": f"Title for {slug}", - "url": f"https://example.com/{slug}", - "module_ids": ["m12345"], - "content": f"Content for {slug}", - } - - mock_fetch.side_effect = side_effect - - # Fetch including one failing chapter - chapters = ["good-chapter-1", "failing-chapter", "good-chapter-2"] - results = fetch_multiple_chapters(chapters) - - # Should still get the two good chapters - self.assertEqual(len(results), 2) - slugs = [r["chapter_slug"] for r in results] - self.assertIn("good-chapter-1", slugs) - self.assertIn("good-chapter-2", slugs) - self.assertNotIn("failing-chapter", slugs) - - def test_parallel_fetch_handles_none_returns(self): - """Verify None returns are filtered out.""" - from openstax_content import fetch_multiple_chapters - - with patch('openstax_content.fetch_chapter_content') as mock_fetch: - # Set up mock where one chapter returns None - def side_effect(slug): - if slug == "missing-chapter": - return None - return { - "chapter_slug": slug, - "title": f"Title for {slug}", - "url": f"https://example.com/{slug}", - "module_ids": ["m12345"], - "content": f"Content for {slug}", - } - - mock_fetch.side_effect = side_effect - - chapters = ["chapter-1", "missing-chapter", "chapter-2"] - 
results = fetch_multiple_chapters(chapters) - - # Should only get the two valid chapters - self.assertEqual(len(results), 2) - - def test_single_chapter_no_threading_overhead(self): - """Verify single chapter fetch doesn't use threading.""" - from openstax_content import fetch_multiple_chapters - - with patch('openstax_content.fetch_chapter_content') as mock_fetch: - with patch('openstax_content.ThreadPoolExecutor') as mock_executor: - mock_fetch.return_value = { - "chapter_slug": "single", - "title": "Single Chapter", - "url": "https://example.com/single", - "module_ids": ["m12345"], - "content": "Content", - } - - # Fetch single chapter - results = fetch_multiple_chapters(["single"]) - - # ThreadPoolExecutor should NOT be used for single chapter - mock_executor.assert_not_called() - - # But fetch should still work - self.assertEqual(len(results), 1) - - def test_empty_list_returns_empty(self): - """Verify empty input returns empty output.""" - from openstax_content import fetch_multiple_chapters - - results = fetch_multiple_chapters([]) - self.assertEqual(results, []) - - def test_parallel_fetch_faster_than_sequential(self): - """Verify parallel is actually faster with simulated delays.""" - from openstax_content import fetch_multiple_chapters - - def slow_fetch(slug): - """Simulate slow network fetch.""" - time.sleep(0.1) # 100ms delay - return { - "chapter_slug": slug, - "title": f"Title for {slug}", - "url": f"https://example.com/{slug}", - "module_ids": ["m12345"], - "content": f"Content for {slug}", - } - - with patch('openstax_content.fetch_chapter_content', side_effect=slow_fetch): - chapters = ["ch1", "ch2", "ch3"] - - start = time.time() - results = fetch_multiple_chapters(chapters) - elapsed = time.time() - start - - # With 3 chapters at 100ms each: - # - Sequential would take ~300ms - # - Parallel should take ~100-150ms - self.assertEqual(len(results), 3) - - # Parallel should be significantly faster than sequential - # Allow some overhead, but should 
be under 250ms (vs 300ms sequential) - self.assertLess(elapsed, 0.25, - f"Parallel fetch took {elapsed:.3f}s, expected < 0.25s") - - -class TestParallelModuleFetch(unittest.TestCase): - """Tests for parallel module fetching within chapters.""" - - def setUp(self): - """Reset caches before each test.""" - from openstax_content import clear_module_cache - clear_module_cache() - - def test_chapter_content_fetches_modules_in_parallel(self): - """Verify chapter content fetches multiple modules in parallel.""" - from openstax_content import fetch_chapter_content - - # Mock the chapter mapping to have multiple modules - mock_modules = { - "test-chapter": ["m1", "m2", "m3"], - } - mock_chapters = { - "test-chapter": "Test Chapter Title", - } - - with patch('openstax_content.fetch_module_content_cached') as mock_fetch: - with patch.dict('openstax_chapters.CHAPTER_TO_MODULES', mock_modules): - with patch.dict('openstax_chapters.OPENSTAX_CHAPTERS', mock_chapters): - with patch('openstax_chapters.get_openstax_url_for_chapter', - return_value="https://example.com/test"): - - # Each module returns different content - mock_fetch.side_effect = lambda mid: f"Content for {mid}" - - # Import fresh to get patched values - from openstax_content import fetch_chapter_content as fetch_fn - - result = fetch_fn("test-chapter") - - # All 3 modules should have been fetched - self.assertEqual(mock_fetch.call_count, 3) - - # Content should be combined - if result: - self.assertIn("Content for m1", result["content"]) - self.assertIn("Content for m2", result["content"]) - self.assertIn("Content for m3", result["content"]) - - -if __name__ == "__main__": - unittest.main() diff --git a/samples/personalized_learning/api-server.ts b/samples/personalized_learning/api-server.ts index cdf93556b..77eeb113d 100644 --- a/samples/personalized_learning/api-server.ts +++ b/samples/personalized_learning/api-server.ts @@ -21,10 +21,71 @@ import { execSync } from "child_process"; import { writeFileSync, 
readFileSync, existsSync } from "fs"; import { join } from "path"; import { config } from "dotenv"; +import { initializeApp, applicationDefault } from "firebase-admin/app"; +import { getAuth } from "firebase-admin/auth"; // Load environment variables config(); +// ============================================================================= +// FIREBASE ADMIN - Server-side authentication +// ============================================================================= +initializeApp({ credential: applicationDefault() }); + +// Local dev mode: skip auth when Firebase is not configured (matches client behavior) +const IS_LOCAL_DEV_MODE = !process.env.VITE_FIREBASE_API_KEY; +if (IS_LOCAL_DEV_MODE) { + console.warn("[API Server] ⚠️ LOCAL DEV MODE: Authentication disabled (VITE_FIREBASE_API_KEY not set)"); +} + +// Access control - reads from environment variables (shared with src/firebase-auth.ts) +// Uses VITE_ prefix so the same .env works for both client and server +const ALLOWED_DOMAIN = process.env.VITE_ALLOWED_DOMAIN ?? "google.com"; +const ALLOWED_EMAILS: string[] = (process.env.VITE_ALLOWED_EMAILS ?? 
"") + .split(",") + .map((e: string) => e.trim().toLowerCase()) + .filter((e: string) => e.length > 0); + +function isAllowedEmail(email: string | undefined): boolean { + if (!email) return false; + const emailLower = email.toLowerCase(); + if (ALLOWED_EMAILS.length > 0 && ALLOWED_EMAILS.includes(emailLower)) return true; + if (ALLOWED_DOMAIN && emailLower.endsWith(`@${ALLOWED_DOMAIN}`)) return true; + if (!ALLOWED_DOMAIN && ALLOWED_EMAILS.length === 0) return true; // No restrictions + return false; +} + +async function authenticateRequest(req: any, res: any): Promise { + // In local dev mode, skip authentication entirely + if (IS_LOCAL_DEV_MODE) { + return true; + } + + const authHeader = req.headers.authorization; + if (!authHeader?.startsWith("Bearer ")) { + res.writeHead(401, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ error: "Missing or malformed Authorization header" })); + return false; + } + try { + const token = authHeader.split("Bearer ")[1]; + const decoded = await getAuth().verifyIdToken(token); + if (!isAllowedEmail(decoded.email)) { + console.error("[API Server] Access denied for:", decoded.email); + res.writeHead(403, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ error: "Email not authorized" })); + return false; + } + return true; + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err); + console.error("[API Server] Auth failed:", message); + res.writeHead(403, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ error: "Invalid or expired token" })); + return false; + } +} + // ============================================================================= // MESSAGE LOG - Captures all request/response traffic for demo purposes // ============================================================================= @@ -327,7 +388,7 @@ async function queryAgentEngine(format: string, context: string = ""): Promise dict: import urllib.error topic_lower = topic.lower() - matched_slugs = set() + matched_slugs = [] # Use list to preserve order (first match = highest priority) # First try keyword matching (fast path) # Use word boundary matching to avoid false positives like "vision" in "cell division" @@ -754,13 +762,17 @@ def fetch_openstax_content(topic: str) -> dict: # This ensures "vision" doesn't match "cell division" pattern = r'\b' + re.escape(keyword) + r'\b' if re.search(pattern, topic_lower): - matched_slugs.update(slugs) + for slug in slugs: + if slug not in matched_slugs: + matched_slugs.append(slug) # If no keyword match, use LLM to find relevant chapters if not matched_slugs: llm_slugs = llm_match_topic_to_chapters(topic) if llm_slugs: - matched_slugs.update(llm_slugs) + for slug in llm_slugs: + if slug not in matched_slugs: + matched_slugs.append(slug) # If still no match (LLM found nothing relevant), return empty with clear message if not matched_slugs: @@ -774,6 +786,12 @@ def fetch_openstax_content(topic: str) -> dict: content_parts = [] sources = [] + # Create SSL context once - use certifi CA bundle if available + if _HAS_CERTIFI: + ssl_ctx = ssl.create_default_context(cafile=certifi.where()) + else: + ssl_ctx = ssl.create_default_context() + for slug in chapter_slugs: module_ids = CHAPTER_TO_MODULES.get(slug, []) if not module_ids: @@ -787,7 +805,7 @@ def fetch_openstax_content(topic: str) -> dict: 
for module_id in module_ids: github_url = f"https://raw.githubusercontent.com/openstax/osbooks-biology-bundle/main/modules/{module_id}/index.cnxml" try: - with urllib.request.urlopen(github_url, timeout=10) as response: + with urllib.request.urlopen(github_url, timeout=10, context=ssl_ctx) as response: cnxml = response.read().decode('utf-8') text = parse_cnxml_to_text(cnxml) if text: @@ -952,7 +970,7 @@ async def generate_quiz( location=os.environ.get("GOOGLE_CLOUD_LOCATION", "us-central1"), ) - # Fetch OpenStax content for context - REQUIRED + # Fetch OpenStax content for context openstax_data = fetch_openstax_content(topic) textbook_context = openstax_data.get("content", "") sources = openstax_data.get("sources", []) @@ -1104,7 +1122,8 @@ async def get_textbook_content( }) return json.dumps({ - "content": content[:4000], # Limit content length + # Limit content length. Okay for a demo but could be improved + "content": content[:4000], "sources": source_citations }) @@ -1159,7 +1178,7 @@ async def get_video_content(tool_context: ToolContext) -> str: return json.dumps({"format": "video", "a2ui": a2ui, "surfaceId": SURFACE_ID}) - # Create the agent WITH tools + # Create the agent with tools agent = Agent( name="personalized_learning_agent", model=model_id, @@ -1217,11 +1236,11 @@ async def get_video_content(tool_context: ToolContext) -> str: print(f"Context Bucket: gs://{context_bucket}/learner_context/") print() print("Next steps:") - print(f" 1. Copy the Resource ID above") - print(f" 2. Paste it into the notebook's AGENT_RESOURCE_ID variable") + print(" 1. Copy the Resource ID above") + print(" 2. Paste it into the notebook's AGENT_RESOURCE_ID variable") print(f" 3. Upload learner context files to gs://{context_bucket}/learner_context/") - print(f" 4. Run the remaining notebook cells to configure and start the demo") + print(" 4. 
Run the remaining notebook cells to configure and start the demo") if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/samples/personalized_learning/deploy_hosting.py b/samples/personalized_learning/deploy_hosting.py index 51529ccbc..dca98ee6a 100755 --- a/samples/personalized_learning/deploy_hosting.py +++ b/samples/personalized_learning/deploy_hosting.py @@ -92,39 +92,63 @@ def check_prerequisites() -> dict: def prepare_build_context(demo_dir: Path) -> Path: """ - Prepare the build context by copying the A2UI dependency. + Prepare the build context by copying the A2UI dependencies. Returns the path to the prepared directory. """ print("\nPreparing build context...") - # The A2UI web-lib is at ../../renderers/lit relative to demo_dir - a2ui_source = demo_dir.parent.parent / "renderers" / "lit" + renderers_dir = demo_dir.parent.parent / "renderers" + + # Copy web_core first (lit depends on it) + web_core_source = renderers_dir / "web_core" + web_core_dest = demo_dir / "a2ui-web-core" + + if not web_core_source.exists(): + print(f"ERROR: A2UI web_core not found at {web_core_source}") + sys.exit(1) + + if web_core_dest.exists(): + print(f" Removing old {web_core_dest}") + shutil.rmtree(web_core_dest) + + print(f" Copying {web_core_source} → {web_core_dest}") + shutil.copytree(web_core_source, web_core_dest, ignore=shutil.ignore_patterns("node_modules", ".git")) + + # Copy lit (the main web-lib) + a2ui_source = renderers_dir / "lit" a2ui_dest = demo_dir / "a2ui-web-lib" if not a2ui_source.exists(): print(f"ERROR: A2UI web-lib not found at {a2ui_source}") sys.exit(1) - # Remove old copy if exists if a2ui_dest.exists(): print(f" Removing old {a2ui_dest}") shutil.rmtree(a2ui_dest) # Copy the dependency (excluding node_modules, but keeping dist/ for pre-built output) print(f" Copying {a2ui_source} → {a2ui_dest}") - shutil.copytree(a2ui_source, a2ui_dest, ignore=shutil.ignore_patterns("node_modules", ".git")) + # Update lit's package.json 
to point to the local web_core copy + lit_package_json = a2ui_dest / "package.json" + if lit_package_json.exists(): + content = lit_package_json.read_text() + content = content.replace('"@a2ui/web_core": "file:../web_core"', '"@a2ui/web_core": "file:../a2ui-web-core"') + lit_package_json.write_text(content) + print(" Updated lit package.json to reference local web_core") + print(" Build context ready") return demo_dir def cleanup_build_context(demo_dir: Path): - """Remove the temporary A2UI copy after deployment.""" - a2ui_dest = demo_dir / "a2ui-web-lib" - if a2ui_dest.exists(): - print(f"\nCleaning up {a2ui_dest}") - shutil.rmtree(a2ui_dest) + """Remove the temporary A2UI copies after deployment.""" + for dirname in ["a2ui-web-lib", "a2ui-web-core"]: + dest = demo_dir / dirname + if dest.exists(): + print(f"\nCleaning up {dest}") + shutil.rmtree(dest) def deploy_cloud_run(project_id: str, service_name: str, region: str) -> str: @@ -181,6 +205,15 @@ def deploy_cloud_run(project_id: str, service_name: str, region: str) -> str: "--quiet", ], check=False) + # Grant Artifact Registry writer permission to compute service account + # Cloud Run source deployments use the compute SA to push Docker images + run_command([ + "gcloud", "projects", "add-iam-policy-binding", project_id, + "--member", f"serviceAccount:{compute_sa}", + "--role", "roles/artifactregistry.writer", + "--quiet", + ], check=False) + # Also grant Cloud Build service account permissions cloudbuild_sa = f"{project_number}@cloudbuild.gserviceaccount.com" run_command([ @@ -311,7 +344,7 @@ def configure_iap_access( print("\n To grant access later, use:") print(f" gcloud run services add-iam-policy-binding {service_name} \\") print(f" --region={region} --member='user:EMAIL' --role='roles/run.invoker'") - print(f"\n Or for a domain:") + print("\n Or for a domain:") print(f" gcloud run services add-iam-policy-binding {service_name} \\") print(f" --region={region} --member='domain:DOMAIN' 
--role='roles/run.invoker'") return @@ -497,20 +530,20 @@ def main(): if not args.cloud_run_only: print(f"\n✅ Demo is live at: https://{project_id}.web.app") - print(f"\nAccess is controlled by Firebase Authentication.") - print(f"Users must sign in with a @google.com account (configurable in src/firebase-auth.ts).") + print("\nAccess is controlled by Firebase Authentication.") + print("Users must sign in with a @google.com account (configurable in src/firebase-auth.ts).") if args.cloud_run_only: print(f"\nCloud Run service: {args.service_name}") print(f"Region: {args.region}") if args.allow_domain or args.allow_users: - print(f"\nAuthentication: IAP-protected") + print("\nAuthentication: IAP-protected") if args.allow_domain: print(f" Allowed domain: {args.allow_domain}") if args.allow_users: print(f" Allowed users: {args.allow_users}") else: - print(f"\n⚠️ Cloud Run deployed with --no-allow-unauthenticated.") + print("\n⚠️ Cloud Run deployed with --no-allow-unauthenticated.") print(f" Grant access with: gcloud run services add-iam-policy-binding {args.service_name} \\") print(f" --region={args.region} --member='user:EMAIL' --role='roles/run.invoker'") diff --git a/samples/personalized_learning/index.html b/samples/personalized_learning/index.html index 174f879d8..0eec9ce27 100644 --- a/samples/personalized_learning/index.html +++ b/samples/personalized_learning/index.html @@ -371,6 +371,8 @@ .message-content { flex: 1; padding-top: 4px; + min-width: 0; /* Allow flex child to shrink below content size */ + overflow: hidden; /* Contain children within message bounds */ } .message-sender { @@ -433,6 +435,14 @@ background: var(--chat-bg); border-radius: 12px; border: 1px solid var(--border-color); + overflow-x: auto; + -webkit-overflow-scrolling: touch; + } + + /* Prevent A2UI child components from shrinking */ + .a2ui-container a2ui-surface, + .a2ui-container a2ui-theme-provider { + min-width: min-content; } /* Source attribution styling */ @@ -594,7 +604,7 @@ color: 
var(--text-secondary); } - /* Flashcard Row */ + /* Flashcard Row - prevent cards from shrinking when in horizontal scroll */ .flashcard-row { display: flex; flex-wrap: wrap; @@ -602,6 +612,13 @@ margin-top: 16px; } + /* A2UI Row children should not shrink (enables horizontal scroll) */ + .a2ui-container [data-a2ui-row] > *, + .a2ui-container a2ui-flashcard, + .a2ui-container a2ui-quizcard { + flex-shrink: 0; + } + /* Audio/Video Card */ .media-card { margin-top: 16px; diff --git a/samples/personalized_learning/package.json b/samples/personalized_learning/package.json index 173293824..31707df54 100644 --- a/samples/personalized_learning/package.json +++ b/samples/personalized_learning/package.json @@ -8,10 +8,8 @@ "dev": "concurrently \"npm run dev:api\" \"npm run dev:vite\"", "dev:vite": "vite", "dev:api": "tsx api-server.ts", - "dev:agent": "cd agent && python server.py", "build": "tsc && vite build", "preview": "vite preview", - "start:all": "concurrently \"npm run dev:api\" \"npm run dev:vite\" \"npm run dev:agent\"", "test": "node tests/unit-tests.mjs && node tests/integration-tests.mjs", "test:unit": "node tests/unit-tests.mjs", "test:integration": "node tests/integration-tests.mjs", @@ -23,6 +21,7 @@ "@lit-labs/signals": "^0.1.3", "@lit/context": "^1.1.4", "firebase": "^10.14.1", + "firebase-admin": "^12.0.0", "google-auth-library": "^9.0.0", "lit": "^3.3.1" }, diff --git a/samples/personalized_learning/quickstart_setup.sh b/samples/personalized_learning/quickstart_setup.sh index b4cf51d61..aa99303b8 100755 --- a/samples/personalized_learning/quickstart_setup.sh +++ b/samples/personalized_learning/quickstart_setup.sh @@ -82,7 +82,6 @@ if [ "$SKIP_PIP" = false ]; then "google-genai>=1.0.0" \ "google-cloud-storage>=2.10.0" \ "python-dotenv>=1.0.0" \ - "litellm>=1.0.0" \ "vertexai" 2>/dev/null echo " Python dependencies installed" else @@ -93,6 +92,10 @@ fi if [ "$SKIP_NPM" = false ]; then echo -e "${YELLOW}[3/6]${NC} Installing Node.js dependencies..." 
+ # Build A2UI core library first (lit depends on it) + (cd ../../renderers/web_core && npm install --registry https://registry.npmjs.org/ --silent 2>/dev/null && npm run build --silent 2>/dev/null) + echo " A2UI core library built" + # Build A2UI renderer (cd ../../renderers/lit && npm install --registry https://registry.npmjs.org/ --silent 2>/dev/null && npm run build --silent 2>/dev/null) echo " A2UI renderer built" diff --git a/samples/personalized_learning/src/firebase-auth.ts b/samples/personalized_learning/src/firebase-auth.ts index 72d24e6e2..c5aec5898 100644 --- a/samples/personalized_learning/src/firebase-auth.ts +++ b/samples/personalized_learning/src/firebase-auth.ts @@ -1,11 +1,18 @@ /** * Firebase Authentication for Personalized Learning Demo * - * By default, restricts access to @google.com email addresses. - * To customize access: - * - Change ALLOWED_DOMAIN to your organization's domain - * - Add specific emails to ALLOWED_EMAILS whitelist - * - Or set ALLOWED_DOMAIN to "" and use only the whitelist + * Authentication flow: + * 1. User signs in with Google (Firebase Auth) + * 2. Client calls server /api/check-access to verify authorization + * 3. Server checks email against VITE_ALLOWED_DOMAIN and VITE_ALLOWED_EMAILS + * 4. If authorized, user proceeds; if not, signed out with error + * + * Access control is configured via environment variables (see .env.template): + * - VITE_ALLOWED_DOMAIN: restrict to a domain (e.g., "yourcompany.com") + * - VITE_ALLOWED_EMAILS: whitelist specific emails (comma-separated) + * + * The SERVER is the single source of truth for authorization decisions. + * This file only handles Firebase authentication, not authorization. * * LOCAL DEV MODE: If VITE_FIREBASE_API_KEY is not set, auth is bypassed * and the app runs without requiring sign-in. 
@@ -47,69 +54,35 @@ if (isFirebaseConfigured) { console.log("[Auth] Firebase not configured - running in local dev mode (no auth required)"); } -// Google provider with domain restriction hint +// Google provider +// Note: The 'hd' parameter is just a UI hint to show accounts from a specific domain. +// It does NOT enforce access - the server does that via /api/check-access. const provider = new GoogleAuthProvider(); -provider.setCustomParameters({ - hd: "google.com", // Hint to show only google.com accounts (change if using different domain) -}); +const hintDomain = import.meta.env.VITE_ALLOWED_DOMAIN; +if (hintDomain) { + provider.setCustomParameters({ hd: hintDomain }); +} // ============================================================================ -// ACCESS CONTROL CONFIGURATION -// ============================================================================ - -// Allowed email domain (e.g., "google.com", "yourcompany.com") -// Set to empty string "" to disable domain-based access and use only the whitelist -const ALLOWED_DOMAIN = "google.com"; - -// Whitelist of specific email addresses that are always allowed, -// regardless of domain. Add emails here to grant access to external collaborators. 
-// Example: ["alice@example.com", "bob@partner.org", "charlie@university.edu"] -const ALLOWED_EMAILS: string[] = [ - // "collaborator@example.com", - // "reviewer@partner.org", -]; - +// AUTHENTICATION FUNCTIONS // ============================================================================ /** - * Check if user's email is allowed (by domain or whitelist) - */ -function isAllowedEmail(email: string | null): boolean { - if (!email) return false; - - // Check whitelist first - if (ALLOWED_EMAILS.includes(email.toLowerCase())) { - return true; - } - - // Check domain if configured - if (ALLOWED_DOMAIN && email.endsWith(`@${ALLOWED_DOMAIN}`)) { - return true; - } - - return false; -} - -/** - * Get current user if authenticated and from allowed domain - * In local dev mode (no Firebase), returns null + * Get current Firebase user (if authenticated) + * Note: This only checks Firebase auth, not server authorization */ export function getCurrentUser(): User | null { if (!auth) return null; - const user = auth.currentUser; - if (user && isAllowedEmail(user.email)) { - return user; - } - return null; + return auth.currentUser; } /** * Get ID token for API requests - * In local dev mode, returns null (API server should allow unauthenticated requests locally) + * In local dev mode, returns null (API server allows unauthenticated requests locally) */ export async function getIdToken(): Promise { if (!auth) return null; - const user = getCurrentUser(); + const user = auth.currentUser; if (!user) return null; try { return await user.getIdToken(); @@ -119,10 +92,31 @@ export async function getIdToken(): Promise { } } +/** + * Check with server if the current user is authorized + * This is the ONLY place authorization is checked - the server is the source of truth. + * Returns true if authorized, false otherwise. 
+ */ +export async function checkServerAuthorization(): Promise { + const token = await getIdToken(); + if (!token) return false; + + try { + const response = await fetch("/api/check-access", { + method: "GET", + headers: { Authorization: `Bearer ${token}` }, + }); + return response.ok; + } catch (error) { + console.error("[Auth] Server authorization check failed:", error); + return false; + } +} + /** * Sign in with Google - * Returns user if successful and from allowed domain, null otherwise - * In local dev mode, this should not be called (UI bypasses auth) + * Returns user if Firebase auth succeeds, null if cancelled + * IMPORTANT: Caller must then call checkServerAuthorization() to verify access */ export async function signInWithGoogle(): Promise { if (!auth) { @@ -131,16 +125,8 @@ export async function signInWithGoogle(): Promise { } try { const result = await signInWithPopup(auth, provider); - const user = result.user; - - if (!isAllowedEmail(user.email)) { - console.warn(`[Auth] User ${user.email} not from ${ALLOWED_DOMAIN}`); - await signOut(auth); - throw new Error(`Access restricted to @${ALLOWED_DOMAIN} accounts`); - } - - console.log(`[Auth] Signed in: ${user.email}`); - return user; + console.log(`[Auth] Firebase sign-in successful: ${result.user.email}`); + return result.user; } catch (error: any) { if (error.code === "auth/popup-closed-by-user") { console.log("[Auth] Sign-in cancelled by user"); @@ -161,34 +147,26 @@ export async function signOutUser(): Promise { /** * Subscribe to auth state changes - * Callback receives user if authenticated and from allowed domain, null otherwise - * In local dev mode, immediately calls back with a mock "authenticated" state + * Callback receives user if authenticated, null otherwise + * Note: This only tracks Firebase auth state, not server authorization */ export function onAuthChange( callback: (user: User | null) => void ): () => void { // Local dev mode: no Firebase, skip auth entirely if (!auth) { - // 
Immediately trigger callback as "authenticated" in local dev mode - // We pass null but main.ts will check isFirebaseConfigured to bypass auth setTimeout(() => callback(null), 0); - return () => {}; // No-op unsubscribe + return () => {}; } - return onAuthStateChanged(auth, (user) => { - if (user && isAllowedEmail(user.email)) { - callback(user); - } else { - callback(null); - } - }); + return onAuthStateChanged(auth, callback); } /** - * Check if user is authenticated - * In local dev mode, returns false (but app bypasses auth check) + * Check if user is authenticated with Firebase + * Note: This does not check server authorization */ export function isAuthenticated(): boolean { if (!auth) return false; - return getCurrentUser() !== null; + return auth.currentUser !== null; } diff --git a/samples/personalized_learning/src/main.ts b/samples/personalized_learning/src/main.ts index 20097c3a0..f7c1f8b26 100644 --- a/samples/personalized_learning/src/main.ts +++ b/samples/personalized_learning/src/main.ts @@ -17,6 +17,7 @@ import { signOutUser, getIdToken, isFirebaseConfigured, + checkServerAuthorization, } from "./firebase-auth"; // Store current user for display @@ -36,12 +37,22 @@ async function init() { } // Set up auth state listener - onAuthChange((user) => { + onAuthChange(async (user) => { if (user) { - currentUserEmail = user.email; - console.log(`[Demo] Authenticated as ${user.email}`); - showApp(); - initializeApp(); + // User is authenticated with Firebase, now check server authorization + console.log(`[Demo] Firebase auth OK: ${user.email}, checking server authorization...`); + const authorized = await checkServerAuthorization(); + if (authorized) { + currentUserEmail = user.email; + console.log(`[Demo] Authorized: ${user.email}`); + showApp(); + initializeApp(); + } else { + // User authenticated but not authorized - sign them out + console.log(`[Demo] Not authorized: ${user.email}`); + await signOutUser(); + showLoginScreen("Your email is not authorized 
to access this application."); + } } else { currentUserEmail = null; console.log("[Demo] Not authenticated"); @@ -51,7 +62,7 @@ async function init() { } // Show login screen -function showLoginScreen() { +function showLoginScreen(errorMessage?: string) { const appContainer = document.getElementById("app-container"); const loginScreen = document.getElementById("login-screen"); @@ -69,7 +80,7 @@ function showLoginScreen() {

Personalized Learning Demo

- +