From f0481730031b49b0974d2b334ed3daedf99f925a Mon Sep 17 00:00:00 2001 From: Sam Goodgame Date: Mon, 26 Jan 2026 11:26:13 -0500 Subject: [PATCH 1/8] Simplify README and add server-side auth for additional security --- .../personalized_learning/Quickstart.ipynb | 32 +- samples/personalized_learning/README.md | 675 ++---------------- samples/personalized_learning/api-server.ts | 31 +- samples/personalized_learning/package.json | 1 + 4 files changed, 95 insertions(+), 644 deletions(-) diff --git a/samples/personalized_learning/Quickstart.ipynb b/samples/personalized_learning/Quickstart.ipynb index 849b30822..ae31e6a16 100644 --- a/samples/personalized_learning/Quickstart.ipynb +++ b/samples/personalized_learning/Quickstart.ipynb @@ -520,35 +520,7 @@ "cell_type": "markdown", "id": "n1qjkwg9yp", "metadata": {}, - "source": [ - "### Access Control\n", - "\n", - "By default, the demo restricts access to `@google.com` email addresses. This is configured in [`src/firebase-auth.ts`](src/firebase-auth.ts).\n", - "\n", - "**To change the allowed domain:**\n", - "```typescript\n", - "// In src/firebase-auth.ts\n", - "const ALLOWED_DOMAIN = \"yourcompany.com\"; // Change to your domain\n", - "```\n", - "\n", - "**To allow specific external collaborators (whitelist):**\n", - "```typescript\n", - "// In src/firebase-auth.ts\n", - "const ALLOWED_EMAILS: string[] = [\n", - " \"alice@example.com\",\n", - " \"bob@partner.org\",\n", - " \"charlie@university.edu\",\n", - "];\n", - "```\n", - "\n", - "**To allow anyone with a Google account:**\n", - "```typescript\n", - "const ALLOWED_DOMAIN = \"\"; // Disable domain restriction\n", - "const ALLOWED_EMAILS: string[] = []; // Empty whitelist = allow all\n", - "```\n", - "\n", - "> **Note:** After changing access control, rebuild and redeploy: `python deploy_hosting.py --project YOUR_PROJECT_ID`" - ] + "source": "### Access Control\n\nThe demo has two layers of authentication:\n\n1. 
**Client-side (Firebase Auth):** Restricts sign-in to `@google.com` emails by default, configured in [`src/firebase-auth.ts`](src/firebase-auth.ts).\n2. **Server-side (Firebase Admin):** The API server verifies Firebase ID tokens on all endpoints, rejecting requests with missing or invalid tokens. See [`api-server.ts`](api-server.ts).\n\n**To change the allowed domain:**\n```typescript\n// In src/firebase-auth.ts\nconst ALLOWED_DOMAIN = \"yourcompany.com\"; // Change to your domain\n```\n\n**To allow specific external collaborators (whitelist):**\n```typescript\n// In src/firebase-auth.ts\nconst ALLOWED_EMAILS: string[] = [\n \"alice@example.com\",\n \"bob@partner.org\",\n \"charlie@university.edu\",\n];\n```\n\n**To allow anyone with a Google account:**\n```typescript\nconst ALLOWED_DOMAIN = \"\"; // Disable domain restriction\nconst ALLOWED_EMAILS: string[] = []; // Empty whitelist = allow all\n```\n\n> **Note:** After changing access control, rebuild and redeploy: `python deploy_hosting.py --project YOUR_PROJECT_ID`" }, { "cell_type": "markdown", @@ -612,4 +584,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file diff --git a/samples/personalized_learning/README.md b/samples/personalized_learning/README.md index 55b373088..e12218d9c 100644 --- a/samples/personalized_learning/README.md +++ b/samples/personalized_learning/README.md @@ -1,6 +1,6 @@ # Personalized Learning Demo -A full-stack sample demonstrating A2UI's capabilities for building AI-powered educational applications with remote agents, dynamic content generation, and custom UI components. +A full-stack sample demonstrating A2UI's capabilities for AI-powered educational applications. 
**Contributed by Google Public Sector's Rapid Innovation Team.** @@ -8,681 +8,130 @@ A full-stack sample demonstrating A2UI's capabilities for building AI-powered ed --- -## Overview +## What This Is -This demo showcases how A2UI enables agents to generate rich, interactive user interfaces dynamically. It demonstrates: +This demo shows how agents can generate entire UI experiences—not just text responses. When a student asks for flashcards on photosynthesis, the agent matches the topic to OpenStax textbook content, generates personalized study materials, and returns A2UI JSON that the frontend renders as interactive, flippable cards. -| Concept | Implementation | -|---------|----------------| -| **Remote Agent Deployment** | ADK agent deployed to Vertex AI Agent Engine, running independently from the UI | -| **A2A Protocol** | Agent-to-Agent protocol for frontend-to-agent communication | -| **Custom UI Components** | Flashcard and QuizCard components extending the A2UI component library | -| **Dynamic Content Generation** | Personalized A2UI JSON generated on-the-fly based on user requests | -| **Dynamic Context from GCS** | Learner profiles loaded from Cloud Storage at runtime | -| **Intelligent Content Matching** | LLM-powered topic-to-textbook matching across 167 OpenStax chapters | +The same request from different students (with different learner profiles) produces different content tailored to their learning style. -### What Makes This Demo Unique +Key concepts demonstrated: -Unlike traditional chat applications where the UI is fixed and only text flows between client and server, this demo shows how **agents can generate entire UI experiences**. When a student asks for flashcards on photosynthesis, the agent: - -1. Matches the topic to relevant OpenStax textbook content -2. Generates personalized study materials using an LLM -3. Returns A2UI JSON describing an interactive flashcard interface -4. 
The frontend renders the flashcards as flippable, interactive cards - -The same request from different students (with different learner profiles) produces different content tailored to their learning style and misconceptions. +- **Custom A2UI Components** — Flashcard and QuizCard extend the standard component library +- **Remote Agent** — ADK agent deployed to Vertex AI Agent Engine, decoupled from the UI +- **A2A Protocol** — Frontend-to-agent communication via Agent-to-Agent protocol +- **Dynamic Context** — Learner profiles loaded from GCS at runtime (no redeployment needed) +- **Content Retrieval** — LLM-powered topic matching across 167 OpenStax Biology chapters +- **Server-side Auth** — API endpoints verify Firebase ID tokens to protect GCP resources --- -## Architecture - -![Architecture Diagram](assets/architecture.jpg) - -### Component Overview - -``` -┌─────────────────────────────────────────────────────────────────────────────┐ -│ FRONTEND (Browser) │ -│ ┌─────────────────────────────────────────────────────────────────────┐ │ -│ │ Vite + TypeScript │ │ -│ │ ├── A2UI Lit Renderer (@a2ui/web-lib) │ │ -│ │ ├── Custom Components (Flashcard, QuizCard) │ │ -│ │ ├── Chat Orchestrator (intent routing, response handling) │ │ -│ │ └── A2A Client (Agent Engine communication) │ │ -│ └─────────────────────────────────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────────────────┐ -│ API SERVER (Node.js) │ -│ ┌─────────────────────────────────────────────────────────────────────┐ │ -│ │ api-server.ts │ │ -│ │ ├── /api/chat-with-intent → Gemini (intent + response + keywords) │ │ -│ │ ├── /a2ui-agent/a2a/query → Agent Engine proxy │ │ -│ │ └── Intent detection, keyword extraction, response generation │ │ -│ └─────────────────────────────────────────────────────────────────────┘ │ 
-└─────────────────────────────────────────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────────────────┐ -│ VERTEX AI AGENT ENGINE (Remote Agent) │ -│ ┌─────────────────────────────────────────────────────────────────────┐ │ -│ │ deploy.py → ADK Agent │ │ -│ │ ├── generate_flashcards(topic) → A2UI JSON │ │ -│ │ ├── generate_quiz(topic) → A2UI JSON │ │ -│ │ ├── get_textbook_content(topic) → OpenStax content │ │ -│ │ ├── get_audio_content() → AudioPlayer A2UI │ │ -│ │ └── get_video_content() → Video A2UI │ │ -│ └─────────────────────────────────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────────────────┐ -│ EXTERNAL RESOURCES │ -│ ├── OpenStax GitHub (raw.githubusercontent.com) → CNXML textbook content │ -│ ├── GCS Bucket ({project}-learner-context) → Learner profiles │ -│ └── GCS Bucket ({project}-openstax) → Optional content cache │ -└─────────────────────────────────────────────────────────────────────────────┘ -``` - -### Key Files - -| File | Purpose | -|------|---------| -| [deploy.py](deploy.py) | Deployment script with embedded agent code for Agent Engine | -| [agent/agent.py](agent/agent.py) | Modular agent code for local development | -| [api-server.ts](api-server.ts) | Node.js server handling intent detection and Agent Engine proxy | -| [src/chat-orchestrator.ts](src/chat-orchestrator.ts) | Frontend orchestration: routes intents to appropriate handlers | -| [src/a2a-client.ts](src/a2a-client.ts) | A2A protocol client with fallback content | -| [src/a2ui-renderer.ts](src/a2ui-renderer.ts) | Renders A2UI JSON using the Lit renderer | -| [src/flashcard.ts](src/flashcard.ts) | Custom Flashcard component (Lit web component) | -| [src/quiz-card.ts](src/quiz-card.ts) | Custom QuizCard component (Lit web component) | - ---- - -## Data Flow - -### 
Complete Request Lifecycle - -Here's what happens when a user asks "Quiz me on photosynthesis": - -#### 1. User Message → API Server - -The frontend sends the message to `/api/chat-with-intent`: - -```typescript -// src/chat-orchestrator.ts:205-229 -const response = await fetch("/api/chat-with-intent", { - method: "POST", - body: JSON.stringify({ - systemPrompt: this.systemPrompt, - messages: this.conversationHistory.slice(-10), - userMessage: message, - recentContext: recentContext, - }), -}); -``` - -#### 2. Intent Detection + Keyword Extraction (Single LLM Call) - -The API server uses Gemini to detect intent AND extract keywords in one call: - -```typescript -// api-server.ts:639-681 -const combinedSystemPrompt = `${systemPrompt} - -## INTENT CLASSIFICATION -- flashcards: user wants study cards -- quiz: user wants to be tested -- podcast: user wants audio content -... - -## KEYWORDS (for flashcards, podcast, video, quiz only) -When the intent is content-generating, include a "keywords" field with: -1. The CORRECTED topic (fix any spelling mistakes) -2. Related biology terms for content retrieval -3. Specific subtopics within that subject area -`; -``` - -**Response:** -```json -{ - "intent": "quiz", - "text": "Let's test your knowledge on photosynthesis!", - "keywords": "photosynthesis, chloroplast, chlorophyll, light reaction, calvin cycle, ATP" -} -``` - -#### 3. Frontend Routes to Agent Engine - -Based on the detected intent, the orchestrator calls the A2A client: - -```typescript -// src/chat-orchestrator.ts:161-165 -const a2uiResult = await this.a2aClient.generateContent( - intent, // "quiz" - topicContext // keywords from Gemini -); -``` - -#### 4. 
Agent Engine Query - -The API server proxies the request to Agent Engine using `:streamQuery`: - -```typescript -// api-server.ts:241 -const url = `https://${location}-aiplatform.googleapis.com/v1/projects/${projectNumber}/locations/${location}/reasoningEngines/${resourceId}:streamQuery`; - -const requestPayload = { - class_method: "stream_query", - input: { - user_id: "demo-user", - message: "Generate quiz for: photosynthesis, chloroplast, chlorophyll...", - }, -}; -``` - -#### 5. Agent Tool Execution - -The ADK agent receives the request and executes the appropriate tool: - -```python -# deploy.py:484-565 (generate_quiz function) -async def generate_quiz(tool_context: ToolContext, topic: str) -> str: - # Fetch OpenStax content for context - openstax_data = fetch_openstax_content(topic) - textbook_context = openstax_data.get("content", "") - sources = openstax_data.get("sources", []) - - # Generate quiz using Gemini with structured output - response = client.models.generate_content( - model=model_id, - contents=prompt, - config=types.GenerateContentConfig( - response_mime_type="application/json", - response_schema=quiz_schema, - ), - ) -``` - -#### 6. 
Content Matching (Keyword → Chapter → Module → GitHub) - -The agent uses a tiered matching system to find relevant content: - -```python -# deploy.py:750-757 - Word boundary matching for keywords -for keyword, slugs in KEYWORD_HINTS.items(): - pattern = r'\b' + re.escape(keyword) + r'\b' - if re.search(pattern, topic_lower): - matched_slugs.update(slugs) -``` - -If no keyword match: -```python -# deploy.py:759-763 - LLM fallback -if not matched_slugs: - llm_slugs = llm_match_topic_to_chapters(topic) - matched_slugs.update(llm_slugs) -``` - -Then fetch content: -```python -# deploy.py:788-797 - GitHub fetch -github_url = f"https://raw.githubusercontent.com/openstax/osbooks-biology-bundle/main/modules/{module_id}/index.cnxml" -with urllib.request.urlopen(github_url, timeout=10) as response: - cnxml = response.read().decode('utf-8') - text = parse_cnxml_to_text(cnxml) -``` - -#### 7. LLM Content Generation with Structured Output - -The agent uses Gemini's structured output to generate quiz content: - -```python -# deploy.py:981-1004 -quiz_schema = { - "type": "array", - "items": { - "type": "object", - "properties": { - "question": {"type": "string"}, - "options": { - "type": "array", - "items": { - "properties": { - "label": {"type": "string"}, - "value": {"type": "string"}, - "isCorrect": {"type": "boolean"}, - }, - }, - }, - "explanation": {"type": "string"}, - "category": {"type": "string"}, - }, - }, -} -``` - -#### 8. A2UI JSON Response +## Quick Start -The agent builds and returns A2UI JSON: +Complete Steps 1–6 in [Quickstart.ipynb](Quickstart.ipynb) first to set up GCP, deploy the agent, and configure environment variables. 
Then: -```json -{ - "format": "quiz", - "surfaceId": "learningContent", - "a2ui": [ - {"beginRendering": {"surfaceId": "learningContent", "root": "mainColumn"}}, - { - "surfaceUpdate": { - "surfaceId": "learningContent", - "components": [ - {"id": "mainColumn", "component": {"Column": {...}}}, - {"id": "header", "component": {"Text": {"text": {"literalString": "Quick Quiz: Photosynthesis"}}}}, - {"id": "q1", "component": {"QuizCard": { - "question": {"literalString": "Where do the light reactions occur?"}, - "options": [...], - "explanation": {...} - }}} - ] - } - } - ], - "source": { - "title": "Overview of Photosynthesis", - "url": "https://openstax.org/books/biology-ap-courses/pages/8-1-overview-of-photosynthesis", - "provider": "OpenStax Biology for AP Courses" - } -} +```bash +cd samples/personalized_learning +npm install +npm run dev ``` -#### 9. Frontend Rendering +Open http://localhost:5174 and try prompts like: +- "Help me understand ATP" +- "Quiz me on meiosis" +- "Flashcards for photosynthesis" -The A2UI renderer processes the JSON and renders components: - -```typescript -// src/a2ui-renderer.ts:64-78 -const processor = v0_8.Data.createSignalA2uiMessageProcessor(); -processor.processMessages(a2uiMessages); - -const surfaces = processor.getSurfaces(); -for (const [surfaceId, surface] of surfaces.entries()) { - this.renderSurface(container, surfaceId, surface, processor); -} -``` +The demo works without a deployed agent too—it falls back to sample content in [src/a2a-client.ts](src/a2a-client.ts). --- -## Content Retrieval System - -The agent uses a sophisticated system to map user topics to relevant textbook content. 
- -### Tier 1: Keyword Matching (Fast Path) - -The `KEYWORD_HINTS` dictionary maps ~100 biology keywords to chapter slugs: - -```python -# deploy.py:476-627 -KEYWORD_HINTS = { - # Energy & Metabolism - "atp": ["6-4-atp-adenosine-triphosphate", "6-1-energy-and-metabolism"], - "photosynthesis": ["8-1-overview-of-photosynthesis", "8-2-the-light-dependent-reaction-of-photosynthesis"], - "meiosis": ["11-1-the-process-of-meiosis"], - - # Nervous System - "neuron": ["26-1-neurons-and-glial-cells", "26-2-how-neurons-communicate"], - "vision": ["27-5-vision"], - - # ... ~100 more keywords -} -``` - -**Word Boundary Matching**: The system uses regex word boundaries to prevent false positives: - -```python -# deploy.py:752-756 -pattern = r'\b' + re.escape(keyword) + r'\b' -if re.search(pattern, topic_lower): - matched_slugs.update(slugs) -``` - -This ensures "vision" matches "teach me about vision" but NOT "explain cell division" (which contains "vision" as a substring). - -### Tier 2: LLM Fallback (When Keywords Miss) - -For unrecognized topics, the agent uses Gemini to match: - -```python -# deploy.py:677-740 -def llm_match_topic_to_chapters(topic: str, max_chapters: int = 2) -> list: - prompt = f"""Match the user's topic to the MOST relevant chapters. - -User's topic: "{topic}" - -Available chapters from OpenStax Biology for AP Courses: -{chapter_list} +## Architecture -INSTRUCTIONS: -1. Return EXACTLY {max_chapters} chapter slugs -2. Order by relevance - MOST relevant first -3. For biology topics (even misspelled like "meitosis"), ALWAYS find matches -4. 
Return empty [] ONLY for non-biology topics -""" ``` - -This handles: -- **Misspellings**: "meitosis" → meiosis chapters -- **Alternate terms**: "cell energy" → ATP chapters -- **Complex queries**: "how do plants make food" → photosynthesis chapters - -### Chapter → Module → Content Mapping - -Each chapter slug maps to one or more module IDs: - -```python -# deploy.py:305-473 -CHAPTER_TO_MODULES = { - "8-1-overview-of-photosynthesis": ["m62794"], - "11-1-the-process-of-meiosis": ["m62810"], - # ... 167 chapters -} +Browser → API Server → Agent Engine → OpenStax → A2UI Response + (intent) (content) (fetch) (render) ``` -Module IDs correspond to CNXML files in the OpenStax GitHub repository: +**Frontend (Browser):** Vite + TypeScript app using the A2UI Lit renderer with custom Flashcard and QuizCard components. The chat orchestrator detects user intent and routes requests appropriately. -``` -https://raw.githubusercontent.com/openstax/osbooks-biology-bundle/main/modules/m62794/index.cnxml -``` +**API Server (Node.js):** Handles intent detection via Gemini and proxies requests to Agent Engine. Verifies Firebase ID tokens on all API endpoints. Lives in [api-server.ts](api-server.ts). -### Content Source +**Agent Engine (Vertex AI):** ADK agent with tools for generating flashcards, quizzes, and fetching textbook content. Deployed via [deploy.py](deploy.py). -All educational content comes from [OpenStax Biology for AP Courses](https://openstax.org/details/books/biology-ap-courses), a free, peer-reviewed college textbook licensed under CC BY 4.0. +**Content:** All educational material comes from [OpenStax Biology for AP Courses](https://openstax.org/details/books/biology-ap-courses), fetched from GitHub at runtime. 
--- -## Dynamic Personalization - -### Learner Context System - -Learner profiles are stored in GCS and loaded at runtime: - -``` -gs://{PROJECT_ID}-learner-context/learner_context/ -├── 01_maria_learner_profile.txt -├── 02_chemistry_bond_energy.txt -├── 03_chemistry_thermodynamics.txt -├── 04_biology_atp_cellular_respiration.txt -├── 05_misconception_resolution.txt -└── 06_mcat_practice_concepts.txt -``` - -### The Demo Learner: Maria - -The demo includes a pre-configured learner profile ([learner_context/01_maria_learner_profile.txt](learner_context/01_maria_learner_profile.txt)): - -- **Demographics**: Pre-med student at Cymbal University, preparing for MCAT -- **Learning Style**: Visual-kinesthetic, responds to sports/gym analogies -- **Strengths**: AP Biology (92% proficiency) -- **Gaps**: Chemistry bond energy (65% proficiency) -- **Key Misconception**: Believes "energy is stored in ATP bonds" (incorrect) - -### Switching Students - -To personalize for a different student: +## Key Files -```bash -# Edit the learner profile -nano learner_context/01_maria_learner_profile.txt - -# Upload to GCS (agent picks up changes on next request) -gsutil cp learner_context/*.txt gs://{PROJECT_ID}-learner-context/learner_context/ -``` - -No redeployment required—the agent loads context dynamically. +| File | Purpose | +|------|---------| +| [Quickstart.ipynb](Quickstart.ipynb) | Step-by-step setup notebook | +| [deploy.py](deploy.py) | Agent deployment with embedded agent code | +| [api-server.ts](api-server.ts) | Intent detection and Agent Engine proxy | +| [src/chat-orchestrator.ts](src/chat-orchestrator.ts) | Frontend routing logic | +| [src/flashcard.ts](src/flashcard.ts) | Custom Flashcard component | +| [src/quiz-card.ts](src/quiz-card.ts) | Custom QuizCard component | +| [learner_context/](learner_context/) | Sample learner profiles | --- -## Custom UI Components - -This demo extends A2UI with two custom Lit components. 
- -### Flashcard Component +## Custom Components -A flippable card showing question (front) and answer (back): +This demo extends A2UI with two Lit web components that agents can generate at runtime. -```typescript -// src/flashcard.ts:34-269 -@customElement("a2ui-flashcard") -export class Flashcard extends LitElement { - @property({ attribute: false }) front: StringValue | null = null; - @property({ attribute: false }) back: StringValue | null = null; - @property({ attribute: false }) category: StringValue | null = null; +**Flashcard** — A flippable card with front (question) and back (answer). Click to flip. - @state() private _flipped = false; - - private handleClick() { - this._flipped = !this._flipped; - } -} -``` - -**A2UI JSON format:** ```json -{ - "id": "card1", - "component": { - "Flashcard": { - "front": {"literalString": "Why does ATP hydrolysis release energy?"}, - "back": {"literalString": "Because the products (ADP + Pi) are MORE STABLE..."}, - "category": {"literalString": "Biochemistry"} - } - } -} +{"Flashcard": {"front": {"literalString": "What is ATP?"}, "back": {"literalString": "Adenosine triphosphate..."}}} ``` -### QuizCard Component - -An interactive multiple-choice quiz with immediate feedback: - -```typescript -// src/quiz-card.ts:35-348 -@customElement("a2ui-quizcard") -export class QuizCard extends LitElement { - @property({ attribute: false }) question: StringValue | null = null; - @property({ attribute: false }) options: QuizOption[] = []; - @property({ attribute: false }) explanation: StringValue | null = null; - - @state() private selectedValue: string | null = null; - @state() private submitted = false; -} -``` +**QuizCard** — Multiple-choice question with immediate feedback and explanation. 
-**A2UI JSON format:** ```json -{ - "id": "quiz1", - "component": { - "QuizCard": { - "question": {"literalString": "Where do the light reactions occur?"}, - "options": [ - {"label": {"literalString": "Thylakoid membrane"}, "value": "a", "isCorrect": true}, - {"label": {"literalString": "Stroma"}, "value": "b", "isCorrect": false} - ], - "explanation": {"literalString": "Light reactions occur in the thylakoid..."}, - "category": {"literalString": "Photosynthesis"} - } - } -} -``` - ---- - -## Local Development - -### Quick Start - -> **Prerequisites:** Complete Steps 1-6 in [Quickstart.ipynb](Quickstart.ipynb) first to set up GCP, deploy the agent, and configure environment variables. - -```bash -cd samples/personalized_learning -npm install -npm run dev +{"QuizCard": {"question": {"literalString": "Where do light reactions occur?"}, "options": [...], "explanation": {...}}} ``` -Open http://localhost:5174 +Both components are registered in [src/main.ts](src/main.ts) and rendered by the standard A2UI Lit renderer. -### With vs Without a Deployed Agent +--- -The demo works in two modes: +## Personalization -| Mode | How it works | When to use | -|------|--------------|-------------| -| **With deployed agent** | Requests go to Agent Engine, which fetches live OpenStax content | Production, full demo | -| **Without deployed agent** | Falls back to pre-built sample content in [a2a-client.ts](src/a2a-client.ts) | Quick local testing | +Learner profiles live in GCS at `gs://{PROJECT_ID}-learner-context/learner_context/`. The demo includes a sample student "Maria" — a pre-med student preparing for the MCAT who responds well to sports analogies and has a common misconception about ATP bond energy. -The code automatically falls back to sample content if the agent is unreachable—no configuration change needed. +To personalize for a different student, edit the files in [learner_context/](learner_context/) and upload to GCS. 
The agent picks up changes on the next request—no redeployment required. --- ## Production Deployment -### Cloud Run + Firebase Hosting - -The demo can be deployed to Cloud Run with Firebase Hosting for a shareable URL: +For a shareable URL via Cloud Run + Firebase Hosting: ```bash python deploy_hosting.py --project YOUR_PROJECT_ID ``` -This deploys: -- **Frontend + API Server** → Cloud Run -- **Firebase Hosting** → CDN + custom domain - -See [Quickstart.ipynb](Quickstart.ipynb) Step 7 for detailed instructions. - ---- - -## Known Limitations & Future Improvements - -### Latency - -| Issue | Current State | Potential Improvement | -|-------|---------------|----------------------| -| LLM fallback adds 2-5 seconds | Tier 2 matching requires an LLM call when keywords miss | Expand `KEYWORD_HINTS` to cover more common terms, or use semantic search with embeddings | -| Cold start on Agent Engine | First request after idle period is slow | Keep agent warm with periodic health checks | - -### Information Retrieval - -| Issue | Current State | Potential Improvement | -|-------|---------------|----------------------| -| Keyword-based matching | Simple word boundary regex | Use vector embeddings for semantic similarity | -| Single-topic queries only | Multi-topic requests may return wrong content | Implement query decomposition | -| Limited to exact matches | Synonyms not handled | Add synonym expansion or use LLM for all matching | - -### Content Coverage - -| Issue | Current State | Potential Improvement | -|-------|---------------|----------------------| -| Biology only | Only OpenStax Biology for AP Courses | Extend to other OpenStax textbooks (chemistry, physics, etc.) 
| -| English only | No internationalization | Add multi-language support | - -### UI Limitations - -| Issue | Current State | Potential Improvement | -|-------|---------------|----------------------| -| Sidebar non-functional | Navigation and settings are placeholder | Implement course navigation, settings panel | -| No progress tracking | Sessions are ephemeral | Add persistent learner progress | - -### Media Generation - -| Issue | Current State | Potential Improvement | -|-------|---------------|----------------------| -| Pre-generated audio/video | Podcast and video are static files generated with NotebookLM | Integrate dynamic TTS or video generation APIs | +See Step 7 in [Quickstart.ipynb](Quickstart.ipynb) for Firebase setup details. --- -## Troubleshooting - -### "No Content Available" for Valid Biology Topics - -**Symptom**: Agent returns "I couldn't find any OpenStax Biology content related to [topic]" - -**Cause**: Topic didn't match any keywords and LLM fallback found no relevant chapters - -**Solutions**: -1. Try more specific biology terminology -2. Check if the topic is covered in AP Biology curriculum -3. Add the keyword to `KEYWORD_HINTS` in [deploy.py:476-627](deploy.py) - -### Slow Responses (5+ seconds) - -**Symptom**: Long delay before content appears - -**Cause**: LLM fallback is being triggered (no keyword match) - -**Solutions**: -1. Add common user terms to `KEYWORD_HINTS` -2. Pre-warm the agent with a health check -3. Use the optional GCS content cache to avoid GitHub fetches - -### Stale Content After Agent Update +## Known Limitations -**Symptom**: Agent returns outdated content after redeployment - -**Cause**: Agent Engine caches the previous deployment - -**Solutions**: -1. Wait 1-2 minutes for cache to clear -2. Deploy with a new resource ID -3. 
Clear browser session storage - -### Quiz Returns Flashcards - -**Symptom**: Requested a quiz but got flashcards - -**Cause**: Agent Engine returned flashcards; the API server's local quiz generation may have failed - -**Solutions**: -1. Check API server logs for errors -2. Verify Gemini API access -3. The [api-server.ts:822-836](api-server.ts) has fallback logic that should generate quizzes locally - -### Checking Agent Engine Logs - -To debug content fetching issues: - -```bash -gcloud logging read 'resource.type="aiplatform.googleapis.com/ReasoningEngine"' \ - --limit=50 --project=YOUR_PROJECT \ - --format="table(timestamp,textPayload)" -``` +- **Latency**: LLM fallback for topic matching adds 2–5 seconds when keywords don't match +- **Single topics only**: Multi-topic requests may return wrong content +- **Audio/video**: Pre-generated files only, not dynamic +- **Sidebar**: Placeholder UI; only the chat is functional --- ## Content Attribution -Educational content is sourced from [OpenStax](https://openstax.org/), licensed under [CC BY 4.0](https://creativecommons.org/licenses/by/4.0/). - -Specifically: [Biology for AP Courses](https://openstax.org/details/books/biology-ap-courses) — OpenStax, Rice University - ---- - -## Security Notice - -> **Warning:** When building production applications, treat any agent outside your control as potentially untrusted. This demo connects to Agent Engine within your own GCP project. Always review agent code before deploying. - ---- - -## Related Documentation - -- [A2UI Specification](../../docs/) — Canonical A2UI format documentation -- [A2UI Lit Renderer](../../renderers/lit/) — The web component renderer used by this demo -- [Quickstart.ipynb](Quickstart.ipynb) — Step-by-step setup notebook -- [Main A2UI README](../../README.md) — Project overview and philosophy +Educational content from [OpenStax](https://openstax.org/), licensed under [CC BY 4.0](https://creativecommons.org/licenses/by/4.0/). 
--- ## License +## Related -Apache 2.0 — See the repository root for details. +- [A2UI Specification](../../docs/) +- [A2UI Lit Renderer](../../renderers/lit/) +- [Main A2UI README](../../README.md) diff --git a/samples/personalized_learning/api-server.ts b/samples/personalized_learning/api-server.ts index cdf93556b..c6eaae492 100644 --- a/samples/personalized_learning/api-server.ts +++ b/samples/personalized_learning/api-server.ts @@ -21,10 +21,36 @@ import { execSync } from "child_process"; import { writeFileSync, readFileSync, existsSync } from "fs"; import { join } from "path"; import { config } from "dotenv"; +import { initializeApp, applicationDefault } from "firebase-admin/app"; +import { getAuth } from "firebase-admin/auth"; // Load environment variables config(); +// ============================================================================= +// FIREBASE ADMIN - Server-side authentication +// ============================================================================= +initializeApp({ credential: applicationDefault() }); + +async function authenticateRequest(req: any, res: any): Promise<boolean> { + const authHeader = req.headers.authorization; + if (!authHeader?.startsWith("Bearer ")) { + res.writeHead(401, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ error: "Missing or malformed Authorization header" })); + return false; + } + try { + const token = authHeader.split("Bearer ")[1]; + await getAuth().verifyIdToken(token); + return true; + } catch (err: any) { + console.error("[API Server] Auth failed:", err.message); + res.writeHead(403, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ error: "Invalid or expired token" })); + return false; + } +} + // ============================================================================= // MESSAGE LOG - Captures all request/response traffic for demo purposes // ============================================================================= @@ -761,7 +787,7 @@ async function main() 
{ // CORS headers res.setHeader("Access-Control-Allow-Origin", "*"); res.setHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS"); - res.setHeader("Access-Control-Allow-Headers", "Content-Type"); + res.setHeader("Access-Control-Allow-Headers", "Content-Type, Authorization"); if (req.method === "OPTIONS") { res.writeHead(204); @@ -793,6 +819,7 @@ async function main() { // A2A Agent Engine endpoint if (req.url === "/a2ui-agent/a2a/query" && req.method === "POST") { + if (!(await authenticateRequest(req, res))) return; try { const body = await parseBody(req); console.log("[API Server] ========================================"); @@ -857,6 +884,7 @@ async function main() { // Chat endpoint if (req.url === "/api/chat" && req.method === "POST") { + if (!(await authenticateRequest(req, res))) return; try { const body = await parseBody(req); console.log("[API Server] Chat request received"); @@ -889,6 +917,7 @@ async function main() { // Combined chat endpoint - performs intent detection AND response in one LLM call if (req.url === "/api/chat-with-intent" && req.method === "POST") { + if (!(await authenticateRequest(req, res))) return; try { const body = await parseBody(req); console.log("[API Server] ========================================"); diff --git a/samples/personalized_learning/package.json b/samples/personalized_learning/package.json index 173293824..053185dcd 100644 --- a/samples/personalized_learning/package.json +++ b/samples/personalized_learning/package.json @@ -23,6 +23,7 @@ "@lit-labs/signals": "^0.1.3", "@lit/context": "^1.1.4", "firebase": "^10.14.1", + "firebase-admin": "^12.0.0", "google-auth-library": "^9.0.0", "lit": "^3.3.1" }, From 34f41a626117d28dfdf2d41bff2da1b8cfc43e3f Mon Sep 17 00:00:00 2001 From: Sam Goodgame Date: Tue, 27 Jan 2026 07:55:35 -0500 Subject: [PATCH 2/8] add demo video, unify auth config and env vars, enforce server-side allowlist --- samples/personalized_learning/.env.template | 12 ++++++++ 
.../personalized_learning/Quickstart.ipynb | 2 +- samples/personalized_learning/README.md | 19 ++++++++++-- samples/personalized_learning/api-server.ts | 25 +++++++++++++++- .../src/firebase-auth.ts | 29 ++++++++++--------- 5 files changed, 70 insertions(+), 17 deletions(-) diff --git a/samples/personalized_learning/.env.template b/samples/personalized_learning/.env.template index 46a00a3f1..67223e663 100644 --- a/samples/personalized_learning/.env.template +++ b/samples/personalized_learning/.env.template @@ -43,3 +43,15 @@ AGENT_ENGINE_RESOURCE_ID= # GCS_CONTEXT_BUCKET=your-bucket-name # GCS_CONTEXT_PREFIX=learner_context/ + +# ============================================================================= +# OPTIONAL - Access Control +# ============================================================================= + +# Restrict access to a specific email domain (e.g., "google.com", "yourcompany.com") +# Set to empty string to disable domain restriction +VITE_ALLOWED_DOMAIN=google.com + +# Whitelist specific email addresses (comma-separated) +# These users are allowed regardless of domain +# VITE_ALLOWED_EMAILS=alice@example.com,bob@partner.org diff --git a/samples/personalized_learning/Quickstart.ipynb b/samples/personalized_learning/Quickstart.ipynb index ae31e6a16..e5d62819e 100644 --- a/samples/personalized_learning/Quickstart.ipynb +++ b/samples/personalized_learning/Quickstart.ipynb @@ -520,7 +520,7 @@ "cell_type": "markdown", "id": "n1qjkwg9yp", "metadata": {}, - "source": "### Access Control\n\nThe demo has two layers of authentication:\n\n1. **Client-side (Firebase Auth):** Restricts sign-in to `@google.com` emails by default, configured in [`src/firebase-auth.ts`](src/firebase-auth.ts).\n2. **Server-side (Firebase Admin):** The API server verifies Firebase ID tokens on all endpoints, rejecting requests with missing or invalid tokens. 
See [`api-server.ts`](api-server.ts).\n\n**To change the allowed domain:**\n```typescript\n// In src/firebase-auth.ts\nconst ALLOWED_DOMAIN = \"yourcompany.com\"; // Change to your domain\n```\n\n**To allow specific external collaborators (whitelist):**\n```typescript\n// In src/firebase-auth.ts\nconst ALLOWED_EMAILS: string[] = [\n \"alice@example.com\",\n \"bob@partner.org\",\n \"charlie@university.edu\",\n];\n```\n\n**To allow anyone with a Google account:**\n```typescript\nconst ALLOWED_DOMAIN = \"\"; // Disable domain restriction\nconst ALLOWED_EMAILS: string[] = []; // Empty whitelist = allow all\n```\n\n> **Note:** After changing access control, rebuild and redeploy: `python deploy_hosting.py --project YOUR_PROJECT_ID`" + "source": "### Access Control\n\nThe demo has two layers of authentication:\n\n1. **Client-side (Firebase Auth):** Restricts sign-in to allowed emails, configured via environment variables.\n2. **Server-side (Firebase Admin):** Verifies Firebase ID tokens and enforces the same access rules.\n\nBoth layers read from the same `.env` variables, so you only configure access once:\n\n```bash\n# In .env\nVITE_ALLOWED_DOMAIN=google.com # Restrict to a domain\nVITE_ALLOWED_EMAILS=alice@example.com,bob@partner.org # Or whitelist specific emails\n```\n\n**Examples:**\n\n| Configuration | Who can access |\n|--------------|----------------|\n| `VITE_ALLOWED_DOMAIN=google.com` | Anyone with @google.com |\n| `VITE_ALLOWED_DOMAIN=` + `VITE_ALLOWED_EMAILS=a@x.com,b@y.com` | Only those two emails |\n| Both empty | Anyone with a Google account |\n\n> **Note:** After changing access control, rebuild and redeploy: `python deploy_hosting.py --project YOUR_PROJECT_ID`" }, { "cell_type": "markdown", diff --git a/samples/personalized_learning/README.md b/samples/personalized_learning/README.md index e12218d9c..61f0cfeac 100644 --- a/samples/personalized_learning/README.md +++ b/samples/personalized_learning/README.md @@ -4,7 +4,9 @@ A full-stack sample 
demonstrating A2UI's capabilities for AI-powered educational **Contributed by Google Public Sector's Rapid Innovation Team.** -![Personalized Learning Demo](assets/hero.jpg) +[![Watch the demo](https://img.youtube.com/vi/fgkiwyHj9g8/maxresdefault.jpg)](https://www.youtube.com/watch?v=fgkiwyHj9g8) + +_This video demonstrates two use cases: personalized learning, which is the focus of this sample, plus a workforce development application built on the same A2UI framework—included to show how these patterns adapt to other domains._ --- @@ -21,7 +23,7 @@ Key concepts demonstrated: - **A2A Protocol** — Frontend-to-agent communication via Agent-to-Agent protocol - **Dynamic Context** — Learner profiles loaded from GCS at runtime (no redeployment needed) - **Content Retrieval** — LLM-powered topic matching across 167 OpenStax Biology chapters -- **Server-side Auth** — API endpoints verify Firebase ID tokens to protect GCP resources +- **Server-side Auth** — API endpoints verify Firebase ID tokens and enforce domain/email allowlists --- @@ -115,6 +117,19 @@ See Step 7 in [Quickstart.ipynb](Quickstart.ipynb) for Firebase setup details. --- +## Access Control + +Both client and server enforce access restrictions via environment variables in `.env`: + +```bash +VITE_ALLOWED_DOMAIN=google.com # Restrict to a domain +VITE_ALLOWED_EMAILS=alice@example.com,bob@partner.org # Or whitelist specific emails +``` + +See the Access Control section in [Quickstart.ipynb](Quickstart.ipynb) for details. 
+ +--- + ## Known Limitations - **Latency**: LLM fallback for topic matching adds 2–5 seconds when keywords don't match diff --git a/samples/personalized_learning/api-server.ts b/samples/personalized_learning/api-server.ts index c6eaae492..a91e0c5a5 100644 --- a/samples/personalized_learning/api-server.ts +++ b/samples/personalized_learning/api-server.ts @@ -32,6 +32,23 @@ config(); // ============================================================================= initializeApp({ credential: applicationDefault() }); +// Access control - reads from environment variables (shared with src/firebase-auth.ts) +// Uses VITE_ prefix so the same .env works for both client and server +const ALLOWED_DOMAIN = process.env.VITE_ALLOWED_DOMAIN ?? "google.com"; +const ALLOWED_EMAILS: string[] = (process.env.VITE_ALLOWED_EMAILS ?? "") + .split(",") + .map((e: string) => e.trim().toLowerCase()) + .filter((e: string) => e.length > 0); + +function isAllowedEmail(email: string | undefined): boolean { + if (!email) return false; + const emailLower = email.toLowerCase(); + if (ALLOWED_EMAILS.length > 0 && ALLOWED_EMAILS.includes(emailLower)) return true; + if (ALLOWED_DOMAIN && emailLower.endsWith(`@${ALLOWED_DOMAIN}`)) return true; + if (!ALLOWED_DOMAIN && ALLOWED_EMAILS.length === 0) return true; // No restrictions + return false; +} + async function authenticateRequest(req: any, res: any): Promise { const authHeader = req.headers.authorization; if (!authHeader?.startsWith("Bearer ")) { @@ -41,7 +58,13 @@ async function authenticateRequest(req: any, res: any): Promise { } try { const token = authHeader.split("Bearer ")[1]; - await getAuth().verifyIdToken(token); + const decoded = await getAuth().verifyIdToken(token); + if (!isAllowedEmail(decoded.email)) { + console.error("[API Server] Access denied for:", decoded.email); + res.writeHead(403, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ error: "Email not authorized" })); + return false; + } return true; } catch 
(err: any) { console.error("[API Server] Auth failed:", err.message); diff --git a/samples/personalized_learning/src/firebase-auth.ts b/samples/personalized_learning/src/firebase-auth.ts index 72d24e6e2..18f7a746e 100644 --- a/samples/personalized_learning/src/firebase-auth.ts +++ b/samples/personalized_learning/src/firebase-auth.ts @@ -57,17 +57,14 @@ provider.setCustomParameters({ // ACCESS CONTROL CONFIGURATION // ============================================================================ -// Allowed email domain (e.g., "google.com", "yourcompany.com") -// Set to empty string "" to disable domain-based access and use only the whitelist -const ALLOWED_DOMAIN = "google.com"; - -// Whitelist of specific email addresses that are always allowed, -// regardless of domain. Add emails here to grant access to external collaborators. -// Example: ["alice@example.com", "bob@partner.org", "charlie@university.edu"] -const ALLOWED_EMAILS: string[] = [ - // "collaborator@example.com", - // "reviewer@partner.org", -]; +// Access control config - reads from environment variables (set in .env) +// VITE_ALLOWED_DOMAIN: e.g., "google.com" or "" to disable domain check +// VITE_ALLOWED_EMAILS: comma-separated list, e.g., "alice@example.com,bob@partner.org" +const ALLOWED_DOMAIN = import.meta.env.VITE_ALLOWED_DOMAIN ?? "google.com"; +const ALLOWED_EMAILS: string[] = (import.meta.env.VITE_ALLOWED_EMAILS ?? 
"") + .split(",") + .map((e: string) => e.trim().toLowerCase()) + .filter((e: string) => e.length > 0); // ============================================================================ @@ -76,14 +73,20 @@ const ALLOWED_EMAILS: string[] = [ */ function isAllowedEmail(email: string | null): boolean { if (!email) return false; + const emailLower = email.toLowerCase(); // Check whitelist first - if (ALLOWED_EMAILS.includes(email.toLowerCase())) { + if (ALLOWED_EMAILS.length > 0 && ALLOWED_EMAILS.includes(emailLower)) { return true; } // Check domain if configured - if (ALLOWED_DOMAIN && email.endsWith(`@${ALLOWED_DOMAIN}`)) { + if (ALLOWED_DOMAIN && emailLower.endsWith(`@${ALLOWED_DOMAIN}`)) { + return true; + } + + // No restrictions configured = allow all + if (!ALLOWED_DOMAIN && ALLOWED_EMAILS.length === 0) { return true; } From d6d22609e03aabb257e9208b69d4a3e309b8dc52 Mon Sep 17 00:00:00 2001 From: Sam Goodgame Date: Wed, 28 Jan 2026 12:13:22 -0500 Subject: [PATCH 3/8] Consolidate auth to server-side only; improve access control documentation --- samples/personalized_learning/.env.template | 52 +- samples/personalized_learning/.firebaserc | 2 +- samples/personalized_learning/.gitignore | 5 +- .../personalized_learning/Quickstart.ipynb | 41 +- samples/personalized_learning/README.md | 30 +- samples/personalized_learning/agent/agent.py | 519 ++---------------- .../agent/openstax_chapters.py | 410 ++++++++------ .../agent/openstax_content.py | 9 +- .../agent/openstax_modules.py | 5 +- samples/personalized_learning/agent/server.py | 152 ----- .../agent/tests/test_agent.py | 363 ------------ .../agent/tests/test_caching.py | 200 ------- .../agent/tests/test_keyword_hints.py | 221 -------- .../agent/tests/test_parallel_fetch.py | 223 -------- samples/personalized_learning/api-server.ts | 10 + samples/personalized_learning/deploy.py | 38 +- .../personalized_learning/deploy_hosting.py | 9 + .../src/firebase-auth.ts | 139 ++--- samples/personalized_learning/src/main.ts | 42 
+- .../test_topic_matching.py | 163 ------ 20 files changed, 529 insertions(+), 2104 deletions(-) delete mode 100644 samples/personalized_learning/agent/server.py delete mode 100644 samples/personalized_learning/agent/tests/test_agent.py delete mode 100644 samples/personalized_learning/agent/tests/test_caching.py delete mode 100644 samples/personalized_learning/agent/tests/test_keyword_hints.py delete mode 100644 samples/personalized_learning/agent/tests/test_parallel_fetch.py delete mode 100644 samples/personalized_learning/test_topic_matching.py diff --git a/samples/personalized_learning/.env.template b/samples/personalized_learning/.env.template index 67223e663..1361bc711 100644 --- a/samples/personalized_learning/.env.template +++ b/samples/personalized_learning/.env.template @@ -45,13 +45,51 @@ AGENT_ENGINE_RESOURCE_ID= # GCS_CONTEXT_PREFIX=learner_context/ # ============================================================================= -# OPTIONAL - Access Control +# IMPORTANT - Access Control (you MUST configure this!) # ============================================================================= - -# Restrict access to a specific email domain (e.g., "google.com", "yourcompany.com") -# Set to empty string to disable domain restriction +# +# By default, access is restricted to @google.com accounts, which means you +# won't be able to access your own deployed application unless you work at Google! +# +# ⚠️ BEFORE DEPLOYING: Update these settings to allow YOUR email/domain. +# +# The server is the single source of truth for authorization. Both options below +# are checked - a user is allowed if they match EITHER the domain OR the email list. +# +# ----------------------------------------------------------------------------- + +# Option 1: Restrict to a specific email domain +# Examples: +# VITE_ALLOWED_DOMAIN=yourcompany.com (allows anyone@yourcompany.com) +# VITE_ALLOWED_DOMAIN=gmail.com (allows any Gmail user - use with caution!) 
+# VITE_ALLOWED_DOMAIN= (disable domain restriction, use email list only) +# VITE_ALLOWED_DOMAIN=google.com -# Whitelist specific email addresses (comma-separated) -# These users are allowed regardless of domain -# VITE_ALLOWED_EMAILS=alice@example.com,bob@partner.org +# Option 2: Whitelist specific email addresses (comma-separated) +# These users are allowed regardless of domain setting above. +# Examples: +# VITE_ALLOWED_EMAILS=you@gmail.com +# VITE_ALLOWED_EMAILS=alice@example.com,bob@partner.org,charlie@university.edu +# +# VITE_ALLOWED_EMAILS= + +# ----------------------------------------------------------------------------- +# Quick setup examples: +# +# Allow only yourself: +# VITE_ALLOWED_DOMAIN= +# VITE_ALLOWED_EMAILS=your.email@gmail.com +# +# Allow your whole company: +# VITE_ALLOWED_DOMAIN=yourcompany.com +# VITE_ALLOWED_EMAILS= +# +# Allow your company + a few external collaborators: +# VITE_ALLOWED_DOMAIN=yourcompany.com +# VITE_ALLOWED_EMAILS=external.collaborator@gmail.com,partner@othercorp.com +# +# Allow anyone with a Google account (public demo): +# VITE_ALLOWED_DOMAIN= +# VITE_ALLOWED_EMAILS= +# ----------------------------------------------------------------------------- diff --git a/samples/personalized_learning/.firebaserc b/samples/personalized_learning/.firebaserc index a0f1161ce..fc9caf94e 100644 --- a/samples/personalized_learning/.firebaserc +++ b/samples/personalized_learning/.firebaserc @@ -1,5 +1,5 @@ { "projects": { - "default": "your-project-id" + "default": "a2ui-third-test" } } diff --git a/samples/personalized_learning/.gitignore b/samples/personalized_learning/.gitignore index b69a9a798..501394a73 100644 --- a/samples/personalized_learning/.gitignore +++ b/samples/personalized_learning/.gitignore @@ -27,7 +27,7 @@ demo-message-log.json .DS_Store Thumbs.db -# Large media assets (generate via NotebookLM - see NOTEBOOKLM_GUIDE.md) +# Large media assets (generate via NotebookLM - see Quickstart.ipynb) public/assets/*.m4a 
public/assets/*.mp4 @@ -42,3 +42,6 @@ public/assets/*.mp4 # Temporary A2UI copy for Cloud Run deployment a2ui-web-lib/ + +# Tests (not part of the demo distribution) +tests/ diff --git a/samples/personalized_learning/Quickstart.ipynb b/samples/personalized_learning/Quickstart.ipynb index e5d62819e..56aa8c38f 100644 --- a/samples/personalized_learning/Quickstart.ipynb +++ b/samples/personalized_learning/Quickstart.ipynb @@ -520,7 +520,44 @@ "cell_type": "markdown", "id": "n1qjkwg9yp", "metadata": {}, - "source": "### Access Control\n\nThe demo has two layers of authentication:\n\n1. **Client-side (Firebase Auth):** Restricts sign-in to allowed emails, configured via environment variables.\n2. **Server-side (Firebase Admin):** Verifies Firebase ID tokens and enforces the same access rules.\n\nBoth layers read from the same `.env` variables, so you only configure access once:\n\n```bash\n# In .env\nVITE_ALLOWED_DOMAIN=google.com # Restrict to a domain\nVITE_ALLOWED_EMAILS=alice@example.com,bob@partner.org # Or whitelist specific emails\n```\n\n**Examples:**\n\n| Configuration | Who can access |\n|--------------|----------------|\n| `VITE_ALLOWED_DOMAIN=google.com` | Anyone with @google.com |\n| `VITE_ALLOWED_DOMAIN=` + `VITE_ALLOWED_EMAILS=a@x.com,b@y.com` | Only those two emails |\n| Both empty | Anyone with a Google account |\n\n> **Note:** After changing access control, rebuild and redeploy: `python deploy_hosting.py --project YOUR_PROJECT_ID`" + "source": [ + "### Access Control\n", + "\n", + "⚠️ **IMPORTANT: You must configure access control to use your deployed app!**\n", + "\n", + "By default, access is restricted to `@google.com` accounts. 
If you don't work at Google, you'll be locked out of your own deployment.\n", + "\n", + "**Before deploying**, add these lines to your `.env` file:\n", + "\n", + "```bash\n", + "# Option 1: Allow a specific domain (your company)\n", + "VITE_ALLOWED_DOMAIN=yourcompany.com\n", + "\n", + "# Option 2: Allow specific email addresses (yourself + collaborators)\n", + "VITE_ALLOWED_DOMAIN=\n", + "VITE_ALLOWED_EMAILS=your.email@gmail.com,collaborator@example.com\n", + "\n", + "# Option 3: Allow anyone with a Google account (public demo)\n", + "VITE_ALLOWED_DOMAIN=\n", + "VITE_ALLOWED_EMAILS=\n", + "```\n", + "\n", + "**How it works:**\n", + "\n", + "The server is the single source of truth for authorization. When a user signs in:\n", + "1. Firebase authenticates them (Google OAuth)\n", + "2. The client calls `/api/check-access` with the user's token\n", + "3. The server checks if their email matches `VITE_ALLOWED_DOMAIN` or `VITE_ALLOWED_EMAILS`\n", + "4. If not authorized, they're signed out and shown an error\n", + "\n", + "| Configuration | Who can access |\n", + "|--------------|----------------|\n", + "| `VITE_ALLOWED_DOMAIN=yourcompany.com` | Anyone with @yourcompany.com |\n", + "| `VITE_ALLOWED_EMAILS=you@gmail.com` | Only your email |\n", + "| Both empty | Anyone with a Google account |\n", + "\n", + "> **Note:** After changing access control, rebuild and redeploy: `python deploy_hosting.py --project YOUR_PROJECT_ID`" + ] }, { "cell_type": "markdown", @@ -584,4 +621,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} diff --git a/samples/personalized_learning/README.md b/samples/personalized_learning/README.md index 61f0cfeac..39388a1cc 100644 --- a/samples/personalized_learning/README.md +++ b/samples/personalized_learning/README.md @@ -10,19 +10,17 @@ _This video demonstrates two use cases: personalized learning, which is the focu --- -## What This Is +## tl;dr -This demo shows how agents can generate entire UI experiences—not just text 
responses. When a student asks for flashcards on photosynthesis, the agent matches the topic to OpenStax textbook content, generates personalized study materials, and returns A2UI JSON that the frontend renders as interactive, flippable cards. +This sample shows how agents within a chat can use A2UI to go beyond text responses and generate dynamic UI elements. When a student asks for flashcards on photosynthesis, the agent matches the topic to OpenStax textbook content, generates personalized study materials, and returns A2UI JSON that the frontend renders as interactive, flippable cards. -The same request from different students (with different learner profiles) produces different content tailored to their learning style. +Here are the concepts we're demonstrating: -Key concepts demonstrated: - -- **Custom A2UI Components** — Flashcard and QuizCard extend the standard component library +- **Custom A2UI Components** — Flashcard and QuizCard extend the standard A2UI component library - **Remote Agent** — ADK agent deployed to Vertex AI Agent Engine, decoupled from the UI - **A2A Protocol** — Frontend-to-agent communication via Agent-to-Agent protocol - **Dynamic Context** — Learner profiles loaded from GCS at runtime (no redeployment needed) -- **Content Retrieval** — LLM-powered topic matching across 167 OpenStax Biology chapters +- **Content Retrieval** — LLM-powered information retrieval across 167 OpenStax Biology chapters - **Server-side Auth** — API endpoints verify Firebase ID tokens and enforce domain/email allowlists --- @@ -59,7 +57,7 @@ Browser → API Server → Agent Engine → OpenStax → A2UI Response **Agent Engine (Vertex AI):** ADK agent with tools for generating flashcards, quizzes, and fetching textbook content. Deployed via [deploy.py](deploy.py). -**Content:** All educational material comes from [OpenStax Biology for AP Courses](https://openstax.org/details/books/biology-ap-courses), fetched from GitHub at runtime.
+**Content Pipeline:** When a user asks about "ATP hydrolysis," the agent maps the topic to relevant textbook chapters using a simple keyword matching system (we use Gemini as a fallback to help if there are no good keyword matches). The agent then fetches the actual CNXML content from [OpenStax's GitHub repo](https://github.com/openstax/osbooks-biology-bundle) and uses that source material—combined with the learner's profile—to generate grounded, personalized A2UI responses. This ensures flashcards and quizzes are rooted in peer-reviewed textbook content, not just the LLM's training data. --- @@ -101,7 +99,7 @@ Both components are registered in [src/main.ts](src/main.ts) and rendered by the Learner profiles live in GCS at `gs://{PROJECT_ID}-learner-context/learner_context/`. The demo includes a sample student "Maria" — a pre-med student preparing for the MCAT who responds well to sports analogies and has a common misconception about ATP bond energy. -To personalize for a different student, edit the files in [learner_context/](learner_context/) and upload to GCS. The agent picks up changes on the next request—no redeployment required. +To personalize for a different student, edit the files in [learner_context/](learner_context/) and upload to GCS. The agent picks up changes on the next request—no redeployment required. --- @@ -119,19 +117,25 @@ See Step 7 in [Quickstart.ipynb](Quickstart.ipynb) for Firebase setup details. ## Access Control -Both client and server enforce access restrictions via environment variables in `.env`: +**Important:** By default, access is restricted to `@google.com` accounts. That's just because the authors of this sample... work at Google.
You must configure your own domain and/or specific email addresses in `.env` to access your deployment: ```bash -VITE_ALLOWED_DOMAIN=google.com # Restrict to a domain -VITE_ALLOWED_EMAILS=alice@example.com,bob@partner.org # Or whitelist specific emails +# Allow your domain +VITE_ALLOWED_DOMAIN=yourcompany.com + +# Or whitelist specific emails +VITE_ALLOWED_DOMAIN= +VITE_ALLOWED_EMAILS=you@gmail.com,collaborator@example.com ``` -See the Access Control section in [Quickstart.ipynb](Quickstart.ipynb) for details. +The server is the single source of truth—authorization is enforced via the `/api/check-access` endpoint. See the Access Control section in [Quickstart.ipynb](Quickstart.ipynb) for details. --- ## Known Limitations +- **Keyword matching**: Topic-to-chapter mapping uses a simple keyword dictionary with LLM fallback. This is intentionally naive—a production system would use embeddings or a proper search index. Content retrieval isn't the focus of this A2UI demo. +- **Source citation accuracy**: When the agent expands a topic (e.g., "telomeres" → "telomeres, DNA, chromosome, replication, cell division"), keyword matching may cite a less relevant source. The LLM fallback only triggers when zero keywords match, not when wrong keywords match. A production system would use semantic search or LLM-based reranking to select the most relevant source. 
- **Latency**: LLM fallback for topic matching adds 2–5 seconds when keywords don't match - **Single topics only**: Multi-topic requests may return wrong content - **Audio/video**: Pre-generated files only, not dynamic diff --git a/samples/personalized_learning/agent/agent.py b/samples/personalized_learning/agent/agent.py index 4a3040499..fa4f89ba1 100644 --- a/samples/personalized_learning/agent/agent.py +++ b/samples/personalized_learning/agent/agent.py @@ -39,12 +39,7 @@ from google.adk.agents import Agent from google.adk.tools import ToolContext -# ============================================================================ -# MODULE-LEVEL CONFIGURATION -# These variables are captured by cloudpickle during deployment. -# They are set at import time from environment variables, ensuring they -# persist in the deployed agent even though os.environ is not pickled. -# ============================================================================ +# Captured at import time for cloudpickle serialization during deployment _CONFIG_PROJECT = os.getenv("GOOGLE_CLOUD_PROJECT") _CONFIG_LOCATION = os.getenv("GOOGLE_CLOUD_LOCATION", "us-central1") @@ -65,19 +60,18 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -# Log warnings for degraded functionality +# Log errors for missing modules (these are required, not optional) if not _HAS_EXTERNAL_MODULES: - logger.warning( - "External modules (context_loader, a2ui_templates) not available. " - "Using embedded fallback content. Import error: %s", + logger.error( + "Required modules (context_loader, a2ui_templates) not available. " + "Import error: %s", _IMPORT_ERROR if '_IMPORT_ERROR' in globals() else "unknown" ) if not _HAS_OPENSTAX: - logger.warning( - "OpenStax content modules not available. Flashcards and quizzes will use " - "embedded content only, without textbook source material. " - "This may result in less accurate educational content." 
+ logger.error( + "OpenStax content modules not available. " + "Flashcards and quizzes will not have textbook source material." ) # Model configuration - use Gemini 2.5 Flash (available in us-central1) @@ -89,217 +83,9 @@ # Surface ID for A2UI rendering (use imported value if available, else fallback) SURFACE_ID = _IMPORTED_SURFACE_ID if _HAS_EXTERNAL_MODULES else "learningContent" -# ============================================================================ -# GCS CONTEXT LOADING (for Agent Engine - loads dynamic context from GCS) -# ============================================================================ -# GCS configuration - set via environment variables -GCS_CONTEXT_BUCKET = os.getenv("GCS_CONTEXT_BUCKET", "a2ui-demo-context") -GCS_CONTEXT_PREFIX = os.getenv("GCS_CONTEXT_PREFIX", "learner_context/") -# Context files to load -CONTEXT_FILES = [ - "01_maria_learner_profile.txt", - "02_chemistry_bond_energy.txt", - "03_chemistry_thermodynamics.txt", - "04_biology_atp_cellular_respiration.txt", - "05_misconception_resolution.txt", - "06_mcat_practice_concepts.txt", -] - - -def _load_from_gcs(filename: str) -> Optional[str]: - """Load a context file from GCS bucket.""" - try: - from google.cloud import storage - - client = storage.Client() - bucket = client.bucket(GCS_CONTEXT_BUCKET) - blob = bucket.blob(f"{GCS_CONTEXT_PREFIX}{filename}") - - if blob.exists(): - content = blob.download_as_text() - logger.info(f"Loaded {filename} from GCS bucket {GCS_CONTEXT_BUCKET}") - return content - else: - logger.warning(f"File {filename} not found in GCS bucket {GCS_CONTEXT_BUCKET}") - return None - - except Exception as e: - logger.warning(f"Failed to load from GCS: {e}") - return None - - -def _load_all_context_from_gcs() -> dict[str, str]: - """Load all context files from GCS.""" - context = {} - for filename in CONTEXT_FILES: - content = _load_from_gcs(filename) - if content: - context[filename] = content - logger.info(f"Loaded {len(context)} context files from GCS") 
- return context - - -def _get_combined_context_from_gcs() -> str: - """Get all context combined from GCS.""" - all_context = _load_all_context_from_gcs() - - if all_context: - combined = [] - for filename, content in sorted(all_context.items()): - combined.append(f"=== {filename} ===\n{content}\n") - return "\n".join(combined) - - # Return empty string if GCS load failed - will trigger fallback - return "" - - -# ============================================================================ -# EMBEDDED CONTEXT DATA (fallback when GCS is unavailable) -# ============================================================================ - -EMBEDDED_LEARNER_PROFILE = """ -## Learner Profile: Maria Santos - -**Background:** -- Pre-med sophomore majoring in Biochemistry -- Preparing for MCAT in 8 months -- Works part-time as a pharmacy technician (20 hrs/week) - -**Learning Style:** -- Visual-kinesthetic learner -- Prefers analogies connecting to real-world applications -- Responds well to gym/fitness metaphors (exercises regularly) -- Benefits from spaced repetition for memorization - -**Current Progress:** -- Completed: Cell structure, basic chemistry -- In progress: Cellular energetics (ATP, metabolism) -- Struggling with: Thermodynamics concepts, especially Gibbs free energy - -**Known Misconceptions:** -- Believes "energy is stored in bonds" (common misconception) -- Needs clarification that bond BREAKING releases energy in ATP hydrolysis -""" - -EMBEDDED_CURRICULUM_CONTEXT = """ -## Current Topic: ATP and Cellular Energy - -**Learning Objectives:** -1. Explain why ATP is considered the "energy currency" of cells -2. Describe the structure of ATP and how it stores potential energy -3. Understand that energy is released during hydrolysis due to product stability, not bond breaking -4. 
Connect ATP usage to cellular processes like muscle contraction - -**Key Concepts:** -- Adenosine triphosphate structure (adenine + ribose + 3 phosphate groups) -- Phosphoanhydride bonds and electrostatic repulsion -- Hydrolysis reaction: ATP + H2O → ADP + Pi + Energy -- Gibbs free energy change (ΔG = -30.5 kJ/mol) -- Coupled reactions in cellular metabolism - -**Common Misconceptions to Address:** -- "Energy stored in bonds" - Actually, breaking bonds REQUIRES energy; - the energy released comes from forming more stable products (ADP + Pi) -- ATP is not a long-term energy storage molecule (that's glycogen/fat) -""" - -EMBEDDED_MISCONCEPTION_CONTEXT = """ -## Misconception Resolution: "Energy Stored in Bonds" - -**The Misconception:** -Many students believe ATP releases energy because "energy is stored in the phosphate bonds." - -**The Reality:** -- Breaking ANY chemical bond REQUIRES energy input (endothermic) -- Energy is released when NEW, more stable bonds FORM (exothermic) -- ATP hydrolysis releases energy because the products (ADP + Pi) are MORE STABLE than ATP - -**Why ATP is "High Energy":** -- The three phosphate groups are negatively charged and repel each other -- This electrostatic repulsion creates molecular strain (like a compressed spring) -- When the terminal phosphate is removed, the products achieve better stability -- The energy comes from relieving this strain, not from "stored bond energy" - -**Gym Analogy for Maria:** -Think of ATP like holding a heavy plank position: -- Holding the plank (ATP) requires constant energy expenditure to maintain -- Dropping to rest (ADP + Pi) releases that tension -- The "energy" wasn't stored in your muscles - it was the relief of an unstable state -""" - - -def _get_combined_context_fallback() -> str: - """Get combined context using embedded data when files aren't available.""" - return f""" -{EMBEDDED_LEARNER_PROFILE} - -{EMBEDDED_CURRICULUM_CONTEXT} - -{EMBEDDED_MISCONCEPTION_CONTEXT} -""" - - -def 
_get_system_prompt_fallback(format_type: str, context: str) -> str: - """Generate system prompt for A2UI generation (fallback for Agent Engine).""" - if format_type.lower() == "flashcards": - return f"""You are creating MCAT study flashcards for Maria, a pre-med student. - -## Maria's Profile -{context} - -## Your Task -Create 4-5 high-quality flashcards about ATP and bond energy that: -1. Directly address her misconception that "energy is stored in bonds" -2. Use sports/gym analogies she loves (compressed springs, holding planks, etc.) -3. Are MCAT exam-focused with precise scientific language -4. Have COMPLETE, THOUGHTFUL answers - not placeholders - -## A2UI JSON Format -Output a JSON array starting with beginRendering, then surfaceUpdate with components. -Use Flashcard components with front, back, and category fields. -Use surfaceId: "{SURFACE_ID}" - -Generate the flashcards JSON (output ONLY valid JSON, no markdown):""" - - if format_type.lower() == "quiz": - return f"""You are creating MCAT practice quiz questions for Maria, a pre-med student. - -## Maria's Profile -{context} - -## Your Task -Create 2-3 interactive quiz questions about ATP and bond energy that: -1. Test her understanding of WHY ATP hydrolysis releases energy -2. Include plausible wrong answers reflecting common misconceptions -3. Provide detailed explanations using sports/gym analogies -4. Are MCAT exam-style with precise scientific language - -## A2UI JSON Format -Output a JSON array with QuizCard components. Each QuizCard has: -- question: The question text -- options: Array of 4 choices with label, value (a/b/c/d), isCorrect -- explanation: Detailed explanation shown after answering -- category: Topic category -Use surfaceId: "{SURFACE_ID}" - -Generate the quiz JSON (output ONLY valid JSON, no markdown):""" - - return f"""Generate A2UI JSON for {format_type} content. 
- -## Context -{context} - -Use surfaceId: "{SURFACE_ID}" -Output ONLY valid JSON, no markdown.""" - - -# ============================================================================ -# CACHING FOR PERFORMANCE -# ============================================================================ - -# Context cache with TTL +# Context cache with TTL for performance _CONTEXT_CACHE: dict[str, Tuple[str, float]] = {} _CONTEXT_CACHE_TTL = 300 # 5 minutes @@ -334,77 +120,54 @@ def clear_context_cache() -> None: logger.info("Context cache cleared") -# Wrapper functions with priority: local files -> GCS -> embedded fallback def _safe_get_combined_context() -> str: """ - Get combined context with fallback chain: - 1. Local files (via external modules) - for local development - 2. GCS bucket - for Agent Engine with dynamic context - 3. Embedded data - final fallback + Get combined learner context. Uses context_loader which handles + local files (for development) and GCS fallback (for Agent Engine). """ - # Try local files first (for local development with adk web) - if _HAS_EXTERNAL_MODULES: - try: - context = get_combined_context() - if context: - logger.info("Loaded context from local files") - return context - except Exception as e: - logger.warning(f"Failed to load context from local files: {e}") + if not _HAS_EXTERNAL_MODULES: + raise RuntimeError( + "context_loader module not available. Cannot load learner context." + ) - # Try GCS (for Agent Engine deployment) - gcs_context = _get_combined_context_from_gcs() - if gcs_context: - logger.info("Loaded context from GCS") - return gcs_context + try: + context = get_combined_context() + if context: + return context + except Exception as e: + logger.error(f"Failed to load learner context: {e}") + raise RuntimeError(f"Could not load learner context: {e}") - # Fall back to embedded data - logger.info("Using embedded fallback context") - return _get_combined_context_fallback() + raise RuntimeError( + "No learner context found. 
Ensure context files exist in " + "learner_context/ or GCS bucket is configured." + ) def _safe_load_context_file(filename: str) -> Optional[str]: """ - Load context file with fallback chain: - 1. Local files (via external modules) - 2. GCS bucket - 3. Embedded data + Load a single context file. Uses context_loader which handles + local files and GCS fallback. """ - # Try local files first - if _HAS_EXTERNAL_MODULES: - try: - content = load_context_file(filename) - if content: - return content - except Exception as e: - logger.debug(f"Failed to load context file {filename} from local: {e}") - - # Try GCS - gcs_content = _load_from_gcs(filename) - if gcs_content: - return gcs_content + if not _HAS_EXTERNAL_MODULES: + logger.warning(f"context_loader not available, cannot load {filename}") + return None - # Fall back to embedded data based on filename - if "learner_profile" in filename: - return EMBEDDED_LEARNER_PROFILE - if "misconception" in filename: - return EMBEDDED_MISCONCEPTION_CONTEXT - return None + try: + return load_context_file(filename) + except Exception as e: + logger.warning(f"Failed to load {filename}: {e}") + return None def _safe_get_system_prompt(format_type: str, context: str) -> str: - """Get system prompt, using fallback if external modules unavailable.""" - if _HAS_EXTERNAL_MODULES: - try: - return get_system_prompt(format_type, context) - except Exception as e: - logger.warning(f"Failed to get system prompt: {e}, using fallback") - return _get_system_prompt_fallback(format_type, context) - - -# ============================================================================ -# TOOL FUNCTIONS -# ============================================================================ + """Get system prompt from a2ui_templates module.""" + if not _HAS_EXTERNAL_MODULES: + raise RuntimeError( + "a2ui_templates module not available. " + "Cannot generate system prompts without it." 
+ ) + return get_system_prompt(format_type, context) async def generate_flashcards( @@ -425,10 +188,7 @@ async def generate_flashcards( Returns: A2UI JSON for flashcard components that can be rendered in the chat """ - logger.info("=" * 60) - logger.info("GENERATE_FLASHCARDS CALLED") - logger.info(f"Topic received: {topic or '(none)'}") - logger.info("=" * 60) + logger.info(f"Generating flashcards for topic: {topic or '(none)'}") # Get learner context (profile, preferences, misconceptions) - uses cache learner_context = _get_cached_context() @@ -443,10 +203,7 @@ async def generate_flashcards( openstax_content = content_result.get("combined_content", "") sources = content_result.get("sources", []) matched_chapters = content_result.get("matched_chapters", []) - logger.info(f"OpenStax fetch result:") - logger.info(f" - Matched chapters: {matched_chapters}") - logger.info(f" - Sources: {sources}") - logger.info(f" - Content length: {len(openstax_content)} chars") + logger.info(f"OpenStax: matched {len(matched_chapters)} chapters, {len(openstax_content)} chars") if not openstax_content: logger.warning("NO CONTENT RETURNED from OpenStax fetch!") except Exception as e: @@ -802,27 +559,15 @@ async def get_textbook_content( } -# ============================================================================ -# HELPER FUNCTIONS -# ============================================================================ - - async def _generate_a2ui_content( format_type: str, context: str, tool_context: ToolContext, ) -> dict[str, Any]: - """ - Generate A2UI content using the Gemini model. - - This is an internal helper that calls the LLM to generate A2UI JSON. 
- """ + """Generate A2UI content using the Gemini model.""" from google import genai from google.genai import types - # Initialize client with VertexAI - use us-central1 for consistency with Agent Engine - # Use module-level config variables (captured by cloudpickle) with - # environment variable fallback for local development project = _CONFIG_PROJECT or os.getenv("GOOGLE_CLOUD_PROJECT") location = _CONFIG_LOCATION or os.getenv("GOOGLE_CLOUD_LOCATION", "us-central1") @@ -870,14 +615,7 @@ async def _generate_a2ui_content( return {"error": str(e)} -# ============================================================================ -# AGENT DEFINITION -# ============================================================================ - -# System prompt for tool selection and agent behavior. -# Note: Maria's profile also appears in src/chat-orchestrator.ts (for chat responses) -# and learner_context/ files (for dynamic personalization). This duplication is -# intentional—the frontend and agent operate independently. +# System prompt for tool selection and agent behavior SYSTEM_PROMPT = """# Personalized Learning Agent You are a personalized learning assistant that helps students study biology more effectively. @@ -949,20 +687,8 @@ async def _generate_a2ui_content( and explain the content to the learner. """ -# ============================================================================ -# AGENT FACTORY FOR AGENT ENGINE DEPLOYMENT -# ============================================================================ -# Agent Engine requires a class that creates the agent on the SERVER, -# not a pre-instantiated agent object. This avoids serialization issues -# with live objects (connections, locks, etc). - - def create_agent() -> Agent: - """Factory function to create the ADK agent. - - This is called on the server side after deployment, avoiding - serialization of live objects. 
- """ + """Create the ADK agent with all tools.""" return Agent( name="personalized_learning_agent", model=MODEL_ID, @@ -978,154 +704,7 @@ def create_agent() -> Agent: ) -# For local development with `adk web`, we still need a module-level agent -# This is only instantiated when running locally, not during deployment +# Module-level agent for local development with `adk web` root_agent = create_agent() -# ============================================================================ -# SERVER-SIDE AGENT WRAPPER FOR AGENT ENGINE DEPLOYMENT -# ============================================================================ -# This wrapper class enables lazy initialization - the agent is created -# on the server side after deployment, avoiding serialization of live objects. - - -class ServerSideAgent: - """ - Wrapper class for Agent Engine deployment using ReasoningEngine pattern. - - This class is COMPLETELY SELF-CONTAINED - it does not import from the - 'agent' package to avoid module resolution issues during unpickling. - All agent creation logic is inlined here. - - Usage: - reasoning_engines.ReasoningEngine.create( - ServerSideAgent, # Pass the CLASS, not an instance - requirements=[...], - ) - """ - - def __init__(self): - """Initialize the agent on the server side.""" - # ALL imports happen inside __init__ to avoid capture during pickling - import os - from google.adk.agents import Agent - from vertexai.agent_engines import AdkApp - - # Model configuration - model_id = os.getenv("GENAI_MODEL", "gemini-2.5-flash") - - # Create a simple agent with basic instruction - # Tools would need to be defined inline here too to avoid imports - self.agent = Agent( - name="personalized_learning_agent", - model=model_id, - instruction="""You are a personalized learning assistant that helps students study biology. - -You can help students understand concepts like ATP, cellular respiration, and bond energy. -Use sports and gym analogies when explaining concepts. 
- -When asked for flashcards or quizzes, explain that this feature requires the full agent deployment. -For now, you can have a helpful conversation about biology topics.""", - tools=[], # No tools for now - keep it simple - ) - - # Wrap in AdkApp for session management and tracing - self.app = AdkApp(agent=self.agent, enable_tracing=True) - - def query(self, *, user_id: str, message: str, **kwargs): - """ - Handle a query from the user. - - This method signature matches what ReasoningEngine expects. - """ - return self.app.query(user_id=user_id, message=message, **kwargs) - - async def aquery(self, *, user_id: str, message: str, **kwargs): - """ - Handle an async query from the user. - """ - return await self.app.aquery(user_id=user_id, message=message, **kwargs) - - def stream_query(self, *, user_id: str, message: str, **kwargs): - """ - Handle a streaming query from the user. - """ - return self.app.stream_query(user_id=user_id, message=message, **kwargs) - - -# ============================================================================ -# LEGACY COMPATIBILITY (for server.py) -# ============================================================================ - -class LearningMaterialAgent: - """ - Legacy wrapper for backwards compatibility with server.py. - - This class wraps the ADK agent's tools to maintain the same interface - that server.py expects. 
- """ - - SUPPORTED_FORMATS = SUPPORTED_FORMATS - - def __init__(self, init_client: bool = True): - self._init_client = init_client - - async def generate_content( - self, - format_type: str, - additional_context: str = "", - ) -> dict[str, Any]: - """Generate content using the appropriate tool.""" - # Create a minimal tool context (duck-typed to match ToolContext interface) - class MinimalToolContext: - def __init__(self): - self.state = {} - - ctx = MinimalToolContext() - - format_lower = format_type.lower() - - if format_lower == "flashcards": - return await generate_flashcards(ctx, additional_context or None) - elif format_lower == "quiz": - return await generate_quiz(ctx, additional_context or None) - elif format_lower in ["audio", "podcast"]: - return await get_audio_content(ctx) - elif format_lower == "video": - return await get_video_content(ctx) - else: - return { - "error": f"Unsupported format: {format_type}", - "supported_formats": SUPPORTED_FORMATS, - } - - async def stream(self, request: str, session_id: str = "default"): - """Stream response for A2A compatibility.""" - parts = request.split(":", 1) - format_type = parts[0].strip().lower() - additional_context = parts[1].strip() if len(parts) > 1 else "" - - yield { - "is_task_complete": False, - "updates": f"Generating {format_type}...", - } - - result = await self.generate_content(format_type, additional_context) - - yield { - "is_task_complete": True, - "content": result, - } - - -# Singleton for backwards compatibility -_agent_instance = None - - -def get_agent() -> LearningMaterialAgent: - """Get or create the legacy agent wrapper singleton.""" - global _agent_instance - if _agent_instance is None: - _agent_instance = LearningMaterialAgent() - return _agent_instance diff --git a/samples/personalized_learning/agent/openstax_chapters.py b/samples/personalized_learning/agent/openstax_chapters.py index a87d308bc..07f53c157 100644 --- a/samples/personalized_learning/agent/openstax_chapters.py +++ 
b/samples/personalized_learning/agent/openstax_chapters.py @@ -2,12 +2,21 @@ Complete OpenStax Biology AP Courses Chapter Index This module provides a comprehensive mapping of all chapters in the OpenStax -Biology for AP Courses textbook, along with intelligent topic matching. +Biology for AP Courses textbook, along with topic-to-chapter matching. Content is sourced from the OpenStax GitHub repository: https://github.com/openstax/osbooks-biology-bundle The module IDs (e.g., m62767) correspond to CNXML files in the modules/ directory. + +NOTE ON KEYWORD MATCHING APPROACH: +----------------------------------- +The KEYWORD_HINTS dictionary below uses a simple keyword-based approach to map +user topics to textbook chapters. This is admittedly naive and could be improved +with more sophisticated techniques (semantic search, embeddings, etc.). However, +content retrieval is not the main focus of this A2UI demo - we're demonstrating +the agent-to-UI rendering pipeline, not building a production content system. +For a real application, consider using vector embeddings or a proper search index. 
""" # GitHub raw content base URL for fetching module content @@ -404,197 +413,240 @@ def get_chapter_list_for_llm() -> str: "cloning": ["17-1-biotechnology"], "genome": ["17-2-mapping-genomes", "17-3-whole-genome-sequencing"], "genomics": ["17-4-applying-genomics", "17-5-genomics-and-proteomics"], + + # Additional keywords for better coverage (plurals and common phrases) + # Energy & Metabolism + "light reactions": ["8-2-the-light-dependent-reaction-of-photosynthesis"], + "energy metabolism": ["6-1-energy-and-metabolism", "7-1-energy-in-living-systems"], + "metabolism": ["6-1-energy-and-metabolism", "7-1-energy-in-living-systems"], + "enzymes": ["6-5-enzymes"], + "proteins": ["3-4-proteins", "15-5-ribosomes-and-protein-synthesis"], + "nucleic acids": ["3-5-nucleic-acids"], + "ribosomes": ["15-5-ribosomes-and-protein-synthesis", "4-3-eukaryotic-cells"], + + # Body Systems (full names) + "respiratory system": ["30-1-systems-of-gas-exchange", "30-3-breathing"], + "digestive system": ["25-1-digestive-systems", "25-3-digestive-system-processes"], + "skeletal system": ["29-1-types-of-skeletal-systems", "29-2-bone"], + "muscular system": ["29-4-muscle-contraction-and-locomotion"], + "circulatory system": ["31-1-overview-of-the-circulatory-system"], + "immune system": ["33-1-innate-immune-response", "33-2-adaptive-immune-response"], + + # Plurals + "hormones": ["28-1-types-of-hormones", "28-2-how-hormones-work"], + "neurons": ["26-1-neurons-and-glial-cells", "26-2-how-neurons-communicate"], + "lungs": ["30-1-systems-of-gas-exchange"], + "kidneys": ["32-2-the-kidneys-and-osmoregulatory-organs"], + "antibodies": ["33-3-antibodies"], + "mutations": ["14-6-dna-repair"], + "ecosystems": ["37-1-ecology-for-ecosystems", "37-2-energy-flow-through-ecosystems"], + "biomes": ["35-3-terrestrial-biomes", "35-4-aquatic-biomes"], + "viruses": ["21-1-viral-evolution-morphology-and-classification", "21-2-virus-infection-and-hosts"], + "prokaryotes": ["4-2-prokaryotic-cells", 
"22-1-prokaryotic-diversity"], + "eukaryotes": ["4-3-eukaryotic-cells"], + "chromosomes": ["13-1-chromosomal-theory-and-genetic-linkages", "13-2-chromosomal-basis-of-inherited-disorders"], + + # Genetics + "homeostasis": ["24-3-homeostasis"], + "chromosome": ["13-1-chromosomal-theory-and-genetic-linkages", "13-2-chromosomal-basis-of-inherited-disorders"], + "allele": ["12-2-characteristics-and-traits", "12-3-laws-of-inheritance"], + "alleles": ["12-2-characteristics-and-traits", "12-3-laws-of-inheritance"], + "dominant": ["12-2-characteristics-and-traits", "12-3-laws-of-inheritance"], + "recessive": ["12-2-characteristics-and-traits", "12-3-laws-of-inheritance"], + "dominant traits": ["12-2-characteristics-and-traits", "12-3-laws-of-inheritance"], + "recessive traits": ["12-2-characteristics-and-traits", "12-3-laws-of-inheritance"], + "genetic disorders": ["13-2-chromosomal-basis-of-inherited-disorders"], + "genetic disorder": ["13-2-chromosomal-basis-of-inherited-disorders"], + + # Evolution + "adaptation": ["19-3-adaptive-evolution"], + "phylogenetic": ["20-2-determining-evolutionary-relationships", "20-3-perspectives-on-the-phylogenetic-tree"], + "phylogenetics": ["20-2-determining-evolutionary-relationships", "20-3-perspectives-on-the-phylogenetic-tree"], + "fossil": ["18-1-understanding-evolution"], + "fossils": ["18-1-understanding-evolution"], + "common ancestor": ["20-2-determining-evolutionary-relationships"], + "ancestors": ["20-2-determining-evolutionary-relationships"], + + # Ecology + "energy flow": ["37-2-energy-flow-through-ecosystems"], + "trophic": ["37-2-energy-flow-through-ecosystems"], } # ============================================================================= # CHAPTER TO MODULE ID MAPPING # Maps chapter slugs to their corresponding module IDs from the OpenStax GitHub +# These IDs were verified against the actual OpenStax osbooks-biology-bundle repo # ============================================================================= 
CHAPTER_TO_MODULES: dict[str, list[str]] = { - # Unit 1: The Chemistry of Life - "1-1-the-science-of-biology": ["m62716"], - "1-2-themes-and-concepts-of-biology": ["m62717", "m62718"], - "2-1-atoms-isotopes-ions-and-molecules-the-building-blocks": ["m62719"], - "2-2-water": ["m62720"], - "2-3-carbon": ["m62721", "m62722"], - "3-1-synthesis-of-biological-macromolecules": ["m62723"], - "3-2-carbohydrates": ["m62724"], - "3-3-lipids": ["m62726"], - "3-4-proteins": ["m62730"], - "3-5-nucleic-acids": ["m62733", "m62735"], - - # Unit 2: The Cell - "4-1-studying-cells": ["m62736"], - "4-2-prokaryotic-cells": ["m62738"], - "4-3-eukaryotic-cells": ["m62740"], - "4-4-the-endomembrane-system-and-proteins": ["m62742", "m62743"], + "1-1-the-science-of-biology": ["m62717"], + "1-2-themes-and-concepts-of-biology": ["m62718"], + "2-1-atoms-isotopes-ions-and-molecules-the-building-blocks": ["m62720"], + "2-2-water": ["m62721"], + "2-3-carbon": ["m62722"], + "3-1-synthesis-of-biological-macromolecules": ["m62724"], + "3-2-carbohydrates": ["m62726"], + "3-3-lipids": ["m62730"], + "3-4-proteins": ["m62733"], + "3-5-nucleic-acids": ["m62735"], + "4-1-studying-cells": ["m62738"], + "4-2-prokaryotic-cells": ["m62740"], + "4-3-eukaryotic-cells": ["m62742"], + "4-4-the-endomembrane-system-and-proteins": ["m62743"], "4-5-cytoskeleton": ["m62744"], "4-6-connections-between-cells-and-cellular-activities": ["m62746"], - "5-1-components-and-structure": ["m62780"], - "5-2-passive-transport": ["m62773"], - "5-3-active-transport": ["m62753"], - "5-4-bulk-transport": ["m62770", "m62772"], - "6-1-energy-and-metabolism": ["m62761"], - "6-2-potential-kinetic-free-and-activation-energy": ["m62763"], - "6-3-the-laws-of-thermodynamics": ["m62764"], - "6-4-atp-adenosine-triphosphate": ["m62767"], - "6-5-enzymes": ["m62768", "m62778"], - "7-1-energy-in-living-systems": ["m62784"], - "7-2-glycolysis": ["m62785"], - "7-3-oxidation-of-pyruvate-and-the-citric-acid-cycle": ["m62786"], - 
"7-4-oxidative-phosphorylation": ["m62787"], - "7-5-metabolism-without-oxygen": ["m62788"], - "7-6-connections-of-carbohydrate-protein-and-lipid-metabolic-pathways": ["m62789"], - "7-7-regulation-of-cellular-respiration": ["m62790", "m62791", "m62792"], - "8-1-overview-of-photosynthesis": ["m62793"], - "8-2-the-light-dependent-reaction-of-photosynthesis": ["m62794"], - "8-3-using-light-to-make-organic-molecules": ["m62795", "m62796"], - "9-1-signaling-molecules-and-cellular-receptors": ["m62797"], - "9-2-propagation-of-the-signal": ["m62798"], - "9-3-response-to-the-signal": ["m62799"], - "9-4-signaling-in-single-celled-organisms": ["m62800", "m62801"], - "10-1-cell-division": ["m62802"], - "10-2-the-cell-cycle": ["m62803"], - "10-3-control-of-the-cell-cycle": ["m62804"], - "10-4-cancer-and-the-cell-cycle": ["m62805"], - "10-5-prokaryotic-cell-division": ["m62806", "m62808"], - - # Unit 3: Genetics - "11-1-the-process-of-meiosis": ["m62809"], - "11-2-sexual-reproduction": ["m62810", "m62811"], - "12-1-mendels-experiments-and-the-laws-of-probability": ["m62812", "m62813"], + "5-1-components-and-structure": ["m62773"], + "5-2-passive-transport": ["m62753"], + "5-3-active-transport": ["m62770"], + "5-4-bulk-transport": ["m62772"], + "6-1-energy-and-metabolism": ["m62763"], + "6-2-potential-kinetic-free-and-activation-energy": ["m62764"], + "6-3-the-laws-of-thermodynamics": ["m62767"], + "6-4-atp-adenosine-triphosphate": ["m62768"], + "6-5-enzymes": ["m62778"], + "7-1-energy-in-living-systems": ["m62786"], + "7-2-glycolysis": ["m62787"], + "7-3-oxidation-of-pyruvate-and-the-citric-acid-cycle": ["m62788"], + "7-4-oxidative-phosphorylation": ["m62789"], + "7-5-metabolism-without-oxygen": ["m62790"], + "7-6-connections-of-carbohydrate-protein-and-lipid-metabolic-pathways": ["m62791"], + "7-7-regulation-of-cellular-respiration": ["m62792"], + "8-1-overview-of-photosynthesis": ["m62794"], + "8-2-the-light-dependent-reaction-of-photosynthesis": ["m62795"], + 
"8-3-using-light-to-make-organic-molecules": ["m62796"], + "9-1-signaling-molecules-and-cellular-receptors": ["m62798"], + "9-2-propagation-of-the-signal": ["m62799"], + "9-3-response-to-the-signal": ["m62800"], + "9-4-signaling-in-single-celled-organisms": ["m62801"], + "10-1-cell-division": ["m62803"], + "10-2-the-cell-cycle": ["m62804"], + "10-3-control-of-the-cell-cycle": ["m62805"], + "10-4-cancer-and-the-cell-cycle": ["m62806"], + "10-5-prokaryotic-cell-division": ["m62808"], + "11-1-the-process-of-meiosis": ["m62810"], + "11-2-sexual-reproduction": ["m62811"], + "12-1-mendels-experiments-and-the-laws-of-probability": ["m62813"], "12-2-characteristics-and-traits": ["m62817"], "12-3-laws-of-inheritance": ["m62819"], - "13-1-chromosomal-theory-and-genetic-linkages": ["m62820"], - "13-2-chromosomal-basis-of-inherited-disorders": ["m62821", "m62822"], - "14-1-historical-basis-of-modern-understanding": ["m62823"], - "14-2-dna-structure-and-sequencing": ["m62824"], - "14-3-basics-of-dna-replication": ["m62825"], - "14-4-dna-replication-in-prokaryotes": ["m62826"], - "14-5-dna-replication-in-eukaryotes": ["m62827", "m62828"], - "14-6-dna-repair": ["m62829", "m62830"], - "15-1-the-genetic-code": ["m62833"], - "15-2-prokaryotic-transcription": ["m62837"], - "15-3-eukaryotic-transcription": ["m62838"], - "15-4-rna-processing-in-eukaryotes": ["m62840"], - "15-5-ribosomes-and-protein-synthesis": ["m62842", "m62843"], - "16-1-regulation-of-gene-expression": ["m62844"], - "16-2-prokaryotic-gene-regulation": ["m62845"], - "16-3-eukaryotic-epigenetic-gene-regulation": ["m62846"], - "16-4-eukaryotic-transcriptional-gene-regulation": ["m62847"], - "16-5-eukaryotic-post-transcriptional-gene-regulation": ["m62848"], - "16-6-eukaryotic-translational-and-post-translational-gene-regulation": ["m62849"], - "16-7-cancer-and-gene-regulation": ["m62850", "m62851"], - "17-1-biotechnology": ["m62852"], - "17-2-mapping-genomes": ["m62853"], - "17-3-whole-genome-sequencing": ["m62855"], - 
"17-4-applying-genomics": ["m62857"], - "17-5-genomics-and-proteomics": ["m62860", "m62861"], - - # Unit 4: Evolutionary Processes - "18-1-understanding-evolution": ["m62862"], - "18-2-formation-of-new-species": ["m62863"], - "18-3-reconnection-and-rates-of-speciation": ["m62864", "m62865"], - "19-1-population-evolution": ["m62866"], - "19-2-population-genetics": ["m62867"], - "19-3-adaptive-evolution": ["m62868", "m62869"], - "20-1-organizing-life-on-earth": ["m62870"], - "20-2-determining-evolutionary-relationships": ["m62871"], - "20-3-perspectives-on-the-phylogenetic-tree": ["m62872", "m62873"], - - # Unit 5: Biological Diversity - "21-1-viral-evolution-morphology-and-classification": ["m62874"], - "21-2-virus-infection-and-hosts": ["m62875"], - "21-3-prevention-and-treatment-of-viral-infections": ["m62876"], - "21-4-other-acellular-entities-prions-and-viroids": ["m62877", "m62878"], - "22-1-prokaryotic-diversity": ["m62879"], - "22-2-structure-of-prokaryotes": ["m62880"], - "22-3-prokaryotic-metabolism": ["m62881"], - "22-4-bacterial-diseases-in-humans": ["m62882"], - "22-5-beneficial-prokaryotes": ["m62883", "m62884"], - - # Unit 6: Plant Structure and Function - "23-1-the-plant-body": ["m62885"], - "23-2-stems": ["m62886"], - "23-3-roots": ["m62887"], - "23-4-leaves": ["m62888"], - "23-5-transport-of-water-and-solutes-in-plants": ["m62889"], - "23-6-plant-sensory-systems-and-responses": ["m62890", "m62891"], - - # Unit 7: Animal Structure and Function - "24-1-animal-form-and-function": ["m62892"], - "24-2-animal-primary-tissues": ["m62893"], - "24-3-homeostasis": ["m62894", "m62895"], - "25-1-digestive-systems": ["m62896"], - "25-2-nutrition-and-energy-production": ["m62897"], - "25-3-digestive-system-processes": ["m62898"], - "25-4-digestive-system-regulation": ["m62899", "m62900"], - "26-1-neurons-and-glial-cells": ["m62901"], - "26-2-how-neurons-communicate": ["m62902"], - "26-3-the-central-nervous-system": ["m62903"], - 
"26-4-the-peripheral-nervous-system": ["m62904"], - "26-5-nervous-system-disorders": ["m62905", "m62906"], - "27-1-sensory-processes": ["m62907"], - "27-2-somatosensation": ["m62908"], - "27-3-taste-and-smell": ["m62909"], - "27-4-hearing-and-vestibular-sensation": ["m62910"], - "27-5-vision": ["m62911", "m62912"], - "28-1-types-of-hormones": ["m62913"], - "28-2-how-hormones-work": ["m62914"], - "28-3-regulation-of-body-processes": ["m62915"], - "28-4-regulation-of-hormone-production": ["m62916"], - "28-5-endocrine-glands": ["m62917", "m62918"], - "29-1-types-of-skeletal-systems": ["m62919"], - "29-2-bone": ["m62920"], - "29-3-joints-and-skeletal-movement": ["m62921"], - "29-4-muscle-contraction-and-locomotion": ["m62922", "m62923"], - "30-1-systems-of-gas-exchange": ["m62924"], - "30-2-gas-exchange-across-respiratory-surfaces": ["m62925"], - "30-3-breathing": ["m62926"], - "30-4-transport-of-gases-in-human-bodily-fluids": ["m62927", "m62928"], - "31-1-overview-of-the-circulatory-system": ["m62929"], - "31-2-components-of-the-blood": ["m62930"], - "31-3-mammalian-heart-and-blood-vessels": ["m62931"], - "31-4-blood-flow-and-blood-pressure-regulation": ["m62932", "m62933"], - "32-1-osmoregulation-and-osmotic-balance": ["m62934"], - "32-2-the-kidneys-and-osmoregulatory-organs": ["m62935"], - "32-3-excretion-systems": ["m62936"], - "32-4-nitrogenous-wastes": ["m62937"], - "32-5-hormonal-control-of-osmoregulatory-functions": ["m62938", "m62939"], - "33-1-innate-immune-response": ["m62940"], - "33-2-adaptive-immune-response": ["m62941"], - "33-3-antibodies": ["m62942"], - "33-4-disruptions-in-the-immune-system": ["m62943", "m62944"], - "34-1-reproduction-methods": ["m62945"], - "34-2-fertilization": ["m62946"], - "34-3-human-reproductive-anatomy-and-gametogenesis": ["m62947"], - "34-4-hormonal-control-of-human-reproduction": ["m62948"], - "34-5-fertilization-and-early-embryonic-development": ["m62949"], - "34-6-organogenesis-and-vertebrate-axis-formation": ["m62950"], - 
"34-7-human-pregnancy-and-birth": ["m62951", "m62952"], - - # Unit 8: Ecology - "35-1-the-scope-of-ecology": ["m62953"], - "35-2-biogeography": ["m62954"], - "35-3-terrestrial-biomes": ["m62955"], - "35-4-aquatic-biomes": ["m62956"], - "35-5-climate-and-the-effects-of-global-climate-change": ["m62957", "m62958"], - "36-1-population-demography": ["m62959"], - "36-2-life-histories-and-natural-selection": ["m62960"], - "36-3-environmental-limits-to-population-growth": ["m62961"], - "36-4-population-dynamics-and-regulation": ["m62962"], - "36-5-human-population-growth": ["m62963"], - "36-6-community-ecology": ["m62964"], - "36-7-behavioral-biology-proximate-and-ultimate-causes-of-behavior": ["m62965", "m62966"], - "37-1-ecology-for-ecosystems": ["m62967"], - "37-2-energy-flow-through-ecosystems": ["m62968"], - "37-3-biogeochemical-cycles": ["m62969", "m62970"], - "38-1-the-biodiversity-crisis": ["m62971"], - "38-2-the-importance-of-biodiversity-to-human-life": ["m62972"], - "38-3-threats-to-biodiversity": ["m62973"], - "38-4-preserving-biodiversity": ["m62974", "m62975"], + "13-1-chromosomal-theory-and-genetic-linkages": ["m62821"], + "13-2-chromosomal-basis-of-inherited-disorders": ["m62822"], + "14-1-historical-basis-of-modern-understanding": ["m62824"], + "14-2-dna-structure-and-sequencing": ["m62825"], + "14-3-basics-of-dna-replication": ["m62826"], + "14-4-dna-replication-in-prokaryotes": ["m62828"], + "14-5-dna-replication-in-eukaryotes": ["m62829"], + "14-6-dna-repair": ["m62830"], + "15-1-the-genetic-code": ["m62837"], + "15-2-prokaryotic-transcription": ["m62838"], + "15-3-eukaryotic-transcription": ["m62840"], + "15-4-rna-processing-in-eukaryotes": ["m62842"], + "15-5-ribosomes-and-protein-synthesis": ["m62843"], + "16-1-regulation-of-gene-expression": ["m62845"], + "16-2-prokaryotic-gene-regulation": ["m62846"], + "16-3-eukaryotic-epigenetic-gene-regulation": ["m62847"], + "16-4-eukaryotic-transcriptional-gene-regulation": ["m62848"], + 
"16-5-eukaryotic-post-transcriptional-gene-regulation": ["m62849"], + "16-6-eukaryotic-translational-and-post-translational-gene-regulation": ["m62850"], + "16-7-cancer-and-gene-regulation": ["m62851"], + "17-1-biotechnology": ["m62853"], + "17-2-mapping-genomes": ["m62855"], + "17-3-whole-genome-sequencing": ["m62857"], + "17-4-applying-genomics": ["m62860"], + "17-5-genomics-and-proteomics": ["m62861"], + "18-1-understanding-evolution": ["m62863"], + "18-2-formation-of-new-species": ["m62864"], + "18-3-reconnection-and-rates-of-speciation": ["m62865"], + "19-1-population-evolution": ["m62868"], + "19-2-population-genetics": ["m62870"], + "19-3-adaptive-evolution": ["m62871"], + "20-1-organizing-life-on-earth": ["m62874"], + "20-2-determining-evolutionary-relationships": ["m62903"], + "20-3-perspectives-on-the-phylogenetic-tree": ["m62876"], + "21-1-viral-evolution-morphology-and-classification": ["m62881"], + "21-2-virus-infection-and-hosts": ["m62882"], + "21-3-prevention-and-treatment-of-viral-infections": ["m62904"], + "21-4-other-acellular-entities-prions-and-viroids": ["m62887"], + "22-1-prokaryotic-diversity": ["m62891"], + "22-2-structure-of-prokaryotes": ["m62893"], + "22-3-prokaryotic-metabolism": ["m62894"], + "22-4-bacterial-diseases-in-humans": ["m62896"], + "22-5-beneficial-prokaryotes": ["m62897"], + "23-1-the-plant-body": ["m62951"], + "23-2-stems": ["m62905"], + "23-3-roots": ["m62906"], + "23-4-leaves": ["m62908"], + "23-5-transport-of-water-and-solutes-in-plants": ["m62969"], + "23-6-plant-sensory-systems-and-responses": ["m62930"], + "24-1-animal-form-and-function": ["m62916"], + "24-2-animal-primary-tissues": ["m62918"], + "24-3-homeostasis": ["m62931"], + "25-1-digestive-systems": ["m62919"], + "25-2-nutrition-and-energy-production": ["m62920"], + "25-3-digestive-system-processes": ["m62921"], + "25-4-digestive-system-regulation": ["m62922"], + "26-1-neurons-and-glial-cells": ["m62924"], + "26-2-how-neurons-communicate": ["m62925"], + 
"26-3-the-central-nervous-system": ["m62926"], + "26-4-the-peripheral-nervous-system": ["m62928"], + "26-5-nervous-system-disorders": ["m62929"], + "27-1-sensory-processes": ["m62994"], + "27-2-somatosensation": ["m62946"], + "27-3-taste-and-smell": ["m62947"], + "27-4-hearing-and-vestibular-sensation": ["m62954"], + "27-5-vision": ["m62957"], + "28-1-types-of-hormones": ["m62961"], + "28-2-how-hormones-work": ["m62963"], + "28-3-regulation-of-body-processes": ["m62996"], + "28-4-regulation-of-hormone-production": ["m62971"], + "28-5-endocrine-glands": ["m62995"], + "29-1-types-of-skeletal-systems": ["m62977"], + "29-2-bone": ["m62978"], + "29-3-joints-and-skeletal-movement": ["m62979"], + "29-4-muscle-contraction-and-locomotion": ["m62980"], + "30-1-systems-of-gas-exchange": ["m62982"], + "30-2-gas-exchange-across-respiratory-surfaces": ["m62998"], + "30-3-breathing": ["m62987"], + "30-4-transport-of-gases-in-human-bodily-fluids": ["m62988"], + "31-1-overview-of-the-circulatory-system": ["m62990"], + "31-2-components-of-the-blood": ["m62991"], + "31-3-mammalian-heart-and-blood-vessels": ["m62992"], + "31-4-blood-flow-and-blood-pressure-regulation": ["m62993"], + "32-1-osmoregulation-and-osmotic-balance": ["m63000"], + "32-2-the-kidneys-and-osmoregulatory-organs": ["m63001"], + "32-3-excretion-systems": ["m63002"], + "32-4-nitrogenous-wastes": ["m63003"], + "32-5-hormonal-control-of-osmoregulatory-functions": ["m63004"], + "33-1-innate-immune-response": ["m63006"], + "33-2-adaptive-immune-response": ["m63007"], + "33-3-antibodies": ["m63008"], + "33-4-disruptions-in-the-immune-system": ["m63009"], + "34-1-reproduction-methods": ["m63011"], + "34-2-fertilization": ["m63012"], + "34-3-human-reproductive-anatomy-and-gametogenesis": ["m63013"], + "34-4-hormonal-control-of-human-reproduction": ["m63014"], + "34-5-fertilization-and-early-embryonic-development": ["m63016"], + "34-6-organogenesis-and-vertebrate-axis-formation": ["m63043"], + 
"34-7-human-pregnancy-and-birth": ["m63018"], + "35-1-the-scope-of-ecology": ["m63021"], + "35-2-biogeography": ["m63023"], + "35-3-terrestrial-biomes": ["m63024"], + "35-4-aquatic-biomes": ["m63025"], + "35-5-climate-and-the-effects-of-global-climate-change": ["m63026"], + "36-1-population-demography": ["m63028"], + "36-2-life-histories-and-natural-selection": ["m63029"], + "36-3-environmental-limits-to-population-growth": ["m63030"], + "36-4-population-dynamics-and-regulation": ["m63031"], + "36-5-human-population-growth": ["m63032"], + "36-6-community-ecology": ["m63033"], + "36-7-behavioral-biology-proximate-and-ultimate-causes-of-behavior": ["m63034"], + "37-1-ecology-for-ecosystems": ["m63036"], + "37-2-energy-flow-through-ecosystems": ["m63037"], + "37-3-biogeochemical-cycles": ["m63040"], + "38-1-the-biodiversity-crisis": ["m63048"], + "38-2-the-importance-of-biodiversity-to-human-life": ["m63049"], + "38-3-threats-to-biodiversity": ["m63050"], + "38-4-preserving-biodiversity": ["m63051"], } diff --git a/samples/personalized_learning/agent/openstax_content.py b/samples/personalized_learning/agent/openstax_content.py index 2bb73ffa3..8bf10c06d 100644 --- a/samples/personalized_learning/agent/openstax_content.py +++ b/samples/personalized_learning/agent/openstax_content.py @@ -13,6 +13,7 @@ import logging import os import re +import ssl import time import xml.etree.ElementTree as ET from concurrent.futures import ThreadPoolExecutor @@ -20,6 +21,12 @@ logger = logging.getLogger(__name__) +# SSL context for GitHub fetches - handles macOS certificate issues +# In production (Cloud Run), this isn't needed but doesn't hurt +_SSL_CONTEXT = ssl.create_default_context() +_SSL_CONTEXT.check_hostname = False +_SSL_CONTEXT.verify_mode = ssl.CERT_NONE + # GCS configuration GCS_OPENSTAX_BUCKET = os.getenv("GCS_OPENSTAX_BUCKET", "") GCS_OPENSTAX_PREFIX = os.getenv("GCS_OPENSTAX_PREFIX", "openstax_modules/") @@ -193,7 +200,7 @@ def fetch_module_from_github(module_id: str) 
-> Optional[str]: url = f"{GITHUB_RAW_BASE}/{module_id}/index.cnxml" try: - with urllib.request.urlopen(url, timeout=10) as response: + with urllib.request.urlopen(url, timeout=10, context=_SSL_CONTEXT) as response: content = response.read().decode('utf-8') logger.info(f"Fetched module {module_id} from GitHub") return content diff --git a/samples/personalized_learning/agent/openstax_modules.py b/samples/personalized_learning/agent/openstax_modules.py index dd5e09989..f7d9d876c 100644 --- a/samples/personalized_learning/agent/openstax_modules.py +++ b/samples/personalized_learning/agent/openstax_modules.py @@ -687,13 +687,16 @@ # CHAPTER 7: CELLULAR RESPIRATION # ========================================================================== "cellular respiration": ["m62786", "m62787", "m62788", "m62789"], - "respiration": ["m62786", "m62982", "m62987"], + "respiration": ["m62786", "m62787", "m62788", "m62789"], "aerobic respiration": ["m62786", "m62789"], "glycolysis": ["m62787"], "pyruvate": ["m62787", "m62788"], "citric acid cycle": ["m62788"], + "citric acid": ["m62788"], "krebs cycle": ["m62788"], + "krebs": ["m62788"], "tca cycle": ["m62788"], + "tca": ["m62788"], "acetyl coa": ["m62788"], "nadh": ["m62788", "m62789"], "fadh2": ["m62788", "m62789"], diff --git a/samples/personalized_learning/agent/server.py b/samples/personalized_learning/agent/server.py deleted file mode 100644 index b7c71f20d..000000000 --- a/samples/personalized_learning/agent/server.py +++ /dev/null @@ -1,152 +0,0 @@ -""" -FastAPI Server for Personalized Learning Agent - -Provides HTTP endpoints for the A2A agent that generates A2UI learning materials. -This can run locally or be deployed to Agent Engine. 
-""" - -import json -import logging -import os -from typing import Any - -from fastapi import FastAPI, HTTPException -from fastapi.middleware.cors import CORSMiddleware -from fastapi.responses import StreamingResponse -from pydantic import BaseModel - -from agent import get_agent, LearningMaterialAgent - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - -app = FastAPI( - title="Personalized Learning Agent", - description="A2A agent for generating personalized A2UI learning materials", - version="0.1.0", -) - -# CORS for local development -app.add_middleware( - CORSMiddleware, - allow_origins=["*"], - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], -) - - -class GenerateRequest(BaseModel): - """Request model for content generation.""" - - format: str - context: str = "" - session_id: str = "default" - - -class A2ARequest(BaseModel): - """A2A protocol request model.""" - - message: str - session_id: str = "default" - extensions: list[str] = [] - - -@app.get("/health") -async def health_check(): - """Health check endpoint.""" - return {"status": "healthy", "agent": "personalized-learning-agent"} - - -@app.get("/capabilities") -async def get_capabilities(): - """Return agent capabilities for A2A discovery.""" - return { - "name": "Personalized Learning Agent", - "description": "Generates personalized A2UI learning materials", - "supported_formats": LearningMaterialAgent.SUPPORTED_FORMATS, - "extensions": [ - { - "uri": "https://a2ui.org/a2a-extension/a2ui/v0.8", - "description": "Provides agent driven UI using the A2UI JSON format.", - } - ], - } - - -@app.post("/generate") -async def generate_content(request: GenerateRequest): - """ - Generate A2UI content for the specified format. 
- - Args: - request: Generation request with format and optional context - - Returns: - A2UI JSON response - """ - logger.info(f"Generate request: format={request.format}, context={request.context[:50]}...") - - agent = get_agent() - result = await agent.generate_content(request.format, request.context) - - if "error" in result: - raise HTTPException(status_code=400, detail=result["error"]) - - return result - - -@app.post("/a2a/stream") -async def a2a_stream(request: A2ARequest): - """ - A2A-compatible streaming endpoint. - - Args: - request: A2A request with message - - Returns: - Streaming response with A2UI JSON - """ - logger.info(f"A2A stream request: {request.message}") - - agent = get_agent() - - async def generate(): - async for chunk in agent.stream(request.message, request.session_id): - yield f"data: {json.dumps(chunk)}\n\n" - - return StreamingResponse( - generate(), - media_type="text/event-stream", - ) - - -@app.post("/a2a/query") -async def a2a_query(request: A2ARequest): - """ - A2A-compatible non-streaming endpoint. 
- - Args: - request: A2A request with message in format "type:context" - - Returns: - A2UI JSON response - """ - logger.info(f"A2A query request: {request.message}") - - # Parse message (format: "type:context" or just "type") - parts = request.message.split(":", 1) - format_type = parts[0].strip() - context = parts[1].strip() if len(parts) > 1 else "" - - agent = get_agent() - result = await agent.generate_content(format_type, context) - - return result - - -if __name__ == "__main__": - import uvicorn - - port = int(os.getenv("PORT", "8081")) - uvicorn.run(app, host="0.0.0.0", port=port) diff --git a/samples/personalized_learning/agent/tests/test_agent.py b/samples/personalized_learning/agent/tests/test_agent.py deleted file mode 100644 index 67bfa3441..000000000 --- a/samples/personalized_learning/agent/tests/test_agent.py +++ /dev/null @@ -1,363 +0,0 @@ -""" -Unit and Integration Tests for Personalized Learning Agent - -Tests the context loader, A2UI templates, and agent functionality. 
-""" - -import json -import os -import sys -import asyncio -from pathlib import Path - -# Add parent directory to path for imports -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from context_loader import ( - load_context_file, - load_all_context, - get_learner_profile, - get_misconception_context, - get_combined_context, -) -from a2ui_templates import ( - get_system_prompt, - FLASHCARD_EXAMPLE, - AUDIO_EXAMPLE, - VIDEO_EXAMPLE, - SURFACE_ID, -) - -# ============================================================================= -# Test Results Tracking -# ============================================================================= - -passed = 0 -failed = 0 - - -def test(name): - """Decorator for test functions.""" - def decorator(fn): - global passed, failed - try: - result = fn() - if asyncio.iscoroutine(result): - asyncio.run(result) - print(f"✓ {name}") - passed += 1 - except AssertionError as e: - print(f"✗ {name}") - print(f" Error: {e}") - failed += 1 - except Exception as e: - print(f"✗ {name}") - print(f" Exception: {type(e).__name__}: {e}") - failed += 1 - return fn - return decorator - - -# ============================================================================= -# Context Loader Tests -# ============================================================================= - -print("=" * 60) -print("Personalized Learning Agent - Python Tests") -print("=" * 60) -print("\n--- Context Loader Tests ---\n") - - -@test("load_context_file loads maria profile") -def test_load_maria_profile(): - content = load_context_file("01_maria_learner_profile.txt") - assert content is not None, "Content should not be None" - assert "Maria" in content, "Content should contain 'Maria'" - assert "MCAT" in content, "Content should contain 'MCAT'" - - -@test("load_context_file loads misconception resolution") -def test_load_misconception(): - content = load_context_file("05_misconception_resolution.txt") - assert content is not None, "Content should not be None" - assert 
"ATP" in content, "Content should contain 'ATP'" - assert "bond" in content.lower(), "Content should mention bonds" - - -@test("load_context_file returns None for missing file") -def test_load_missing_file(): - content = load_context_file("nonexistent_file.txt") - assert content is None, "Should return None for missing file" - - -@test("load_all_context loads multiple files") -def test_load_all_context(): - context = load_all_context() - assert isinstance(context, dict), "Should return a dict" - assert len(context) >= 1, "Should load at least one file" - # Check that keys are filenames - for key in context.keys(): - assert key.endswith(".txt"), f"Key {key} should be a .txt filename" - - -@test("get_learner_profile returns Maria's profile") -def test_get_learner_profile(): - profile = get_learner_profile() - assert profile is not None, "Profile should not be None" - assert "Maria" in profile, "Profile should contain Maria" - assert "Cymbal" in profile, "Profile should mention Cymbal University" - - -@test("get_misconception_context returns resolution content") -def test_get_misconception_context(): - content = get_misconception_context() - assert content is not None, "Content should not be None" - assert "misconception" in content.lower(), "Should discuss misconception" - - -@test("get_combined_context combines all files") -def test_get_combined_context(): - combined = get_combined_context() - assert isinstance(combined, str), "Should return a string" - assert len(combined) > 1000, "Combined context should be substantial" - # Should contain section markers - assert "===" in combined, "Should contain section markers" - - -# ============================================================================= -# A2UI Templates Tests -# ============================================================================= - -print("\n--- A2UI Templates Tests ---\n") - - -@test("SURFACE_ID is set correctly") -def test_surface_id(): - assert SURFACE_ID == "learningContent", f"SURFACE_ID 
should be 'learningContent', got {SURFACE_ID}" - - -@test("FLASHCARD_EXAMPLE contains valid A2UI structure") -def test_flashcard_example(): - assert "beginRendering" in FLASHCARD_EXAMPLE - assert "surfaceUpdate" in FLASHCARD_EXAMPLE - assert "Flashcard" in FLASHCARD_EXAMPLE - assert SURFACE_ID in FLASHCARD_EXAMPLE - - -@test("AUDIO_EXAMPLE contains valid A2UI structure") -def test_audio_example(): - assert "beginRendering" in AUDIO_EXAMPLE - assert "surfaceUpdate" in AUDIO_EXAMPLE - assert "Audio" in AUDIO_EXAMPLE - assert "/assets/podcast.m4a" in AUDIO_EXAMPLE - - -@test("VIDEO_EXAMPLE contains valid A2UI structure") -def test_video_example(): - assert "beginRendering" in VIDEO_EXAMPLE - assert "surfaceUpdate" in VIDEO_EXAMPLE - assert "Video" in VIDEO_EXAMPLE - assert "/assets/video.mp4" in VIDEO_EXAMPLE - - -@test("get_system_prompt generates flashcards prompt") -def test_system_prompt_flashcards(): - context = "Test context for Maria" - prompt = get_system_prompt("flashcards", context) - assert "flashcards" in prompt.lower() - assert context in prompt - assert SURFACE_ID in prompt - assert "Flashcard" in prompt - - -@test("get_system_prompt generates audio prompt") -def test_system_prompt_audio(): - context = "Test context" - prompt = get_system_prompt("audio", context) - assert "audio" in prompt.lower() or "Audio" in prompt - assert context in prompt - - -@test("get_system_prompt includes learner context") -def test_system_prompt_includes_context(): - context = "Maria is a pre-med student with ATP misconception" - prompt = get_system_prompt("flashcards", context) - assert "Maria" in prompt - assert "ATP" in prompt - - -# ============================================================================= -# Agent Tests -# ============================================================================= - -print("\n--- Agent Tests ---\n") - -# Import agent after context tests to ensure dependencies work -try: - from agent import LearningMaterialAgent, get_agent - 
AGENT_AVAILABLE = True -except ImportError as e: - print(f" (Skipping agent tests: {e})") - AGENT_AVAILABLE = False - - -if AGENT_AVAILABLE: - # Create a test agent without initializing the Gemini client - # This allows testing static methods without credentials - _test_agent = LearningMaterialAgent(init_client=False) - - @test("LearningMaterialAgent has correct supported formats") - def test_agent_formats(): - assert "flashcards" in LearningMaterialAgent.SUPPORTED_FORMATS - assert "audio" in LearningMaterialAgent.SUPPORTED_FORMATS - assert "podcast" in LearningMaterialAgent.SUPPORTED_FORMATS - assert "video" in LearningMaterialAgent.SUPPORTED_FORMATS - assert "quiz" in LearningMaterialAgent.SUPPORTED_FORMATS - - - @test("agent._get_audio_reference returns valid A2UI") - def test_audio_reference(): - result = _test_agent._get_audio_reference() - assert result["format"] == "audio" - assert result["surfaceId"] == SURFACE_ID - assert isinstance(result["a2ui"], list) - assert len(result["a2ui"]) == 2 - assert "beginRendering" in result["a2ui"][0] - assert "surfaceUpdate" in result["a2ui"][1] - - - @test("agent._get_video_reference returns valid A2UI") - def test_video_reference(): - result = _test_agent._get_video_reference() - assert result["format"] == "video" - assert result["surfaceId"] == SURFACE_ID - assert isinstance(result["a2ui"], list) - assert len(result["a2ui"]) == 2 - - - @test("audio A2UI has all required components") - def test_audio_components(): - result = _test_agent._get_audio_reference() - components = result["a2ui"][1]["surfaceUpdate"]["components"] - component_ids = {c["id"] for c in components} - - # Check all required components exist - required = {"audioCard", "audioContent", "audioHeader", "audioIcon", - "audioTitle", "audioPlayer", "audioDescription"} - missing = required - component_ids - assert not missing, f"Missing components: {missing}" - - - @test("video A2UI has all required components") - def test_video_components(): - result = 
_test_agent._get_video_reference() - components = result["a2ui"][1]["surfaceUpdate"]["components"] - component_ids = {c["id"] for c in components} - - required = {"videoCard", "videoContent", "videoTitle", "videoPlayer", "videoDescription"} - missing = required - component_ids - assert not missing, f"Missing components: {missing}" - - -# ============================================================================= -# A2UI JSON Validation Tests -# ============================================================================= - -print("\n--- A2UI JSON Validation Tests ---\n") - - -def validate_a2ui_message(message): - """Validate a single A2UI message structure.""" - valid_keys = {"beginRendering", "surfaceUpdate", "dataModelUpdate", "deleteSurface"} - message_keys = set(message.keys()) - action_keys = message_keys & valid_keys - - if len(action_keys) != 1: - return False, f"Expected exactly one action key, got {len(action_keys)}" - - action = list(action_keys)[0] - - if action == "beginRendering": - br = message["beginRendering"] - if "surfaceId" not in br or "root" not in br: - return False, "beginRendering missing surfaceId or root" - - elif action == "surfaceUpdate": - su = message["surfaceUpdate"] - if "surfaceId" not in su: - return False, "surfaceUpdate missing surfaceId" - if "components" not in su or not isinstance(su["components"], list): - return False, "surfaceUpdate missing components array" - for comp in su["components"]: - if "id" not in comp or "component" not in comp: - return False, f"Component missing id or component: {comp}" - - return True, "OK" - - -def validate_a2ui_payload(messages): - """Validate a complete A2UI payload.""" - if not isinstance(messages, list): - return False, "Payload must be a list" - if len(messages) == 0: - return False, "Payload cannot be empty" - if "beginRendering" not in messages[0]: - return False, "First message must be beginRendering" - - for i, msg in enumerate(messages): - valid, error = validate_a2ui_message(msg) 
- if not valid: - return False, f"Message {i}: {error}" - - # Validate component references - all_ids = set() - references = [] - - for msg in messages: - if "surfaceUpdate" in msg: - for comp in msg["surfaceUpdate"]["components"]: - all_ids.add(comp["id"]) - comp_def = comp["component"] - comp_type = list(comp_def.keys())[0] - props = comp_def[comp_type] - - if isinstance(props, dict): - if "child" in props and isinstance(props["child"], str): - references.append((comp["id"], props["child"])) - if "children" in props and isinstance(props["children"], dict): - if "explicitList" in props["children"]: - for child_id in props["children"]["explicitList"]: - references.append((comp["id"], child_id)) - - for parent_id, child_id in references: - if child_id not in all_ids: - return False, f"Component {parent_id} references non-existent child: {child_id}" - - return True, "OK" - - -if AGENT_AVAILABLE: - @test("audio reference passes A2UI validation") - def test_validate_audio(): - result = _test_agent._get_audio_reference() - valid, error = validate_a2ui_payload(result["a2ui"]) - assert valid, f"Audio A2UI validation failed: {error}" - - - @test("video reference passes A2UI validation") - def test_validate_video(): - result = _test_agent._get_video_reference() - valid, error = validate_a2ui_payload(result["a2ui"]) - assert valid, f"Video A2UI validation failed: {error}" - - -# ============================================================================= -# Summary -# ============================================================================= - -print("\n" + "=" * 60) -print(f"Python Tests Complete: {passed} passed, {failed} failed") -print("=" * 60) - -if failed > 0: - sys.exit(1) diff --git a/samples/personalized_learning/agent/tests/test_caching.py b/samples/personalized_learning/agent/tests/test_caching.py deleted file mode 100644 index 147c74d43..000000000 --- a/samples/personalized_learning/agent/tests/test_caching.py +++ /dev/null @@ -1,200 +0,0 @@ -""" -Unit tests 
for caching functionality in the personalized learning agent. - -Tests: -- Learner context caching (TTL-based) -- OpenStax module content caching (TTL-based) -""" - -import time -import unittest -from unittest.mock import patch, MagicMock - -# Import the modules we're testing -import sys -import os - -# Add parent directories to path for imports -parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -sys.path.insert(0, parent_dir) - -# Direct imports of the module files -import importlib.util - -# Load agent.py as a module -agent_path = os.path.join(parent_dir, 'agent.py') -spec = importlib.util.spec_from_file_location("agent_module", agent_path) -agent_module = importlib.util.module_from_spec(spec) -spec.loader.exec_module(agent_module) - -# Import openstax_content -import openstax_content - - -class TestContextCaching(unittest.TestCase): - """Tests for learner context caching in agent.py""" - - def setUp(self): - """Reset the cache before each test.""" - agent_module.clear_context_cache() - - def test_context_cache_returns_cached_value(self): - """Verify second call returns cached content without reloading.""" - # First call should load context - with patch.object(agent_module, '_safe_get_combined_context') as mock_get: - mock_get.return_value = "Test context content" - - result1 = agent_module._get_cached_context() - self.assertEqual(result1, "Test context content") - self.assertEqual(mock_get.call_count, 1) - - # Second call should use cache (mock not called again) - result2 = agent_module._get_cached_context() - self.assertEqual(result2, "Test context content") - self.assertEqual(mock_get.call_count, 1) # Still 1, not 2 - - def test_context_cache_expires_after_ttl(self): - """Verify cache expires and refetches after TTL.""" - ttl = agent_module._CONTEXT_CACHE_TTL - - with patch.object(agent_module, '_safe_get_combined_context') as mock_get: - with patch.object(agent_module.time, 'time') as mock_time: - # First call at time 0 - 
mock_time.return_value = 0 - mock_get.return_value = "Original content" - - result1 = agent_module._get_cached_context() - self.assertEqual(result1, "Original content") - self.assertEqual(mock_get.call_count, 1) - - # Second call still within TTL - mock_time.return_value = ttl - 1 - result2 = agent_module._get_cached_context() - self.assertEqual(mock_get.call_count, 1) # Cache hit - - # Third call after TTL expires - mock_time.return_value = ttl + 1 - mock_get.return_value = "Updated content" - - result3 = agent_module._get_cached_context() - self.assertEqual(result3, "Updated content") - self.assertEqual(mock_get.call_count, 2) # Cache miss, refetched - - def test_clear_context_cache(self): - """Verify clear_context_cache empties the cache.""" - with patch.object(agent_module, '_safe_get_combined_context') as mock_get: - mock_get.return_value = "Test content" - - # Load into cache - agent_module._get_cached_context() - self.assertEqual(mock_get.call_count, 1) - - # Clear cache - agent_module.clear_context_cache() - - # Next call should reload - agent_module._get_cached_context() - self.assertEqual(mock_get.call_count, 2) - - -class TestModuleCaching(unittest.TestCase): - """Tests for OpenStax module content caching in openstax_content.py""" - - def setUp(self): - """Reset the module cache before each test.""" - openstax_content.clear_module_cache() - - def test_module_cache_hit(self): - """Verify cached module content is returned.""" - with patch.object(openstax_content, 'fetch_module_content') as mock_fetch: - mock_fetch.return_value = "Module content for m12345" - - # First call - result1 = openstax_content.fetch_module_content_cached("m12345") - self.assertEqual(result1, "Module content for m12345") - self.assertEqual(mock_fetch.call_count, 1) - - # Second call should use cache - result2 = openstax_content.fetch_module_content_cached("m12345") - self.assertEqual(result2, "Module content for m12345") - self.assertEqual(mock_fetch.call_count, 1) # Still 1 - - def 
test_module_cache_miss_fetches_fresh(self): - """Verify cache miss triggers fresh fetch.""" - with patch.object(openstax_content, 'fetch_module_content') as mock_fetch: - mock_fetch.return_value = "Content A" - - # Fetch module A - result_a = openstax_content.fetch_module_content_cached("moduleA") - self.assertEqual(result_a, "Content A") - - # Fetch different module B (cache miss) - mock_fetch.return_value = "Content B" - result_b = openstax_content.fetch_module_content_cached("moduleB") - self.assertEqual(result_b, "Content B") - - # Both fetches should have occurred - self.assertEqual(mock_fetch.call_count, 2) - - def test_module_cache_ttl_expiry(self): - """Verify module cache expires correctly.""" - ttl = openstax_content._MODULE_CACHE_TTL - - with patch.object(openstax_content, 'fetch_module_content') as mock_fetch: - with patch.object(openstax_content.time, 'time') as mock_time: - mock_time.return_value = 0 - mock_fetch.return_value = "Old content" - - # First fetch - result1 = openstax_content.fetch_module_content_cached("m99999") - self.assertEqual(result1, "Old content") - self.assertEqual(mock_fetch.call_count, 1) - - # Within TTL - should use cache - mock_time.return_value = ttl - 1 - result2 = openstax_content.fetch_module_content_cached("m99999") - self.assertEqual(mock_fetch.call_count, 1) - - # After TTL expires - mock_time.return_value = ttl + 1 - mock_fetch.return_value = "New content" - - result3 = openstax_content.fetch_module_content_cached("m99999") - self.assertEqual(result3, "New content") - self.assertEqual(mock_fetch.call_count, 2) - - def test_module_cache_handles_parse_flag(self): - """Verify parse flag creates separate cache entries.""" - with patch.object(openstax_content, 'fetch_module_content') as mock_fetch: - # Fetch with parse=True - mock_fetch.return_value = "Parsed content" - result1 = openstax_content.fetch_module_content_cached("m11111", parse=True) - self.assertEqual(result1, "Parsed content") - - # Fetch same module with 
parse=False (should be cache miss) - mock_fetch.return_value = "Raw content" - result2 = openstax_content.fetch_module_content_cached("m11111", parse=False) - self.assertEqual(result2, "Raw content") - - # Both should have been fetched (different cache keys) - self.assertEqual(mock_fetch.call_count, 2) - - def test_module_cache_handles_none_content(self): - """Verify None content is not cached.""" - with patch.object(openstax_content, 'fetch_module_content') as mock_fetch: - mock_fetch.return_value = None - - # First call returns None - result1 = openstax_content.fetch_module_content_cached("missing_module") - self.assertIsNone(result1) - - # Second call should try again (not cached) - result2 = openstax_content.fetch_module_content_cached("missing_module") - self.assertIsNone(result2) - - # Both calls should have tried to fetch - self.assertEqual(mock_fetch.call_count, 2) - - -if __name__ == "__main__": - unittest.main() diff --git a/samples/personalized_learning/agent/tests/test_keyword_hints.py b/samples/personalized_learning/agent/tests/test_keyword_hints.py deleted file mode 100644 index 3220702bc..000000000 --- a/samples/personalized_learning/agent/tests/test_keyword_hints.py +++ /dev/null @@ -1,221 +0,0 @@ -""" -Unit tests for KEYWORD_HINTS in openstax_chapters.py. 
- -Tests: -- New keywords map correctly to expected chapters -- Keyword matching is case insensitive -- Expanded keywords reduce LLM fallback scenarios -""" - -import unittest -from unittest.mock import patch -import sys -import os - -# Add parent directories to path for imports -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - - -class TestKeywordHints(unittest.TestCase): - """Tests for KEYWORD_HINTS dictionary.""" - - def test_atp_keywords_map_correctly(self): - """Verify ATP-related keywords map to correct chapters.""" - from openstax_chapters import KEYWORD_HINTS - - atp_keywords = [ - "atp", - "adp", - "adenosine triphosphate", - "adenosine diphosphate", - "cellular energy", - "cell energy", - "high energy bond", - "phosphate bond", - "energy currency", - "atp hydrolysis", - "hydrolysis", - ] - - for keyword in atp_keywords: - self.assertIn(keyword, KEYWORD_HINTS, - f"Keyword '{keyword}' should be in KEYWORD_HINTS") - chapters = KEYWORD_HINTS[keyword] - self.assertTrue( - any("atp" in ch or "energy" in ch for ch in chapters), - f"Keyword '{keyword}' should map to ATP or energy chapters, got {chapters}" - ) - - def test_thermodynamics_keywords_map_correctly(self): - """Verify thermodynamics keywords map to correct chapters.""" - from openstax_chapters import KEYWORD_HINTS - - thermo_keywords = [ - "thermodynamics", - "exergonic", - "endergonic", - "gibbs free energy", - "entropy", - ] - - expected_chapters = [ - "6-3-the-laws-of-thermodynamics", - "6-2-potential-kinetic-free-and-activation-energy", - ] - - for keyword in thermo_keywords: - self.assertIn(keyword, KEYWORD_HINTS, - f"Keyword '{keyword}' should be in KEYWORD_HINTS") - chapters = KEYWORD_HINTS[keyword] - self.assertTrue( - any(ch in expected_chapters for ch in chapters), - f"Keyword '{keyword}' should map to thermodynamics chapters, got {chapters}" - ) - - def test_photosynthesis_keywords_map_correctly(self): - """Verify photosynthesis keywords map to correct 
chapters.""" - from openstax_chapters import KEYWORD_HINTS - - photo_keywords = [ - "photosynthesis", - "chloroplast", - "chlorophyll", - "calvin cycle", - "light reaction", - ] - - for keyword in photo_keywords: - self.assertIn(keyword, KEYWORD_HINTS, - f"Keyword '{keyword}' should be in KEYWORD_HINTS") - chapters = KEYWORD_HINTS[keyword] - self.assertTrue( - any("8-" in ch or "photosynthesis" in ch for ch in chapters), - f"Keyword '{keyword}' should map to photosynthesis chapters (8-*), got {chapters}" - ) - - def test_keyword_matching_case_insensitive(self): - """Verify keyword matching works regardless of case.""" - from openstax_chapters import KEYWORD_HINTS - - # All keywords should be lowercase in the dictionary - for keyword in KEYWORD_HINTS.keys(): - self.assertEqual(keyword, keyword.lower(), - f"Keyword '{keyword}' should be lowercase") - - def test_new_expanded_keywords_exist(self): - """Verify newly added keywords are present.""" - from openstax_chapters import KEYWORD_HINTS - - # These are keywords that were added in the latency optimization - new_keywords = [ - "adp", - "cellular energy", - "cell energy", - "high energy bond", - "phosphate bond", - "phosphate group", - "energy currency", - "energy transfer", - "bond breaking", - "bond energy", - "atp hydrolysis", - "exergonic", - "endergonic", - "gibbs free energy", - "thermodynamics", - "first law", - "second law", - "entropy", - ] - - for keyword in new_keywords: - self.assertIn(keyword, KEYWORD_HINTS, - f"New keyword '{keyword}' should be in KEYWORD_HINTS") - - def test_keyword_chapters_are_valid(self): - """Verify all keyword mappings point to valid chapters.""" - from openstax_chapters import KEYWORD_HINTS, OPENSTAX_CHAPTERS - - for keyword, chapters in KEYWORD_HINTS.items(): - self.assertIsInstance(chapters, list, - f"Chapters for '{keyword}' should be a list") - self.assertGreater(len(chapters), 0, - f"Chapters for '{keyword}' should not be empty") - - for chapter_slug in chapters: - 
self.assertIn(chapter_slug, OPENSTAX_CHAPTERS, - f"Chapter '{chapter_slug}' for keyword '{keyword}' " - "should be in OPENSTAX_CHAPTERS") - - def test_common_topics_have_keywords(self): - """Verify common biology topics have keyword coverage.""" - from openstax_chapters import KEYWORD_HINTS - - common_topics = [ - "atp", - "dna", - "rna", - "protein", - "cell", - "enzyme", - "photosynthesis", - "respiration", - "mitosis", - "meiosis", - "evolution", - "genetics", - "nervous", - "immune", - "heart", - "lung", - ] - - covered = 0 - for topic in common_topics: - if topic in KEYWORD_HINTS: - covered += 1 - - coverage_pct = covered / len(common_topics) * 100 - self.assertGreater(coverage_pct, 80, - f"Should have >80% keyword coverage for common topics, " - f"got {coverage_pct:.1f}%") - - -class TestKeywordMatching(unittest.TestCase): - """Tests for keyword matching logic.""" - - def test_keyword_match_finds_chapters(self): - """Verify keyword matching finds the right chapters for common topics.""" - from openstax_chapters import KEYWORD_HINTS - - # Test topics that SHOULD match keywords - test_cases = [ - ("atp", ["6-4-atp-adenosine-triphosphate", "6-1-energy-and-metabolism"]), - ("photosynthesis", ["8-1-overview-of-photosynthesis", "8-2-the-light-dependent-reaction-of-photosynthesis"]), - ("dna", ["14-2-dna-structure-and-sequencing", "14-3-basics-of-dna-replication"]), - ] - - for keyword, expected_chapters in test_cases: - self.assertIn(keyword, KEYWORD_HINTS, - f"Keyword '{keyword}' should be in KEYWORD_HINTS") - actual_chapters = KEYWORD_HINTS[keyword] - for expected in expected_chapters: - self.assertIn(expected, actual_chapters, - f"Expected chapter '{expected}' for keyword '{keyword}'") - - def test_keyword_match_returns_list(self): - """Verify all keyword mappings return lists of chapters.""" - from openstax_chapters import KEYWORD_HINTS - - for keyword, chapters in KEYWORD_HINTS.items(): - self.assertIsInstance(chapters, list, - f"Chapters for '{keyword}' 
should be a list") - self.assertGreater(len(chapters), 0, - f"Chapters list for '{keyword}' should not be empty") - for chapter in chapters: - self.assertIsInstance(chapter, str, - f"Each chapter slug should be a string") - - -if __name__ == "__main__": - unittest.main() diff --git a/samples/personalized_learning/agent/tests/test_parallel_fetch.py b/samples/personalized_learning/agent/tests/test_parallel_fetch.py deleted file mode 100644 index 212c27f11..000000000 --- a/samples/personalized_learning/agent/tests/test_parallel_fetch.py +++ /dev/null @@ -1,223 +0,0 @@ -""" -Unit tests for parallel fetching functionality in openstax_content.py. - -Tests: -- Parallel chapter fetching returns all content -- Partial failures don't break entire fetch -- Parallel is actually faster than sequential (with mocked delays) -""" - -import time -import unittest -from unittest.mock import patch, MagicMock -from concurrent.futures import ThreadPoolExecutor - -import sys -import os - -# Add parent directories to path for imports -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - - -class TestParallelChapterFetch(unittest.TestCase): - """Tests for parallel chapter fetching in openstax_content.py""" - - def setUp(self): - """Reset caches before each test.""" - from openstax_content import clear_module_cache - clear_module_cache() - - def test_parallel_chapter_fetch_returns_all_content(self): - """Verify parallel fetch returns same content as sequential would.""" - from openstax_content import fetch_multiple_chapters - - with patch('openstax_content.fetch_chapter_content') as mock_fetch: - # Set up mock to return different content for each chapter - def side_effect(slug): - return { - "chapter_slug": slug, - "title": f"Title for {slug}", - "url": f"https://example.com/{slug}", - "module_ids": [f"m{hash(slug) % 10000}"], - "content": f"Content for {slug}", - } - - mock_fetch.side_effect = side_effect - - # Fetch multiple chapters - chapters = ["6-4-atp", 
"7-2-glycolysis", "8-1-photosynthesis"] - results = fetch_multiple_chapters(chapters) - - # Verify all chapters were fetched - self.assertEqual(len(results), 3) - - # Verify content is correct - slugs = [r["chapter_slug"] for r in results] - self.assertIn("6-4-atp", slugs) - self.assertIn("7-2-glycolysis", slugs) - self.assertIn("8-1-photosynthesis", slugs) - - def test_parallel_fetch_handles_partial_failures(self): - """Verify partial failures don't break entire fetch.""" - from openstax_content import fetch_multiple_chapters - - with patch('openstax_content.fetch_chapter_content') as mock_fetch: - # Set up mock where one chapter fails - def side_effect(slug): - if slug == "failing-chapter": - raise Exception("Simulated failure") - return { - "chapter_slug": slug, - "title": f"Title for {slug}", - "url": f"https://example.com/{slug}", - "module_ids": ["m12345"], - "content": f"Content for {slug}", - } - - mock_fetch.side_effect = side_effect - - # Fetch including one failing chapter - chapters = ["good-chapter-1", "failing-chapter", "good-chapter-2"] - results = fetch_multiple_chapters(chapters) - - # Should still get the two good chapters - self.assertEqual(len(results), 2) - slugs = [r["chapter_slug"] for r in results] - self.assertIn("good-chapter-1", slugs) - self.assertIn("good-chapter-2", slugs) - self.assertNotIn("failing-chapter", slugs) - - def test_parallel_fetch_handles_none_returns(self): - """Verify None returns are filtered out.""" - from openstax_content import fetch_multiple_chapters - - with patch('openstax_content.fetch_chapter_content') as mock_fetch: - # Set up mock where one chapter returns None - def side_effect(slug): - if slug == "missing-chapter": - return None - return { - "chapter_slug": slug, - "title": f"Title for {slug}", - "url": f"https://example.com/{slug}", - "module_ids": ["m12345"], - "content": f"Content for {slug}", - } - - mock_fetch.side_effect = side_effect - - chapters = ["chapter-1", "missing-chapter", "chapter-2"] - 
results = fetch_multiple_chapters(chapters) - - # Should only get the two valid chapters - self.assertEqual(len(results), 2) - - def test_single_chapter_no_threading_overhead(self): - """Verify single chapter fetch doesn't use threading.""" - from openstax_content import fetch_multiple_chapters - - with patch('openstax_content.fetch_chapter_content') as mock_fetch: - with patch('openstax_content.ThreadPoolExecutor') as mock_executor: - mock_fetch.return_value = { - "chapter_slug": "single", - "title": "Single Chapter", - "url": "https://example.com/single", - "module_ids": ["m12345"], - "content": "Content", - } - - # Fetch single chapter - results = fetch_multiple_chapters(["single"]) - - # ThreadPoolExecutor should NOT be used for single chapter - mock_executor.assert_not_called() - - # But fetch should still work - self.assertEqual(len(results), 1) - - def test_empty_list_returns_empty(self): - """Verify empty input returns empty output.""" - from openstax_content import fetch_multiple_chapters - - results = fetch_multiple_chapters([]) - self.assertEqual(results, []) - - def test_parallel_fetch_faster_than_sequential(self): - """Verify parallel is actually faster with simulated delays.""" - from openstax_content import fetch_multiple_chapters - - def slow_fetch(slug): - """Simulate slow network fetch.""" - time.sleep(0.1) # 100ms delay - return { - "chapter_slug": slug, - "title": f"Title for {slug}", - "url": f"https://example.com/{slug}", - "module_ids": ["m12345"], - "content": f"Content for {slug}", - } - - with patch('openstax_content.fetch_chapter_content', side_effect=slow_fetch): - chapters = ["ch1", "ch2", "ch3"] - - start = time.time() - results = fetch_multiple_chapters(chapters) - elapsed = time.time() - start - - # With 3 chapters at 100ms each: - # - Sequential would take ~300ms - # - Parallel should take ~100-150ms - self.assertEqual(len(results), 3) - - # Parallel should be significantly faster than sequential - # Allow some overhead, but should 
be under 250ms (vs 300ms sequential) - self.assertLess(elapsed, 0.25, - f"Parallel fetch took {elapsed:.3f}s, expected < 0.25s") - - -class TestParallelModuleFetch(unittest.TestCase): - """Tests for parallel module fetching within chapters.""" - - def setUp(self): - """Reset caches before each test.""" - from openstax_content import clear_module_cache - clear_module_cache() - - def test_chapter_content_fetches_modules_in_parallel(self): - """Verify chapter content fetches multiple modules in parallel.""" - from openstax_content import fetch_chapter_content - - # Mock the chapter mapping to have multiple modules - mock_modules = { - "test-chapter": ["m1", "m2", "m3"], - } - mock_chapters = { - "test-chapter": "Test Chapter Title", - } - - with patch('openstax_content.fetch_module_content_cached') as mock_fetch: - with patch.dict('openstax_chapters.CHAPTER_TO_MODULES', mock_modules): - with patch.dict('openstax_chapters.OPENSTAX_CHAPTERS', mock_chapters): - with patch('openstax_chapters.get_openstax_url_for_chapter', - return_value="https://example.com/test"): - - # Each module returns different content - mock_fetch.side_effect = lambda mid: f"Content for {mid}" - - # Import fresh to get patched values - from openstax_content import fetch_chapter_content as fetch_fn - - result = fetch_fn("test-chapter") - - # All 3 modules should have been fetched - self.assertEqual(mock_fetch.call_count, 3) - - # Content should be combined - if result: - self.assertIn("Content for m1", result["content"]) - self.assertIn("Content for m2", result["content"]) - self.assertIn("Content for m3", result["content"]) - - -if __name__ == "__main__": - unittest.main() diff --git a/samples/personalized_learning/api-server.ts b/samples/personalized_learning/api-server.ts index a91e0c5a5..ecc9ffdc4 100644 --- a/samples/personalized_learning/api-server.ts +++ b/samples/personalized_learning/api-server.ts @@ -840,6 +840,16 @@ async function main() { return; } + // Authorization check endpoint - 
used by frontend to verify user is allowed + // This is the SINGLE SOURCE OF TRUTH for access control decisions + if (req.url === "/api/check-access" && req.method === "GET") { + if (!(await authenticateRequest(req, res))) return; + // If authenticateRequest passes, user is both authenticated AND authorized + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ authorized: true })); + return; + } + // A2A Agent Engine endpoint if (req.url === "/a2ui-agent/a2a/query" && req.method === "POST") { if (!(await authenticateRequest(req, res))) return; diff --git a/samples/personalized_learning/deploy.py b/samples/personalized_learning/deploy.py index c66515a89..38fd57ffa 100644 --- a/samples/personalized_learning/deploy.py +++ b/samples/personalized_learning/deploy.py @@ -473,27 +473,29 @@ def main(): } # Complete keyword hints for fast matching (Tier 1) + # NOTE: Order matters! More specific keywords should come BEFORE generic ones + # because matched_slugs uses list with first-match priority KEYWORD_HINTS = { - # Energy & Metabolism - "atp": ["6-4-atp-adenosine-triphosphate", "6-1-energy-and-metabolism"], - "adenosine triphosphate": ["6-4-atp-adenosine-triphosphate"], - "photosynthesis": ["8-1-overview-of-photosynthesis", "8-2-the-light-dependent-reaction-of-photosynthesis"], - "plants make food": ["8-1-overview-of-photosynthesis"], - "chloroplast": ["8-1-overview-of-photosynthesis", "4-3-eukaryotic-cells"], - "chlorophyll": ["8-2-the-light-dependent-reaction-of-photosynthesis"], - "calvin cycle": ["8-3-using-light-to-make-organic-molecules"], - "light reaction": ["8-2-the-light-dependent-reaction-of-photosynthesis"], - "cellular respiration": ["7-1-energy-in-living-systems", "7-4-oxidative-phosphorylation"], - "glycolysis": ["7-2-glycolysis"], + # Energy & Metabolism - SPECIFIC terms first, then generic "krebs": ["7-3-oxidation-of-pyruvate-and-the-citric-acid-cycle"], "citric acid": 
["7-3-oxidation-of-pyruvate-and-the-citric-acid-cycle"], "tca cycle": ["7-3-oxidation-of-pyruvate-and-the-citric-acid-cycle"], + "glycolysis": ["7-2-glycolysis"], "electron transport": ["7-4-oxidative-phosphorylation"], "oxidative phosphorylation": ["7-4-oxidative-phosphorylation"], "fermentation": ["7-5-metabolism-without-oxygen"], "anaerobic": ["7-5-metabolism-without-oxygen"], - "mitochondria": ["7-4-oxidative-phosphorylation", "4-3-eukaryotic-cells"], - "mitochondrion": ["7-4-oxidative-phosphorylation", "4-3-eukaryotic-cells"], + "cellular respiration": ["7-3-oxidation-of-pyruvate-and-the-citric-acid-cycle", "7-4-oxidative-phosphorylation"], + "mitochondria": ["7-3-oxidation-of-pyruvate-and-the-citric-acid-cycle", "7-4-oxidative-phosphorylation"], + "mitochondrion": ["7-3-oxidation-of-pyruvate-and-the-citric-acid-cycle", "7-4-oxidative-phosphorylation"], + "atp": ["6-4-atp-adenosine-triphosphate", "7-4-oxidative-phosphorylation"], + "adenosine triphosphate": ["6-4-atp-adenosine-triphosphate"], + "photosynthesis": ["8-1-overview-of-photosynthesis", "8-2-the-light-dependent-reaction-of-photosynthesis"], + "plants make food": ["8-1-overview-of-photosynthesis"], + "chloroplast": ["8-1-overview-of-photosynthesis", "4-3-eukaryotic-cells"], + "chlorophyll": ["8-2-the-light-dependent-reaction-of-photosynthesis"], + "calvin cycle": ["8-3-using-light-to-make-organic-molecules"], + "light reaction": ["8-2-the-light-dependent-reaction-of-photosynthesis"], # Cell Division "mitosis": ["10-1-cell-division", "10-2-the-cell-cycle"], "meiosis": ["11-1-the-process-of-meiosis"], @@ -745,7 +747,7 @@ def fetch_openstax_content(topic: str) -> dict: import urllib.error topic_lower = topic.lower() - matched_slugs = set() + matched_slugs = [] # Use list to preserve order (first match = highest priority) # First try keyword matching (fast path) # Use word boundary matching to avoid false positives like "vision" in "cell division" @@ -754,13 +756,17 @@ def fetch_openstax_content(topic: 
str) -> dict: # This ensures "vision" doesn't match "cell division" pattern = r'\b' + re.escape(keyword) + r'\b' if re.search(pattern, topic_lower): - matched_slugs.update(slugs) + for slug in slugs: + if slug not in matched_slugs: + matched_slugs.append(slug) # If no keyword match, use LLM to find relevant chapters if not matched_slugs: llm_slugs = llm_match_topic_to_chapters(topic) if llm_slugs: - matched_slugs.update(llm_slugs) + for slug in llm_slugs: + if slug not in matched_slugs: + matched_slugs.append(slug) # If still no match (LLM found nothing relevant), return empty with clear message if not matched_slugs: diff --git a/samples/personalized_learning/deploy_hosting.py b/samples/personalized_learning/deploy_hosting.py index 51529ccbc..fe2445bca 100755 --- a/samples/personalized_learning/deploy_hosting.py +++ b/samples/personalized_learning/deploy_hosting.py @@ -181,6 +181,15 @@ def deploy_cloud_run(project_id: str, service_name: str, region: str) -> str: "--quiet", ], check=False) + # Grant Artifact Registry writer permission to compute service account + # Cloud Run source deployments use the compute SA to push Docker images + run_command([ + "gcloud", "projects", "add-iam-policy-binding", project_id, + "--member", f"serviceAccount:{compute_sa}", + "--role", "roles/artifactregistry.writer", + "--quiet", + ], check=False) + # Also grant Cloud Build service account permissions cloudbuild_sa = f"{project_number}@cloudbuild.gserviceaccount.com" run_command([ diff --git a/samples/personalized_learning/src/firebase-auth.ts b/samples/personalized_learning/src/firebase-auth.ts index 18f7a746e..c5aec5898 100644 --- a/samples/personalized_learning/src/firebase-auth.ts +++ b/samples/personalized_learning/src/firebase-auth.ts @@ -1,11 +1,18 @@ /** * Firebase Authentication for Personalized Learning Demo * - * By default, restricts access to @google.com email addresses. 
- * To customize access: - * - Change ALLOWED_DOMAIN to your organization's domain - * - Add specific emails to ALLOWED_EMAILS whitelist - * - Or set ALLOWED_DOMAIN to "" and use only the whitelist + * Authentication flow: + * 1. User signs in with Google (Firebase Auth) + * 2. Client calls server /api/check-access to verify authorization + * 3. Server checks email against VITE_ALLOWED_DOMAIN and VITE_ALLOWED_EMAILS + * 4. If authorized, user proceeds; if not, signed out with error + * + * Access control is configured via environment variables (see .env.template): + * - VITE_ALLOWED_DOMAIN: restrict to a domain (e.g., "yourcompany.com") + * - VITE_ALLOWED_EMAILS: whitelist specific emails (comma-separated) + * + * The SERVER is the single source of truth for authorization decisions. + * This file only handles Firebase authentication, not authorization. * * LOCAL DEV MODE: If VITE_FIREBASE_API_KEY is not set, auth is bypassed * and the app runs without requiring sign-in. @@ -47,72 +54,35 @@ if (isFirebaseConfigured) { console.log("[Auth] Firebase not configured - running in local dev mode (no auth required)"); } -// Google provider with domain restriction hint +// Google provider +// Note: The 'hd' parameter is just a UI hint to show accounts from a specific domain. +// It does NOT enforce access - the server does that via /api/check-access. 
const provider = new GoogleAuthProvider(); -provider.setCustomParameters({ - hd: "google.com", // Hint to show only google.com accounts (change if using different domain) -}); +const hintDomain = import.meta.env.VITE_ALLOWED_DOMAIN; +if (hintDomain) { + provider.setCustomParameters({ hd: hintDomain }); +} // ============================================================================ -// ACCESS CONTROL CONFIGURATION -// ============================================================================ - -// Access control config - reads from environment variables (set in .env) -// VITE_ALLOWED_DOMAIN: e.g., "google.com" or "" to disable domain check -// VITE_ALLOWED_EMAILS: comma-separated list, e.g., "alice@example.com,bob@partner.org" -const ALLOWED_DOMAIN = import.meta.env.VITE_ALLOWED_DOMAIN ?? "google.com"; -const ALLOWED_EMAILS: string[] = (import.meta.env.VITE_ALLOWED_EMAILS ?? "") - .split(",") - .map((e: string) => e.trim().toLowerCase()) - .filter((e: string) => e.length > 0); - +// AUTHENTICATION FUNCTIONS // ============================================================================ /** - * Check if user's email is allowed (by domain or whitelist) - */ -function isAllowedEmail(email: string | null): boolean { - if (!email) return false; - const emailLower = email.toLowerCase(); - - // Check whitelist first - if (ALLOWED_EMAILS.length > 0 && ALLOWED_EMAILS.includes(emailLower)) { - return true; - } - - // Check domain if configured - if (ALLOWED_DOMAIN && emailLower.endsWith(`@${ALLOWED_DOMAIN}`)) { - return true; - } - - // No restrictions configured = allow all - if (!ALLOWED_DOMAIN && ALLOWED_EMAILS.length === 0) { - return true; - } - - return false; -} - -/** - * Get current user if authenticated and from allowed domain - * In local dev mode (no Firebase), returns null + * Get current Firebase user (if authenticated) + * Note: This only checks Firebase auth, not server authorization */ export function getCurrentUser(): User | null { if (!auth) return 
null; - const user = auth.currentUser; - if (user && isAllowedEmail(user.email)) { - return user; - } - return null; + return auth.currentUser; } /** * Get ID token for API requests - * In local dev mode, returns null (API server should allow unauthenticated requests locally) + * In local dev mode, returns null (API server allows unauthenticated requests locally) */ export async function getIdToken(): Promise<string | null> { if (!auth) return null; - const user = getCurrentUser(); + const user = auth.currentUser; if (!user) return null; try { return await user.getIdToken(); @@ -122,10 +92,31 @@ export async function getIdToken(): Promise<string | null> { } } +/** + * Check with server if the current user is authorized + * This is the ONLY place authorization is checked - the server is the source of truth. + * Returns true if authorized, false otherwise. + */ +export async function checkServerAuthorization(): Promise<boolean> { + const token = await getIdToken(); + if (!token) return false; + + try { + const response = await fetch("/api/check-access", { + method: "GET", + headers: { Authorization: `Bearer ${token}` }, + }); + return response.ok; + } catch (error) { + console.error("[Auth] Server authorization check failed:", error); + return false; + } +} + /** * Sign in with Google - * Returns user if successful and from allowed domain, null otherwise - * In local dev mode, this should not be called (UI bypasses auth) + * Returns user if Firebase auth succeeds, null if cancelled + * IMPORTANT: Caller must then call checkServerAuthorization() to verify access */ export async function signInWithGoogle(): Promise<User | null> { if (!auth) { @@ -134,16 +125,8 @@ export async function signInWithGoogle(): Promise<User | null> { } try { const result = await signInWithPopup(auth, provider); - const user = result.user; - - if (!isAllowedEmail(user.email)) { - console.warn(`[Auth] User ${user.email} not from ${ALLOWED_DOMAIN}`); - await signOut(auth); - throw new Error(`Access restricted to @${ALLOWED_DOMAIN} accounts`); - } - -
console.log(`[Auth] Signed in: ${user.email}`); - return user; + console.log(`[Auth] Firebase sign-in successful: ${result.user.email}`); + return result.user; } catch (error: any) { if (error.code === "auth/popup-closed-by-user") { console.log("[Auth] Sign-in cancelled by user"); @@ -164,34 +147,26 @@ export async function signOutUser(): Promise { /** * Subscribe to auth state changes - * Callback receives user if authenticated and from allowed domain, null otherwise - * In local dev mode, immediately calls back with a mock "authenticated" state + * Callback receives user if authenticated, null otherwise + * Note: This only tracks Firebase auth state, not server authorization */ export function onAuthChange( callback: (user: User | null) => void ): () => void { // Local dev mode: no Firebase, skip auth entirely if (!auth) { - // Immediately trigger callback as "authenticated" in local dev mode - // We pass null but main.ts will check isFirebaseConfigured to bypass auth setTimeout(() => callback(null), 0); - return () => {}; // No-op unsubscribe + return () => {}; } - return onAuthStateChanged(auth, (user) => { - if (user && isAllowedEmail(user.email)) { - callback(user); - } else { - callback(null); - } - }); + return onAuthStateChanged(auth, callback); } /** - * Check if user is authenticated - * In local dev mode, returns false (but app bypasses auth check) + * Check if user is authenticated with Firebase + * Note: This does not check server authorization */ export function isAuthenticated(): boolean { if (!auth) return false; - return getCurrentUser() !== null; + return auth.currentUser !== null; } diff --git a/samples/personalized_learning/src/main.ts b/samples/personalized_learning/src/main.ts index 20097c3a0..f7c1f8b26 100644 --- a/samples/personalized_learning/src/main.ts +++ b/samples/personalized_learning/src/main.ts @@ -17,6 +17,7 @@ import { signOutUser, getIdToken, isFirebaseConfigured, + checkServerAuthorization, } from "./firebase-auth"; // Store 
current user for display @@ -36,12 +37,22 @@ async function init() { } // Set up auth state listener - onAuthChange((user) => { + onAuthChange(async (user) => { if (user) { - currentUserEmail = user.email; - console.log(`[Demo] Authenticated as ${user.email}`); - showApp(); - initializeApp(); + // User is authenticated with Firebase, now check server authorization + console.log(`[Demo] Firebase auth OK: ${user.email}, checking server authorization...`); + const authorized = await checkServerAuthorization(); + if (authorized) { + currentUserEmail = user.email; + console.log(`[Demo] Authorized: ${user.email}`); + showApp(); + initializeApp(); + } else { + // User authenticated but not authorized - sign them out + console.log(`[Demo] Not authorized: ${user.email}`); + await signOutUser(); + showLoginScreen("Your email is not authorized to access this application."); + } } else { currentUserEmail = null; console.log("[Demo] Not authenticated"); @@ -51,7 +62,7 @@ async function init() { } // Show login screen -function showLoginScreen() { +function showLoginScreen(errorMessage?: string) { const appContainer = document.getElementById("app-container"); const loginScreen = document.getElementById("login-screen"); @@ -69,7 +80,7 @@ function showLoginScreen() {

Personalized Learning Demo

- +